项目的完整代码在 C2j-Compiler <https://github.com/dejavudwh/C2j-Compiler>
前言
在上一篇解释完了一些基础的Java字节码指令后,就可以正式进入真正的代码生成部分了。但是这部分先说的是代码生成依靠的几个类,也就是用来生成指令的操作。
这一篇用到的文件都在codegen下:
 * Directive.java 
 * Instruction.java 
 * CodeGenerator.java 
 * ProgramGenerator.java 
Directive.java
这个是枚举类,用来生成一些比较特殊的指令
都生成像声明一个类或者一个方法的范围的指令,比较简单。
public enum Directive { CLASS_PUBLIC(".class public"), END_CLASS(".end 
class"), SUPER(".super"), FIELD_PRIVATE_STATIC(".field private static"), 
METHOD_STATIC(".method static"), METHOD_PUBLIC(".method public"), 
FIELD_PUBLIC(".field public"), METHOD_PUBBLIC_STATIC(".method public static"), 
END_METHOD(".end method"), LIMIT_LOCALS(".limit locals"), LIMIT_STACK(".limit 
stack"), VAR(".var"), LINE(".line"); private String text; Directive(String 
text) { this.text = text; } public String toString() { return text; } } 
Instruction.java
这也是一个枚举类,用来生成一些基本的指令
public enum Instruction { LDC("ldc"), GETSTATIC("getstatic"), 
SIPUSH("sipush"), IADD("iadd"), IMUL("imul"), ISUB("isub"), IDIV("idiv"), 
INVOKEVIRTUAL("invokevirtual"), INVOKESTATIC("invokestatic"), 
INVOKESPECIAL("invokespecial"), RETURN("return"), IRETURN("ireturn"), 
ILOAD("iload"), ISTORE("istore"), NEWARRAY("newarray"), NEW("new"), DUP("dup"), 
ASTORE("astore"), IASTORE("iastore"), ALOAD("aload"), PUTFIELD("putfield"), 
GETFIELD("getfield"), ANEWARRAY("anewarray"), AASTORE("aastore"), 
AALOAD("aaload"), IF_ICMPEG("if_icmpeq"), IF_ICMPNE("if_icmpne"), 
IF_ICMPLT("if_icmplt"), IF_ICMPGE("if_icmpge"), IF_ICMPGT("if_icmpgt"), 
IF_ICMPLE("if_icmple"), GOTO("goto"), IALOAD("iaload"); private String text; 
Instruction(String s) { this.text = s; } public String toString() { return 
text; } } 
CodeGenerator.java
重点来了,生成的逻辑主要都在CodeGenerator和ProgramGenerator里,CodeGenerator是ProgramGenerator的父类
CodeGenerator的构造函数new了一个输出流,用来输出字节码到xxx.j里
public CodeGenerator() { String assemblyFileName = programName + ".j"; try { 
bytecodeFile = new PrintWriter(new PrintStream(new File(assemblyFileName))); } 
catch (FileNotFoundException e) { e.printStackTrace(); } } 
emit、emitString、emitDirective、emitBlankLine都属于输出基本指令的方法,都有多个重载方法来应对不一样操作和操作数。需要注意的是,有的指令可能需要先缓存起来,在最后的时候一起提交,比如buffered、classDefine就是用来判断是不是应该先缓存的布尔值
public void emitString(String s) { if (buffered) { bufferedContent += s + 
"\n"; return; } if (classDefine) { classDefinition += s + "\n"; return; } 
bytecodeFile.print(s); bytecodeFile.flush(); } public void emit(Instruction 
opcode) { if (buffered) { bufferedContent += "\t" + opcode.toString() + "\n"; 
return; } if (classDefine) { classDefinition += "\t" + opcode.toString() + 
"\n"; return; } bytecodeFile.println("\t" + opcode.toString()); 
bytecodeFile.flush(); ++instructionCount; } public void emitDirective(Directive 
directive, String operand1, String operand2, String operand3) { if (buffered) { 
bufferedContent += directive.toString() + " " + operand1 + " " + operand2 + " " 
+ operand3 + "\n"; return; } if (classDefine) { classDefinition += 
directive.toString() + " " + operand1 + " " + operand2 + " " + operand3 + "\n"; 
return; } bytecodeFile.println(directive.toString() + " " + operand1 + " " + 
operand2 + " " + operand3); ++instructionCount; } public void emitBlankLine() { 
if (buffered) { bufferedContent += "\n"; return; } if (classDefine) { 
classDefinition += "\n"; return; } bytecodeFile.println(); 
bytecodeFile.flush(); } 
ProgramGenerator.java
ProgramGenerator继承了CodeGenerator,也就是继承了一些基本的操作,在上一篇像结构体、数组的指令输出都在这个类里
处理嵌套
先看四个属性,这四个属性主要是就来处理嵌套的分支和循环。
private int branch_count = 0; private int branch_out = 0; private String 
embedded = ""; private int loopCount = 0; 
 * 
当没嵌套一个ifelse语句时候 embedded属性就会加上一个字符‘i’,而当退出一个分支的时候,就把这个‘i’切割掉
 * 
branch_count和branch_out都用来标志相同作用域的分支跳转
 * 
也就是说如果有嵌套就用embedded来处理,如果是用一个作用域的分支就用branch_count和branch_out来做标志
public void incraseIfElseEmbed() { embedded += "i"; } public void 
decraseIfElseEmbed() { embedded = embedded.substring(1); } public void 
emitBranchOut() { String s = "\n" + embedded + "branch_out" + branch_out + 
":\n"; this.emitString(s); branch_out++; } 
loopCount则是对嵌套循环的处理
public void emitLoopBranch() { String s = "\n" + "loop" + loopCount + ":" + 
"\n"; emitString(s); } public String getLoopBranch() { return "loop" + 
loopCount; } public void increaseLoopCount() { loopCount++; } 
处理结构体
putStructToClassDeclaration是定义结构体的,也就是new一个类。declareStructAsClass则是处理结构体里的变量,也就是相当于处理类的属性
 * 结构体如果已经类的定义的话,就会加入structNameList,不要进行重复的定义 
 * symbol.getValueSetter()如果不是空的话就表明是一个结构体数组,这样就直接从数组加载这个实例,不用在堆栈上创建 
 * declareStructAsClass则是依照上一篇说的Java字节码有关类的指令来创建一个类 public void 
putStructToClassDeclaration(Symbol symbol) { Specifier sp = 
symbol.getSpecifierByType(Specifier.STRUCTURE); if (sp == null) { return; } 
StructDefine struct = sp.getStruct(); if 
(structNameList.contains(struct.getTag())) { return; } else { 
structNameList.add(struct.getTag()); } if (symbol.getValueSetter() == null) { 
this.emit(Instruction.NEW, struct.getTag()); this.emit(Instruction.DUP); 
this.emit(Instruction.INVOKESPECIAL, struct.getTag() + "/" + "<init>()V"); int 
idx = this.getLocalVariableIndex(symbol); this.emit(Instruction.ASTORE, "" + 
idx); } declareStructAsClass(struct); } private void 
declareStructAsClass(StructDefine struct) { this.setClassDefinition(true); 
this.emitDirective(Directive.CLASS_PUBLIC, struct.getTag()); 
this.emitDirective(Directive.SUPER, "java/lang/Object"); Symbol fields = 
struct.getFields(); do { String fieldName = fields.getName() + " "; if 
(fields.getDeclarator(Declarator.ARRAY) != null) { fieldName += "["; } if 
(fields.hasType(Specifier.INT)) { fieldName += "I"; } else if 
(fields.hasType(Specifier.CHAR)) { fieldName += "C"; } else if 
(fields.hasType(Specifier.CHAR) && fields.getDeclarator(Declarator.POINTER) != 
null) { fieldName += "Ljava/lang/String;"; } 
this.emitDirective(Directive.FIELD_PUBLIC, fieldName); fields = 
fields.getNextSymbol(); } while (fields != null); 
this.emitDirective(Directive.METHOD_PUBLIC, "<init>()V"); 
this.emit(Instruction.ALOAD, "0"); String superInit = 
"java/lang/Object/<init>()V"; this.emit(Instruction.INVOKESPECIAL, superInit); 
fields = struct.getFields(); do { this.emit(Instruction.ALOAD, "0"); String 
fieldName = struct.getTag() + "/" + fields.getName(); String fieldType = ""; if 
(fields.hasType(Specifier.INT)) { fieldType = "I"; 
this.emit(Instruction.SIPUSH, "0"); } else if (fields.hasType(Specifier.CHAR)) 
{ fieldType = "C"; this.emit(Instruction.SIPUSH, "0"); } else if 
(fields.hasType(Specifier.CHAR) && fields.getDeclarator(Declarator.POINTER) != 
null) { fieldType = "Ljava/lang/String;"; this.emit(Instruction.LDC, " "); } 
String classField = fieldName + " " + fieldType; 
this.emit(Instruction.PUTFIELD, classField); fields = fields.getNextSymbol(); } 
while (fields != null); this.emit(Instruction.RETURN); 
this.emitDirective(Directive.END_METHOD); 
this.emitDirective(Directive.END_CLASS); this.setClassDefinition(false); } 
获取堆栈信息
其它有关Java字节码其实都是根据上一篇来完成的,逻辑不复杂,现在来看一个方法:getLocalVariableIndex,这个方法是获取变量当前在队列里的位置的
 * 先拿到当前执行的函数,然后拿到函数的对应参数,再反转(这和参数压栈的顺序有关) 
 * 然后把当前符号对应作用域的符号都添加到列表里 
 * 之后遍历这个列表就可以算出这个符号对应在队列里的位置 public int getLocalVariableIndex(Symbol symbol) 
{ TypeSystem typeSys = TypeSystem.getInstance(); String funcName = 
nameStack.peek(); Symbol funcSym = typeSys.getSymbolByText(funcName, 0, 
"main"); ArrayList<Symbol> localVariables = new ArrayList<>(); Symbol s = 
funcSym.getArgList(); while (s != null) { localVariables.add(s); s = 
s.getNextSymbol(); } Collections.reverse(localVariables); ArrayList<Symbol> 
list = typeSys.getSymbolsByScope(symbol.getScope()); for (int i = 0; i < 
list.size(); i++) { if (!localVariables.contains(list.get(i))) { 
localVariables.add(list.get(i)); } } for (int i = 0; i < localVariables.size(); 
i++) { if (localVariables.get(i) == symbol) { return i; } } return -1; } 
小结
这一篇主要是根据上一篇的JVM字节码来对不同的操作提供不同的方法来去输出这些指令
欢迎Star!
热门工具 换一换