diff --git a/src/Assembler.c b/src/Assembler.c index 4c27d39..9f526ff 100644 --- a/src/Assembler.c +++ b/src/Assembler.c @@ -9,16 +9,17 @@ /* - * If the entry in UsedRegisters - * that correlates to the position of a register in Registers - * is 1, - * then that register is classed as used - - * it has useful data inside it. + * Stores how many hardware registers are being used at any one time. + * It is empirically proven that only 4 clobber registers are + * needed for any arbitrary length program. + * + * If UsedRegisters[i] =? 1, then Registers[i] contains useful data. + * If UsedRegisters[i] =? 0, then Registers[i] is unused. * - * if the entry is 0, then it is free. */ static int UsedRegisters[4]; + /* The https://en.wikipedia.org/wiki/X86_calling_conventions#Microsoft_x64_calling_convention * calling convention on Windows requires that * the last 4 arguments are placed in registers @@ -26,25 +27,43 @@ static int UsedRegisters[4]; * This order must be preserved, and they must be placed * right to left. * - * That is the reason for the weird arrangement here. - * The parameter registers are last, in reverse order. + * The 4 clobber registers are first, and the 4 parameter registers are last. + */ +static char* Registers[8] = { "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" }; +static char* DoubleRegisters[8] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" }; +static char* ByteRegisters[8] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" }; + +/* + * For ease of reading later code, we store the valid x86 comparison instructions, + * and the inverse jump instructions together, in a synchronized fashion. 
*/ -static char* Registers[10] = { "%rsi", "%rdi", "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" }; -static char* DoubleRegisters[10] = { "%esi", "%edi", "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" }; -static char* ByteRegisters[10] = { "%sil", "%dil", "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" }; static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" }; static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"}; +// How far above the base pointer is the last local? static int LocalVarOffset; +// How far must we lower the base pointer to retrieve the parameters? static int StackFrameOffset; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * R O O T O F A S S E M B L E R * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +// Just a short "hack" to make sure we only dump the tree the first time this function is called static int Started = 0; +/* + * Walk the AST tree given, and generate the assembly code that represents + * it. + * + * @param Node: The current Node to compile. If needed, its children will be parsed recursively. + * @param Register: The index of Registers to store the result of the current compilation. + * @param ParentOp: The Operation of the parent of the current Node. + * + * @return dependant on the Node. Typically the Register that stores the result of the Node's operation. 
+ * + */ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { int LeftVal, RightVal; if(!Started && OptDumpTree) @@ -83,14 +102,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { if(Node->Right) RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation); - -/* if(Node->Operation == TERM_INTLITERAL) - printf("int %d\n", Node->IntValue); - else - printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal); - - */ - switch(Node->Operation) { case OP_ADD: return AsAdd(LeftVal, RightVal); @@ -141,31 +152,13 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { case OP_WIDEN: printf("\tWidening types..\r\n"); - return LeftVal; //AsWiden(LeftVal, Node->Left->ExprType, Node->ExprType); + return LeftVal; case OP_RET: printf("\tReturning from %s\n", Node->Symbol->Name); AsReturn(FunctionEntry, LeftVal); return -1; - /* case OP_EQUAL: - return AsEqual(LeftVal, RightVal); - - case OP_INEQ: - return AsIneq(LeftVal, RightVal); - - case OP_LESS: - return AsLess(LeftVal, RightVal); - - case OP_GREAT: - return AsGreat(LeftVal, RightVal); - - case OP_LESSE: - return AsLessE(LeftVal, RightVal); - - case OP_GREATE: - return AsGreatE(LeftVal, RightVal); */ - case OP_EQUAL: case OP_INEQ: case OP_LESS: @@ -179,7 +172,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { case REF_IDENT: - //printf("\tReferencing variable %s %s with type %s and storage %d\r\n", Symbols[Node->Value.ID].Name, Node->RVal ? " rval " : "", ParentOp, Symbols[Node->Value.ID].Storage); if(Node->RVal || ParentOp == OP_DEREF) { if(Node->Symbol->Storage == SC_LOCAL || Node->Symbol->Storage == SC_PARAM) return AsLdLocalVar(Node->Symbol, Node->Operation); @@ -199,11 +191,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { DeallocateAllRegisters(); return -1; - /* case OP_LOOP: - // We only do while for now.. 
- return AsWhile(Node); - break; */ - case OP_BITAND: return AsBitwiseAND(LeftVal, RightVal); @@ -252,24 +239,31 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { * * * * R E G I S T E R M A N A G E M E N T * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +// Set all Registers to unused. void DeallocateAllRegisters() { UsedRegisters[0] = UsedRegisters[1] = UsedRegisters[2] = UsedRegisters[3] = 0; } +/* + * Search for an unused register, allocate it, and return it. + * If none available, cancel compilation. + */ int RetrieveRegister() { - //printf("Current state of registers: %x, %x, %x, %x\n", UsedRegisters[0], UsedRegisters[1], UsedRegisters[2], UsedRegisters[3]); - for (size_t i = 0; i < 4; i++) { if(UsedRegisters[i] == 0) { UsedRegisters[i] = 1; return i; } } - fprintf(stderr, "Out of registers!\n"); exit(1); } +/* + * Set the given register to unused. + * If the register is not used, it is an invalid state. + * @param Register: The Registers index to deallocate. + */ void DeallocateRegister(int Register) { if(UsedRegisters[Register] != 1) { fprintf(stderr, "Error trying to free register %d\n", Register); @@ -283,10 +277,25 @@ void DeallocateRegister(int Register) { * * * * * * S T A C K M A N A G E M E N T * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * Prepare a new stack frame pointer. + * This resets the highest local. + * + */ void AsNewStackFrame() { LocalVarOffset = 0; } +/* + * Given the type of input, how far do we need to go down the stack frame + * to store or retrieve this type? + * + * The stack must be 4-bytes aligned, so we set a hard minimum. + * + * @param Type: The DataTypes we want to store. + * @return the offset to store the type, taking into account the current state of the stack frame. + * + */ int AsCalcOffset(int Type) { LocalVarOffset += PrimitiveSize(Type) > 4 ? 
PrimitiveSize(Type) : 4; return -LocalVarOffset; @@ -296,12 +305,19 @@ int AsCalcOffset(int Type) { * * * * C O D E G E N E R A T I O N * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * A way to keep track of the largest label number. + * Call this function to increase the number SRG-like. + * + * @return the highest available label number + * + */ int NewLabel(void) { static int id = 1; return id++; } - +// Assemble an If statement int AsIf(struct ASTNode* Node) { int FalseLabel, EndLabel; @@ -333,6 +349,7 @@ int AsIf(struct ASTNode* Node) { return -1; } +// Assemble a comparison int AsCompare(int Operation, int RegisterLeft, int RegisterRight) { printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight); @@ -346,6 +363,7 @@ int AsCompare(int Operation, int RegisterLeft, int RegisterRight) { return RegisterRight; } +// Assemble an inverse comparison (a one-line jump) int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) { if(Operation < OP_EQUAL || Operation > OP_GREATE) Die("Bad Operation in AsCompareJmp"); @@ -359,16 +377,24 @@ int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) return -1; } +// Assemble an immediate jump void AsJmp(int Label) { printf("\t\tJumping to label %d\n", Label); fprintf(OutputFile, "\tjmp\tL%d\n", Label); } +/* Create a new base label + * @param Label: The number to create the label of + */ void AsLabel(int Label) { printf("\tCreating label %d\n", Label); fprintf(OutputFile, "\nL%d:\n", Label); } +/* + * Assemble a new global string into the data segment. + * @param Value: The name of the string, as a string + */ int AsNewString(char* Value) { int Label = NewLabel(); char* CharPtr; @@ -382,12 +408,17 @@ int AsNewString(char* Value) { return Label; } +/* + * Load a string into a Register. 
+ * @param ID: the Label number of the string + */ int AsLoadString(int ID) { int Register = RetrieveRegister(); fprintf(OutputFile, "\tleaq\tL%d(\%%rip), %s\r\n", ID, Registers[Register]); return Register; } +// Assemble a While loop int AsWhile(struct ASTNode* Node) { int BodyLabel, BreakLabel; @@ -418,6 +449,7 @@ int AsWhile(struct ASTNode* Node) { } +// Load a value into a register. int AsLoad(int Value) { int Register = RetrieveRegister(); @@ -428,6 +460,7 @@ int AsLoad(int Value) { return Register; } +// Assemble an addition. int AsAdd(int Left, int Right) { printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]); @@ -437,6 +470,7 @@ int AsAdd(int Left, int Right) { return Right; } +// Assemble a multiplication. int AsMul(int Left, int Right) { printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]); @@ -446,6 +480,7 @@ int AsMul(int Left, int Right) { return Right; } +// Assemble a subtraction. int AsSub(int Left, int Right) { printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]); @@ -455,6 +490,7 @@ int AsSub(int Left, int Right) { return Left; } +// Assemble a division. int AsDiv(int Left, int Right) { printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]); @@ -467,12 +503,18 @@ int AsDiv(int Left, int Right) { return Left; } +// Assemble an ASL int AsShl(int Register, int Val) { printf("\tShifting %s to the left by %d bits.\n", Registers[Register], Val); fprintf(OutputFile, "\tsalq\t$%d, %s\n", Val, Registers[Register]); return Register; } +/* + * Load a global variable into a register, with optional pre/post-inc/dec + * @param Entry: The variable to load. 
+ * @param Operation: An optional SyntaxOps element + */ int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) { int Reg = RetrieveRegister(); @@ -543,6 +585,11 @@ int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) { return Reg; } +/* + * Store a value from a register into a global variable. + * @param Entry: The variable to store into. + * @param Register: The Registers index containing the value to store. + */ int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) { printf("\tStoring contents of %s into %s, type %d, globally:\n", Registers[Register], Entry->Name, Entry->Type); @@ -568,6 +615,12 @@ int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) { return Register; } +/* + * Load a value from a local variable into a register, with optional post/pre-inc/dec + * @param Entry: The local variable to read + * @param Operation: An optional SyntaxOps entry + */ + int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) { int Reg = RetrieveRegister(); @@ -637,7 +690,13 @@ int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) { return Reg; } - + +/* + * Store a value from a register into a local variable. + * @param Entry: The local variable to write to. 
+ * @param Register: The Registers index containing the desired value + * + */ int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) { printf("\tStoring contents of %s into %s, type %d, locally\n", Registers[Register], Entry->Name, Entry->Type); @@ -663,6 +722,7 @@ int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) { return Register; } +// Assemble a pointerisation int AsAddr(struct SymbolTableEntry* Entry) { int Register = RetrieveRegister(); printf("\tSaving pointer of %s into %s\n", Entry->Name, Registers[Register]); @@ -671,6 +731,7 @@ int AsAddr(struct SymbolTableEntry* Entry) { return Register; } +// Assemble a dereference int AsDeref(int Reg, int Type) { int DestSize = PrimitiveSize(ValueAt(Type)); @@ -693,6 +754,7 @@ int AsDeref(int Reg, int Type) { return Reg; } +// Assemble a store-through-dereference int AsStrDeref(int Register1, int Register2, int Type) { printf("\tStoring contents of %s into %s through a dereference, type %d\n", Registers[Register1], Registers[Register2], Type); @@ -711,6 +773,7 @@ int AsStrDeref(int Register1, int Register2, int Type) { return Register1; } +// Assemble a global symbol (variable, struct, enum, function, string) void AsGlobalSymbol(struct SymbolTableEntry* Entry) { int TypeSize; @@ -732,6 +795,7 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry) { } } +// Assemble a function call, with all associated parameter bumping and stack movement. int AsCallWrapper(struct ASTNode* Node) { struct ASTNode* CompositeTree = Node->Left; int Register, Args = 0; @@ -747,6 +811,7 @@ int AsCallWrapper(struct ASTNode* Node) { return AsCall(Node->Symbol, Args); } +// Copy a function argument from Register to argument Position void AsCopyArgs(int Register, int Position) { if(Position > 4) { // Args above 4 go on the stack fprintf(OutputFile, "\tpushq\t%s\n", Registers[Register]); @@ -755,6 +820,8 @@ void AsCopyArgs(int Register, int Position) { } } +// Assemble an actual function call. 
+// NOTE: this should not be called directly. Use AsCallWrapper. int AsCall(struct SymbolTableEntry* Entry, int Args) { int OutRegister = RetrieveRegister(); @@ -771,6 +838,7 @@ int AsCall(struct SymbolTableEntry* Entry, int Args) { return OutRegister; } +// Assemble a function return. int AsReturn(struct SymbolTableEntry* Entry, int Register) { printf("\t\tCreating return for function %s\n", Entry->Name); @@ -794,39 +862,46 @@ int AsReturn(struct SymbolTableEntry* Entry, int Register) { } AsJmp(Entry->EndLabel); - } + +// Assemble a =? int AsEqual(int Left, int Right) { // Set the lowest bit if left = right return AsCompare(OP_EQUAL, Left, Right); } +// Assemble a != int AsIneq(int Left, int Right) { // Set the lowest bit if left != right return AsCompare(OP_INEQ, Left, Right); } +// Assemble a < int AsLess(int Left, int Right) { // Set the lowest bit if left < right return AsCompare(OP_LESS, Left, Right); } +// Assemble a > int AsGreat(int Left, int Right) { // Set the lowest bit if left > right return AsCompare(OP_GREAT, Left, Right); } +// Assemble a <= int AsLessE(int Left, int Right) { // Set the lowest bit if left <= right return AsCompare(OP_LESSE, Left, Right); } +// Assemble a => int AsGreatE(int Left, int Right) { // Set the lowest bit if left => right return AsCompare(OP_GREATE, Left, Right); } +// Assemble a print statement void AssemblerPrint(int Register) { printf("\t\tPrinting Register %s\n", Registers[Register]); @@ -837,34 +912,40 @@ void AssemblerPrint(int Register) { DeallocateRegister(Register); } +// Assemble a & int AsBitwiseAND(int Left, int Right) { fprintf(OutputFile, "\tandq\t%s, %s\n", Registers[Left], Registers[Right]); DeallocateRegister(Left); return Right; } +// Assemble a | int AsBitwiseOR(int Left, int Right) { fprintf(OutputFile, "\torq\t%s, %s\n", Registers[Left], Registers[Right]); DeallocateRegister(Left); return Right; } +// Assemble a ^ int AsBitwiseXOR(int Left, int Right) { fprintf(OutputFile, "\txorq\t%s, %s\n", Registers[Left], 
Registers[Right]); DeallocateRegister(Left); return Right; } +// Assemble a unary - (arithmetic negation, negq) int AsNegate(int Register) { fprintf(OutputFile, "\tnegq\t%s\n", Registers[Register]); return Register; } +// Assemble a ~ (bitwise complement, notq) int AsInvert(int Register) { fprintf(OutputFile, "\tnotq\t%s\n", Registers[Register]); return Register; } +// Assemble a ! int AsBooleanNOT(int Register) { fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]); fprintf(OutputFile, "\tsete\t%s\n", ByteRegisters[Register]); @@ -872,6 +953,7 @@ int AsBooleanNOT(int Register) { return Register; } +// Assemble a << int AsShiftLeft(int Left, int Right) { fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]); fprintf(OutputFile, "\tshlq\t\%%cl, %s\n", Registers[Left]); @@ -879,6 +961,7 @@ int AsShiftLeft(int Left, int Right) { return Left; } +// Assemble a >> int AsShiftRight(int Left, int Right) { fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]); fprintf(OutputFile, "\tshrq\t\%%cl, %s\n", Registers[Left]); @@ -886,6 +969,8 @@ int AsShiftRight(int Left, int Right) { return Left; } +// Assemble a conversion from arbitrary type to boolean. +// Facilitates if(ptr) int AsBooleanConvert(int Register, int Operation, int Label) { fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]); @@ -903,6 +988,7 @@ int AsBooleanConvert(int Register, int Operation, int Label) { return Register; } +// Assemble the start of an assembly file void AssemblerPreamble() { DeallocateAllRegisters(); fputs( @@ -912,6 +998,15 @@ void AssemblerPreamble() { OutputFile); } +/* + * Assemble a function block for the Entry. + * Handles all stack logic for local variables, + * as well as copying parameters out of registers and + * into the spill space. 
+ * + * @param Entry: The function to generate + * + */ void AsFunctionPreamble(struct SymbolTableEntry* Entry) { char* Name = Entry->Name; struct SymbolTableEntry* Param, *Local; @@ -958,6 +1053,8 @@ void AsFunctionPreamble(struct SymbolTableEntry* Entry) { } + +// Assemble the epilogue of a function void AsFunctionEpilogue(struct SymbolTableEntry* Entry) { AsLabel(Entry->EndLabel); diff --git a/src/Delegate.c b/src/Delegate.c index a38f287..a8e9842 100644 --- a/src/Delegate.c +++ b/src/Delegate.c @@ -56,6 +56,14 @@ char* Suffixate(char* String, char Suffix) { * Opens the input and output files, * Parses the global symbols of the file, including function blocks. * Generates the assembly representation of the source code + * Saves said assembly into the OutputFile + * Returns the name of the file containing the generated assembly. + * Note that the Input file must have a valid extension. + * For Erythro code, this is .er + * The generated assembly will have the extension .s + * + * @param InputFile: The filename of the Erythro Source code to compile + * @return the filename of the generated PECOFF32+ assembly */ char* Compile(char* InputFile) { char* OutputName; @@ -93,6 +101,20 @@ char* Compile(char* InputFile) { return OutputName; } +/* + * Processes the output from the Compile function. + * Passes the generated .s file to (currently, as of + * 21/01/2021), the GNU GAS assembler, to create an + * object file. + * + * It does this by invoking the command on a shell. + * TODO: fork it? + * + * @param InputFile: The .s assembly file to be processed + * @return the name of the generated object file. + * + */ + char* Assemble(char* InputFile) { char Command[TEXTLEN]; int Error; @@ -116,6 +138,18 @@ char* Assemble(char* InputFile) { return OutputName; } +/* + * Processes the outputted object files, turning them into an executable. + * It does this by invoking (currently, as of 21/01/2021) the GNU GCC + * compiler. 
+ * It invokes GCC rather than LD so that it automatically links against + * libc and the CRT natives. + * + * @param Output: The desired name for the executable. + * @param Objects: A list of the Object files to be linked. + * + */ + void Link(char* Output, char* Objects[]) { int Count, Size = TEXTLEN, Error; char Command[TEXTLEN], *CommandPtr; @@ -143,7 +177,16 @@ void Link(char* Output, char* Objects[]) { } } +/* + * Prints information about the available flags and + * how to structure the command. + * @param ProgName: The name of the file that was + * attempted to run. + */ + void DisplayUsage(char* ProgName) { + fprintf(stderr, "Erythro Compiler v5 - Gemwire Institute\n"); + fprintf(stderr, "***************************************\n"); fprintf(stderr, "Usage: %s -[vcST] {-o output} file [file ...]\n", ProgName); fprintf(stderr, " -v: Verbose Output Level\n"); fprintf(stderr, " -c: Compile without Linking\n"); diff --git a/src/Dump.c b/src/Dump.c index f80dfef..218af32 100644 --- a/src/Dump.c +++ b/src/Dump.c @@ -12,6 +12,9 @@ static int GenerateSrg() { return srgId++; } +/* + * Walk the Node tree, and dump the AST tree to stdout. + */ void DumpTree(struct ASTNode* Node, int level) { int Lfalse, Lstart, Lend;