Compare commits

..

3 Commits

Author SHA1 Message Date
e19a945934
Finish commenting all files 2021-01-21 20:01:30 +00:00
01d293f2c6
More comments - Pointers and Delegate 2021-01-21 01:53:00 +00:00
18b5da209d
Major refactoring
Comments added to the parser and lexer, more to come
2021-01-20 19:22:15 +00:00
11 changed files with 952 additions and 372 deletions

View File

@ -18,6 +18,11 @@
extern_ struct SymbolTableEntry* Globals, *GlobalsEnd; extern_ struct SymbolTableEntry* Globals, *GlobalsEnd;
extern_ struct SymbolTableEntry* Locals, *LocalsEnd; extern_ struct SymbolTableEntry* Locals, *LocalsEnd;
extern_ struct SymbolTableEntry* Params, *ParamsEnd; extern_ struct SymbolTableEntry* Params, *ParamsEnd;
extern_ struct SymbolTableEntry* Structs, *StructsEnd;
extern_ struct SymbolTableEntry* StructMembers, *StructMembersEnd;
extern_ struct SymbolTableEntry* Unions, *UnionsEnd;
extern_ struct SymbolTableEntry* Enums, *EnumsEnd;
extern_ bool OptDumpTree; extern_ bool OptDumpTree;
extern_ bool OptKeepAssembly; extern_ bool OptKeepAssembly;

View File

@ -92,7 +92,8 @@ enum TokenTypes {
KW_ELSE, KW_ELSE,
KW_WHILE, KW_WHILE,
KW_FOR, KW_FOR,
KW_RETURN KW_RETURN,
KW_STRUCT
}; };
/* /*
@ -178,7 +179,6 @@ struct ASTNode {
union { union {
int Size; // OP_SCALE's linear representation int Size; // OP_SCALE's linear representation
int IntValue; // TERM_INTLIT's Value int IntValue; // TERM_INTLIT's Value
int ID; // LV_IDENT's Symbols[] index.
}; };
}; };
@ -215,6 +215,9 @@ struct SymbolTableEntry {
enum StorageScope { enum StorageScope {
SC_GLOBAL = 1, // Global Scope SC_GLOBAL = 1, // Global Scope
SC_STRUCT, // Struct Definitions
SC_ENUM, // Enum Definitions
SC_MEMBER, // The members of Structs or Enums
//SC_CLASS, // Class-local definitions //SC_CLASS, // Class-local definitions
//SC_STATIC, // Static storage definitions //SC_STATIC, // Static storage definitions
SC_PARAM, // Function parameters SC_PARAM, // Function parameters
@ -274,7 +277,7 @@ void DisplayUsage(char* ProgName);
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
int Tokenise(struct Token* Token); void Tokenise();
void VerifyToken(int Type, char* TokenExpected); void VerifyToken(int Type, char* TokenExpected);
void RejectToken(struct Token* Token); void RejectToken(struct Token* Token);
@ -355,14 +358,15 @@ struct ASTNode* PrintStatement(void);
struct SymbolTableEntry* FindSymbol(char* Symbol); struct SymbolTableEntry* FindSymbol(char* Symbol);
struct SymbolTableEntry* FindLocal(char* Symbol); struct SymbolTableEntry* FindLocal(char* Symbol);
struct SymbolTableEntry* FindGlobal(char* Symbol); struct SymbolTableEntry* FindGlobal(char* Symbol);
struct SymbolTableEntry* FindStruct(char* Symbol);
struct SymbolTableEntry* FindMember(char* Symbol);
void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node); void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node);
void FreeLocals(); void FreeLocals();
void ClearTables(); void ClearTables();
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset); struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O N T R O L S T A T U S * * * * * * * * C O N T R O L S T A T U S * * * *
@ -460,7 +464,7 @@ void AsFunctionEpilogue(struct SymbolTableEntry* Entry);
* * * * D E C L A R A T I O N * * * * * * * * D E C L A R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope); struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
struct ASTNode* ParseIdentifier(void); struct ASTNode* ParseIdentifier(void);
struct ASTNode* IfStatement(); struct ASTNode* IfStatement();

View File

@ -9,16 +9,17 @@
/* /*
* If the entry in UsedRegisters * Stores how many hardware registers are being used at any one time.
* that correlates to the position of a register in Registers * It is empirically proven that only 4 clobber registers are
* is 1, * needed for any arbitrary length program.
* then that register is classed as used - *
* it has useful data inside it. * If UsedRegisters[i] =? 1, then Registers[i] contains useful data.
* If UsedRegisters[i] =? 0, then Registers[i] is unused.
* *
* if the entry is 0, then it is free.
*/ */
static int UsedRegisters[4]; static int UsedRegisters[4];
/* The https://en.wikipedia.org/wiki/X86_calling_conventions#Microsoft_x64_calling_convention /* The https://en.wikipedia.org/wiki/X86_calling_conventions#Microsoft_x64_calling_convention
* calling convention on Windows requires that * calling convention on Windows requires that
* the last 4 arguments are placed in registers * the last 4 arguments are placed in registers
@ -26,25 +27,43 @@ static int UsedRegisters[4];
* This order must be preserved, and they must be placed * This order must be preserved, and they must be placed
* right to left. * right to left.
* *
* That is the reason for the weird arrangement here. * The 4 clobber registers are first, and the 4 parameter registers are last.
* The parameter registers are last, in reverse order. */
static char* Registers[8] = { "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
static char* DoubleRegisters[8] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
static char* ByteRegisters[8] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
/*
* For ease of reading later code, we store the valid x86 comparison instructions,
* and the inverse jump instructions together, in a synchronized fashion.
*/ */
static char* Registers[10] = { "%rsi", "%rdi", "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
static char* DoubleRegisters[10] = { "%esi", "%edi", "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
static char* ByteRegisters[10] = { "%sil", "%dil", "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" }; static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" };
static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"}; static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"};
// How far above the base pointer is the last local?
static int LocalVarOffset; static int LocalVarOffset;
// How far must we lower the base pointer to retrieve the parameters?
static int StackFrameOffset; static int StackFrameOffset;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * R O O T O F A S S E M B L E R * * * * * * * * R O O T O F A S S E M B L E R * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Just a short "hack" to make sure we only dump the tree the first time this function is called
static int Started = 0; static int Started = 0;
/*
* Walk the AST tree given, and generate the assembly code that represents
* it.
*
* @param Node: The current Node to compile. If needed, its children will be parsed recursively.
* @param Register: The index of Registers to store the result of the current compilation.
* @param ParentOp: The Operation of the parent of the current Node.
*
* @return dependent on the Node. Typically the Register that stores the result of the Node's operation.
*
*/
int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
int LeftVal, RightVal; int LeftVal, RightVal;
if(!Started && OptDumpTree) if(!Started && OptDumpTree)
@ -83,14 +102,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
if(Node->Right) if(Node->Right)
RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation); RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation);
/* if(Node->Operation == TERM_INTLITERAL)
printf("int %d\n", Node->IntValue);
else
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
*/
switch(Node->Operation) { switch(Node->Operation) {
case OP_ADD: case OP_ADD:
return AsAdd(LeftVal, RightVal); return AsAdd(LeftVal, RightVal);
@ -141,31 +152,13 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
case OP_WIDEN: case OP_WIDEN:
printf("\tWidening types..\r\n"); printf("\tWidening types..\r\n");
return LeftVal; //AsWiden(LeftVal, Node->Left->ExprType, Node->ExprType); return LeftVal;
case OP_RET: case OP_RET:
printf("\tReturning from %s\n", Node->Symbol->Name); printf("\tReturning from %s\n", Node->Symbol->Name);
AsReturn(FunctionEntry, LeftVal); AsReturn(FunctionEntry, LeftVal);
return -1; return -1;
/* case OP_EQUAL:
return AsEqual(LeftVal, RightVal);
case OP_INEQ:
return AsIneq(LeftVal, RightVal);
case OP_LESS:
return AsLess(LeftVal, RightVal);
case OP_GREAT:
return AsGreat(LeftVal, RightVal);
case OP_LESSE:
return AsLessE(LeftVal, RightVal);
case OP_GREATE:
return AsGreatE(LeftVal, RightVal); */
case OP_EQUAL: case OP_EQUAL:
case OP_INEQ: case OP_INEQ:
case OP_LESS: case OP_LESS:
@ -179,7 +172,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
case REF_IDENT: case REF_IDENT:
//printf("\tReferencing variable %s %s with type %s and storage %d\r\n", Symbols[Node->Value.ID].Name, Node->RVal ? " rval " : "", ParentOp, Symbols[Node->Value.ID].Storage);
if(Node->RVal || ParentOp == OP_DEREF) { if(Node->RVal || ParentOp == OP_DEREF) {
if(Node->Symbol->Storage == SC_LOCAL || Node->Symbol->Storage == SC_PARAM) if(Node->Symbol->Storage == SC_LOCAL || Node->Symbol->Storage == SC_PARAM)
return AsLdLocalVar(Node->Symbol, Node->Operation); return AsLdLocalVar(Node->Symbol, Node->Operation);
@ -199,11 +191,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
DeallocateAllRegisters(); DeallocateAllRegisters();
return -1; return -1;
/* case OP_LOOP:
// We only do while for now..
return AsWhile(Node);
break; */
case OP_BITAND: case OP_BITAND:
return AsBitwiseAND(LeftVal, RightVal); return AsBitwiseAND(LeftVal, RightVal);
@ -252,24 +239,31 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
* * * * R E G I S T E R M A N A G E M E N T * * * * * * * * R E G I S T E R M A N A G E M E N T * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Set all Registers to unused.
void DeallocateAllRegisters() { void DeallocateAllRegisters() {
UsedRegisters[0] = UsedRegisters[1] = UsedRegisters[2] = UsedRegisters[3] = 0; UsedRegisters[0] = UsedRegisters[1] = UsedRegisters[2] = UsedRegisters[3] = 0;
} }
/*
* Search for an unused register, allocate it, and return it.
* If none available, cancel compilation.
*/
int RetrieveRegister() { int RetrieveRegister() {
//printf("Current state of registers: %x, %x, %x, %x\n", UsedRegisters[0], UsedRegisters[1], UsedRegisters[2], UsedRegisters[3]);
for (size_t i = 0; i < 4; i++) { for (size_t i = 0; i < 4; i++) {
if(UsedRegisters[i] == 0) { if(UsedRegisters[i] == 0) {
UsedRegisters[i] = 1; UsedRegisters[i] = 1;
return i; return i;
} }
} }
fprintf(stderr, "Out of registers!\n"); fprintf(stderr, "Out of registers!\n");
exit(1); exit(1);
} }
/*
* Set the given register to unused.
* If the register is not used, it is an invalid state.
* @param Register: The Registers index to deallocate.
*/
void DeallocateRegister(int Register) { void DeallocateRegister(int Register) {
if(UsedRegisters[Register] != 1) { if(UsedRegisters[Register] != 1) {
fprintf(stderr, "Error trying to free register %d\n", Register); fprintf(stderr, "Error trying to free register %d\n", Register);
@ -283,10 +277,25 @@ void DeallocateRegister(int Register) {
* * * * * * S T A C K M A N A G E M E N T * * * * * * * * * * * * S T A C K M A N A G E M E N T * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Prepare a new stack frame pointer.
* This resets the highest local.
*
*/
void AsNewStackFrame() { void AsNewStackFrame() {
LocalVarOffset = 0; LocalVarOffset = 0;
} }
/*
* Given the type of input, how far do we need to go down the stack frame
* to store or retrieve this type?
*
* The stack must be 4-byte aligned, so every entry takes up at least 4 bytes.
*
* @param Type: The DataTypes we want to store.
* @return the offset to store the type, taking into account the current state of the stack frame.
*
*/
int AsCalcOffset(int Type) { int AsCalcOffset(int Type) {
LocalVarOffset += PrimitiveSize(Type) > 4 ? PrimitiveSize(Type) : 4; LocalVarOffset += PrimitiveSize(Type) > 4 ? PrimitiveSize(Type) : 4;
return -LocalVarOffset; return -LocalVarOffset;
@ -296,12 +305,19 @@ int AsCalcOffset(int Type) {
* * * * C O D E G E N E R A T I O N * * * * * * * * C O D E G E N E R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* A way to keep track of the largest label number.
* Each call returns the current number, then increments it, SRG-like.
*
* @return the highest available label number
*
*/
int NewLabel(void) { int NewLabel(void) {
static int id = 1; static int id = 1;
return id++; return id++;
} }
// Assemble an If statement
int AsIf(struct ASTNode* Node) { int AsIf(struct ASTNode* Node) {
int FalseLabel, EndLabel; int FalseLabel, EndLabel;
@ -333,6 +349,7 @@ int AsIf(struct ASTNode* Node) {
return -1; return -1;
} }
// Assemble a comparison
int AsCompare(int Operation, int RegisterLeft, int RegisterRight) { int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight); printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight);
@ -346,6 +363,7 @@ int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
return RegisterRight; return RegisterRight;
} }
// Assemble an inverse comparison (a one-line jump)
int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) { int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) {
if(Operation < OP_EQUAL || Operation > OP_GREATE) if(Operation < OP_EQUAL || Operation > OP_GREATE)
Die("Bad Operation in AsCompareJmp"); Die("Bad Operation in AsCompareJmp");
@ -359,16 +377,24 @@ int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label)
return -1; return -1;
} }
// Assemble an immediate jump
void AsJmp(int Label) { void AsJmp(int Label) {
printf("\t\tJumping to label %d\n", Label); printf("\t\tJumping to label %d\n", Label);
fprintf(OutputFile, "\tjmp\tL%d\n", Label); fprintf(OutputFile, "\tjmp\tL%d\n", Label);
} }
/* Create a new base label
* @param Label: The number to create the label of
*/
void AsLabel(int Label) { void AsLabel(int Label) {
printf("\tCreating label %d\n", Label); printf("\tCreating label %d\n", Label);
fprintf(OutputFile, "\nL%d:\n", Label); fprintf(OutputFile, "\nL%d:\n", Label);
} }
/*
* Assemble a new global string into the data segment.
* @param Value: The name of the string, as a string
*/
int AsNewString(char* Value) { int AsNewString(char* Value) {
int Label = NewLabel(); int Label = NewLabel();
char* CharPtr; char* CharPtr;
@ -382,12 +408,17 @@ int AsNewString(char* Value) {
return Label; return Label;
} }
/*
* Load a string into a Register.
* @param ID: the Label number of the string
*/
int AsLoadString(int ID) { int AsLoadString(int ID) {
int Register = RetrieveRegister(); int Register = RetrieveRegister();
fprintf(OutputFile, "\tleaq\tL%d(\%%rip), %s\r\n", ID, Registers[Register]); fprintf(OutputFile, "\tleaq\tL%d(\%%rip), %s\r\n", ID, Registers[Register]);
return Register; return Register;
} }
// Assemble a While loop
int AsWhile(struct ASTNode* Node) { int AsWhile(struct ASTNode* Node) {
int BodyLabel, BreakLabel; int BodyLabel, BreakLabel;
@ -418,6 +449,7 @@ int AsWhile(struct ASTNode* Node) {
} }
// Load a value into a register.
int AsLoad(int Value) { int AsLoad(int Value) {
int Register = RetrieveRegister(); int Register = RetrieveRegister();
@ -428,6 +460,7 @@ int AsLoad(int Value) {
return Register; return Register;
} }
// Assemble an addition.
int AsAdd(int Left, int Right) { int AsAdd(int Left, int Right) {
printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]); printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]);
@ -437,6 +470,7 @@ int AsAdd(int Left, int Right) {
return Right; return Right;
} }
// Assemble a multiplication.
int AsMul(int Left, int Right) { int AsMul(int Left, int Right) {
printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]); printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]);
@ -446,6 +480,7 @@ int AsMul(int Left, int Right) {
return Right; return Right;
} }
// Assemble a subtraction.
int AsSub(int Left, int Right) { int AsSub(int Left, int Right) {
printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]); printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]); fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]);
@ -455,6 +490,7 @@ int AsSub(int Left, int Right) {
return Left; return Left;
} }
// Assemble a division.
int AsDiv(int Left, int Right) { int AsDiv(int Left, int Right) {
printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]); printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]); fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]);
@ -467,12 +503,18 @@ int AsDiv(int Left, int Right) {
return Left; return Left;
} }
// Assemble an ASL
int AsShl(int Register, int Val) { int AsShl(int Register, int Val) {
printf("\tShifting %s to the left by %d bits.\n", Registers[Register], Val); printf("\tShifting %s to the left by %d bits.\n", Registers[Register], Val);
fprintf(OutputFile, "\tsalq\t$%d, %s\n", Val, Registers[Register]); fprintf(OutputFile, "\tsalq\t$%d, %s\n", Val, Registers[Register]);
return Register; return Register;
} }
/*
* Load a global variable into a register, with optional pre/post-inc/dec
* @param Entry: The variable to load.
* @param Operation: An optional SyntaxOps element
*/
int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) { int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
int Reg = RetrieveRegister(); int Reg = RetrieveRegister();
@ -543,6 +585,11 @@ int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
return Reg; return Reg;
} }
/*
* Store a value from a register into a global variable.
* @param Entry: The variable to store into.
* @param Register: The Registers index containing the value to store.
*/
int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) { int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
printf("\tStoring contents of %s into %s, type %d, globally:\n", Registers[Register], Entry->Name, Entry->Type); printf("\tStoring contents of %s into %s, type %d, globally:\n", Registers[Register], Entry->Name, Entry->Type);
@ -568,6 +615,12 @@ int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
return Register; return Register;
} }
/*
* Load a value from a local variable into a register, with optional post/pre-inc/dec
* @param Entry: The local variable to read
* @param Operation: An optional SyntaxOps entry
*/
int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) { int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
int Reg = RetrieveRegister(); int Reg = RetrieveRegister();
@ -637,7 +690,13 @@ int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
return Reg; return Reg;
} }
/*
* Store a value from a register into a local variable.
* @param Entry: The local variable to write to.
* @param Register: The Registers index containing the desired value
*
*/
int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) { int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
printf("\tStoring contents of %s into %s, type %d, locally\n", Registers[Register], Entry->Name, Entry->Type); printf("\tStoring contents of %s into %s, type %d, locally\n", Registers[Register], Entry->Name, Entry->Type);
@ -663,6 +722,7 @@ int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
return Register; return Register;
} }
// Assemble a pointerisation
int AsAddr(struct SymbolTableEntry* Entry) { int AsAddr(struct SymbolTableEntry* Entry) {
int Register = RetrieveRegister(); int Register = RetrieveRegister();
printf("\tSaving pointer of %s into %s\n", Entry->Name, Registers[Register]); printf("\tSaving pointer of %s into %s\n", Entry->Name, Registers[Register]);
@ -671,6 +731,7 @@ int AsAddr(struct SymbolTableEntry* Entry) {
return Register; return Register;
} }
// Assemble a dereference
int AsDeref(int Reg, int Type) { int AsDeref(int Reg, int Type) {
int DestSize = PrimitiveSize(ValueAt(Type)); int DestSize = PrimitiveSize(ValueAt(Type));
@ -693,6 +754,7 @@ int AsDeref(int Reg, int Type) {
return Reg; return Reg;
} }
// Assemble a store-through-dereference
int AsStrDeref(int Register1, int Register2, int Type) { int AsStrDeref(int Register1, int Register2, int Type) {
printf("\tStoring contents of %s into %s through a dereference, type %d\n", Registers[Register1], Registers[Register2], Type); printf("\tStoring contents of %s into %s through a dereference, type %d\n", Registers[Register1], Registers[Register2], Type);
@ -711,6 +773,7 @@ int AsStrDeref(int Register1, int Register2, int Type) {
return Register1; return Register1;
} }
// Assemble a global symbol (variable, struct, enum, function, string)
void AsGlobalSymbol(struct SymbolTableEntry* Entry) { void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
int TypeSize; int TypeSize;
@ -732,6 +795,7 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
} }
} }
// Assemble a function call, with all associated parameter bumping and stack movement.
int AsCallWrapper(struct ASTNode* Node) { int AsCallWrapper(struct ASTNode* Node) {
struct ASTNode* CompositeTree = Node->Left; struct ASTNode* CompositeTree = Node->Left;
int Register, Args = 0; int Register, Args = 0;
@ -747,6 +811,7 @@ int AsCallWrapper(struct ASTNode* Node) {
return AsCall(Node->Symbol, Args); return AsCall(Node->Symbol, Args);
} }
// Copy a function argument from Register to argument Position
void AsCopyArgs(int Register, int Position) { void AsCopyArgs(int Register, int Position) {
if(Position > 4) { // Args above 4 go on the stack if(Position > 4) { // Args above 4 go on the stack
fprintf(OutputFile, "\tpushq\t%s\n", Registers[Register]); fprintf(OutputFile, "\tpushq\t%s\n", Registers[Register]);
@ -755,6 +820,8 @@ void AsCopyArgs(int Register, int Position) {
} }
} }
// Assemble an actual function call.
// NOTE: this should not be called. Use AsCallWrapper.
int AsCall(struct SymbolTableEntry* Entry, int Args) { int AsCall(struct SymbolTableEntry* Entry, int Args) {
int OutRegister = RetrieveRegister(); int OutRegister = RetrieveRegister();
@ -771,6 +838,7 @@ int AsCall(struct SymbolTableEntry* Entry, int Args) {
return OutRegister; return OutRegister;
} }
// Assemble a function return.
int AsReturn(struct SymbolTableEntry* Entry, int Register) { int AsReturn(struct SymbolTableEntry* Entry, int Register) {
printf("\t\tCreating return for function %s\n", Entry->Name); printf("\t\tCreating return for function %s\n", Entry->Name);
@ -794,39 +862,46 @@ int AsReturn(struct SymbolTableEntry* Entry, int Register) {
} }
AsJmp(Entry->EndLabel); AsJmp(Entry->EndLabel);
} }
// Assemble a =?
int AsEqual(int Left, int Right) { int AsEqual(int Left, int Right) {
// Set the lowest bit if left = right // Set the lowest bit if left = right
return AsCompare(OP_EQUAL, Left, Right); return AsCompare(OP_EQUAL, Left, Right);
} }
// Assemble a !=
int AsIneq(int Left, int Right) { int AsIneq(int Left, int Right) {
// Set the lowest bit if left != right // Set the lowest bit if left != right
return AsCompare(OP_INEQ, Left, Right); return AsCompare(OP_INEQ, Left, Right);
} }
// Assemble a <
int AsLess(int Left, int Right) { int AsLess(int Left, int Right) {
// Set the lowest bit if left < right // Set the lowest bit if left < right
return AsCompare(OP_LESS, Left, Right); return AsCompare(OP_LESS, Left, Right);
} }
// Assemble a >
int AsGreat(int Left, int Right) { int AsGreat(int Left, int Right) {
// Set the lowest bit if left > right // Set the lowest bit if left > right
return AsCompare(OP_GREAT, Left, Right); return AsCompare(OP_GREAT, Left, Right);
} }
// Assemble a <=
int AsLessE(int Left, int Right) { int AsLessE(int Left, int Right) {
// Set the lowest bit if left <= right // Set the lowest bit if left <= right
return AsCompare(OP_LESSE, Left, Right); return AsCompare(OP_LESSE, Left, Right);
} }
// Assemble a =>
int AsGreatE(int Left, int Right) { int AsGreatE(int Left, int Right) {
// Set the lowest bit if left => right // Set the lowest bit if left => right
return AsCompare(OP_GREATE, Left, Right); return AsCompare(OP_GREATE, Left, Right);
} }
// Assemble a print statement
void AssemblerPrint(int Register) { void AssemblerPrint(int Register) {
printf("\t\tPrinting Register %s\n", Registers[Register]); printf("\t\tPrinting Register %s\n", Registers[Register]);
@ -837,34 +912,40 @@ void AssemblerPrint(int Register) {
DeallocateRegister(Register); DeallocateRegister(Register);
} }
// Assemble a &
int AsBitwiseAND(int Left, int Right) { int AsBitwiseAND(int Left, int Right) {
fprintf(OutputFile, "\tandq\t%s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\tandq\t%s, %s\n", Registers[Left], Registers[Right]);
DeallocateRegister(Left); DeallocateRegister(Left);
return Right; return Right;
} }
// Assemble a |
int AsBitwiseOR(int Left, int Right) { int AsBitwiseOR(int Left, int Right) {
fprintf(OutputFile, "\torq\t%s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\torq\t%s, %s\n", Registers[Left], Registers[Right]);
DeallocateRegister(Left); DeallocateRegister(Left);
return Right; return Right;
} }
// Assemble a ^
int AsBitwiseXOR(int Left, int Right) { int AsBitwiseXOR(int Left, int Right) {
fprintf(OutputFile, "\txorq\t%s, %s\n", Registers[Left], Registers[Right]); fprintf(OutputFile, "\txorq\t%s, %s\n", Registers[Left], Registers[Right]);
DeallocateRegister(Left); DeallocateRegister(Left);
return Right; return Right;
} }
// Assemble a unary - (arithmetic negation)
int AsNegate(int Register) { int AsNegate(int Register) {
fprintf(OutputFile, "\tnegq\t%s\n", Registers[Register]); fprintf(OutputFile, "\tnegq\t%s\n", Registers[Register]);
return Register; return Register;
} }
// Assemble a ~ (bitwise complement)
int AsInvert(int Register) { int AsInvert(int Register) {
fprintf(OutputFile, "\tnotq\t%s\n", Registers[Register]); fprintf(OutputFile, "\tnotq\t%s\n", Registers[Register]);
return Register; return Register;
} }
// Assemble a !
int AsBooleanNOT(int Register) { int AsBooleanNOT(int Register) {
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]); fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
fprintf(OutputFile, "\tsete\t%s\n", ByteRegisters[Register]); fprintf(OutputFile, "\tsete\t%s\n", ByteRegisters[Register]);
@ -872,6 +953,7 @@ int AsBooleanNOT(int Register) {
return Register; return Register;
} }
// Assemble a <<
int AsShiftLeft(int Left, int Right) { int AsShiftLeft(int Left, int Right) {
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]); fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
fprintf(OutputFile, "\tshlq\t\%%cl, %s\n", Registers[Left]); fprintf(OutputFile, "\tshlq\t\%%cl, %s\n", Registers[Left]);
@ -879,6 +961,7 @@ int AsShiftLeft(int Left, int Right) {
return Left; return Left;
} }
// Assemble a >>
int AsShiftRight(int Left, int Right) { int AsShiftRight(int Left, int Right) {
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]); fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
fprintf(OutputFile, "\tshrq\t\%%cl, %s\n", Registers[Left]); fprintf(OutputFile, "\tshrq\t\%%cl, %s\n", Registers[Left]);
@ -886,6 +969,8 @@ int AsShiftRight(int Left, int Right) {
return Left; return Left;
} }
// Assemble a conversion from arbitrary type to boolean.
// Facilitates if(ptr)
int AsBooleanConvert(int Register, int Operation, int Label) { int AsBooleanConvert(int Register, int Operation, int Label) {
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]); fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
@ -903,6 +988,7 @@ int AsBooleanConvert(int Register, int Operation, int Label) {
return Register; return Register;
} }
// Assemble the start of an assembly file
void AssemblerPreamble() { void AssemblerPreamble() {
DeallocateAllRegisters(); DeallocateAllRegisters();
fputs( fputs(
@ -912,6 +998,15 @@ void AssemblerPreamble() {
OutputFile); OutputFile);
} }
/*
* Assemble a function block for the Entry.
* Handles all stack logic for local variables,
* as well as copying parameters out of registers and
* into the spill space.
*
* @param Entry: The function to generate
*
*/
void AsFunctionPreamble(struct SymbolTableEntry* Entry) { void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
char* Name = Entry->Name; char* Name = Entry->Name;
struct SymbolTableEntry* Param, *Local; struct SymbolTableEntry* Param, *Local;
@ -958,6 +1053,8 @@ void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
} }
// Assemble the epilogue of a function
void AsFunctionEpilogue(struct SymbolTableEntry* Entry) { void AsFunctionEpilogue(struct SymbolTableEntry* Entry) {
AsLabel(Entry->EndLabel); AsLabel(Entry->EndLabel);

View File

@ -7,6 +7,29 @@
#include <Data.h> #include <Data.h>
#include <errno.h> #include <errno.h>
/********************************************************************************
* The Delegate is what allows the compiler backend to be abstracted. *
* *
* It delegates the operation of compiling, assembling and linking *
* to the proper subsystems. *
* *
* As of right now (20/01/2021) it uses the GCC backend. *
* *
* Compile parses files to their AST and generates mingw PECOFF32+ assembly, *
* Assemble uses GCC-as to compile the assembly to an object file. *
* Link links the object files into an executable. *
* *
********************************************************************************/
/*
* Files inputted must have a suffix/extension (because we're on Windows right now)
* This is the way to change the suffix for when a file is converted to another.
*
* @param String: The full, current file name
* @param Suffix: The new, desired extension.
*
*/
char* Suffixate(char* String, char Suffix) { char* Suffixate(char* String, char Suffix) {
char* Pos, *NewStr; char* Pos, *NewStr;
@ -26,6 +49,22 @@ char* Suffixate(char* String, char Suffix) {
return NewStr; return NewStr;
} }
/*
* Starts most of the work to do with the Erythro compiler.
* It:
* Opens the input and output files,
* Parses the global symbols of the file, including function blocks.
* Generates the assembly representation of the source code
* Saves said assembly into the OutputFile
* Returns the name of the file containing the generated assembly.
* Note that the Input file must have a valid extension.
* For Erythro code, this is .er
* The generated assembly will have the extension .s
*
* @param InputFile: The filename of the Erythro Source code to compile
* @return the filename of the generated PECOFF32+ assembly
*/
char* Compile(char* InputFile) { char* Compile(char* InputFile) {
char* OutputName; char* OutputName;
OutputName = Suffixate(InputFile, 's'); OutputName = Suffixate(InputFile, 's');
@ -52,7 +91,7 @@ char* Compile(char* InputFile) {
if(OptVerboseOutput) if(OptVerboseOutput)
printf("Compiling %s\r\n", InputFile); printf("Compiling %s\r\n", InputFile);
Tokenise(&CurrentToken); Tokenise();
AssemblerPreamble(); AssemblerPreamble();
@ -62,6 +101,20 @@ char* Compile(char* InputFile) {
return OutputName; return OutputName;
} }
/*
* Processes the output from the Compile function.
* Passes the generated .s file to (currently, as of
* 21/01/2021), the GNU GAS assembler, to create an
* object file.
*
* It does this by invoking the command on a shell.
* TODO: fork it?
*
* @param InputFile: The .s assembly file to be processed
* @return the name of the generated object file.
*
*/
char* Assemble(char* InputFile) { char* Assemble(char* InputFile) {
char Command[TEXTLEN]; char Command[TEXTLEN];
int Error; int Error;
@ -85,6 +138,18 @@ char* Assemble(char* InputFile) {
return OutputName; return OutputName;
} }
/*
* Processes the outputted object files, turning them into an executable.
* It does this by invoking (currently, as of 21/01/2021) the GNU GCC
* compiler.
* It invokes GCC rather than LD so that it automatically links against
* libc and the CRT natives.
*
* @param Output: The desired name for the executable.
* @param Objects: A list of the Object files to be linked.
*
*/
void Link(char* Output, char* Objects[]) { void Link(char* Output, char* Objects[]) {
int Count, Size = TEXTLEN, Error; int Count, Size = TEXTLEN, Error;
char Command[TEXTLEN], *CommandPtr; char Command[TEXTLEN], *CommandPtr;
@ -112,7 +177,16 @@ void Link(char* Output, char* Objects[]) {
} }
} }
/*
* Prints information about the available flags and
* how to structure the command.
* @param ProgName: The name of the file that was
* attempted to run.
*/
void DisplayUsage(char* ProgName) { void DisplayUsage(char* ProgName) {
fprintf(stderr, "Erythro Compiler v5 - Gemwire Institute\n");
fprintf(stderr, "***************************************\n");
fprintf(stderr, "Usage: %s -[vcST] {-o output} file [file ...]\n", ProgName); fprintf(stderr, "Usage: %s -[vcST] {-o output} file [file ...]\n", ProgName);
fprintf(stderr, " -v: Verbose Output Level\n"); fprintf(stderr, " -v: Verbose Output Level\n");
fprintf(stderr, " -c: Compile without Linking\n"); fprintf(stderr, " -c: Compile without Linking\n");

View File

@ -12,6 +12,9 @@ static int GenerateSrg() {
return srgId++; return srgId++;
} }
/*
* Walk the Node tree, and dump the AST tree to stdout.
*/
void DumpTree(struct ASTNode* Node, int level) { void DumpTree(struct ASTNode* Node, int level) {
int Lfalse, Lstart, Lend; int Lfalse, Lstart, Lend;

View File

@ -11,10 +11,29 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * C H A R S T R E AM * * * * * * * * * * * * C H A R S T R E AM * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* The Lexer holds a "stream" of characters.
* You may read a character from the stream, and if it is not
* the desired character, it may be placed into an "overread" buffer.
* The overread buffer is checked before the source file is read any further.
* This provides an effective way to "un-read" a character.
*
* @param Char: The character to "un-read"
*
*/
static void ReturnCharToStream(int Char) { static void ReturnCharToStream(int Char) {
Overread = Char; Overread = Char;
} }
/*
* NextChar allows you to ask the Lexer for the next useful character.
* As mentioned above, it checks the overread buffer first.
*
* @return the character as int
*
*/
static int NextChar(void) { static int NextChar(void) {
int Char; int Char;
@ -32,6 +51,10 @@ static int NextChar(void) {
return Char; return Char;
} }
/*
* Searches for the next useful character, skipping whitespace.
* @return the character as int.
*/
static int FindChar() { static int FindChar() {
int Char; int Char;
@ -45,14 +68,31 @@ static int FindChar() {
return Char; return Char;
} }
/*
* Allows the conversion between ASCII, hex and numerals.
* @param String: The set of all valid results
* @param Char: The ASCII character to convert
* @return the index of the first occurrence of Char within String, or -1 if Char is not in String.
*/
static int FindDigitFromPos(char* String, char Char) { static int FindDigitFromPos(char* String, char Char) {
char* Result = strchr(String, Char); char* Result = strchr(String, Char);
return(Result ? Result - String : -1); return(Result ? Result - String : -1);
} }
/*
* Facilitates the easy checking of expected tokens.
* NOTE: there is (soon to be) an optional variant of this function that
* reads a token but does not consume it ( via Tokenise )
*
* @param Type: The expected token, in terms of value of the TokenTypes enum.
* @param TokenExpected: A string to output when the token is not found.
*
*/
void VerifyToken(int Type, char* TokenExpected) { void VerifyToken(int Type, char* TokenExpected) {
if(CurrentToken.type == Type) if(CurrentToken.type == Type)
Tokenise(&CurrentToken); Tokenise();
else { else {
printf("Expected %s on line %d\n", TokenExpected, Line); printf("Expected %s on line %d\n", TokenExpected, Line);
exit(1); exit(1);
@ -61,6 +101,12 @@ void VerifyToken(int Type, char* TokenExpected) {
static struct Token* RejectedToken = NULL; static struct Token* RejectedToken = NULL;
/*
* Rejected Tokens and the Overread Stream are identical concepts.
* This was implemented first, but it is no longer used.
* TODO: Refactor this function out.
*/
void RejectToken(struct Token* Token) { void RejectToken(struct Token* Token) {
if(RejectedToken != NULL) if(RejectedToken != NULL)
Die("Cannot reject two tokens in a row!"); Die("Cannot reject two tokens in a row!");
@ -72,6 +118,21 @@ void RejectToken(struct Token* Token) {
* * * * L I T E R A L S A N D I D E N T I F I E R S * * * * * * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Facilitates the parsing of integer literals from the file.
* Currently only supports the decimal numbers, despite the
* FindDigitFromPos function allowing conversion.
*
* The function loops over the characters, multiplying by 10 and adding
* the new value on top, until a non-numeric character is found.
* At that point, it returns the non-numeric character to the Overread Stream
* and returns the calculated number.
*
* @param Char: The first number to scan.
* @return the full parsed number as an int.
*
*/
static int ReadInteger(int Char) { static int ReadInteger(int Char) {
int CurrentChar = 0; int CurrentChar = 0;
int IntegerValue = 0; int IntegerValue = 0;
@ -86,7 +147,23 @@ static int ReadInteger(int Char) {
return IntegerValue; return IntegerValue;
} }
// Variable identifier, keyword, function. /*
* An Identifier can be any of:
* * A function name
* * A variable name
* * A struct name
* / A class name
* / An annotation name
*
* This function allows a full name to be read into a buffer, with a defined
* start character and a defined maximum text size limit.
*
* @param Char: The first char of the Identifier.
* @param Buffer: The location to store the Identifier. (usually CurrentIdentifier, a compiler global defined for this purpose)
* @param Limit: The maximum Identifier length.
* @return the length of the parsed identifier
*
*/
static int ReadIdentifier(int Char, char* Buffer, int Limit) { static int ReadIdentifier(int Char, char* Buffer, int Limit) {
int ind = 0; int ind = 0;
@ -108,6 +185,17 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) {
return ind; return ind;
} }
/*
* Char literals appear as 'x'
*
* They are bounded by two apostrophes.
* They can contain any 1-byte ASCII character, as well as some
* predefined, standard escape codes.
* This function attempts to get the character from the file, with escape codes intact.
*
* @return the character as an int
*
*/
static int ReadCharLiteral() { static int ReadCharLiteral() {
int Char; int Char;
Char = NextChar(); Char = NextChar();
@ -131,7 +219,20 @@ static int ReadCharLiteral() {
return Char; return Char;
} }
/*
* String literals appear as "hello world"
*
* They are bounded by two quotation marks.
* They can contain an arbitrary length of text.
* They are backed by an array of chars (hence the char* type) and thus
* have a practically unlimited length.
*
* To read a String Literal, it is a simple matter of reading Char Literals until
* the String termination token is identified - the last quotation mark.
*
* @param Buffer: The buffer into which to write the string. (usually CurrentIdentifier, a compiler global defined for this purpose)
*
*/
static int ReadStringLiteral(char* Buffer) { static int ReadStringLiteral(char* Buffer) {
int Char; int Char;
@ -148,9 +249,18 @@ static int ReadStringLiteral(char* Buffer) {
} }
/* /*
* This function is what defines the valid keywords for the language * Keywords are source-code tokens / strings that are reserved for the compiler.
* //TODO: move this to a static list? * They cannot be used as identifiers on their own.
* //TODO: More optimisations? *
* This function is where all of the keywords are added, and where most aliases are going to be stored.
*
* It uses a switch on the first character of the input string as an optimisation - rather than checking each
* keyword against the String individually, it only needs to compare a single number. This can be optimised into
* a jump table by the compiler for further optimisation, making this one of the fastest ways to switch
* on a full string.
*
* @param Str: The keyword input to try to parse
* @return the token expressed in terms of values of the TokenTypes enum
* *
*/ */
static int ReadKeyword(char* Str) { static int ReadKeyword(char* Str) {
@ -203,7 +313,6 @@ static int ReadKeyword(char* Str) {
break; break;
case 'p': case 'p':
// This is a huge optimisation once we have as many keywords as a fully featured language.
if(!strcmp(Str, "print")) if(!strcmp(Str, "print"))
return KW_PRINT; return KW_PRINT;
break; break;
@ -212,6 +321,11 @@ static int ReadKeyword(char* Str) {
if(!strcmp(Str, "return")) if(!strcmp(Str, "return"))
return KW_RETURN; return KW_RETURN;
break; break;
case 's':
if(!strcmp(Str, "struct"))
return KW_STRUCT;
break;
case 'v': case 'v':
if(!strcmp(Str, "void")) if(!strcmp(Str, "void"))
@ -234,8 +348,21 @@ static int ReadKeyword(char* Str) {
* * * * T O K E N I S E R * * * * * * * * T O K E N I S E R * * * *
* * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * */
int Tokenise(struct Token* Token) { /*
* Handles the majority of the work of reading tokens into the stream.
* It reads chars with FindChar, categorizing individual characters or small
* strings into their proper expression (as a value of the TokenTypes enum)
*
* It also defers the reading of numeric literals and char literals to the proper functions.
*
* If needed, it can also read Identifiers, for variable or function naming.
*
* This function may be the main bottleneck in the lexer.
*
*/
void Tokenise() {
int Char, TokenType; int Char, TokenType;
struct Token* Token = &CurrentToken;
if(RejectedToken != NULL) { if(RejectedToken != NULL) {
Token = RejectedToken; Token = RejectedToken;
@ -439,7 +566,5 @@ int Tokenise(struct Token* Token) {
DieChar("Unrecognized character", Char); DieChar("Unrecognized character", Char);
} }
return 1;
} }

View File

@ -73,48 +73,54 @@ char* TokenNames[] = {
"While keyword", "While keyword",
"For keyword", "For keyword",
"Return keyword" "Return keyword",
"Struct keyword"
}; };
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
/* Line = 1; // Option initialisers
Overread = '\n';
CurrentGlobal = 0;
struct ASTNode* Node;
CurrentLocal = SYMBOLS - 1; */
OptDumpTree = false; OptDumpTree = false;
OptKeepAssembly = false; OptKeepAssembly = false;
OptAssembleFiles = false; OptAssembleFiles = false;
OptLinkFiles = true; OptLinkFiles = true;
OptVerboseOutput = false; OptVerboseOutput = false;
// Temporary .o storage and counter
char* ObjectFiles[100]; char* ObjectFiles[100];
int ObjectCount = 0; int ObjectCount = 0;
// Parse command line arguments.
int i; int i;
for(i = 1; i < argc; i++) { for(i = 1/*skip 0*/; i < argc; i++) {
if(*argv[i] != '-') // not a flag // If we're not a flag, we can skip.
// We only care about flags in rows.
// ie. erc >> -v -T -o << test.exe src/main.er
if(*argv[i] != '-')
break; break;
// Once we identify a flag, we need to make sure it's not just a minus in-place.
for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) {
// Finally, identify what option is being invoked.
switch(argv[i][j]) { switch(argv[i][j]) {
case 'o': case 'o': // output
OutputFileName = argv[++i]; OutputFileName = argv[++i];
break; break;
case 'T': case 'T': // Debug
OptDumpTree = true; OptDumpTree = true;
break; break;
case 'c': case 'c': // Compile only
OptAssembleFiles = true; OptAssembleFiles = true;
OptKeepAssembly = false; OptKeepAssembly = false;
OptLinkFiles = false; OptLinkFiles = false;
break; break;
case 'S': case 'S': // aSsemble only
OptAssembleFiles = false; OptAssembleFiles = false;
OptKeepAssembly = true; OptKeepAssembly = true;
OptLinkFiles = false; OptLinkFiles = false;
break; break;
case 'v': case 'v': // Verbose output
OptVerboseOutput = true; OptVerboseOutput = true;
break; break;
default: default:
@ -123,29 +129,42 @@ int main(int argc, char* argv[]) {
} }
} }
if(i >= argc) // If we didn't provide anything other than flags, we need to show how to use the program.
if(i >= argc)
DisplayUsage(argv[0]); DisplayUsage(argv[0]);
// For the rest of the files specified, we can iterate them left to right.
while(i < argc) { while(i < argc) {
// Compile the file by invoking the Delegate
CurrentASMFile = Compile(argv[i]); CurrentASMFile = Compile(argv[i]);
if(OptLinkFiles || OptAssembleFiles) { if(OptLinkFiles || OptAssembleFiles) {
// If we need to assemble (or link, which requires assembly)
// then we invoke the Delegate again
CurrentObjectFile = Assemble(CurrentASMFile); CurrentObjectFile = Assemble(CurrentASMFile);
// We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too.
if(ObjectCount == 98) { if(ObjectCount == 98) {
fprintf(stderr, "Too many inputs"); fprintf(stderr, "Too many inputs");
return 1; return 1; // We use return because we're in main, rather than invoking Die.
} }
// Move the ObjectCount forward.
ObjectFiles[ObjectCount++] = CurrentObjectFile; ObjectFiles[ObjectCount++] = CurrentObjectFile;
// Clear the new, forwarded index
ObjectFiles[ObjectCount] = NULL; ObjectFiles[ObjectCount] = NULL;
} }
if(!OptKeepAssembly) if(!OptKeepAssembly)
// unlink = delete
unlink(CurrentASMFile); unlink(CurrentASMFile);
i++; i++;
} }
if(OptLinkFiles) { if(OptLinkFiles) {
// If needed, invoke the Delegate one last time.
Link(OutputFileName, ObjectFiles); Link(OutputFileName, ObjectFiles);
if(!OptAssembleFiles) { if(!OptAssembleFiles) {
// Even though we need to assemble to link, we can respect the user's options and delete the intermediary files.
for(i = 0; ObjectFiles[i] != NULL; i++) for(i = 0; ObjectFiles[i] != NULL; i++)
unlink(ObjectFiles[i]); unlink(ObjectFiles[i]);
} }
@ -155,6 +174,11 @@ int main(int argc, char* argv[]) {
} }
/*
* Akin to a Halt and Catch Fire method.
* Simply prints an error, cleans up handles, and closes.
*/
void Die(char* Error) { void Die(char* Error) {
fprintf(stderr, "%s on line %d\n", Error, Line); fprintf(stderr, "%s on line %d\n", Error, Line);
fclose(OutputFile); fclose(OutputFile);
@ -162,6 +186,9 @@ void Die(char* Error) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra String attached.
*/
void DieMessage(char* Error, char* Reason) { void DieMessage(char* Error, char* Reason) {
fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line); fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line);
fclose(OutputFile); fclose(OutputFile);
@ -169,6 +196,9 @@ void DieMessage(char* Error, char* Reason) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra integer attached.
*/
void DieDecimal(char* Error, int Number) { void DieDecimal(char* Error, int Number) {
fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line); fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line);
fclose(OutputFile); fclose(OutputFile);
@ -176,6 +206,9 @@ void DieDecimal(char* Error, int Number) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra character attached.
*/
void DieChar(char* Error, int Char) { void DieChar(char* Error, int Char) {
fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line); fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line);
fclose(OutputFile); fclose(OutputFile);

View File

@ -10,12 +10,10 @@
#include "Data.h" #include "Data.h"
/* /*
* Precedence is directly related to Token Type. * The Precedence of an operator is directly related to Token Type.
* * Precedence determines how soon the operator and its surrounding values
* enum TokenTypes { * will be calculated and aliased.
* LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT * This allows for things like the common Order of Operations.
* };
*
*/ */
static int Precedence[] = { static int Precedence[] = {
0, 10, // EOF, ASSIGN 0, 10, // EOF, ASSIGN
@ -30,6 +28,13 @@ static int Precedence[] = {
110 // / 110 // /
}; };
/*
* Handles gathering the precedence of an operator from its token,
* in terms of values of the TokenTypes enum.
*
* Error handling is also done here, so that EOF or non-operators are not executed.
*
*/
static int OperatorPrecedence(int Token) { static int OperatorPrecedence(int Token) {
int Prec = Precedence[Token]; int Prec = Precedence[Token];
@ -40,6 +45,13 @@ static int OperatorPrecedence(int Token) {
return Prec; return Prec;
} }
/*
* If the value is a right-expression, or in other words is right associative,
* then it can be safely calculated beforehand and aliased to a value.
* In this case, we can try to alias (or constant fold) everything on the right side
* of an assignment.
*/
static int IsRightExpr(int Token) { static int IsRightExpr(int Token) {
return (Token == LI_EQUAL); return (Token == LI_EQUAL);
} }
@ -48,6 +60,29 @@ static int IsRightExpr(int Token) {
* * * N O D E C O N S T R U C T I O N * * * * * * N O D E C O N S T R U C T I O N * * *
* * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * */
/*
* ASTNodes form the structure of the language that moves the bulk of
* data around within the compiler.
* They contain:
* * An Operation (usually 1:1 with an input token),
* * A Type (to identify the size of data it contains),
* * Two more Left and Right ASTNodes (the children of this node, forming a tree)
* * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop)
* * A Symbol Table Entry
* * An Integer Value
* * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval
*
* This is the only function where they are constructed.
*
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Left: The Node that is attached to the left side branch of this root.
* @param Middle: The Node that is attached to the middle of this root, if applicable.
* @param Right: The Node that is attached to the right side branch of this root.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTNode(int Operation, int Type, struct ASTNode* ConstructASTNode(int Operation, int Type,
struct ASTNode* Left, struct ASTNode* Left,
struct ASTNode* Middle, struct ASTNode* Middle,
@ -75,10 +110,28 @@ struct ASTNode* ConstructASTNode(int Operation, int Type,
} }
/*
* AST Leaves are characterized by their lack of child nodes.
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) { struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) {
return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue); return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue);
} }
/*
* AST Branches are characterized by having only one child node.
* These are sometimes called Unary Branches.
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Left: The Node that is attached to the left side branch of this root.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) { struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) {
return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue); return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue);
} }
@ -89,10 +142,10 @@ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left
* * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * */
/* /*
* Take a Token Type, and convert it to an AST-Node Operation. * TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that
* * these are synchronized well.
* TokenTypes and SyntaxOps are synchronized to make this easy. * This allows the parsing operation to be little more than a bounds check.
* * Otherwise, this would be a gigantic switch statement.
*/ */
int ParseTokenToOperation(int Token) { int ParseTokenToOperation(int Token) {
@ -103,11 +156,13 @@ int ParseTokenToOperation(int Token) {
} }
/* /*
* Parse a primary (terminal) expression. * Primary expressions may be any one of:
* This currently handles literal expressions, constructing a leaf node * * A terminal integer literal
* and handing control back up the chain. * * A terminal string literal
* * * A variable
* * A collection of expressions bounded by parentheses.
* *
* @return the AST Node that represents this expression
*/ */
struct ASTNode* ParsePrimary(void) { struct ASTNode* ParsePrimary(void) {
@ -134,7 +189,7 @@ struct ASTNode* ParsePrimary(void) {
case LI_LPARE: case LI_LPARE:
// Starting a ( expr ) block // Starting a ( expr ) block
Tokenise(&CurrentToken); Tokenise();
Node = ParsePrecedenceASTNode(0); Node = ParsePrecedenceASTNode(0);
@ -144,12 +199,26 @@ struct ASTNode* ParsePrimary(void) {
} }
Tokenise(&CurrentToken); Tokenise();
return Node; return Node;
} }
/*
* Parse a single binary expression.
* It ensures that these expressions are parsed to their full extent, that
* the order of operations is upheld, that the precedence of the prior
* iteration is considered, and that every error is handled.
*
* This is where all of the right-associative statements are folded, where
* type mismatches and widening are handled properly, and where all parsing
* is over by the time the end tokens ") } ] ;" are encountered.
*
* @param PreviousTokenPrecedence: The precedence of the operator to the left.
* @return the AST Node corresponding to this block.
*
*/
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
struct ASTNode* LeftNode, *RightNode; struct ASTNode* LeftNode, *RightNode;
struct ASTNode* LeftTemp, *RightTemp; struct ASTNode* LeftTemp, *RightTemp;
@ -159,25 +228,19 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode = PrefixStatement(); LeftNode = PrefixStatement();
NodeType = CurrentToken.type; NodeType = CurrentToken.type;
//printf("%d\r\n", CurrentToken.type);
if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) { if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) {
//printf("Current token matches ; ) ]\r\n");
LeftNode->RVal = 1; return LeftNode; LeftNode->RVal = 1; return LeftNode;
} }
//printf("Current token has value %d, type %s\n", CurrentToken.value, TokenNames[CurrentToken.type]);
while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) { while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) {
//printf("inside while\n"); Tokenise();
Tokenise(&CurrentToken);
if(CurrentToken.type == LI_RPARE) if(CurrentToken.type == LI_RPARE)
break; break;
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]); RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
/*
LeftType = LeftNode->ExprType;
RightType = RightNode->ExprType;
*/
/** /**
* While parsing this node, we may need to widen some types. * While parsing this node, we may need to widen some types.
* This requires a few functions and checks. * This requires a few functions and checks.
@ -194,9 +257,6 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
if(LeftNode == NULL) if(LeftNode == NULL)
Die("Incompatible Expression encountered in assignment"); Die("Incompatible Expression encountered in assignment");
//printf("\tAssigning variable: %s value %d\n", Symbols[FindSymbol(CurrentIdentifier)].Name, RightNode->Value.IntValue);
// LeftNode holds the target, the target variable in this case // LeftNode holds the target, the target variable in this case
printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name); printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name);
@ -212,11 +272,9 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode->RVal = 1; LeftNode->RVal = 1;
RightNode->RVal = 1; RightNode->RVal = 1;
//printf("mutate left\r\n");
LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType); LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType);
//printf("mutate right\r\n");
RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType); RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType);
//printf("mutate right over\r\n");
/** /**
* If both are null, the types are incompatible. * If both are null, the types are incompatible.
*/ */
@ -267,105 +325,21 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
} }
/* struct ASTNode* ParseMultiplicativeASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParsePrimary();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
Tokenise(&CurrentToken);
RightNode = ParsePrimary();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* struct ASTNode* ParseAdditiveASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParseMultiplicativeASTNode();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while(1) {
Tokenise(&CurrentToken);
RightNode = ParseMultiplicativeASTNode();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * * I N T E R P R E T A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
int ParseAST(struct ASTNode* Node) {
int LeftVal, RightVal;
if(Node->Left)
LeftVal = ParseAST(Node->Left);
if(Node->Right)
RightVal = ParseAST(Node->Right);
/*
if(Node->Operation == TERM_INTLITERAL)
printf("int %d\n", Node->IntValue);
else
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
switch(Node->Operation) {
case OP_ADD:
return (LeftVal + RightVal);
case OP_SUBTRACT:
return (LeftVal - RightVal);
case OP_MULTIPLY:
return (LeftVal * RightVal);
case OP_DIVIDE:
return (LeftVal / RightVal);
case REF_IDENT:
case TERM_INTLITERAL:
return Node->Value.IntValue;
default:
fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
exit(1);
}
}
*/
/* * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * *
* * * * F U N C T I O N S * * * * * * * * F U N C T I O N S * * * *
* * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * */
/*
* Handles the logic for calling a function.
* This is invoked by an identifier being recognized, followed by a "(.*)" string.
*
* It simply checks that the function exists, that the parameters given are valid,
* and generates the AST Node for calling it.
*
* @return the AST Node for calling the function stored in CurrentIdentifier
*
*/
struct ASTNode* CallFunction() { struct ASTNode* CallFunction() {
struct ASTNode* Tree; struct ASTNode* Tree;
struct SymbolTableEntry* Function; struct SymbolTableEntry* Function;
@ -385,6 +359,21 @@ struct ASTNode* CallFunction() {
return Tree; return Tree;
} }
/*
* An expression list is used:
* * In the call to a function
*
* It is parsed by seeking a left parenthesis "(", then parsing binary expressions
* until either a comma or a right parenthesis is found.
*
* The former will cause another expression to be parsed, the latter will cause
* parsing to stop.
*
* @return the AST Node representing every expression in the list, glued end to
* end with a COMPOSITE operation.
*
*/
struct ASTNode* GetExpressionList() { struct ASTNode* GetExpressionList() {
struct ASTNode* Tree = NULL, *Child = NULL; struct ASTNode* Tree = NULL, *Child = NULL;
int Count; int Count;
@ -397,7 +386,7 @@ struct ASTNode* GetExpressionList() {
switch(CurrentToken.type) { switch(CurrentToken.type) {
case LI_COM: case LI_COM:
Tokenise(&CurrentToken); Tokenise();
break; break;
case LI_RPARE: case LI_RPARE:
break; break;
@ -414,6 +403,18 @@ struct ASTNode* GetExpressionList() {
* * * * S T A T E M E N T S * * * * * * * * S T A T E M E N T S * * * *
* * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * */
/*
* Handles parsing an individual statement.
*
* It serves as a wrapper around:
* * If Statement
* * While Statement
* * For Statement
* * Return Statement
* * Numeric literals and variables
* * Binary Expressions
* @return the AST Node representing this single statement
*/
struct ASTNode* ParseStatement(void) { struct ASTNode* ParseStatement(void) {
int Type; int Type;
@ -425,18 +426,10 @@ struct ASTNode* ParseStatement(void) {
printf("\t\tNew Variable: %s\n", CurrentIdentifier); printf("\t\tNew Variable: %s\n", CurrentIdentifier);
Type = ParseOptionalPointer(); Type = ParseOptionalPointer();
VerifyToken(TY_IDENTIFIER, "ident"); VerifyToken(TY_IDENTIFIER, "ident");
BeginVariableDeclaration(Type, SC_LOCAL); BeginVariableDeclaration(Type, NULL, SC_LOCAL);
VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment? VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment?
return NULL; return NULL;
/*case TY_IDENTIFIER:
if(Symbols[FindSymbol(CurrentIdentifier)].Structure == ST_FUNC)
printf("\t\tCalling Function: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
else
printf("\t\tAssigning variable: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
return ParseIdentifier();
*/
case KW_IF: case KW_IF:
return IfStatement(); return IfStatement();
@ -451,11 +444,26 @@ struct ASTNode* ParseStatement(void) {
default: default:
ParsePrecedenceASTNode(0); ParsePrecedenceASTNode(0);
//DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
} }
} }
/*
* Handles parsing multiple statements or expressions in a row.
* These are typically grouped together with the Compound tokens "{ }"
* and separated by the semicolon ";".
*
* Single Statements are parsed until a semicolon is reached, at which
* point another statement will be parsed, or until a Right Compound
* token is reached ("}"), at which point parsing will stop.
*
* It is useful for:
* * Tightly identifying related blocks of code
* * Containing the many statements of functions
*
* @return the AST Node representing this compound statement
*
*/
struct ASTNode* ParseCompound() { struct ASTNode* ParseCompound() {
struct ASTNode* Left = NULL, *Tree; struct ASTNode* Left = NULL, *Tree;
@ -486,6 +494,21 @@ struct ASTNode* ParseCompound() {
} }
} }
/*
* This is the entry point to the parser/lexer.
*
* By definition, Global definitions are accessible anywhere.
* As of right now (20/01/2021), classes are unimplemented.
* This means that all functions and all function prototypes are globally scoped.
*
* You may also define variables, constants, preprocessor directives and other text
* in the global scope.
*
* The function itself loops, parsing either variables or functions, until it
* reaches the end of the file.
*
*/
void ParseGlobals() { void ParseGlobals() {
struct ASTNode* Tree; struct ASTNode* Tree;
int Type, FunctionComing; int Type, FunctionComing;
@ -516,7 +539,7 @@ void ParseGlobals() {
} }
} else { } else {
printf("\tParsing global variable declaration\n"); printf("\tParsing global variable declaration\n");
BeginVariableDeclaration(Type, SC_GLOBAL); BeginVariableDeclaration(Type, NULL, SC_GLOBAL);
VerifyToken(LI_SEMIC, ";"); VerifyToken(LI_SEMIC, ";");
} }

View File

@ -7,6 +7,34 @@
#include <Defs.h> #include <Defs.h>
#include <Data.h> #include <Data.h>
/****************************************************************
* Types are enumerated by the DataTypes enum. *
* They are represented by unsigned integers, where the *
* most significant 28 bits differentiate the raw type *
* of the data being encoded. *
* However, the least significant nibble - that is, *
* the lowest 4 bits, represent the count of indirection. *
* *
* This means that a raw Integer data type, such as an i32, *
* has the DataType representation 32. *
* However, a pointer to an Integer has DataType value 32+1, *
* or 33. *
* *
* This means that the maximum valid pointer level is 15, *
* the largest count that fits in 4 bits. That's a: *
* ***************int *
* That ought to be enough for everyone, right? *
* *
****************************************************************/
/*
* Adds 1 to the input Type, to add a level of indirection.
* If the indirection count is already at its maximum (the low nibble is 0xf, i.e. 15 levels), it aborts.
*
* @param Type: The DataType to pointerise
* @return the new pointerised DataType value.
*/
int PointerTo(int Type) { int PointerTo(int Type) {
if((Type & 0xf) == 0xf) if((Type & 0xf) == 0xf)
DieDecimal("Unrecognized type in pointerisation", Type); DieDecimal("Unrecognized type in pointerisation", Type);
@ -14,30 +42,59 @@ int PointerTo(int Type) {
return (Type + 1); return (Type + 1);
} }
/*
* Returns the underlying type behind a pointer.
* If the type is not a pointer (the lowest 4 bits are 0), it halts compilation.
*
* @param Type: The pointer type to dereference
* @return the underlying Type
*/
int ValueAt(int Type) { int ValueAt(int Type) {
printf("\t\tDereferencing a %s\n", TypeNames(Type)); printf("\t\tDereferencing a %s\n", TypeNames(Type));
//TODO: this is still bullshittery!
if((Type & 0xf) == 0x0) if((Type & 0xf) == 0x0)
DieDecimal("Unrecognized type in defererencing", Type); DieDecimal("Unrecognized type in defererencing", Type);
return (Type - 1); return (Type - 1);
} }
int ParseOptionalPointer() { /*
* Type declarations may be raw, they may be pointers.
* If they are pointers, we need to be able to check
* how many levels of indirection.
* However, being a pointer is optional.
*
* This can parse either a lone type specifier, or a type
* specifier followed by any valid level of indirection.
*
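* @param Composite: out-parameter; when the type is a struct, receives the Symbol Table entry returned by BeginStructDeclaration(). Not written for other types.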
* @return the parsed DataType, with any indirection.
*
*/
int ParseOptionalPointer(struct SymbolTableEntry** Composite) {
int Type; int Type;
switch(CurrentToken.type) { switch(CurrentToken.type) {
case TY_VOID: case TY_VOID:
Type = RET_VOID; Type = RET_VOID;
Tokenise();
break; break;
case TY_CHAR: case TY_CHAR:
Type = RET_CHAR; Type = RET_CHAR;
Tokenise();
break; break;
case TY_INT: case TY_INT:
Type = RET_INT; Type = RET_INT;
Tokenise();
break; break;
case TY_LONG: case TY_LONG:
Type = RET_LONG; Type = RET_LONG;
Tokenise();
break;
case KW_STRUCT:
Type = DAT_STRUCT;
*Composite = BeginStructDeclaration();
break; break;
default: default:
DieDecimal("Illegal type for pointerisation", CurrentToken.type); DieDecimal("Illegal type for pointerisation", CurrentToken.type);
@ -47,17 +104,30 @@ int ParseOptionalPointer() {
// x = **y; // x = **y;
// possible. // possible.
while(1) { while(1) {
Tokenise(&CurrentToken); Tokenise();
printf("\t\t\tType on parsing is %d\n", CurrentToken.type); printf("\t\t\tType on parsing is %d\n", CurrentToken.type);
if(CurrentToken.type != AR_STAR) if(CurrentToken.type != AR_STAR)
break; break;
Type = PointerTo(Type); Type = PointerTo(Type);
// Tokenise(); TODO: is this skipping pointers?
} }
return Type; return Type;
} }
/*
* Array Accesses come in the form of x[y].
*
* x must be a pointer type, and an array structure.
* y can be any binary expression.
*
* It is a wrapper around *((imax*)x + y).
*
* @return the AST Node that represents this statement.
*/
struct ASTNode* AccessArray() { struct ASTNode* AccessArray() {
struct ASTNode* LeftNode, *RightNode; struct ASTNode* LeftNode, *RightNode;
struct SymbolTableEntry* Entry; struct SymbolTableEntry* Entry;
@ -67,9 +137,7 @@ struct ASTNode* AccessArray() {
DieMessage("Accessing undeclared array", CurrentIdentifier); DieMessage("Accessing undeclared array", CurrentIdentifier);
LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0); LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0);
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]); Tokenise();
Tokenise(&CurrentToken);
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
RightNode = ParsePrecedenceASTNode(0); RightNode = ParsePrecedenceASTNode(0);

View File

@ -8,9 +8,27 @@
#include <Data.h> #include <Data.h>
#include <stdbool.h> #include <stdbool.h>
static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { /*
* Handles reading in a comma-separated list of declarations.
* Erythro treats structs, enums and function parameters the same in this regard -
* comma separated.
*
* C and C++ tend to treat enums and structs differently - the former separated by commas,
* the latter separated by semicolons.
*
* Note that since functions are read in through parentheses, and structs/enums are read in
* through brackets, the end character is configurable.
*
* @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
* @param Storage: The Storage Scope of this declaration list.
* @param End: The end token, in terms of TokenTypes enum values.
* @return the amount of declarations read in.
*
*/
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
int TokenType, ParamCount = 0; int TokenType, ParamCount = 0;
struct SymbolTableEntry* PrototypePointer = NULL; struct SymbolTableEntry* PrototypePointer = NULL, *Composite;
if(FunctionSymbol != NULL) if(FunctionSymbol != NULL)
PrototypePointer = FunctionSymbol->Start; PrototypePointer = FunctionSymbol->Start;
@ -24,19 +42,15 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
DieDecimal("Function paramater of invalid type at index", ParamCount + 1); DieDecimal("Function paramater of invalid type at index", ParamCount + 1);
PrototypePointer=PrototypePointer->NextSymbol; PrototypePointer=PrototypePointer->NextSymbol;
} else { } else {
BeginVariableDeclaration(TokenType, SC_PARAM); BeginVariableDeclaration(TokenType, Composite, Storage);
} }
ParamCount++; ParamCount++;
switch(CurrentToken.type) { if((CurrentToken.type != LI_COM) && (CurrentToken.type != End))
case LI_COM: DieDecimal("Unexpected token in parameter", CurrentToken.type);
Tokenise(&CurrentToken);
break; if(CurrentToken.type == LI_COM)
case LI_RPARE: Tokenise();
break;
default:
DieDecimal("Unexpected token in parameter", CurrentToken.type);
}
} }
if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length)) if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length))
@ -45,6 +59,61 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
return ParamCount; return ParamCount;
} }
/*
 * Handles the declaration of a new struct, or a reference to an existing one.
 *   struct thisStct { int x, int y, int z };
 *
 * The token that led here is skipped first. If an identifier follows, it is
 * looked up in the Structs table.
 *
 *  - If no opening LI_LBRAC follows, this is a use of an existing struct:
 *    the entry found is returned, or compilation halts with "Unknown Struct".
 *  - Otherwise this is a definition: a struct of that name must not already
 *    exist. A new SC_STRUCT symbol is added, the member list is read with
 *    ReadDeclarationList, the members are moved from the StructMembers list
 *    onto the new symbol, and each member is given an offset.
 *
 * The total size is stored in the Length field of the new symbol.
 *
 * NOTE(review): when no identifier is present (anonymous struct), the symbol
 * is still added under whatever CurrentIdentifier last held - confirm that
 * this is intended.
 *
 * @return the Symbol Table entry of the struct that was defined or referenced.
 */
struct SymbolTableEntry* BeginStructDeclaration() {
    struct SymbolTableEntry* Composite = NULL, *Member;
    int Offset;

    // Step past the token that brought us here.
    Tokenise();

    // The name is optional. If present, check whether it is already known.
    if(CurrentToken.type == TY_IDENTIFIER) {
        Composite = FindStruct(CurrentIdentifier);
        Tokenise();
    }

    // No member list follows, so this must refer to an existing struct.
    if(CurrentToken.type != LI_LBRAC) {
        if(Composite == NULL)
            DieMessage("Unknown Struct", CurrentIdentifier);
        return Composite;
    }

    // A member list follows, so the name must not already be defined.
    if(Composite)
        DieMessage("Redefinition of struct", CurrentIdentifier);

    Composite = AddSymbol(CurrentIdentifier, DAT_STRUCT, 0, SC_STRUCT, 0, 0, NULL);
    Tokenise();

    // NOTE(review): the list is opened by LI_LBRAC but closed by LI_RBRAS,
    // which is reported to the user as "]". Elsewhere LI_RBRAS closes array
    // subscripts. Confirm whether the matching right-brace token was intended.
    ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAS);
    VerifyToken(LI_RBRAS, "]");

    // Hand the freshly parsed members to the struct, and empty the scratch
    // list so that the next struct starts from a clean slate.
    Composite->Start = StructMembers;
    StructMembers = StructMembersEnd = NULL;

    // An empty member list would otherwise be dereferenced below.
    Member = Composite->Start;
    if(Member == NULL) {
        DieMessage("Struct declared with no members", CurrentIdentifier);
        return Composite;
    }

    // The first member always sits at the very start of the struct.
    Member->SinkOffset = 0;
    Offset = TypeSize(Member->Type, Member->CompositeType);

    for(Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
        Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1);
        // Continue from the end of this member as placed, so that any padding
        // inserted by the alignment step is counted. Adding only the size to
        // the unaligned offset would let later members overlap this one.
        // (Presumes AsAlignMemory returns the aligned offset - confirm.)
        Offset = Member->SinkOffset + TypeSize(Member->Type, Member->CompositeType);
    }

    Composite->Length = Offset;
    return Composite;
}
/* /*
* Handles the declaration of a type of a variable. * Handles the declaration of a type of a variable.
* int newVar; * int newVar;
@ -52,11 +121,12 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
* It verifies that we have a type keyword followed by a * It verifies that we have a type keyword followed by a
* unique, non-keyword identifier. * unique, non-keyword identifier.
* *
* It then stores this variable into the symbol table, * It then stores this variable into the appropriate symbol table,
* and returns the new item. * and returns the new item.
* *
* @return the Symbol Table entry of this new variable.
*/ */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) { struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
struct SymbolTableEntry* Symbol = NULL; struct SymbolTableEntry* Symbol = NULL;
switch(Scope) { switch(Scope) {
@ -66,33 +136,50 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) {
case SC_LOCAL: case SC_LOCAL:
case SC_PARAM: case SC_PARAM:
if(FindLocal(CurrentIdentifier) != NULL) if(FindLocal(CurrentIdentifier) != NULL)
DieMessage("Invalid redelcaration of local variable", CurrentIdentifier); DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
case SC_MEMBER:
if(FindMember(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
} }
if(CurrentToken.type == LI_LBRAS) { if(CurrentToken.type == LI_LBRAS) {
Tokenise(&CurrentToken); Tokenise();
if(CurrentToken.type == LI_INT) { if(CurrentToken.type == LI_INT) {
switch(Scope) { switch(Scope) {
case SC_GLOBAL: case SC_GLOBAL:
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0); Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
break; break;
case SC_LOCAL: case SC_LOCAL:
case SC_PARAM: case SC_PARAM:
case SC_MEMBER:
Die("Local arrays are unimplemented"); Die("Local arrays are unimplemented");
} }
} }
Tokenise(&CurrentToken); Tokenise();
VerifyToken(LI_RBRAS, "]"); VerifyToken(LI_RBRAS, "]");
} else { } else {
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0); Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
} }
return Symbol; return Symbol;
} }
/*
* Handles the declaration of a new function.
* Verifies that the identifier is not taken (excluding the case
* where there is a declaration but no definition)
* Parses the list of parameters if present
* Saves the function prototype if there is no body
* Generates and saves the break-out point label
*
* @param Type: The return type of the function
* @return the AST for this function
*
*/
struct ASTNode* ParseFunction(int Type) { struct ASTNode* ParseFunction(int Type) {
struct ASTNode* Tree; struct ASTNode* Tree;
struct ASTNode* FinalStatement; struct ASTNode* FinalStatement;
@ -104,7 +191,7 @@ struct ASTNode* ParseFunction(int Type) {
OldFunction = NULL; OldFunction = NULL;
if(OldFunction == NULL) { if(OldFunction == NULL) {
BreakLabel = NewLabel(); BreakLabel = NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0); NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
} }
VerifyToken(LI_LPARE, "("); VerifyToken(LI_LPARE, "(");
@ -120,7 +207,7 @@ struct ASTNode* ParseFunction(int Type) {
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
if(CurrentToken.type == LI_SEMIC) { if(CurrentToken.type == LI_SEMIC) {
Tokenise(&CurrentToken); Tokenise();
return NULL; return NULL;
} }
@ -149,7 +236,6 @@ struct ASTNode* ParseFunction(int Type) {
* //TODO: No brackets * //TODO: No brackets
* //TODO: Type inference * //TODO: Type inference
* *
*
*/ */
struct ASTNode* ReturnStatement() { struct ASTNode* ReturnStatement() {
@ -166,19 +252,10 @@ struct ASTNode* ReturnStatement() {
Tree = ParsePrecedenceASTNode(0); Tree = ParsePrecedenceASTNode(0);
/*
ReturnType = Tree->ExprType;
FunctionType = Symbols[CurrentFunction].Type;
*/
Tree = MutateType(Tree, FunctionEntry->Type, 0); Tree = MutateType(Tree, FunctionEntry->Type, 0);
if(Tree == NULL) if(Tree == NULL)
Die("Returning a value of incorrect type for function"); Die("Returning a value of incorrect type for function");
/*
if(ReturnType)
Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0);
*/
Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0); Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0);
@ -189,59 +266,33 @@ struct ASTNode* ReturnStatement() {
return Tree; return Tree;
} }
/* /*
* Handles Identifiers. * Handles the surrounding logic for If statements.
* *
* This is called for any of: * If statements have the basic form:
* - Calling a function * * if (condition) body
* - Assigning an lvalue variable * * if (condition)
* - Performing arithmetic on a variable * body
* - Performing arithmetic with the return values of function calls * * if (condition) {
* body
* }
* *
* For the case where you're assigning an l-value; * Conditions may be any truthy statement (such as a pointer,
* You can assign with another assignment, * object, integer), as conditions not recognized are auto-
* a statement, a function or a literal. * matically converted to booleans.
*
* This meaning, any object that can be resolved to 0 or NULL
* can be placed as the condition and used as a check.
*
* For example:
* struct ASTNode* Node = NULL;
* if(Node) {
* // This will not run, as Node is ((void*)0)
* }
* *
*/ */
/*
struct ASTNode* ParseIdentifier() {
struct ASTNode* Left, *Right, *Tree;
int LeftType, RightType;
int ID;
VerifyToken(TY_IDENTIFIER, "ident");
printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier));
if(CurrentToken.type == LI_LPARE)
return CallFunction();
if((ID = FindSymbol(CurrentIdentifier)) == -1) {
printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier, Symbols[0].Name, Symbols[1].Name);
DieMessage("Undeclared Variable ", CurrentIdentifier);
}
Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);
VerifyToken(LI_EQUAL, "=");
Left = ParsePrecedenceASTNode(0);
LeftType = Left->ExprType;
RightType = Right->ExprType;
Left = MutateType(Left, RightType, 0);
if(!Left)
Die("Incompatible types in assignment");
if(LeftType)
Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);
Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
return Tree;
}*/
struct ASTNode* IfStatement() { struct ASTNode* IfStatement() {
struct ASTNode* Condition, *True, *False = NULL; struct ASTNode* Condition, *True, *False = NULL;
@ -261,13 +312,39 @@ struct ASTNode* IfStatement() {
True = ParseCompound(); True = ParseCompound();
if(CurrentToken.type == KW_ELSE) { if(CurrentToken.type == KW_ELSE) {
Tokenise(&CurrentToken); Tokenise();
False = ParseCompound(); False = ParseCompound();
} }
return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0); return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0);
} }
/*
* Handles the surrounding logic for While loops.
*
* While loops have the basic form:
* while ( condition ) { body }
*
* When reaching the condition (which, like that of an If statement,
* can be any truthy value), if it resolves to true:
* The body is executed, and immediately the condition is checked
* again.
* This repeats until the condition resolves false, at which point
* the loop executes no more.
*
* This can be prototyped as the following pseudo-assembler:
*
* cond:
* check <condition>
* jne exit
* <body>
* jump cond
* exit:
* <more code>
*
* @return the AST of this statement
*
*/
struct ASTNode* WhileStatement() { struct ASTNode* WhileStatement() {
struct ASTNode* Condition, *Body; struct ASTNode* Condition, *Body;
@ -287,12 +364,36 @@ struct ASTNode* WhileStatement() {
return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0); return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0);
} }
/*
* Handles the surrounding logic for For loops.
*
* They have the basic form of:
* for ( init ; condition; iterator) { body }
*
* The initialiser is run only once upon reaching the for loop.
* Then the condition is checked, and if true, the body is executed.
* After execution of the body, the iterator is run and the condition
* checked again.
*
* It can be prototyped as the following pseudo-assembler code:
*
* for:
* <init>
* cond:
* check <condition>
* jne exit
* <body>
* <iterator>
* jump cond
* exit:
* <loop exit>
*
* In the case of the implementation, "init" is the preoperator,
* "iterator" is the postoperator.
*
* @return the AST of this statement
*/
struct ASTNode* ForStatement() { struct ASTNode* ForStatement() {
// for (preop; condition; postop) {
// body
//}
struct ASTNode* Condition, *Body; struct ASTNode* Condition, *Body;
struct ASTNode* Preop, *Postop; struct ASTNode* Preop, *Postop;
@ -326,6 +427,18 @@ struct ASTNode* ForStatement() {
return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0); return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0);
} }
/*
* Handles the surrounding logic for the Print statement.
*
* This is a legacy hold-over from the early testing, and it
* serves merely as a wrapper around the cstdlib printf function.
*
* It does, however (//TODO), attempt to guess the type that you
* want to print, which takes a lot of the guesswork out of printing.
*
* @return the AST of this statement
*/
struct ASTNode* PrintStatement(void) { struct ASTNode* PrintStatement(void) {
struct ASTNode* Tree; struct ASTNode* Tree;
int LeftType, RightType; int LeftType, RightType;
@ -342,7 +455,7 @@ struct ASTNode* PrintStatement(void) {
DieDecimal("Attempting to print an invalid type:", RightType); DieDecimal("Attempting to print an invalid type:", RightType);
if(RightType) if(RightType)
Tree = ConstructASTBranch(RightType, RET_INT, Tree, NULL, 0); Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);
Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0); Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);
@ -352,17 +465,34 @@ struct ASTNode* PrintStatement(void) {
} }
/*
* Handles the surrounding logic for all of the logical and semantic
* postfixes.
*
* Postfixes are tokens that are affixed to the end of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of postfixes:
* * (): Call a function
* * []: Index or define an array.
* * ++: Increment a variable AFTER it is returned
* NOTE: there is a prefix variant of this for incrementing BEFOREhand.
* * --: Decrement a variable AFTER it is returned
* NOTE: there is a prefix variant of this for decrementing BEFOREhand.
*
* Planned postfixes:
* * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
* NOTE: there is a prefix variant of this for shifting left - multiplying by two.
*
* @return the AST of the statement plus its postfix
*/
struct ASTNode* PostfixStatement() { struct ASTNode* PostfixStatement() {
struct ASTNode* Tree; struct ASTNode* Tree;
struct SymbolTableEntry* Entry; struct SymbolTableEntry* Entry;
Tokenise(&CurrentToken); Tokenise();
// If we get here, we're one of three things:
// - Function
// - Array
// - Variable
if(CurrentToken.type == LI_LPARE) if(CurrentToken.type == LI_LPARE)
return CallFunction(); return CallFunction();
@ -370,8 +500,8 @@ struct ASTNode* PostfixStatement() {
return AccessArray(); return AccessArray();
// If we get here, we must be a variable. // If we get here, we must be a variable.
// There's no guarantees that the variable is in // (as functions have been called and arrays have been indexed)
// the symbol table, though. // Check that the variable is recognized..
if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR) if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR)
DieMessage("Unknown Variable", CurrentIdentifier); DieMessage("Unknown Variable", CurrentIdentifier);
@ -380,11 +510,11 @@ struct ASTNode* PostfixStatement() {
switch(CurrentToken.type) { switch(CurrentToken.type) {
case PPMM_PLUS: case PPMM_PLUS:
Tokenise(&CurrentToken); Tokenise();
Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0); Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0);
break; break;
case PPMM_MINUS: case PPMM_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0); Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0);
break; break;
default: default:
@ -395,33 +525,58 @@ struct ASTNode* PostfixStatement() {
} }
/*
* Handles the surrounding logic for all of the logical and semantic
* prefixes.
*
* Prefixes are tokens that are affixed to the start of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of prefixes:
* * !: Invert the boolean result of a statement or truthy value.
* * ~: Invert the individual bits in a number
* * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
* * ++: Increment a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for incrementing AFTER the fact.
* * --: Decrement a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for decrementing AFTER the fact.
* * &: Take the address of the following object (Get the address that contains it)
* * *: Get the object pointed at by the number following
*
* Planned prefixes:
* * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
* NOTE: there is a postfix variant of this for shifting right - dividing by two.
*
* @return the AST of this statement, plus its prefixes and any postfixes.
*/
struct ASTNode* PrefixStatement() { struct ASTNode* PrefixStatement() {
struct ASTNode* Tree; struct ASTNode* Tree;
switch (CurrentToken.type) { switch (CurrentToken.type) {
case BOOL_INVERT: case BOOL_INVERT:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree->RVal = 1; Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0);
break; break;
case BIT_NOT: case BIT_NOT:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree->RVal = 1; Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0);
break; break;
case AR_MINUS: case AR_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0);
break; break;
case PPMM_PLUS: case PPMM_PLUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT) if(Tree->Operation != REF_IDENT)
@ -430,7 +585,7 @@ struct ASTNode* PrefixStatement() {
break; break;
case PPMM_MINUS: case PPMM_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT) if(Tree->Operation != REF_IDENT)
@ -440,7 +595,7 @@ struct ASTNode* PrefixStatement() {
break; break;
case BIT_AND: case BIT_AND:
Tokenise(&CurrentToken); Tokenise();
// To allow things like: // To allow things like:
// x = &&y; // x = &&y;
@ -454,7 +609,7 @@ struct ASTNode* PrefixStatement() {
Tree->ExprType = PointerTo(Tree->ExprType); Tree->ExprType = PointerTo(Tree->ExprType);
break; break;
case AR_STAR: case AR_STAR:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();

View File

@ -78,6 +78,28 @@ struct SymbolTableEntry* FindGlobal(char* Symbol) {
return SearchList(Symbol, Globals); return SearchList(Symbol, Globals);
} }
/*
 * A narrowed variant of FindSymbol.
 * Looks the name up in the Structs list only, by way of SearchList.
 *
 * @param Symbol: The string name of the struct to search for.
 * @return a pointer to the node if found, else NULL
 *
 */
struct SymbolTableEntry* FindStruct(char* Symbol) {
    struct SymbolTableEntry* Match = SearchList(Symbol, Structs);
    return Match;
}
/*
 * A narrowed variant of FindSymbol.
 * Looks the name up in the StructMembers list only, by way of SearchList.
 *
 * @param Symbol: The string name of the member to search for.
 * @return a pointer to the node if found, else NULL
 *
 */
struct SymbolTableEntry* FindMember(char* Symbol) {
    struct SymbolTableEntry* Match = SearchList(Symbol, StructMembers);
    return Match;
}
/* /*
* Given a particular linked list, * Given a particular linked list,
* Take Node and append it to the Tail. * Take Node and append it to the Tail.
@ -112,6 +134,7 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail
void FreeLocals() { void FreeLocals() {
Locals = LocalsEnd = NULL; Locals = LocalsEnd = NULL;
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
FunctionEntry = NULL;
} }
@ -122,6 +145,8 @@ void ClearTables() {
Globals = GlobalsEnd = NULL; Globals = GlobalsEnd = NULL;
Locals = LocalsEnd = NULL; Locals = LocalsEnd = NULL;
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
StructMembers = StructMembersEnd = NULL;
Structs = StructsEnd = NULL;
} }
@ -136,34 +161,7 @@ void ClearTables() {
* *
* @return The SymbolTableEntry* pointer that corresponds to this newly constructed node. * @return The SymbolTableEntry* pointer that corresponds to this newly constructed node.
*/ */
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset) { struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType) {
/* int TableSlot;
int SinkOffset = 0;
if((TableSlot = FindSymbolImpl(Name, Storage)) != -1)
return -1;
// Instaed of spliting this up into AddLocalSymbol and AddGlobalSymbol,
// we can use this switch to avoid duplicated code.
switch(Storage) {
case SC_PARAM:
// Instead of special casing parameters, we can just add these to the symbol lists and be done with it.
printf("\tPreparing new parameter %s of type %s\r\n", Name, TypeNames[Type]);
TableSlot = AddSymbol(Name, Type, Structure, SC_GLOBAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the global process
TableSlot = AddSymbol(Name, Type, Structure, SC_LOCAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the local process
return TableSlot;
case SC_GLOBAL:
TableSlot = NewGlobalSymbol();
break;
case SC_LOCAL:
printf("\tCreating new local symbol %s\r\n", Name);
TableSlot = NewLocalSymbol();
SinkOffset = AsCalcOffset(Type);
break;
} */
struct SymbolTableEntry* Node = struct SymbolTableEntry* Node =
(struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry)); (struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry));
@ -174,33 +172,28 @@ struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Stor
Node->Storage = Storage; Node->Storage = Storage;
Node->Length = Length; Node->Length = Length;
Node->SinkOffset = SinkOffset; Node->SinkOffset = SinkOffset;
Node->CompositeType = CompositeType;
switch(Storage) { switch(Storage) {
case SC_GLOBAL: case SC_GLOBAL:
AppendSymbol(&Globals, &GlobalsEnd, Node); AppendSymbol(&Globals, &GlobalsEnd, Node);
// We don't want to generate a static block for functions.
if(Structure != ST_FUNC) AsGlobalSymbol(Node); if(Structure != ST_FUNC) AsGlobalSymbol(Node);
break; break;
case SC_STRUCT:
AppendSymbol(&Structs, &StructsEnd, Node);
break;
case SC_MEMBER:
AppendSymbol(&StructMembers, &StructMembersEnd, Node);
case SC_LOCAL: case SC_LOCAL:
AppendSymbol(&Locals, &LocalsEnd, Node); AppendSymbol(&Locals, &LocalsEnd, Node);
break; break;
case SC_PARAM: case SC_PARAM:
AppendSymbol(&Params, &ParamsEnd, Node); AppendSymbol(&Params, &ParamsEnd, Node);
break; break;
} }
/* // NOTE: Generating global symbol names must happen AFTER the name and type are declared.
switch(Storage) {
case SC_GLOBAL:
printf("\tCreating new global symbol %s into slot %d\r\n", Name, TableSlot);
if(Structure != ST_FUNC && EndLabel != 88) { // Magic keyword so that we don't generate ASM globals for parameters
printf("\t\tGenerating data symbol.\r\n");
AsGlobalSymbol(TableSlot);
}
break;
case SC_LOCAL:
break;
} */
//printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
return Node; return Node;
} }