Compare commits
3 Commits
eb118db872
...
e19a945934
Author | SHA1 | Date | |
---|---|---|---|
e19a945934 | |||
01d293f2c6 | |||
18b5da209d |
|
@ -18,6 +18,11 @@
|
|||
extern_ struct SymbolTableEntry* Globals, *GlobalsEnd;
|
||||
extern_ struct SymbolTableEntry* Locals, *LocalsEnd;
|
||||
extern_ struct SymbolTableEntry* Params, *ParamsEnd;
|
||||
extern_ struct SymbolTableEntry* Structs, *StructsEnd;
|
||||
extern_ struct SymbolTableEntry* StructMembers, *StructMembersEnd;
|
||||
|
||||
extern_ struct SymbolTableEntry* Unions, *UnionsEnd;
|
||||
extern_ struct SymbolTableEntry* Enums, *EnumsEnd;
|
||||
|
||||
extern_ bool OptDumpTree;
|
||||
extern_ bool OptKeepAssembly;
|
||||
|
|
|
@ -92,7 +92,8 @@ enum TokenTypes {
|
|||
KW_ELSE,
|
||||
KW_WHILE,
|
||||
KW_FOR,
|
||||
KW_RETURN
|
||||
KW_RETURN,
|
||||
KW_STRUCT
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -178,7 +179,6 @@ struct ASTNode {
|
|||
union {
|
||||
int Size; // OP_SCALE's linear representation
|
||||
int IntValue; // TERM_INTLIT's Value
|
||||
int ID; // LV_IDENT's Symbols[] index.
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -215,6 +215,9 @@ struct SymbolTableEntry {
|
|||
|
||||
enum StorageScope {
|
||||
SC_GLOBAL = 1, // Global Scope
|
||||
SC_STRUCT, // Struct Definitions
|
||||
SC_ENUM, // Enum Definitions
|
||||
SC_MEMBER, // The members of Structs or Enums
|
||||
//SC_CLASS, // Class-local definitions
|
||||
//SC_STATIC, // Static storage definitions
|
||||
SC_PARAM, // Function parameters
|
||||
|
@ -274,7 +277,7 @@ void DisplayUsage(char* ProgName);
|
|||
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
|
||||
int Tokenise(struct Token* Token);
|
||||
void Tokenise();
|
||||
|
||||
void VerifyToken(int Type, char* TokenExpected);
|
||||
void RejectToken(struct Token* Token);
|
||||
|
@ -355,14 +358,15 @@ struct ASTNode* PrintStatement(void);
|
|||
struct SymbolTableEntry* FindSymbol(char* Symbol);
|
||||
struct SymbolTableEntry* FindLocal(char* Symbol);
|
||||
struct SymbolTableEntry* FindGlobal(char* Symbol);
|
||||
struct SymbolTableEntry* FindStruct(char* Symbol);
|
||||
struct SymbolTableEntry* FindMember(char* Symbol);
|
||||
|
||||
void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node);
|
||||
|
||||
void FreeLocals();
|
||||
void ClearTables();
|
||||
|
||||
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset);
|
||||
|
||||
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType);
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* * * * C O N T R O L S T A T U S * * * *
|
||||
|
@ -460,7 +464,7 @@ void AsFunctionEpilogue(struct SymbolTableEntry* Entry);
|
|||
* * * * D E C L A R A T I O N * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope);
|
||||
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
|
||||
struct ASTNode* ParseIdentifier(void);
|
||||
|
||||
struct ASTNode* IfStatement();
|
||||
|
|
195
src/Assembler.c
195
src/Assembler.c
|
@ -9,16 +9,17 @@
|
|||
|
||||
|
||||
/*
|
||||
* If the entry in UsedRegisters
|
||||
* that correlates to the position of a register in Registers
|
||||
* is 1,
|
||||
* then that register is classed as used -
|
||||
* it has useful data inside it.
|
||||
* Stores how many hardware registers are being used at any one time.
|
||||
* It is empirically proven that only 4 clobber registers are
|
||||
* needed for any arbitrary length program.
|
||||
*
|
||||
* If UsedRegisters[i] =? 1, then Registers[i] contains useful data.
|
||||
* If UsedRegisters[i] =? 0, then Registers[i] is unused.
|
||||
*
|
||||
* if the entry is 0, then it is free.
|
||||
*/
|
||||
|
||||
static int UsedRegisters[4];
|
||||
|
||||
/* The https://en.wikipedia.org/wiki/X86_calling_conventions#Microsoft_x64_calling_convention
|
||||
* calling convention on Windows requires that
|
||||
* the first 4 arguments are placed in registers
|
||||
|
@ -26,25 +27,43 @@ static int UsedRegisters[4];
|
|||
* This order must be preserved, and they must be placed
|
||||
* right to left.
|
||||
*
|
||||
* That is the reason for the weird arrangement here.
|
||||
* The parameter registers are last, in reverse order.
|
||||
* The 4 clobber registers are first, and the 4 parameter registers are last.
|
||||
*/
|
||||
static char* Registers[8] = { "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
|
||||
static char* DoubleRegisters[8] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
|
||||
static char* ByteRegisters[8] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
|
||||
|
||||
/*
|
||||
* For ease of reading later code, we store the valid x86 comparison instructions,
|
||||
* and the inverse jump instructions together, in a synchronized fashion.
|
||||
*/
|
||||
static char* Registers[10] = { "%rsi", "%rdi", "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
|
||||
static char* DoubleRegisters[10] = { "%esi", "%edi", "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
|
||||
static char* ByteRegisters[10] = { "%sil", "%dil", "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
|
||||
|
||||
static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" };
|
||||
static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"};
|
||||
|
||||
// How far above the base pointer is the last local?
|
||||
static int LocalVarOffset;
|
||||
// How far must we lower the base pointer to retrieve the parameters?
|
||||
static int StackFrameOffset;
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* * * * R O O T O F A S S E M B L E R * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
// Just a short "hack" to make sure we only dump the tree the first time this function is called
|
||||
static int Started = 0;
|
||||
|
||||
/*
|
||||
* Walk the AST tree given, and generate the assembly code that represents
|
||||
* it.
|
||||
*
|
||||
* @param Node: The current Node to compile. If needed, its children will be parsed recursively.
|
||||
* @param Register: The index of Registers to store the result of the current compilation.
|
||||
* @param ParentOp: The Operation of the parent of the current Node.
|
||||
*
|
||||
* @return dependant on the Node. Typically the Register that stores the result of the Node's operation.
|
||||
*
|
||||
*/
|
||||
int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
||||
int LeftVal, RightVal;
|
||||
if(!Started && OptDumpTree)
|
||||
|
@ -83,14 +102,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
|||
if(Node->Right)
|
||||
RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation);
|
||||
|
||||
|
||||
/* if(Node->Operation == TERM_INTLITERAL)
|
||||
printf("int %d\n", Node->IntValue);
|
||||
else
|
||||
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
|
||||
|
||||
*/
|
||||
|
||||
switch(Node->Operation) {
|
||||
case OP_ADD:
|
||||
return AsAdd(LeftVal, RightVal);
|
||||
|
@ -141,31 +152,13 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
|||
|
||||
case OP_WIDEN:
|
||||
printf("\tWidening types..\r\n");
|
||||
return LeftVal; //AsWiden(LeftVal, Node->Left->ExprType, Node->ExprType);
|
||||
return LeftVal;
|
||||
|
||||
case OP_RET:
|
||||
printf("\tReturning from %s\n", Node->Symbol->Name);
|
||||
AsReturn(FunctionEntry, LeftVal);
|
||||
return -1;
|
||||
|
||||
/* case OP_EQUAL:
|
||||
return AsEqual(LeftVal, RightVal);
|
||||
|
||||
case OP_INEQ:
|
||||
return AsIneq(LeftVal, RightVal);
|
||||
|
||||
case OP_LESS:
|
||||
return AsLess(LeftVal, RightVal);
|
||||
|
||||
case OP_GREAT:
|
||||
return AsGreat(LeftVal, RightVal);
|
||||
|
||||
case OP_LESSE:
|
||||
return AsLessE(LeftVal, RightVal);
|
||||
|
||||
case OP_GREATE:
|
||||
return AsGreatE(LeftVal, RightVal); */
|
||||
|
||||
case OP_EQUAL:
|
||||
case OP_INEQ:
|
||||
case OP_LESS:
|
||||
|
@ -179,7 +172,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
|||
|
||||
|
||||
case REF_IDENT:
|
||||
//printf("\tReferencing variable %s %s with type %s and storage %d\r\n", Symbols[Node->Value.ID].Name, Node->RVal ? " rval " : "", ParentOp, Symbols[Node->Value.ID].Storage);
|
||||
if(Node->RVal || ParentOp == OP_DEREF) {
|
||||
if(Node->Symbol->Storage == SC_LOCAL || Node->Symbol->Storage == SC_PARAM)
|
||||
return AsLdLocalVar(Node->Symbol, Node->Operation);
|
||||
|
@ -199,11 +191,6 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
|||
DeallocateAllRegisters();
|
||||
return -1;
|
||||
|
||||
/* case OP_LOOP:
|
||||
// We only do while for now..
|
||||
return AsWhile(Node);
|
||||
break; */
|
||||
|
||||
case OP_BITAND:
|
||||
return AsBitwiseAND(LeftVal, RightVal);
|
||||
|
||||
|
@ -252,24 +239,31 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
|
|||
* * * * R E G I S T E R M A N A G E M E N T * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
// Set all Registers to unused.
|
||||
void DeallocateAllRegisters() {
|
||||
UsedRegisters[0] = UsedRegisters[1] = UsedRegisters[2] = UsedRegisters[3] = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for an unused register, allocate it, and return it.
|
||||
* If none available, cancel compilation.
|
||||
*/
|
||||
int RetrieveRegister() {
|
||||
//printf("Current state of registers: %x, %x, %x, %x\n", UsedRegisters[0], UsedRegisters[1], UsedRegisters[2], UsedRegisters[3]);
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
if(UsedRegisters[i] == 0) {
|
||||
UsedRegisters[i] = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "Out of registers!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the given register to unused.
|
||||
* If the register is not used, it is an invalid state.
|
||||
* @param Register: The Registers index to deallocate.
|
||||
*/
|
||||
void DeallocateRegister(int Register) {
|
||||
if(UsedRegisters[Register] != 1) {
|
||||
fprintf(stderr, "Error trying to free register %d\n", Register);
|
||||
|
@ -283,10 +277,25 @@ void DeallocateRegister(int Register) {
|
|||
* * * * * * S T A C K M A N A G E M E N T * * * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* Prepare a new stack frame pointer.
|
||||
* This resets the highest local.
|
||||
*
|
||||
*/
|
||||
void AsNewStackFrame() {
|
||||
LocalVarOffset = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Given the type of input, how far do we need to go down the stack frame
|
||||
* to store or retrieve this type?
|
||||
*
|
||||
* The stack must be 4-bytes aligned, so we set a hard minimum.
|
||||
*
|
||||
* @param Type: The DataTypes we want to store.
|
||||
* @return the offset to store the type, taking into account the current state of the stack frame.
|
||||
*
|
||||
*/
|
||||
int AsCalcOffset(int Type) {
|
||||
LocalVarOffset += PrimitiveSize(Type) > 4 ? PrimitiveSize(Type) : 4;
|
||||
return -LocalVarOffset;
|
||||
|
@ -296,12 +305,19 @@ int AsCalcOffset(int Type) {
|
|||
* * * * C O D E G E N E R A T I O N * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
 * Hand out label numbers in increasing order.
 * The counter lives in a function-local static, starts at 1,
 * and advances by one on every call.
 *
 * @return the next unused label number.
 */
int NewLabel(void) {
    static int NextId = 1;
    int Issued = NextId;

    NextId = NextId + 1;
    return Issued;
}
|
||||
|
||||
|
||||
// Assemble an If statement
|
||||
int AsIf(struct ASTNode* Node) {
|
||||
int FalseLabel, EndLabel;
|
||||
|
||||
|
@ -333,6 +349,7 @@ int AsIf(struct ASTNode* Node) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Assemble a comparison
|
||||
int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
|
||||
printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight);
|
||||
|
||||
|
@ -346,6 +363,7 @@ int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
|
|||
return RegisterRight;
|
||||
}
|
||||
|
||||
// Assemble an inverse comparison (a one-line jump)
|
||||
int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) {
|
||||
if(Operation < OP_EQUAL || Operation > OP_GREATE)
|
||||
Die("Bad Operation in AsCompareJmp");
|
||||
|
@ -359,16 +377,24 @@ int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label)
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Assemble an immediate jump
|
||||
void AsJmp(int Label) {
|
||||
printf("\t\tJumping to label %d\n", Label);
|
||||
fprintf(OutputFile, "\tjmp\tL%d\n", Label);
|
||||
}
|
||||
|
||||
/* Create a new base label
|
||||
* @param Label: The number to create the label of
|
||||
*/
|
||||
void AsLabel(int Label) {
|
||||
printf("\tCreating label %d\n", Label);
|
||||
fprintf(OutputFile, "\nL%d:\n", Label);
|
||||
}
|
||||
|
||||
/*
|
||||
* Assemble a new global string into the data segment.
|
||||
* @param Value: The name of the string, as a string
|
||||
*/
|
||||
int AsNewString(char* Value) {
|
||||
int Label = NewLabel();
|
||||
char* CharPtr;
|
||||
|
@ -382,12 +408,17 @@ int AsNewString(char* Value) {
|
|||
return Label;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a string into a Register.
|
||||
* @param ID: the Label number of the string
|
||||
*/
|
||||
int AsLoadString(int ID) {
|
||||
int Register = RetrieveRegister();
|
||||
fprintf(OutputFile, "\tleaq\tL%d(\%%rip), %s\r\n", ID, Registers[Register]);
|
||||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a While loop
|
||||
int AsWhile(struct ASTNode* Node) {
|
||||
int BodyLabel, BreakLabel;
|
||||
|
||||
|
@ -418,6 +449,7 @@ int AsWhile(struct ASTNode* Node) {
|
|||
|
||||
}
|
||||
|
||||
// Load a value into a register.
|
||||
int AsLoad(int Value) {
|
||||
int Register = RetrieveRegister();
|
||||
|
||||
|
@ -428,6 +460,7 @@ int AsLoad(int Value) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
// Assemble an addition.
|
||||
int AsAdd(int Left, int Right) {
|
||||
printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]);
|
||||
fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]);
|
||||
|
@ -437,6 +470,7 @@ int AsAdd(int Left, int Right) {
|
|||
return Right;
|
||||
}
|
||||
|
||||
// Assemble a multiplication.
|
||||
int AsMul(int Left, int Right) {
|
||||
printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]);
|
||||
fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]);
|
||||
|
@ -446,6 +480,7 @@ int AsMul(int Left, int Right) {
|
|||
return Right;
|
||||
}
|
||||
|
||||
// Assemble a subtraction.
|
||||
int AsSub(int Left, int Right) {
|
||||
printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]);
|
||||
fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]);
|
||||
|
@ -455,6 +490,7 @@ int AsSub(int Left, int Right) {
|
|||
return Left;
|
||||
}
|
||||
|
||||
// Assemble a division.
|
||||
int AsDiv(int Left, int Right) {
|
||||
printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]);
|
||||
fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]);
|
||||
|
@ -467,12 +503,18 @@ int AsDiv(int Left, int Right) {
|
|||
return Left;
|
||||
}
|
||||
|
||||
// Assemble an ASL
|
||||
int AsShl(int Register, int Val) {
|
||||
printf("\tShifting %s to the left by %d bits.\n", Registers[Register], Val);
|
||||
fprintf(OutputFile, "\tsalq\t$%d, %s\n", Val, Registers[Register]);
|
||||
return Register;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a global variable into a register, with optional pre/post-inc/dec
|
||||
* @param Entry: The variable to load.
|
||||
* @param Operation: An optional SyntaxOps element
|
||||
*/
|
||||
int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
|
||||
int Reg = RetrieveRegister();
|
||||
|
||||
|
@ -543,6 +585,11 @@ int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
|
|||
return Reg;
|
||||
}
|
||||
|
||||
/*
|
||||
* Store a value from a register into a global variable.
|
||||
* @param Entry: The variable to store into.
|
||||
* @param Register: The Registers index containing the value to store.
|
||||
*/
|
||||
int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
|
||||
printf("\tStoring contents of %s into %s, type %d, globally:\n", Registers[Register], Entry->Name, Entry->Type);
|
||||
|
||||
|
@ -568,6 +615,12 @@ int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a value from a local variable into a register, with optional post/pre-inc/dec
|
||||
* @param Entry: The local variable to read
|
||||
* @param Operation: An optional SyntaxOps entry
|
||||
*/
|
||||
|
||||
int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
|
||||
int Reg = RetrieveRegister();
|
||||
|
||||
|
@ -638,6 +691,12 @@ int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
|
|||
return Reg;
|
||||
}
|
||||
|
||||
/*
|
||||
* Store a value from a register into a local variable.
|
||||
* @param Entry: The local variable to write to.
|
||||
* @param Register: The Registers index containing the desired value
|
||||
*
|
||||
*/
|
||||
int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
|
||||
printf("\tStoring contents of %s into %s, type %d, locally\n", Registers[Register], Entry->Name, Entry->Type);
|
||||
|
||||
|
@ -663,6 +722,7 @@ int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a pointerisation
|
||||
int AsAddr(struct SymbolTableEntry* Entry) {
|
||||
int Register = RetrieveRegister();
|
||||
printf("\tSaving pointer of %s into %s\n", Entry->Name, Registers[Register]);
|
||||
|
@ -671,6 +731,7 @@ int AsAddr(struct SymbolTableEntry* Entry) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a dereference
|
||||
int AsDeref(int Reg, int Type) {
|
||||
|
||||
int DestSize = PrimitiveSize(ValueAt(Type));
|
||||
|
@ -693,6 +754,7 @@ int AsDeref(int Reg, int Type) {
|
|||
return Reg;
|
||||
}
|
||||
|
||||
// Assemble a store-through-dereference
|
||||
int AsStrDeref(int Register1, int Register2, int Type) {
|
||||
printf("\tStoring contents of %s into %s through a dereference, type %d\n", Registers[Register1], Registers[Register2], Type);
|
||||
|
||||
|
@ -711,6 +773,7 @@ int AsStrDeref(int Register1, int Register2, int Type) {
|
|||
return Register1;
|
||||
}
|
||||
|
||||
// Assemble a global symbol (variable, struct, enum, function, string)
|
||||
void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
|
||||
int TypeSize;
|
||||
|
||||
|
@ -732,6 +795,7 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
|
|||
}
|
||||
}
|
||||
|
||||
// Assemble a function call, with all associated parameter bumping and stack movement.
|
||||
int AsCallWrapper(struct ASTNode* Node) {
|
||||
struct ASTNode* CompositeTree = Node->Left;
|
||||
int Register, Args = 0;
|
||||
|
@ -747,6 +811,7 @@ int AsCallWrapper(struct ASTNode* Node) {
|
|||
return AsCall(Node->Symbol, Args);
|
||||
}
|
||||
|
||||
// Copy a function argument from Register to argument Position
|
||||
void AsCopyArgs(int Register, int Position) {
|
||||
if(Position > 4) { // Args above 4 go on the stack
|
||||
fprintf(OutputFile, "\tpushq\t%s\n", Registers[Register]);
|
||||
|
@ -755,6 +820,8 @@ void AsCopyArgs(int Register, int Position) {
|
|||
}
|
||||
}
|
||||
|
||||
// Assemble an actual function call.
|
||||
// NOTE: this should not be called directly. Use AsCallWrapper instead.
|
||||
int AsCall(struct SymbolTableEntry* Entry, int Args) {
|
||||
|
||||
int OutRegister = RetrieveRegister();
|
||||
|
@ -771,6 +838,7 @@ int AsCall(struct SymbolTableEntry* Entry, int Args) {
|
|||
return OutRegister;
|
||||
}
|
||||
|
||||
// Assemble a function return.
|
||||
int AsReturn(struct SymbolTableEntry* Entry, int Register) {
|
||||
|
||||
printf("\t\tCreating return for function %s\n", Entry->Name);
|
||||
|
@ -794,39 +862,46 @@ int AsReturn(struct SymbolTableEntry* Entry, int Register) {
|
|||
}
|
||||
|
||||
AsJmp(Entry->EndLabel);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Assemble a =?
|
||||
int AsEqual(int Left, int Right) {
|
||||
// Set the lowest bit if left = right
|
||||
return AsCompare(OP_EQUAL, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a !=
|
||||
int AsIneq(int Left, int Right) {
|
||||
// Set the lowest bit if left != right
|
||||
return AsCompare(OP_INEQ, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a <
|
||||
int AsLess(int Left, int Right) {
|
||||
// Set the lowest bit if left < right
|
||||
return AsCompare(OP_LESS, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a >
|
||||
int AsGreat(int Left, int Right) {
|
||||
// Set the lowest bit if left > right
|
||||
return AsCompare(OP_GREAT, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a <=
|
||||
int AsLessE(int Left, int Right) {
|
||||
// Set the lowest bit if left <= right
|
||||
return AsCompare(OP_LESSE, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a =>
|
||||
int AsGreatE(int Left, int Right) {
|
||||
// Set the lowest bit if left => right
|
||||
return AsCompare(OP_GREATE, Left, Right);
|
||||
}
|
||||
|
||||
// Assemble a print statement
|
||||
void AssemblerPrint(int Register) {
|
||||
printf("\t\tPrinting Register %s\n", Registers[Register]);
|
||||
|
||||
|
@ -837,34 +912,40 @@ void AssemblerPrint(int Register) {
|
|||
DeallocateRegister(Register);
|
||||
}
|
||||
|
||||
// Assemble a &
|
||||
int AsBitwiseAND(int Left, int Right) {
|
||||
fprintf(OutputFile, "\tandq\t%s, %s\n", Registers[Left], Registers[Right]);
|
||||
DeallocateRegister(Left);
|
||||
return Right;
|
||||
}
|
||||
|
||||
// Assemble a |
|
||||
int AsBitwiseOR(int Left, int Right) {
|
||||
fprintf(OutputFile, "\torq\t%s, %s\n", Registers[Left], Registers[Right]);
|
||||
DeallocateRegister(Left);
|
||||
return Right;
|
||||
}
|
||||
|
||||
// Assemble a ^
|
||||
int AsBitwiseXOR(int Left, int Right) {
|
||||
fprintf(OutputFile, "\txorq\t%s, %s\n", Registers[Left], Registers[Right]);
|
||||
DeallocateRegister(Left);
|
||||
return Right;
|
||||
}
|
||||
|
||||
// Assemble a ~
|
||||
int AsNegate(int Register) {
|
||||
fprintf(OutputFile, "\tnegq\t%s\n", Registers[Register]);
|
||||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a !
|
||||
int AsInvert(int Register) {
|
||||
fprintf(OutputFile, "\tnotq\t%s\n", Registers[Register]);
|
||||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a !
|
||||
int AsBooleanNOT(int Register) {
|
||||
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
|
||||
fprintf(OutputFile, "\tsete\t%s\n", ByteRegisters[Register]);
|
||||
|
@ -872,6 +953,7 @@ int AsBooleanNOT(int Register) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
// Assemble a <<
|
||||
int AsShiftLeft(int Left, int Right) {
|
||||
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
|
||||
fprintf(OutputFile, "\tshlq\t\%%cl, %s\n", Registers[Left]);
|
||||
|
@ -879,6 +961,7 @@ int AsShiftLeft(int Left, int Right) {
|
|||
return Left;
|
||||
}
|
||||
|
||||
// Assemble a >>
|
||||
int AsShiftRight(int Left, int Right) {
|
||||
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
|
||||
fprintf(OutputFile, "\tshrq\t\%%cl, %s\n", Registers[Left]);
|
||||
|
@ -886,6 +969,8 @@ int AsShiftRight(int Left, int Right) {
|
|||
return Left;
|
||||
}
|
||||
|
||||
// Assemble a conversion from arbitrary type to boolean.
|
||||
// Facilitates if(ptr)
|
||||
int AsBooleanConvert(int Register, int Operation, int Label) {
|
||||
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
|
||||
|
||||
|
@ -903,6 +988,7 @@ int AsBooleanConvert(int Register, int Operation, int Label) {
|
|||
return Register;
|
||||
}
|
||||
|
||||
// Assemble the start of an assembly file
|
||||
void AssemblerPreamble() {
|
||||
DeallocateAllRegisters();
|
||||
fputs(
|
||||
|
@ -912,6 +998,15 @@ void AssemblerPreamble() {
|
|||
OutputFile);
|
||||
}
|
||||
|
||||
/*
|
||||
* Assemble a function block for the Entry.
|
||||
* Handles all stack logic for local variables,
|
||||
* as well as copying parameters out of registers and
|
||||
* into the spill space.
|
||||
*
|
||||
* @param Entry: The function to generate
|
||||
*
|
||||
*/
|
||||
void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
|
||||
char* Name = Entry->Name;
|
||||
struct SymbolTableEntry* Param, *Local;
|
||||
|
@ -958,6 +1053,8 @@ void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
|
|||
|
||||
}
|
||||
|
||||
|
||||
// Assemble the epilogue of a function
|
||||
void AsFunctionEpilogue(struct SymbolTableEntry* Entry) {
|
||||
AsLabel(Entry->EndLabel);
|
||||
|
||||
|
|
|
@ -7,6 +7,29 @@
|
|||
#include <Data.h>
|
||||
#include <errno.h>
|
||||
|
||||
/********************************************************************************
|
||||
* The Delegate is what allows the compiler backend to be abstracted. *
|
||||
* *
|
||||
* It delegates the operation of compiling, assembling and linking *
|
||||
* to the proper subsystems. *
|
||||
* *
|
||||
* As of right now (20/01/2021) it uses the GCC backend. *
|
||||
* *
|
||||
* Compile parses files to their AST and generates mingw PECOFF32+ assembly, *
|
||||
* Assemble uses GCC-as to compile the assembly to an object file. *
|
||||
* Link links the object files into an executable. *
|
||||
* *
|
||||
********************************************************************************/
|
||||
|
||||
/*
|
||||
* Files inputted must have a suffix/extension (because we're on Windows right now)
|
||||
* This is the way to change the suffix for when a file is converted to another.
|
||||
*
|
||||
* @param String: The full, current file name
|
||||
* @param Suffix: The new, desired extension.
|
||||
*
|
||||
*/
|
||||
|
||||
char* Suffixate(char* String, char Suffix) {
|
||||
char* Pos, *NewStr;
|
||||
|
||||
|
@ -26,6 +49,22 @@ char* Suffixate(char* String, char Suffix) {
|
|||
return NewStr;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Starts most of the work to do with the Erythro compiler.
|
||||
* It:
|
||||
* Opens the input and output files,
|
||||
* Parses the global symbols of the file, including function blocks.
|
||||
* Generates the assembly representation of the source code
|
||||
* Saves said assembly into the OutputFile
|
||||
* Returns the name of the file containing the generated assembly.
|
||||
* Note that the Input file must have a valid extension.
|
||||
* For Erythro code, this is .er
|
||||
* The generated assembly will have the extension .s
|
||||
*
|
||||
* @param InputFile: The filename of the Erythro Source code to compile
|
||||
* @return the filename of the generated PECOFF32+ assembly
|
||||
*/
|
||||
char* Compile(char* InputFile) {
|
||||
char* OutputName;
|
||||
OutputName = Suffixate(InputFile, 's');
|
||||
|
@ -52,7 +91,7 @@ char* Compile(char* InputFile) {
|
|||
if(OptVerboseOutput)
|
||||
printf("Compiling %s\r\n", InputFile);
|
||||
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
AssemblerPreamble();
|
||||
|
||||
|
@ -62,6 +101,20 @@ char* Compile(char* InputFile) {
|
|||
return OutputName;
|
||||
}
|
||||
|
||||
/*
|
||||
* Processes the output from the Compile function.
|
||||
* Passes the generated .s file to (currently, as of
|
||||
* 21/01/2021), the GNU GAS assembler, to create an
|
||||
* object file.
|
||||
*
|
||||
* It does this by invoking the command on a shell.
|
||||
* TODO: fork it?
|
||||
*
|
||||
* @param InputFile: The .s assembly file to be processed
|
||||
* @return the name of the generated object file.
|
||||
*
|
||||
*/
|
||||
|
||||
char* Assemble(char* InputFile) {
|
||||
char Command[TEXTLEN];
|
||||
int Error;
|
||||
|
@ -85,6 +138,18 @@ char* Assemble(char* InputFile) {
|
|||
return OutputName;
|
||||
}
|
||||
|
||||
/*
|
||||
* Processes the outputted object files, turning them into an executable.
|
||||
* It does this by invoking (currently, as of 21/01/2021) the GNU GCC
|
||||
* compiler.
|
||||
* It invokes GCC rather than LD so that it automatically links against
|
||||
* libc and the CRT natives.
|
||||
*
|
||||
* @param Output: The desired name for the executable.
|
||||
* @param Objects: A list of the Object files to be linked.
|
||||
*
|
||||
*/
|
||||
|
||||
void Link(char* Output, char* Objects[]) {
|
||||
int Count, Size = TEXTLEN, Error;
|
||||
char Command[TEXTLEN], *CommandPtr;
|
||||
|
@ -112,7 +177,16 @@ void Link(char* Output, char* Objects[]) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Prints information about the available flags and
|
||||
* how to structure the command.
|
||||
* @param ProgName: The name of the file that was
|
||||
* attempted to run.
|
||||
*/
|
||||
|
||||
void DisplayUsage(char* ProgName) {
|
||||
fprintf(stderr, "Erythro Compiler v5 - Gemwire Institute\n");
|
||||
fprintf(stderr, "***************************************\n");
|
||||
fprintf(stderr, "Usage: %s -[vcST] {-o output} file [file ...]\n", ProgName);
|
||||
fprintf(stderr, " -v: Verbose Output Level\n");
|
||||
fprintf(stderr, " -c: Compile without Linking\n");
|
||||
|
|
|
@ -12,6 +12,9 @@ static int GenerateSrg() {
|
|||
return srgId++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk the Node tree, and dump the AST tree to stdout.
|
||||
*/
|
||||
void DumpTree(struct ASTNode* Node, int level) {
|
||||
int Lfalse, Lstart, Lend;
|
||||
|
||||
|
|
145
src/Lexer.c
145
src/Lexer.c
|
@ -11,10 +11,29 @@
|
|||
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* * * * * * C H A R S T R E A M * * * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* The Lexer holds a "stream" of characters.
|
||||
* You may read a character from the stream, and if it is not
|
||||
* the desired character, it may be placed into an "overread" buffer.
|
||||
* The overread buffer is checked before the source file is read any further.
|
||||
* This provides an effective way to "un-read" a character.
|
||||
*
|
||||
* @param Char: The character to "un-read"
|
||||
*
|
||||
*/
|
||||
|
||||
static void ReturnCharToStream(int Char) {
|
||||
Overread = Char;
|
||||
}
|
||||
|
||||
/*
|
||||
* NextChar allows you to ask the Lexer for the next useful character.
|
||||
* As mentioned above, it checks the overread buffer first.
|
||||
*
|
||||
* @return the character as int
|
||||
*
|
||||
*/
|
||||
static int NextChar(void) {
|
||||
int Char;
|
||||
|
||||
|
@ -32,6 +51,10 @@ static int NextChar(void) {
|
|||
return Char;
|
||||
}
|
||||
|
||||
/*
|
||||
* Searches for the next useful character, skipping whitespace.
|
||||
* @return the character as int.
|
||||
*/
|
||||
|
||||
static int FindChar() {
|
||||
int Char;
|
||||
|
@ -45,14 +68,31 @@ static int FindChar() {
|
|||
return Char;
|
||||
}
|
||||
|
||||
/*
 * Converts a character to its numeric value by looking up its position
 * in a string of valid digits (e.g. "0123456789" or "0123456789abcdef").
 *
 * @param String: The set of all valid digits, in ascending order of value.
 * @param Char: The ASCII character to convert.
 * @return the zero-based index of Char within String, or -1 if Char is not in the set.
 */
static int FindDigitFromPos(char* String, char Char) {
    char* Result;

    // strchr treats the terminating NUL as part of the string, so a NUL input
    // would otherwise be reported as a valid digit at index strlen(String).
    if(Char == '\0')
        return -1;

    Result = strchr(String, Char);
    return Result ? (int)(Result - String) : -1;
}
|
||||
|
||||
/*
|
||||
* Facilitates the easy checking of expected tokens.
|
||||
* NOTE: there is (soon to be) an optional variant of this function that
|
||||
* reads a token but does not consume it ( via Tokenise )
|
||||
*
|
||||
* @param Type: The expected token, in terms of value of the TokenTypes enum.
|
||||
* @param TokenExpected: A string to output when the token is not found.
|
||||
*
|
||||
*/
|
||||
|
||||
void VerifyToken(int Type, char* TokenExpected) {
|
||||
if(CurrentToken.type == Type)
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
else {
|
||||
printf("Expected %s on line %d\n", TokenExpected, Line);
|
||||
exit(1);
|
||||
|
@ -61,6 +101,12 @@ void VerifyToken(int Type, char* TokenExpected) {
|
|||
|
||||
static struct Token* RejectedToken = NULL;
|
||||
|
||||
/*
|
||||
* Rejected Tokens and the Overread Stream are identical concepts.
|
||||
* This was implemented first, but it is no longer used.
|
||||
* TODO: Refactor this function out.
|
||||
*/
|
||||
|
||||
void RejectToken(struct Token* Token) {
|
||||
if(RejectedToken != NULL)
|
||||
Die("Cannot reject two tokens in a row!");
|
||||
|
@ -72,6 +118,21 @@ void RejectToken(struct Token* Token) {
|
|||
* * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* Facilitates the parsing of integer literals from the file.
|
||||
* Currently only supports the decimal numbers, despite the
|
||||
* FindDigitFromPos function allowing conversion.
|
||||
*
|
||||
* The function loops over the characters, multiplying by 10 and adding
|
||||
* the new value on top, until a non-numeric character is found.
|
||||
* At that point, it returns the non-numeric character to the Overread Stream
|
||||
* and returns the calculated number.
|
||||
*
|
||||
* @param Char: The first number to scan.
|
||||
* @return the full parsed number as an int.
|
||||
*
|
||||
*/
|
||||
|
||||
static int ReadInteger(int Char) {
|
||||
int CurrentChar = 0;
|
||||
int IntegerValue = 0;
|
||||
|
@ -86,7 +147,23 @@ static int ReadInteger(int Char) {
|
|||
return IntegerValue;
|
||||
}
|
||||
|
||||
// Variable identifier, keyword, function.
|
||||
/*
|
||||
* An Identifier can be any of:
|
||||
* * A function name
|
||||
* * A variable name
|
||||
* * A struct name
|
||||
* / A class name
|
||||
* / An annotation name
|
||||
*
|
||||
* This function allows a full name to be read into a buffer, with a defined
|
||||
* start character and a defined maximum text size limit.
|
||||
*
|
||||
* @param Char: The first char of the Identifier.
|
||||
* @param Buffer: The location to store the Identifier. (usually CurrentIdentifier, a compiler global defined for this purpose)
|
||||
* @param Limit: The maximum Identifier length.
|
||||
* @return the length of the parsed identifier
|
||||
*
|
||||
*/
|
||||
static int ReadIdentifier(int Char, char* Buffer, int Limit) {
|
||||
int ind = 0;
|
||||
|
||||
|
@ -108,6 +185,17 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) {
|
|||
return ind;
|
||||
}
|
||||
|
||||
/*
|
||||
* Char literals appear as 'x'
|
||||
*
|
||||
* They are bounded by two apostrophes.
|
||||
* They can contain any 1-byte ASCII character, as well as some
|
||||
* predefined, standard escape codes.
|
||||
* This function attempts to get the character from the file, with escape codes intact.
|
||||
*
|
||||
* @return the character as an int
|
||||
*
|
||||
*/
|
||||
static int ReadCharLiteral() {
|
||||
int Char;
|
||||
Char = NextChar();
|
||||
|
@ -131,7 +219,20 @@ static int ReadCharLiteral() {
|
|||
return Char;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* String literals appear as "hello world"
|
||||
*
|
||||
* They are bounded by two quotation marks.
|
||||
* They can contain an arbitrary length of text.
|
||||
* They are backed by an array of chars (hence the char* type) and thus
|
||||
* have a practically unlimited length.
|
||||
*
|
||||
* To read a String Literal, it is a simple matter of reading Char Literals until
|
||||
* the String termination token is identified - the last quotation mark.
|
||||
*
|
||||
* @param Buffer: The buffer into which to write the string. (usually CurrentIdentifier, a compiler global defined for this purpose)
|
||||
*
|
||||
*/
|
||||
static int ReadStringLiteral(char* Buffer) {
|
||||
int Char;
|
||||
|
||||
|
@ -148,9 +249,18 @@ static int ReadStringLiteral(char* Buffer) {
|
|||
}
|
||||
|
||||
/*
|
||||
* This function is what defines the valid keywords for the language
|
||||
* //TODO: move this to a static list?
|
||||
* //TODO: More optimisations?
|
||||
* Keywords are source-code tokens / strings that are reserved for the compiler.
|
||||
* They cannot be used as identifiers on their own.
|
||||
*
|
||||
* This function is where all of the keywords are added, and where most aliases are going to be stored.
|
||||
*
|
||||
* It uses a switch on the first character of the input string as an optimisation - rather than checking each
|
||||
* keyword against the String individually, it only needs to compare a single number. This can be optimised into
|
||||
* a hash table by the compiler for further optimisation, making this one of the fastest ways to switch
|
||||
* on a full string.
|
||||
*
|
||||
* @param Str: The keyword input to try to parse
|
||||
* @return the token expressed in terms of values of the TokenTypes enum
|
||||
*
|
||||
*/
|
||||
static int ReadKeyword(char* Str) {
|
||||
|
@ -203,7 +313,6 @@ static int ReadKeyword(char* Str) {
|
|||
break;
|
||||
|
||||
case 'p':
|
||||
// This is a huge optimisation once we have as many keywords as a fully featured language.
|
||||
if(!strcmp(Str, "print"))
|
||||
return KW_PRINT;
|
||||
break;
|
||||
|
@ -213,6 +322,11 @@ static int ReadKeyword(char* Str) {
|
|||
return KW_RETURN;
|
||||
break;
|
||||
|
||||
case 's':
|
||||
if(!strcmp(Str, "struct"))
|
||||
return KW_STRUCT;
|
||||
break;
|
||||
|
||||
case 'v':
|
||||
if(!strcmp(Str, "void"))
|
||||
return TY_VOID;
|
||||
|
@ -234,8 +348,21 @@ static int ReadKeyword(char* Str) {
|
|||
* * * * T O K E N I S E R * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
int Tokenise(struct Token* Token) {
|
||||
/*
|
||||
* Handles the majority of the work of reading tokens into the stream.
|
||||
* It reads chars with FindChar, categorizing individual characters or small
|
||||
* strings into their proper expression (as a value of the TokenTypes enum)
|
||||
*
|
||||
* It also defers the reading of numeric literals and char literals to the proper functions.
|
||||
*
|
||||
* If needed, it can also read Identifiers, for variable or function naming.
|
||||
*
|
||||
* This function may be the main bottleneck in the lexer.
|
||||
*
|
||||
*/
|
||||
void Tokenise() {
|
||||
int Char, TokenType;
|
||||
struct Token* Token = &CurrentToken;
|
||||
|
||||
if(RejectedToken != NULL) {
|
||||
Token = RejectedToken;
|
||||
|
@ -439,7 +566,5 @@ int Tokenise(struct Token* Token) {
|
|||
DieChar("Unrecognized character", Char);
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
61
src/Main.c
61
src/Main.c
|
@ -73,48 +73,54 @@ char* TokenNames[] = {
|
|||
"While keyword",
|
||||
"For keyword",
|
||||
|
||||
"Return keyword"
|
||||
"Return keyword",
|
||||
|
||||
"Struct keyword"
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
/* Line = 1;
|
||||
Overread = '\n';
|
||||
CurrentGlobal = 0;
|
||||
struct ASTNode* Node;
|
||||
CurrentLocal = SYMBOLS - 1; */
|
||||
// Option initialisers
|
||||
OptDumpTree = false;
|
||||
OptKeepAssembly = false;
|
||||
OptAssembleFiles = false;
|
||||
OptLinkFiles = true;
|
||||
OptVerboseOutput = false;
|
||||
|
||||
// Temporary .o storage and counter
|
||||
char* ObjectFiles[100];
|
||||
int ObjectCount = 0;
|
||||
|
||||
// Parse command line arguments.
|
||||
int i;
|
||||
for(i = 1; i < argc; i++) {
|
||||
if(*argv[i] != '-') // not a flag
|
||||
for(i = 1/*skip 0*/; i < argc; i++) {
|
||||
// If we're not a flag, we can skip.
|
||||
// We only care about flags in rows.
|
||||
// ie. erc >> -v -T -o << test.exe src/main.er
|
||||
if(*argv[i] != '-')
|
||||
break;
|
||||
|
||||
// Once we identify a flag, we need to make sure it's not just a minus in-place.
|
||||
for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) {
|
||||
// Finally, identify what option is being invoked.
|
||||
switch(argv[i][j]) {
|
||||
case 'o':
|
||||
case 'o': // output
|
||||
OutputFileName = argv[++i];
|
||||
|
||||
break;
|
||||
case 'T':
|
||||
case 'T': // Debug
|
||||
OptDumpTree = true;
|
||||
break;
|
||||
case 'c':
|
||||
case 'c': // Compile only
|
||||
OptAssembleFiles = true;
|
||||
OptKeepAssembly = false;
|
||||
OptLinkFiles = false;
|
||||
break;
|
||||
case 'S':
|
||||
case 'S': // aSsemble only
|
||||
OptAssembleFiles = false;
|
||||
OptKeepAssembly = true;
|
||||
OptLinkFiles = false;
|
||||
break;
|
||||
case 'v':
|
||||
case 'v': // Verbose output
|
||||
OptVerboseOutput = true;
|
||||
break;
|
||||
default:
|
||||
|
@ -123,29 +129,42 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
// If we didn't provide anything other than flags, we need to show how to use the program.
|
||||
if(i >= argc)
|
||||
DisplayUsage(argv[0]);
|
||||
|
||||
// For the rest of the files specified, we can iterate them left to right.
|
||||
while(i < argc) {
|
||||
// Compile the file by invoking the Delegate
|
||||
CurrentASMFile = Compile(argv[i]);
|
||||
if(OptLinkFiles || OptAssembleFiles) {
|
||||
// If we need to assemble (or link, which requires assembly)
|
||||
// then we invoke the Delegate again
|
||||
CurrentObjectFile = Assemble(CurrentASMFile);
|
||||
// We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too.
|
||||
if(ObjectCount == 98) {
|
||||
fprintf(stderr, "Too many inputs");
|
||||
return 1;
|
||||
return 1; // We use return because we're in main, rather than invoking Die.
|
||||
}
|
||||
|
||||
// Move the ObjectCount forward.
|
||||
ObjectFiles[ObjectCount++] = CurrentObjectFile;
|
||||
// Clear the new, forwarded index
|
||||
ObjectFiles[ObjectCount] = NULL;
|
||||
}
|
||||
|
||||
if(!OptKeepAssembly)
|
||||
// unlink = delete
|
||||
unlink(CurrentASMFile);
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if(OptLinkFiles) {
|
||||
// If needed, invoke the Delegate one last time.
|
||||
Link(OutputFileName, ObjectFiles);
|
||||
if(!OptAssembleFiles) {
|
||||
// Even though we need to assemble to link, we can respect the user's options and delete the intermediary files.
|
||||
for(i = 0; ObjectFiles[i] != NULL; i++)
|
||||
unlink(ObjectFiles[i]);
|
||||
}
|
||||
|
@ -155,6 +174,11 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* Akin to a Halt and Catch Fire method.
|
||||
* Simply prints an error, cleans up handles, and closes.
|
||||
*/
|
||||
|
||||
void Die(char* Error) {
|
||||
fprintf(stderr, "%s on line %d\n", Error, Line);
|
||||
fclose(OutputFile);
|
||||
|
@ -162,6 +186,9 @@ void Die(char* Error) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* A variant of Die with an extra String attached.
|
||||
*/
|
||||
void DieMessage(char* Error, char* Reason) {
|
||||
fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line);
|
||||
fclose(OutputFile);
|
||||
|
@ -169,6 +196,9 @@ void DieMessage(char* Error, char* Reason) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* A variant of Die with an extra integer attached.
|
||||
*/
|
||||
void DieDecimal(char* Error, int Number) {
|
||||
fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line);
|
||||
fclose(OutputFile);
|
||||
|
@ -176,6 +206,9 @@ void DieDecimal(char* Error, int Number) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* A variant of Die with an extra character attached.
|
||||
*/
|
||||
void DieChar(char* Error, int Char) {
|
||||
fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line);
|
||||
fclose(OutputFile);
|
||||
|
|
297
src/Parser.c
297
src/Parser.c
|
@ -10,12 +10,10 @@
|
|||
#include "Data.h"
|
||||
|
||||
/*
|
||||
* Precedence is directly related to Token Type.
|
||||
*
|
||||
* enum TokenTypes {
|
||||
* LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT
|
||||
* };
|
||||
*
|
||||
* The Precedence of an operator is directly related to Token Type.
|
||||
* Precedence determines how soon the operator and its surrounding values
|
||||
* will be calculated and aliased.
|
||||
* This allows for things like the common Order of Operations.
|
||||
*/
|
||||
static int Precedence[] = {
|
||||
0, 10, // EOF, ASSIGN
|
||||
|
@ -30,6 +28,13 @@ static int Precedence[] = {
|
|||
110 // /
|
||||
};
|
||||
|
||||
/*
|
||||
* Handles gathering the precedence of an operator from its token,
|
||||
* in terms of values of the TokenTypes enum.
|
||||
*
|
||||
* Error handling is also done here, so that EOF or non-operators are not executed.
|
||||
*
|
||||
*/
|
||||
static int OperatorPrecedence(int Token) {
|
||||
int Prec = Precedence[Token];
|
||||
|
||||
|
@ -40,6 +45,13 @@ static int OperatorPrecedence(int Token) {
|
|||
return Prec;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the value is a right-expression, or in other words is right associative,
|
||||
* then it can be safely calculated beforehand and aliased to a value.
|
||||
* In this case, we can try to alias (or constant fold) everything on the right side
|
||||
* of an assignment.
|
||||
*/
|
||||
|
||||
static int IsRightExpr(int Token) {
|
||||
return (Token == LI_EQUAL);
|
||||
}
|
||||
|
@ -48,6 +60,29 @@ static int IsRightExpr(int Token) {
|
|||
* * * N O D E C O N S T R U C T I O N * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* ASTNodes form the structure of the language that moves the bulk of
|
||||
* data around within the compiler.
|
||||
* They contain:
|
||||
* * An Operation (usually 1:1 with an input token),
|
||||
* * A Type (to identify the size of data it contains),
|
||||
* * Two more Left and Right ASTNodes (the child branches, which make the nodes form a tree)
|
||||
* * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop)
|
||||
* * A Symbol Table Entry
|
||||
* * An Integer Value
|
||||
* * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval
|
||||
*
|
||||
* This is the only function where they are constructed.
|
||||
*
|
||||
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
||||
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
||||
* @param Left: The Node that is attached to the left side branch of this root.
|
||||
* @param Middle: The Node that is attached to the middle of this root, if applicable.
|
||||
* @param Right: The Node that is attached to the right side branch of this root.
|
||||
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
||||
* @param IntValue: The integer value encoded by this Node, if applicable.
|
||||
* @return a newly constructed AST Node
|
||||
*/
|
||||
struct ASTNode* ConstructASTNode(int Operation, int Type,
|
||||
struct ASTNode* Left,
|
||||
struct ASTNode* Middle,
|
||||
|
@ -75,10 +110,28 @@ struct ASTNode* ConstructASTNode(int Operation, int Type,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* AST Leaves are categorized by their lack of child nodes.
|
||||
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
||||
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
||||
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
||||
* @param IntValue: The integer value encoded by this Node, if applicable.
|
||||
* @return a newly constructed AST Node
|
||||
*/
|
||||
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) {
    // A leaf has no children, so the Left, Middle and Right slots are all empty.
    struct ASTNode* Leaf = ConstructASTNode(Operation, Type,
                                            NULL, NULL, NULL,
                                            Symbol, IntValue);

    return Leaf;
}
|
||||
|
||||
/*
|
||||
* AST Branches are categorized by having only one child node.
|
||||
* These are sometimes called Unary Branches.
|
||||
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
||||
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
||||
* @param Left: The Node that is attached to the left side branch of this root.
|
||||
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
||||
* @param IntValue: The integer value encoded by this Node, if applicable.
|
||||
* @return a newly constructed AST Node
|
||||
*/
|
||||
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) {
    // Only the Left child is populated; the Middle and Right slots stay empty.
    struct ASTNode* Branch = ConstructASTNode(Operation, Type,
                                              Left, NULL, NULL,
                                              Symbol, IntValue);

    return Branch;
}
|
||||
|
@ -89,10 +142,10 @@ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left
|
|||
* * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* Take a Token Type, and convert it to an AST-Node Operation.
|
||||
*
|
||||
* TokenTypes and SyntaxOps are synchronized to make this easy.
|
||||
*
|
||||
* TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that
|
||||
* these are synchronized well.
|
||||
* This allows the parsing operation to be little more than a bounds check.
|
||||
* Otherwise, this would be a gigantic switch statement.
|
||||
*/
|
||||
|
||||
int ParseTokenToOperation(int Token) {
|
||||
|
@ -103,11 +156,13 @@ int ParseTokenToOperation(int Token) {
|
|||
}
|
||||
|
||||
/*
|
||||
* Parse a primary (terminal) expression.
|
||||
* This currently handles literal expressions, constructing a leaf node
|
||||
* and handing control back up the chain.
|
||||
*
|
||||
* Primary expressions may be any one of:
|
||||
* * A terminal integer literal
|
||||
* * A terminal string literal
|
||||
* * A variable
|
||||
* * A collection of expressions bounded by parentheses.
|
||||
*
|
||||
* @return the AST Node that represents this expression
|
||||
*/
|
||||
|
||||
struct ASTNode* ParsePrimary(void) {
|
||||
|
@ -134,7 +189,7 @@ struct ASTNode* ParsePrimary(void) {
|
|||
|
||||
case LI_LPARE:
|
||||
// Starting a ( expr ) block
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
Node = ParsePrecedenceASTNode(0);
|
||||
|
||||
|
@ -144,12 +199,26 @@ struct ASTNode* ParsePrimary(void) {
|
|||
}
|
||||
|
||||
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
return Node;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parse a single binary expression.
|
||||
* It ensures that these expressions are parsed to their full extent, that
|
||||
* the order of operations is upheld, that the precedence of the prior
|
||||
* iteration is considered, and that every error is handled.
|
||||
*
|
||||
* This is where all of the right-associative statements are folded, where
|
||||
* type mismatches and widening are handled properly, and that all parsing
|
||||
* is over by the time the end tokens ") } ] ;" are encountered.
|
||||
*
|
||||
* @param PreviousTokenPrecedence: The precedence of the operator to the left.
|
||||
* @return the AST Node corresponding to this block.
|
||||
*
|
||||
*/
|
||||
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
||||
struct ASTNode* LeftNode, *RightNode;
|
||||
struct ASTNode* LeftTemp, *RightTemp;
|
||||
|
@ -159,25 +228,19 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
|||
LeftNode = PrefixStatement();
|
||||
|
||||
NodeType = CurrentToken.type;
|
||||
//printf("%d\r\n", CurrentToken.type);
|
||||
|
||||
if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) {
|
||||
//printf("Current token matches ; ) ]\r\n");
|
||||
LeftNode->RVal = 1; return LeftNode;
|
||||
}
|
||||
|
||||
//printf("Current token has value %d, type %s\n", CurrentToken.value, TokenNames[CurrentToken.type]);
|
||||
while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) {
|
||||
//printf("inside while\n");
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
if(CurrentToken.type == LI_RPARE)
|
||||
break;
|
||||
|
||||
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
|
||||
|
||||
/*
|
||||
LeftType = LeftNode->ExprType;
|
||||
RightType = RightNode->ExprType;
|
||||
*/
|
||||
|
||||
/**
|
||||
* While parsing this node, we may need to widen some types.
|
||||
* This requires a few functions and checks.
|
||||
|
@ -194,9 +257,6 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
|||
if(LeftNode == NULL)
|
||||
Die("Incompatible Expression encountered in assignment");
|
||||
|
||||
|
||||
//printf("\tAssigning variable: %s value %d\n", Symbols[FindSymbol(CurrentIdentifier)].Name, RightNode->Value.IntValue);
|
||||
|
||||
// LeftNode holds the target, the target variable in this case
|
||||
printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name);
|
||||
|
||||
|
@ -212,11 +272,9 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
|||
LeftNode->RVal = 1;
|
||||
RightNode->RVal = 1;
|
||||
|
||||
//printf("mutate left\r\n");
|
||||
LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType);
|
||||
//printf("mutate right\r\n");
|
||||
|
||||
RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType);
|
||||
//printf("mutate right over\r\n");
|
||||
/**
|
||||
* If both are null, the types are incompatible.
|
||||
*/
|
||||
|
@ -267,105 +325,21 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
|||
}
|
||||
|
||||
|
||||
/* struct ASTNode* ParseMultiplicativeASTNode(void) {
|
||||
struct ASTNode* LeftNode, * RightNode;
|
||||
int NodeType;
|
||||
|
||||
LeftNode = ParsePrimary();
|
||||
|
||||
NodeType = CurrentToken.type;
|
||||
if(NodeType == LI_EOF)
|
||||
return LeftNode;
|
||||
|
||||
while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
|
||||
Tokenise(&CurrentToken);
|
||||
|
||||
RightNode = ParsePrimary();
|
||||
|
||||
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
|
||||
|
||||
NodeType = CurrentToken.type;
|
||||
if(NodeType == LI_EOF)
|
||||
break;
|
||||
}
|
||||
|
||||
return LeftNode;
|
||||
}
|
||||
*/
|
||||
/* struct ASTNode* ParseAdditiveASTNode(void) {
|
||||
struct ASTNode* LeftNode, * RightNode;
|
||||
int NodeType;
|
||||
|
||||
LeftNode = ParseMultiplicativeASTNode();
|
||||
|
||||
NodeType = CurrentToken.type;
|
||||
if(NodeType == LI_EOF)
|
||||
return LeftNode;
|
||||
|
||||
while(1) {
|
||||
Tokenise(&CurrentToken);
|
||||
|
||||
RightNode = ParseMultiplicativeASTNode();
|
||||
|
||||
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
|
||||
|
||||
NodeType = CurrentToken.type;
|
||||
if(NodeType == LI_EOF)
|
||||
break;
|
||||
}
|
||||
|
||||
return LeftNode;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * * *
|
||||
* * * * I N T E R P R E T A T I O N * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
int ParseAST(struct ASTNode* Node) {
|
||||
|
||||
|
||||
int LeftVal, RightVal;
|
||||
|
||||
if(Node->Left)
|
||||
LeftVal = ParseAST(Node->Left);
|
||||
|
||||
if(Node->Right)
|
||||
RightVal = ParseAST(Node->Right);
|
||||
|
||||
/*
|
||||
if(Node->Operation == TERM_INTLITERAL)
|
||||
printf("int %d\n", Node->IntValue);
|
||||
else
|
||||
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
|
||||
|
||||
|
||||
switch(Node->Operation) {
|
||||
case OP_ADD:
|
||||
return (LeftVal + RightVal);
|
||||
case OP_SUBTRACT:
|
||||
return (LeftVal - RightVal);
|
||||
case OP_MULTIPLY:
|
||||
return (LeftVal * RightVal);
|
||||
case OP_DIVIDE:
|
||||
return (LeftVal / RightVal);
|
||||
|
||||
case REF_IDENT:
|
||||
case TERM_INTLITERAL:
|
||||
return Node->Value.IntValue;
|
||||
default:
|
||||
fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * *
|
||||
* * * * F U N C T I O N S * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* Handles the logic for calling a function.
|
||||
* This is invoked by an identifier being recognized, followed by a "(.*)" string.
|
||||
*
|
||||
* It simply checks that the function exists, that the parameters given are valid,
|
||||
* and generates the AST Node for calling it.
|
||||
*
|
||||
* @return the AST Node for calling the function stored in CurrentIdentifier
|
||||
*
|
||||
*/
|
||||
struct ASTNode* CallFunction() {
|
||||
struct ASTNode* Tree;
|
||||
struct SymbolTableEntry* Function;
|
||||
|
@ -385,6 +359,21 @@ struct ASTNode* CallFunction() {
|
|||
return Tree;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* An expression list is used:
|
||||
* * In the call to a function
|
||||
*
|
||||
* It is parsed by seeking left parentheses "(", parsing binary expressions
|
||||
* until either a comma or a right parentheses is found.
|
||||
*
|
||||
* The former will cause another expression to be parsed, the latter will cause
|
||||
* parsing to stop.
|
||||
*
|
||||
* @return the AST Node representing every expression in the list, glued end to
|
||||
* end with a COMPOSITE operation.
|
||||
*
|
||||
*/
|
||||
struct ASTNode* GetExpressionList() {
|
||||
struct ASTNode* Tree = NULL, *Child = NULL;
|
||||
int Count;
|
||||
|
@ -397,7 +386,7 @@ struct ASTNode* GetExpressionList() {
|
|||
|
||||
switch(CurrentToken.type) {
|
||||
case LI_COM:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
break;
|
||||
case LI_RPARE:
|
||||
break;
|
||||
|
@ -414,6 +403,18 @@ struct ASTNode* GetExpressionList() {
|
|||
* * * * S T A T E M E N T S * * * *
|
||||
* * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
/*
|
||||
* Handles parsing an individual statement.
|
||||
*
|
||||
* It serves as a wrapper around:
|
||||
* * If Statement
|
||||
* * While Statement
|
||||
* * For Statement
|
||||
* * Return Statement
|
||||
* * Numeric literals and variables
|
||||
* * Binary Expressions
|
||||
* @return the AST Node representing this single statement
|
||||
*/
|
||||
struct ASTNode* ParseStatement(void) {
|
||||
int Type;
|
||||
|
||||
|
@ -425,18 +426,10 @@ struct ASTNode* ParseStatement(void) {
|
|||
printf("\t\tNew Variable: %s\n", CurrentIdentifier);
|
||||
Type = ParseOptionalPointer();
|
||||
VerifyToken(TY_IDENTIFIER, "ident");
|
||||
BeginVariableDeclaration(Type, SC_LOCAL);
|
||||
BeginVariableDeclaration(Type, NULL, SC_LOCAL);
|
||||
VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment?
|
||||
return NULL;
|
||||
|
||||
/*case TY_IDENTIFIER:
|
||||
if(Symbols[FindSymbol(CurrentIdentifier)].Structure == ST_FUNC)
|
||||
printf("\t\tCalling Function: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
|
||||
else
|
||||
printf("\t\tAssigning variable: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
|
||||
|
||||
return ParseIdentifier();
|
||||
*/
|
||||
case KW_IF:
|
||||
return IfStatement();
|
||||
|
||||
|
@ -451,11 +444,26 @@ struct ASTNode* ParseStatement(void) {
|
|||
|
||||
default:
|
||||
ParsePrecedenceASTNode(0);
|
||||
//DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Handles parsing multiple statements or expressions in a row.
|
||||
* These are typically grouped together with the Compound tokens "{ }"
|
||||
* and separated by the semicolon ";".
|
||||
*
|
||||
* Single Statements are parsed until a semicolon is reached, at which
|
||||
* point another statement will be parsed, or until a Right Compound
|
||||
* token is reached ("}"), at which point parsing will stop.
|
||||
*
|
||||
* It is useful for:
|
||||
* * Tightly identifying related blocks of code
|
||||
* * Containing the many statements of functions
|
||||
*
|
||||
* @return the AST Node representing this compound statement
|
||||
*
|
||||
*/
|
||||
struct ASTNode* ParseCompound() {
|
||||
struct ASTNode* Left = NULL, *Tree;
|
||||
|
||||
|
@ -486,6 +494,21 @@ struct ASTNode* ParseCompound() {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the entry point to the parser/lexer.
|
||||
*
|
||||
* By definition, Global definitions are accessible anywhere.
|
||||
* As of right now (20/01/2021), classes are unimplemented.
|
||||
* This means that all functions and all function prototypes are globally scoped.
|
||||
*
|
||||
* You may also define variables, constants, preprocessor directives and other text
|
||||
* in the global scope.
|
||||
*
|
||||
* The function itself loops, parsing either variables or functions, until it
|
||||
* reaches the end of the file.
|
||||
*
|
||||
*/
|
||||
|
||||
void ParseGlobals() {
|
||||
struct ASTNode* Tree;
|
||||
int Type, FunctionComing;
|
||||
|
@ -516,7 +539,7 @@ void ParseGlobals() {
|
|||
}
|
||||
} else {
|
||||
printf("\tParsing global variable declaration\n");
|
||||
BeginVariableDeclaration(Type, SC_GLOBAL);
|
||||
BeginVariableDeclaration(Type, NULL, SC_GLOBAL);
|
||||
VerifyToken(LI_SEMIC, ";");
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,34 @@
|
|||
#include <Defs.h>
|
||||
#include <Data.h>
|
||||
|
||||
/****************************************************************
|
||||
* Types are enumerated by the DataTypes enum. *
|
||||
* They are represented by unsigned integers, where the *
|
||||
* most significant 28 bits differentiate the raw type *
|
||||
* of the data being encoded. *
|
||||
* However, the least significant nibble - that is, *
|
||||
* the lowest 4 bits, represent the count of indirection. *
|
||||
* *
|
||||
* This means that a raw Integer data type, such as an i32, *
|
||||
* has the DataType representation 32. *
|
||||
* However, a pointer to an Integer has DataType value 32+1, *
|
||||
* or 33. *
|
||||
* *
|
||||
* This means that the maximum valid pointer level is 15. *
|
||||
* That's a: *
|
||||
* ***************int *
|
||||
* That ought to be enough for everyone, right? *
|
||||
* *
|
||||
****************************************************************/
|
||||
|
||||
/*
|
||||
* Adds 1 to the input Type, to add a level of indirection.
|
||||
* If the indirection is already at 15 levels (the most the low nibble can hold), it aborts.
|
||||
*
|
||||
* @param Type: The DataType to pointerise
|
||||
* @return the new pointerised DataType value.
|
||||
*/
|
||||
|
||||
int PointerTo(int Type) {
|
||||
if((Type & 0xf) == 0xf)
|
||||
DieDecimal("Unrecognized type in pointerisation", Type);
|
||||
|
@ -14,30 +42,59 @@ int PointerTo(int Type) {
|
|||
return (Type + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the underlying type behind a pointer.
|
||||
* If the type is not a pointer (the lowest 4 bits are 0), it halts compilation.
|
||||
*
|
||||
* @param Type: The type to un-dereference
|
||||
* @return the underlying Type
|
||||
*/
|
||||
|
||||
int ValueAt(int Type) {
    // The low nibble of a DataType counts the levels of indirection.
    int Indirection = Type & 0xf;

    printf("\t\tDereferencing a %s\n", TypeNames(Type));

    // TODO: carried over from the original author, who flagged this check
    // as unsatisfactory - revisit.
    // A type with no indirection is not a pointer and cannot be dereferenced.
    if(Indirection == 0x0)
        DieDecimal("Unrecognized type in defererencing", Type);

    // Removing one level of indirection yields the pointed-to type.
    return (Type - 1);
}
|
||||
|
||||
int ParseOptionalPointer() {
|
||||
/*
|
||||
* Type declarations may be raw, they may be pointers.
|
||||
* If they are pointers, we need to be able to check
|
||||
* how many levels of indirection.
|
||||
* However, being a pointer is optional.
|
||||
*
|
||||
* This can parse just a lone type specifier, or
|
||||
* any valid level of indirection thereof.
|
||||
*
|
||||
* @param Composite: out-parameter; set to the result of BeginStructDeclaration() when the type is a struct
|
||||
* @return the parsed DataType, with any indirection.
|
||||
*
|
||||
*/
|
||||
|
||||
int ParseOptionalPointer(struct SymbolTableEntry** Composite) {
|
||||
|
||||
int Type;
|
||||
|
||||
switch(CurrentToken.type) {
|
||||
case TY_VOID:
|
||||
Type = RET_VOID;
|
||||
Tokenise();
|
||||
break;
|
||||
case TY_CHAR:
|
||||
Type = RET_CHAR;
|
||||
Tokenise();
|
||||
break;
|
||||
case TY_INT:
|
||||
Type = RET_INT;
|
||||
Tokenise();
|
||||
break;
|
||||
case TY_LONG:
|
||||
Type = RET_LONG;
|
||||
Tokenise();
|
||||
break;
|
||||
case KW_STRUCT:
|
||||
Type = DAT_STRUCT;
|
||||
*Composite = BeginStructDeclaration();
|
||||
break;
|
||||
default:
|
||||
DieDecimal("Illegal type for pointerisation", CurrentToken.type);
|
||||
|
@ -47,17 +104,30 @@ int ParseOptionalPointer() {
|
|||
// x = **y;
|
||||
// possible.
|
||||
while(1) {
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
printf("\t\t\tType on parsing is %d\n", CurrentToken.type);
|
||||
if(CurrentToken.type != AR_STAR)
|
||||
break;
|
||||
|
||||
Type = PointerTo(Type);
|
||||
// Tokenise(); TODO: is this skipping pointers?
|
||||
}
|
||||
|
||||
return Type;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Array Accesses come in the form of x[y].
|
||||
*
|
||||
* x must be a pointer type, and an array structure.
|
||||
* y can be any binary expression.
|
||||
*
|
||||
* It is a wrapper around *((imax*)x + y).
|
||||
*
|
||||
* @return the AST Node that represents this statement.
|
||||
*/
|
||||
|
||||
struct ASTNode* AccessArray() {
|
||||
struct ASTNode* LeftNode, *RightNode;
|
||||
struct SymbolTableEntry* Entry;
|
||||
|
@ -67,9 +137,7 @@ struct ASTNode* AccessArray() {
|
|||
DieMessage("Accessing undeclared array", CurrentIdentifier);
|
||||
|
||||
LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0);
|
||||
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
|
||||
Tokenise(&CurrentToken);
|
||||
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
|
||||
Tokenise();
|
||||
|
||||
RightNode = ParsePrecedenceASTNode(0);
|
||||
|
||||
|
|
359
src/Statements.c
359
src/Statements.c
|
@ -8,9 +8,27 @@
|
|||
#include <Data.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
|
||||
/*
|
||||
* Handles reading in a comma-separated list of declarations.
|
||||
* Erythro treats structs, enums and function parameters the same in this regard -
|
||||
* comma separated.
|
||||
*
|
||||
* C and C++ tend to treat enums and structs differently - the former separated by commas,
|
||||
* the latter separated by semicolons.
|
||||
*
|
||||
* Note that since functions are read in through parentheses, and structs/enums are read in
|
||||
* through brackets, the end character is configurable.
|
||||
*
|
||||
* @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
|
||||
* @param Storage: The Storage Scope of this declaration list.
|
||||
* @param End: The end token, in terms of TokenTypes enum values.
|
||||
* @return the amount of declarations read in.
|
||||
*
|
||||
*/
|
||||
|
||||
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
|
||||
int TokenType, ParamCount = 0;
|
||||
struct SymbolTableEntry* PrototypePointer = NULL;
|
||||
struct SymbolTableEntry* PrototypePointer = NULL, *Composite;
|
||||
|
||||
if(FunctionSymbol != NULL)
|
||||
PrototypePointer = FunctionSymbol->Start;
|
||||
|
@ -24,19 +42,15 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
|
|||
DieDecimal("Function paramater of invalid type at index", ParamCount + 1);
|
||||
PrototypePointer=PrototypePointer->NextSymbol;
|
||||
} else {
|
||||
BeginVariableDeclaration(TokenType, SC_PARAM);
|
||||
BeginVariableDeclaration(TokenType, Composite, Storage);
|
||||
}
|
||||
ParamCount++;
|
||||
|
||||
switch(CurrentToken.type) {
|
||||
case LI_COM:
|
||||
Tokenise(&CurrentToken);
|
||||
break;
|
||||
case LI_RPARE:
|
||||
break;
|
||||
default:
|
||||
if((CurrentToken.type != LI_COM) && (CurrentToken.type != End))
|
||||
DieDecimal("Unexpected token in parameter", CurrentToken.type);
|
||||
}
|
||||
|
||||
if(CurrentToken.type == LI_COM)
|
||||
Tokenise();
|
||||
}
|
||||
|
||||
if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length))
|
||||
|
@ -45,6 +59,61 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
|
|||
return ParamCount;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the declaration of a new struct.
|
||||
* struct thisStct { int x, int y, int z };
|
||||
*
|
||||
* Verifies that the current identifier is not used,
|
||||
* verifies that this is not a redefinition (excluding
|
||||
* the case where there is a declaration but no definition)
|
||||
* and then saves it into the Structs symbol table.
|
||||
*
|
||||
* @return the Symbol Table entry of this new struct.
|
||||
*/
|
||||
|
||||
struct SymbolTableEntry* BeginStructDeclaration() {
|
||||
struct SymbolTableEntry* Composite = NULL, *Member;
|
||||
int Offset;
|
||||
|
||||
Tokenise();
|
||||
|
||||
if(CurrentToken.type == TY_IDENTIFIER) {
|
||||
Composite = FindStruct(CurrentIdentifier);
|
||||
Tokenise();
|
||||
}
|
||||
|
||||
if(CurrentToken.type != LI_LBRAC) {
|
||||
if(Composite == NULL)
|
||||
DieMessage("Unknown Struct", CurrentIdentifier);
|
||||
return Composite;
|
||||
}
|
||||
|
||||
if(Composite)
|
||||
DieMessage("Redefinition of struct", CurrentIdentifier);
|
||||
|
||||
Composite = AddSymbol(CurrentIdentifier, DAT_STRUCT, 0, SC_STRUCT, 0, 0, NULL);
|
||||
Tokenise();
|
||||
|
||||
ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAS);
|
||||
VerifyToken(LI_RBRAS, "]");
|
||||
|
||||
Composite->Start = StructMembers;
|
||||
StructMembers = StructMembersEnd = NULL;
|
||||
|
||||
Member = Composite->Start;
|
||||
Member->SinkOffset = 0;
|
||||
Offset = TypeSize(Member->Type, Member->CompositeType);
|
||||
|
||||
for(Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
|
||||
Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1);
|
||||
|
||||
Offset += TypeSize(Member->Type, Member->CompositeType);
|
||||
}
|
||||
|
||||
Composite->Length = Offset;
|
||||
return Composite;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the declaration of a type of a variable.
|
||||
* int newVar;
|
||||
|
@ -52,11 +121,12 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
|
|||
* It verifies that we have a type keyword followed by a
|
||||
* unique, non-keyword identifier.
|
||||
*
|
||||
* It then stores this variable into the symbol table,
|
||||
* It then stores this variable into the appropriate symbol table,
|
||||
* and returns the new item.
|
||||
*
|
||||
* @return the Symbol Table entry of this new variable.
|
||||
*/
|
||||
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) {
|
||||
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
|
||||
struct SymbolTableEntry* Symbol = NULL;
|
||||
|
||||
switch(Scope) {
|
||||
|
@ -66,33 +136,50 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) {
|
|||
case SC_LOCAL:
|
||||
case SC_PARAM:
|
||||
if(FindLocal(CurrentIdentifier) != NULL)
|
||||
DieMessage("Invalid redelcaration of local variable", CurrentIdentifier);
|
||||
DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
|
||||
case SC_MEMBER:
|
||||
if(FindMember(CurrentIdentifier) != NULL)
|
||||
DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
|
||||
}
|
||||
|
||||
if(CurrentToken.type == LI_LBRAS) {
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
if(CurrentToken.type == LI_INT) {
|
||||
switch(Scope) {
|
||||
case SC_GLOBAL:
|
||||
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0);
|
||||
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
|
||||
break;
|
||||
case SC_LOCAL:
|
||||
case SC_PARAM:
|
||||
case SC_MEMBER:
|
||||
Die("Local arrays are unimplemented");
|
||||
}
|
||||
}
|
||||
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
VerifyToken(LI_RBRAS, "]");
|
||||
} else {
|
||||
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0);
|
||||
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
|
||||
}
|
||||
|
||||
return Symbol;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the declaration of a new function.
|
||||
* Verifies that the identifier is not taken (excluding the case
|
||||
* where there is a declaration but no definition)
|
||||
* Parses the list of parameters if present
|
||||
* Saves the function prototype if there is no body
|
||||
* Generates and saves the break-out point label
|
||||
*
|
||||
* @param Type: The return type of the function
|
||||
* @return the AST for this function
|
||||
*
|
||||
*/
|
||||
|
||||
struct ASTNode* ParseFunction(int Type) {
|
||||
struct ASTNode* Tree;
|
||||
struct ASTNode* FinalStatement;
|
||||
|
@ -104,7 +191,7 @@ struct ASTNode* ParseFunction(int Type) {
|
|||
OldFunction = NULL;
|
||||
if(OldFunction == NULL) {
|
||||
BreakLabel = NewLabel();
|
||||
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0);
|
||||
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
|
||||
}
|
||||
|
||||
VerifyToken(LI_LPARE, "(");
|
||||
|
@ -120,7 +207,7 @@ struct ASTNode* ParseFunction(int Type) {
|
|||
Params = ParamsEnd = NULL;
|
||||
|
||||
if(CurrentToken.type == LI_SEMIC) {
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -149,7 +236,6 @@ struct ASTNode* ParseFunction(int Type) {
|
|||
* //TODO: No brackets
|
||||
* //TODO: Type inference
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
struct ASTNode* ReturnStatement() {
|
||||
|
@ -166,19 +252,10 @@ struct ASTNode* ReturnStatement() {
|
|||
|
||||
Tree = ParsePrecedenceASTNode(0);
|
||||
|
||||
/*
|
||||
ReturnType = Tree->ExprType;
|
||||
FunctionType = Symbols[CurrentFunction].Type;
|
||||
*/
|
||||
|
||||
Tree = MutateType(Tree, FunctionEntry->Type, 0);
|
||||
if(Tree == NULL)
|
||||
Die("Returning a value of incorrect type for function");
|
||||
|
||||
/*
|
||||
if(ReturnType)
|
||||
Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0);
|
||||
*/
|
||||
|
||||
Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0);
|
||||
|
||||
|
@ -189,59 +266,33 @@ struct ASTNode* ReturnStatement() {
|
|||
return Tree;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Handles Identifiers.
|
||||
* Handles the surrounding logic for If statements.
|
||||
*
|
||||
* This is called for any of:
|
||||
* - Calling a function
|
||||
* - Assigning an lvalue variable
|
||||
* - Performing arithmetic on a variable
|
||||
* - Performing arithmetic with the return values of function calls
|
||||
* If statements have the basic form:
|
||||
* * if (condition) body
|
||||
* * if (condition)
|
||||
* body
|
||||
* * if (condition) {
|
||||
* body
|
||||
* }
|
||||
*
|
||||
* For the case where you're assigning an l-value;
|
||||
* You can assign with another assignment,
|
||||
* a statement, a function or a literal.
|
||||
* Conditions may be any truthy statement (such as a pointer,
|
||||
* object, integer), as conditions not recognized are auto-
|
||||
* matically converted to booleans.
|
||||
*
|
||||
* This meaning, any object that can be resolved to 0 or NULL
|
||||
* can be placed as the condition and used as a check.
|
||||
*
|
||||
* For example:
|
||||
* struct ASTNode* Node = NULL;
|
||||
* if(Node) {
|
||||
* // This will not run, as Node is ((void*)0)
|
||||
* }
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
struct ASTNode* ParseIdentifier() {
|
||||
struct ASTNode* Left, *Right, *Tree;
|
||||
int LeftType, RightType;
|
||||
int ID;
|
||||
|
||||
VerifyToken(TY_IDENTIFIER, "ident");
|
||||
|
||||
printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier));
|
||||
|
||||
if(CurrentToken.type == LI_LPARE)
|
||||
return CallFunction();
|
||||
|
||||
if((ID = FindSymbol(CurrentIdentifier)) == -1) {
|
||||
printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier, Symbols[0].Name, Symbols[1].Name);
|
||||
DieMessage("Undeclared Variable ", CurrentIdentifier);
|
||||
}
|
||||
Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);
|
||||
|
||||
VerifyToken(LI_EQUAL, "=");
|
||||
|
||||
Left = ParsePrecedenceASTNode(0);
|
||||
|
||||
LeftType = Left->ExprType;
|
||||
RightType = Right->ExprType;
|
||||
|
||||
Left = MutateType(Left, RightType, 0);
|
||||
if(!Left)
|
||||
Die("Incompatible types in assignment");
|
||||
|
||||
if(LeftType)
|
||||
Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);
|
||||
|
||||
Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
|
||||
|
||||
return Tree;
|
||||
}*/
|
||||
|
||||
struct ASTNode* IfStatement() {
|
||||
struct ASTNode* Condition, *True, *False = NULL;
|
||||
|
||||
|
@ -261,13 +312,39 @@ struct ASTNode* IfStatement() {
|
|||
True = ParseCompound();
|
||||
|
||||
if(CurrentToken.type == KW_ELSE) {
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
False = ParseCompound();
|
||||
}
|
||||
|
||||
return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the surrounding logic for While loops.
|
||||
*
|
||||
* While loops have the basic form:
|
||||
* while ( condition ) { body }
|
||||
*
|
||||
* When reaching the condition (which, like an If statement,
|
||||
* can be any truthy value), if it resolves to true:
|
||||
* The body is executed, and immediately the condition is checked
|
||||
* again.
|
||||
* This repeats until the condition resolves false, at which point
|
||||
* the loop executes no more.
|
||||
*
|
||||
* This can be prototyped as the following pseudo-assembler:
|
||||
*
|
||||
* cond:
|
||||
* check <condition>
|
||||
* jne exit
|
||||
* <body>
|
||||
* jump cond
|
||||
* exit:
|
||||
* <more code>
|
||||
*
|
||||
* @return the AST of this statement
|
||||
*
|
||||
*/
|
||||
struct ASTNode* WhileStatement() {
|
||||
struct ASTNode* Condition, *Body;
|
||||
|
||||
|
@ -287,12 +364,36 @@ struct ASTNode* WhileStatement() {
|
|||
return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the surrounding logic for For loops.
|
||||
*
|
||||
* They have the basic form of:
|
||||
* for ( init ; condition; iterator) { body }
|
||||
*
|
||||
* The initialiser is run only once upon reaching the for loop.
|
||||
* Then the condition is checked, and if true, the body is executed.
|
||||
* After execution of the body, the iterator is run and the condition
|
||||
* checked again.
|
||||
*
|
||||
* It can be prototyped as the following pseudo-assembler code:
|
||||
*
|
||||
* for:
|
||||
* <init>
|
||||
* cond:
|
||||
* check <condition>
|
||||
* jne exit
|
||||
* <body>
|
||||
* <iterator>
|
||||
* jump cond
|
||||
* exit:
|
||||
* <loop exit>
|
||||
*
|
||||
* In the case of the implementation, "init" is the preoperator,
|
||||
* "iterator" is the postoperator.
|
||||
*
|
||||
* @return the AST of this statement
|
||||
*/
|
||||
struct ASTNode* ForStatement() {
|
||||
|
||||
// for (preop; condition; postop) {
|
||||
// body
|
||||
//}
|
||||
|
||||
struct ASTNode* Condition, *Body;
|
||||
struct ASTNode* Preop, *Postop;
|
||||
|
||||
|
@ -326,6 +427,18 @@ struct ASTNode* ForStatement() {
|
|||
return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Handles the surrounding logic for the Print statement.
|
||||
*
|
||||
* This is a legacy hold-over from the early testing, and it
|
||||
* serves merely as a wrapper around the cstdlib printf function.
|
||||
*
|
||||
* It does, however (//TODO), attempt to guess the type that you
|
||||
* want to print, which takes a lot of the guesswork out of printing.
|
||||
*
|
||||
* @return the AST of this statement
|
||||
*/
|
||||
struct ASTNode* PrintStatement(void) {
|
||||
struct ASTNode* Tree;
|
||||
int LeftType, RightType;
|
||||
|
@ -342,7 +455,7 @@ struct ASTNode* PrintStatement(void) {
|
|||
DieDecimal("Attempting to print an invalid type:", RightType);
|
||||
|
||||
if(RightType)
|
||||
Tree = ConstructASTBranch(RightType, RET_INT, Tree, NULL, 0);
|
||||
Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);
|
||||
|
||||
Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);
|
||||
|
||||
|
@ -352,16 +465,33 @@ struct ASTNode* PrintStatement(void) {
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the surrounding logic for all of the logical and semantic
|
||||
* postfixes.
|
||||
*
|
||||
* Postfixes are tokens that are affixed to the end of another, and
|
||||
* change behaviour in some way. These can be added calculations,
|
||||
* some form of transformation, or other.
|
||||
*
|
||||
* A current list of postfixes:
|
||||
* * (): Call a function
|
||||
* * []: Index or define an array.
|
||||
* * ++: Increment a variable AFTER it is returned
|
||||
* NOTE: there is a prefix variant of this for incrementing BEFOREhand.
|
||||
* * --: Decrement a variable AFTER it is returned
|
||||
* NOTE: there is a prefix variant of this for decrementing BEFOREhand.
|
||||
*
|
||||
* Planned postfixes:
|
||||
* * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
|
||||
* NOTE: there is a prefix variant of this for shifting left - multiplying by two.
|
||||
*
|
||||
* @return the AST of the statement plus its postfix
|
||||
*/
|
||||
struct ASTNode* PostfixStatement() {
|
||||
struct ASTNode* Tree;
|
||||
struct SymbolTableEntry* Entry;
|
||||
|
||||
Tokenise(&CurrentToken);
|
||||
|
||||
// If we get here, we're one of three things:
|
||||
// - Function
|
||||
// - Array
|
||||
// - Variable
|
||||
Tokenise();
|
||||
|
||||
if(CurrentToken.type == LI_LPARE)
|
||||
return CallFunction();
|
||||
|
@ -370,8 +500,8 @@ struct ASTNode* PostfixStatement() {
|
|||
return AccessArray();
|
||||
|
||||
// If we get here, we must be a variable.
|
||||
// There's no guarantees that the variable is in
|
||||
// the symbol table, though.
|
||||
// (as functions have been called and arrays have been indexed)
|
||||
// Check that the variable is recognized..
|
||||
|
||||
if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR)
|
||||
DieMessage("Unknown Variable", CurrentIdentifier);
|
||||
|
@ -380,11 +510,11 @@ struct ASTNode* PostfixStatement() {
|
|||
|
||||
switch(CurrentToken.type) {
|
||||
case PPMM_PLUS:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0);
|
||||
break;
|
||||
case PPMM_MINUS:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0);
|
||||
break;
|
||||
default:
|
||||
|
@ -395,33 +525,58 @@ struct ASTNode* PostfixStatement() {
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* Handles the surrounding logic for all of the logical and semantic
|
||||
* prefixes.
|
||||
*
|
||||
* Prefixes are tokens that are affixed to the start of another, and
|
||||
* change behaviour in some way. These can be added calculations,
|
||||
* some form of transformation, or other.
|
||||
*
|
||||
* A current list of prefixes:
|
||||
* * !: Invert the boolean result of a statement or truthy value.
|
||||
* * ~: Invert the individual bits in a number
|
||||
* * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
|
||||
* * ++: Increment a variable BEFORE it is returned.
|
||||
* NOTE: there is a postfix variant of this for incrementing AFTER the fact.
|
||||
* * --: Decrement a variable BEFORE it is returned.
|
||||
* NOTE: there is a postfix variant of this for decrementing AFTER the fact.
|
||||
* * &: Take the address of the following object (get the address that contains it)
|
||||
* * *: Dereference the following pointer (get the object it points at)
|
||||
*
|
||||
* Planned prefixes:
|
||||
* * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
|
||||
* NOTE: there is a postfix variant of this for shifting right - dividing by two.
|
||||
*
|
||||
* @return the AST of this statement, plus its prefixes and any postfixes.
|
||||
*/
|
||||
struct ASTNode* PrefixStatement() {
|
||||
struct ASTNode* Tree;
|
||||
|
||||
switch (CurrentToken.type) {
|
||||
case BOOL_INVERT:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = PrefixStatement();
|
||||
Tree->RVal = 1;
|
||||
Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0);
|
||||
break;
|
||||
|
||||
case BIT_NOT:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = PrefixStatement();
|
||||
Tree->RVal = 1;
|
||||
Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0);
|
||||
break;
|
||||
|
||||
case AR_MINUS:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = PrefixStatement();
|
||||
|
||||
Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0);
|
||||
break;
|
||||
|
||||
case PPMM_PLUS:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = PrefixStatement();
|
||||
|
||||
if(Tree->Operation != REF_IDENT)
|
||||
|
@ -430,7 +585,7 @@ struct ASTNode* PrefixStatement() {
|
|||
break;
|
||||
|
||||
case PPMM_MINUS:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
Tree = PrefixStatement();
|
||||
|
||||
if(Tree->Operation != REF_IDENT)
|
||||
|
@ -440,7 +595,7 @@ struct ASTNode* PrefixStatement() {
|
|||
break;
|
||||
|
||||
case BIT_AND:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
// To allow things like:
|
||||
// x = &&y;
|
||||
|
@ -454,7 +609,7 @@ struct ASTNode* PrefixStatement() {
|
|||
Tree->ExprType = PointerTo(Tree->ExprType);
|
||||
break;
|
||||
case AR_STAR:
|
||||
Tokenise(&CurrentToken);
|
||||
Tokenise();
|
||||
|
||||
Tree = PrefixStatement();
|
||||
|
||||
|
|
|
@ -78,6 +78,28 @@ struct SymbolTableEntry* FindGlobal(char* Symbol) {
|
|||
return SearchList(Symbol, Globals);
|
||||
}
|
||||
|
||||
/*
|
||||
* An override for FindSymbol.
|
||||
* Searches only the defined Structs.
|
||||
* @param Symbol: The string name of the symbol to search for.
|
||||
* @return a pointer to the node if found, else NULL
|
||||
*
|
||||
*/
|
||||
struct SymbolTableEntry* FindStruct(char* Symbol) {
|
||||
return SearchList(Symbol, Structs);
|
||||
}
|
||||
|
||||
/*
|
||||
* An override for FindSymbol.
|
||||
* Searches only the defined Struct & Enum Members.
|
||||
* @param Symbol: The string name of the symbol to search for.
|
||||
* @return a pointer to the node if found, else NULL
|
||||
*
|
||||
*/
|
||||
struct SymbolTableEntry* FindMember(char* Symbol) {
|
||||
return SearchList(Symbol, StructMembers);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a particular linked list,
|
||||
* Take Node and append it to the Tail.
|
||||
|
@ -112,6 +134,7 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail
|
|||
void FreeLocals() {
|
||||
Locals = LocalsEnd = NULL;
|
||||
Params = ParamsEnd = NULL;
|
||||
FunctionEntry = NULL;
|
||||
|
||||
}
|
||||
|
||||
|
@ -122,6 +145,8 @@ void ClearTables() {
|
|||
Globals = GlobalsEnd = NULL;
|
||||
Locals = LocalsEnd = NULL;
|
||||
Params = ParamsEnd = NULL;
|
||||
StructMembers = StructMembersEnd = NULL;
|
||||
Structs = StructsEnd = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -136,34 +161,7 @@ void ClearTables() {
|
|||
*
|
||||
* @return The SymbolTableEntry* pointer that corresponds to this newly constructed node.
|
||||
*/
|
||||
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset) {
|
||||
|
||||
/* int TableSlot;
|
||||
int SinkOffset = 0;
|
||||
|
||||
if((TableSlot = FindSymbolImpl(Name, Storage)) != -1)
|
||||
return -1;
|
||||
|
||||
// Instaed of spliting this up into AddLocalSymbol and AddGlobalSymbol,
|
||||
// we can use this switch to avoid duplicated code.
|
||||
switch(Storage) {
|
||||
case SC_PARAM:
|
||||
// Instead of special casing parameters, we can just add these to the symbol lists and be done with it.
|
||||
printf("\tPreparing new parameter %s of type %s\r\n", Name, TypeNames[Type]);
|
||||
TableSlot = AddSymbol(Name, Type, Structure, SC_GLOBAL, 88, 1);
|
||||
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the global process
|
||||
TableSlot = AddSymbol(Name, Type, Structure, SC_LOCAL, 88, 1);
|
||||
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the local process
|
||||
return TableSlot;
|
||||
case SC_GLOBAL:
|
||||
TableSlot = NewGlobalSymbol();
|
||||
break;
|
||||
case SC_LOCAL:
|
||||
printf("\tCreating new local symbol %s\r\n", Name);
|
||||
TableSlot = NewLocalSymbol();
|
||||
SinkOffset = AsCalcOffset(Type);
|
||||
break;
|
||||
} */
|
||||
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType) {
|
||||
|
||||
struct SymbolTableEntry* Node =
|
||||
(struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry));
|
||||
|
@ -174,33 +172,28 @@ struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Stor
|
|||
Node->Storage = Storage;
|
||||
Node->Length = Length;
|
||||
Node->SinkOffset = SinkOffset;
|
||||
Node->CompositeType = CompositeType;
|
||||
|
||||
switch(Storage) {
|
||||
case SC_GLOBAL:
|
||||
AppendSymbol(&Globals, &GlobalsEnd, Node);
|
||||
// We don't want to generate a static block for functions.
|
||||
if(Structure != ST_FUNC) AsGlobalSymbol(Node);
|
||||
break;
|
||||
case SC_STRUCT:
|
||||
AppendSymbol(&Structs, &StructsEnd, Node);
|
||||
break;
|
||||
case SC_MEMBER:
|
||||
AppendSymbol(&StructMembers, &StructMembersEnd, Node);
|
||||
case SC_LOCAL:
|
||||
AppendSymbol(&Locals, &LocalsEnd, Node);
|
||||
break;
|
||||
case SC_PARAM:
|
||||
AppendSymbol(&Params, &ParamsEnd, Node);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
/* // NOTE: Generating global symbol names must happen AFTER the name and type are declared.
|
||||
switch(Storage) {
|
||||
case SC_GLOBAL:
|
||||
printf("\tCreating new global symbol %s into slot %d\r\n", Name, TableSlot);
|
||||
if(Structure != ST_FUNC && EndLabel != 88) { // Magic keyword so that we don't generate ASM globals for parameters
|
||||
printf("\t\tGenerating data symbol.\r\n");
|
||||
AsGlobalSymbol(TableSlot);
|
||||
}
|
||||
break;
|
||||
case SC_LOCAL:
|
||||
break;
|
||||
} */
|
||||
|
||||
//printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
|
||||
return Node;
|
||||
}
|
Loading…
Reference in New Issue
Block a user