Compare commits

..

No commits in common. "e19a9459344fd92cd9ca49190ec39434380185fa" and "eb118db872c7c08b7d7063586aa9703ef6a2f5a0" have entirely different histories.

11 changed files with 372 additions and 952 deletions

View File

@ -18,11 +18,6 @@
extern_ struct SymbolTableEntry* Globals, *GlobalsEnd;
extern_ struct SymbolTableEntry* Locals, *LocalsEnd;
extern_ struct SymbolTableEntry* Params, *ParamsEnd;
extern_ struct SymbolTableEntry* Structs, *StructsEnd;
extern_ struct SymbolTableEntry* StructMembers, *StructMembersEnd;
extern_ struct SymbolTableEntry* Unions, *UnionsEnd;
extern_ struct SymbolTableEntry* Enums, *EnumsEnd;
extern_ bool OptDumpTree;
extern_ bool OptKeepAssembly;

View File

@ -92,8 +92,7 @@ enum TokenTypes {
KW_ELSE,
KW_WHILE,
KW_FOR,
KW_RETURN,
KW_STRUCT
KW_RETURN
};
/*
@ -179,6 +178,7 @@ struct ASTNode {
union {
int Size; // OP_SCALE's linear representation
int IntValue; // TERM_INTLIT's Value
int ID; // LV_IDENT's Symbols[] index.
};
};
@ -215,9 +215,6 @@ struct SymbolTableEntry {
enum StorageScope {
SC_GLOBAL = 1, // Global Scope
SC_STRUCT, // Struct Definitions
SC_ENUM, // Enum Definitions
SC_MEMBER, // The members of Structs or Enums
//SC_CLASS, // Class-local definitions
//SC_STATIC, // Static storage definitions
SC_PARAM, // Function parameters
@ -277,7 +274,7 @@ void DisplayUsage(char* ProgName);
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
void Tokenise();
int Tokenise(struct Token* Token);
void VerifyToken(int Type, char* TokenExpected);
void RejectToken(struct Token* Token);
@ -358,15 +355,14 @@ struct ASTNode* PrintStatement(void);
struct SymbolTableEntry* FindSymbol(char* Symbol);
struct SymbolTableEntry* FindLocal(char* Symbol);
struct SymbolTableEntry* FindGlobal(char* Symbol);
struct SymbolTableEntry* FindStruct(char* Symbol);
struct SymbolTableEntry* FindMember(char* Symbol);
void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node);
void FreeLocals();
void ClearTables();
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType);
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O N T R O L S T A T U S * * * *
@ -464,7 +460,7 @@ void AsFunctionEpilogue(struct SymbolTableEntry* Entry);
* * * * D E C L A R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope);
struct ASTNode* ParseIdentifier(void);
struct ASTNode* IfStatement();

View File

@ -9,17 +9,16 @@
/*
* Stores how many hardware registers are being used at any one time.
* It is empirically proven that only 4 clobber registers are
* needed for any arbitrary length program.
*
* If UsedRegisters[i] =? 1, then Registers[i] contains useful data.
* If UsedRegisters[i] =? 0, then Registers[i] is unused.
* If the entry in UsedRegisters
* that correlates to the position of a register in Registers
* is 1,
* then that register is classed as used -
* it has useful data inside it.
*
* if the entry is 0, then it is free.
*/
static int UsedRegisters[4];
/* The https://en.wikipedia.org/wiki/X86_calling_conventions#Microsoft_x64_calling_convention
* calling convention on Windows requires that
* the last 4 arguments are placed in registers
@ -27,43 +26,25 @@ static int UsedRegisters[4];
* This order must be preserved, and they must be placed
* right to left.
*
* The 4 clobber registers are first, and the 4 parameter registers are last.
*/
static char* Registers[8] = { "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
static char* DoubleRegisters[8] = { "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
static char* ByteRegisters[8] = { "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
/*
* For ease of reading later code, we store the valid x86 comparison instructions,
* and the inverse jump instructions together, in a synchronized fashion.
* That is the reason for the weird arrangement here.
* The parameter registers are last, in reverse order.
*/
static char* Registers[10] = { "%rsi", "%rdi", "%r10", "%r11" , "%r12" , "%r13", "%r9" , "%r8", "%rdx", "%rcx" };
static char* DoubleRegisters[10] = { "%esi", "%edi", "%r10d", "%r11d", "%r12d", "%r13d", "%r9d", "%r8d", "%edx", "%ecx" };
static char* ByteRegisters[10] = { "%sil", "%dil", "%r10b", "%r11b", "%r12b", "%r13b", "%r9b", "%r8b", "%dl" , "%cl" };
static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" };
static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"};
// How far above the base pointer is the last local?
static int LocalVarOffset;
// How far must we lower the base pointer to retrieve the parameters?
static int StackFrameOffset;
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * R O O T O F A S S E M B L E R * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Just a short "hack" to make sure we only dump the tree the first time this function is called
static int Started = 0;
/*
* Walk the AST tree given, and generate the assembly code that represents
* it.
*
* @param Node: The current Node to compile. If needed, its children will be parsed recursively.
* @param Register: The index of Registers to store the result of the current compilation.
* @param ParentOp: The Operation of the parent of the current Node.
*
* @return dependent on the Node. Typically the Register that stores the result of the Node's operation.
*
*/
int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
int LeftVal, RightVal;
if(!Started && OptDumpTree)
@ -102,6 +83,14 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
if(Node->Right)
RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation);
/* if(Node->Operation == TERM_INTLITERAL)
printf("int %d\n", Node->IntValue);
else
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
*/
switch(Node->Operation) {
case OP_ADD:
return AsAdd(LeftVal, RightVal);
@ -152,13 +141,31 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
case OP_WIDEN:
printf("\tWidening types..\r\n");
return LeftVal;
return LeftVal; //AsWiden(LeftVal, Node->Left->ExprType, Node->ExprType);
case OP_RET:
printf("\tReturning from %s\n", Node->Symbol->Name);
AsReturn(FunctionEntry, LeftVal);
return -1;
/* case OP_EQUAL:
return AsEqual(LeftVal, RightVal);
case OP_INEQ:
return AsIneq(LeftVal, RightVal);
case OP_LESS:
return AsLess(LeftVal, RightVal);
case OP_GREAT:
return AsGreat(LeftVal, RightVal);
case OP_LESSE:
return AsLessE(LeftVal, RightVal);
case OP_GREATE:
return AsGreatE(LeftVal, RightVal); */
case OP_EQUAL:
case OP_INEQ:
case OP_LESS:
@ -172,6 +179,7 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
case REF_IDENT:
//printf("\tReferencing variable %s %s with type %s and storage %d\r\n", Symbols[Node->Value.ID].Name, Node->RVal ? " rval " : "", ParentOp, Symbols[Node->Value.ID].Storage);
if(Node->RVal || ParentOp == OP_DEREF) {
if(Node->Symbol->Storage == SC_LOCAL || Node->Symbol->Storage == SC_PARAM)
return AsLdLocalVar(Node->Symbol, Node->Operation);
@ -191,6 +199,11 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
DeallocateAllRegisters();
return -1;
/* case OP_LOOP:
// We only do while for now..
return AsWhile(Node);
break; */
case OP_BITAND:
return AsBitwiseAND(LeftVal, RightVal);
@ -239,31 +252,24 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
* * * * R E G I S T E R M A N A G E M E N T * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Mark every tracked register as free by clearing all four
 * entries of UsedRegisters.
 */
void DeallocateAllRegisters() {
    for (int Index = 0; Index < 4; Index++)
        UsedRegisters[Index] = 0;
}
/*
 * Claim the first free register.
 *
 * Scans UsedRegisters from index 0 upward, marks the first entry
 * that is 0 as used, and hands back its index.
 * When all four entries are taken, the failure is reported on stderr
 * and the process exits with status 1.
 *
 * @return the index of the newly allocated register
 */
int RetrieveRegister() {
    size_t Slot = 0;

    // Skip over every entry that is already in use.
    while (Slot < 4 && UsedRegisters[Slot] != 0)
        Slot++;

    if (Slot == 4) {
        fprintf(stderr, "Out of registers!\n");
        exit(1);
    }

    UsedRegisters[Slot] = 1;
    return Slot;
}
/*
* Set the given register to unused.
* If the register is not used, it is an invalid state.
* @param Register: The Registers index to deallocate.
*/
void DeallocateRegister(int Register) {
if(UsedRegisters[Register] != 1) {
fprintf(stderr, "Error trying to free register %d\n", Register);
@ -277,25 +283,10 @@ void DeallocateRegister(int Register) {
* * * * * * S T A C K M A N A G E M E N T * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Start accounting for a new stack frame.
* Resets LocalVarOffset to 0, so offsets handed out afterwards
* are counted from an empty frame again.
*
* Takes no parameters and returns nothing.
*/
void AsNewStackFrame() {
LocalVarOffset = 0;
}
/*
* Given the type of input, how far do we need to go down the stack frame
* to store or retrieve this type?
*
* The stack must be 4-bytes aligned, so we set a hard minimum.
*
* @param Type: The DataTypes we want to store.
* @return the offset to store the type, taking into account the current state of the stack frame.
*
*/
int AsCalcOffset(int Type) {
LocalVarOffset += PrimitiveSize(Type) > 4 ? PrimitiveSize(Type) : 4;
return -LocalVarOffset;
@ -305,19 +296,12 @@ int AsCalcOffset(int Type) {
* * * * C O D E G E N E R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Hand out label numbers in strictly increasing order.
 * The counter is a function-local static: it starts at 1
 * and advances by one on every call.
 *
 * @return the next unused label number
 */
int NewLabel(void) {
    static int NextId = 1;
    int Issued = NextId;

    NextId += 1;
    return Issued;
}
// Assemble an If statement
int AsIf(struct ASTNode* Node) {
int FalseLabel, EndLabel;
@ -349,7 +333,6 @@ int AsIf(struct ASTNode* Node) {
return -1;
}
// Assemble a comparison
int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight);
@ -363,7 +346,6 @@ int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
return RegisterRight;
}
// Assemble an inverse comparison (a one-line jump)
int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) {
if(Operation < OP_EQUAL || Operation > OP_GREATE)
Die("Bad Operation in AsCompareJmp");
@ -377,24 +359,16 @@ int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label)
return -1;
}
/*
 * Emit an unconditional jump to a numbered label.
 * A trace line goes to stdout; the jmp instruction goes to OutputFile.
 *
 * @param Label: The number of the label to jump to
 */
void AsJmp(int Label) {
    const int Target = Label;

    fprintf(stdout, "\t\tJumping to label %d\n", Target);
    fprintf(OutputFile, "\tjmp\tL%d\n", Target);
}
/*
 * Emit the definition of a numbered label ("L<n>:") into OutputFile,
 * preceded by a blank line. A trace line is also written to stdout.
 *
 * @param Label: The number of the label to define
 */
void AsLabel(int Label) {
    const int Number = Label;

    fprintf(stdout, "\tCreating label %d\n", Number);
    fprintf(OutputFile, "\nL%d:\n", Number);
}
/*
* Assemble a new global string into the data segment.
* @param Value: The name of the string, as a string
*/
int AsNewString(char* Value) {
int Label = NewLabel();
char* CharPtr;
@ -408,17 +382,12 @@ int AsNewString(char* Value) {
return Label;
}
/*
 * Load the address of a string into a freshly allocated register.
 * Emits a RIP-relative leaq of label L<ID> into OutputFile.
 *
 * @param ID: The label number of the string
 * @return the index of the register that now holds the string's address
 */
int AsLoadString(int ID) {
    int Register = RetrieveRegister();

    // "%%" is how printf spells a literal '%'. The previous "\%%" relied on
    // the compiler silently dropping an invalid "\%" escape sequence.
    fprintf(OutputFile, "\tleaq\tL%d(%%rip), %s\r\n", ID, Registers[Register]);
    return Register;
}
// Assemble a While loop
int AsWhile(struct ASTNode* Node) {
int BodyLabel, BreakLabel;
@ -449,7 +418,6 @@ int AsWhile(struct ASTNode* Node) {
}
// Load a value into a register.
int AsLoad(int Value) {
int Register = RetrieveRegister();
@ -460,7 +428,6 @@ int AsLoad(int Value) {
return Register;
}
// Assemble an addition.
int AsAdd(int Left, int Right) {
printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]);
@ -470,7 +437,6 @@ int AsAdd(int Left, int Right) {
return Right;
}
// Assemble a multiplication.
int AsMul(int Left, int Right) {
printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]);
@ -480,7 +446,6 @@ int AsMul(int Left, int Right) {
return Right;
}
// Assemble a subtraction.
int AsSub(int Left, int Right) {
printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]);
@ -490,7 +455,6 @@ int AsSub(int Left, int Right) {
return Left;
}
// Assemble a division.
int AsDiv(int Left, int Right) {
printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]);
fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]);
@ -503,18 +467,12 @@ int AsDiv(int Left, int Right) {
return Left;
}
/*
 * Emit a left shift of a register by a constant number of bits
 * (salq with an immediate operand). A trace line goes to stdout.
 *
 * @param Register: The Registers index to shift in place
 * @param Val: The number of bits to shift by
 * @return Register, unchanged
 */
int AsShl(int Register, int Val) {
    char* Target = Registers[Register];

    printf("\tShifting %s to the left by %d bits.\n", Target, Val);
    fprintf(OutputFile, "\tsalq\t$%d, %s\n", Val, Target);
    return Register;
}
/*
* Load a global variable into a register, with optional pre/post-inc/dec
* @param Entry: The variable to load.
* @param Operation: An optional SyntaxOps element
*/
int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
int Reg = RetrieveRegister();
@ -585,11 +543,6 @@ int AsLdGlobalVar(struct SymbolTableEntry* Entry, int Operation) {
return Reg;
}
/*
* Store a value from a register into a global variable.
* @param Entry: The variable to store into.
* @param Register: The Registers index containing the value to store.
*/
int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
printf("\tStoring contents of %s into %s, type %d, globally:\n", Registers[Register], Entry->Name, Entry->Type);
@ -615,12 +568,6 @@ int AsStrGlobalVar(struct SymbolTableEntry* Entry, int Register) {
return Register;
}
/*
* Load a value from a local variable into a register, with optional post/pre-inc/dec
* @param Entry: The local variable to read
* @param Operation: An optional SyntaxOps entry
*/
int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
int Reg = RetrieveRegister();
@ -691,12 +638,6 @@ int AsLdLocalVar(struct SymbolTableEntry* Entry, int Operation) {
return Reg;
}
/*
* Store a value from a register into a local variable.
* @param Entry: The local variable to write to.
* @param Register: The Registers index containing the desired value
*
*/
int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
printf("\tStoring contents of %s into %s, type %d, locally\n", Registers[Register], Entry->Name, Entry->Type);
@ -722,7 +663,6 @@ int AsStrLocalVar(struct SymbolTableEntry* Entry, int Register) {
return Register;
}
// Assemble a pointerisation
int AsAddr(struct SymbolTableEntry* Entry) {
int Register = RetrieveRegister();
printf("\tSaving pointer of %s into %s\n", Entry->Name, Registers[Register]);
@ -731,7 +671,6 @@ int AsAddr(struct SymbolTableEntry* Entry) {
return Register;
}
// Assemble a dereference
int AsDeref(int Reg, int Type) {
int DestSize = PrimitiveSize(ValueAt(Type));
@ -754,7 +693,6 @@ int AsDeref(int Reg, int Type) {
return Reg;
}
// Assemble a store-through-dereference
int AsStrDeref(int Register1, int Register2, int Type) {
printf("\tStoring contents of %s into %s through a dereference, type %d\n", Registers[Register1], Registers[Register2], Type);
@ -773,7 +711,6 @@ int AsStrDeref(int Register1, int Register2, int Type) {
return Register1;
}
// Assemble a global symbol (variable, struct, enum, function, string)
void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
int TypeSize;
@ -795,7 +732,6 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry) {
}
}
// Assemble a function call, with all associated parameter bumping and stack movement.
int AsCallWrapper(struct ASTNode* Node) {
struct ASTNode* CompositeTree = Node->Left;
int Register, Args = 0;
@ -811,7 +747,6 @@ int AsCallWrapper(struct ASTNode* Node) {
return AsCall(Node->Symbol, Args);
}
// Copy a function argument from Register to argument Position
void AsCopyArgs(int Register, int Position) {
if(Position > 4) { // Args above 4 go on the stack
fprintf(OutputFile, "\tpushq\t%s\n", Registers[Register]);
@ -820,8 +755,6 @@ void AsCopyArgs(int Register, int Position) {
}
}
// Assemble an actual function call.
// NOTE: this should not be called. Use AsCallWrapper.
int AsCall(struct SymbolTableEntry* Entry, int Args) {
int OutRegister = RetrieveRegister();
@ -838,7 +771,6 @@ int AsCall(struct SymbolTableEntry* Entry, int Args) {
return OutRegister;
}
// Assemble a function return.
int AsReturn(struct SymbolTableEntry* Entry, int Register) {
printf("\t\tCreating return for function %s\n", Entry->Name);
@ -862,46 +794,39 @@ int AsReturn(struct SymbolTableEntry* Entry, int Register) {
}
AsJmp(Entry->EndLabel);
}
/*
 * Assemble an equality comparison.
 * Forwards both registers to AsCompare with OP_EQUAL.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsEqual(int Left, int Right) {
    int Result = AsCompare(OP_EQUAL, Left, Right);
    return Result;
}
/*
 * Assemble an inequality comparison.
 * Forwards both registers to AsCompare with OP_INEQ.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsIneq(int Left, int Right) {
    int Result = AsCompare(OP_INEQ, Left, Right);
    return Result;
}
/*
 * Assemble a less-than comparison.
 * Forwards both registers to AsCompare with OP_LESS.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsLess(int Left, int Right) {
    int Result = AsCompare(OP_LESS, Left, Right);
    return Result;
}
/*
 * Assemble a greater-than comparison.
 * Forwards both registers to AsCompare with OP_GREAT.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsGreat(int Left, int Right) {
    int Result = AsCompare(OP_GREAT, Left, Right);
    return Result;
}
/*
 * Assemble a less-than-or-equal comparison.
 * Forwards both registers to AsCompare with OP_LESSE.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsLessE(int Left, int Right) {
    int Result = AsCompare(OP_LESSE, Left, Right);
    return Result;
}
/*
 * Assemble a greater-than-or-equal comparison.
 * Forwards both registers to AsCompare with OP_GREATE.
 *
 * @return whatever AsCompare returns for this comparison
 */
int AsGreatE(int Left, int Right) {
    int Result = AsCompare(OP_GREATE, Left, Right);
    return Result;
}
// Assemble a print statement
void AssemblerPrint(int Register) {
printf("\t\tPrinting Register %s\n", Registers[Register]);
@ -912,40 +837,34 @@ void AssemblerPrint(int Register) {
DeallocateRegister(Register);
}
/*
 * Emit a 64-bit bitwise AND (andq) with Left as the source operand
 * and Right as the destination operand, then release Left.
 *
 * @param Left: The Registers index of the source operand
 * @param Right: The Registers index of the destination operand
 * @return Right
 */
int AsBitwiseAND(int Left, int Right) {
    char* Source = Registers[Left];
    char* Dest = Registers[Right];

    fprintf(OutputFile, "\tandq\t%s, %s\n", Source, Dest);
    DeallocateRegister(Left);
    return Right;
}
/*
 * Emit a 64-bit bitwise OR (orq) with Left as the source operand
 * and Right as the destination operand, then release Left.
 *
 * @param Left: The Registers index of the source operand
 * @param Right: The Registers index of the destination operand
 * @return Right
 */
int AsBitwiseOR(int Left, int Right) {
    char* Source = Registers[Left];
    char* Dest = Registers[Right];

    fprintf(OutputFile, "\torq\t%s, %s\n", Source, Dest);
    DeallocateRegister(Left);
    return Right;
}
/*
 * Emit a 64-bit bitwise XOR (xorq) with Left as the source operand
 * and Right as the destination operand, then release Left.
 *
 * @param Left: The Registers index of the source operand
 * @param Right: The Registers index of the destination operand
 * @return Right
 */
int AsBitwiseXOR(int Left, int Right) {
    char* Source = Registers[Left];
    char* Dest = Registers[Right];

    fprintf(OutputFile, "\txorq\t%s, %s\n", Source, Dest);
    DeallocateRegister(Left);
    return Right;
}
/*
 * Emit negq on a register: an in-place two's-complement
 * (arithmetic) negation of its 64-bit value.
 *
 * @param Register: The Registers index to negate
 * @return Register, unchanged
 */
int AsNegate(int Register) {
    char* Target = Registers[Register];

    fprintf(OutputFile, "\tnegq\t%s\n", Target);
    return Register;
}
/*
 * Emit notq on a register: an in-place bitwise complement
 * (every bit flipped) of its 64-bit value.
 *
 * @param Register: The Registers index to complement
 * @return Register, unchanged
 */
int AsInvert(int Register) {
    char* Target = Registers[Register];

    fprintf(OutputFile, "\tnotq\t%s\n", Target);
    return Register;
}
// Assemble a !
int AsBooleanNOT(int Register) {
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
fprintf(OutputFile, "\tsete\t%s\n", ByteRegisters[Register]);
@ -953,7 +872,6 @@ int AsBooleanNOT(int Register) {
return Register;
}
// Assemble a <<
int AsShiftLeft(int Left, int Right) {
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
fprintf(OutputFile, "\tshlq\t\%%cl, %s\n", Registers[Left]);
@ -961,7 +879,6 @@ int AsShiftLeft(int Left, int Right) {
return Left;
}
// Assemble a >>
int AsShiftRight(int Left, int Right) {
fprintf(OutputFile, "\tmovb\t%s, \%%cl\n", ByteRegisters[Right]);
fprintf(OutputFile, "\tshrq\t\%%cl, %s\n", Registers[Left]);
@ -969,8 +886,6 @@ int AsShiftRight(int Left, int Right) {
return Left;
}
// Assemble a conversion from arbitrary type to boolean.
// Facilitates if(ptr)
int AsBooleanConvert(int Register, int Operation, int Label) {
fprintf(OutputFile, "\ttest\t%s, %s\n", Registers[Register], Registers[Register]);
@ -988,7 +903,6 @@ int AsBooleanConvert(int Register, int Operation, int Label) {
return Register;
}
// Assemble the start of an assembly file
void AssemblerPreamble() {
DeallocateAllRegisters();
fputs(
@ -998,15 +912,6 @@ void AssemblerPreamble() {
OutputFile);
}
/*
* Assemble a function block for the Entry.
* Handles all stack logic for local variables,
* as well as copying parameters out of registers and
* into the spill space.
*
* @param Entry: The function to generate
*
*/
void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
char* Name = Entry->Name;
struct SymbolTableEntry* Param, *Local;
@ -1053,8 +958,6 @@ void AsFunctionPreamble(struct SymbolTableEntry* Entry) {
}
// Assemble the epilogue of a function
void AsFunctionEpilogue(struct SymbolTableEntry* Entry) {
AsLabel(Entry->EndLabel);

View File

@ -7,29 +7,6 @@
#include <Data.h>
#include <errno.h>
/********************************************************************************
* The Delegate is what allows the compiler backend to be abstracted. *
* *
* It delegates the operation of compiling, assembling and linking *
* to the proper subsystems. *
* *
* As of right now (20/01/2021) it uses the GCC backend. *
* *
* Compile parses files to their AST and generates mingw PECOFF32+ assembly, *
* Assemble uses GCC-as to compile the assembly to an object file. *
* Link links the object files into an executable. *
* *
********************************************************************************/
/*
* Files inputted must have a suffix/extension (because we're on Windows right now)
* This is the way to change the suffix for when a file is converted to another.
*
* @param String: The full, current file name
* @param Suffix: The new, desired extension.
*
*/
char* Suffixate(char* String, char Suffix) {
char* Pos, *NewStr;
@ -49,22 +26,6 @@ char* Suffixate(char* String, char Suffix) {
return NewStr;
}
/*
* Starts most of the work to do with the Erythro compiler.
* It:
* Opens the input and output files,
* Parses the global symbols of the file, including function blocks.
* Generates the assembly representation of the source code
* Saves said assembly into the OutputFile
* Returns the name of the file containing the generated assembly.
* Note that the Input file must have a valid extension.
* For Erythro code, this is .er
* The generated assembly will have the extension .s
*
* @param InputFile: The filename of the Erythro Source code to compile
* @return the filename of the generated PECOFF32+ assembly
*/
char* Compile(char* InputFile) {
char* OutputName;
OutputName = Suffixate(InputFile, 's');
@ -91,7 +52,7 @@ char* Compile(char* InputFile) {
if(OptVerboseOutput)
printf("Compiling %s\r\n", InputFile);
Tokenise();
Tokenise(&CurrentToken);
AssemblerPreamble();
@ -101,20 +62,6 @@ char* Compile(char* InputFile) {
return OutputName;
}
/*
* Processes the output from the Compile function.
* Passes the generated .s file to (currently, as of
* 21/01/2021), the GNU GAS assembler, to create an
* object file.
*
* It does this by invoking the command on a shell.
* TODO: fork it?
*
* @param InputFile: The .s assembly file to be processed
* @output the name of the generated object file.
*
*/
char* Assemble(char* InputFile) {
char Command[TEXTLEN];
int Error;
@ -138,18 +85,6 @@ char* Assemble(char* InputFile) {
return OutputName;
}
/*
* Processes the outputted object files, turning them into an executable.
* It does this by invoking (currently, as of 21/01/2021) the GNU GCC
* compiler.
* It invokes GCC rather than LD so that it automatically links against
* libc and the CRT natives.
*
* @param Output: The desired name for the executable.
* @param Objects: A list of the Object files to be linked.
*
*/
void Link(char* Output, char* Objects[]) {
int Count, Size = TEXTLEN, Error;
char Command[TEXTLEN], *CommandPtr;
@ -177,16 +112,7 @@ void Link(char* Output, char* Objects[]) {
}
}
/*
* Prints information about the available flags and
* how to structure the command.
* @param ProgName: The name of the file that was
* attempted to run.
*/
void DisplayUsage(char* ProgName) {
fprintf(stderr, "Erythro Compiler v5 - Gemwire Institute\n");
fprintf(stderr, "***************************************\n");
fprintf(stderr, "Usage: %s -[vcST] {-o output} file [file ...]\n", ProgName);
fprintf(stderr, " -v: Verbose Output Level\n");
fprintf(stderr, " -c: Compile without Linking\n");

View File

@ -12,9 +12,6 @@ static int GenerateSrg() {
return srgId++;
}
/*
* Walk the Node tree, and dump the AST tree to stdout.
*/
void DumpTree(struct ASTNode* Node, int level) {
int Lfalse, Lstart, Lend;

View File

@ -11,29 +11,10 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * C H A R S T R E AM * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* "Un-read" a single character.
*
* The Lexer holds a "stream" of characters. When a character has been
* read but turns out not to be the desired one, it can be handed back
* here; it is stored in Overread.
*
* Overread is a single slot: this is a plain assignment, so a second
* call before the slot is consumed replaces the earlier character.
*
* @param Char: The character to "un-read"
*
*/
static void ReturnCharToStream(int Char) {
Overread = Char;
}
/*
* NextChar allows you to ask the Lexer for the next useful character.
* As mentioned above, it checks the overread buffer first.
*
* @return the character as int
*
*/
static int NextChar(void) {
int Char;
@ -51,10 +32,6 @@ static int NextChar(void) {
return Char;
}
/*
* Searches for the next useful character, skipping whitespace.
* @return the character as int.
*/
static int FindChar() {
int Char;
@ -68,31 +45,14 @@ static int FindChar() {
return Char;
}
/*
 * Look up the position of a character within a string of valid digits.
 * With a digit set written in value order (e.g. "0123456789" or a hex
 * alphabet), the position of a character is its numeric value.
 *
 * @param String: The set of all valid digit characters
 * @param Char: The character to look up
 * @return the zero-based index of Char within String, or -1 if Char is
 *         not one of the characters in String
 */
static int FindDigitFromPos(char* String, char Char) {
    // strchr treats the terminating NUL as part of the string, so looking
    // up '\0' would "succeed" and report strlen(String) as a digit value.
    if (Char == '\0')
        return -1;

    char* Result = strchr(String, Char);
    if (Result == NULL)
        return -1;

    return (int)(Result - String);
}
/*
* Facilitates the easy checking of expected tokens.
* NOTE: there is (soon to be) an optional variant of this function that
* reads a token but does not consume it ( via Tokenise )
*
* @param Type: The expected token, in terms of value of the TokenTypes enum.
* @param TokenExpected: A string to output when the token is not found.
*
*/
void VerifyToken(int Type, char* TokenExpected) {
if(CurrentToken.type == Type)
Tokenise();
Tokenise(&CurrentToken);
else {
printf("Expected %s on line %d\n", TokenExpected, Line);
exit(1);
@ -101,12 +61,6 @@ void VerifyToken(int Type, char* TokenExpected) {
static struct Token* RejectedToken = NULL;
/*
* Rejected Tokens and the Overread Stream are identical concepts.
* This was implemented first, but it is no longer used.
* TODO: Refactor this function out.
*/
void RejectToken(struct Token* Token) {
if(RejectedToken != NULL)
Die("Cannot reject two tokens in a row!");
@ -118,21 +72,6 @@ void RejectToken(struct Token* Token) {
* * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Facilitates the parsing of integer literals from the file.
* Currently only supports the decimal numbers, despite the
* FindDigitFromPos function allowing conversion.
*
* The function loops over the characters, multiplying by 10 and adding
* the new value on top, until a non-numeric character is found.
* At that point, it returns the non-numeric character to the Overread Stream
* and returns the calculated number.
*
* @param Char: The first number to scan.
* @return the full parsed number as an int.
*
*/
static int ReadInteger(int Char) {
int CurrentChar = 0;
int IntegerValue = 0;
@ -147,23 +86,7 @@ static int ReadInteger(int Char) {
return IntegerValue;
}
/*
* An Identifier can be any of:
* * A function name
* * A variable name
* * A struct name
* / A class name
* / An annotation name
*
* This function allows a full name to be read into a buffer, with a defined
* start character and a defined maximum text size limit.
*
* @param Char: The first char of the Identifier.
* @param Buffer: The location to store the Identifier. (usually CurrentIdentifer, a compiler global defined for this purpose)
* @param Limit: The maximum Identifier length.
* @return the length of the parsed identifier
*
*/
// Variable identifier, keyword, function.
static int ReadIdentifier(int Char, char* Buffer, int Limit) {
int ind = 0;
@ -185,17 +108,6 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) {
return ind;
}
/*
* Char literals appear as 'x'
*
* They are bounded by two apostrophes.
* They can contain any 1-byte ASCII character, as well as some
* predefined, standard escape codes.
* This function attempts to get the character from the file, with escape codes intact.
*
* @return the character as an int
*
*/
static int ReadCharLiteral() {
int Char;
Char = NextChar();
@ -219,20 +131,7 @@ static int ReadCharLiteral() {
return Char;
}
/*
* String literals appear as "hello world"
*
* They are bounded by two quotation marks.
* They can contain an arbitrary length of text.
* They are backed by an array of chars (hence the char* type) and thus
* have a practically unlimited length.
*
* To read a String Literal, it is a simple matter of reading Char Literals until
* the String termination token is identified - the last quotation mark.
*
* @param Buffer: The buffer into which to write the string. (usually CurrentIdentifer, a compiler global defined for this purpose)
*
*/
static int ReadStringLiteral(char* Buffer) {
int Char;
@ -249,18 +148,9 @@ static int ReadStringLiteral(char* Buffer) {
}
/*
* Keywords are source-code tokens / strings that are reserved for the compiler.
* They cannot be used as identifiers on their own.
*
* This function is where all of the keywords are added, and where most aliases are going to be stored.
*
* It uses a switch on the first character of the input string as an optimisation - rather than checking each
* keyword against the String individually, it only needs to compare a single number. This can be optimised into
* a hash table by the compiler for further optimisation, making this one of the fastest ways to switch
* on a full string.
*
* @param Str: The keyword input to try to parse
* @return the token expressed in terms of values of the TokenTypes enum
* This function is what defines the valid keywords for the language
* //TODO: move this to a static list?
* //TODO: More optimisations?
*
*/
static int ReadKeyword(char* Str) {
@ -313,6 +203,7 @@ static int ReadKeyword(char* Str) {
break;
case 'p':
// This is a huge optimisation once we have as many keywords as a fully featured language.
if(!strcmp(Str, "print"))
return KW_PRINT;
break;
@ -322,11 +213,6 @@ static int ReadKeyword(char* Str) {
return KW_RETURN;
break;
case 's':
if(!strcmp(Str, "struct"))
return KW_STRUCT;
break;
case 'v':
if(!strcmp(Str, "void"))
return TY_VOID;
@ -348,21 +234,8 @@ static int ReadKeyword(char* Str) {
* * * * T O K E N I S E R * * * *
* * * * * * * * * * * * * * * * * * * * */
/*
* Handles the majority of the work of reading tokens into the stream.
* It reads chars with FindChar, categorizing individual characters or small
* strings into their proper expression (as a value of the TokenTypes enum)
*
* It also defers the reading of numeric literals and char literals to the proper functions.
*
* If needed, it can also read Identifiers, for variable or function naming.
*
* This function may be the main bottleneck in the lexer.
*
*/
void Tokenise() {
int Tokenise(struct Token* Token) {
int Char, TokenType;
struct Token* Token = &CurrentToken;
if(RejectedToken != NULL) {
Token = RejectedToken;
@ -566,5 +439,7 @@ void Tokenise() {
DieChar("Unrecognized character", Char);
}
return 1;
}

View File

@ -73,54 +73,48 @@ char* TokenNames[] = {
"While keyword",
"For keyword",
"Return keyword",
"Struct keyword"
"Return keyword"
};
int main(int argc, char* argv[]) {
// Option initialisers
/* Line = 1;
Overread = '\n';
CurrentGlobal = 0;
struct ASTNode* Node;
CurrentLocal = SYMBOLS - 1; */
OptDumpTree = false;
OptKeepAssembly = false;
OptAssembleFiles = false;
OptLinkFiles = true;
OptVerboseOutput = false;
// Temporary .o storage and counter
char* ObjectFiles[100];
int ObjectCount = 0;
// Parse command line arguments.
int i;
for(i = 1/*skip 0*/; i < argc; i++) {
// If we're not a flag, we can skip.
// We only care about flags in rows.
// ie. erc >> -v -T -o << test.exe src/main.er
if(*argv[i] != '-')
for(i = 1; i < argc; i++) {
if(*argv[i] != '-') // not a flag
break;
// Once we identify a flag, we need to make sure it's not just a minus in-place.
for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) {
// Finally, identify what option is being invoked.
switch(argv[i][j]) {
case 'o': // output
case 'o':
OutputFileName = argv[++i];
break;
case 'T': // Debug
case 'T':
OptDumpTree = true;
break;
case 'c': // Compile only
case 'c':
OptAssembleFiles = true;
OptKeepAssembly = false;
OptLinkFiles = false;
break;
case 'S': // aSsemble only
case 'S':
OptAssembleFiles = false;
OptKeepAssembly = true;
OptLinkFiles = false;
break;
case 'v': // Verbose output
case 'v':
OptVerboseOutput = true;
break;
default:
@ -129,42 +123,29 @@ int main(int argc, char* argv[]) {
}
}
// If we didn't provide anything other than flags, we need to show how to use the program.
if(i >= argc)
DisplayUsage(argv[0]);
// For the rest of the files specified, we can iterate them left to right.
while(i < argc) {
// Compile the file by invoking the Delegate
CurrentASMFile = Compile(argv[i]);
if(OptLinkFiles || OptAssembleFiles) {
// If we need to assemble (or link, which requires assembly)
// then we invoke the Delegate again
CurrentObjectFile = Assemble(CurrentASMFile);
// We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too.
if(ObjectCount == 98) {
fprintf(stderr, "Too many inputs");
return 1; // We use return because we're in main, rather than invoking Die.
return 1;
}
// Move the ObjectCount forward.
ObjectFiles[ObjectCount++] = CurrentObjectFile;
// Clear the new, forwarded index
ObjectFiles[ObjectCount] = NULL;
}
if(!OptKeepAssembly)
// unlink = delete
unlink(CurrentASMFile);
i++;
}
if(OptLinkFiles) {
// If needed, invoke the Delegate one last time.
Link(OutputFileName, ObjectFiles);
if(!OptAssembleFiles) {
// Even though we need to assemble to link, we can respect the user's options and delete the intermediary files.
for(i = 0; ObjectFiles[i] != NULL; i++)
unlink(ObjectFiles[i]);
}
@ -174,11 +155,6 @@ int main(int argc, char* argv[]) {
}
/*
* Akin to a Halt and Catch Fire method.
* Simply prints an error, cleans up handles, and closes.
*/
void Die(char* Error) {
fprintf(stderr, "%s on line %d\n", Error, Line);
fclose(OutputFile);
@ -186,9 +162,6 @@ void Die(char* Error) {
exit(1);
}
/*
* A variant of Die with an extra String attached.
*/
void DieMessage(char* Error, char* Reason) {
fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line);
fclose(OutputFile);
@ -196,9 +169,6 @@ void DieMessage(char* Error, char* Reason) {
exit(1);
}
/*
* A variant of Die with an extra integer attached.
*/
void DieDecimal(char* Error, int Number) {
fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line);
fclose(OutputFile);
@ -206,9 +176,6 @@ void DieDecimal(char* Error, int Number) {
exit(1);
}
/*
* A variant of Die with an extra character attached.
*/
void DieChar(char* Error, int Char) {
fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line);
fclose(OutputFile);

View File

@ -10,10 +10,12 @@
#include "Data.h"
/*
* The Precedence of an operator is directly related to Token Type.
* Precedence determines how soon the operator and its surrounding values
* will be calculated and aliased.
* This allows for things like the common Order of Operations.
* Precedence is directly related to Token Type.
*
* enum TokenTypes {
* LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT
* };
*
*/
static int Precedence[] = {
0, 10, // EOF, ASSIGN
@ -28,13 +30,6 @@ static int Precedence[] = {
110 // /
};
/*
* Handles gathering the precedence of an operator from its token,
* in terms of values of the TokenTypes enum.
*
* Error handling is also done here, so that EOF or non-operators are not executed.
*
*/
static int OperatorPrecedence(int Token) {
int Prec = Precedence[Token];
@ -45,13 +40,6 @@ static int OperatorPrecedence(int Token) {
return Prec;
}
/*
 * Reports whether an operator token is right-associative.
 * Assignment (LI_EQUAL) is the only token that qualifies.
 *
 * ParsePrecedenceASTNode consults this in its loop condition, alongside a
 * check that the operator's precedence equals the previous precedence.
 *
 * @param Token: The operator to test, in terms of values of the TokenTypes enum.
 * @return 1 if the token is right-associative, 0 otherwise.
 */
static int IsRightExpr(int Token) {
    if(Token == LI_EQUAL)
        return 1;

    return 0;
}
@ -60,29 +48,6 @@ static int IsRightExpr(int Token) {
* * * N O D E C O N S T R U C T I O N * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
* ASTNodes form the structure of the language that moves the bulk of
* data around within the compiler.
* They contain:
* * An Operation (usually 1:1 with an input token),
* * A Type (to identify the size of data it contains),
* * Two more Left and Right ASTNodes (to form a doubly-linked list)
* * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop)
* * A Symbol Table Entry
* * An Integer Value
* * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval
*
* This is the only function where they are constructed.
*
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Left: The Node that is attached to the left side branch of this root.
* @param Middle: The Node that is attached to the middle of this root, if applicable.
* @param Right: The Node that is attached to the right side branch of this root.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTNode(int Operation, int Type,
struct ASTNode* Left,
struct ASTNode* Middle,
@ -110,28 +75,10 @@ struct ASTNode* ConstructASTNode(int Operation, int Type,
}
/*
 * Builds an AST Leaf: a node with no children.
 * This forwards to ConstructASTNode with the Left, Middle and Right
 * children all set to NULL.
 *
 * @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
 * @param Type: The data type of this Node, in terms of values of the DataTypes enum.
 * @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
 * @param IntValue: The integer value encoded by this Node, if applicable.
 * @return the newly constructed AST Node
 */
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) {
    struct ASTNode* Leaf = ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue);

    return Leaf;
}
/*
 * Builds an AST Branch: a node with exactly one child, on the left.
 * These are sometimes called Unary Branches.
 * This forwards to ConstructASTNode with the Middle and Right children
 * set to NULL.
 *
 * @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
 * @param Type: The data type of this Node, in terms of values of the DataTypes enum.
 * @param Left: The Node that is attached to the left side branch of this root.
 * @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
 * @param IntValue: The integer value encoded by this Node, if applicable.
 * @return the newly constructed AST Node
 */
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) {
    struct ASTNode* Branch = ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue);

    return Branch;
}
@ -142,10 +89,10 @@ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left
* * * * * * * * * * * * * * * * * * * * * * * */
/*
* TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that
* these are synchronized well.
* This allows the parsing operation to be little more than a bounds check.
* Otherwise, this would be a gigantic switch statement.
* Take a Token Type, and convert it to an AST-Node Operation.
*
* TokenTypes and SyntaxOps are synchronized to make this easy.
*
*/
int ParseTokenToOperation(int Token) {
@ -156,13 +103,11 @@ int ParseTokenToOperation(int Token) {
}
/*
* Primary expressions may be any one of:
* * A terminal integer literal
* * A terminal string literal
* * A variable
* * A collection of expressions bounded by parentheses.
* Parse a primary (terminal) expression.
* This currently handles literal expressions, constructing a leaf node
* and handing control back up the chain.
*
*
* @return the AST Node that represents this expression
*/
struct ASTNode* ParsePrimary(void) {
@ -189,7 +134,7 @@ struct ASTNode* ParsePrimary(void) {
case LI_LPARE:
// Starting a ( expr ) block
Tokenise();
Tokenise(&CurrentToken);
Node = ParsePrecedenceASTNode(0);
@ -199,26 +144,12 @@ struct ASTNode* ParsePrimary(void) {
}
Tokenise();
Tokenise(&CurrentToken);
return Node;
}
/*
* Parse a single binary expression.
* It ensures that these expressions are parsed to their full extent, that
* the order of operations is upheld, that the precedence of the prior
* iteration is considered, and that every error is handled.
*
* This is where all of the right-associative statements are folded, where
* type mismatches and widening are handled properly, and that all parsing
* is over by the time the end tokens ") } ] ;" are encountered.
*
* @param PreviousTokenPrecedence: The precedence of the operator to the left.
* @return the AST Node corresponding to this block.
*
*/
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
struct ASTNode* LeftNode, *RightNode;
struct ASTNode* LeftTemp, *RightTemp;
@ -228,19 +159,25 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode = PrefixStatement();
NodeType = CurrentToken.type;
//printf("%d\r\n", CurrentToken.type);
if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) {
//printf("Current token matches ; ) ]\r\n");
LeftNode->RVal = 1; return LeftNode;
}
//printf("Current token has value %d, type %s\n", CurrentToken.value, TokenNames[CurrentToken.type]);
while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) {
Tokenise();
//printf("inside while\n");
Tokenise(&CurrentToken);
if(CurrentToken.type == LI_RPARE)
break;
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
/*
LeftType = LeftNode->ExprType;
RightType = RightNode->ExprType;
*/
/**
* While parsing this node, we may need to widen some types.
* This requires a few functions and checks.
@ -257,6 +194,9 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
if(LeftNode == NULL)
Die("Incompatible Expression encountered in assignment");
//printf("\tAssigning variable: %s value %d\n", Symbols[FindSymbol(CurrentIdentifier)].Name, RightNode->Value.IntValue);
// LeftNode holds the target, the target variable in this case
printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name);
@ -272,9 +212,11 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode->RVal = 1;
RightNode->RVal = 1;
//printf("mutate left\r\n");
LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType);
//printf("mutate right\r\n");
RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType);
//printf("mutate right over\r\n");
/**
* If both are null, the types are incompatible.
*/
@ -325,21 +267,105 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
}
/* struct ASTNode* ParseMultiplicativeASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParsePrimary();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
Tokenise(&CurrentToken);
RightNode = ParsePrimary();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* struct ASTNode* ParseAdditiveASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParseMultiplicativeASTNode();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while(1) {
Tokenise(&CurrentToken);
RightNode = ParseMultiplicativeASTNode();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * * I N T E R P R E T A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
int ParseAST(struct ASTNode* Node) {
int LeftVal, RightVal;
if(Node->Left)
LeftVal = ParseAST(Node->Left);
if(Node->Right)
RightVal = ParseAST(Node->Right);
/*
if(Node->Operation == TERM_INTLITERAL)
printf("int %d\n", Node->IntValue);
else
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
switch(Node->Operation) {
case OP_ADD:
return (LeftVal + RightVal);
case OP_SUBTRACT:
return (LeftVal - RightVal);
case OP_MULTIPLY:
return (LeftVal * RightVal);
case OP_DIVIDE:
return (LeftVal / RightVal);
case REF_IDENT:
case TERM_INTLITERAL:
return Node->Value.IntValue;
default:
fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
exit(1);
}
}
*/
/* * * * * * * * * * * * * * * * * * * * *
* * * * F U N C T I O N S * * * *
* * * * * * * * * * * * * * * * * * * * */
/*
* Handles the logic for calling a function.
* This is invoked by an identifier being recognized, followed by a "(.*)" string.
*
* It simply checks that the function exists, that the parameters given are valid,
* and generates the AST Node for calling it.
*
* @return the AST Node for calling the function stored in CurrentIdentifier
*
*/
struct ASTNode* CallFunction() {
struct ASTNode* Tree;
struct SymbolTableEntry* Function;
@ -359,21 +385,6 @@ struct ASTNode* CallFunction() {
return Tree;
}
/*
* An expression list is used:
* * In the call to a function
*
* It is parsed by seeking a left parenthesis "(", then parsing binary expressions
* until either a comma or a right parenthesis is found.
*
* The former will cause another expression to be parsed, the latter will cause
* parsing to stop.
*
* @return the AST Node representing every expression in the list, glued end to
* end with a COMPOSITE operation.
*
*/
struct ASTNode* GetExpressionList() {
struct ASTNode* Tree = NULL, *Child = NULL;
int Count;
@ -386,7 +397,7 @@ struct ASTNode* GetExpressionList() {
switch(CurrentToken.type) {
case LI_COM:
Tokenise();
Tokenise(&CurrentToken);
break;
case LI_RPARE:
break;
@ -403,18 +414,6 @@ struct ASTNode* GetExpressionList() {
* * * * S T A T E M E N T S * * * *
* * * * * * * * * * * * * * * * * * * * * */
/*
* Handles parsing an individual statement.
*
* It serves as a wrapper around:
* * If Statement
* * While Statement
* * For Statement
* * Return Statement
* * Numeric literals and variables
* * Binary Expressions
* @return the AST Node representing this single statement
*/
struct ASTNode* ParseStatement(void) {
int Type;
@ -426,10 +425,18 @@ struct ASTNode* ParseStatement(void) {
printf("\t\tNew Variable: %s\n", CurrentIdentifier);
Type = ParseOptionalPointer();
VerifyToken(TY_IDENTIFIER, "ident");
BeginVariableDeclaration(Type, NULL, SC_LOCAL);
BeginVariableDeclaration(Type, SC_LOCAL);
VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment?
return NULL;
/*case TY_IDENTIFIER:
if(Symbols[FindSymbol(CurrentIdentifier)].Structure == ST_FUNC)
printf("\t\tCalling Function: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
else
printf("\t\tAssigning variable: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
return ParseIdentifier();
*/
case KW_IF:
return IfStatement();
@ -444,26 +451,11 @@ struct ASTNode* ParseStatement(void) {
default:
ParsePrecedenceASTNode(0);
//DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
}
}
/*
* Handles parsing multiple statements or expressions in a row.
* These are typically grouped together with the Compound tokens "{ }"
* and separated by the semicolon ";".
*
* Single Statements are parsed until a semicolon is reached, at which
* point another statement will be parsed, or until a Right Compound
* token is reached ("}"), at which point parsing will stop.
*
* It is useful for:
* * Tightly identifying related blocks of code
* * Containing the many statements of functions
*
* @return the AST Node representing this compound statement
*
*/
struct ASTNode* ParseCompound() {
struct ASTNode* Left = NULL, *Tree;
@ -494,21 +486,6 @@ struct ASTNode* ParseCompound() {
}
}
/*
* This is the entry point to the parser/lexer.
*
* By definition, Global definitions are accessible anywhere.
* As of right now (20/01/2021), classes are unimplemented.
* This means that all functions and all function prototypes are globally scoped.
*
* You may also define variables, constants, preprocessor directives and other text
* in the global scope.
*
* The function itself loops, parsing either variables or functions, until it
* reaches the end of the file.
*
*/
void ParseGlobals() {
struct ASTNode* Tree;
int Type, FunctionComing;
@ -539,7 +516,7 @@ void ParseGlobals() {
}
} else {
printf("\tParsing global variable declaration\n");
BeginVariableDeclaration(Type, NULL, SC_GLOBAL);
BeginVariableDeclaration(Type, SC_GLOBAL);
VerifyToken(LI_SEMIC, ";");
}

View File

@ -7,34 +7,6 @@
#include <Defs.h>
#include <Data.h>
/****************************************************************
* Types are enumerated by the DataTypes enum. *
* They are represented by unsigned integers, where the *
* most significant 28 bits differentiate the raw type *
* of the data being encoded. *
* However, the least significant nibble - that is, *
* the lowest 4 bits, represent the count of indirection. *
* *
* This means that a raw Integer data type, such as an i32, *
* has the DataType representation 32. *
* However, a pointer to an Integer has DataType value 32+1, *
* or 33. *
* *
* This means that the maximum valid pointer level is 15. *
* That's a: *
* ***************int *
* That ought to be enough for everyone, right? *
* *
****************************************************************/
/*
* Adds 1 to the input Type, to add a level of indirection.
* If the indirection is already at 15 levels (the low nibble is 0xf), it aborts.
*
* @param Type: The DataType to pointerise
* @return the new pointerised DataType value.
*/
int PointerTo(int Type) {
if((Type & 0xf) == 0xf)
DieDecimal("Unrecognized type in pointerisation", Type);
@ -42,59 +14,30 @@ int PointerTo(int Type) {
return (Type + 1);
}
/*
 * Returns the underlying type behind a pointer, by removing one level
 * of indirection from the given type.
 * If the type is not a pointer (the lowest 4 bits are 0), compilation is
 * halted through DieDecimal.
 *
 * @param Type: The pointer type to dereference
 * @return the underlying Type
 */
int ValueAt(int Type) {
    // The low nibble of a type holds its level of indirection.
    int Indirection = Type & 0xf;

    printf("\t\tDereferencing a %s\n", TypeNames(Type));

    //TODO: this check is still a stopgap.
    if(!Indirection)
        DieDecimal("Unrecognized type in defererencing", Type);

    return Type - 1;
}
/*
* Type declarations may be raw, they may be pointers.
* If they are pointers, we need to be able to check
* how many levels of indirection.
* However, being a pointer is optional.
*
* This can parse just a lone type specifier, or
* one followed by any valid level of indirection.
*
* @param Composite: unused
* @return the parsed DataType, with any indirection.
*
*/
int ParseOptionalPointer(struct SymbolTableEntry** Composite) {
int ParseOptionalPointer() {
int Type;
switch(CurrentToken.type) {
case TY_VOID:
Type = RET_VOID;
Tokenise();
break;
case TY_CHAR:
Type = RET_CHAR;
Tokenise();
break;
case TY_INT:
Type = RET_INT;
Tokenise();
break;
case TY_LONG:
Type = RET_LONG;
Tokenise();
break;
case KW_STRUCT:
Type = DAT_STRUCT;
*Composite = BeginStructDeclaration();
break;
default:
DieDecimal("Illegal type for pointerisation", CurrentToken.type);
@ -104,30 +47,17 @@ int ParseOptionalPointer(struct SymbolTableEntry** Composite) {
// x = **y;
// possible.
while(1) {
Tokenise();
Tokenise(&CurrentToken);
printf("\t\t\tType on parsing is %d\n", CurrentToken.type);
if(CurrentToken.type != AR_STAR)
break;
Type = PointerTo(Type);
// Tokenise(); TODO: is this skipping pointers?
}
return Type;
}
/*
* Array Accesses come in the form of x[y].
*
* x must be a pointer type, and an array structure.
* y can be any binary expression.
*
* It is a wrapper around *((imax*)x + y).
*
* @return the AST Node that represents this statement.
*/
struct ASTNode* AccessArray() {
struct ASTNode* LeftNode, *RightNode;
struct SymbolTableEntry* Entry;
@ -137,7 +67,9 @@ struct ASTNode* AccessArray() {
DieMessage("Accessing undeclared array", CurrentIdentifier);
LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0);
Tokenise();
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
Tokenise(&CurrentToken);
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
RightNode = ParsePrecedenceASTNode(0);

View File

@ -8,27 +8,9 @@
#include <Data.h>
#include <stdbool.h>
/*
* Handles reading in a comma-separated list of declarations.
* Erythro treats structs, enums and function parameters the same in this regard -
* comma separated.
*
* C and C++ tend to treat enums and structs differently - the former separated by commas,
* the latter separated by semicolons.
*
* Note that since functions are read in through parentheses, and structs/enums are read in
* through brackets, the end character is configurable.
*
* @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
* @param Storage: The Storage Scope of this declaration list.
* @param End: The end token, in terms of TokenTypes enum values.
* @return the amount of declarations read in.
*
*/
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
int TokenType, ParamCount = 0;
struct SymbolTableEntry* PrototypePointer = NULL, *Composite;
struct SymbolTableEntry* PrototypePointer = NULL;
if(FunctionSymbol != NULL)
PrototypePointer = FunctionSymbol->Start;
@ -42,15 +24,19 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor
DieDecimal("Function paramater of invalid type at index", ParamCount + 1);
PrototypePointer=PrototypePointer->NextSymbol;
} else {
BeginVariableDeclaration(TokenType, Composite, Storage);
BeginVariableDeclaration(TokenType, SC_PARAM);
}
ParamCount++;
if((CurrentToken.type != LI_COM) && (CurrentToken.type != End))
switch(CurrentToken.type) {
case LI_COM:
Tokenise(&CurrentToken);
break;
case LI_RPARE:
break;
default:
DieDecimal("Unexpected token in parameter", CurrentToken.type);
if(CurrentToken.type == LI_COM)
Tokenise();
}
}
if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length))
@ -59,61 +45,6 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor
return ParamCount;
}
/*
 * Handles the declaration of a new struct.
 *      struct thisStct { int x, int y, int z };
 *
 * On entry, the current token is the "struct" keyword.
 * If an identifier follows it, that name is looked up with FindStruct.
 *
 * If no member list follows, this is a reference to an existing struct,
 * which must already be known; its entry is returned unchanged.
 *
 * Otherwise this is a definition. Redefinitions are rejected, a new
 * SC_STRUCT symbol is added, the members are read in, and each member
 * is given an offset inside the struct. The total size is stored in
 * the Length field of the struct's entry.
 *
 * @return the Symbol Table entry of this struct.
 */
struct SymbolTableEntry* BeginStructDeclaration() {
    struct SymbolTableEntry* Composite = NULL, *Member;
    int Offset;

    // Step past the "struct" keyword.
    Tokenise();

    if(CurrentToken.type == TY_IDENTIFIER) {
        Composite = FindStruct(CurrentIdentifier);
        Tokenise();
    }

    // No member list follows: this names an existing struct rather than defining one.
    if(CurrentToken.type != LI_LBRAC) {
        if(Composite == NULL)
            DieMessage("Unknown Struct", CurrentIdentifier);
        return Composite;
    }

    if(Composite)
        DieMessage("Redefinition of struct", CurrentIdentifier);

    Composite = AddSymbol(CurrentIdentifier, DAT_STRUCT, 0, SC_STRUCT, 0, 0, NULL);
    Tokenise();

    // NOTE(review): the member list is opened by LI_LBRAC but closed by LI_RBRAS ("]"),
    // the same token that closes an array declaration. Confirm the intended terminator.
    ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAS);
    VerifyToken(LI_RBRAS, "]");

    // Hand the members that were just read over to this struct, and reset the shared list.
    Composite->Start = StructMembers;
    StructMembers = StructMembersEnd = NULL;

    // Without this check, an empty member list would dereference a NULL pointer below.
    Member = Composite->Start;
    if(Member == NULL)
        DieMessage("Struct declared with no members", CurrentIdentifier);

    // The first member always sits at the start of the struct.
    Member->SinkOffset = 0;
    Offset = TypeSize(Member->Type, Member->CompositeType);

    for(Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
        Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1);
        // Continue from the aligned position, so that any padding inserted
        // before this member is counted. Otherwise later members could overlap it.
        Offset = Member->SinkOffset + TypeSize(Member->Type, Member->CompositeType);
    }

    Composite->Length = Offset;
    return Composite;
}
/*
* Handles the declaration of a type of a variable.
* int newVar;
@ -121,12 +52,11 @@ struct SymbolTableEntry* BeginStructDeclaration() {
* It verifies that we have a type keyword followed by a
* unique, non-keyword identifier.
*
* It then stores this variable into the appropriate symbol table,
* It then stores this variable into the symbol table,
* and returns the new item.
*
* @return the Symbol Table entry of this new variable.
*/
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) {
struct SymbolTableEntry* Symbol = NULL;
switch(Scope) {
@ -136,50 +66,33 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEn
case SC_LOCAL:
case SC_PARAM:
if(FindLocal(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
case SC_MEMBER:
if(FindMember(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
DieMessage("Invalid redelcaration of local variable", CurrentIdentifier);
}
if(CurrentToken.type == LI_LBRAS) {
Tokenise();
Tokenise(&CurrentToken);
if(CurrentToken.type == LI_INT) {
switch(Scope) {
case SC_GLOBAL:
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0);
break;
case SC_LOCAL:
case SC_PARAM:
case SC_MEMBER:
Die("Local arrays are unimplemented");
}
}
Tokenise();
Tokenise(&CurrentToken);
VerifyToken(LI_RBRAS, "]");
} else {
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0);
}
return Symbol;
}
/*
* Handles the declaration of a new function.
* Verifies that the identifier is not taken (excluding the case
* where there is a declaration but no definition)
* Parses the list of parameters if present
* Saves the function prototype if there is no body
* Generates and saves the break-out point label
*
* @param Type: The return type of the function
* @return the AST for this function
*
*/
struct ASTNode* ParseFunction(int Type) {
struct ASTNode* Tree;
struct ASTNode* FinalStatement;
@ -191,7 +104,7 @@ struct ASTNode* ParseFunction(int Type) {
OldFunction = NULL;
if(OldFunction == NULL) {
BreakLabel = NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0);
}
VerifyToken(LI_LPARE, "(");
@ -207,7 +120,7 @@ struct ASTNode* ParseFunction(int Type) {
Params = ParamsEnd = NULL;
if(CurrentToken.type == LI_SEMIC) {
Tokenise();
Tokenise(&CurrentToken);
return NULL;
}
@ -236,6 +149,7 @@ struct ASTNode* ParseFunction(int Type) {
* //TODO: No brackets
* //TODO: Type inference
*
*
*/
struct ASTNode* ReturnStatement() {
@ -252,10 +166,19 @@ struct ASTNode* ReturnStatement() {
Tree = ParsePrecedenceASTNode(0);
/*
ReturnType = Tree->ExprType;
FunctionType = Symbols[CurrentFunction].Type;
*/
Tree = MutateType(Tree, FunctionEntry->Type, 0);
if(Tree == NULL)
Die("Returning a value of incorrect type for function");
/*
if(ReturnType)
Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0);
*/
Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0);
@ -266,33 +189,59 @@ struct ASTNode* ReturnStatement() {
return Tree;
}
/*
* Handles the surrounding logic for If statements.
* Handles Identifiers.
*
* If statements have the basic form:
* * if (condition) body
* * if (condition)
* body
* * if (condition) {
* body
* }
* This is called for any of:
* - Calling a function
* - Assigning an lvalue variable
* - Performing arithmetic on a variable
* - Performing arithmetic with the return values of function calls
*
* Conditions may be any truthy statement (such as a pointer,
* object, integer), as conditions not recognized are auto-
* matically converted to booleans.
*
* This means that any object that can be resolved to 0 or NULL
* can be placed as the condition and used as a check.
*
* For example:
* struct ASTNode* Node = NULL;
* if(Node) {
* // This will not run, as Node is ((void*)0)
* }
* For the case where you're assigning an l-value;
* You can assign with another assignment,
* a statement, a function or a literal.
*
*/
/*
struct ASTNode* ParseIdentifier() {
struct ASTNode* Left, *Right, *Tree;
int LeftType, RightType;
int ID;
VerifyToken(TY_IDENTIFIER, "ident");
printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier));
if(CurrentToken.type == LI_LPARE)
return CallFunction();
if((ID = FindSymbol(CurrentIdentifier)) == -1) {
printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier, Symbols[0].Name, Symbols[1].Name);
DieMessage("Undeclared Variable ", CurrentIdentifier);
}
Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);
VerifyToken(LI_EQUAL, "=");
Left = ParsePrecedenceASTNode(0);
LeftType = Left->ExprType;
RightType = Right->ExprType;
Left = MutateType(Left, RightType, 0);
if(!Left)
Die("Incompatible types in assignment");
if(LeftType)
Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);
Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
return Tree;
}*/
struct ASTNode* IfStatement() {
struct ASTNode* Condition, *True, *False = NULL;
@ -312,39 +261,13 @@ struct ASTNode* IfStatement() {
True = ParseCompound();
if(CurrentToken.type == KW_ELSE) {
Tokenise();
Tokenise(&CurrentToken);
False = ParseCompound();
}
return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0);
}
/*
* Handles the surrounding logic for While loops.
*
* While loops have the basic form:
* while ( condition ) { body }
*
* When reaching the condition (which, like that of an If statement,
* can be any truthy value), if it resolves to true:
* The body is executed, and immediately the condition is checked
* again.
* This repeats until the condition resolves false, at which point
* the loop executes no more.
*
* This can be prototyped as the following pseudo-assembler:
*
* cond:
* check <condition>
* jne exit
* <body>
* jump cond
* exit:
* <more code>
*
* @return the AST of this statement
*
*/
struct ASTNode* WhileStatement() {
struct ASTNode* Condition, *Body;
@ -364,36 +287,12 @@ struct ASTNode* WhileStatement() {
return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0);
}
/*
* Handles the surrounding logic for For loops.
*
* They have the basic form of:
* for ( init ; condition; iterator) { body }
*
* The initialiser is run only once upon reaching the for loop.
* Then the condition is checked, and if true, the body is executed.
* After execution of the body, the iterator is run and the condition
* checked again.
*
* It can be prototyped as the following pseudo-assembler code:
*
* for:
* <init>
* cond:
* check <condition>
* jne exit
* <body>
* <iterator>
* jump cond
* exit:
* <loop exit>
*
* In the case of the implementation, "init" is the preoperator,
* "iterator" is the postoperator.
*
* @return the AST of this statement
*/
struct ASTNode* ForStatement() {
// for (preop; condition; postop) {
// body
//}
struct ASTNode* Condition, *Body;
struct ASTNode* Preop, *Postop;
@ -427,18 +326,6 @@ struct ASTNode* ForStatement() {
return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0);
}
/*
* Handles the surrounding logic for the Print statement.
*
* This is a legacy hold-over from the early testing, and it
* serves merely as a wrapper around the cstdlib printf function.
*
* It does, however (//TODO), attempt to guess the type that you
* want to print, which takes a lot of the guesswork out of printing.
*
* @return the AST of this statement
*/
struct ASTNode* PrintStatement(void) {
struct ASTNode* Tree;
int LeftType, RightType;
@ -455,7 +342,7 @@ struct ASTNode* PrintStatement(void) {
DieDecimal("Attempting to print an invalid type:", RightType);
if(RightType)
Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);
Tree = ConstructASTBranch(RightType, RET_INT, Tree, NULL, 0);
Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);
@ -465,33 +352,16 @@ struct ASTNode* PrintStatement(void) {
}
/*
* Handles the surrounding logic for all of the logical and semantic
* postfixes.
*
* Postfixes are tokens that are affixed to the end of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of postfixes:
* * (): Call a function
* * []: Index or define an array.
* * ++: Increment a variable AFTER it is returned
* NOTE: there is a prefix variant of this for incrementing BEFOREhand.
* * --: Decrement a variable AFTER it is returned
* NOTE: there is a prefix variant of this for decrementing BEFOREhand.
*
* Planned postfixes:
* * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
* NOTE: there is a prefix variant of this for shifting left - multiplying by two.
*
* @return the AST of the statement plus its postfix
*/
struct ASTNode* PostfixStatement() {
struct ASTNode* Tree;
struct SymbolTableEntry* Entry;
Tokenise();
Tokenise(&CurrentToken);
// If we get here, we're one of three things:
// - Function
// - Array
// - Variable
if(CurrentToken.type == LI_LPARE)
return CallFunction();
@ -500,8 +370,8 @@ struct ASTNode* PostfixStatement() {
return AccessArray();
// If we get here, we must be a variable.
// (as functions have been called and arrays have been indexed)
// Check that the variable is recognized..
// There's no guarantees that the variable is in
// the symbol table, though.
if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR)
DieMessage("Unknown Variable", CurrentIdentifier);
@ -510,11 +380,11 @@ struct ASTNode* PostfixStatement() {
switch(CurrentToken.type) {
case PPMM_PLUS:
Tokenise();
Tokenise(&CurrentToken);
Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0);
break;
case PPMM_MINUS:
Tokenise();
Tokenise(&CurrentToken);
Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0);
break;
default:
@ -525,58 +395,33 @@ struct ASTNode* PostfixStatement() {
}
/*
* Handles the surrounding logic for all of the logical and semantic
* prefixes.
*
* Prefixes are tokens that are affixed to the start of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of prefixes:
* * !: Invert the boolean result of a statement or truthy value.
* * ~: Invert the individual bits in a number
* * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
* * ++: Increment a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for incrementing AFTER the fact.
* * --: Decrement a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for decrementing AFTER the fact.
* * &: Take the address of the following object (Get the address that contains it)
* * *: Dereference the following pointer (Get the object pointed at by the number following)
*
* Planned prefixes:
* * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
* NOTE: there is a postfix variant of this for shifting right - dividing by two.
*
* @return the AST of this statement, plus its prefixes and any postfixes.
*/
struct ASTNode* PrefixStatement() {
struct ASTNode* Tree;
switch (CurrentToken.type) {
case BOOL_INVERT:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();
Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0);
break;
case BIT_NOT:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();
Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0);
break;
case AR_MINUS:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();
Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0);
break;
case PPMM_PLUS:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT)
@ -585,7 +430,7 @@ struct ASTNode* PrefixStatement() {
break;
case PPMM_MINUS:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT)
@ -595,7 +440,7 @@ struct ASTNode* PrefixStatement() {
break;
case BIT_AND:
Tokenise();
Tokenise(&CurrentToken);
// To allow things like:
// x = &&y;
@ -609,7 +454,7 @@ struct ASTNode* PrefixStatement() {
Tree->ExprType = PointerTo(Tree->ExprType);
break;
case AR_STAR:
Tokenise();
Tokenise(&CurrentToken);
Tree = PrefixStatement();

View File

@ -78,28 +78,6 @@ struct SymbolTableEntry* FindGlobal(char* Symbol) {
return SearchList(Symbol, Globals);
}
/*
 * A narrowed variant of FindSymbol.
 * Only the list of defined Structs is consulted; the lookup itself
 * is delegated to SearchList.
 *
 * @param Symbol: The string name of the symbol to search for.
 * @return a pointer to the node if found, else NULL
 */
struct SymbolTableEntry* FindStruct(char* Symbol) {
    struct SymbolTableEntry* Match = SearchList(Symbol, Structs);

    return Match;
}
/*
 * A narrowed variant of FindSymbol.
 * Only the StructMembers list (the defined Struct & Enum Members)
 * is consulted; the lookup itself is delegated to SearchList.
 *
 * @param Symbol: The string name of the symbol to search for.
 * @return a pointer to the node if found, else NULL
 */
struct SymbolTableEntry* FindMember(char* Symbol) {
    struct SymbolTableEntry* Match = SearchList(Symbol, StructMembers);

    return Match;
}
/*
* Given a particular linked list,
* Take Node and append it to the Tail.
@ -134,7 +112,6 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail
void FreeLocals() {
    // Reset both ends of the local-variable list.
    // Only the list pointers are cleared here; no node is released.
    LocalsEnd = NULL;
    Locals = NULL;

    // Same again for the parameter list.
    ParamsEnd = NULL;
    Params = NULL;

    // FunctionEntry is cleared alongside the two lists.
    FunctionEntry = NULL;
}
@ -145,8 +122,6 @@ void ClearTables() {
Globals = GlobalsEnd = NULL;
Locals = LocalsEnd = NULL;
Params = ParamsEnd = NULL;
StructMembers = StructMembersEnd = NULL;
Structs = StructsEnd = NULL;
}
@ -161,7 +136,34 @@ void ClearTables() {
*
* @return The SymbolTableEntry* pointer that corresponds to this newly constructed node.
*/
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType) {
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset) {
/* int TableSlot;
int SinkOffset = 0;
if((TableSlot = FindSymbolImpl(Name, Storage)) != -1)
return -1;
// Instead of splitting this up into AddLocalSymbol and AddGlobalSymbol,
// we can use this switch to avoid duplicated code.
switch(Storage) {
case SC_PARAM:
// Instead of special casing parameters, we can just add these to the symbol lists and be done with it.
printf("\tPreparing new parameter %s of type %s\r\n", Name, TypeNames[Type]);
TableSlot = AddSymbol(Name, Type, Structure, SC_GLOBAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the global process
TableSlot = AddSymbol(Name, Type, Structure, SC_LOCAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the local process
return TableSlot;
case SC_GLOBAL:
TableSlot = NewGlobalSymbol();
break;
case SC_LOCAL:
printf("\tCreating new local symbol %s\r\n", Name);
TableSlot = NewLocalSymbol();
SinkOffset = AsCalcOffset(Type);
break;
} */
struct SymbolTableEntry* Node =
(struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry));
@ -172,28 +174,33 @@ struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Stor
Node->Storage = Storage;
Node->Length = Length;
Node->SinkOffset = SinkOffset;
Node->CompositeType = CompositeType;
switch(Storage) {
case SC_GLOBAL:
AppendSymbol(&Globals, &GlobalsEnd, Node);
// We don't want to generate a static block for functions.
if(Structure != ST_FUNC) AsGlobalSymbol(Node);
break;
case SC_STRUCT:
AppendSymbol(&Structs, &StructsEnd, Node);
break;
case SC_MEMBER:
AppendSymbol(&StructMembers, &StructMembersEnd, Node);
case SC_LOCAL:
AppendSymbol(&Locals, &LocalsEnd, Node);
break;
case SC_PARAM:
AppendSymbol(&Params, &ParamsEnd, Node);
break;
}
/* // NOTE: Generating global symbol names must happen AFTER the name and type are declared.
switch(Storage) {
case SC_GLOBAL:
printf("\tCreating new global symbol %s into slot %d\r\n", Name, TableSlot);
if(Structure != ST_FUNC && EndLabel != 88) { // Magic keyword so that we don't generate ASM globals for parameters
printf("\t\tGenerating data symbol.\r\n");
AsGlobalSymbol(TableSlot);
}
break;
case SC_LOCAL:
break;
} */
//printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
return Node;
}