From 59329d963f1ebc14dec709316af1da9773b43a18 Mon Sep 17 00:00:00 2001 From: Curle Date: Thu, 10 Sep 2020 01:56:16 +0100 Subject: [PATCH] First files. Currently has two bugs. First, all functions are resolved to index 0 (currently PrintInteger) Second, the register used for returning is immediately overwritten by the next allocated register. This means addition of function return values is a little silly. Also, commit signing! --- include/Data.h | 30 +++ include/Defs.h | 320 ++++++++++++++++++++++++++++ src/Assembler.c | 519 ++++++++++++++++++++++++++++++++++++++++++++++ src/Lexer.c | 322 ++++++++++++++++++++++++++++ src/Main.c | 130 ++++++++++++ src/Parser.c | 433 ++++++++++++++++++++++++++++++++++++++ src/Statements.c | 386 ++++++++++++++++++++++++++++++++++ src/Symbols.c | 70 +++++++ tests/comparison1 | 23 ++ tests/for | 13 ++ tests/funcs | 8 + tests/funcs2 | 15 ++ tests/if | 13 ++ tests/parser1 | 6 + tests/tokeniser1 | 1 + tests/tokeniser2 | 8 + tests/tokeniser3 | 1 + tests/types | 17 ++ tests/vars1 | 9 + tests/while | 8 + 20 files changed, 2332 insertions(+) create mode 100644 include/Data.h create mode 100644 include/Defs.h create mode 100644 src/Assembler.c create mode 100644 src/Lexer.c create mode 100644 src/Main.c create mode 100644 src/Parser.c create mode 100644 src/Statements.c create mode 100644 src/Symbols.c create mode 100644 tests/comparison1 create mode 100644 tests/for create mode 100644 tests/funcs create mode 100644 tests/funcs2 create mode 100644 tests/if create mode 100644 tests/parser1 create mode 100644 tests/tokeniser1 create mode 100644 tests/tokeniser2 create mode 100644 tests/tokeniser3 create mode 100644 tests/types create mode 100644 tests/vars1 create mode 100644 tests/while diff --git a/include/Data.h b/include/Data.h new file mode 100644 index 0000000..f2d86df --- /dev/null +++ b/include/Data.h @@ -0,0 +1,30 @@ +/*************/ +/*GEMWIRE */ +/* ERYTHRO*/ +/*************/ + +#pragma once +#include +#include + +#ifndef extern_ 
+#define extern_ extern // Default: every global below is only declared. Main.c defines extern_ as empty before an include (presumably this header) so the storage is defined there.
+#endif
+
+#define TEXTLEN 512  // Limit handed to ReadIdentifier() when filling CurrentIdentifier
+#define SYMBOLS 1024 // Capacity of the Symbols[] table
+
+extern_ struct SymbolTable Symbols[SYMBOLS]; // Global symbol table; AST nodes refer to entries by index (Value.ID)
+
+extern_ char* TokenStrings[]; // Defined in Main.c
+extern_ char* TokenNames[];   // Defined in Main.c; one printable name per enum TokenTypes entry
+
+extern_ int CurrentFunction; // Symbol index passed to AsReturn() when an OP_RET node is assembled
+extern_ int Line;            // Current source line; the lexer increments it for every '\n' read from SourceFile
+extern_ int Overread;        // One-character pushback slot for the lexer (0 means empty)
+
+extern_ FILE* SourceFile; // Input stream, opened by main() from argv[1]
+extern_ FILE* OutputFile; // Assembly output stream, opened by main() from argv[2]
+
+extern_ struct Token CurrentToken;            // The token most recently produced by Tokenise()
+extern_ char CurrentIdentifier[TEXTLEN + 1];  // Text of the last identifier/keyword scanned by the lexer
\ No newline at end of file
diff --git a/include/Defs.h b/include/Defs.h
new file mode 100644
index 0000000..7f59993
--- /dev/null
+++ b/include/Defs.h
@@ -0,0 +1,320 @@
+/*************/
+/*GEMWIRE */
+/* ERYTHRO*/
+/*************/
+
+#pragma once
+#include
+#include
+#include
+#include
+
+/*
+ * ARithmetic tokens are prefixed AR.
+ * LIteral tokens are prefixed LI.
+ * KeyWords are prefixed KW.
+ * TYpes are prefixed TY.
+ * CoMParisons are prefixed CMP.
+ *
+ * NOTE: Tokens are different from Syntax Operations!
+ *
+ * Tokens should represent the characters that invoke them,
+ * not the actions they perform.
+ *
+ * The order matters: entries AR_PLUS..LI_EQUAL share their numeric value
+ * with the matching SyntaxOps entry (see ParseTokenToOperation), the
+ * parser's Precedence[] table is indexed by these values, and TokenNames[]
+ * in Main.c lists one name per entry in this order.
+ *
+ */
+
+
+enum TokenTypes {
+    LI_EOF,         // End of input
+
+    AR_PLUS,        // Arithmetic +
+    AR_MINUS,       // Arithmetic -
+    AR_STAR,        // Arithmetic *
+    AR_SLASH,       // Arithmetic /
+
+    CMP_EQUAL,      // =?
+    CMP_INEQ,       // !=
+    CMP_LT,         // <
+    CMP_GT,         // >
+    CMP_LTE,        // <=
+    CMP_GTE,        // =>
+
+    LI_EQUAL,       // =
+    LI_INT,         // Integer literal
+    LI_SEMIC,       // ;
+
+    LI_LBRAC,       // {
+    LI_RBRAC,       // }
+
+    LI_LPARE,       // (
+    LI_RPARE,       // )
+
+    TY_IDENTIFIER,  // Identifier name. Variable, function, etc.
+    TY_NONE,        // No return type. Literal void.
+    TY_CHAR,        // "char" type keyword
+    TY_INT,         // "int" type keyword
+    TY_LONG,        // "long" type keyword
+    TY_VOID,        // "void" type keyword
+
+    KW_FUNC,        // :: function name incoming
+
+    KW_PRINT,       // "print" keyword
+    KW_IF,          // "if" keyword
+    KW_ELSE,        // "else" keyword
+    KW_WHILE,       // "while" keyword
+    KW_FOR,         // "for" keyword
+    KW_RETURN       // "return" keyword
+};
+
+/*
+ * All Syntax Operations are prefixed OP.
+ * Terminal Operations are prefixed TERM.
+ * L-Values are prefixed LV.
+ * Reference Operations are prefixed REF.
+ *
+ * These represent the actions that a token will perform.
+ * These are used exclusively in AST construction.
+ *
+ * It is important that Tokens and Operations are logically separated,
+ * but that the Operation's index is the same as the Token that invokes it.
+ * That is why the enum starts at 1: OP_ADD..OP_ASSIGN line up with
+ * AR_PLUS..LI_EQUAL, and TERM_INTLITERAL lines up with LI_INT.
+ */
+
+enum SyntaxOps {
+    OP_ADD = 1,      // Add two numbers.
+    OP_SUBTRACT,     // Subtract two numbers.
+    OP_MULTIPLY,     // Multiply two numbers.
+    OP_DIVIDE,       // Divide two numbers.
+
+    OP_EQUAL,        // Compare equality
+    OP_INEQ,         // Compare inequality
+    OP_LESS,         // Less than?
+    OP_GREAT,        // Greater than?
+    OP_LESSE,        // Less than or Equal to?
+    OP_GREATE,       // Greater than or Equal to?
+
+    OP_ASSIGN,       // Assign an l-value
+
+    TERM_INTLITERAL, // Integer Literal. This is a virtual operation, so it's a terminal.
+
+    REF_IDENT,       // Reference (read) an identifier (variable).
+    LV_IDENT,        // Write an identifier in the form of an l-value.
+
+    OP_WIDEN,        // Something contains a type that needs to be cast up
+
+    OP_CALL,         // Call a function
+    OP_RET,          // Return from a function
+
+    OP_COMP,         // Compound statements need a way to be "glued" together. This is one of those mechanisms
+    OP_IF,           // If statement
+    OP_LOOP,         // FOR, WHILE
+    OP_PRINT,        // Print statement
+
+    OP_FUNC,         // Define a function
+};
+
+
+// A node in the syntax tree of Erythro.
+// Most nodes use only Left and Right; the assembler reads Middle for OP_IF,
+// where Left is the condition, Middle the true block and Right the optional else.
+struct ASTNode {
+    int Operation;          // One of enum SyntaxOps
+    int ExprType;           // Value->IntValue's DataType
+    struct ASTNode* Left;
+    struct ASTNode* Middle;
+    struct ASTNode* Right;
+    union {
+        int IntValue;       // TERM_INTLITERAL's value
+        int ID;             // Symbols[] index (read for REF_IDENT, LV_IDENT, OP_CALL and OP_FUNC nodes)
+    } Value;
+};
+
+// A single lexical token as produced by Tokenise().
+struct Token {
+    int type;   // One of enum TokenTypes
+    int value;  // Only written by the lexer for LI_INT tokens
+};
+
+/*
+ * The Symbol Table, used for variables, functions and
+ * assorted goodies.
+ */
+
+struct SymbolTable {
+    char* Name;     // Identifier text; also emitted verbatim as the assembly symbol name
+    int Type;       // An entry in DataTypes, referring to the type of this data
+    int Structure;  // An entry in StructureType - metadata on how to process the data
+    int EndLabel;   // The number of the label to jump to, in order to exit this function (if applicable)
+};
+
+
+/*
+ * The primitive data types for the language
+ * //TODO: Move back into TokenTypes
+ *
+ * The assembler's Types[] name table is indexed by these values,
+ * so the two must stay in the same order.
+ */
+enum DataTypes {
+    RET_NONE, // No return type. Literal void.
+    RET_CHAR, // "char" type keyword
+    RET_INT,  // "int" type keyword
+    RET_LONG, // "long" type keyword
+    RET_VOID, // "void" type keyword
+
+};
+
+/*
+ * The type of the structure of data being examined
+ * //TODO: move into TokenTypes?
+ */
+
+enum StructureType {
+    ST_VAR, // This is a variable
+    ST_FUNC // This is a function
+    // This is an enum     (not implemented yet)
+    // This is a struct    (not implemented yet)
+    // This is a typedef   (not implemented yet)
+};
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * * * * * * L E X I N G * * * * * * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+
+int Tokenise(struct Token* Token);
+
+int TypesCompatible(int* Left, int* Right, int STRICT);
+
+void VerifyToken(int Type, char* TokenExpected);
+void RejectToken(struct Token* Token);
+
+// NOTE(review): these two helpers are defined `static` in Lexer.c. Declaring
+// them static in a shared header gives every other file that includes it a
+// static function that is declared but never defined - consider moving
+// these prototypes into Lexer.c.
+static int ReadIdentifier(int Char, char* Buffer, int Limit);
+static int ReadKeyword(char* Str);
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * * * S Y N T A X T R E E * * * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+
+// General constructor: allocates a node and fills in every field.
+struct ASTNode* ConstructASTNode(int Operation, int Type,
+                                 struct ASTNode* Left,
+                                 struct ASTNode* Middle,
+                                 struct ASTNode* Right,
+                                 int IntValue);
+
+// Node with no children.
+struct ASTNode* ConstructASTLeaf(int Operation, int Type, int IntValue);
+
+// Node with only a Left child.
+struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, int IntValue);
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * * * * * * P A R S I N G * * * * * * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+//struct ASTNode* ParseNewASTNode(void); +//struct ASTNode* ParseAdditiveASTNode(void); +struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence); + +int ParseAST(struct ASTNode* Node); + +//void ParseStatements(void); +struct ASTNode* ParseStatement(void); + +struct ASTNode* ParseFunction(); +struct ASTNode* ParseCompound(); + + +struct ASTNode* CallFunction(); +struct ASTNode* ReturnStatement(); + +int ParseType(int Token); + + +int ParseTokenToOperation(int Token); + +struct ASTNode* PrintStatement(void); + + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * * * S Y M B O L T A B L E * * * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +int FindSymbol(char* Symbol); + +int AddSymbol(char* Name, int Type, int Structure); + +int AddFunctionSymbol(char* Name, int Type, int Structure, int EndLabel); + + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * C O N T R O L S T A T U S * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void Die(char* Error); + +void DieMessage(char* Error, char* Reason); + +void DieDecimal(char* Error, int Number); + +void DieChar(char* Error, int Char); + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * + * * * * C O D E G E N E R A T I O N * * * * + * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +int AssembleTree(struct ASTNode* Node, int Register, int ParentOp); + +void DeallocateAllRegisters(); + +int RetrieveRegister(); + +void DeallocateRegister(int Register); + +int PrimitiveSize(int Type); + +int AsLoad(int Value); +int AsAdd(int Left, int Right); +int AsMul(int Left, int Right); +int AsSub(int Left, int Right); +int AsDiv(int Left, int Right); + +int AsLdVar(int ID); +int AsStrVar(int Register, int ID); + +void AsNewSymb(int ID); + +int AsEqual(int Left, int Right); +int AsIneq(int Left, int Right); +int AsLess(int Left, int Right); +int AsGreat(int Left, int Right); +int AsLessE(int Left, int Right); +int AsGreatE(int 
Left, int Right); + +int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label); +int AsCompare(int Operation, int RegisterLeft, int RegisterRight); +int AsIf(struct ASTNode* Node); +int NewLabel(void); + +void AsJmp(int Label); +void AsLabel(int Label); + +int AsReturn(int Register, int FuncID); +int AsCall(int Register, int FuncID); + +int AsWhile(struct ASTNode* Node); + +void AssemblerPrint(int Register); + +void AssemblerPreamble(); +void AsFunctionPreamble(int ID); +void AsFunctionEpilogue(int ID); + + +/* * * * * * * * * * * * * * * * * * * * * * * + * * * * D E C L A R A T I O N * * * * + * * * * * * * * * * * * * * * * * * * * * * */ + +void BeginVariableDeclaration(void); +struct ASTNode* ParseIdentifier(void); + +struct ASTNode* IfStatement(); +struct ASTNode* WhileStatement(); +struct ASTNode* ForStatement(); \ No newline at end of file diff --git a/src/Assembler.c b/src/Assembler.c new file mode 100644 index 0000000..9adbcc8 --- /dev/null +++ b/src/Assembler.c @@ -0,0 +1,519 @@ + +/*************/ +/*GEMWIRE */ +/* ERYTHRO*/ +/*************/ + +#include +#include + + +/* + * If the entry in UsedRegisters + * that correlates to the position of a register in Registers + * is 1 + * then that register is classed as used - + * it has useful data inside it. + * + * if the entry is 0, then it is free. 
+ */
+
+static int UsedRegisters[4];
+static char* Registers[4] = { "%r8", "%r9", "%r10", "%r11" };
+static char* DoubleRegisters[4] = { "%r8d", "%r9d", "%r10d", "%r11d" };
+static char* ByteRegisters[4] = { "%r8b", "%r9b", "%r10b", "%r11b" };
+
+// Both tables are indexed by (Operation - OP_EQUAL).
+// Comparisons holds the setcc that materialises the comparison result;
+// InvComparisons holds the jump that is taken when the comparison is FALSE.
+static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" };
+static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"};
+
+// Printable names for enum DataTypes, used in diagnostics.
+static char* Types[5] = { "none", "char", "int", "long", "void" };
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * R O O T O F A S S E M B L E R * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+
+/*
+ * Recursively emit assembly for the tree rooted at Node.
+ *
+ * Register - a value handed down by the parent. For an LV_IDENT node it is
+ *            the register whose contents are stored; for a comparison
+ *            directly under OP_IF / OP_LOOP it is the label to jump to when
+ *            the comparison is false; otherwise callers pass -1.
+ * ParentOp - the Operation of the parent node.
+ *
+ * Returns the index of the register holding the node's value, or -1 when
+ * the node produces no value.
+ */
+int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
+    // -1 means "no register". Initialised so that a missing child can never
+    // leak an indeterminate value into the code generators below.
+    int LeftVal = -1, RightVal = -1;
+
+    // An absent subtree generates nothing instead of dereferencing NULL.
+    if(!Node)
+        return -1;
+
+    // These operations drive the traversal of their own children.
+    switch(Node->Operation) {
+        case OP_IF:
+            return AsIf(Node);
+
+        case OP_LOOP:
+            return AsWhile(Node);
+
+        case OP_COMP:
+            AssembleTree(Node->Left, -1, Node->Operation);
+            DeallocateAllRegisters();
+            AssembleTree(Node->Right, -1, Node->Operation);
+            DeallocateAllRegisters();
+            return -1;
+
+        case OP_FUNC:
+            AsFunctionPreamble(Node->Value.ID);
+            AssembleTree(Node->Left, -1, Node->Operation);
+            AsFunctionEpilogue(Node->Value.ID);
+            return -1;
+    }
+
+
+    if(Node->Left)
+        LeftVal = AssembleTree(Node->Left, -1, Node->Operation);
+
+    // The right child receives the left result so that an assignment's
+    // LV_IDENT knows which register to store.
+    if(Node->Right)
+        RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation);
+
+
+/*    if(Node->Operation == TERM_INTLITERAL)
+        printf("int %d\n", Node->IntValue);
+    else
+        printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
+
+ */
+
+    switch(Node->Operation) {
+        case OP_ADD:
+            return AsAdd(LeftVal, RightVal);
+
+        case OP_SUBTRACT:
+            return AsSub(LeftVal, RightVal);
+
+        case OP_MULTIPLY:
+            return AsMul(LeftVal, RightVal);
+
+        case OP_DIVIDE:
+            return AsDiv(LeftVal, RightVal);
+
+        case OP_ASSIGN:
+            return RightVal;
+
+        case OP_WIDEN:
+            return LeftVal;
+
+        case OP_RET:
+            AsReturn(LeftVal, CurrentFunction);
+            return -1;
+
+        case OP_CALL:
+            printf("\tReturning from %s, %d\n", Symbols[Node->Value.ID].Name, Node->Value.ID);
+            return AsCall(LeftVal, Node->Value.ID);
+
+    /*    case OP_EQUAL:
+            return AsEqual(LeftVal, RightVal);
+
+        case OP_INEQ:
+            return AsIneq(LeftVal, RightVal);
+
+        case OP_LESS:
+            return AsLess(LeftVal, RightVal);
+
+        case OP_GREAT:
+            return AsGreat(LeftVal, RightVal);
+
+        case OP_LESSE:
+            return AsLessE(LeftVal, RightVal);
+
+        case OP_GREATE:
+            return AsGreatE(LeftVal, RightVal); */
+
+        case OP_EQUAL:
+        case OP_INEQ:
+        case OP_LESS:
+        case OP_GREAT:
+        case OP_LESSE:
+        case OP_GREATE:
+            // Under if/while the comparison becomes a conditional jump to the
+            // label passed down in Register; elsewhere it yields 0 or 1.
+            if(ParentOp == OP_IF || ParentOp == OP_LOOP)
+                return AsCompareJmp(Node->Operation, LeftVal, RightVal, Register);
+            else
+                return AsCompare(Node->Operation, LeftVal, RightVal);
+
+
+        case REF_IDENT:
+            return AsLdVar(Node->Value.ID);
+
+        case LV_IDENT:
+            return AsStrVar(Register, Node->Value.ID);
+
+        case TERM_INTLITERAL:
+            return AsLoad(Node->Value.IntValue);
+            break;
+
+        case OP_PRINT:
+            AssemblerPrint(LeftVal);
+            DeallocateAllRegisters();
+            return -1;
+
+    /*    case OP_LOOP:
+            // We only do while for now..
+            return AsWhile(Node);
+            break; */
+
+        default:
+            DieDecimal("Unknown ASM Operation", Node->Operation);
+    }
+
+    // Only reached if DieDecimal() returns; keeps every path of this
+    // non-void function returning a value.
+    return -1;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * R E G I S T E R M A N A G E M E N T * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Mark every scratch register as free.
+void DeallocateAllRegisters() {
+    UsedRegisters[0] = UsedRegisters[1] = UsedRegisters[2] = UsedRegisters[3] = 0;
+}
+
+// Claim the lowest-numbered free register and return its index.
+// Exits the compiler when all four are in use.
+int RetrieveRegister() {
+    //printf("Current state of registers: %x, %x, %x, %x\n", UsedRegisters[0], UsedRegisters[1], UsedRegisters[2], UsedRegisters[3]);
+
+    for (int i = 0; i < 4; i++) {
+        if(UsedRegisters[i] == 0) {
+            UsedRegisters[i] = 1;
+            return i;
+        }
+    }
+
+    fprintf(stderr, "Out of registers!\n");
+    exit(1);
+}
+
+// Release a register obtained from RetrieveRegister().
+// Exits the compiler if the index is out of range or the register is not in use.
+void DeallocateRegister(int Register) {
+    // Range-check first: -1 ("no register") must not be used as an index.
+    if(Register < 0 || Register > 3 || UsedRegisters[Register] != 1) {
+        fprintf(stderr, "Error trying to free register %d\n", Register);
+        exit(1);
+    }
+
+    UsedRegisters[Register] = 0;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * C O D E G E N E R A T I O N * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Hand out a fresh label number (1, 2, 3, ...).
+int NewLabel(void) {
+    static int id = 1;
+    return id++;
+}
+
+/*
+ * Emit an if / if-else statement.
+ * Left is the condition, Middle the true block, Right the optional else block.
+ * Always returns -1 (a statement has no value).
+ */
+int AsIf(struct ASTNode* Node) {
+    int FalseLabel, EndLabel = -1;
+
+    FalseLabel = NewLabel();
+    if(Node->Right)
+        EndLabel = NewLabel();
+
+
+    // Left is the condition; it jumps to FalseLabel when it does not hold
+    AssembleTree(Node->Left, FalseLabel, Node->Operation);
+    DeallocateAllRegisters();
+
+    // Middle is the true block
+    AssembleTree(Node->Middle, -1, Node->Operation);
+    DeallocateAllRegisters();
+
+    // With an else present, the true block must skip over it
+    if(Node->Right)
+        AsJmp(EndLabel);
+
+    AsLabel(FalseLabel);
+
+    // Right is the optional else
+    if(Node->Right) {
+        AssembleTree(Node->Right, -1, Node->Operation);
+        DeallocateAllRegisters();
+        AsLabel(EndLabel);
+    }
+
+    return -1;
+}
+
+/*
+ * Compare two registers and leave 1 (true) or 0 (false) in a register.
+ * Frees RegisterLeft and returns RegisterRight, which holds the result.
+ */
+int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
+    printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight);
+
+    if(Operation < OP_EQUAL || Operation > OP_GREATE)
+        Die("Bad Operation in AsCompare");
+
+    fprintf(OutputFile, "\tcmpq\t%s, %s\n", Registers[RegisterRight], Registers[RegisterLeft]);
+    fprintf(OutputFile, "\t%s\t\t%s\n", Comparisons[Operation - OP_EQUAL], ByteRegisters[RegisterRight]);
+    // Widen the flag byte within the register that is returned. It used to
+    // be widened into RegisterLeft, which is freed on the next line.
+    fprintf(OutputFile, "\tmovzbq\t%s, %s\n", ByteRegisters[RegisterRight], Registers[RegisterRight]);
+    DeallocateRegister(RegisterLeft);
+    return RegisterRight;
+}
+
+/*
+ * Compare two registers and jump to Label when the comparison is FALSE.
+ * Frees every register and returns -1.
+ */
+int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) {
+    if(Operation < OP_EQUAL || Operation > OP_GREATE)
+        Die("Bad Operation in AsCompareJmp");
+
+    printf("\tBranching on comparison of registers %d & %d, with operation %s\n\n", RegisterLeft, RegisterRight, Comparisons[Operation - OP_EQUAL]);
+
+    fprintf(OutputFile, "\tcmpq\t%s, %s\n", Registers[RegisterRight], Registers[RegisterLeft]);
+    fprintf(OutputFile, "\t%s\tL%d\n", InvComparisons[Operation - OP_EQUAL], Label);
+    DeallocateAllRegisters();
+
+    return -1;
+}
+
+// Emit an unconditional jump to label L<Label>.
+void AsJmp(int Label) {
+    printf("\t\tJumping to label %d\n", Label);
+    fprintf(OutputFile, "\tjmp\tL%d\n", Label);
+}
+
+// Emit the definition of label L<Label>.
+void AsLabel(int Label) {
+    printf("\tCreating label %d\n", Label);
+    fprintf(OutputFile, "L%d:\n", Label);
+}
+
+/*
+ * Emit a loop: Left is the condition, Right is the body.
+ * Always returns -1.
+ */
+int AsWhile(struct ASTNode* Node) {
+    int BodyLabel, BreakLabel;
+
+    BodyLabel = NewLabel();
+    BreakLabel = NewLabel();
+
+    printf("\tInitiating loop between labels %d and %d\n", BodyLabel, BreakLabel);
+
+    // Mark the start position
+    AsLabel(BodyLabel);
+
+    // Assemble the condition - this includes the jump to BreakLabel when it fails
+    AssembleTree(Node->Left, BreakLabel, Node->Operation);
+    DeallocateAllRegisters();
+
+    // Assemble the body
+    AssembleTree(Node->Right, -1, Node->Operation);
+    DeallocateAllRegisters();
+
+    // The body has run once; jump back up so the condition is tested again
+    AsJmp(BodyLabel);
+
+    // Set up the label to break out of the loop.
+    AsLabel(BreakLabel);
+
+
+    return -1;
+
+}
+
+// Load an integer constant into a fresh register and return that register.
+int AsLoad(int Value) {
+    int Register = RetrieveRegister();
+
+    printf("\tStoring value %d into %s\n", Value, Registers[Register]);
+
+    fprintf(OutputFile, "\tmovq\t$%d, %s\n", Value, Registers[Register]);
+
+    return Register;
+}
+
+// Right += Left. Frees Left, returns Right.
+int AsAdd(int Left, int Right) {
+    printf("\tAdding Registers %s, %s\n", Registers[Left], Registers[Right]);
+    fprintf(OutputFile, "\taddq\t%s, %s\n", Registers[Left], Registers[Right]);
+
+    DeallocateRegister(Left);
+
+    return Right;
+}
+
+// Right *= Left. Frees Left, returns Right.
+int AsMul(int Left, int Right) {
+    printf("\tMultiplying Registers %s, %s\n", Registers[Left], Registers[Right]);
+    fprintf(OutputFile, "\timulq\t%s, %s\n", Registers[Left], Registers[Right]);
+
+    DeallocateRegister(Left);
+
+    return Right;
+}
+
+// Left -= Right. Frees Right, returns Left.
+int AsSub(int Left, int Right) {
+    printf("\tSubtracting Registers %s, %s\n", Registers[Left], Registers[Right]);
+    fprintf(OutputFile, "\tsubq\t%s, %s\n", Registers[Right], Registers[Left]);
+
+    DeallocateRegister(Right);
+
+    return Left;
+}
+
+// Left /= Right (signed). Uses %rax/%rdx as scratch. Frees Right, returns Left.
+int AsDiv(int Left, int Right) {
+    printf("\tDividing Registers %s, %s\n", Registers[Left], Registers[Right]);
+    fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Left]);
+    // cqo sign-extends %rax into %rdx:%rax, the dividend idivq expects
+    fprintf(OutputFile, "\tcqo\n");
+    fprintf(OutputFile, "\tidivq\t%s\n", Registers[Right]);
+    fprintf(OutputFile, "\tmovq\t%%rax, %s\n", Registers[Left]);
+
+    DeallocateRegister(Right);
+
+    return Left;
+}
+
+/*
+ * Load the global variable Symbols[ID] into a fresh register,
+ * widened to 64 bits, and return that register.
+ */
+int AsLdVar(int ID) {
+    int Reg = RetrieveRegister();
+
+    printf("\tStoring %s's contents into %s\n", Symbols[ID].Name, Registers[Reg]);
+
+    switch(Symbols[ID].Type) {
+        case RET_CHAR:
+            // movzbq zeroes, then moves a byte into the quad register
+            fprintf(OutputFile, "\tmovzbq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
+            break;
+
+        case RET_INT:
+            // Sign-extend the 32-bit value into the quad register. This was
+            // "movzbl", which reads only one byte and does not take a
+            // 64-bit destination.
+            fprintf(OutputFile, "\tmovslq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
+            break;
+
+        case RET_LONG:
+            fprintf(OutputFile, "\tmovq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
+            break;
+
+        default:
+            DieMessage("Bad type for loading", Types[Symbols[ID].Type]);
+    }
+
+    return Reg;
+}
+
+/*
+ * Store Register into the global variable Symbols[ID], truncated to the
+ * variable's size. Returns Register, which still holds the value.
+ */
+int AsStrVar(int Register, int ID) {
+    // Store the register that was passed in. This used to allocate a fresh
+    // register and store that one, which had never been written.
+    printf("\tStoring contents of %s into %s\n", Registers[Register], Symbols[ID].Name);
+
+    switch(Symbols[ID].Type) {
+        case RET_CHAR:
+            // Only the low byte is written
+            fprintf(OutputFile, "\tmovb\t%s, %s(%%rip)\n", ByteRegisters[Register], Symbols[ID].Name);
+            break;
+
+        case RET_INT:
+            fprintf(OutputFile, "\tmovl\t%s, %s(%%rip)\n", DoubleRegisters[Register], Symbols[ID].Name);
+            break;
+
+        case RET_LONG:
+            fprintf(OutputFile, "\tmovq\t%s, %s(%%rip)\n", Registers[Register], Symbols[ID].Name);
+            break;
+
+        default:
+            DieMessage("Bad type for saving", Types[Symbols[ID].Type]);
+    }
+
+    return Register;
+}
+
+// Reserve storage for the global variable Symbols[ID], sized and aligned to its type.
+void AsNewSymb(int ID) {
+    int TypeSize;
+
+    TypeSize = PrimitiveSize(Symbols[ID].Type);
+
+    fprintf(OutputFile, "\t.comm\t%s, %d, %d\n", Symbols[ID].Name, TypeSize, TypeSize);
+
+}
+
+/*
+ * Call Symbols[FuncID] with the value in Register as its single argument
+ * (passed in %rcx). Frees Register and returns a fresh register holding the
+ * function's return value.
+ *
+ * NOTE(review): %r8-%r11 are not preserved across calls in the Windows x64
+ * convention this output appears to target, so any other live scratch
+ * register may be clobbered by the callee - confirm and spill if needed.
+ */
+int AsCall(int Register, int FuncID) {
+
+    int OutRegister = RetrieveRegister();
+
+    printf("\t\tCalling function %s with parameter %s\n", Symbols[FuncID].Name, Registers[Register]);
+    printf("\t\t\tFunction returns into %s\n", Registers[OutRegister]);
+
+    fprintf(OutputFile, "\tmovq\t%s, %%rcx\n", Registers[Register]);
+    fprintf(OutputFile, "\tcall\t%s\n", Symbols[FuncID].Name);
+    fprintf(OutputFile, "\tmovq\t%%rax, %s\n", Registers[OutRegister]);
+
+    // The argument has been consumed; the result register must stay
+    // allocated. It used to be freed here, so the next allocation
+    // overwrote the return value.
+    DeallocateRegister(Register);
+    return OutRegister;
+}
+
+/*
+ * Move Register into the return register, sized by the return type of
+ * Symbols[FuncID], then jump to that function's end label.
+ * Always returns -1.
+ */
+int AsReturn(int Register, int FuncID) {
+
+    printf("\t\tCreating return for function %s\n", Symbols[FuncID].Name);
+
+    switch(Symbols[FuncID].Type) {
+        case RET_CHAR:
+            fprintf(OutputFile, "\tmovzbl\t%s, %%eax\n", ByteRegisters[Register]);
+            break;
+
+        case RET_INT:
+            fprintf(OutputFile, "\tmovl\t%s, %%eax\n", DoubleRegisters[Register]);
+            break;
+
+        case RET_LONG:
+            fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Register]);
+            break;
+
+        default:
+            DieMessage("Bad function type in generating return", Types[Symbols[FuncID].Type]);
+
+    }
+
+    AsJmp(Symbols[FuncID].EndLabel);
+
+    // A return statement produces no register.
+    return -1;
+}
+
+int AsEqual(int Left, int Right) {
+    // Set the lowest bit if left = right
+    return AsCompare(OP_EQUAL, Left, Right);
+}
+
+int AsIneq(int Left, int Right) {
+    // Set the lowest bit if left != right
+    return AsCompare(OP_INEQ, Left, Right);
+}
+
+int AsLess(int Left, int Right) {
+    // Set the lowest bit if left < right
+    return AsCompare(OP_LESS, Left, Right);
+}
+
+int AsGreat(int Left, int Right) {
+    // Set the lowest bit if left > right
+    return AsCompare(OP_GREAT, Left, Right);
+}
+
+int AsLessE(int Left, int Right) {
+    // Set the lowest bit if left <= right
+    return AsCompare(OP_LESSE, Left, Right);
+}
+
+int AsGreatE(int Left, int Right) {
+    // Set the lowest bit if left => right
+    return AsCompare(OP_GREATE, Left, Right);
+}
+
+// Pass Register to the runtime's PrintInteger in %rcx, then free it.
+void AssemblerPrint(int Register) {
+    printf("\t\tPrinting Register %s\n", Registers[Register]);
+
+    fprintf(OutputFile, "\tmovq\t%s, %%rcx\n", Registers[Register]);
+    //fprintf(OutputFile, "\tleaq\t.LC0(%%rip), %%rcx\n");
+    fprintf(OutputFile, "\tcall\tPrintInteger\n");
+
+    DeallocateRegister(Register);
+}
+
+// Emit the start of the assembly file and reset the register allocator.
+void AssemblerPreamble() {
+    DeallocateAllRegisters();
+    fputs(
+        "\t.text\n", /*
+        ".LC0:\n"
+        "\t.string\t\"%d\\n\"\n", */
+        OutputFile);
+}
+
+/*
+ * Emit the label and prologue for the function Symbols[FunctionID]:
+ * save %rbp, set up the frame pointer and reserve 32 bytes of stack.
+ */
+void AsFunctionPreamble(int FunctionID) {
+    char* Name = Symbols[FunctionID].Name;
+
+    fprintf(OutputFile,
+        "\t.text\n"
+        "\t.globl\t%s\n"
+        "\t.def\t%s; .scl 2; .type 32; .endef\n"
+        "%s:\n"
+        "\tpushq\t%%rbp\n"
+        "\tmovq\t%%rsp, %%rbp\n"
+        "\tsubq\t$32, %%rsp\n", Name, Name, Name);
+
+    //PECOFF requires we call the global initialisers
+    if(!strcmp(Name, "main"))
+        fprintf(OutputFile, "\tcall\t__main\n");
+}
+
+/*
+ * Emit the end label and epilogue for the function Symbols[FunctionID].
+ * The prologue is undone in reverse order: release the 32 bytes first, then
+ * restore %rbp. Popping first loaded %rbp from the reserved area instead of
+ * from the saved slot, so the caller's frame pointer was lost.
+ */
+void AsFunctionEpilogue(int FunctionID) {
+    AsLabel(Symbols[FunctionID].EndLabel);
+
+    fputs(
+        "\taddq\t$32, %rsp\n"
+        "\tpopq\t%rbp\n"
+        "\tret\n",
+        OutputFile);
+}
\ No newline at end of file
diff --git a/src/Lexer.c b/src/Lexer.c
new file mode 100644
index 0000000..85c2dfd
--- /dev/null
+++ 
b/src/Lexer.c
@@ -0,0 +1,322 @@
+
+/*************/
+/*GEMWIRE */
+/* ERYTHRO*/
+/*************/
+
+#include
+#include
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * * * C H A R S T R E AM * * * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Push one character back so the next NextChar() returns it again.
+// Only a single character can be held at a time.
+static void ReturnCharToStream(int Char) {
+    Overread = Char;
+}
+
+// Return the next character of input: the pushed-back one if present,
+// otherwise a fresh one from SourceFile (EOF at end of input).
+// Line is counted only for characters read from the file, so a newline that
+// is pushed back and re-read is not counted twice.
+static int NextChar(void) {
+    int Char;
+
+    if(Overread) {
+        Char = Overread;
+        Overread = 0;
+        return Char;
+    }
+
+    Char = fgetc(SourceFile);
+
+    if(Char == '\n')
+        Line++;
+
+    return Char;
+}
+
+
+// Return the next character that is not a space, tab, newline or carriage return.
+static int FindChar() {
+    int Char;
+
+    Char = NextChar();
+
+    while(Char == ' ' || Char == '\t' || Char == '\n' || Char == '\r') {
+        Char = NextChar();
+    }
+
+    return Char;
+}
+
+// Return the index of Char within String, or -1 if it does not occur.
+static int FindDigitFromPos(char* String, char Char) {
+    char* Result;
+
+    // strchr() also matches the terminating NUL, which would turn a NUL
+    // byte in the input into "digit" 10.
+    if(Char == '\0')
+        return -1;
+
+    Result = strchr(String, Char);
+    return(Result ? Result - String : -1);
+}
+
+// Require the current token to be of the given Type and advance past it.
+// Otherwise report what was expected (TokenExpected) and exit.
+void VerifyToken(int Type, char* TokenExpected) {
+    if(CurrentToken.type == Type)
+        Tokenise(&CurrentToken);
+    else {
+        printf("Expected %s on line %d\n", TokenExpected, Line);
+        exit(1);
+    }
+}
+
+// The token handed back by RejectToken(), or NULL when there is none.
+static struct Token* RejectedToken = NULL;
+
+// Hand a token back to the lexer; the next Tokenise() call returns it
+// instead of scanning new input. Only one token may be pending.
+void RejectToken(struct Token* Token) {
+    if(RejectedToken != NULL)
+        Die("Cannot reject two tokens in a row!");
+
+    RejectedToken = Token;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Scan a decimal integer literal whose first digit is Char and return its
+// value. The first non-digit character is pushed back.
+static int ReadInteger(int Char) {
+    int CurrentChar = 0;
+    int IntegerValue = 0;
+
+    while((CurrentChar = FindDigitFromPos("0123456789", Char)) >= 0) {
+        IntegerValue = IntegerValue * 10 + CurrentChar;
+        Char = NextChar();
+    }
+
+    ReturnCharToStream(Char);
+
+    return IntegerValue;
+}
+
+// Variable identifier, keyword, function.
+// Scan an identifier whose first character is Char into Buffer (at most
+// Limit - 1 characters plus the terminator) and return its length.
+// Exits if the identifier is too long.
+static int ReadIdentifier(int Char, char* Buffer, int Limit) {
+    int ind = 0;
+
+    // This defines the valid chars in a keyword/variable/function.
+    while(isalpha(Char) || isdigit(Char) || Char == '_') {
+        if (ind >= Limit - 1) {
+            printf("Identifier too long: %d\n", Line);
+            exit(1);
+        } else {
+            Buffer[ind++] = Char;
+        }
+
+        Char = NextChar();
+    }
+
+    // At this point, we've reached a non-keyword character
+    ReturnCharToStream(Char);
+    Buffer[ind] = '\0';
+    return ind;
+}
+
+/*
+ * This function is what defines the valid keywords for the language
+ * //TODO: move this to a static list?
+ * //TODO: More optimisations?
+ *
+ * Returns the keyword's token type, or 0 if Str is not a keyword.
+ */
+static int ReadKeyword(char* Str) {
+    // First, scan with reference intact.
+    switch(*Str) {
+        // This lets us case against the first char:
+        case ':':
+            if(!strcmp(Str, "::"))
+                return KW_FUNC;
+            break;
+
+        case 'c':
+            if(!strcmp(Str, "char"))
+                return TY_CHAR;
+            break;
+
+        case 'e':
+            if(!strcmp(Str, "else"))
+                return KW_ELSE;
+
+            break;
+
+        case 'f':
+            if(!strcmp(Str, "for"))
+                return KW_FOR;
+            break;
+
+        case 'i':
+
+            if(!strcmp(Str, "int"))
+                return TY_INT;
+
+            if(!strcmp(Str, "if"))
+                return KW_IF;
+
+            break;
+
+        case 'l':
+            if(!strcmp(Str, "long"))
+                return TY_LONG;
+
+            break;
+
+        case 'p':
+            // This is a huge optimisation once we have as many keywords as a fully featured language.
+            if(!strcmp(Str, "print"))
+                return KW_PRINT;
+            break;
+
+        case 'r':
+            if(!strcmp(Str, "return"))
+                return KW_RETURN;
+            break;
+
+        case 'v':
+            if(!strcmp(Str, "void"))
+                return TY_VOID;
+            break;
+
+        case 'w':
+            if(!strcmp(Str, "while"))
+                return KW_WHILE;
+            break;
+
+
+
+    }
+
+    return 0;
+}
+
+/* * * * * * * * * * * * * * * * * * * * *
+ * * * * T O K E N I S E R * * * *
+ * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Scan the next token into *Token.
+ *
+ * A token previously handed back with RejectToken() is returned first.
+ * Token->value is only written for LI_INT tokens; identifier text is left
+ * in CurrentIdentifier.
+ *
+ * Returns 0 when the end of input is reached, 1 otherwise.
+ */
+int Tokenise(struct Token* Token) {
+    int Char, TokenType;
+
+    if(RejectedToken != NULL) {
+        // Give the pushed-back token to the caller. Assigning to the
+        // parameter itself, as this used to do, only changed a local
+        // pointer. When the caller passes the same object it rejected,
+        // nothing needs copying.
+        if(Token != RejectedToken)
+            *Token = *RejectedToken;
+        RejectedToken = NULL;
+        return 1;
+    }
+
+    Char = FindChar();
+
+    switch(Char) {
+        case EOF:
+            Token->type = LI_EOF;
+            return 0;
+
+        case '+':
+            Token->type = AR_PLUS;
+            break;
+
+        case '-':
+            Token->type = AR_MINUS;
+            break;
+
+        case '*':
+            Token->type = AR_STAR;
+            break;
+
+        case '/':
+            Token->type = AR_SLASH;
+            break;
+
+        case '=':
+            Char = NextChar();
+            // If the next char is ?, we have =?, the compare equality token.
+            if(Char == '?') {
+                Token->type = CMP_EQUAL;
+            // if the next char is >, we have =>, the greater than or equal token.
+            } else if(Char == '>') {
+                Token->type = CMP_GTE;
+            // If none of the above match, we have = and an extra char. Return the char and set the token
+            } else {
+                ReturnCharToStream(Char);
+                Token->type = LI_EQUAL;
+            }
+            break;
+
+        case '!':
+            Char = NextChar();
+            // If the next char is =, we have !=, the compare inequality operator.
+            if(Char == '=') {
+                Token->type = CMP_INEQ;
+            // Otherwise a lone '!' is not a token. Report it rather than
+            // returning with Token->type left over from the previous token.
+            } else {
+                ReturnCharToStream(Char);
+                DieChar("Unrecognized character", '!');
+            }
+            break;
+
+        case '<':
+            Char = NextChar();
+            // If the next char is =, we have <=, the less than or equal comparator.
+            if(Char == '=') {
+                Token->type = CMP_LTE;
+            } else {
+                ReturnCharToStream(Char);
+                Token->type = CMP_LT;
+            }
+            break;
+
+        case '>':
+            // There is no special casing for >. Greater than or equal is =>
+            Token->type = CMP_GT;
+            break;
+
+        case ';':
+            Token->type = LI_SEMIC;
+            break;
+
+        case '(':
+            Token->type = LI_LPARE;
+            break;
+
+        case ')':
+            Token->type = LI_RPARE;
+            break;
+
+        case '{':
+            Token->type = LI_LBRAC;
+            break;
+
+        case '}':
+            Token->type = LI_RBRAC;
+            break;
+
+        case ':':
+            Char = NextChar();
+
+            if(Char == ':') {
+                Token->type = KW_FUNC;
+            // A lone ':' is not a token; see the '!' case above.
+            } else {
+                ReturnCharToStream(Char);
+                DieChar("Unrecognized character", ':');
+            }
+            break;
+
+        default:
+            if(isdigit(Char)) {
+
+                Token->value = ReadInteger(Char);
+                Token->type = LI_INT;
+                break;
+
+            } else if(isalpha(Char) || Char == '_') { // This is what defines what a variable/function/keyword can START with.
+                ReadIdentifier(Char, CurrentIdentifier, TEXTLEN);
+
+                // ReadKeyword() returns 0 for anything that is not a keyword
+                TokenType = ReadKeyword(CurrentIdentifier);
+                if(TokenType != 0) {
+                    Token->type = TokenType;
+                    break;
+                }
+
+                Token->type = TY_IDENTIFIER;
+                break;
+                //printf("Line %d: Unrecognized symbol %s\n", CurrentIdentifier, Line);
+                //exit(1);
+            }
+
+
+            DieChar("Unrecognized character", Char);
+
+    }
+
+    return 1;
+}
+
diff --git a/src/Main.c b/src/Main.c
new file mode 100644
index 0000000..bda3e4c
--- /dev/null
+++ b/src/Main.c
@@ -0,0 +1,130 @@
+/*************/
+/*GEMWIRE */
+/* ERYTHRO*/
+/*************/
+
+#include
+#define extern_
+#include
+#undef extern_
+#include
+
+char* TokenStrings[] = { "+", "-", "*", "/", "int" };
+
+// One printable name per enum TokenTypes entry, in enum order.
+char* TokenNames[] = {
+    "End of file",
+
+    "Addition",
+    "Subtraction",
+    "Multiplication",
+    "Division",
+
+    "Equality Check",
+    "Inequality Check",
+    "Less Than",
+    "Greater Than",
+    "Less Than or Equal",
+    "Greater Than or Equal",
+
+    "Assignment",
+    "Integer literal",
+    "Statement End",
+
+    "Compound Block Start",
+    "Compound Block End",
+
+    "Logical Block Start",
+    "Logical Block End",
+
+    "Identifier",
+    "None Type",
+    "Char Type",
+    "Int Type",
+    "Long Type",
+    "Void Type",
+
+    "Function keyword",
+    "Print Keyword",
+    "If keyword",
+    "Else keyword",
+    "While keyword",
+    "For keyword",
+
+    "Return keyword"
+};
+
+// Debug helper: print every token of SourceFile, one per line.
+static void TokeniseFile() {
+
+    struct Token Token;
+
+    while(Tokenise(&Token)) {
+
+        // TokenNames has an entry for every token type. TokenStrings has
+        // only five entries, so indexing it by token type read out of bounds.
+        printf("Token %s", TokenNames[Token.type]);
+        if(Token.type == LI_INT) {
+            printf(", value %d", Token.value);
+        }
+
+        printf("\n");
+    }
+
+}
+
+/*
+ * Entry point: compile argv[1] (source) into argv[2] (assembly).
+ * Functions are parsed and assembled one at a time until end of input.
+ */
+int main(int argc, char* argv[]) {
+    Line = 1;
+    Overread = '\n';
+    struct ASTNode* Node;
+
+    // Both file names are required; without this check argv[1] / argv[2]
+    // would be handed to fopen() as NULL.
+    if(argc < 3) {
+        fprintf(stderr, "Usage: %s <source file> <output file>\n", argc > 0 ? argv[0] : "erythro");
+        exit(1);
+    }
+
+    if((SourceFile = fopen(argv[1], "r")) == NULL) {
+        fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno));
+        exit(1);
+    }
+
+    if((OutputFile = fopen(argv[2], "w")) == NULL) {
+        fprintf(stderr, "Unable to open %s: %s\n", argv[2], strerror(errno));
+        exit(1);
+    }
+
+    // The runtime's print routine is registered before anything is parsed
+    AddFunctionSymbol("PrintInteger", RET_CHAR, ST_FUNC, 0);
+
+    // Prime the parser with the first token
+    Tokenise(&CurrentToken);
+
+    AssemblerPreamble();
+
+    while(1) {
+        Node = ParseFunction();
+        printf("\nBeginning assembler creation of new function %s\n", Symbols[Node->Value.ID].Name);
+        AssembleTree(Node, -1, 0);
+
+        if(CurrentToken.type == LI_EOF)
+            break;
+    }
+
+    //Node = ParsePrecedenceASTNode();
+    //printf("%d\n", ParseAST(Node));
+
+    //AssembleNode(Node);
+
+    fclose(SourceFile);
+    fclose(OutputFile);
+
+    exit(0);
+}
+
+// Report Error with the current line number and exit.
+void Die(char* Error) {
+    fprintf(stderr, "%s on line %d\n", Error, Line);
+    exit(1);
+}
+
+// Report Error with a string detail and the current line number, then exit.
+void DieMessage(char* Error, char* Reason) {
+    fprintf(stderr, "%s:%s on line %d\n", Error, Reason, Line);
+    exit(1);
+}
+
+// Report Error with an integer detail and the current line number, then exit.
+void DieDecimal(char* Error, int Number) {
+    fprintf(stderr, "%s:%d on line %d\n", Error, Number, Line);
+    exit(1);
+}
+
+// Report Error with a character detail and the current line number, then exit.
+void DieChar(char* Error, int Char) {
+    fprintf(stderr, "%s:%c on line %d\n", Error, Char, Line);
+    exit(1);
+}
\ No newline at end of file
diff --git a/src/Parser.c b/src/Parser.c
new file mode 100644
index 0000000..146f2c8
--- /dev/null
+++ b/src/Parser.c
@@ -0,0 +1,433 @@
+
+/*************/
+/*GEMWIRE */
+/* ERYTHRO*/
+/*************/
+
+#include
+#include
+#include "Defs.h"
+#include "Data.h"
+
+/*
+ * Precedence is directly related to Token Type.
+ *
+ * enum TokenTypes {
+ *  LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT
+ * };
+ *
+ */
+static int Precedence[] =
+      { 0,        // EOF
+       10, 10,    // + -
+       20, 20,    // * /
+       30, 30,    // =? !=
+       40, 40,    // < >
+       40, 40};   // <= =>
+
+/*
+ * Look up the binding power of an operator token.
+ *
+ * @param Token: a TokenTypes value.
+ * @return the non-zero precedence of the token.
+ *
+ * The table above only covers the tokens up to the comparison operators.
+ * Any token outside the table (or one whose entry is 0) is not an operator,
+ * so it is treated as precedence 0 and reported through Die() instead of
+ * reading past the end of the array.
+ */
+static int OperatorPrecedence(int Token) {
+    int Entries = (int) (sizeof(Precedence) / sizeof(Precedence[0]));
+    int Prec = 0;
+
+    if(Token >= 0 && Token < Entries)
+        Prec = Precedence[Token];
+
+    if(Prec == 0) {
+        Die("Attempting to determine operator precedence of an EOF or INT literal.");
+    }
+
+    return Prec;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * *
+ * * *  N O D E   C O N S T R U C T I O N  * * *
+ * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Allocate a new AST node and fill in every field.
+ *
+ * @param Operation: the operation this node performs.
+ * @param Type:      stored in the node's ExprType.
+ * @param Left, Middle, Right: child nodes; any of them may be NULL.
+ * @param IntValue:  stored in Value.IntValue.
+ * @return the newly allocated node. Exits the program if malloc fails.
+ */
+struct ASTNode* ConstructASTNode(int Operation, int Type,
+                                 struct ASTNode* Left,
+                                 struct ASTNode* Middle,
+                                 struct ASTNode* Right,
+                                 int IntValue) {
+    struct ASTNode* Node;
+
+    Node = (struct ASTNode*) malloc(sizeof(struct ASTNode));
+
+    if(!Node) {
+        fprintf(stderr, "Unable to allocate node!");
+        exit(1);
+    }
+
+    Node->Operation = Operation;
+    Node->ExprType = Type;
+    Node->Left = Left;
+    Node->Middle = Middle;
+    Node->Right = Right;
+    Node->Value.IntValue = IntValue;
+
+    return Node;
+}
+
+
+/* Construct a node with no children. */
+struct ASTNode* ConstructASTLeaf(int Operation, int Type, int IntValue) {
+    return ConstructASTNode(Operation, Type, NULL, NULL, NULL, IntValue);
+}
+
+/* Construct a node with only a Left child. */
+struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, int IntValue) {
+    return ConstructASTNode(Operation, Type, Left, NULL, NULL, IntValue);
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * *
+ * * * *   T O K E N   P A R S I N G   * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Take a Token Type, and convert it to an AST-Node Operation.
+ *
+ * TokenTypes and SyntaxOps are synchronized to make this easy.
+ *
+ * Tokens strictly between LI_EOF and LI_INT map to themselves;
+ * anything else is fatal.
+ */
+
+int ParseTokenToOperation(int Token) {
+    if(Token > LI_EOF && Token < LI_INT)
+        return Token;
+
+    DieDecimal("ParseToken: Unknown token", Token);
+    return -1; // Not reached, DieDecimal exits. Keeps every path returning a value.
+}
+
+/*
+ * Parse a primary (terminal) expression.
+ * This currently handles literal expressions, constructing a leaf node
+ * and handing control back up the chain.
+ *
+ * Integer literals in the range 0..255 are typed RET_CHAR, all others RET_INT.
+ * An identifier followed by ( is handed to CallFunction; otherwise it is
+ * looked up as a variable.
+ */
+
+static struct ASTNode* ParsePrimary(void) {
+    struct ASTNode* Node;
+    int ID;
+
+    switch(CurrentToken.type) {
+        case LI_INT:
+
+            if((CurrentToken.value >= 0) && (CurrentToken.value < 256))
+                Node = ConstructASTLeaf(TERM_INTLITERAL, RET_CHAR, CurrentToken.value);
+            else
+                Node = ConstructASTLeaf(TERM_INTLITERAL, RET_INT, CurrentToken.value);
+
+            //Tokenise(&CurrentToken); // Fetch next token
+            break;
+
+        case TY_IDENTIFIER:
+            // A variable or a function?
+
+            // Read the next token
+            Tokenise(&CurrentToken);
+
+            // If the token after the identifier is a (, then it's a function.
+            if(CurrentToken.type == LI_LPARE)
+                return CallFunction();
+
+            // Otherwise, we've read too far and need to go back.
+            RejectToken(&CurrentToken);
+
+            // It's a variable, so find the symbol and construct a leaf for it
+            ID = FindSymbol(CurrentIdentifier);
+            if(ID == -1)
+                DieMessage("Unknown Variable", CurrentIdentifier);
+            Node = ConstructASTLeaf(REF_IDENT, Symbols[ID].Type, ID);
+            break;
+
+        default:
+            DieDecimal("Syntax Error", CurrentToken.type);
+    }
+
+    Tokenise(&CurrentToken);
+
+    return Node;
+}
+
+
+/*
+ * Parse an expression with no regard for precedence: a primary, then
+ * (unless EOF follows) an operator and, recursively, the rest.
+ */
+struct ASTNode* ParseNewASTNode(void) {
+    //fprintf(stdout, "New node requested.");
+    struct ASTNode* LeftNode, *RightNode;
+    int NodeType;
+
+
+    LeftNode = ParsePrimary(); // Fetches next token!
+
+    // If there's just a number, then this is the AST Node.
+    // Return, as the root of the tree is the end of the tree.
+    if(CurrentToken.type == LI_EOF)
+        return(LeftNode);
+
+
+    NodeType = ParseTokenToOperation(CurrentToken.type);
+
+    Tokenise(&CurrentToken);
+
+    RightNode = ParseNewASTNode();
+
+    return ConstructASTNode(NodeType, LeftNode->ExprType, LeftNode, NULL, RightNode, 0);
+}
+
+
+
+/*
+ * Parse an expression, honouring operator precedence.
+ *
+ * @param PreviousTokenPrecedence: precedence of the operator to our left;
+ *        only operators that bind tighter are consumed here.
+ * @return the root of the parsed expression tree.
+ *
+ * Parsing stops at ; or ). For each operator, both operand types are run
+ * through TypesCompatible, and whichever side it flags is wrapped in the
+ * operation it returns (a widen) before the operator node is built.
+ */
+struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
+    struct ASTNode* LeftNode, *RightNode;
+    int LeftType, RightType;
+    int NodeType;
+
+    LeftNode = ParsePrimary();
+
+    NodeType = CurrentToken.type;
+    if(NodeType == LI_SEMIC || NodeType == LI_RPARE)
+        return LeftNode;
+
+    //printf("Current token has value %d, type %d\n", CurrentToken.value, CurrentToken.type);
+    while(OperatorPrecedence(NodeType) > PreviousTokenPrecedence) {
+        //printf("inside while\n");
+        Tokenise(&CurrentToken);
+
+        RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
+
+        LeftType = LeftNode->ExprType;
+        RightType = RightNode->ExprType;
+
+        if(!TypesCompatible(&LeftType, &RightType, 0))
+            Die("Assignment between incompatible types");
+
+        if(LeftType)
+            LeftNode = ConstructASTBranch(LeftType, RightNode->ExprType, LeftNode, 0);
+        if(RightType)
+            RightNode = ConstructASTBranch(RightType, LeftNode->ExprType, RightNode, 0);
+
+        LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode->ExprType, LeftNode, NULL, RightNode, 0);
+
+        NodeType = CurrentToken.type;
+        if(NodeType == LI_SEMIC || NodeType == LI_RPARE)
+            return LeftNode;
+    }
+
+    return LeftNode;
+}
+
+
+/* struct ASTNode* ParseMultiplicativeASTNode(void) {
+    struct ASTNode* LeftNode, * RightNode;
+    int NodeType;
+
+    LeftNode = ParsePrimary();
+
+    NodeType = CurrentToken.type;
+    if(NodeType == LI_EOF)
+        return LeftNode;
+
+    while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
+        Tokenise(&CurrentToken);
+
+        RightNode = ParsePrimary();
+
+        LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
+
+        NodeType = CurrentToken.type;
+        if(NodeType == LI_EOF)
+            break;
+    }
+
+    return LeftNode;
+}
+ */
+/* struct ASTNode* ParseAdditiveASTNode(void) {
+    struct ASTNode* LeftNode, * RightNode;
+    int NodeType;
+
+    LeftNode = ParseMultiplicativeASTNode();
+
+    NodeType = CurrentToken.type;
+    if(NodeType == LI_EOF)
+        return LeftNode;
+
+    while(1) {
+        Tokenise(&CurrentToken);
+
+        RightNode = ParseMultiplicativeASTNode();
+
+        LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
+
+        NodeType = CurrentToken.type;
+        if(NodeType == LI_EOF)
+            break;
+    }
+
+    return LeftNode;
+}
+ */
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * *
+ * * * *  I N T E R P R E T A T I O N  * * * *
+ * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Evaluate an arithmetic tree directly, without generating assembly.
+ *
+ * @return the value of the tree rooted at Node. Exits on an operation
+ *         other than the four arithmetic ops, REF_IDENT or TERM_INTLITERAL.
+ */
+int ParseAST(struct ASTNode* Node) {
+
+    // Start from 0 so a node with a missing child never reads garbage.
+    int LeftVal = 0, RightVal = 0;
+
+    if(Node->Left)
+        LeftVal = ParseAST(Node->Left);
+
+    if(Node->Right)
+        RightVal = ParseAST(Node->Right);
+
+    /*
+    if(Node->Operation == TERM_INTLITERAL)
+        printf("int %d\n", Node->IntValue);
+    else
+        printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
+    */
+
+    switch(Node->Operation) {
+        case OP_ADD:
+            return (LeftVal + RightVal);
+        case OP_SUBTRACT:
+            return (LeftVal - RightVal);
+        case OP_MULTIPLY:
+            return (LeftVal * RightVal);
+        case OP_DIVIDE:
+            return (LeftVal / RightVal);
+
+        case REF_IDENT:
+        case TERM_INTLITERAL:
+            return Node->Value.IntValue;
+        default:
+            fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
+            exit(1);
+    }
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * *
+ * * * *   F U N C T I O N S   * * * *
+ * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Parse a call to the function named by CurrentIdentifier.
+ * Expects the current token to be the opening (.
+ *
+ * @return an OP_CALL node whose Left child is the argument expression
+ *         and whose value is the function's symbol table index.
+ *
+ * Dies if the name is not in the symbol table, or is not a function.
+ */
+struct ASTNode* CallFunction() {
+    struct ASTNode* Tree;
+    int FuncID;
+
+    // Assign first, then test: the lookup result must end up in FuncID,
+    // and a symbol that exists but is not a function is just as invalid
+    // as one that does not exist.
+    FuncID = FindSymbol(CurrentIdentifier);
+    if(FuncID == -1 || Symbols[FuncID].Structure != ST_FUNC)
+        DieMessage("Undeclared function", CurrentIdentifier);
+
+    VerifyToken(LI_LPARE, "(");
+
+    Tree = ParsePrecedenceASTNode(0);
+
+    Tree = ConstructASTBranch(OP_CALL, Symbols[FuncID].Type, Tree, FuncID);
+
+    VerifyToken(LI_RPARE, ")");
+
+    return Tree;
+}
+
+
+
+/* * * * * * * * * * * * * * * * * * * * * *
+ * * * *   S T A T E M E N T S   * * * *
+ * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Parse a single statement, dispatching on the current token.
+ *
+ * @return the statement's tree, or NULL for a variable declaration
+ *         (which produces no tree). Dies on any other token.
+ */
+struct ASTNode* ParseStatement(void) {
+    int ID;
+
+    switch(CurrentToken.type) {
+
+        case KW_PRINT:
+            return PrintStatement();
+
+        case TY_CHAR:
+        case TY_LONG:
+        case TY_INT:
+            printf("\t\tNew Variable: %s\n", CurrentIdentifier);
+            BeginVariableDeclaration();
+            return NULL;
+
+        case TY_IDENTIFIER:
+            // Only log when the symbol exists; an unknown name must not be
+            // used to index the table. ParseIdentifier reports it properly.
+            ID = FindSymbol(CurrentIdentifier);
+            if(ID != -1) {
+                if(Symbols[ID].Structure == ST_FUNC)
+                    printf("\t\tCalling Function: %s\n", Symbols[ID].Name);
+                else
+                    printf("\t\tAssigning variable: %s\n", Symbols[ID].Name);
+            }
+
+            return ParseIdentifier();
+
+        case KW_IF:
+            return IfStatement();
+
+        case KW_WHILE:
+            return WhileStatement();
+
+        case KW_FOR:
+            return ForStatement();
+
+        case KW_RETURN:
+            return ReturnStatement();
+
+        default:
+            DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
+    }
+
+    return NULL; // Not reached, DieDecimal exits.
+}
+
+
+/*
+ * Parse a { ... } block into a left-leaning chain of OP_COMP nodes.
+ *
+ * @return the tree for the block, or NULL if no statement in it
+ *         produced a tree.
+ *
+ * print, assignment, return and call statements must be followed by ;
+ */
+struct ASTNode* ParseCompound() {
+    struct ASTNode* Left = NULL, *Tree;
+
+    // Compound statements are defined by comprising
+    // multiple statements inside { a bracket block }
+    VerifyToken(LI_LBRAC, "{");
+
+    while(1) {
+        printf("\tNew branch in compound\n");
+
+        Tree = ParseStatement();
+
+        if(Tree && (Tree->Operation == OP_PRINT || Tree->Operation == OP_ASSIGN
+                    || Tree->Operation == OP_RET || Tree->Operation == OP_CALL))
+            VerifyToken(LI_SEMIC, ";");
+
+        if(Tree) {
+            if(!Left)
+                Left = Tree;
+            else
+                Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, 0);
+        }
+
+        if(CurrentToken.type == LI_RBRAC) {
+            VerifyToken(LI_RBRAC, "}");
+            return Left;
+        }
+    }
+}
+
+
+
+/* void ParseStatements() {
+    while(1) {
+        switch(CurrentToken.type) {
+
+            case KW_PRINT:
+                PrintStatement();
+                break;
+
+            case TY_INT:
+                BeginVariableDeclaration();
+                break;
+
+            case TY_IDENTIFIER:
+                AssignVariable();
+                break;
+
+            case LI_EOF:
+                return;
+
+            default:
+                DieDecimal("Syntax error; Token", CurrentToken.type);
+        }
+    }
+} */
+
+
diff --git a/src/Statements.c b/src/Statements.c
new file mode 100644
index 0000000..258d9e9
--- /dev/null
+++ b/src/Statements.c
@@ -0,0 +1,386 @@
+
+/*************/
+/*GEMWIRE    */
+/*    ERYTHRO*/
+/*************/
+
+#include
+#include
+
+
+/*
+ * Turn a token type into its appropriate
+ * primitive type.
+ *
+ * This is where we do redirections like:
+ *  short -> s16
+ *  long  -> s64
+ *  int   -> s32
+ *  char  -> u8
+ *
+ */
+
+// Type names, indexed like TypeSize. Only referenced by the commented-out
+// debug output in BeginVariableDeclaration.
+static char* Types[5] = { "none", "char", "int", "long", "void" };
+static int TypeSize[5] = { 0, 1, 4, 8, 0}; // in BYTES
+
+int ParseType(int Token) {
+    switch(Token) {
+        case TY_CHAR:
+            return RET_CHAR;
+
+        case TY_VOID:
+            return RET_VOID;
+
+        case TY_INT:
+            return RET_INT;
+
+        default:
+            DieDecimal("Illegal variable type", Token);
+    }
+
+    return -1; // Not reached, DieDecimal exits.
+}
+
+/*
+ * @param Type: a primitive type between RET_NONE and RET_VOID inclusive.
+ * @return the size of that type in bytes.
+ *
+ * Dies on a type outside that range. The bad value is reported as a
+ * number, because it cannot be used to index the table of type names.
+ */
+int PrimitiveSize(int Type) {
+    if(Type < RET_NONE || Type > RET_VOID)
+        DieDecimal("Checking size of bad data type", Type);
+
+    return TypeSize[Type];
+}
+
+/*
+ * Given two types, determine if they are compatible.
+ *
+ * @param Left, Right: in: the two types to compare.
+ *        out: for each side, either 0 (leave the value alone) or
+ *        OP_WIDEN (wrap that side in a widen before using it).
+ * @param STRICT: if non-zero, a Left that is larger than Right is rejected.
+ * @return 1 if compatible, 0 if not. The out values are only meaningful
+ *         when 1 is returned.
+ *
+ * Identical types are compatible with nothing to do.
+ * A type of size 0 is never compatible with a different type.
+ * If Left is smaller than Right, Left is widened.
+ * If Left is larger than Right, Right is widened, unless STRICT is set.
+ *
+ * NOTE: STRICT IS NOT THE DEFAULT BEHAVIOUR
+ * Most callers pass 0, so mixing a char and an int in either order is allowed.
+ *
+ */
+
+int TypesCompatible(int* Left, int* Right, int STRICT) {
+
+    int LeftSize, RightSize;
+
+    // Same types are compatible. No shrinking required
+    if(*Left == *Right) {
+        *Left = *Right = 0;
+        return 1;
+    }
+
+    LeftSize = PrimitiveSize(*Left);
+    RightSize = PrimitiveSize(*Right);
+
+
+    // Types of size 0 are incompatible
+    if((LeftSize == 0) || (RightSize == 0))
+        return 0;
+
+
+    /* char x;
+     * int y;
+     * y = 15;
+     *
+     * x = y;
+     * x needs to be widened, y copied in, then x shrunk back down
+     * AKA, the left must be widened.
+     */
+    if(LeftSize < RightSize) {
+        *Left = OP_WIDEN;
+        *Right = 0;
+        return 1;
+    }
+
+    /*
+     * char x;
+     * int y;
+     *
+     * x = 15;
+     *
+     * y = x;
+     * x must be widened to fit into y.
+     * if STRICT mode, this is not allowed.
+     * By default, this is valid.
+     *
+     */
+
+    if(LeftSize > RightSize) {
+        if(STRICT)
+            return 0; // Not compatible if STRICT
+
+        *Left = 0;
+        *Right = OP_WIDEN;
+        return 1; // Compatible by default
+    }
+
+    /*
+     * Any other cases left, by default, are compatible.
+     *
+     */
+
+    *Left = *Right = 0;
+    return 1;
+
+}
+
+/*
+ * Handles the declaration of a type of a variable.
+ *  int newVar;
+ *
+ * It verifies that we have the `int` keyword followed by a
+ * unique, non-keyword identifier.
+ *
+ * It then stores this variable into the symbol table.
+ *
+ * //TODO: Assemble this into the symbol table.
+ * //TODO: int i = 5;
+ *
+ */
+void BeginVariableDeclaration(void) {
+    int ID;
+
+    int Type = ParseType(CurrentToken.type);
+    //printf("type: %s\n", Types[Type]);
+    Tokenise(&CurrentToken);
+    VerifyToken(TY_IDENTIFIER, "ident");
+    //printf("Identifier: %s\n", CurrentIdentifier);
+
+    ID = AddSymbol(CurrentIdentifier, Type, ST_VAR);
+    AsNewSymb(ID);
+
+    VerifyToken(LI_SEMIC, ";");
+}
+
+/*
+ * Parse a whole function definition:
+ *  type :: name () { body }
+ *
+ * Registers the function in the symbol table with a fresh end label,
+ * makes it the CurrentFunction, then parses the body.
+ *
+ * @return an OP_FUNC node whose Left child is the body and whose value
+ *         is the function's symbol table index.
+ *
+ * Dies if a non-void function does not end in a return statement.
+ */
+struct ASTNode* ParseFunction() {
+    struct ASTNode* Tree;
+    struct ASTNode* FinalStatement;
+    int SymbolSlot, BreakLabel, Type;
+
+    Type = ParseType(CurrentToken.type);
+    Tokenise(&CurrentToken);
+    VerifyToken(KW_FUNC, "::");
+    VerifyToken(TY_IDENTIFIER, "ident");
+
+    printf("\nIdentified function %s\n", CurrentIdentifier);
+
+    BreakLabel = NewLabel();
+
+    SymbolSlot = AddFunctionSymbol(CurrentIdentifier, Type, ST_FUNC, BreakLabel);
+    CurrentFunction = SymbolSlot;
+
+    VerifyToken(LI_LPARE, "(");
+    VerifyToken(LI_RPARE, ")");
+
+    // NULL when the body holds nothing but declarations.
+    Tree = ParseCompound();
+
+    if(Type != RET_VOID) {
+        // Functions with one statement have no composite node, so we have to check
+        FinalStatement = (Tree != NULL && Tree->Operation == OP_COMP) ? Tree->Right : Tree;
+
+        if(FinalStatement == NULL || FinalStatement->Operation != OP_RET) {
+            Die("Function with non-void type does not return");
+        }
+
+    }
+
+    return ConstructASTBranch(OP_FUNC, Tree != NULL ? Tree->ExprType : RET_NONE, Tree, SymbolSlot);
+}
+
+/*
+ * Handles the logic for return.
+ * //TODO: No brackets
+ * //TODO: Type inference
+ *
+ * Parses `return ( expression )` inside the CurrentFunction.
+ * Dies if that function is void, or if the expression's type is not
+ * compatible with the function's type. A value that needs widening to
+ * the function's type is wrapped before the OP_RET node is built.
+ */
+
+struct ASTNode* ReturnStatement() {
+    struct ASTNode* Tree;
+    int ReturnType, FunctionType;
+
+
+    if(Symbols[CurrentFunction].Type == RET_VOID)
+        Die("Attempt to return from void function");
+
+    VerifyToken(KW_RETURN, "return");
+
+    VerifyToken(LI_LPARE, "("); // TODO: Make optional! Reject?
+
+    Tree = ParsePrecedenceASTNode(0);
+
+    ReturnType = Tree->ExprType;
+    FunctionType = Symbols[CurrentFunction].Type;
+
+    if(!TypesCompatible(&ReturnType, &FunctionType, 0))
+        Die("Returning a value of incorrect type for function");
+
+
+    if(ReturnType)
+        Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0);
+
+    Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, 0);
+
+    printf("\t\tReturning from function %s\n", Symbols[CurrentFunction].Name);
+
+    VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE!
+
+    return Tree;
+}
+
+/*
+ * Handles the assignment of variables.
+ *
+ * You can assign variables with an assignment,
+ * a statement, a function or a literal.
+ *
+ * This means we need to do some recursive parsing.
+ *
+ * An identifier followed by ( is a function call and is handed to
+ * CallFunction instead. For an assignment, the expression becomes the
+ * Left child and the variable being assigned the Right child of OP_ASSIGN.
+ */
+
+struct ASTNode* ParseIdentifier() {
+    struct ASTNode* Left, *Right, *Tree;
+    int LeftType, RightType;
+    int ID;
+
+    VerifyToken(TY_IDENTIFIER, "ident");
+
+    if(CurrentToken.type == LI_LPARE)
+        return CallFunction();
+
+    if((ID = FindSymbol(CurrentIdentifier)) == -1) {
+        // The first two slots may not be filled yet, so never hand a
+        // NULL name to %s.
+        printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier,
+               Symbols[0].Name ? Symbols[0].Name : "(empty)",
+               Symbols[1].Name ? Symbols[1].Name : "(empty)");
+        DieMessage("Undeclared Variable ", CurrentIdentifier);
+    }
+    Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);
+
+    VerifyToken(LI_EQUAL, "=");
+
+    Left = ParsePrecedenceASTNode(0);
+
+    LeftType = Left->ExprType;
+    RightType = Right->ExprType;
+
+    // Strict: the value may be widened to fit the variable, never narrowed.
+    if(!TypesCompatible(&LeftType, &RightType, 1))
+        Die("Incompatible variable types");
+
+    if(LeftType)
+        Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);
+
+    Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
+
+    return Tree;
+}
+
+/*
+ * Parse `if (comparison) { ... }` with an optional `else { ... }`.
+ *
+ * @return an OP_IF node: Left is the condition, Middle the true block,
+ *         Right the else block (NULL when there is none).
+ */
+struct ASTNode* IfStatement() {
+    struct ASTNode* Condition, *True, *False = NULL;
+
+    VerifyToken(KW_IF, "if");
+    VerifyToken(LI_LPARE, "(");
+
+    Condition = ParsePrecedenceASTNode(0);
+
+    // Limit if(x) to =? != < > <= =>
+    // No null checking, no arithmetic, no functions.
+    // TODO: this
+    if(Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE)
+        Die("Invalid Comparison in if statement");
+
+    VerifyToken(LI_RPARE, ")");
+
+    True = ParseCompound();
+
+    if(CurrentToken.type == KW_ELSE) {
+        Tokenise(&CurrentToken);
+        False = ParseCompound();
+    }
+
+    return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, 0);
+}
+
+/*
+ * Parse `while (comparison) { ... }`.
+ *
+ * @return an OP_LOOP node: Left is the condition, Right the body.
+ */
+struct ASTNode* WhileStatement() {
+    struct ASTNode* Condition, *Body;
+
+    VerifyToken(KW_WHILE, "while");
+    VerifyToken(LI_LPARE, "(");
+
+    Condition = ParsePrecedenceASTNode(0);
+
+    if(Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE)
+        Die("Bad Comparison inside while()");
+
+    VerifyToken(LI_RPARE, ")");
+
+    Body = ParseCompound();
+
+    return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, 0);
+}
+
+/*
+ * Parse a for loop and rewrite it as
+ *  preop; while (condition) { body; postop; }
+ */
+struct ASTNode* ForStatement() {
+
+    // for (preop; condition; postop) {
+    //    body
+    //}
+
+    struct ASTNode* Condition, *Body;
+    struct ASTNode* Preop, *Postop;
+
+    struct ASTNode* Tree;
+
+    VerifyToken(KW_FOR, "for");
+    VerifyToken(LI_LPARE, "(");
+
+    Preop = ParseStatement();
+    VerifyToken(LI_SEMIC, ";");
+
+    Condition = ParsePrecedenceASTNode(0);
+
+    if(Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE)
+        Die("Bad comparison in for");
+    VerifyToken(LI_SEMIC, ";");
+
+    Postop = ParseStatement();
+    VerifyToken(LI_RPARE, ")");
+
+    Body = ParseCompound();
+
+    // We need to be able to skip over the body and the postop, so we group them together.
+    Tree = ConstructASTNode(OP_COMP, RET_NONE, Body, NULL, Postop, 0);
+    // We need to be able to jump to the top of the condition and fall through to the body,
+    // so we group it with the last block
+    Tree = ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Tree, 0);
+
+    // Finally, put the preop in front of the loop, to form the final for loop
+    return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, 0);
+}
+
+/*
+ * Parse `print expression`.
+ *
+ * The expression is checked against RET_INT and wrapped in a widen
+ * when TypesCompatible asks for one.
+ *
+ * @return an OP_PRINT node whose Left child is the expression.
+ */
+struct ASTNode* PrintStatement(void) {
+    struct ASTNode* Tree;
+    int LeftType, RightType;
+
+    VerifyToken(KW_PRINT, "print");
+
+    Tree = ParsePrecedenceASTNode(0);
+
+    LeftType = RET_INT;
+    RightType = Tree->ExprType;
+
+    if(!TypesCompatible(&LeftType, &RightType, 0))
+        DieDecimal("Attempting to print an invalid type:", RightType);
+
+    if(RightType)
+        Tree = ConstructASTBranch(RightType, RET_INT, Tree, 0);
+
+    Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, 0);
+
+    //ParseAST(Tree);
+
+    return Tree;
+
+}
\ No newline at end of file
diff --git a/src/Symbols.c b/src/Symbols.c
new file mode 100644
index 0000000..4e221ce
--- /dev/null
+++ b/src/Symbols.c
@@ -0,0 +1,70 @@
+
+/*************/
+/*GEMWIRE    */
+/*    ERYTHRO*/
+/*************/
+
+#include
+#include
+
+static int GlobalSymbols = 0;
+
+static char* Types[4] = { "none", "char", "int", "void" };
+
+/*
+ * Find the position of a symbol in the symbol table.
+ * @Return the index into the symbol table if found,
+ *         -1 if not found.
+ */
+int FindSymbol(char* Symbol) {
+    int Ind;
+
+    for(Ind = 0; Ind < GlobalSymbols; Ind++) {
+        // Compare the first characters before paying for a full strcmp.
+        if(*Symbol == *Symbols[Ind].Name && !strcmp(Symbol, Symbols[Ind].Name))
+            return Ind;
+    }
+
+    return -1;
+}
+
+/*
+ * Append a new entry to the table of global symbols.
+ * @Return the index to the new entry
+ *
+ * Will kill the program if we run out.
+ * //TODO: Dump symbols on death?
+ */
+static int NewSymbol(void) {
+    int Pos;
+
+    if((Pos = GlobalSymbols++) >= SYMBOLS)
+        Die("Too many symbols");
+
+    return Pos;
+}
+
+// TODO: this is going weird!
+
+/*
+ * Add a function to the symbol table and record its end label.
+ * @Return the index of the function's entry.
+ */
+int AddFunctionSymbol(char* Name, int Type, int Structure, int EndLabel) {
+    int Slot;
+    Slot = AddSymbol(Name, Type, Structure);
+    Symbols[Slot].EndLabel = EndLabel;
+    return Slot;
+}
+
+/*
+ * Add a symbol to the table, unless one with this name already exists.
+ * @Return the index of the existing or newly created entry.
+ *
+ * The name is copied, so the caller's buffer may be reused afterwards.
+ */
+int AddSymbol(char* Name, int Type, int Structure) {
+
+    int TableSlot;
+
+    // Store the lookup result first and compare afterwards, so that the
+    // slot index is returned rather than the 0/1 result of the comparison.
+    TableSlot = FindSymbol(Name);
+    if(TableSlot != -1)
+        return TableSlot;
+
+    TableSlot = NewSymbol();
+
+    Symbols[TableSlot].Name = strdup(Name);
+    Symbols[TableSlot].Type = Type;
+    Symbols[TableSlot].Structure = Structure;
+
+    //printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
+    return TableSlot;
+}
\ No newline at end of file
diff --git a/tests/comparison1 b/tests/comparison1
new file mode 100644
index 0000000..9e810f2
--- /dev/null
+++ b/tests/comparison1
@@ -0,0 +1,23 @@
+int x;
+
+x = 1 < 2;
+print x;
+
+x = 1 <= 2;
+print x;
+
+x = 1 > 2;
+print x;
+
+x = 1 => 2;
+print x;
+
+x = 1 != 2;
+print x;
+
+x = 1 =? 2;
+print x;
+
+x = 18 =?
18; +print x; + diff --git a/tests/for b/tests/for new file mode 100644 index 0000000..80e36c2 --- /dev/null +++ b/tests/for @@ -0,0 +1,13 @@ +{ + int x; + + x = 0; + while (x < 10) { + print x; + x = x + 1; + } + + for(x = 20; x > 10; x = x - 1) { + print x; + } +} \ No newline at end of file diff --git a/tests/funcs b/tests/funcs new file mode 100644 index 0000000..a5b57c6 --- /dev/null +++ b/tests/funcs @@ -0,0 +1,8 @@ +void :: main () { + + int x; + + for(x = 20; x > 10; x = x - 1) { + print x; + } +} \ No newline at end of file diff --git a/tests/funcs2 b/tests/funcs2 new file mode 100644 index 0000000..2bf9282 --- /dev/null +++ b/tests/funcs2 @@ -0,0 +1,15 @@ +int :: Testings() { + return (40); +} + +void :: main() { + int Result; + + PrintInteger(10); + + Result = Testings(10); + + PrintInteger(Result); + + PrintInteger(Testings(10) + 10); +} \ No newline at end of file diff --git a/tests/if b/tests/if new file mode 100644 index 0000000..23e0854 --- /dev/null +++ b/tests/if @@ -0,0 +1,13 @@ +{ + int hey; + int sup; + + hey = 15; + sup = 20; + + if(hey < sup) { + print hey; + } else { + print sup; + } +} \ No newline at end of file diff --git a/tests/parser1 b/tests/parser1 new file mode 100644 index 0000000..7444049 --- /dev/null +++ b/tests/parser1 @@ -0,0 +1,6 @@ +print 12 * 3; +print + 18 - 2 + * 4; print +1 + 2 + + 9 - 5/2 + 3*5; \ No newline at end of file diff --git a/tests/tokeniser1 b/tests/tokeniser1 new file mode 100644 index 0000000..2ffdb2a --- /dev/null +++ b/tests/tokeniser1 @@ -0,0 +1 @@ +2 + 3 * 5 - 8 / 3 \ No newline at end of file diff --git a/tests/tokeniser2 b/tests/tokeniser2 new file mode 100644 index 0000000..0af8449 --- /dev/null +++ b/tests/tokeniser2 @@ -0,0 +1,8 @@ +251 + +32 - +531321323 * + + 8123 + + / +3 \ No newline at end of file diff --git a/tests/tokeniser3 b/tests/tokeniser3 new file mode 100644 index 0000000..b23145a --- /dev/null +++ b/tests/tokeniser3 @@ -0,0 +1 @@ +2+3*5-8/3 \ No newline at end of file diff --git 
a/tests/types b/tests/types new file mode 100644 index 0000000..f77ab0c --- /dev/null +++ b/tests/types @@ -0,0 +1,17 @@ +void :: main() { + + int x; + char y; + + x = 20; print x; + + y = 10; print y; + + for(x = 1; x <= 5; x = x + 1) { + print x; + } + + for(y = 253; y != 2; y = y + 1) { + print y; + } +} \ No newline at end of file diff --git a/tests/vars1 b/tests/vars1 new file mode 100644 index 0000000..e78479e --- /dev/null +++ b/tests/vars1 @@ -0,0 +1,9 @@ +int first; +int second; +int third; + +first = 5; +second = 8; +third = first * 5 + second / 4; + +print third - 10; \ No newline at end of file diff --git a/tests/while b/tests/while new file mode 100644 index 0000000..a9266fa --- /dev/null +++ b/tests/while @@ -0,0 +1,8 @@ +{ + int i; + + while (i <= 10) { + print i; + i = i + 1; + } +} \ No newline at end of file