From a27d3dd7829211f22c982c7313fab5a7bd15c888 Mon Sep 17 00:00:00 2001 From: Curle Date: Sun, 13 Sep 2020 02:26:49 +0100 Subject: [PATCH] Add support for pointers of char, int and long types --- include/Data.h | 2 ++ include/Defs.h | 31 +++++++++++++++++---- src/Assembler.c | 50 +++++++++++++++++++++++++++++----- src/Lexer.c | 4 +++ src/Main.c | 17 ++---------- src/Parser.c | 4 +-- src/Pointers.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ src/Statements.c | 71 ++++++++++++++++++++++++++++++------------------ tests/pointers | 22 +++++++++++++++ 9 files changed, 212 insertions(+), 55 deletions(-) create mode 100644 src/Pointers.c create mode 100644 tests/pointers diff --git a/include/Data.h b/include/Data.h index f2d86df..c28aa5d 100644 --- a/include/Data.h +++ b/include/Data.h @@ -16,6 +16,8 @@ extern_ struct SymbolTable Symbols[SYMBOLS]; +extern_ char* TypeNames[9]; + extern_ char* TokenStrings[]; extern_ char* TokenNames[]; diff --git a/include/Defs.h b/include/Defs.h index 7f59993..2b35bc6 100644 --- a/include/Defs.h +++ b/include/Defs.h @@ -16,6 +16,7 @@ * TYpes are prefixed TY. * CoMParisons are prefixed CMP. * + * * NOTE: Tokens are different from Syntax Operations! * * Tokens should represent the characters that invoke them, @@ -49,6 +50,8 @@ enum TokenTypes { LI_LPARE, // ( LI_RPARE, // ) + LI_AMP, // & + TY_IDENTIFIER, // Identifier name. Variable, function, etc. TY_NONE, // No return type. Literal void. TY_CHAR, // "char" type keyword @@ -94,6 +97,9 @@ enum SyntaxOps { OP_ASSIGN, // Assign an l-value + OP_ADDRESS, // Fetch the address of a var + OP_DEREF, // Get the value of the address in a pointer + TERM_INTLITERAL, // Integer Literal. This is a virtual operation, so it's a terminal. REF_IDENT, // Reference (read) an identifier (variable). @@ -149,11 +155,17 @@ struct SymbolTable { * //TODO: Move back into TokenTypes */ enum DataTypes { - RET_NONE, // No return type. Literal void. - RET_CHAR, // "char" type keyword - RET_INT, // "int" type keyword - RET_LONG, // "long" type keyword - RET_VOID, // "void" type keyword + RET_NONE, // No return type. Literal void. + RET_CHAR, // "char" type keyword + RET_INT, // "int" type keyword + RET_LONG, // "long" type keyword + RET_VOID, // "void" type keyword + + // Pointer types + PTR_CHAR, + PTR_INT, + PTR_LONG, + PTR_VOID, }; @@ -212,8 +224,10 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence); int ParseAST(struct ASTNode* Node); +struct ASTNode* ParsePrimary(void); //void ParseStatements(void); struct ASTNode* ParseStatement(void); +struct ASTNode* PrefixStatement(); struct ASTNode* ParseFunction(); struct ASTNode* ParseCompound(); @@ -222,7 +236,9 @@ struct ASTNode* ParseCompound(); struct ASTNode* CallFunction(); struct ASTNode* ReturnStatement(); -int ParseType(int Token); +int ParsePointer(); +int ValueAt(int Type); +int PointerTo(int Type); int ParseTokenToOperation(int Token); @@ -279,6 +295,9 @@ int AsDiv(int Left, int Right); int AsLdVar(int ID); int AsStrVar(int Register, int ID); +int AsDeref(int Reg, int Type); +int AsAddr(int ID); + void AsNewSymb(int ID); int AsEqual(int Left, int Right); diff --git a/src/Assembler.c b/src/Assembler.c index c66940f..a84fa35 100644 --- a/src/Assembler.c +++ b/src/Assembler.c @@ -26,8 +26,6 @@ static char* ByteRegisters[4] = { "%r8b", "%r9b", "%r10b", "%r11b" }; static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" }; static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"}; -static char* Types[5] = { "none", "char", "int", "long", "void" }; - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * R O O T O F A S S E M B L E R * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -85,6 +83,12 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { case OP_DIVIDE: return AsDiv(LeftVal, RightVal); + case OP_ADDRESS: + return AsAddr(Node->Value.ID); + + case OP_DEREF: + return AsDeref(LeftVal, Node->Left->ExprType); + case OP_ASSIGN: return RightVal; @@ -356,18 +360,22 @@ int AsLdVar(int ID) { break; case RET_LONG: + case PTR_CHAR: + case PTR_INT: + case PTR_LONG: + case PTR_VOID: fprintf(OutputFile, "\tmovq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]); break; default: - DieMessage("Bad type for loading", Types[Symbols[ID].Type]); + DieMessage("Bad type for loading", TypeNames[Symbols[ID].Type]); } return Reg; } int AsStrVar(int Register, int ID) { - printf("\tStoring contents of %s into %s\n", Registers[Register], Symbols[ID].Name); + printf("\tStoring contents of %s into %s, type %d\n", Registers[Register], Symbols[ID].Name, Symbols[ID].Type); switch(Symbols[ID].Type) { case RET_CHAR: @@ -380,14 +388,42 @@ int AsStrVar(int Register, int ID) { break; case RET_LONG: + case PTR_CHAR: + case PTR_INT: + case PTR_LONG: + case PTR_VOID: fprintf(OutputFile, "\tmovq\t%s, %s(%%rip)\n", Registers[Register], Symbols[ID].Name); break; default: - DieMessage("Bad type for saving", Types[Symbols[ID].Type]); + DieMessage("Bad type for saving", TypeNames[Symbols[ID].Type]); + } + + return Register; +} + +int AsAddr(int ID) { + int Register = RetrieveRegister(); + printf("\tSaving pointer of %s into %s\n", Symbols[ID].Name, Registers[Register]); + + fprintf(OutputFile, "\tleaq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Register]); + return Register; +} + +int AsDeref(int Reg, int Type) { + + printf("\tDereferencing %s\n", Registers[Reg]); + switch(Type) { + case PTR_CHAR: + fprintf(OutputFile, "\tmovzbq\t(%s), %s\n", Registers[Reg], Registers[Reg]); + break; + case PTR_INT: + case PTR_LONG: + fprintf(OutputFile, "\tmovq\t(%s), %s\n", Registers[Reg], Registers[Reg]); + break; } - return Register; + return Reg; } void AsNewSymb(int ID) { @@ -432,7 +468,7 @@ int AsReturn(int Register, int FuncID) { break; default: - DieMessage("Bad function type in generating return", Types[Symbols[FuncID].Type]); + DieMessage("Bad function type in generating return", TypeNames[Symbols[FuncID].Type]); } diff --git a/src/Lexer.c b/src/Lexer.c index 85c2dfd..dbc17c0 100644 --- a/src/Lexer.c +++ b/src/Lexer.c @@ -218,6 +218,10 @@ int Tokenise(struct Token* Token) { case '/': Token->type = AR_SLASH; break; + + case '&': + Token->type = LI_AMP; + break; case '=': Char = NextChar(); diff --git a/src/Main.c b/src/Main.c index bda3e4c..c2d5082 100644 --- a/src/Main.c +++ b/src/Main.c @@ -35,6 +35,8 @@ char* TokenNames[] = { "Logical Block Start", "Logical Block End", + "Dereference operator", + "Identifier", "None Type", "Char Type", @@ -52,21 +54,8 @@ char* TokenNames[] = { "Return keyword" }; -static void TokeniseFile() { +char* TypeNames[9] = { "none", "char", "int", "long", "void", "charptr", "intptr", "longptr", "voidptr"}; - struct Token Token; - - while(Tokenise(&Token)) { - - printf("Token %s", TokenStrings[Token.type]); - if(Token.type == LI_INT) { - printf(", value %d", Token.value); - } - - printf("\n"); - } - -} int main(int argc, char* argv[]) { Line = 1; diff --git a/src/Parser.c b/src/Parser.c index 886ebf1..2621ac5 100644 --- a/src/Parser.c +++ b/src/Parser.c @@ -99,7 +99,7 @@ int ParseTokenToOperation(int Token) { * */ -static struct ASTNode* ParsePrimary(void) { +struct ASTNode* ParsePrimary(void) { struct ASTNode* Node; int ID; @@ -174,7 +174,7 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { int LeftType, RightType; int NodeType; - LeftNode = ParsePrimary(); + LeftNode = PrefixStatement(); NodeType = CurrentToken.type; if(NodeType == LI_SEMIC || NodeType == LI_RPARE) diff --git a/src/Pointers.c b/src/Pointers.c new file mode 100644 index 0000000..d708201 --- /dev/null +++ b/src/Pointers.c @@ -0,0 +1,66 @@ + +/*************/ +/*GEMWIRE */ +/* ERYTHRO*/ +/*************/ + +#include +#include + +int PointerTo(int Type) { + + printf("\t\tPointerising a %s\n", TypeNames[Type]); + // As it stands, the conversion between + // RET and PTR is +4. + // TODO: if we add types, increase this number + // TODO: Make this a proper translation table + // TODO: More checks! This can go wrong easily! + if(Type >= RET_CHAR && Type <= RET_VOID) { + return Type + 4; + } else { + DieDecimal("Unable to create a pointer to the desired type", Type); + } + + return -1; +} + +int ValueAt(int Type) { + + printf("\t\tDereferencing a %s\n", TypeNames[Type]); + //TODO: this is still bullshittery! + if(Type >= PTR_CHAR && Type <= PTR_VOID) { + return Type - 4; + } else { + DieDecimal("Unable to dereference type", Type); + } + return -1; +} + +int ParsePointer() { + + int Type; + // TODO: THIS IS WRONG AND SHOULD NOT EXIST + // TY_CHAR is 21, RET_CHAR is 1. + // Offset is 20. Rest are in order + + if(CurrentToken.type >= TY_CHAR && CurrentToken.type <= TY_VOID) { + Type = CurrentToken.type - 20; + } else { + DieDecimal("Illegal type for pointerisation", CurrentToken.type); + } + // Recursively scan more *s + // This makes things like: + // x = **y; + // possible. + while(1) { + Tokenise(&CurrentToken); + printf("\t\t\tType on parsing is %d\n", CurrentToken.type); + if(CurrentToken.type != AR_STAR) + break; + + Type = PointerTo(Type); + } + + return Type; +} + diff --git a/src/Statements.c b/src/Statements.c index bfce785..94d0d1c 100644 --- a/src/Statements.c +++ b/src/Statements.c @@ -20,28 +20,11 @@ * */ -static char* Types[5] = { "none", "char", "int", "long", "void" }; -static int TypeSize[5] = { 0, 1, 4, 8, 0}; // in BYTES - -int ParseType(int Token) { - switch(Token) { - case TY_CHAR: - return RET_CHAR; - - case TY_VOID: - return RET_VOID; - - case TY_INT: - return RET_INT; - - default: - DieDecimal("Illegal variable type", Token); - } -} +static int TypeSize[9] = { 0, 1, 4, 8, 0, 8, 8, 8, 8}; // in BYTES int PrimitiveSize(int Type) { - if(Type < RET_NONE || Type > RET_VOID) - DieMessage("Checking size of bad data type", Types[Type]); + if(Type < RET_NONE || Type > PTR_VOID) + DieDecimal("Checking size of bad data type", Type); return TypeSize[Type]; } @@ -145,10 +128,9 @@ int TypesCompatible(int* Left, int* Right, int STRICT) { */ void BeginVariableDeclaration(void) { int ID; - - int Type = ParseType(CurrentToken.type); + int Type = ParsePointer(CurrentToken.type); //printf("type: %s\n", Types[Type]); - Tokenise(&CurrentToken); + VerifyToken(TY_IDENTIFIER, "ident"); //printf("Identifier: %s\n", CurrentIdentifier); @@ -163,8 +145,8 @@ struct ASTNode* ParseFunction() { struct ASTNode* FinalStatement; int SymbolSlot, BreakLabel, Type; - Type = ParseType(CurrentToken.type); - Tokenise(&CurrentToken); + Type = ParsePointer(CurrentToken.type); + VerifyToken(KW_FUNC, "::"); VerifyToken(TY_IDENTIFIER, "ident"); @@ -251,7 +233,7 @@ struct ASTNode* ParseIdentifier() { VerifyToken(TY_IDENTIFIER, "ident"); - printf("After parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier)); + printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier)); if(CurrentToken.type == LI_LPARE) return CallFunction(); @@ -385,4 +367,41 @@ struct ASTNode* PrintStatement(void) { return Tree; +} + +struct ASTNode* PrefixStatement() { + struct ASTNode* Tree; + + switch (CurrentToken.type) { + case LI_AMP: + Tokenise(&CurrentToken); + + // To allow things like: + // x = &&y; + // We need to recursively parse prefixes; + Tree = PrefixStatement(); + + if(Tree->Operation != REF_IDENT) + Die("& must be followed by another & or an identifier."); + + Tree->Operation = OP_ADDRESS; + Tree->ExprType = PointerTo(Tree->ExprType); + break; + case AR_STAR: + Tokenise(&CurrentToken); + + Tree = PrefixStatement(); + + if(Tree->Operation != REF_IDENT && Tree->Operation != OP_DEREF) + Die("* must be followed by another * or an identifier."); + + Tree = ConstructASTBranch(OP_DEREF, ValueAt(Tree->ExprType), Tree, 0); + break; + + default: + Tree = ParsePrimary(); + + } + + return Tree; } \ No newline at end of file diff --git a/tests/pointers b/tests/pointers new file mode 100644 index 0000000..8f84bb7 --- /dev/null +++ b/tests/pointers @@ -0,0 +1,22 @@ +int :: main() { + char a; + char *b; + char c; + int d; + int *e; + int f; + + a= 18; + PrintInteger(a); + b= &a; + c= *b; + PrintInteger(c); + + d= 12; + PrintInteger(d); + e= &d; + f= *e; + PrintInteger(f); + + return(0); +} \ No newline at end of file