From 4e47cdcaf6b4405c3698fdaae1e0afed2398544e Mon Sep 17 00:00:00 2001 From: Curle Date: Mon, 24 Apr 2023 03:03:31 +0100 Subject: [PATCH] Add switch statement, case and default handling, wire in the error handler for a sample program --- CMakeLists.txt | 2 +- include/Defs.h | 5 +- src/Dump.c | 25 ++++++++- src/Errors.c | 10 +++- src/Lexer.c | 30 +++------- src/Main.c | 5 ++ src/Parser.c | 9 +-- src/Statements.c | 108 ++++++++++++++++++++++++++++++++++-- src/assemble/ASMAssembler.c | 93 +++++++++++++++++++++++++++++-- tests/switch.er | 27 +++++++++ 10 files changed, 270 insertions(+), 44 deletions(-) create mode 100644 tests/switch.er diff --git a/CMakeLists.txt b/CMakeLists.txt index 75bed9e..aa97b12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,4 +21,4 @@ add_executable(Erythro src/Symbols.c src/Types.c src/Importer.c - src/assemble/JVMAssembler.c) + src/assemble/JVMAssembler.c src/Errors.c) diff --git a/include/Defs.h b/include/Defs.h index 28248a5..168302a 100644 --- a/include/Defs.h +++ b/include/Defs.h @@ -373,8 +373,7 @@ void DisplayUsage(char* ProgName); void Tokenise(); void VerifyToken(int Type, char* TokenExpected); - -void RejectToken(struct Token* Token); +bool OptionallyConsume(int Type); static int ReadIdentifier(int Char, char* Buffer, int Limit); @@ -585,7 +584,7 @@ struct AssemblerVtable { int (*AsIf)(struct ASTNode*, int, int); int (*AsWhile)(struct ASTNode*); int (*AsSwitch)(struct ASTNode*); - int (*AsSwitchTable)(int, int, int, int*, int*, int); + void (*AsSwitchTable)(int, int, int, int*, int*, int); int (*NewLabel)(); void (*AsJmp)(int); void (*AsLabel)(int); diff --git a/src/Dump.c b/src/Dump.c index e5a4dae..1a48a46 100644 --- a/src/Dump.c +++ b/src/Dump.c @@ -16,12 +16,12 @@ static int GenerateSrg() { * Walk the Node tree, and dump the AST tree to stdout. */ void DumpTree(struct ASTNode* Node, int level) { - int Lfalse, Lstart, Lend; + int Lstart, Lend; // Handle weirdo loops and conditions first. switch (Node->Operation) { case OP_IF: - Lfalse = GenerateSrg(); + GenerateSrg(); for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "IF"); @@ -43,7 +43,15 @@ void DumpTree(struct ASTNode* Node, int level) { for (int i = 0; i < level; i++) fprintf(stdout, " "); fprintf(stdout, "LOOP starts at %d\n", Lstart); - Lend = GenerateSrg(); + GenerateSrg(); + DumpTree(Node->Left, level + 2); + DumpTree(Node->Right, level + 2); + return; + + case OP_SWITCH: + for (int i = 0; i < level; i++) + fprintf(stdout, " "); + fprintf(stdout, "SWITCH\n"); DumpTree(Node->Left, level + 2); DumpTree(Node->Right, level + 2); return; @@ -187,6 +195,17 @@ void DumpTree(struct ASTNode* Node, int level) { fprintf(stdout, "OP_BOOLCONV\n"); return; + case OP_DEFAULT: + fprintf(stdout, "OP_DEFAULT\n"); + DumpTree(Node->Left, level + 2); + return; + + case OP_CASE: + fprintf(stdout, "OP_CASE %d\n", Node->IntValue); + DumpTree(Node->Left, level + 2); + DumpTree(Node->Right, level); + return; + default: DieDecimal("Unknown Dump Operator", Node->Operation); } diff --git a/src/Errors.c b/src/Errors.c index cdde91e..eb6e59f 100644 --- a/src/Errors.c +++ b/src/Errors.c @@ -72,12 +72,16 @@ void ErrorReport(char* message, ...) { printErrorLine(file, line - 1); printHelpLine(line, strbuf); printLine(file, line); - printLine(file, line + 1); + if (!feof(file)) + printLine(file, line + 1); } else { printErrorLine(file, line); printHelpLine(line, strbuf); - printLine(file, line + 1); - printLine(file, line + 2); + + if (!feof(file)) + printLine(file, line + 1); + if (!feof(file)) + printLine(file, line + 2); } diff --git a/src/Lexer.c b/src/Lexer.c index 7b5e076..7ad19ae 100644 --- a/src/Lexer.c +++ b/src/Lexer.c @@ -103,21 +103,13 @@ void VerifyToken(int Type, char* TokenExpected) { } } -static struct Token* RejectedToken = NULL; - -/* - * Rejected Tokens and the Overread Stream are identical concepts. - * This was implemented first, but it is no longer used. - * TODO: Refactor this function out. - */ - -void RejectToken(struct Token* Token) { - if (RejectedToken != NULL) - Die("Cannot reject two tokens in a row!"); - - RejectedToken = Token; +bool OptionallyConsume(int Type) { + if (CurrentFile->CurrentSymbol.type == Type) { + Tokenise(); + return true; + } + return false; } - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * L I T E R A L S A N D I D E N T I F I E R S * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -127,7 +119,7 @@ void RejectToken(struct Token* Token) { * Currently only supports the decimal numbers, despite the * FindDigitFromPos function allowing conversion. * - * The functon loops over the characters, multiplying by 10 and adding + * The function loops over the characters, multiplying by 10 and adding * the new value on top, until a non-numeric character is found. * At that point, it returns the non-numeric character to the Overread Stream * and returns the calculated number. @@ -409,12 +401,6 @@ void Tokenise() { int Char, TokenType; struct Token* Token = &CurrentFile->CurrentSymbol; - if (RejectedToken != NULL) { - Token = RejectedToken; - RejectedToken = NULL; - return; - } - Char = FindChar(); switch (Char) { @@ -575,7 +561,7 @@ void Tokenise() { if (Char == ':') { Token->type = KW_FUNC; } else { - ReturnCharToStream(Char); + Token->type = LI_COLON; } break; diff --git a/src/Main.c b/src/Main.c index 4150736..6bcd470 100644 --- a/src/Main.c +++ b/src/Main.c @@ -47,6 +47,7 @@ char* TokenNames[] = { "Integer literal", "String literal", "Statement End", + "Colon", "Compound Block Start", "Compound Block End", @@ -72,6 +73,10 @@ char* TokenNames[] = { "Break keyword", "Continue keyword", + "Switch Keyword", + "Default Keyword", + "Case Keyword", + "Print Keyword", "If keyword", "Else keyword", diff --git a/src/Parser.c b/src/Parser.c index bece0b9..6743e64 100644 --- a/src/Parser.c +++ b/src/Parser.c @@ -286,18 +286,16 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { // int LeftType, RightType; int NodeType, OpType; - printf("Left node branch\r\n"); fflush(stdout); LeftNode = PrefixStatement(); NodeType = CurrentFile->CurrentSymbol.type; - if (NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) { + if (NodeType == LI_SEMIC || NodeType == LI_COLON || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM || NodeType == LI_INT) { LeftNode->RVal = 1; return LeftNode; } - printf("Operator expected\r\n"); while ((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) { Tokenise(); @@ -306,7 +304,6 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { RightNode = ParsePrecedenceASTNode(Precedence[NodeType]); - /** * While parsing this node, we may need to widen some types. * This requires a few functions and checks. @@ -472,6 +469,7 @@ struct ASTNode* GetExpressionList() { * * If Statement * * While Statement * * For Statement + * * Switch Statement * * Return Statement * * Numeric literals and variables * * Binary Expressions @@ -497,6 +495,9 @@ struct ASTNode* ParseStatement(void) { VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment? return NULL; + case KW_SWITCH: + return SwitchStatement(); + case KW_IF: return IfStatement(); diff --git a/src/Statements.c b/src/Statements.c index 53eb343..ad9a9aa 100644 --- a/src/Statements.c +++ b/src/Statements.c @@ -262,7 +262,7 @@ struct ASTNode* ParseFunction(int Type) { if (OldFunction->Storage != ST_FUNC) OldFunction = NULL; if (OldFunction == NULL) { - BreakLabel = NewLabel(); + BreakLabel = Assembler->vtable->NewLabel(); NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); } @@ -324,7 +324,7 @@ struct ASTNode* ReturnStatement() { VerifyToken(KW_RETURN, "return"); - VerifyToken(LI_LPARE, "("); // TODO: Make optional! Reject? + bool bracketed = OptionallyConsume(LI_LPARE); Tree = ParsePrecedenceASTNode(0); @@ -337,7 +337,7 @@ struct ASTNode* ReturnStatement() { printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name); - VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE! + if (bracketed) VerifyToken(LI_RPARE, ")"); return Tree; } @@ -543,6 +543,107 @@ struct ASTNode* PrintStatement(void) { } +struct ASTNode* SwitchStatement() { + struct ASTNode* left, *root, *c, *casetree=NULL, *casetail; + int looping=1, cases=0; + int defaultpresent=0; + int ASTOp, casevalue; + + printf("\tParsing switch statement\n"); + + // Skip switch( + Tokenise(); + VerifyToken(LI_LPARE, "("); + + printf("\tSwitch: Reading switch expression\n"); + // Fetch switch expression + left = ParsePrecedenceASTNode(0); + // Consume ) { + VerifyToken(LI_RPARE, ")"); + VerifyToken(LI_LBRAC, "{"); + + // Verify the switch expression (must be integer-compatible) + if (!TypeIsInt(!left->ExprType)) + Die("Switch expression is not of integer type"); + + Safe(); + + // Create the root Switch node + root = ConstructASTBranch(OP_SWITCH, 0, left, NULL, 0); + + // Iterate down the switch node, generating cases + while (looping) { + switch (CurrentFile->CurrentSymbol.type) { + case LI_RBRAC: + if (cases == 0) + Die("No cases in switch statement"); + looping = 0; + break; + case KW_CASE: + if (defaultpresent) + Die("Case present after default in switch."); + + ASTOp = OP_CASE; + + Safe(); + Tokenise(); + + // Parse case value + left = ParsePrecedenceASTNode(0); + if (left->Operation != TERM_INTLITERAL) + Die("Expecting integer literal for case value"); + casevalue = left->IntValue; + printf("\t\tSwitch case %d found\n", casevalue); + + // Make sure nothing resolves to the same case value + for (c = casetree; c != NULL; c = c->Right) + if (casevalue == c->IntValue) + Die("Duplicate case ID in switch statement"); + // Fallthrough so that we get the case tree logic deduplicated + case KW_DEFAULT: + if (defaultpresent) + Die("Duplicate default entries in switch"); + // Duplicate check because CASE falls through into this block + if (CurrentFile->CurrentSymbol.type == KW_DEFAULT) { + ASTOp = OP_DEFAULT; + defaultpresent = true; + Tokenise(); + + printf("\t\tSwitch default case found\n"); + } + + VerifyToken(LI_COLON, ":"); + Safe(); + + left = ParseStatement(); + OptionallyConsume(LI_SEMIC); + cases++; + Safe(); + + // Append this new case to the tree + if (casetree == NULL) { + casetree = casetail = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue); + } else { + casetail->Right = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue); + casetail = casetail->Right; + } + break; + + default: + ErrorReport("Unexpected token in switch statement: %s\n", TokenNames[CurrentFile->CurrentSymbol.type]); + exit(1); + } + } + + root->IntValue = cases; + root->Right = casetree; + + // Consume the right brace immediately + VerifyToken(LI_RBRAC, "}"); + + return root; +} + /** * Handles the surrounding logic for break statements * @@ -571,7 +672,6 @@ struct ASTNode* BreakStatement() { Die("Unable to break without a loop"); Tokenise(); - Tokenise(); return ConstructASTLeaf(OP_BREAK, 0, NULL, 0); } diff --git a/src/assemble/ASMAssembler.c b/src/assemble/ASMAssembler.c index 2384aad..fc4df51 100644 --- a/src/assemble/ASMAssembler.c +++ b/src/assemble/ASMAssembler.c @@ -296,6 +296,68 @@ static int AsWhile(struct ASTNode* Node) { } +static void AsSwitchTable(int reg, int cases, int toplabel, int* caselabel, int* caseval, int defaultlabel) { + int i, label; + + label = NewLabel(); + AsLabel(label); + + // Add a default case even if not present. + if (cases == 0) { + caseval[0] = 0; + caselabel[0] = defaultlabel; + cases = 1; + } + + fprintf(OutputFile, "\t.quad\t%d\n", cases); + for (i = 0; i < cases; i++) + fprintf(OutputFile, "\t.quad\t%d, L%d\n", caseval[i], caselabel[i]); + fprintf(OutputFile, "\t.quad\tL%d\n", defaultlabel); + + AsLabel(toplabel); + fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[reg]); + fprintf(OutputFile, "\tleaq\tL%d(%%rip), %%rdx\n", label); + fprintf(OutputFile, "\tjmp\tswitch\n"); +} + +static int AsSwitch(struct ASTNode* root) { + int* caseval, *caselabel; + int Ljump, Lend; + int i, reg, defaultlabel=0, cases=0; + struct ASTNode* c; + + caseval = (int*) malloc((root->IntValue + 1) * sizeof(int)); + caselabel = (int*) malloc((root->IntValue + 1) * sizeof(int)); + + Ljump = NewLabel(); + Lend = NewLabel(); + + defaultlabel = Lend; + + reg = AssembleTree(root->Left, -1, -1, -1, 0); + AsJmp(Ljump); + DeallocateAllRegisters(); + + for (i = 0, c = root->Right; c != NULL; i++, c = c->Right) { + caselabel[i] = NewLabel(); + caseval[i] = c->IntValue; + AsLabel(caselabel[i]); + + if (c->Operation == OP_DEFAULT) + defaultlabel = caselabel[i]; + else + cases++; + + AssembleTree(c->Left, -1, -1, Lend, 0); + DeallocateAllRegisters(); + } + + AsJmp(Lend); + AsSwitchTable(reg, cases, Ljump, caselabel, caseval, defaultlabel); + AsLabel(Lend); + return -1; +} + // Load a value into a register. static int AsLoad(int Value) { int Register = RetrieveRegister(); @@ -885,9 +947,27 @@ static int AsBooleanConvert(int Register, int Operation, int Label) { static void AssemblerPreamble() { DeallocateAllRegisters(); fputs( - "\t.text\n", /* - ".LC0:\n" - "\t.string\t\"%d\\n\"\n", */ + "\t.text\n" + "switch:\n" + "\t\tpushq %rsi\n" + "\t\tmovq %rdx, %rsi\n" + "\t\tmovq %rax, %rbx\n" + "\t\tcld\n" + "\t\tlodsq\n" + "\t\tmovq %rax, %rcx\n" + "next:\n" + "\t\tlodsq\n" + "\t\tmovq %rax, %rdx\n" + "\t\tlodsq\n" + "\t\tcmpq %rdx, %rbx\n" + "\t\tjnz no\n" + "\t\tpopq %rsi\n" + "\t\tjmp *%rax\n" + "no:\n" + "\t\tloop next\n" + "\t\tlodsq\n" + "\t\tpopq %rsi\n" + "\t\tjmp *%rax\n", OutputFile); } @@ -991,13 +1071,16 @@ static int AssembleTree(struct ASTNode* Node, int Register, int LoopBeginLabel, return -1; case OP_CALL: - return (AsCallWrapper(Node)); + return AsCallWrapper(Node); case OP_FUNC: AsFunctionPreamble(Node->Symbol); AssembleTree(Node->Left, -1, LoopBeginLabel, LoopEndLabel, Node->Operation); AsFunctionEpilogue(Node->Symbol); return -1; + + case OP_SWITCH: + return AsSwitch(Node); } @@ -1219,6 +1302,8 @@ static const struct AssemblerVtable Win32ASMVtable = { .AsStrGlobalVar = AsStrGlobalVar, .AsStrLocalVar = AsStrLocalVar, .AsSub = AsSub, + .AsSwitch = AsSwitch, + .AsSwitchTable = AsSwitchTable, .AsWhile = AsWhile, .DeallocateAllRegisters = DeallocateAllRegisters, .RetrieveRegister = RetrieveRegister, diff --git a/tests/switch.er b/tests/switch.er new file mode 100644 index 0000000..2afc4bf --- /dev/null +++ b/tests/switch.er @@ -0,0 +1,27 @@ +import "tests/print.em" + +int :: main() { + int x; + int y; + + y = 0; + + for (x = 0; x < 5; x++) { + switch(x) { + case 1: { + y = 5; + break; } + case 2: { + y = 7; + break; } + case 3: + y = 9; + default: + y = 100; + } + + printf("%d\n", y); + } + + return 0; +} \ No newline at end of file