From e42a2cfd8df453a1ed662ade289d8faebc073278 Mon Sep 17 00:00:00 2001 From: Curle Date: Mon, 24 Apr 2023 20:41:49 +0100 Subject: [PATCH] Refactor to allow inline initialization of variables. --- include/Data.h | 1 + include/Defs.h | 17 +- src/Dump.c | 15 +- src/Errors.c | 13 +- src/Main.c | 59 +++ src/Parser.c | 195 ++-------- src/Pointers.c | 86 ----- src/Statements.c | 693 +++++++++++++++++++++++++++--------- src/assemble/ASMAssembler.c | 7 +- 9 files changed, 623 insertions(+), 463 deletions(-) diff --git a/include/Data.h b/include/Data.h index 3442547..dafbc91 100644 --- a/include/Data.h +++ b/include/Data.h @@ -55,6 +55,7 @@ extern_ struct AssemblerModule* Assembler; // The names of each token in the language, synchronized to the TokenTypes enum. extern_ char* TokenNames[]; +extern_ char* OperationNames[]; // The names of the storage scopes. extern_ char* ScopeNames[]; diff --git a/include/Defs.h b/include/Defs.h index f747950..f3f5712 100644 --- a/include/Defs.h +++ b/include/Defs.h @@ -433,19 +433,13 @@ struct ASTNode* PostfixStatement(); void ParseGlobals(); -struct ASTNode* ParseFunction(int Type); +int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbool, int TerminateSymbol); struct ASTNode* ParseCompound(); struct SymbolTableEntry* BeginCompositeDeclaration(int Type); -void BeginEnumDeclaration(); - -int ReadAlias(struct SymbolTableEntry** Composite); - -int ParseAlias(char* Name, struct SymbolTableEntry** Composite); - -struct ASTNode* GetExpressionList(); +struct ASTNode* ParseExpressionList(int terminateToken); struct ASTNode* CallFunction(); @@ -455,8 +449,6 @@ struct ASTNode* BreakStatement(); struct ASTNode* ContinueStatement(); -int ReadTypeOrKeyword(struct SymbolTableEntry** Composite); - int ValueAt(int Type); int PointerTo(int Type); @@ -467,9 +459,6 @@ struct ASTNode* AccessMember(bool Deref); int ParseTokenToOperation(int Token); -struct ASTNode* PrintStatement(void); - - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * S Y M B O L T A B L E * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -628,8 +617,6 @@ void RegisterJVM(); struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope); -struct ASTNode* ParseIdentifier(void); - struct ASTNode* IfStatement(); struct ASTNode* WhileStatement(); diff --git a/src/Dump.c b/src/Dump.c index 1a48a46..1c44df4 100644 --- a/src/Dump.c +++ b/src/Dump.c @@ -47,14 +47,6 @@ void DumpTree(struct ASTNode* Node, int level) { DumpTree(Node->Left, level + 2); DumpTree(Node->Right, level + 2); return; - - case OP_SWITCH: - for (int i = 0; i < level; i++) - fprintf(stdout, " "); - fprintf(stdout, "SWITCH\n"); - DumpTree(Node->Left, level + 2); - DumpTree(Node->Right, level + 2); - return; } // If current node is a compound, we treat it as if we didn't just enter a loop. @@ -197,13 +189,14 @@ void DumpTree(struct ASTNode* Node, int level) { case OP_DEFAULT: fprintf(stdout, "OP_DEFAULT\n"); - DumpTree(Node->Left, level + 2); return; case OP_CASE: fprintf(stdout, "OP_CASE %d\n", Node->IntValue); - DumpTree(Node->Left, level + 2); - DumpTree(Node->Right, level); + return; + + case OP_SWITCH: + fprintf(stdout, "SWITCH\n"); return; default: diff --git a/src/Errors.c b/src/Errors.c index eb6e59f..10dc699 100644 --- a/src/Errors.c +++ b/src/Errors.c @@ -8,7 +8,7 @@ #include void Safe() { - CurrentFile->CurrentSafeColumn = CurrentFile->CurrentColumn; + CurrentFile->CurrentSafeColumn = CurrentFile->CurrentColumn - 1; } void printLine(FILE* file, int ln) { @@ -20,11 +20,17 @@ void printLine(FILE* file, int ln) { void printErrorLine(FILE* file, int ln) { char firstBuffer[256], problemBuffer[256], tailBuffer[256]; + // If highlight starts at column 0, don't try to print anything before it if (CurrentFile->CurrentSafeColumn != 0) fgets(firstBuffer, CurrentFile->CurrentSafeColumn, file); - fgets(problemBuffer, (CurrentFile->CurrentColumn > CurrentFile->CurrentSafeColumn ? CurrentFile->CurrentColumn - CurrentFile->CurrentSafeColumn : CurrentFile->CurrentSafeColumn), file); + + // Print the safe column up to current column + fgets(problemBuffer, (CurrentFile->CurrentColumn > CurrentFile->CurrentSafeColumn ? CurrentFile->CurrentColumn - CurrentFile->CurrentSafeColumn : CurrentFile->CurrentColumn), file); + + // Print the current column to the end of the line if (CurrentFile->CurrentColumn > CurrentFile->CurrentSafeColumn) fgets(tailBuffer, 256, file); + // Line number printf("%03d|%s\033[0;31m%s\033[0m%s", ln + 1, firstBuffer, problemBuffer, tailBuffer); } @@ -34,6 +40,7 @@ void printHelpLine(int line, char* message) { } void ErrorReport(char* message, ...) { + fflush(stdout); char strbuf[256]; // Resolve varargs to a string @@ -84,5 +91,5 @@ void ErrorReport(char* message, ...) { printLine(file, line + 2); } - + exit(1); } diff --git a/src/Main.c b/src/Main.c index 6bcd470..5bdedcc 100644 --- a/src/Main.c +++ b/src/Main.c @@ -92,6 +92,65 @@ char* TokenNames[] = { "Import keyword" }; +char* OperationNames[] = { + "OP_ASSIGN", // Assign an l-value + "OP_BOOLOR", // Boolean OR two statements + "OP_BOOLAND", // Boolean AND two statements + "OP_BITOR", // Bitwise OR a number + "OP_BITXOR", // Bitwise XOR a number + "OP_BITAND", // Bitwise AND a number + + "OP_EQUAL", // Compare equality + "OP_INEQ", // Compare inequality + "OP_LESS", // Less than? + "OP_GREAT", // Greater than? + "OP_LESSE", // Less than or Equal to? + "OP_GREATE", // Greater than or Equal to? + + "OP_SHIFTL", // Arithmetic Shift Left (Multiply by 2) + "OP_SHIFTR", // Arithmetic Shift Right (Divide by 2) + "OP_ADD", // Add two numbers. + "OP_SUBTRACT", // Subtract two numbers. + "OP_MULTIPLY", // Multiply two numbers. + "OP_DIVIDE", // Divide two numbers. + + "OP_PREINC", // Increment var before reference. + "OP_PREDEC", // Decrement var before reference. + "OP_POSTINC", // Increment var after reference. + "OP_POSTDEC", // Decrement var after reference. + + "OP_BITNOT", // Invert a number bitwise + "OP_BOOLNOT", // Invert a statement logically + "OP_NEGATE", // Negate a number (turn a positive number negative + "OP_BOOLCONV", // Convert an expression to a boolean.s + + "OP_ADDRESS", // Fetch the address of a var + "OP_DEREF", // Get the value of the address in a pointer + + "TERM_INTLITERAL", // Integer Literal. This is a virtual operation", so it's a terminal. + "TERM_STRLITERAL", // String Literal. Also terminal. + + "REF_IDENT", // Reference (read) an identifier (variable). + + "OP_WIDEN", // Something contains a type that needs to be casted up + "OP_SCALE", // We have a pointer that needs to be scaled! + + "OP_CALL", // Call a function + "OP_RET", // Return from a function + + "OP_COMP", // Compound statements need a way to be 'glued' together. This is one of those mechanisms + "OP_IF", // If statement + "OP_LOOP", // FOR", WHILE + "OP_PRINT", // Print statement + + "OP_FUNC", // Define a function + "OP_BREAK", // Break out of the loop + "OP_CONTINUE", // Continue the loop + "OP_SWITCH", // Switch statement + "OP_DEFAULT", // Default case + "OP_CASE" // Case +}; + char* ScopeNames[] = { "INVALID", "GLOBAL", diff --git a/src/Parser.c b/src/Parser.c index b7d9c88..7b04734 100644 --- a/src/Parser.c +++ b/src/Parser.c @@ -161,64 +161,6 @@ int ParseTokenToOperation(int Token) { DieDecimal("ParseToken: Unknown token", Token); } -/* - * The "alias" keyword allows one to create a new keyword that is accepted in lieu of another (or a chain of another) - * It does this by reading in sequence: - * * The "alias" keyword - * * The thing to alias (any valid primary type) - * * The new name - * - * They are stored in a separate symbol table and can be used anywhere the original is valid. - */ -int ReadAlias(struct SymbolTableEntry** Composite) { - int Type; - - Tokenise(); - - Type = ReadTypeOrKeyword(Composite); - - if (FindAlias(CurrentIdentifier) != NULL) - DieMessage("Redefinition of type", CurrentIdentifier); - - AddSymbol(CurrentIdentifier, Type, ST_VAR, SC_ALIAS, 0, 0, *Composite); - Tokenise(); - - return Type; -} - -/** - * When using an alias, we need to lookup the name (possibly recursively) to check whether it is a valid alias. - * If so, we need to know what it is an alias of. - * Once we have resolved what it finally means, we return the type. - * @param Name The name of the (initial) alias to check - * @param Composite A pointer to the composite element we should fill in. - * @return The aliased type. - */ -int ParseAlias(char* Name, struct SymbolTableEntry** Composite) { - struct SymbolTableEntry* Type = NULL, *RootType = NULL; - - // Ensure the first-round alias exists - Type = FindAlias(Name); - if (Type == NULL) - DieMessage("Unknown alias", Name); - - // Loop on the alias for as long as it continues to exist. - while (true) { - if (Type->CompositeType == NULL) - break; - - RootType = FindAlias(Type->CompositeType->Name); - if (RootType == NULL) - break; - Type = RootType; - } - - Tokenise(); - *Composite = Type->CompositeType; - return Type->Type; - -} - /* * Primary expressions may be any one of: * * A terminal integer literal @@ -414,7 +356,7 @@ struct ASTNode* CallFunction() { VerifyToken(LI_LPARE, "("); - Tree = GetExpressionList(); + Tree = ParseExpressionList(LI_RPARE); Tree = ConstructASTBranch(OP_CALL, Function->Type, Tree, Function, 0); @@ -438,25 +380,22 @@ struct ASTNode* CallFunction() { * end with a COMPOSITE operation. * */ -struct ASTNode* GetExpressionList() { +struct ASTNode* ParseExpressionList(int terminate) { struct ASTNode* Tree = NULL, * Child = NULL; int Count = 0; - while (CurrentFile->CurrentSymbol.type != LI_RPARE) { + Safe(); + while (CurrentFile->CurrentSymbol.type != terminate) { Child = ParsePrecedenceASTNode(0); Count++; - + Safe(); Tree = ConstructASTNode(OP_COMP, PointerTo(RET_VOID), Tree, NULL, Child, NULL, Count); - switch (CurrentFile->CurrentSymbol.type) { - case LI_COM: - Tokenise(); - break; - case LI_RPARE: - break; - default: - Die("Unexpected token in argument list"); - } + if (CurrentFile->CurrentSymbol.type == terminate) + break; + + VerifyToken(LI_COM, ","); + Safe(); } return Tree; @@ -484,8 +423,8 @@ struct ASTNode* GetExpressionList() { * @return the AST Node representing this single statement */ struct ASTNode* ParseStatement(void) { - int Type; struct ASTNode* Node; + struct SymbolTableEntry* Composite; printf("\t\tBranch leads to here, type %s/%d\r\n", TokenNames[CurrentFile->CurrentSymbol.type], CurrentFile->CurrentSymbol.type); switch (CurrentFile->CurrentSymbol.type) { @@ -497,11 +436,14 @@ struct ASTNode* ParseStatement(void) { case TY_CHAR: case TY_LONG: case TY_INT: + case KW_STRUCT: + case KW_UNION: + case KW_ENUM: + case KW_ALIAS: printf("\t\tNew Variable: %s\n", CurrentIdentifier); - Type = ReadTypeOrKeyword(NULL); - VerifyToken(TY_IDENTIFIER, "ident"); - BeginVariableDeclaration(Type, NULL, SC_LOCAL); - VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment? + ParseDeclarationList(&Composite, SC_LOCAL, LI_SEMIC, LI_EOF); + VerifyToken(LI_SEMIC, ";"); + Safe(); return NULL; case KW_SWITCH: @@ -526,58 +468,13 @@ struct ASTNode* ParseStatement(void) { return ContinueStatement(); default: - return ParsePrecedenceASTNode(0); - } -} - - -/* - * Handles parsing multiple statements or expressions in a row. - * These are typically grouped together with the Compound tokens "{ }" - * and seperated by the semicolon ";". - * - * Single Statements are parsed until a semicolon is reached, at which - * point another statement will be parsed, or until a Right Compound - * token is reached ("}"), at which point parsing will stop. - * - * It is useful for: - * * Tightly identifying related blocks of code - * * Containing the many statements of functions - * - * @return the AST Node representing this compound statement - * - */ -struct ASTNode* ParseCompound() { - struct ASTNode* Left = NULL, * Tree; - - while (1) { - printf("\tNew branch in compound\n"); - - Tree = ParseStatement(); - - if (Tree && (Tree->Operation == OP_PRINT || Tree->Operation == OP_ASSIGN - || Tree->Operation == OP_RET || Tree->Operation == OP_CALL - || Tree->Operation == OP_BREAK || Tree->Operation == OP_CONTINUE)) + Node = ParsePrecedenceASTNode(0); VerifyToken(LI_SEMIC, ";"); - - if (Tree) { - if (Left == NULL) - Left = Tree; - else - Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, NULL, 0); - } - - if (CurrentFile->CurrentSymbol.type == LI_RBRAC) { - fflush(stdout); - return Left; - } - - if (CurrentFile->SwitchStatement && (CurrentFile->CurrentSymbol.type == KW_CASE || CurrentFile->CurrentSymbol.type == KW_DEFAULT)) { - return Left; - } + return Node; } } + /* * This is the entry point to the parser/lexer. * @@ -594,56 +491,16 @@ struct ASTNode* ParseCompound() { */ void ParseGlobals() { - struct ASTNode* Tree; struct SymbolTableEntry* Composite; - int Type, FunctionComing; printf("Parsing global definitions\r\n"); - while (1) { - // We loop early if there's a struct, and since a struct may be the last - // thing in a file, we need to check for eof before anything else - if (CurrentFile->CurrentSymbol.type == LI_EOF) - break; - - printf("New definition incoming..\r\n\n"); - Type = ReadTypeOrKeyword(&Composite); - - //TODO: converge pathways on this block? - if (CurrentFile->CurrentSymbol.type == KW_FUNC) { - VerifyToken(KW_FUNC, "::"); - FunctionComing = 1; - } - - // Structs are parsed fully in ParseOptionalPointer - // TODO: FIX THAT!! - if ((Type == DAT_STRUCT || Type == DAT_UNION || Type == DAT_ENUM || Type == DAT_ALIAS) && CurrentFile->CurrentSymbol.type == LI_SEMIC) { - Tokenise(); - continue; - } - - // If we read metadata or an import, then skip all processing. - if (Type == DAT_NONE) - continue; - - VerifyToken(TY_IDENTIFIER, "ident"); - - if (FunctionComing && CurrentFile->CurrentSymbol.type == LI_LPARE) { - printf("\tParsing function\n"); - Tree = ParseFunction(Type); - if (Tree && CurrentFile->AllowDefinitions) { - printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name); - Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0); - FreeLocals(); - } else { - printf("\nFunction prototype saved\r\n"); - } - } else { - printf("\tParsing global variable declaration\n"); - BeginVariableDeclaration(Type, Composite, SC_GLOBAL); - VerifyToken(LI_SEMIC, ";"); - } + while (CurrentFile->CurrentSymbol.type != LI_EOF) { + // Read in a declaration, or list thereof + ParseDeclarationList(&Composite, SC_GLOBAL, LI_SEMIC, LI_EOF); + // Consume semicolons if present + OptionallyConsume(LI_SEMIC); } } diff --git a/src/Pointers.c b/src/Pointers.c index 4b62586..0dce068 100644 --- a/src/Pointers.c +++ b/src/Pointers.c @@ -57,92 +57,6 @@ int ValueAt(int Type) { return (Type - 1); } -/* - * Type declarations may be raw, they may be pointers. - * If they are pointers, we need to be able to check - * how many levels of indirection. - * However, being a pointer is optional. - * - * This can parase in just a lone type specifier, or - * any valid level of indirection therefore. - * - * @param Composite: unused - * @return the parsed DataType, with any indirection. - * - */ - -int ReadTypeOrKeyword(struct SymbolTableEntry** Composite) { - - int Type; - - switch (CurrentFile->CurrentSymbol.type) { - case KW_IMPORT: - Type = DAT_NONE; - ImportModule(); - break; - case TY_VOID: - Type = RET_VOID; - Tokenise(); - break; - case TY_CHAR: - Type = RET_CHAR; - Tokenise(); - break; - case TY_INT: - Type = RET_INT; - Tokenise(); - break; - case TY_LONG: - Type = RET_LONG; - Tokenise(); - break; - case TY_IDENTIFIER: - Type = ParseAlias(CurrentIdentifier, Composite); - break; - case KW_ALIAS: - Type = ReadAlias(Composite); - if (CurrentFile->CurrentSymbol.type == LI_SEMIC) - Type = DAT_ALIAS; - break; - case KW_ENUM: - Type = RET_INT; - BeginEnumDeclaration(); - if (CurrentFile->CurrentSymbol.type == LI_SEMIC) - Type = DAT_ENUM; - break; - case KW_STRUCT: - Type = DAT_STRUCT; - *Composite = BeginCompositeDeclaration(Type); - break; - case KW_UNION: - Type = DAT_UNION; - *Composite = BeginCompositeDeclaration(Type); - break; - default: - DieDecimal("Illegal type for pointerisation", CurrentFile->CurrentSymbol.type); - } - // Recursively scan more *s - // This makes things like: - // x = **y; - // possible. - while (1) { - // But, skip parsing if we're looking at an import. - if (CurrentFile->CurrentSymbol.type == KW_IMPORT) - break; - - printf("\t\t\tType on parsing is %d\n", CurrentFile->CurrentSymbol.type); - if (CurrentFile->CurrentSymbol.type != AR_STAR) - break; - - Type = PointerTo(Type); - Tokenise(); - // Tokenise(); TODO: is this skipping pointers? - } - - return Type; -} - - /* * Array Accesses come in the form of x[y]. * diff --git a/src/Statements.c b/src/Statements.c index 1751d5c..b7a1744 100644 --- a/src/Statements.c +++ b/src/Statements.c @@ -8,59 +8,505 @@ #include #include +static void ParseEnumDeclaration(); +static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage); +static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType); + +/* + * Handles parsing multiple statements or expressions in a row. + * These are typically grouped together with the Compound tokens "{ }" + * and seperated by the semicolon ";". + * + * Single Statements are parsed until a semicolon is reached, at which + * point another statement will be parsed, or until a Right Compound + * token is reached ("}"), at which point parsing will stop. + * + * It is useful for: + * * Tightly identifying related blocks of code + * * Containing the many statements of functions + * + * @return the AST Node representing this compound statement + * + */ +struct ASTNode* ParseCompound() { + struct ASTNode* Left = NULL, * Tree; + + while (1) { + printf("\tNew branch in compound\n"); + + Tree = ParseStatement(); + + /*if (Tree && (Tree->Operation == OP_PRINT || Tree->Operation == OP_ASSIGN + || Tree->Operation == OP_RET || Tree->Operation == OP_CALL + || Tree->Operation == OP_BREAK || Tree->Operation == OP_CONTINUE)) + VerifyToken(LI_SEMIC, ";"); */ + + Safe(); + + if (Tree) { + if (Left == NULL) + Left = Tree; + else + Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, NULL, 0); + } + + if (CurrentFile->CurrentSymbol.type == LI_RBRAC) { + fflush(stdout); + return Left; + } + + if (CurrentFile->SwitchStatement && (CurrentFile->CurrentSymbol.type == KW_CASE || CurrentFile->CurrentSymbol.type == KW_DEFAULT)) { + return Left; + } + } +} + +/* + * Resolve a typename to a type struct. + * Short circuit on the case where a definition is present, as definitions are typeless. + */ +static int ParseType(struct SymbolTableEntry** CompositeType, int* Scope) { + int Type = -1, Extern = 1; + + while (Extern) { + switch (CurrentFile->CurrentSymbol.type) { + default: + Extern = 0; + } + } + + switch (CurrentFile->CurrentSymbol.type) { + case KW_IMPORT: + Type = -1; + ImportModule(); + break; + case TY_VOID: + Type = RET_VOID; + Tokenise(); + break; + case TY_CHAR: + Type = RET_CHAR; + Tokenise(); + break; + case TY_INT: + Type = RET_INT; + Tokenise(); + break; + case TY_LONG: + Type = RET_LONG; + Tokenise(); + break; + case TY_IDENTIFIER: + case KW_ALIAS: + Type = ParseAliasDeclaration(CompositeType); + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) + Type = -1; + break; + case KW_ENUM: + Type = RET_INT; + ParseEnumDeclaration(); + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) + Type = -1; + break; + case KW_STRUCT: + Type = DAT_STRUCT; + *CompositeType = BeginCompositeDeclaration(Type); + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) + Type = -1; + break; + case KW_UNION: + Type = DAT_UNION; + *CompositeType = BeginCompositeDeclaration(Type); + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) + Type = -1; + break; + default: + ErrorReport("Illegal type on token %s\n", CurrentFile->CurrentSymbol.type); + } + + return Type; +} + +/* + * Given a Type passed by ParseType, read following dereferences and return pointer type. + */ +static int ParsePointerType(int Type) { + while (1) { + // But, skip parsing if we're looking at an import. + if (CurrentFile->CurrentSymbol.type == KW_IMPORT) + break; + + printf("\t\t\tType on parsing is %s\n", TokenNames[CurrentFile->CurrentSymbol.type]); + if (CurrentFile->CurrentSymbol.type != AR_STAR) + break; + + Type = PointerTo(Type); + Tokenise(); + } + + return Type; +} + +/* + * Parse a declaration of an array - the [ ] part. + * + * @param name the name of the array + * @param Type the type of the array, if scalar + * @param CompositeType the type of the array, if composite + * @param Storage the storage class of the array + * @return the defined array symbol + */ +static struct SymbolTableEntry* ParseArrayDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) { + struct SymbolTableEntry* Symbol = NULL; + Tokenise(); + Safe(); + + if (CurrentFile->CurrentSymbol.type == LI_INT) { + switch (Storage) { + case SC_GLOBAL: + Symbol = AddSymbol(name, PointerTo(Type), ST_ARR, Storage, CurrentFile->CurrentSymbol.value, 0, CompositeType); + break; + case SC_LOCAL: + case SC_PARAM: + case SC_MEMBER: + default: + ErrorReport("Local array definitions not permitted.\n"); + } + } + + Tokenise(); + VerifyToken(LI_RBRAC, "]"); + Safe(); + return Symbol; +} + +// A short redirect to add a Scalar definition to the variable tables. +static struct SymbolTableEntry* ParseScalarDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) { + return AddSymbol(name, Type, ST_VAR, Storage, 1, 0, CompositeType); +} + /* * Handles reading in a comma-separated list of declarations. * Erythro treats structs, enums and function parameters the same in this regard - * comma separated. - * + * * C and C++ tend to treat enums and structs differently - the former separated by commas, * the latter separated by semicolons. - * + * * Note that since functions are read in through parentheses, and structs/enums are read in * through brackets, the end character is configurable. - * - * @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable. - * @param Storage: The Storage Scope of this declaration list. - * @param End: The end token, in terms of TokenTypes enum values. - * @return the amount of declarations read in. - * + * + * Parse declarations, including lists thereof, until the Terminate symbol is encountered. + * Will first parse a type name, then parse the identifier using ParseSymbolDeclaration. + * Declaration lists must be separated by a comma or terminated with the StatementEndSymbol. + * + * @param CompositeType out: the type of the declaration list. + * @param ClassType the type of the class + * @param StatementEndSymbool the symbol that marks the end of the declaration list + * @param TerminateSymbol the symbol that marks the end of parsing + * @return the type of the declaration + * */ +int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbool, int TerminateSymbol) { -static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) { + int initType, type; + struct SymbolTableEntry* symbol; + + fflush(stdout); + if ((initType = ParseType(CompositeType, &ClassType)) == -1) + return initType; + + while (1) { + type = ParsePointerType(initType); + symbol = ParseDeclarationSymbol(type, *CompositeType, ClassType); + printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, type, ScopeNames[ClassType]); + + // Lists of function declarations are not valid. + if (symbol->Type == ST_FUNC) { + if (ClassType != SC_GLOBAL) + ErrorReport("Function definition not at global scope\n"); + return type; + } + + // Terminate at either symbol + if (CurrentFile->CurrentSymbol.type == StatementEndSymbool || CurrentFile->CurrentSymbol.type == TerminateSymbol) + return type; + + // We must be continuing the list, so parse a comma + VerifyToken(LI_COM, ","); + } +} + +/* + * Parse the full list of parameter declarations. + * Each has a type, a name, may be a pointer, or an array. + * + * @param FunctionDeclaration the type of the declaration of the function, if declared already. + * @param FunctionDefinition the type of the definition of the function, which we are parsing + * @return the number of parameters parsed + */ +static int ParseParameterDeclarationList(struct SymbolTableEntry* FunctionDeclaration, struct SymbolTableEntry* FunctionDefinition) { int TokenType, ParamCount = 0; struct SymbolTableEntry* PrototypePointer = NULL, * Composite; - if (FunctionSymbol != NULL) - PrototypePointer = FunctionSymbol->Start; + if (FunctionDeclaration != NULL) + PrototypePointer = FunctionDeclaration->Start; - while (CurrentFile->CurrentSymbol.type != End) { - TokenType = ReadTypeOrKeyword(&Composite); - VerifyToken(TY_IDENTIFIER, "identifier"); + while (CurrentFile->CurrentSymbol.type != LI_RPARE) { + // Doing int x, y, float z is valid, so parse a list of declarations per parameter. + TokenType = ParseDeclarationList(&Composite, SC_PARAM, LI_COM, LI_RPARE); + if (TokenType == -1) + ErrorReport("Bad type in parameter list"); - printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, TokenType, ScopeNames[Storage]); + printf("\tReading a new parameter: %s of type %d\n", CurrentIdentifier, TokenType); if (PrototypePointer != NULL) { if (TokenType != PrototypePointer->Type) - DieDecimal("Function parameter of invalid type at index", ParamCount + 1); + ErrorReport("Function parameter has invalid type at index %d\n", ParamCount + 1); PrototypePointer = PrototypePointer->NextSymbol; - } else { - BeginVariableDeclaration(TokenType, Composite, Storage); } + Safe(); ParamCount++; - if ((CurrentFile->CurrentSymbol.type != LI_COM) && (CurrentFile->CurrentSymbol.type != End)) - DieDecimal("Unexpected token in parameter", CurrentFile->CurrentSymbol.type); + if (CurrentFile->CurrentSymbol.type == LI_RPARE) + break; - if (CurrentFile->CurrentSymbol.type == LI_COM) - Tokenise(); + VerifyToken(LI_COM, ","); + Safe(); } - if ((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length)) - DieMessage("Invalid number of parameters in prototyped function", FunctionSymbol->Name); + if ((FunctionDeclaration != NULL) && (ParamCount != FunctionDeclaration->Length)) + ErrorReport("Function definition has different number of parameters than the function declaration (%d vs %d).\n", ParamCount, FunctionDeclaration->Length); return ParamCount; } +/* + * Parse a function declaration, and optionally definition. + * ( parameter(,?)* ) ; + * ( parameter(,?)* ) compound ; + * + * @param name the name of the function + * @param Type the type of the function, if primitive + * @param CompositeType the type of the function, if composite + * @param Storage the scope of the function + * @return the new symbol table entry for the function + */ +static struct SymbolTableEntry* ParseFunctionDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) { + struct ASTNode* Tree; + struct ASTNode* FinalStatement; + struct SymbolTableEntry* OldFunction, * NewFunction = NULL; + int BreakLabel = 0, ParamCount = 0; + + VerifyToken(KW_FUNC, "::"); + Safe(); + VerifyToken(TY_IDENTIFIER, "Identifier"); + Safe(); + + if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL) + if (OldFunction->Storage != ST_FUNC) + OldFunction = NULL; + if (OldFunction == NULL) { + BreakLabel = Assembler->vtable->NewLabel(); + NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); + } + + VerifyToken(LI_LPARE, "("); + Safe(); + ParamCount = ParseParameterDeclarationList(OldFunction, NewFunction); + VerifyToken(LI_RPARE, ")"); + Safe(); + + printf("\nIdentified%sfunction %s of return type %s, end label %d\n", + (OldFunction == NULL) ? " new " : " overloaded ", + (OldFunction == NULL) ? NewFunction->Name : OldFunction->Name, + TypeNames(Type), BreakLabel); + + if (NewFunction) { + NewFunction->Elements = ParamCount; + NewFunction->Start = Params; + NewFunction->Type = RET_LONG; + OldFunction = NewFunction; + } + + Params = ParamsEnd = NULL; + + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) { + return OldFunction; + } + + CurrentFile->FunctionEntry = OldFunction; + + CurrentFile->CurrentLoopDepth = 0; + VerifyToken(LI_LBRAC, "{"); + Safe(); + Tree = ParseCompound(); + Safe(); + VerifyToken(LI_RBRAC, "}"); + + if (Type != RET_VOID) { + // Functions with one statement have no composite node, so we have to check + FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree; + + if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) { + ErrorReport("Function with non-void type does not return"); + } + } + + Tree = ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel); + + if (Tree && CurrentFile->AllowDefinitions) { + printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name); + if (OptDumpTree) { + DumpTree(Tree, 0); + fprintf(stdout, "\n\n"); + } + + // Emit the function now + Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0); + FreeLocals(); + } else { + printf("\nFunction prototype saved\r\n"); + } + + Safe(); + return OldFunction; +} + +/* + * The "alias" keyword allows one to create a new keyword that is accepted in lieu of another (or a chain of another) + * It does this by reading in sequence: + * * The "alias" keyword + * * The thing to alias (any valid primary type) + * * The new name + * + * They are stored in a separate symbol table and can be used anywhere the original is valid. + */ + +static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType) { + int Type, Storage = 0; + + // "alias" + Tokenise(); + Safe(); + + Type = ParseType(CompositeType, &Storage); + if (Storage != 0) + ErrorReport("Cannot extern an alias definition.\n"); + + if (FindAlias(CurrentIdentifier) != NULL) + ErrorReport("Duplicate type alias.\n"); + + // It may be a pointer definition + Type = ParsePointerType(Type); + + AddSymbol(CurrentIdentifier, Type, ST_VAR, SC_ALIAS, 0, 0, *CompositeType); + Tokenise(); + Safe(); + + return Type; +} + +/* + * Get the type that a typedef declaration aliases. + * @param name the name of the typedef + * @param CompositeType out: the type if composite + * @return the type if scalar + */ +static int GetTypedef(char* name, struct SymbolTableEntry** CompositeType) { + struct SymbolTableEntry* type; + type = FindAlias(name); + + if (type == NULL) + ErrorReport("Unknown alias type"); + Tokenise(); + Safe(); + + *CompositeType = type->CompositeType; + return type->Type; +} + +/* + * Parse an array initialization. + * Everything after the =, for example. + * Every element must match the type of the array, and the number of elements must match the size of the array. + * @param Symbol the symbol of the array we're initializing + * @param Type the type of the array, if primitive + * @param CompositeType the type of the array, if composite + * @param Storage the storage class of the array we're initializing + */ +static void ParseArrayInitialization(struct SymbolTableEntry* Symbol, int Type, struct SymbolTableEntry* CompositeType, int Storage) { + ErrorReport("Array initialization not permitted.\n"); +} + +/* + * Parse a name symbol for a declaration. + * Calls out to parse functions, arrays and scalars alike. + * Also parses an inline initialization if present. + * + * @param Type the type of the declaration, if primitive + * @param CompositeType a reference to the type, if composite (struct) + * @param Storage the storage class of the declaration + * @return the symbol table entry to the new symbol + */ +static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage) { + struct SymbolTableEntry* symbol = NULL; + char* variableName = strdup(CurrentIdentifier); + int structureType = ST_VAR; + + Safe(); + + if(CurrentFile->CurrentSymbol.type == KW_FUNC) + return ParseFunctionDeclaration(variableName, Type, CompositeType, Storage); + + VerifyToken(TY_IDENTIFIER, "Identifier"); + + // Check for duplicate declarations + switch (Storage) { + case SC_GLOBAL: + if (FindGlobal(variableName) != NULL) + ErrorReport("Duplicate global declaration\n"); + case SC_LOCAL: + case SC_PARAM: + if (FindLocal(variableName) != NULL) + ErrorReport("Duplicate local declaration\n"); + case SC_MEMBER: + if (FindMember(variableName) != NULL) + ErrorReport("Duplicate member declaration\n"); + default: break; + } + + // Determine whether this is an array or scalar. + if (CurrentFile->CurrentSymbol.type == LI_LBRAC) { + symbol = ParseArrayDeclaration(variableName, Type, CompositeType, Storage); + structureType = ST_ARR; + } else { + symbol = ParseScalarDeclaration(variableName, Type, CompositeType, Storage); + } + + // Determine whether we're initializing immediately + if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { + // TODO: Default parameters + if (Storage == SC_PARAM) + ErrorReport("Initialization of parameter not permitted.\n"); + // TODO: Enum initialization + if (Storage == SC_MEMBER) + ErrorReport("Initialization of a member not permitted.\n"); + + Tokenise(); + + if (structureType == ST_ARR) { + ParseArrayInitialization(symbol, Type, CompositeType, Storage); + } else { + // TODO: Inline initialization + ErrorReport("Initialization of a scalar not permitted.\n"); + } + } + + return symbol; + +} + /* * Handles the declaration of a new composite type. * For example, a struct is a composite of multiple different named positions: @@ -75,10 +521,12 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor */ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { - struct SymbolTableEntry* Composite = NULL, * Member; + struct SymbolTableEntry* Composite = NULL, *Member; int Offset = 0, Largest = 0; + // "struct" / "union" Tokenise(); + Safe(); if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier); @@ -87,19 +535,35 @@ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Composite == NULL) - DieMessage("Unknown Struct", CurrentIdentifier); + ErrorReport("Use of undefined composite"); return Composite; } if (Composite) - DieMessage("Redefinition of composite", CurrentIdentifier); + ErrorReport("Redefinition of composite"); Composite = AddSymbol(CurrentIdentifier, Type, ST_RUCT, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL); Tokenise(); + Safe(); + printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union"); - ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAC); + + while (1) { + Type = ParseDeclarationList(&Member, SC_MEMBER, LI_SEMIC,LI_RBRAC); + if (Type == -1) + ErrorReport("Bad type in member list of composite\n"); + + OptionallyConsume(LI_SEMIC); + Safe(); + + if (CurrentFile->CurrentSymbol.type == LI_RBRAC) + break; + } + VerifyToken(LI_RBRAC, "}"); + if (CompositeMembers == NULL) + ErrorReport("No members in struct.\n"); Composite->Start = CompositeMembers; CompositeMembers = CompositeMembersEnd = NULL; @@ -123,12 +587,14 @@ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { return Composite; } -void BeginEnumDeclaration() { +static void ParseEnumDeclaration() { struct SymbolTableEntry* Type = NULL; char* Name; int Value = 0; + // "enum" Tokenise(); + Safe(); // enum name if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { @@ -137,19 +603,20 @@ void BeginEnumDeclaration() { Tokenise(); } - // enum name {? if not, enum name var. + // We're expecting to declare an enum, so make sure the content follows. if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Type == NULL) - DieMessage("Undeclared Enum", Name); + ErrorReport("Enum used but not yet declared.\n"); return; } // Skip the { that we have Tokenise(); + Safe(); if (Type != NULL) - DieMessage("Attempting to redefine enum", Type->Name); + ErrorReport("Enum redeclared.\n"); else Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUM, 0, 0, NULL); @@ -159,187 +626,58 @@ void BeginEnumDeclaration() { Type = FindEnumMember(Name); if (Type != NULL) - DieMessage("Attempting to redeclare enum value", Name); + ErrorReport("Enum value already declared\n"); + Safe(); // Parse equality if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { Tokenise(); // Expect a number after the equals if (CurrentFile->CurrentSymbol.type != LI_INT) - Die("Expected integer to assign enum value to"); + ErrorReport("Expected integer in enum assignment\n"); Value = CurrentFile->CurrentSymbol.value; + // int Tokenise(); + Safe(); } Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL); - // Break on right brace if (CurrentFile->CurrentSymbol.type == LI_RBRAC) break; VerifyToken(LI_COM, "Comma"); + Safe(); } // Skip right brace Tokenise(); - - } -/* - * Handles the declaration of a type of a variable. - * int newVar; - * - * It verifies that we have a type keyword followed by a - * unique, non-keyword identifier. - * - * It then stores this variable into the appropriate symbol table, - * and returns the new item. - * - * @return the Symbol Table entry of this new variable. - */ -struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) { - struct SymbolTableEntry* Symbol = NULL; - - switch (Scope) { - case SC_GLOBAL: - if (FindGlobal(CurrentIdentifier) != NULL) - DieMessage("Invalid redeclaration of global variable", CurrentIdentifier); - case SC_LOCAL: - case SC_PARAM: - if (FindLocal(CurrentIdentifier) != NULL) - DieMessage("Invalid redeclaration of local variable", CurrentIdentifier); - case SC_MEMBER: - if (FindMember(CurrentIdentifier) != NULL) - DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier); - } - - if (CurrentFile->CurrentSymbol.type == LI_LBRAS) { - Tokenise(); - - if (CurrentFile->CurrentSymbol.type == LI_INT) { - switch (Scope) { - case SC_GLOBAL: - Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL); - break; - case SC_LOCAL: - case SC_PARAM: - case SC_MEMBER: - Die("Local arrays are unimplemented"); - } - } - - Tokenise(); - VerifyToken(LI_RBRAS, "]"); - } else { - Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite); - } - - return Symbol; - -} - -/* - * Handles the declaration of a new function. - * Verifies that the identifier is not taken (excluding the case - * where there is a declaration but no definition) - * Parses the list of parameters if present - * Saves the function prototype if there is no body - * Generates and saves the break-out point label - * - * @param Type: The return type of the function - * @return the AST for this function - * - */ - -struct ASTNode* ParseFunction(int Type) { - struct ASTNode* Tree; - struct ASTNode* FinalStatement; - struct SymbolTableEntry* OldFunction, * NewFunction = NULL; - int BreakLabel = 0, ParamCount = 0; - - if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL) - if (OldFunction->Storage != ST_FUNC) - OldFunction = NULL; - if (OldFunction == NULL) { - BreakLabel = Assembler->vtable->NewLabel(); - NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); - } - - VerifyToken(LI_LPARE, "("); - ParamCount = ReadDeclarationList(OldFunction, SC_PARAM, LI_RPARE); - VerifyToken(LI_RPARE, ")"); - - printf("\nIdentified%sfunction %s of return type %s, end label %d\n", - (OldFunction == NULL) ? " new " : " overloaded ", - (OldFunction == NULL) ? NewFunction->Name : OldFunction->Name, - TypeNames(Type), BreakLabel); - - if (NewFunction) { - NewFunction->Elements = ParamCount; - NewFunction->Start = Params; - NewFunction->Type = RET_LONG; - OldFunction = NewFunction; - } - - Params = ParamsEnd = NULL; - - if (CurrentFile->CurrentSymbol.type == LI_SEMIC) { - Tokenise(); - return NULL; - } - - CurrentFile->FunctionEntry = OldFunction; - - CurrentFile->CurrentLoopDepth = 0; - VerifyToken(LI_LBRAC, "{"); - Tree = ParseCompound(); - VerifyToken(LI_RBRAC, "}"); - - if (Type != RET_VOID) { - // Functions with one statement have no composite node, so we have to check - FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree; - - if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) { - Die("Function with non-void type does not return"); - } - - } - - return ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel); -} - -/* - * Handles the logic for return. - * //TODO: No brackets - * //TODO: Type inference - * - */ - struct ASTNode* ReturnStatement() { struct ASTNode* Tree; - int ReturnType; - - - if (CurrentFile->FunctionEntry->Type == RET_VOID) - Die("Attempt to return from void function"); VerifyToken(KW_RETURN, "return"); + if (CurrentFile->FunctionEntry->Type == RET_VOID) + ErrorReport("Attempt to return from void function"); + bool bracketed = OptionallyConsume(LI_LPARE); + Safe(); Tree = ParsePrecedenceASTNode(0); Tree = MutateType(Tree, CurrentFile->FunctionEntry->Type, 0); if (Tree == NULL) - Die("Returning a value of incorrect type for function"); - + ErrorReport("Returning a value of incorrect type for function. Expected %s.\n", TypeNames(CurrentFile->FunctionEntry->Type)); Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, CurrentFile->FunctionEntry, 0); printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name); if (bracketed) VerifyToken(LI_RPARE, ")"); + Safe(); + VerifyToken(LI_SEMIC, ";"); return Tree; } @@ -480,7 +818,7 @@ struct ASTNode* ForStatement() { VerifyToken(KW_FOR, "for"); VerifyToken(LI_LPARE, "("); - Preop = ParseStatement(); + Preop = ParseExpressionList(LI_SEMIC); VerifyToken(LI_SEMIC, ";"); Condition = ParsePrecedenceASTNode(0); @@ -490,7 +828,7 @@ struct ASTNode* ForStatement() { VerifyToken(LI_SEMIC, ";"); - Postop = ParseStatement(); + Postop = ParseExpressionList(LI_RPARE); VerifyToken(LI_RPARE, ")"); CurrentFile->CurrentLoopDepth++; @@ -676,6 +1014,9 @@ struct ASTNode* BreakStatement() { Die("Unable to break without a loop or switch statement"); Tokenise(); + Safe(); + VerifyToken(LI_SEMIC, ";"); + Safe(); return ConstructASTLeaf(OP_BREAK, 0, NULL, 0); } diff --git a/src/assemble/ASMAssembler.c b/src/assemble/ASMAssembler.c index fc4df51..bb66a3f 100644 --- a/src/assemble/ASMAssembler.c +++ b/src/assemble/ASMAssembler.c @@ -1055,7 +1055,8 @@ static int AssembleTree(struct ASTNode* Node, int Register, int LoopBeginLabel, DumpTree(Node, 0); Started = 1; - printf("Current operation: %d\r\n", Node->Operation); + printf("Current operation: %s\r\n", OperationNames[Node->Operation]); + fflush(stdout); switch (Node->Operation) { case OP_IF: return AsIf(Node, LoopBeginLabel, LoopEndLabel); @@ -1064,9 +1065,9 @@ static int AssembleTree(struct ASTNode* Node, int Register, int LoopBeginLabel, return AsWhile(Node); case OP_COMP: - AssembleTree(Node->Left, -1, LoopBeginLabel, LoopEndLabel, Node->Operation); + if (Node->Left) AssembleTree(Node->Left, -1, LoopBeginLabel, LoopEndLabel, Node->Operation); DeallocateAllRegisters(); - AssembleTree(Node->Right, -1, LoopBeginLabel, LoopEndLabel, Node->Operation); + if (Node->Right) AssembleTree(Node->Right, -1, LoopBeginLabel, LoopEndLabel, Node->Operation); DeallocateAllRegisters(); return -1;