/*************/ /*GEMWIRE */ /* ERYTHRO*/ /*************/ #include #include #include /* * Handles reading in a comma-separated list of declarations. * Erythro treats structs, enums and function parameters the same in this regard - * comma separated. * * C and C++ tend to treat enums and structs differently - the former separated by commas, * the latter separated by semicolons. * * Note that since functions are read in through parentheses, and structs/enums are read in * through brackets, the end character is configurable. * * @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable. * @param Storage: The Storage Scope of this declaration list. * @param End: The end token, in terms of TokenTypes enum values. * @return the amount of declarations read in. * */ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) { int TokenType, ParamCount = 0; struct SymbolTableEntry* PrototypePointer = NULL, * Composite; if (FunctionSymbol != NULL) PrototypePointer = FunctionSymbol->Start; while (CurrentFile->CurrentSymbol.type != End) { TokenType = ReadTypeOrKeyword(&Composite); VerifyToken(TY_IDENTIFIER, "identifier"); printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, TokenType, ScopeNames[Storage]); if (PrototypePointer != NULL) { if (TokenType != PrototypePointer->Type) DieDecimal("Function parameter of invalid type at index", ParamCount + 1); PrototypePointer = PrototypePointer->NextSymbol; } else { BeginVariableDeclaration(TokenType, Composite, Storage); } ParamCount++; if ((CurrentFile->CurrentSymbol.type != LI_COM) && (CurrentFile->CurrentSymbol.type != End)) DieDecimal("Unexpected token in parameter", CurrentFile->CurrentSymbol.type); if (CurrentFile->CurrentSymbol.type == LI_COM) Tokenise(); } if ((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length)) DieMessage("Invalid number of parameters in prototyped function", FunctionSymbol->Name); return ParamCount; } /* 
* Handles the declaration of a new composite type. * For example, a struct is a composite of multiple different named positions: * struct thisStct { int x, int y, int z }; * * Verifies that the current identifier is not used, * verifies that this is not a redefinition (excluding * the case where there is a declaration but no definition) * and then saves it into the appropriate symbol table. * * @return the Symbol Table entry of this new composite. */ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { struct SymbolTableEntry* Composite = NULL, * Member; int Offset = 0, Largest = 0; Tokenise(); if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier); Tokenise(); } if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Composite == NULL) DieMessage("Unknown Struct", CurrentIdentifier); return Composite; } if (Composite) DieMessage("Redefinition of composite", CurrentIdentifier); Composite = AddSymbol(CurrentIdentifier, Type, ST_RUCT, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL); Tokenise(); printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union"); ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAC); VerifyToken(LI_RBRAC, "}"); Composite->Start = CompositeMembers; CompositeMembers = CompositeMembersEnd = NULL; Member = Composite->Start; printf("\tSetting first entry in composite to %s\r\n", Member->Name); Member->SinkOffset = 0; Offset = TypeSize(Member->Type, Member->CompositeType); for (Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) { if (Type == DAT_STRUCT) Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1); else Member->SinkOffset = 0; int CurrentSize = TypeSize(Member->Type, Member->CompositeType); Offset += CurrentSize; Largest = CurrentSize > Largest ? CurrentSize : Largest; } Composite->Length = Type == DAT_STRUCT ? 
Offset : Largest; return Composite; } void BeginEnumDeclaration() { struct SymbolTableEntry* Type = NULL; char* Name; int Value = 0; Tokenise(); // enum name if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Type = FindEnum(CurrentIdentifier); Name = strdup(CurrentIdentifier); Tokenise(); } // enum name {? if not, enum name var. if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Type == NULL) DieMessage("Undeclared Enum", Name); return; } // Skip the { that we have Tokenise(); if (Type != NULL) DieMessage("Attempting to redefine enum", Type->Name); else Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUM, 0, 0, NULL); while (1) { VerifyToken(TY_IDENTIFIER, "Enum Entry"); Name = strdup(CurrentIdentifier); Type = FindEnumMember(Name); if (Type != NULL) DieMessage("Attempting to redeclare enum value", Name); // Parse equality if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { Tokenise(); // Expect a number after the equals if (CurrentFile->CurrentSymbol.type != LI_INT) Die("Expected integer to assign enum value to"); Value = CurrentFile->CurrentSymbol.value; Tokenise(); } Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL); // Break on right brace if (CurrentFile->CurrentSymbol.type == LI_RBRAC) break; VerifyToken(LI_COM, "Comma"); } // Skip right brace Tokenise(); } /* * Handles the declaration of a type of a variable. * int newVar; * * It verifies that we have a type keyword followed by a * unique, non-keyword identifier. * * It then stores this variable into the appropriate symbol table, * and returns the new item. * * @return the Symbol Table entry of this new variable. 
*/
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
    struct SymbolTableEntry* Symbol = NULL;

    // Reject a redeclaration within the scope being declared into. Each case
    // ends in a break so that, for example, a global is only checked against
    // the globals and is not reported as a clashing local or member.
    switch (Scope) {
        case SC_GLOBAL:
            if (FindGlobal(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of global variable", CurrentIdentifier);
            break;
        case SC_LOCAL:
        case SC_PARAM:
            if (FindLocal(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
            break;
        case SC_MEMBER:
            if (FindMember(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
            break;
        default:
            break;
    }

    if (CurrentFile->CurrentSymbol.type == LI_LBRAS) {
        // name[size] - an array declaration.
        Tokenise();

        // Previously a non-literal size silently produced no symbol at all.
        if (CurrentFile->CurrentSymbol.type != LI_INT)
            Die("Array size must be an integer literal");

        switch (Scope) {
            case SC_GLOBAL:
                // Record the declared element count rather than a fixed 1.
                // NOTE(review): assumes AddSymbol's fifth argument is the
                // symbol's length, as the other AddSymbol calls in this file
                // suggest - confirm against its definition.
                Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope,
                                   CurrentFile->CurrentSymbol.value, 0, NULL);
                break;
            case SC_LOCAL:
            case SC_PARAM:
            case SC_MEMBER:
                Die("Local arrays are unimplemented");
                break;
            default:
                break;
        }

        // Skip the size, then expect the closing bracket.
        Tokenise();
        VerifyToken(LI_RBRAS, "]");
    } else {
        Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
    }

    return Symbol;
}

/*
 * Handles the declaration of a new function.
* Verifies that the identifier is not taken (excluding the case * where there is a declaration but no definition) * Parses the list of parameters if present * Saves the function prototype if there is no body * Generates and saves the break-out point label * * @param Type: The return type of the function * @return the AST for this function * */ struct ASTNode* ParseFunction(int Type) { struct ASTNode* Tree; struct ASTNode* FinalStatement; struct SymbolTableEntry* OldFunction, * NewFunction = NULL; int BreakLabel = 0, ParamCount = 0; if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL) if (OldFunction->Storage != ST_FUNC) OldFunction = NULL; if (OldFunction == NULL) { BreakLabel = NewLabel(); NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); } VerifyToken(LI_LPARE, "("); ParamCount = ReadDeclarationList(OldFunction, SC_PARAM, LI_RPARE); VerifyToken(LI_RPARE, ")"); printf("\nIdentified%sfunction %s of return type %s, end label %d\n", (OldFunction == NULL) ? " new " : " overloaded ", (OldFunction == NULL) ? NewFunction->Name : OldFunction->Name, TypeNames(Type), BreakLabel); if (NewFunction) { NewFunction->Elements = ParamCount; NewFunction->Start = Params; NewFunction->Type = RET_LONG; OldFunction = NewFunction; } Params = ParamsEnd = NULL; if (CurrentFile->CurrentSymbol.type == LI_SEMIC) { Tokenise(); return NULL; } CurrentFile->FunctionEntry = OldFunction; CurrentFile->CurrentLoopDepth = 0; Tree = ParseCompound(); if (Type != RET_VOID) { // Functions with one statement have no composite node, so we have to check FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree; if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) { Die("Function with non-void type does not return"); } } return ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel); } /* * Handles the logic for return. 
* //TODO: No brackets * //TODO: Type inference * */ struct ASTNode* ReturnStatement() { struct ASTNode* Tree; int ReturnType; if (CurrentFile->FunctionEntry->Type == RET_VOID) Die("Attempt to return from void function"); VerifyToken(KW_RETURN, "return"); VerifyToken(LI_LPARE, "("); // TODO: Make optional! Reject? Tree = ParsePrecedenceASTNode(0); Tree = MutateType(Tree, CurrentFile->FunctionEntry->Type, 0); if (Tree == NULL) Die("Returning a value of incorrect type for function"); Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, CurrentFile->FunctionEntry, 0); printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name); VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE! return Tree; } /* * Handles the surrounding logic for If statements. * * If statements have the basic form: * * if (condition) body * * if (condition) * body * * if (condition) { * body * } * * Conditions may be any truthy statement (such as a pointer, * object, integer), as conditions not recognized are auto- * matically converted to booleans. * * This meaning, any object that can be resolved to 0 or NULL * can be placed as the condition and used as a check. * * For example: * struct ASTNode* Node = NULL; * if(Node) { * // This will not run, as Node is ((void*)0) * } * */ struct ASTNode* IfStatement() { struct ASTNode* Condition, * True, * False = NULL; VerifyToken(KW_IF, "if"); VerifyToken(LI_LPARE, "("); Condition = ParsePrecedenceASTNode(0); // Limit if(x) to =? != < > <= => // No null checking, no arithmetic, no functions. // TODO: this if (Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE) Condition = ConstructASTBranch(OP_BOOLCONV, Condition->ExprType, Condition, NULL, 0); VerifyToken(LI_RPARE, ")"); True = ParseStatement(); if (CurrentFile->CurrentSymbol.type == KW_ELSE) { Tokenise(); False = ParseStatement(); } return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0); } /* * Handles the surrounding logic for While loops. 
*
 * While loops have the basic form:
 *      while ( condition ) { body }
 *
 * When reaching the condition (which, like an If statement,
 * can be any truthy value), if it resolves to true:
 *  The body is executed, and immediately the condition is checked
 *  again.
 * This repeats until the condition resolves false, at which point
 * the loop executes no more.
 *
 * This can be prototyped as the following pseudo-assembler:
 *
 * cond:
 *      check <condition>
 *      jne exit
 *      <body>
 *      jump cond
 * exit:
 *      <loop exit>
 *
 * @return the AST of this statement
 *
 */
struct ASTNode* WhileStatement() {
    struct ASTNode* Test;
    struct ASTNode* LoopBody;

    VerifyToken(KW_WHILE, "while");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Wrap anything that is not already a comparison in a boolean conversion.
    if (!(Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE))
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);

    VerifyToken(LI_RPARE, ")");

    // The body is parsed one loop level deeper, which is what permits
    // break and continue inside of it.
    CurrentFile->CurrentLoopDepth++;
    LoopBody = ParseStatement();
    CurrentFile->CurrentLoopDepth--;

    return ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, LoopBody, NULL, 0);
}

/*
 * Handles the surrounding logic for For loops.
 *
 * They have the basic form of:
 *      for ( init ; condition; iterator) { body }
 *
 * The initialiser is run only once upon reaching the for loop.
 * Then the condition is checked, and if true, the body is executed.
 * After execution of the body, the iterator is run and the condition
 * checked again.
 *
 * It can be prototyped as the following pseudo-assembler code:
 *
 * for:
 *      <init>
 * cond:
 *      check <condition>
 *      jne exit
 *      <body>
 *      <iterator>
 *      jump cond
 * exit:
 *      <loop exit>
 *
 * In the case of the implementation, "init" is the preoperator,
 * "iterator" is the postoperator.
* * @return the AST of this statement */ struct ASTNode* ForStatement() { struct ASTNode* Condition, * Body; struct ASTNode* Preop, * Postop; struct ASTNode* Tree; VerifyToken(KW_FOR, "for"); VerifyToken(LI_LPARE, "("); Preop = ParseStatement(); VerifyToken(LI_SEMIC, ";"); Condition = ParsePrecedenceASTNode(0); if (Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE) Condition = ConstructASTBranch(OP_BOOLCONV, Condition->ExprType, Condition, NULL, 0); VerifyToken(LI_SEMIC, ";"); Postop = ParseStatement(); VerifyToken(LI_RPARE, ")"); CurrentFile->CurrentLoopDepth++; Body = ParseStatement(); CurrentFile->CurrentLoopDepth--; // We need to be able to skip over the body and the postop, so we group them together. Tree = ConstructASTNode(OP_COMP, RET_NONE, Body, NULL, Postop, NULL, 0); // We need to be able to jump to the top of the condition and fall through to the body, // so we group it with the last block Tree = ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Tree, NULL, 0); // We need to append the postop to the loop, to form the final for loop return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0); } /* * Handles the surrounding logic for the Print statement. * * This is a legacy hold-over from the early testing, and it * serves merely as a wrapper around the cstdlib printf.er function. * * It does, however (//TODO), attempt to guess the type that you * want to print, which takes a lot of the guesswork out of printing. 
* * @return the AST of this statement */ struct ASTNode* PrintStatement(void) { struct ASTNode* Tree; int LeftType, RightType; VerifyToken(KW_PRINT, "print"); Tree = ParsePrecedenceASTNode(0); LeftType = RET_INT; RightType = Tree->ExprType; Tree = MutateType(Tree, RightType, 0); if (!Tree) DieDecimal("Attempting to print an invalid type:", RightType); if (RightType) Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0); Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0); //ParseAST(Tree); return Tree; } /** * Handles the surrounding logic for break statements * * They have the basic form of: * break; * * If there is a loop currently being evaluated, break will insert an immediate jump to the end of the loop. * All locals inside the loop will lose their binding at this point. * * It can be prototyped as the following pseudo-assembler code: * * while: * check * jne exit * * : jump exit * jump while * exit: * * * * @return the AST of this statement */ struct ASTNode* BreakStatement() { if (CurrentFile->CurrentLoopDepth == 0) Die("Unable to break without a loop"); Tokenise(); Tokenise(); return ConstructASTLeaf(OP_BREAK, 0, NULL, 0); } /** * Handles the surrounding logic for continue statements * * They have the basic form of: * continue; * * If there is a loop currently being evaluated, continue will insert an immediate jump to the start of the loop. * * It can be prototyped as the following pseudo-assembler code: * * while: * check * jne exit * * : jump while * jump while * exit: * * * * @return the AST of this statement */ struct ASTNode* ContinueStatement() { if (CurrentFile->CurrentLoopDepth == 0) Die("Unable to break without a loop"); Tokenise(); return ConstructASTLeaf(OP_CONTINUE, 0, NULL, 0); } /* * Handles the surrounding logic for all of the logical and semantic * postfixes. * * Postfixes are tokens that are affixed to the end of another, and * change behaviour in some way. 
These can be added calculations, * some form of transformation, or other. * * A current list of postfixes: * * (): Call a function * * []: Index or define an array. * * ++: Increment a variable AFTER it is returned * NOTE: there is a prefix variant of this for incrementing BEFOREhand. * * --: Decrement a variable AFTER it is returned * NOTE: there is a prefix variant of this for decrementing BEFOREhand. * * Planned postfixes: * * >>: Arithmetic-Shift-Right a variable by one (Divide by two) * NOTE: there is a prefix variant of this for shifting left - multiplying by two. * * @return the AST of the statement plus its' postfix */ struct ASTNode* PostfixStatement() { struct ASTNode* Tree; struct SymbolTableEntry* Entry; // Early exit if we find an enum value if ((Entry = FindEnumMember(CurrentIdentifier)) != NULL) { Tokenise(); return ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, Entry->IntValue); } Tokenise(); if (CurrentFile->CurrentSymbol.type == LI_LPARE) return CallFunction(); if (CurrentFile->CurrentSymbol.type == LI_LBRAS) return AccessArray(); // If we get here, we must be a variable. // (as functions have been called and arrays have been indexed) // Check that the variable is recognized.. if ((Entry = FindSymbol(CurrentIdentifier)) == NULL || (Entry->Structure != ST_VAR && Entry->Structure != ST_FUNC)) { DumpAllLists(); DieMessage("Unknown Variable", CurrentIdentifier); } // Here we check for postincrement and postdecrement. switch (CurrentFile->CurrentSymbol.type) { case LI_DOT: return AccessMember(false); case LI_ARROW: return AccessMember(true); case PPMM_PLUS: Tokenise(); Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0); break; case PPMM_MINUS: Tokenise(); Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0); break; default: Tree = ConstructASTLeaf(REF_IDENT, Entry->Type, Entry, 0); } return Tree; } /* * Handles the surrounding logic for all of the logical and semantic * prefixes. 
*
 * Prefixes are tokens that are affixed to the start of another, and
 * change behaviour in some way. These can be added calculations,
 * some form of transformation, or other.
 *
 * A current list of prefixes:
 *  * !: Invert the boolean result of a statement or truthy value.
 *  * ~: Invert the individual bits in a number
 *  * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
 *  * ++: Increment a variable BEFORE it is returned.
 *      NOTE: there is a postfix variant of this for incrementing AFTER the fact.
 *  * --: Decrement a variable BEFORE it is returned.
 *      NOTE: there is a postfix variant of this for decrementing AFTER the fact.
 *  * &: Take the address of the following object (Get the address that contains it)
 *  * *: Dereference - get the object pointed at by the address following
 *
 * Planned prefixes:
 *  * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
 *      NOTE: there is a postfix variant of this for shifting right - dividing by two.
 *
 * @return the AST of this statement, plus its prefixes and any postfixes.
*/
struct ASTNode* PrefixStatement() {
    struct ASTNode* Operand;
    int Prefix = CurrentFile->CurrentSymbol.type;

    // Anything that is not a prefix operator is handed to the primary parser
    // without consuming a token.
    switch (Prefix) {
        case BOOL_INVERT:
        case BIT_NOT:
        case AR_MINUS:
        case PPMM_PLUS:
        case PPMM_MINUS:
        case BIT_AND:
        case AR_STAR:
            break;
        default:
            return ParsePrimary();
    }

    // Every prefix is handled the same way up to here: skip the operator,
    // then parse its operand recursively so that prefixes can be stacked.
    Tokenise();
    Operand = PrefixStatement();

    switch (Prefix) {
        case BOOL_INVERT:
            Operand->RVal = 1;
            return ConstructASTBranch(OP_BOOLNOT, Operand->ExprType, Operand, NULL, 0);

        case BIT_NOT:
            Operand->RVal = 1;
            return ConstructASTBranch(OP_BITNOT, Operand->ExprType, Operand, NULL, 0);

        case AR_MINUS:
            return ConstructASTBranch(OP_NEGATE, Operand->ExprType, Operand, NULL, 0);

        case PPMM_PLUS:
            if (Operand->Operation != REF_IDENT)
                Die("++ not followed by identifier");
            return ConstructASTBranch(OP_PREINC, Operand->ExprType, Operand, NULL, 0);

        case PPMM_MINUS:
            if (Operand->Operation != REF_IDENT)
                Die("-- not followed by identifier");
            return ConstructASTBranch(OP_PREDEC, Operand->ExprType, Operand, NULL, 0);

        case BIT_AND:
            if (Operand->Operation != REF_IDENT)
                Die("& must be followed by another & or an identifier.");
            // No new node is built: the identifier node itself is turned
            // into an address-of node with a pointer type.
            Operand->Operation = OP_ADDRESS;
            Operand->ExprType = PointerTo(Operand->ExprType);
            return Operand;

        default:
            // Only AR_STAR is left at this point.
            if (Operand->Operation != REF_IDENT && Operand->Operation != OP_DEREF)
                Die("* must be followed by another * or an identifier.");
            return ConstructASTBranch(OP_DEREF, ValueAt(Operand->ExprType), Operand, NULL, 0);
    }
}