/*************/ /*GEMWIRE */ /* ERYTHRO*/ /*************/ #include #include #include static void ParseEnumDeclaration(); static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree); static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType); /* * Handles parsing multiple statements or expressions in a row. * These are typically grouped together with the Compound tokens "{ }" * and seperated by the semicolon ";". * * Single Statements are parsed until a semicolon is reached, at which * point another statement will be parsed, or until a Right Compound * token is reached ("}"), at which point parsing will stop. * * It is useful for: * * Tightly identifying related blocks of code * * Containing the many statements of functions * * @return the AST Node representing this compound statement * */ struct ASTNode* ParseCompound() { struct ASTNode* Left = NULL, * Tree; while (1) { printf("\tNew branch in compound\n"); Tree = ParseStatement(); /*if (Tree && (Tree->Operation == OP_PRINT || Tree->Operation == OP_ASSIGN || Tree->Operation == OP_RET || Tree->Operation == OP_CALL || Tree->Operation == OP_BREAK || Tree->Operation == OP_CONTINUE)) VerifyToken(LI_SEMIC, ";"); */ Safe(); if (Tree) { if (Left == NULL) Left = Tree; else Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, NULL, 0); } if (CurrentFile->CurrentSymbol.type == LI_RBRAC) { fflush(stdout); return Left; } if (CurrentFile->SwitchStatement && (CurrentFile->CurrentSymbol.type == KW_CASE || CurrentFile->CurrentSymbol.type == KW_DEFAULT)) { return Left; } } } /* * Read a literal of the given type. * @param Type integer or char* * @return the integer literal, or label value of the string. */ int ParseLiteral(int Type) { if ((Type == PointerTo(RET_CHAR)) && (CurrentFile->CurrentSymbol.type == LI_STR)) return Assembler->vtable->AsNewString(CurrentIdentifier); if (CurrentFile->CurrentSymbol.type == LI_INT) { switch (Type) { case RET_CHAR: if (CurrentFile->CurrentSymbol.value < 0 || CurrentFile->CurrentSymbol.value > 255) ErrorReport("Integer literal value too big for char\n"); case RET_INT: case RET_LONG: break; default: ErrorReport("Type Mismatch. Integer Literal vs Variable.\n"); } } else { ErrorReport("Expecting an integer literal or char array.\n"); } return CurrentFile->CurrentSymbol.value; } /* * Get the type that a typedef declaration aliases. * @param name the name of the typedef * @param CompositeType out: the type if composite * @return the type if scalar */ static int GetTypedef(char* name, struct SymbolTableEntry** CompositeType) { struct SymbolTableEntry* type; type = FindAlias(name); if (type == NULL) ErrorReport("Unknown alias type: %s\n", name); Tokenise(); Safe(); *CompositeType = type->CompositeType; return type->Type; } /* * Resolve a typename to a type struct. * Short circuit on the case where a definition is present, as definitions are typeless. */ static int ParseType(struct SymbolTableEntry** CompositeType, int* Scope) { int Type = -1, Extern = 1; while (Extern) { switch (CurrentFile->CurrentSymbol.type) { default: Extern = 0; } } switch (CurrentFile->CurrentSymbol.type) { case KW_IMPORT: Type = -1; ImportModule(); break; case TY_VOID: Type = RET_VOID; Tokenise(); break; case TY_CHAR: Type = RET_CHAR; Tokenise(); break; case TY_INT: Type = RET_INT; Tokenise(); break; case TY_LONG: Type = RET_LONG; Tokenise(); break; case KW_ALIAS: Type = ParseAliasDeclaration(CompositeType); if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = -1; break; case KW_ENUM: Type = RET_INT; ParseEnumDeclaration(); if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = -1; break; case KW_STRUCT: Type = DAT_STRUCT; *CompositeType = BeginCompositeDeclaration(Type); if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = -1; break; case KW_UNION: Type = DAT_UNION; *CompositeType = BeginCompositeDeclaration(Type); if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = -1; break; case TY_IDENTIFIER: Type = GetTypedef(CurrentIdentifier, CompositeType); break; default: ErrorReport("Illegal type on token %s\n", TokenNames[CurrentFile->CurrentSymbol.type]); } return Type; } /* * Given a Type passed by ParseType, read following dereferences and return pointer type. */ static int ParsePointerType(int Type) { while (1) { // But, skip parsing if we're looking at an import. if (CurrentFile->CurrentSymbol.type == KW_IMPORT) break; printf("\t\t\tType on parsing is %s\n", TokenNames[CurrentFile->CurrentSymbol.type]); if (CurrentFile->CurrentSymbol.type != AR_STAR) break; Type = PointerTo(Type); Tokenise(); } return Type; } /* * Parse a declaration of an array - the [ ] part. * * @param name the name of the array * @param Type the type of the array, if scalar * @param CompositeType the type of the array, if composite * @param Storage the storage class of the array * @return the defined array symbol */ static struct SymbolTableEntry* ParseArrayDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) { struct SymbolTableEntry* Symbol = NULL; int Elems = -1, MaxElems, *InitialList, i = 0, j = 0; Tokenise(); Safe(); if (CurrentFile->CurrentSymbol.type == LI_INT) { if (CurrentFile->CurrentSymbol.value <= 0) ErrorReport("Array size in definition cannot be negative.\n"); Elems = CurrentFile->CurrentSymbol.value; Tokenise(); } VerifyToken(LI_RBRAS, "]"); Safe(); switch (Storage) { case SC_GLOBAL: Symbol = AddSymbol(name, PointerTo(Type), ST_ARR, SC_GLOBAL, 0, 0, CompositeType); break; default: ErrorReport("Local array declaration not supported\n"); } if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { if (Storage != SC_GLOBAL) ErrorReport("Non-global array cannot be initialized.\n"); Tokenise(); Safe(); VerifyToken(LI_LBRAC, "{"); if (Elems != -1) MaxElems = Elems; else MaxElems = 10; InitialList = (int*)malloc(MaxElems * sizeof(int)); while (1) { if (Elems != -1 && i == MaxElems) ErrorReport("Too many items in initializer list\n"); InitialList[i++] = ParseLiteral(Type); Tokenise(); Safe(); if (Elems == -1 && i == MaxElems) { MaxElems += 10; InitialList = (int*)realloc(InitialList, MaxElems * sizeof(int)); } if (CurrentFile->CurrentSymbol.type == LI_RBRAC) { Tokenise(); break; } VerifyToken(LI_COM, ","); Safe(); } for (j = i; j < Symbol->Length; j++) InitialList[j] = 0; if (i > Elems) Elems = i; Symbol->InitialValues = InitialList; } Symbol->Length = Elems; Symbol->Size = Symbol->Length * TypeSize(Type, CompositeType); if (Storage == SC_GLOBAL) Assembler->vtable->AsGlobalSymbol(Symbol); return Symbol; } // A short redirect to add a Scalar definition to the variable tables. static struct SymbolTableEntry* ParseScalarDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree) { struct SymbolTableEntry* sym = AddSymbol(name, Type, ST_VAR, Storage, 1, 0, CompositeType); struct ASTNode* var, *expr; // Being assigned. if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { if (Storage != SC_GLOBAL && Storage != SC_LOCAL) ErrorReport("Non-static, non-local variable cannot be initialized.\n"); Tokenise(); Safe(); if (Storage == SC_GLOBAL) { sym->InitialValues = (int*) malloc(sizeof(int)); sym->InitialValues[0] = ParseLiteral(Type); Tokenise(); } else if (Storage == SC_LOCAL) { var = ConstructASTLeaf(REF_IDENT, sym->Type, sym, 0); expr = ParsePrecedenceASTNode(0); expr->RVal = 1; expr = MutateType(expr, var->ExprType, 0); if (expr == NULL) ErrorReport("Incompatible types in assignment: %s, %s\n", TypeNames(expr->ExprType), TypeNames(var->ExprType)); *Tree = ConstructASTNode(OP_ASSIGN, expr->ExprType, expr, NULL, var, NULL, 0); } } if (Storage == SC_GLOBAL) Assembler->vtable->AsGlobalSymbol(sym); return sym; } /* * Handles reading in a comma-or-semicolon separated list of declarations. * Erythro treats structs, enums and function parameters the same in this regard - * comma separated. * * C and C++ tend to treat enums and structs differently - the former separated by commas, * the latter separated by semicolons. * * Note that since functions are read in through parentheses, and structs/enums are read in * through brackets, the end character is configurable. * * Parse declarations, including lists thereof, until the Terminate symbol is encountered. * Will first parse a type name, then parse the identifier using ParseSymbolDeclaration. * Declaration lists must be separated by a comma or terminated with the StatementEndSymbol. * * @param CompositeType out: the type of the declaration list. * @param ClassType the type of the class * @param StatementEndSymbol the symbol that marks the end of the declaration list * @param TerminateSymbol the symbol that marks the end of parsing * @return the type of the declaration * */ int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbol, int TerminateSymbol, struct ASTNode** CompoundTree) { int initType, type; struct SymbolTableEntry* symbol; struct ASTNode* Tree; *CompoundTree = NULL; fflush(stdout); if ((initType = ParseType(CompositeType, &ClassType)) == -1) return initType; while (1) { type = ParsePointerType(initType); symbol = ParseDeclarationSymbol(type, *CompositeType, ClassType, &Tree); printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, type, ScopeNames[ClassType]); // Lists of function declarations are not valid. if (symbol->Type == ST_FUNC) { if (ClassType != SC_GLOBAL) ErrorReport("Function definition not at global scope\n"); return type; } if (*CompoundTree == NULL) *CompoundTree = Tree; else *CompoundTree = ConstructASTNode(OP_COMP, RET_NONE, *CompoundTree, NULL, Tree, NULL, 0); // Terminate at either symbol if (CurrentFile->CurrentSymbol.type == StatementEndSymbol || CurrentFile->CurrentSymbol.type == TerminateSymbol) return type; // We must be continuing the list, so parse a comma VerifyToken(LI_COM, ","); } } /* * Parse the full list of parameter declarations. * Each has a type, a name, may be a pointer, or an array. * * @param FunctionDeclaration the type of the declaration of the function, if declared already. * @param FunctionDefinition the type of the definition of the function, which we are parsing * @return the number of parameters parsed */ static int ParseParameterDeclarationList(struct SymbolTableEntry* FunctionDeclaration, struct SymbolTableEntry* FunctionDefinition) { int TokenType, ParamCount = 0; struct SymbolTableEntry* PrototypePointer = NULL, * Composite; struct ASTNode* empty; if (FunctionDeclaration != NULL) PrototypePointer = FunctionDeclaration->Start; while (CurrentFile->CurrentSymbol.type != LI_RPARE) { // Doing int x, y, float z is valid, so parse a list of declarations per parameter. TokenType = ParseDeclarationList(&Composite, SC_PARAM, LI_COM, LI_RPARE, &empty); if (TokenType == -1) ErrorReport("Bad type in parameter list"); printf("\tReading a new parameter: %s of type %d\n", CurrentIdentifier, TokenType); if (PrototypePointer != NULL) { if (TokenType != PrototypePointer->Type) ErrorReport("Function parameter has invalid type at index %d\n", ParamCount + 1); PrototypePointer = PrototypePointer->NextSymbol; } Safe(); ParamCount++; if (CurrentFile->CurrentSymbol.type == LI_RPARE) break; VerifyToken(LI_COM, ","); Safe(); } if ((FunctionDeclaration != NULL) && (ParamCount != FunctionDeclaration->Length)) ErrorReport("Function definition has different number of parameters than the function declaration (%d vs %d).\n", ParamCount, FunctionDeclaration->Length); return ParamCount; } /* * Parse a function declaration, and optionally definition. * ( parameter(,?)* ) ; * ( parameter(,?)* ) compound ; * * @param name the name of the function * @param Type the type of the function, if primitive * @param CompositeType the type of the function, if composite * @param Storage the scope of the function * @return the new symbol table entry for the function */ static struct SymbolTableEntry* ParseFunctionDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) { struct ASTNode* Tree; struct ASTNode* FinalStatement; struct SymbolTableEntry* OldFunction, * NewFunction = NULL; int BreakLabel = 0, ParamCount = 0; VerifyToken(KW_FUNC, "::"); Safe(); VerifyToken(TY_IDENTIFIER, "Identifier"); Safe(); if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL) if (OldFunction->Storage != ST_FUNC) OldFunction = NULL; if (OldFunction == NULL) { BreakLabel = Assembler->vtable->NewLabel(); NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); } VerifyToken(LI_LPARE, "("); Safe(); ParamCount = ParseParameterDeclarationList(OldFunction, NewFunction); VerifyToken(LI_RPARE, ")"); Safe(); printf("\nIdentified%sfunction %s of return type %s, end label %d\n", (OldFunction == NULL) ? " new " : " overloaded ", (OldFunction == NULL) ? NewFunction->Name : OldFunction->Name, TypeNames(Type), BreakLabel); if (NewFunction) { NewFunction->Length = ParamCount; NewFunction->Start = Params; NewFunction->Type = RET_LONG; OldFunction = NewFunction; } Params = ParamsEnd = NULL; if (CurrentFile->CurrentSymbol.type == LI_SEMIC) { return OldFunction; } CurrentFile->FunctionEntry = OldFunction; CurrentFile->CurrentLoopDepth = 0; VerifyToken(LI_LBRAC, "{"); Safe(); Tree = ParseCompound(); Safe(); VerifyToken(LI_RBRAC, "}"); if (Type != RET_VOID) { // Functions with one statement have no composite node, so we have to check FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree; if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) { ErrorReport("Function with non-void type does not return"); } } Tree = ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel); if (Tree && CurrentFile->AllowDefinitions) { printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name); if (OptDumpTree) { DumpTree(Tree, 0); fprintf(stdout, "\n\n"); } // Emit the function now Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0); FreeLocals(); } else { printf("\nFunction prototype saved\r\n"); } Safe(); return OldFunction; } /* * The "alias" keyword allows one to create a new keyword that is accepted in lieu of another (or a chain of another) * It does this by reading in sequence: * * The "alias" keyword * * The thing to alias (any valid primary type) * * The new name * * They are stored in a separate symbol table and can be used anywhere the original is valid. */ static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType) { int Type, Storage = 0; // "alias" Tokenise(); Safe(); Type = ParseType(CompositeType, &Storage); if (Storage != 0) ErrorReport("Cannot extern an alias definition.\n"); if (FindAlias(CurrentIdentifier) != NULL) ErrorReport("Duplicate type alias.\n"); // It may be a pointer definition Type = ParsePointerType(Type); AddSymbol(CurrentIdentifier, Type, ST_VAR, SC_ALIAS, 0, 0, *CompositeType); Tokenise(); Safe(); return Type; } /* * Parse an array initialization. * Everything after the =, for example. * Every element must match the type of the array, and the number of elements must match the size of the array. * @param Symbol the symbol of the array we're initializing * @param Type the type of the array, if primitive * @param CompositeType the type of the array, if composite * @param Storage the storage class of the array we're initializing */ static void ParseArrayInitialization(struct SymbolTableEntry* Symbol, int Type, struct SymbolTableEntry* CompositeType, int Storage) { ErrorReport("Array initialization not permitted.\n"); } static char* copyString(char* str) { size_t len = strlen(str); char *dst = malloc(len + 1); // Space for length plus nul if (dst == NULL) return NULL; // No memory strcpy(dst, str); // Copy the characters return dst; // Return the new string } /* * Parse a name symbol for a declaration. * Calls out to parse functions, arrays and scalars alike. * Also parses an inline initialization if present. * * @param Type the type of the declaration, if primitive * @param CompositeType a reference to the type, if composite (struct) * @param Storage the storage class of the declaration * @return the symbol table entry to the new symbol */ static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree) { struct SymbolTableEntry* symbol = NULL; char* variableName = copyString(CurrentIdentifier); int structureType = ST_VAR; Safe(); if(CurrentFile->CurrentSymbol.type == KW_FUNC) return ParseFunctionDeclaration(variableName, Type, CompositeType, Storage); VerifyToken(TY_IDENTIFIER, "Identifier"); // Check for duplicate declarations switch (Storage) { case SC_GLOBAL: if (FindGlobal(variableName) != NULL) ErrorReport("Duplicate global declaration\n"); case SC_LOCAL: case SC_PARAM: if (FindLocal(variableName) != NULL) ErrorReport("Duplicate local declaration\n"); case SC_MEMBER: if (FindMember(variableName) != NULL) ErrorReport("Duplicate member declaration\n"); default: break; } // Determine whether this is an array or scalar. if (CurrentFile->CurrentSymbol.type == LI_LBRAS) { symbol = ParseArrayDeclaration(variableName, Type, CompositeType, Storage); structureType = ST_ARR; } else { symbol = ParseScalarDeclaration(variableName, Type, CompositeType, Storage, Tree); } return symbol; } /* * Handles the declaration of a new composite type. * For example, a struct is a composite of multiple different named positions: * struct thisStct { int x, int y, int z }; * * Verifies that the current identifier is not used, * verifies that this is not a redefinition (excluding * the case where there is a declaration but no definition) * and then saves it into the appropriate symbol table. * * @return the Symbol Table entry of this new composite. */ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { struct SymbolTableEntry* Composite = NULL, *Member; int Offset = 0, Largest = 0; // "struct" / "union" Tokenise(); Safe(); if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier); Tokenise(); } if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Composite == NULL) ErrorReport("Use of undefined composite"); return Composite; } if (Composite) ErrorReport("Redefinition of composite"); Composite = AddSymbol(CurrentIdentifier, Type, ST_RUCT, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL); Tokenise(); Safe(); printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union"); while (1) { Type = ParseDeclarationList(&Member, SC_MEMBER, LI_SEMIC, LI_RBRAC, NULL); if (Type == -1) ErrorReport("Bad type in member list of composite\n"); OptionallyConsume(LI_SEMIC); Safe(); if (CurrentFile->CurrentSymbol.type == LI_RBRAC) break; } VerifyToken(LI_RBRAC, "}"); if (CompositeMembers == NULL) ErrorReport("No members in struct.\n"); Composite->Start = CompositeMembers; CompositeMembers = CompositeMembersEnd = NULL; Member = Composite->Start; printf("\tSetting first entry in composite to %s\r\n", Member->Name); Member->SinkOffset = 0; Offset = TypeSize(Member->Type, Member->CompositeType); for (Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) { if (Type == DAT_STRUCT) Member->SinkOffset = Assembler->vtable->AsAlignMemory(Member->Type, Offset, 1); else Member->SinkOffset = 0; int CurrentSize = TypeSize(Member->Type, Member->CompositeType); Offset += CurrentSize; Largest = CurrentSize > Largest ? CurrentSize : Largest; } Composite->Length = Type == DAT_STRUCT ? Offset : Largest; Composite->Size = Offset; return Composite; } static void ParseEnumDeclaration() { struct SymbolTableEntry* Type = NULL; char* Name; int Value = 0; // "enum" Tokenise(); Safe(); // enum name if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Type = FindEnum(CurrentIdentifier); Name = strdup(CurrentIdentifier); Tokenise(); } // We're expecting to declare an enum, so make sure the content follows. if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Type == NULL) ErrorReport("Enum used but not yet declared.\n"); return; } // Skip the { that we have Tokenise(); Safe(); if (Type != NULL) ErrorReport("Enum redeclared.\n"); else Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUM, 0, 0, NULL); while (1) { VerifyToken(TY_IDENTIFIER, "Enum Entry"); Name = strdup(CurrentIdentifier); Type = FindEnumMember(Name); if (Type != NULL) ErrorReport("Enum value already declared\n"); Safe(); // Parse equality if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { Tokenise(); // Expect a number after the equals if (CurrentFile->CurrentSymbol.type != LI_INT) ErrorReport("Expected integer in enum assignment\n"); Value = CurrentFile->CurrentSymbol.value; // int Tokenise(); Safe(); } Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL); // Break on right brace if (CurrentFile->CurrentSymbol.type == LI_RBRAC) break; VerifyToken(LI_COM, "Comma"); Safe(); } // Skip right brace Tokenise(); } struct ASTNode* ReturnStatement() { struct ASTNode* Tree; VerifyToken(KW_RETURN, "return"); if (CurrentFile->FunctionEntry->Type == RET_VOID) ErrorReport("Attempt to return from void function"); bool bracketed = OptionallyConsume(LI_LPARE); Safe(); Tree = ParsePrecedenceASTNode(0); Tree = MutateType(Tree, CurrentFile->FunctionEntry->Type, 0); if (Tree == NULL) ErrorReport("Returning a value of incorrect type for function. Expected %s.\n", TypeNames(CurrentFile->FunctionEntry->Type)); Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, CurrentFile->FunctionEntry, 0); printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name); if (bracketed) VerifyToken(LI_RPARE, ")"); Safe(); VerifyToken(LI_SEMIC, ";"); return Tree; } /* * Handles the surrounding logic for If statements. * * If statements have the basic form: * * if (condition) body * * if (condition) * body * * if (condition) { * body * } * * Conditions may be any truthy statement (such as a pointer, * object, integer), as conditions not recognized are auto- * matically converted to booleans. * * This meaning, any object that can be resolved to 0 or NULL * can be placed as the condition and used as a check. * * For example: * struct ASTNode* Node = NULL; * if(Node) { * // This will not run, as Node is ((void*)0) * } * */ struct ASTNode* IfStatement() { struct ASTNode* Condition, * True, * False = NULL; VerifyToken(KW_IF, "if"); VerifyToken(LI_LPARE, "("); Condition = ParsePrecedenceASTNode(0); // Limit if(x) to =? != < > <= => // No null checking, no arithmetic, no functions. // TODO: this if (Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE) Condition = ConstructASTBranch(OP_BOOLCONV, Condition->ExprType, Condition, NULL, 0); VerifyToken(LI_RPARE, ")"); True = ParseStatement(); if (CurrentFile->CurrentSymbol.type == KW_ELSE) { Tokenise(); False = ParseStatement(); } return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0); } /* * Handles the surrounding logic for While loops. * * While loops have the basic form: * while ( condition ) { body } * * When reaching the condition (which alike an If statement, * can be any truthy value), if it resolves to true: * The body is executed, and immediately the condition is checked * again. * This repeats until the condition resolves false, at which point * the loop executes no more. * * This can be prototyped as the following pseudo-assembler: * * cond: * check * jne exit * * jump cond * exit: * * * @return the AST of this statement * */ struct ASTNode* WhileStatement() { struct ASTNode* Condition, * Body; VerifyToken(KW_WHILE, "while"); VerifyToken(LI_LPARE, "("); Condition = ParsePrecedenceASTNode(0); if (Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE) Condition = ConstructASTBranch(OP_BOOLCONV, Condition->ExprType, Condition, NULL, 0); VerifyToken(LI_RPARE, ")"); CurrentFile->CurrentLoopDepth++; Body = ParseStatement(); CurrentFile->CurrentLoopDepth--; return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0); } /* * Handles the surrounding logic for For loops. * * They have the basic form of: * for ( init ; condition; iterator) { body } * * The initialiser is run only once upon reaching the for loop. * Then the condition is checked, and if true, the body is executed. * After execution of the body, the iterator is run and the condition * checked again. * * It can be prototyped as the following pseudo-assembler code: * * for: * * cond: * check * jne exit * * * jump cond * exit: * * * In the case of the implementation, "init" is the preoperator, * "iterator" is the postoperator. * * @return the AST of this statement */ struct ASTNode* ForStatement() { struct ASTNode* Condition, * Body; struct ASTNode* Preop, * Postop; struct ASTNode* Tree; VerifyToken(KW_FOR, "for"); VerifyToken(LI_LPARE, "("); Preop = ParseExpressionList(LI_SEMIC); VerifyToken(LI_SEMIC, ";"); Condition = ParsePrecedenceASTNode(0); if (Condition->Operation < OP_EQUAL || Condition->Operation > OP_GREATE) Condition = ConstructASTBranch(OP_BOOLCONV, Condition->ExprType, Condition, NULL, 0); VerifyToken(LI_SEMIC, ";"); Postop = ParseExpressionList(LI_RPARE); VerifyToken(LI_RPARE, ")"); CurrentFile->CurrentLoopDepth++; Body = ParseStatement(); CurrentFile->CurrentLoopDepth--; // We need to be able to skip over the body and the postop, so we group them together. Tree = ConstructASTNode(OP_COMP, RET_NONE, Body, NULL, Postop, NULL, 0); // We need to be able to jump to the top of the condition and fall through to the body, // so we group it with the last block Tree = ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Tree, NULL, 0); // We need to append the postop to the loop, to form the final for loop return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0); } /* * Handles the surrounding logic for the Print statement. * * This is a legacy hold-over from the early testing, and it * serves merely as a wrapper around the cstdlib printf.er function. * * It does, however (//TODO), attempt to guess the type that you * want to print, which takes a lot of the guesswork out of printing. * * @return the AST of this statement */ struct ASTNode* PrintStatement(void) { struct ASTNode* Tree; int LeftType, RightType; VerifyToken(KW_PRINT, "print"); Tree = ParsePrecedenceASTNode(0); LeftType = RET_INT; RightType = Tree->ExprType; Tree = MutateType(Tree, RightType, 0); if (!Tree) DieDecimal("Attempting to print an invalid type:", RightType); if (RightType) Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0); Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0); //ParseAST(Tree); return Tree; } struct ASTNode* SwitchStatement() { struct ASTNode* left, *root, *c, *casetree=NULL, *casetail; int looping=1, cases=0; int defaultpresent=0; int ASTOp, casevalue; printf("\tParsing switch statement\n"); CurrentFile->SwitchStatement = true; // Skip switch( Tokenise(); VerifyToken(LI_LPARE, "("); printf("\tSwitch: Reading switch expression\n"); // Fetch switch expression left = ParsePrecedenceASTNode(0); // Consume ) { VerifyToken(LI_RPARE, ")"); VerifyToken(LI_LBRAC, "{"); // Verify the switch expression (must be integer-compatible) if (!TypeIsInt(left->ExprType)) ErrorReport("Switch expression is not of integer type, instead %s.\n", TypeNames(left->ExprType)); Safe(); // Create the root Switch node root = ConstructASTBranch(OP_SWITCH, 0, left, NULL, 0); // Iterate down the switch node, generating cases while (looping) { switch (CurrentFile->CurrentSymbol.type) { case LI_RBRAC: if (cases == 0) Die("No cases in switch statement"); looping = 0; break; case KW_CASE: if (defaultpresent) Die("Case present after default in switch."); ASTOp = OP_CASE; Safe(); Tokenise(); // Parse case value left = ParsePrecedenceASTNode(0); if (left->Operation != TERM_INTLITERAL) Die("Expecting integer literal for case value"); casevalue = left->IntValue; printf("\t\tSwitch case %d found\n", casevalue); // Make sure nothing resolves to the same case value for (c = casetree; c != NULL; c = c->Right) if (casevalue == c->IntValue) Die("Duplicate case ID in switch statement"); // Fallthrough so that we get the case tree logic deduplicated case KW_DEFAULT: if (defaultpresent) Die("Duplicate default entries in switch"); // Duplicate check because CASE falls through into this block if (CurrentFile->CurrentSymbol.type == KW_DEFAULT) { ASTOp = OP_DEFAULT; defaultpresent = true; Tokenise(); printf("\t\tSwitch default case found\n"); } VerifyToken(LI_COLON, ":"); Safe(); left = ParseCompound(); OptionallyConsume(LI_SEMIC); cases++; Safe(); // Append this new case to the tree if (casetree == NULL) { casetree = casetail = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue); } else { casetail->Right = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue); casetail = casetail->Right; } break; default: ErrorReport("Unexpected token in switch statement: %s\n", TokenNames[CurrentFile->CurrentSymbol.type]); exit(1); } } root->IntValue = cases; root->Right = casetree; // Consume the right brace immediately VerifyToken(LI_RBRAC, "}"); CurrentFile->SwitchStatement = false; return root; } /** * Handles the surrounding logic for break statements * * They have the basic form of: * break; * * If there is a loop currently being evaluated, break will insert an immediate jump to the end of the loop. * All locals inside the loop will lose their binding at this point. * * It can be prototyped as the following pseudo-assembler code: * * while: * check * jne exit * * : jump exit * jump while * exit: * * * * @return the AST of this statement */ struct ASTNode* BreakStatement() { if (CurrentFile->CurrentLoopDepth == 0 && !CurrentFile->SwitchStatement) Die("Unable to break without a loop or switch statement"); Tokenise(); Safe(); VerifyToken(LI_SEMIC, ";"); Safe(); return ConstructASTLeaf(OP_BREAK, 0, NULL, 0); } /** * Handles the surrounding logic for continue statements * * They have the basic form of: * continue; * * If there is a loop currently being evaluated, continue will insert an immediate jump to the start of the loop. * * It can be prototyped as the following pseudo-assembler code: * * while: * check * jne exit * * : jump while * jump while * exit: * * * * @return the AST of this statement */ struct ASTNode* ContinueStatement() { if (CurrentFile->CurrentLoopDepth == 0) Die("Unable to break without a loop"); Tokenise(); return ConstructASTLeaf(OP_CONTINUE, 0, NULL, 0); } /* * Handles the surrounding logic for all of the logical and semantic * postfixes. * * Postfixes are tokens that are affixed to the end of another, and * change behaviour in some way. These can be added calculations, * some form of transformation, or other. * * A current list of postfixes: * * (): Call a function * * []: Index or define an array. * * ++: Increment a variable AFTER it is returned * NOTE: there is a prefix variant of this for incrementing BEFOREhand. * * --: Decrement a variable AFTER it is returned * NOTE: there is a prefix variant of this for decrementing BEFOREhand. * * Planned postfixes: * * >>: Arithmetic-Shift-Right a variable by one (Divide by two) * NOTE: there is a prefix variant of this for shifting left - multiplying by two. * * @return the AST of the statement plus its' postfix */ struct ASTNode* PostfixStatement() { struct ASTNode* Tree; struct SymbolTableEntry* Entry; // Early exit if we find an enum value if ((Entry = FindEnumMember(CurrentIdentifier)) != NULL) { Tokenise(); return ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, Entry->IntValue); } Tokenise(); if (CurrentFile->CurrentSymbol.type == LI_LPARE) return CallFunction(); if (CurrentFile->CurrentSymbol.type == LI_LBRAS) return AccessArray(); // If we get here, we must be a variable. // (as functions have been called and arrays have been indexed) // Check that the variable is recognized.. if ((Entry = FindSymbol(CurrentIdentifier)) == NULL || (Entry->Structure != ST_VAR && Entry->Structure != ST_FUNC)) { DumpAllLists(); DieMessage("Unknown Variable", CurrentIdentifier); } // Here we check for postincrement and postdecrement. switch (CurrentFile->CurrentSymbol.type) { case LI_DOT: return AccessMember(false); case LI_ARROW: return AccessMember(true); case PPMM_PLUS: Tokenise(); Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0); break; case PPMM_MINUS: Tokenise(); Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0); break; default: Tree = ConstructASTLeaf(REF_IDENT, Entry->Type, Entry, 0); } return Tree; } /* * Handles the surrounding logic for all of the logical and semantic * prefixes. * * Prefixes are tokens that are affixed to the start of another, and * change behaviour in some way. These can be added calculations, * some form of transformation, or other. * * A current list of prefixes: * * !: Invert the boolean result of a statement or truthy value. * * ~: Invert the individual bits in a number * * -: Invert the number around the axis of 0 (negative->positive, positive->negative) * * ++: Increment a variable BEFORE it is returned. * NOTE: there is a postfix variant of this for incrementing AFTER the fact. * * --: Decrement a variable BEFORE it is returned. * NOTE: there is a postfix variant of this for decrementing AFTER the fact. * * &: Dereference the following object (Get the address that contains it) * * *: Get the object pointed at by the number following * * Planned prefixes: * * <<: Arithmetic-Shift-Left a variable by one (Multiply by two) * NOTE: there is a postfix variant of this for shifting right - dividing by two. * * @return the AST of this statement, plus its' prefixes and any postfixes. */ struct ASTNode* PrefixStatement() { struct ASTNode* Tree; switch (CurrentFile->CurrentSymbol.type) { case BOOL_INVERT: Tokenise(); Tree = PrefixStatement(); Tree->RVal = 1; Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0); break; case BIT_NOT: Tokenise(); Tree = PrefixStatement(); Tree->RVal = 1; Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0); break; case AR_MINUS: Tokenise(); Tree = PrefixStatement(); Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0); break; case PPMM_PLUS: Tokenise(); Tree = PrefixStatement(); if (Tree->Operation != REF_IDENT) Die("++ not followed by identifier"); Tree = ConstructASTBranch(OP_PREINC, Tree->ExprType, Tree, NULL, 0); break; case PPMM_MINUS: Tokenise(); Tree = PrefixStatement(); if (Tree->Operation != REF_IDENT) Die("-- not followed by identifier"); Tree = ConstructASTBranch(OP_PREDEC, Tree->ExprType, Tree, NULL, 0); break; case BIT_AND: Tokenise(); // To allow things like: // x = &&y; // We need to recursively parse prefixes; Tree = PrefixStatement(); if (Tree->Operation != REF_IDENT) Die("& must be followed by another & or an identifier."); Tree->Operation = OP_ADDRESS; Tree->ExprType = PointerTo(Tree->ExprType); break; case AR_STAR: Tokenise(); Tree = PrefixStatement(); if (Tree->Operation != REF_IDENT && Tree->Operation != OP_DEREF) Die("* must be followed by another * or an identifier."); Tree = ConstructASTBranch(OP_DEREF, ValueAt(Tree->ExprType), Tree, NULL, 0); break; default: Tree = ParsePrimary(); } return Tree; }