diff --git a/include/Data.h b/include/Data.h index 04dcd84..ff7714a 100644 --- a/include/Data.h +++ b/include/Data.h @@ -18,6 +18,11 @@ extern_ struct SymbolTableEntry* Globals, *GlobalsEnd; extern_ struct SymbolTableEntry* Locals, *LocalsEnd; extern_ struct SymbolTableEntry* Params, *ParamsEnd; +extern_ struct SymbolTableEntry* Structs, *StructsEnd; +extern_ struct SymbolTableEntry* StructMembers, *StructMembersEnd; + +extern_ struct SymbolTableEntry* Unions, *UnionsEnd; +extern_ struct SymbolTableEntry* Enums, *EnumsEnd; extern_ bool OptDumpTree; extern_ bool OptKeepAssembly; diff --git a/include/Defs.h b/include/Defs.h index 970ddcf..27de1e9 100644 --- a/include/Defs.h +++ b/include/Defs.h @@ -92,7 +92,8 @@ enum TokenTypes { KW_ELSE, KW_WHILE, KW_FOR, - KW_RETURN + KW_RETURN, + KW_STRUCT }; /* @@ -178,7 +179,6 @@ struct ASTNode { union { int Size; // OP_SCALE's linear representation int IntValue; // TERM_INTLIT's Value - int ID; // LV_IDENT's Symbols[] index. }; }; @@ -215,6 +215,9 @@ struct SymbolTableEntry { enum StorageScope { SC_GLOBAL = 1, // Global Scope + SC_STRUCT, // Struct Definitions + SC_ENUM, // Enum Definitions + SC_MEMBER, // The members of Structs or Enums //SC_CLASS, // Class-local definitions //SC_STATIC, // Static storage definitions SC_PARAM, // Function parameters @@ -274,7 +277,7 @@ void DisplayUsage(char* ProgName); * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -int Tokenise(struct Token* Token); +void Tokenise(); void VerifyToken(int Type, char* TokenExpected); void RejectToken(struct Token* Token); @@ -355,14 +358,15 @@ struct ASTNode* PrintStatement(void); struct SymbolTableEntry* FindSymbol(char* Symbol); struct SymbolTableEntry* FindLocal(char* Symbol); struct SymbolTableEntry* FindGlobal(char* Symbol); +struct SymbolTableEntry* FindStruct(char* Symbol); +struct SymbolTableEntry* FindMember(char* Symbol); void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node); void 
FreeLocals(); void ClearTables(); -struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset); - +struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * C O N T R O L S T A T U S * * * * @@ -460,7 +464,7 @@ void AsFunctionEpilogue(struct SymbolTableEntry* Entry); * * * * D E C L A R A T I O N * * * * * * * * * * * * * * * * * * * * * * * * * * */ -struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope); +struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope); struct ASTNode* ParseIdentifier(void); struct ASTNode* IfStatement(); diff --git a/src/Delegate.c b/src/Delegate.c index 3547587..953a053 100644 --- a/src/Delegate.c +++ b/src/Delegate.c @@ -52,7 +52,7 @@ char* Compile(char* InputFile) { if(OptVerboseOutput) printf("Compiling %s\r\n", InputFile); - Tokenise(&CurrentToken); + Tokenise(); AssemblerPreamble(); diff --git a/src/Lexer.c b/src/Lexer.c index f1af0ea..eef5f28 100644 --- a/src/Lexer.c +++ b/src/Lexer.c @@ -11,10 +11,29 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * C H A R S T R E AM * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * The Lexer holds a "stream" of characters. + * You may read a character from the stream, and if it is not + * the desired character, it may be placed into an "overread" buffer. + * The overread buffer is checked before the source file is read any further. + * This provides an effective way to "un-read" a character. + * + * @param Char: The character to "un-read" + * + */ + static void ReturnCharToStream(int Char) { Overread = Char; } +/* + * NextChar allows you to ask the Lexer for the next useful character. + * As mentioned above, it checks the overread buffer first. 
+ * + * @return the character as int + * + */ static int NextChar(void) { int Char; @@ -32,6 +51,10 @@ static int NextChar(void) { return Char; } +/* + * Searches for the next useful character, skipping whitespace. + * @return the character as int. + */ static int FindChar() { int Char; @@ -45,14 +68,31 @@ static int FindChar() { return Char; } +/* + * Allows the conversion between ASCII, hex and numerals. + * @param String: The set of all valid results + * @param Char: The ASCII character to convert + * @return the ASCII character in int form, if in the set of valid results. -1 if not. + */ + static int FindDigitFromPos(char* String, char Char) { char* Result = strchr(String, Char); return(Result ? Result - String : -1); } +/* + * Facilitates the easy checking of expected tokens. + * NOTE: there is (soon to be) an optional variant of this function that + * reads a token but does not consume it ( via Tokenise ) + * + * @param Type: The expected token, in terms of value of the TokenTypes enum. + * @param TokenExpected: A string to output when the token is not found. + * + */ + void VerifyToken(int Type, char* TokenExpected) { if(CurrentToken.type == Type) - Tokenise(&CurrentToken); + Tokenise(); else { printf("Expected %s on line %d\n", TokenExpected, Line); exit(1); @@ -61,6 +101,12 @@ void VerifyToken(int Type, char* TokenExpected) { static struct Token* RejectedToken = NULL; +/* + * Rejected Tokens and the Overread Stream are identical concepts. + * This was implemented first, but it is no longer used. + * TODO: Refactor this function out. + */ + void RejectToken(struct Token* Token) { if(RejectedToken != NULL) Die("Cannot reject two tokens in a row!"); @@ -72,6 +118,21 @@ void RejectToken(struct Token* Token) { * * * * L I T E R A L S A N D I D E N T I F I E R S * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * Facilitates the parsing of integer literals from the file. 
+ * Currently only supports the decimal numbers, despite the + * FindDigitFromPos function allowing conversion. + * + * The function loops over the characters, multiplying by 10 and adding + * the new value on top, until a non-numeric character is found. + * At that point, it returns the non-numeric character to the Overread Stream + * and returns the calculated number. + * + * @param Char: The first digit to scan. + * @return the full parsed number as an int. + * + */ + static int ReadInteger(int Char) { int CurrentChar = 0; int IntegerValue = 0; @@ -86,7 +147,23 @@ static int ReadInteger(int Char) { return IntegerValue; } -// Variable identifier, keyword, function. +/* + * An Identifier can be any of: + * * A function name + * * A variable name + * * A struct name + * / A class name + * / An annotation name + * + * This function allows a full name to be read into a buffer, with a defined + * start character and a defined maximum text size limit. + * + * @param Char: The first char of the Identifier. + * @param Buffer: The location to store the Identifier. (usually CurrentIdentifier, a compiler global defined for this purpose) + * @param Limit: The maximum Identifier length. + * @return the length of the parsed identifier + * + */ static int ReadIdentifier(int Char, char* Buffer, int Limit) { int ind = 0; @@ -108,6 +185,17 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) { return ind; } +/* + * Char literals appear as 'x' + * + * They are bounded by two apostrophes. + * They can contain any 1-byte ASCII character, as well as some + * predefined, standard escape codes. + * This function attempts to get the character from the file, with escape codes intact. + * + * @return the character as an int + * + */ static int ReadCharLiteral() { int Char; Char = NextChar(); @@ -131,7 +219,20 @@ static int ReadCharLiteral() { return Char; } - +/* + * String literals appear as "hello world" + * + * They are bounded by two quotation marks. 
+ * They can contain an arbitrary length of text. + * They are backed by an array of chars (hence the char* type) and thus + * have a practically unlimited length. + * + * To read a String Literal, it is a simple matter of reading Char Literals until + * the String termination token is identified - the last quotation mark. + * + * @param Buffer: The buffer into which to write the string. (usually CurrentIdentifier, a compiler global defined for this purpose) + * + */ static int ReadStringLiteral(char* Buffer) { int Char; @@ -148,9 +249,18 @@ static int ReadStringLiteral(char* Buffer) { } /* - * This function is what defines the valid keywords for the language - * //TODO: move this to a static list? - * //TODO: More optimisations? + * Keywords are source-code tokens / strings that are reserved for the compiler. + * They cannot be used as identifiers on their own. + * + * This function is where all of the keywords are added, and where most aliases are going to be stored. + * + * It uses a switch on the first character of the input string as an optimisation - rather than checking each + * keyword against the String individually, it only needs to compare a single number. This can be optimised into + * a jump table by the compiler for further optimisation, making this one of the fastest ways to switch + * on a full string. + * + * @param Str: The keyword input to try to parse + * @return the token expressed in terms of values of the TokenTypes enum * */ static int ReadKeyword(char* Str) { @@ -203,7 +313,6 @@ static int ReadKeyword(char* Str) { break; case 'p': - // This is a huge optimisation once we have as many keywords as a fully featured language. 
if(!strcmp(Str, "print")) return KW_PRINT; break; @@ -212,6 +321,11 @@ static int ReadKeyword(char* Str) { if(!strcmp(Str, "return")) return KW_RETURN; break; + + case 's': + if(!strcmp(Str, "struct")) + return KW_STRUCT; + break; case 'v': if(!strcmp(Str, "void")) @@ -234,8 +348,21 @@ static int ReadKeyword(char* Str) { * * * * T O K E N I S E R * * * * * * * * * * * * * * * * * * * * * * * * */ -int Tokenise(struct Token* Token) { +/* + * Handles the majority of the work of reading tokens into the stream. + * It reads chars with FindChar, categorizing individual characters or small + * strings into their proper expression (as a value of the TokenTypes enum) + * + * It also defers the reading of numeric literals and char literals to the proper functions. + * + * If needed, it can also read Identifiers, for variable or function naming. + * + * This function may be the main bottleneck in the lexer. + * + */ +void Tokenise() { int Char, TokenType; + struct Token* Token = &CurrentToken; if(RejectedToken != NULL) { Token = RejectedToken; @@ -439,7 +566,5 @@ int Tokenise(struct Token* Token) { DieChar("Unrecognized character", Char); } - - return 1; } diff --git a/src/Main.c b/src/Main.c index 70d8dde..b07888a 100644 --- a/src/Main.c +++ b/src/Main.c @@ -73,48 +73,54 @@ char* TokenNames[] = { "While keyword", "For keyword", - "Return keyword" + "Return keyword", + + "Struct keyword" }; int main(int argc, char* argv[]) { -/* Line = 1; - Overread = '\n'; - CurrentGlobal = 0; - struct ASTNode* Node; - CurrentLocal = SYMBOLS - 1; */ + // Option initialisers OptDumpTree = false; OptKeepAssembly = false; OptAssembleFiles = false; OptLinkFiles = true; OptVerboseOutput = false; + + // Temporary .o storage and counter char* ObjectFiles[100]; int ObjectCount = 0; + // Parse command line arguments. int i; - for(i = 1; i < argc; i++) { - if(*argv[i] != '-') // not a flag + for(i = 1/*skip 0*/; i < argc; i++) { + // If we're not a flag, we can skip. 
+ // We only care about flags in rows. + // ie. erc >> -v -T -o << test.exe src/main.er + if(*argv[i] != '-') break; - + + // Once we identify a flag, we need to make sure it's not just a minus in-place. for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { + // Finally, identify what option is being invoked. switch(argv[i][j]) { - case 'o': + case 'o': // output OutputFileName = argv[++i]; break; - case 'T': + case 'T': // Dump the syntax tree (debug) OptDumpTree = true; break; - case 'c': + case 'c': // Compile and assemble, but do not link OptAssembleFiles = true; OptKeepAssembly = false; OptLinkFiles = false; break; - case 'S': + case 'S': // Keep the generated assembly; do not assemble or link OptAssembleFiles = false; OptKeepAssembly = true; OptLinkFiles = false; break; - case 'v': + case 'v': // Verbose output OptVerboseOutput = true; break; default: @@ -123,29 +129,42 @@ int main(int argc, char* argv[]) { } } - if(i >= argc) + // If we didn't provide anything other than flags, we need to show how to use the program. + if(i >= argc) DisplayUsage(argv[0]); + // For the rest of the files specified, we can iterate them left to right. while(i < argc) { + // Compile the file by invoking the Delegate CurrentASMFile = Compile(argv[i]); if(OptLinkFiles || OptAssembleFiles) { + // If we need to assemble (or link, which requires assembly) + // then we invoke the Delegate again CurrentObjectFile = Assemble(CurrentASMFile); + // We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too. if(ObjectCount == 98) { fprintf(stderr, "Too many inputs"); - return 1; + return 1; // We use return because we're in main, rather than invoking Die. } + + // Move the ObjectCount forward. ObjectFiles[ObjectCount++] = CurrentObjectFile; + // Clear the new, forwarded index ObjectFiles[ObjectCount] = NULL; } if(!OptKeepAssembly) + // unlink = delete unlink(CurrentASMFile); + i++; } if(OptLinkFiles) { + // If needed, invoke the Delegate one last time. 
Link(OutputFileName, ObjectFiles); if(!OptAssembleFiles) { + // Even though we need to assemble to link, we can respect the user's options and delete the intermediary files. for(i = 0; ObjectFiles[i] != NULL; i++) unlink(ObjectFiles[i]); } @@ -155,6 +174,11 @@ int main(int argc, char* argv[]) { } +/* + * Akin to a Halt and Catch Fire method. + * Simply prints an error, cleans up handles, and closes. + */ + void Die(char* Error) { fprintf(stderr, "%s on line %d\n", Error, Line); fclose(OutputFile); @@ -162,6 +186,9 @@ void Die(char* Error) { exit(1); } +/* + * A variant of Die with an extra String attached. + */ void DieMessage(char* Error, char* Reason) { fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line); fclose(OutputFile); @@ -169,6 +196,9 @@ void DieMessage(char* Error, char* Reason) { exit(1); } +/* + * A variant of Die with an extra integer attached. + */ void DieDecimal(char* Error, int Number) { fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line); fclose(OutputFile); @@ -176,6 +206,9 @@ void DieDecimal(char* Error, int Number) { exit(1); } +/* + * A variant of Die with an extra character attached. + */ void DieChar(char* Error, int Char) { fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line); fclose(OutputFile); diff --git a/src/Parser.c b/src/Parser.c index fecf4b0..52691f1 100644 --- a/src/Parser.c +++ b/src/Parser.c @@ -10,12 +10,10 @@ #include "Data.h" /* - * Precedence is directly related to Token Type. - * - * enum TokenTypes { - * LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT - * }; - * + * The Precedence of an operator is directly related to Token Type. + * Precedence determines how soon the operator and its surrounding values + * will be calculated and aliased. + * This allows for things like the common Order of Operations. 
*/ static int Precedence[] = { 0, 10, // EOF, ASSIGN @@ -30,6 +28,13 @@ static int Precedence[] = { 110 // / }; +/* + * Handles gathering the precedence of an operator from its token, + * in terms of values of the TokenTypes enum. + * + * Error handling is also done here, so that EOF or non-operators are not executed. + * + */ static int OperatorPrecedence(int Token) { int Prec = Precedence[Token]; @@ -40,6 +45,13 @@ static int OperatorPrecedence(int Token) { return Prec; } +/* + * If the value is a right-expression, or in other words is right associative, + * then it can be safely calculated beforehand and aliased to a value. + * In this case, we can try to alias (or constant fold) everything on the right side + * of an assignment. + */ + static int IsRightExpr(int Token) { return (Token == LI_EQUAL); } @@ -48,6 +60,29 @@ static int IsRightExpr(int Token) { * * * N O D E C O N S T R U C T I O N * * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * ASTNodes form the structure of the language that moves the bulk of + * data around within the compiler. + * They contain: + * * An Operation (usually 1:1 with an input token), + * * A Type (to identify the size of data it contains), + * * Two more Left and Right ASTNodes (to form a doubly-linked list) + * * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop) + * * A Symbol Table Entry + * * An Integer Value + * * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval + * + * This is the only function where they are constructed. + * + * @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum + * @param Type: The data type of this Node, in terms of values of the DataTypes enum. + * @param Left: The Node that is attached to the left side branch of this root. + * @param Middle: The Node that is attached to the middle of this root, if applicable. 
+ * @param Right: The Node that is attached to the right side branch of this root. + * @param Symbol: The Symbol Table Entry that represents this Node, if applicable. + * @param IntValue: The integer value encoded by this Node, if applicable. + * @return a newly constructed AST Node + */ struct ASTNode* ConstructASTNode(int Operation, int Type, struct ASTNode* Left, struct ASTNode* Middle, @@ -75,10 +110,28 @@ struct ASTNode* ConstructASTNode(int Operation, int Type, } +/* + * AST Leaves are categorized by their lack of child nodes. + * @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum + * @param Type: The data type of this Node, in terms of values of the DataTypes enum. + * @param Symbol: The Symbol Table Entry that represents this Node, if applicable. + * @param IntValue: The integer value encoded by this Node, if applicable. + * @return a newly constructed AST Node + */ struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) { return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue); } +/* + * AST Branches are categorized by having only one child node. + * These are sometimes called Unary Branches. + * @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum + * @param Type: The data type of this Node, in terms of values of the DataTypes enum. + * @param Left: The Node that is attached to the left side branch of this root. + * @param Symbol: The Symbol Table Entry that represents this Node, if applicable. + * @param IntValue: The integer value encoded by this Node, if applicable. 
+ * @return a newly constructed AST Node + */ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) { return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue); } @@ -89,10 +142,10 @@ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left * * * * * * * * * * * * * * * * * * * * * * * */ /* - * Take a Token Type, and convert it to an AST-Node Operation. - * - * TokenTypes and SyntaxOps are synchronized to make this easy. - * + * TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that + * these are synchronized well. + * This allows the parsing operation to be little more than a bounds check. + * Otherwise, this would be a gigantic switch statement. */ int ParseTokenToOperation(int Token) { @@ -103,11 +156,13 @@ int ParseTokenToOperation(int Token) { } /* - * Parse a primary (terminal) expression. - * This currently handles literal expressions, constructing a leaf node - * and handing control back up the chain. - * + * Primary expressions may be any one of: + * * A terminal integer literal + * * A terminal string literal + * * A variable + * * A collection of expressions bounded by parentheses. * + * @return the AST Node that represents this expression */ struct ASTNode* ParsePrimary(void) { @@ -134,7 +189,7 @@ struct ASTNode* ParsePrimary(void) { case LI_LPARE: // Starting a ( expr ) block - Tokenise(&CurrentToken); + Tokenise(); Node = ParsePrecedenceASTNode(0); @@ -144,12 +199,26 @@ struct ASTNode* ParsePrimary(void) { } - Tokenise(&CurrentToken); + Tokenise(); return Node; } +/* + * Parse a single binary expression. + * It ensures that these expressions are parsed to their full extent, that + * the order of operations is upheld, that the precedence of the prior + * iteration is considered, and that every error is handled. 
+ * + * This is where all of the right-associative statements are folded, where + * type mismatches and widening are handled properly, and that all parsing + * is over by the time the end tokens ") } ] ;" are encountered. + * + * @param PreviousTokenPrecedence: The precedence of the operator to the left. + * @return the AST Node corresponding to this block. + * + */ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { struct ASTNode* LeftNode, *RightNode; struct ASTNode* LeftTemp, *RightTemp; @@ -159,25 +228,19 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { LeftNode = PrefixStatement(); NodeType = CurrentToken.type; - //printf("%d\r\n", CurrentToken.type); + if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) { - //printf("Current token matches ; ) ]\r\n"); LeftNode->RVal = 1; return LeftNode; } - //printf("Current token has value %d, type %s\n", CurrentToken.value, TokenNames[CurrentToken.type]); while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) { - //printf("inside while\n"); - Tokenise(&CurrentToken); + Tokenise(); if(CurrentToken.type == LI_RPARE) break; RightNode = ParsePrecedenceASTNode(Precedence[NodeType]); - /* - LeftType = LeftNode->ExprType; - RightType = RightNode->ExprType; - */ + /** * While parsing this node, we may need to widen some types. * This requires a few functions and checks. 
@@ -194,9 +257,6 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { if(LeftNode == NULL) Die("Incompatible Expression encountered in assignment"); - - //printf("\tAssigning variable: %s value %d\n", Symbols[FindSymbol(CurrentIdentifier)].Name, RightNode->Value.IntValue); - // LeftNode holds the target, the target variable in this case printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name); @@ -212,11 +272,9 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { LeftNode->RVal = 1; RightNode->RVal = 1; - //printf("mutate left\r\n"); LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType); - //printf("mutate right\r\n"); + RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType); - //printf("mutate right over\r\n"); /** * If both are null, the types are incompatible. */ @@ -267,105 +325,21 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { } -/* struct ASTNode* ParseMultiplicativeASTNode(void) { - struct ASTNode* LeftNode, * RightNode; - int NodeType; - - LeftNode = ParsePrimary(); - - NodeType = CurrentToken.type; - if(NodeType == LI_EOF) - return LeftNode; - - while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) { - Tokenise(&CurrentToken); - - RightNode = ParsePrimary(); - - LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0); - - NodeType = CurrentToken.type; - if(NodeType == LI_EOF) - break; - } - - return LeftNode; -} - */ -/* struct ASTNode* ParseAdditiveASTNode(void) { - struct ASTNode* LeftNode, * RightNode; - int NodeType; - - LeftNode = ParseMultiplicativeASTNode(); - - NodeType = CurrentToken.type; - if(NodeType == LI_EOF) - return LeftNode; - - while(1) { - Tokenise(&CurrentToken); - - RightNode = ParseMultiplicativeASTNode(); - - LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0); - - NodeType = CurrentToken.type; - if(NodeType == LI_EOF) - break; - } - - return LeftNode; -} - */ - - -/* * * * * 
* * * * * * * * * * * * * * * * * * * - * * * * I N T E R P R E T A T I O N * * * * - * * * * * * * * * * * * * * * * * * * * * * * */ - -/* -int ParseAST(struct ASTNode* Node) { - - - int LeftVal, RightVal; - - if(Node->Left) - LeftVal = ParseAST(Node->Left); - - if(Node->Right) - RightVal = ParseAST(Node->Right); - - /* - if(Node->Operation == TERM_INTLITERAL) - printf("int %d\n", Node->IntValue); - else - printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal); - - - switch(Node->Operation) { - case OP_ADD: - return (LeftVal + RightVal); - case OP_SUBTRACT: - return (LeftVal - RightVal); - case OP_MULTIPLY: - return (LeftVal * RightVal); - case OP_DIVIDE: - return (LeftVal / RightVal); - - case REF_IDENT: - case TERM_INTLITERAL: - return Node->Value.IntValue; - default: - fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation); - exit(1); - } -} -*/ /* * * * * * * * * * * * * * * * * * * * * * * * * F U N C T I O N S * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * Handles the logic for calling a function. + * This is invoked by an identifier being recognized, followed by a "(.*)" string. + * + * It simply checks that the function exists, that the parameters given are valid, + * and generates the AST Node for calling it. + * + * @return the AST Node for calling the function stored in CurrentIdentifer + * + */ struct ASTNode* CallFunction() { struct ASTNode* Tree; struct SymbolTableEntry* Function; @@ -385,6 +359,21 @@ struct ASTNode* CallFunction() { return Tree; } + +/* + * An expression list is used: + * * In the call to a function + * + * It is parsed by seeking left parentheses "(", parsing binary expressions + * until either a comma or a right parentheses is found. + * + * The former will cause another expression to be parsed, the latter will cause + * parsing to stop. + * + * @return the AST Node representing every expression in the list, glued end to + * end with a COMPOSITE operation. 
+ * + */ struct ASTNode* GetExpressionList() { struct ASTNode* Tree = NULL, *Child = NULL; int Count; @@ -397,7 +386,7 @@ struct ASTNode* GetExpressionList() { switch(CurrentToken.type) { case LI_COM: - Tokenise(&CurrentToken); + Tokenise(); break; case LI_RPARE: break; @@ -414,6 +403,18 @@ struct ASTNode* GetExpressionList() { * * * * S T A T E M E N T S * * * * * * * * * * * * * * * * * * * * * * * * * */ +/* + * Handles parsing an individual statement. + * + * It serves as a wrapper around: + * * If Statement + * * While Statement + * * For Statement + * * Return Statement + * * Numeric literals and variables + * * Binary Expressions + * @return the AST Node representing this single statement + */ struct ASTNode* ParseStatement(void) { int Type; @@ -425,18 +426,10 @@ struct ASTNode* ParseStatement(void) { printf("\t\tNew Variable: %s\n", CurrentIdentifier); Type = ParseOptionalPointer(); VerifyToken(TY_IDENTIFIER, "ident"); - BeginVariableDeclaration(Type, SC_LOCAL); + BeginVariableDeclaration(Type, NULL, SC_LOCAL); VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment? return NULL; - - /*case TY_IDENTIFIER: - if(Symbols[FindSymbol(CurrentIdentifier)].Structure == ST_FUNC) - printf("\t\tCalling Function: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name); - else - printf("\t\tAssigning variable: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name); - - return ParseIdentifier(); - */ + case KW_IF: return IfStatement(); @@ -451,11 +444,26 @@ struct ASTNode* ParseStatement(void) { default: ParsePrecedenceASTNode(0); - //DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type); } } +/* + * Handles parsing multiple statements or expressions in a row. + * These are typically grouped together with the Compound tokens "{ }" + * and seperated by the semicolon ";". 
+ * + * Single Statements are parsed until a semicolon is reached, at which + * point another statement will be parsed, or until a Right Compound + * token is reached ("}"), at which point parsing will stop. + * + * It is useful for: + * * Tightly identifying related blocks of code + * * Containing the many statements of functions + * + * @return the AST Node representing this compound statement + * + */ struct ASTNode* ParseCompound() { struct ASTNode* Left = NULL, *Tree; @@ -486,6 +494,21 @@ struct ASTNode* ParseCompound() { } } +/* + * This is the entry point to the parser/lexer. + * + * By definition, Global definitions are accessible anywhere. + * As of right now (20/01/2021), classes are unimplemented. + * This means that all functions and all function prototypes are globally scoped. + * + * You may also define variables, constants, preprocessor directives and other text + * in the global scope. + * + * The function itself loops, parsing either variables or functions, until it + * reaches the end of the file. 
+ * + */ + void ParseGlobals() { struct ASTNode* Tree; int Type, FunctionComing; @@ -516,7 +539,7 @@ void ParseGlobals() { } } else { printf("\tParsing global variable declaration\n"); - BeginVariableDeclaration(Type, SC_GLOBAL); + BeginVariableDeclaration(Type, NULL, SC_GLOBAL); VerifyToken(LI_SEMIC, ";"); } diff --git a/src/Pointers.c b/src/Pointers.c index 002d8cb..171b1a6 100644 --- a/src/Pointers.c +++ b/src/Pointers.c @@ -22,22 +22,30 @@ int ValueAt(int Type) { return (Type - 1); } -int ParseOptionalPointer() { +int ParseOptionalPointer(struct SymbolTableEntry** Composite) { int Type; switch(CurrentToken.type) { case TY_VOID: Type = RET_VOID; + Tokenise(); break; case TY_CHAR: Type = RET_CHAR; + Tokenise(); break; case TY_INT: Type = RET_INT; + Tokenise(); break; case TY_LONG: Type = RET_LONG; + Tokenise(); + break; + case KW_STRUCT: + Type = DAT_STRUCT; + *Composite = BeginStructDeclaration(); break; default: DieDecimal("Illegal type for pointerisation", CurrentToken.type); @@ -47,12 +55,13 @@ int ParseOptionalPointer() { // x = **y; // possible. while(1) { - Tokenise(&CurrentToken); + /* The switch above already consumed the type token, so only '*' tokens are consumed here. */ printf("\t\t\tType on parsing is %d\n", CurrentToken.type); if(CurrentToken.type != AR_STAR) break; Type = PointerTo(Type); + Tokenise(); } return Type; @@ -68,7 +77,7 @@ struct ASTNode* AccessArray() { LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0); //printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]); - Tokenise(&CurrentToken); + Tokenise(); //printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]); RightNode = ParsePrecedenceASTNode(0); diff --git a/src/Statements.c b/src/Statements.c index 1aa4b89..9cf9161 100644 --- a/src/Statements.c +++ b/src/Statements.c @@ -8,9 +8,27 @@ #include #include -static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { +/* + * Handles reading in a comma-separated list of declarations. 
+ * Erythro treats structs, enums and function parameters the same in this regard - + * comma separated. + * + * C and C++ tend to treat enums and structs differently - the former separated by commas, + * the latter separated by semicolons. + * + * Note that since functions are read in through parentheses, and structs/enums are read in + * through brackets, the end character is configurable. + * + * @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable. + * @param Storage: The Storage Scope of this declaration list. + * @param End: The end token, in terms of TokenTypes enum values. + * @return the amount of declarations read in. + * + */ + +static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) { int TokenType, ParamCount = 0; - struct SymbolTableEntry* PrototypePointer = NULL; + struct SymbolTableEntry* PrototypePointer = NULL, *Composite = NULL; if(FunctionSymbol != NULL) PrototypePointer = FunctionSymbol->Start; @@ -24,19 +42,15 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { DieDecimal("Function paramater of invalid type at index", ParamCount + 1); PrototypePointer=PrototypePointer->NextSymbol; } else { - BeginVariableDeclaration(TokenType, SC_PARAM); + BeginVariableDeclaration(TokenType, Composite, Storage); } ParamCount++; - switch(CurrentToken.type) { - case LI_COM: - Tokenise(&CurrentToken); - break; - case LI_RPARE: - break; - default: - DieDecimal("Unexpected token in parameter", CurrentToken.type); - } + if((CurrentToken.type != LI_COM) && (CurrentToken.type != End)) + DieDecimal("Unexpected token in parameter", CurrentToken.type); + + if(CurrentToken.type == LI_COM) + Tokenise(); } if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length)) @@ -45,6 +59,61 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { return ParamCount; } +/* + * Handles the declaration of a new struct. 
+ * struct thisStct { int x, int y, int z }; + * + * Verifies that the current identifier is not used, + * verifies that this is not a redefinition (excluding + * the case where there is a declaration but no definition) + * and then saves it into the Structs symbol table. + * + * @return the Symbol Table entry of this new struct. + */ + +struct SymbolTableEntry* BeginStructDeclaration() { + struct SymbolTableEntry* Composite = NULL, *Member; + int Offset; + + Tokenise(); + + if(CurrentToken.type == TY_IDENTIFIER) { + Composite = FindStruct(CurrentIdentifier); + Tokenise(); + } + + if(CurrentToken.type != LI_LBRAC) { + if(Composite == NULL) + DieMessage("Unknown Struct", CurrentIdentifier); + return Composite; + } + + if(Composite) + DieMessage("Redefinition of struct", CurrentIdentifier); + + Composite = AddSymbol(CurrentIdentifier, DAT_STRUCT, 0, SC_STRUCT, 0, 0, NULL); + Tokenise(); + + ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAS); + VerifyToken(LI_RBRAS, "]"); + + Composite->Start = StructMembers; + StructMembers = StructMembersEnd = NULL; + + Member = Composite->Start; + Member->SinkOffset = 0; + Offset = TypeSize(Member->Type, Member->CompositeType); + + for(Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) { + Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1); + + Offset += TypeSize(Member->Type, Member->CompositeType); + } + + Composite->Length = Offset; + return Composite; +} + /* * Handles the declaration of a type of a variable. * int newVar; @@ -52,11 +121,12 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { * It verifies that we have a type keyword followed by a * unique, non-keyword identifier. * - * It then stores this variable into the symbol table, + * It then stores this variable into the appropriate symbol table, * and returns the new item. * + * @return the Symbol Table entry of this new variable. 
*/ -struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) { +struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) { struct SymbolTableEntry* Symbol = NULL; switch(Scope) { @@ -66,33 +136,50 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) { case SC_LOCAL: case SC_PARAM: if(FindLocal(CurrentIdentifier) != NULL) - DieMessage("Invalid redelcaration of local variable", CurrentIdentifier); + DieMessage("Invalid redeclaration of local variable", CurrentIdentifier); + case SC_MEMBER: + if(FindMember(CurrentIdentifier) != NULL) + DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier); } if(CurrentToken.type == LI_LBRAS) { - Tokenise(&CurrentToken); + Tokenise(); if(CurrentToken.type == LI_INT) { switch(Scope) { case SC_GLOBAL: - Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0); + Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL); break; case SC_LOCAL: case SC_PARAM: + case SC_MEMBER: Die("Local arrays are unimplemented"); } } - Tokenise(&CurrentToken); + Tokenise(); VerifyToken(LI_RBRAS, "]"); } else { - Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0); + Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite); } return Symbol; } +/* + * Handles the declaration of a new function. 
+ * Verifies that the identifier is not taken (excluding the case + * where there is a declaration but no definition) + * Parses the list of parameters if present + * Saves the function prototype if there is no body + * Generates and saves the break-out point label + * + * @param Type: The return type of the function + * @return the AST for this function + * + */ + struct ASTNode* ParseFunction(int Type) { struct ASTNode* Tree; struct ASTNode* FinalStatement; @@ -104,7 +191,7 @@ struct ASTNode* ParseFunction(int Type) { OldFunction = NULL; if(OldFunction == NULL) { BreakLabel = NewLabel(); - NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0); + NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL); } VerifyToken(LI_LPARE, "("); @@ -120,7 +207,7 @@ struct ASTNode* ParseFunction(int Type) { Params = ParamsEnd = NULL; if(CurrentToken.type == LI_SEMIC) { - Tokenise(&CurrentToken); + Tokenise(); return NULL; } @@ -149,7 +236,6 @@ struct ASTNode* ParseFunction(int Type) { * //TODO: No brackets * //TODO: Type inference * - * */ struct ASTNode* ReturnStatement() { @@ -166,19 +252,10 @@ struct ASTNode* ReturnStatement() { Tree = ParsePrecedenceASTNode(0); - /* - ReturnType = Tree->ExprType; - FunctionType = Symbols[CurrentFunction].Type; - */ - Tree = MutateType(Tree, FunctionEntry->Type, 0); if(Tree == NULL) Die("Returning a value of incorrect type for function"); - /* - if(ReturnType) - Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0); - */ Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0); @@ -189,59 +266,33 @@ struct ASTNode* ReturnStatement() { return Tree; } + + /* - * Handles Identifiers. + * Handles the surrounding logic for If statements. 
* - This is called for any of: - * - Calling a function - * - Assigning an lvalue variable - * - Performing arithmetic on a variable - * - Performing arithmetic with the return values of function calls + * If statements have the basic form: + * * if (condition) body + * * if (condition) + * body + * * if (condition) { + * body + * } * - * For the case where you're assigning an l-value; - * You can assign with another assignment, - * a statement, a function or a literal. + * Conditions may be any truthy statement (such as a pointer, + * object, integer), as conditions not recognized are auto- + * matically converted to booleans. + * + * This means that any object that can be resolved to 0 or NULL + * can be placed as the condition and used as a check. + * + * For example: + * struct ASTNode* Node = NULL; + * if(Node) { + * // This will not run, as Node is ((void*)0) + * } * */ - -/* -struct ASTNode* ParseIdentifier() { - struct ASTNode* Left, *Right, *Tree; - int LeftType, RightType; - int ID; - - VerifyToken(TY_IDENTIFIER, "ident"); - - printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier)); - - if(CurrentToken.type == LI_LPARE) - return CallFunction(); - - if((ID = FindSymbol(CurrentIdentifier)) == -1) { - printf("Symbol %s not in table.
Table contents: %s, %s\n", CurrentIdentifier, Symbols[0].Name, Symbols[1].Name); - DieMessage("Undeclared Variable ", CurrentIdentifier); - } - Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID); - - VerifyToken(LI_EQUAL, "="); - - Left = ParsePrecedenceASTNode(0); - - LeftType = Left->ExprType; - RightType = Right->ExprType; - - Left = MutateType(Left, RightType, 0); - if(!Left) - Die("Incompatible types in assignment"); - - if(LeftType) - Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0); - - Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0); - - return Tree; -}*/ - struct ASTNode* IfStatement() { struct ASTNode* Condition, *True, *False = NULL; @@ -261,13 +312,39 @@ struct ASTNode* IfStatement() { True = ParseCompound(); if(CurrentToken.type == KW_ELSE) { - Tokenise(&CurrentToken); + Tokenise(); False = ParseCompound(); } return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0); } +/* + * Handles the surrounding logic for While loops. + * + * While loops have the basic form: + * while ( condition ) { body } + * + * When reaching the condition (which, like an If statement, + * can be any truthy value), if it resolves to true: + * The body is executed, and immediately the condition is checked + * again. + * This repeats until the condition resolves false, at which point + * the loop executes no more. + * + * This can be prototyped as the following pseudo-assembler: + * + * cond: + * check + * jne exit + * <body> + * jump cond + * exit: + * + * + * @return the AST of this statement + * + */ struct ASTNode* WhileStatement() { struct ASTNode* Condition, *Body; @@ -287,12 +364,36 @@ struct ASTNode* WhileStatement() { return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0); } +/* + * Handles the surrounding logic for For loops. + * + * They have the basic form of: + * for ( init ; condition; iterator) { body } + * + * The initialiser is run only once upon reaching the for loop.
+ * Then the condition is checked, and if true, the body is executed. + * After execution of the body, the iterator is run and the condition + * checked again. + * + * It can be prototyped as the following pseudo-assembler code: + * + * for: + * <init> + * cond: + * check + * jne exit + * <body> + * <iterator> + * jump cond + * exit: + * + * + * + * In the case of the implementation, "init" is the preoperator, + * "iterator" is the postoperator. + * + * @return the AST of this statement + */ struct ASTNode* ForStatement() { - - // for (preop; condition; postop) { - // body - //} - struct ASTNode* Condition, *Body; struct ASTNode* Preop, *Postop; @@ -326,6 +427,18 @@ struct ASTNode* ForStatement() { return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0); } + +/* + * Handles the surrounding logic for the Print statement. + * + * This is a legacy hold-over from the early testing, and it + * serves merely as a wrapper around the cstdlib printf function. + * + * It does, however (//TODO), attempt to guess the type that you + * want to print, which takes a lot of the guesswork out of printing. + * + * @return the AST of this statement + */ struct ASTNode* PrintStatement(void) { struct ASTNode* Tree; int LeftType, RightType; @@ -342,7 +455,7 @@ struct ASTNode* PrintStatement(void) { DieDecimal("Attempting to print an invalid type:", RightType); if(RightType) - Tree = ConstructASTBranch(RightType, RET_INT, Tree, NULL, 0); + Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0); Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0); @@ -352,17 +465,34 @@ struct ASTNode* PrintStatement(void) { } +/* + * Handles the surrounding logic for all of the logical and semantic + * postfixes. + * + * Postfixes are tokens that are affixed to the end of another, and + * change behaviour in some way. These can be added calculations, + * some form of transformation, or other. + * + * A current list of postfixes: + * * (): Call a function + * * []: Index or define an array.
+ * * ++: Increment a variable AFTER it is returned + * NOTE: there is a prefix variant of this for incrementing BEFOREhand. + * * --: Decrement a variable AFTER it is returned + * NOTE: there is a prefix variant of this for decrementing BEFOREhand. + * + * Planned postfixes: + * * >>: Arithmetic-Shift-Right a variable by one (Divide by two) + * NOTE: there is a prefix variant of this for shifting left - multiplying by two. + * + * @return the AST of the statement plus its postfix + */ struct ASTNode* PostfixStatement() { struct ASTNode* Tree; struct SymbolTableEntry* Entry; - Tokenise(&CurrentToken); + Tokenise(); - // If we get here, we're one of three things: - // - Function - // - Array - // - Variable - if(CurrentToken.type == LI_LPARE) return CallFunction(); @@ -370,8 +500,8 @@ struct ASTNode* PostfixStatement() { return AccessArray(); // If we get here, we must be a variable. - // There's no guarantees that the variable is in - // the symbol table, though. + // (as functions have been called and arrays have been indexed) + // Check that the variable is recognized. if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR) DieMessage("Unknown Variable", CurrentIdentifier); @@ -380,11 +510,11 @@ struct ASTNode* PostfixStatement() { switch(CurrentToken.type) { case PPMM_PLUS: - Tokenise(&CurrentToken); + Tokenise(); Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0); break; case PPMM_MINUS: - Tokenise(&CurrentToken); + Tokenise(); Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0); break; default: @@ -395,33 +525,58 @@ struct ASTNode* PostfixStatement() { } +/* + * Handles the surrounding logic for all of the logical and semantic + * prefixes. + * + * Prefixes are tokens that are affixed to the start of another, and + * change behaviour in some way. These can be added calculations, + * some form of transformation, or other.
+ * + * A current list of prefixes: + * * !: Invert the boolean result of a statement or truthy value. + * * ~: Invert the individual bits in a number + * * -: Invert the number around the axis of 0 (negative->positive, positive->negative) + * * ++: Increment a variable BEFORE it is returned. + * NOTE: there is a postfix variant of this for incrementing AFTER the fact. + * * --: Decrement a variable BEFORE it is returned. + * NOTE: there is a postfix variant of this for decrementing AFTER the fact. + * * &: Take the address of the following object (Get the address that contains it) + * * *: Get the object pointed at by the number following + * + * Planned prefixes: + * * <<: Arithmetic-Shift-Left a variable by one (Multiply by two) + * NOTE: there is a postfix variant of this for shifting right - dividing by two. + * + * @return the AST of this statement, plus its prefixes and any postfixes. + */ struct ASTNode* PrefixStatement() { struct ASTNode* Tree; switch (CurrentToken.type) { case BOOL_INVERT: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); Tree->RVal = 1; Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0); break; case BIT_NOT: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); Tree->RVal = 1; Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0); break; case AR_MINUS: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0); break; case PPMM_PLUS: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); if(Tree->Operation != REF_IDENT) @@ -430,7 +585,7 @@ struct ASTNode* PrefixStatement() { break; case PPMM_MINUS: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); if(Tree->Operation != REF_IDENT) @@ -440,7 +595,7 @@ struct ASTNode* PrefixStatement() { break; case BIT_AND: - Tokenise(&CurrentToken); + Tokenise(); // To allow things like: // x = &&y; @@ -454,7 +609,7 @@ struct ASTNode*
PrefixStatement() { Tree->ExprType = PointerTo(Tree->ExprType); break; case AR_STAR: - Tokenise(&CurrentToken); + Tokenise(); Tree = PrefixStatement(); diff --git a/src/Symbols.c b/src/Symbols.c index 780ae4b..7a29d7d 100644 --- a/src/Symbols.c +++ b/src/Symbols.c @@ -78,6 +78,28 @@ struct SymbolTableEntry* FindGlobal(char* Symbol) { return SearchList(Symbol, Globals); } +/* + * An override for FindSymbol. + * Searches only the defined Structs. + * @param Symbol: The string name of the symbol to search for. + * @return a pointer to the node if found, else NULL + * + */ +struct SymbolTableEntry* FindStruct(char* Symbol) { + return SearchList(Symbol, Structs); +} + +/* + * An override for FindSymbol. + * Searches only the defined Struct & Enum Members. + * @param Symbol: The string name of the symbol to search for. + * @return a pointer to the node if found, else NULL + * + */ +struct SymbolTableEntry* FindMember(char* Symbol) { + return SearchList(Symbol, StructMembers); +} + /* * Given a particular linked list, * Take Node and append it to the Tail. @@ -112,6 +134,7 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail void FreeLocals() { Locals = LocalsEnd = NULL; Params = ParamsEnd = NULL; + FunctionEntry = NULL; } @@ -122,6 +145,8 @@ void ClearTables() { Globals = GlobalsEnd = NULL; Locals = LocalsEnd = NULL; Params = ParamsEnd = NULL; + StructMembers = StructMembersEnd = NULL; + Structs = StructsEnd = NULL; } @@ -136,34 +161,7 @@ void ClearTables() { * * @return The SymbolTableEntry* pointer that corresponds to this newly constructed node. */ -struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset) { - - /* int TableSlot; - int SinkOffset = 0; - - if((TableSlot = FindSymbolImpl(Name, Storage)) != -1) - return -1; - - // Instaed of spliting this up into AddLocalSymbol and AddGlobalSymbol, - // we can use this switch to avoid duplicated code. 
- switch(Storage) { - case SC_PARAM: - // Instead of special casing parameters, we can just add these to the symbol lists and be done with it. - printf("\tPreparing new parameter %s of type %s\r\n", Name, TypeNames[Type]); - TableSlot = AddSymbol(Name, Type, Structure, SC_GLOBAL, 88, 1); - Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the global process - TableSlot = AddSymbol(Name, Type, Structure, SC_LOCAL, 88, 1); - Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the local process - return TableSlot; - case SC_GLOBAL: - TableSlot = NewGlobalSymbol(); - break; - case SC_LOCAL: - printf("\tCreating new local symbol %s\r\n", Name); - TableSlot = NewLocalSymbol(); - SinkOffset = AsCalcOffset(Type); - break; - } */ +struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType) { struct SymbolTableEntry* Node = (struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry)); @@ -174,33 +172,28 @@ struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Stor Node->Storage = Storage; Node->Length = Length; Node->SinkOffset = SinkOffset; + Node->CompositeType = CompositeType; switch(Storage) { case SC_GLOBAL: AppendSymbol(&Globals, &GlobalsEnd, Node); + // We don't want to generate a static block for functions. if(Structure != ST_FUNC) AsGlobalSymbol(Node); break; + case SC_STRUCT: + AppendSymbol(&Structs, &StructsEnd, Node); + break; + case SC_MEMBER: + AppendSymbol(&StructMembers, &StructMembersEnd, Node); case SC_LOCAL: AppendSymbol(&Locals, &LocalsEnd, Node); break; case SC_PARAM: AppendSymbol(&Params, &ParamsEnd, Node); break; + } - /* // NOTE: Generating global symbol names must happen AFTER the name and type are declared. 
- switch(Storage) { - case SC_GLOBAL: - printf("\tCreating new global symbol %s into slot %d\r\n", Name, TableSlot); - if(Structure != ST_FUNC && EndLabel != 88) { // Magic keyword so that we don't generate ASM globals for parameters - printf("\t\tGenerating data symbol.\r\n"); - AsGlobalSymbol(TableSlot); - } - break; - case SC_LOCAL: - break; - } */ - - //printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot); + return Node; } \ No newline at end of file