From 37cdaacc71a3c4d82abe8028801c5666e4670f89 Mon Sep 17 00:00:00 2001 From: Curle Date: Sat, 5 Mar 2022 01:13:45 +0000 Subject: [PATCH] Begin refactor for multiplexed compilation --- .gitignore | 4 +++ CMakeLists.txt | 3 +- include/Data.h | 28 ++++++++++------- include/Defs.h | 81 ++++++++++++++++++++++++++++++++++++++++++------ src/Assembler.c | 8 ++--- src/Delegate.c | 40 ++++++++++++------------ src/Dump.c | 5 +-- src/Importer.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ src/Lexer.c | 17 +++++----- src/Main.c | 53 +++++++++++++------------------ src/Parser.c | 38 +++++++++++++---------- src/Pointers.c | 21 ++++++++----- src/Statements.c | 50 +++++++++++++++--------------- src/Symbols.c | 14 ++++----- src/Types.c | 2 +- 15 files changed, 295 insertions(+), 144 deletions(-) create mode 100644 src/Importer.c diff --git a/.gitignore b/.gitignore index e42c6f5..c5707cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ .idea .vscode +.cache out bin +build +Erythro +test \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e56db3c..21726e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,4 +17,5 @@ add_executable(Erythro src/Pointers.c src/Statements.c src/Symbols.c - src/Types.c) + src/Types.c + src/Importer.c) diff --git a/include/Data.h b/include/Data.h index e3e484f..7cdfcec 100644 --- a/include/Data.h +++ b/include/Data.h @@ -16,42 +16,48 @@ #define TEXTLEN 512 #define SYMBOLS 1024 +// All currently open source files. +extern_ struct FileData** Files; +// The source file currently being operated on. +extern_ struct FileData* CurrentFile; +// The file we are writing into; CurrentFile -> OutputFile +extern_ FILE* OutputFile; + +// Symbol tables. 
extern_ struct SymbolTableEntry* Globals, * GlobalsEnd; extern_ struct SymbolTableEntry* Locals, * LocalsEnd; extern_ struct SymbolTableEntry* Params, * ParamsEnd; extern_ struct SymbolTableEntry* Structs, * StructsEnd; - extern_ struct SymbolTableEntry* CompositeMembers, * CompositeMembersEnd; extern_ struct SymbolTableEntry* EnumMembers, * EnumMembersEnd; - extern_ struct SymbolTableEntry* Unions, * UnionsEnd; extern_ struct SymbolTableEntry* Enums, * EnumsEnd; extern_ struct SymbolTableEntry* Types, * TypesEnd; +// Whether we should dump the syntax tree before starting to assemble the file. extern_ bool OptDumpTree; +// Whether we should keep the assembly files after successfully linking. extern_ bool OptKeepAssembly; +// Whether to stop at compilation and dumping - skip assembly and binary creation altogether. extern_ bool OptAssembleFiles; +// Whether to stop at assembly - skip linking into a binary. extern_ bool OptLinkFiles; +// Whether to output extended debugging information. extern_ bool OptVerboseOutput; +// The name of the binary we want to create. extern_ char* OutputFileName; -extern_ char* CurrentASMFile, * CurrentObjectFile; - +// The sizes of each of the core types, in bytes. extern_ int TypeSizes[5]; +// The names of each token in the language, synchronized to the TokenTypes enum. extern_ char* TokenNames[]; +// The names of the storage scopes. extern_ char* ScopeNames[]; -extern_ int CurrentFunction; -extern_ struct SymbolTableEntry* FunctionEntry; -extern_ int Line; extern_ int Overread; -extern_ FILE* SourceFile; -extern_ FILE* OutputFile; - -extern_ struct Token CurrentToken; extern_ char CurrentIdentifier[TEXTLEN + 1]; extern_ int CurrentGlobal; diff --git a/include/Defs.h b/include/Defs.h index dd39821..68d1f84 100644 --- a/include/Defs.h +++ b/include/Defs.h @@ -11,6 +11,7 @@ #include #include + /* * ARithmetic tokens are prefixed AR. * LIteral tokens are prefixed LI. 
@@ -99,7 +100,9 @@ enum TokenTypes { KW_STRUCT, KW_UNION, KW_ENUM, - KW_ALIAS + KW_ALIAS, + + KW_IMPORT }; /* @@ -113,6 +116,9 @@ enum TokenTypes { * * It is important that Tokens and Operations are logically separated, * but that the Operation's index is the same as the Token that invokes it. + * + * Every five elements, an index is assigned. These are the natural indices. + * They are marked to make navigation of the Syntax Tree easier. */ enum SyntaxOps { @@ -173,7 +179,17 @@ enum SyntaxOps { }; -// A node in a Binary Tree that forms the syntax of Erythro +/** + * The way syntax is stored by the parser and assembled into a usable file. + * An ASTNode forms an item in a linked list. + * + * Thus, you can traverse up and down a tree of ASTNodes easily. + * + * Walking the tree is as simple as reading the Operation and recursively reading the Left, Middle and Right nodes as called for. + * For example, an if-else statement uses all three subnodes. + * + * This means that AST Nodes aren't exactly a binary tree, but a syntax tree nonetheless. + */ struct ASTNode { int Operation; // SyntaxOps Index int ExprType; // Value->IntValue's DataType @@ -188,6 +204,12 @@ struct ASTNode { }; }; +/** + * Describes the basic unit of syntax in the language. + * A token has a type (an index into the TokenTypes enum) and a value. + * + * The value represents the numerical value of an integer literal, etc. + */ struct Token { int type; int value; @@ -219,6 +241,47 @@ struct SymbolTableEntry { struct SymbolTableEntry* Start; // The first member in a list }; + +/** + * Information about a given source file. + * + * A file that starts the parsing of another file will never confuse the parser. + * + * It is the end goal that the parser will be multithreaded, operating on a single file at a time. + * + * Note that files do not contain their own symbol tables - these are global. + */ +struct FileData { + // Whether or not this file will accept definitions of functions. 
+ bool AllowDefinitions; + + // A FILE stream that we can read the file from. + FILE* Stream; + + // The filename of the source code + char* SourceName; + // The filename of the assembly output + char* AssemblyName; + // The filename of the assembled object code + char* ObjectName; + + // The full contents of the source file + char* Content; + // The line of the file we are currently working on, -1 if it is finished + long CurrentLine; + // The column of the file we are currently working on, -1 if it is finished + long CurrentColumn; + + // The symbol currently being lexed - TokenTypes index and integer value. + struct Token CurrentSymbol; + // The function currently being parsed - null if in global scope or if finished. + struct SymbolTableEntry* FunctionEntry; + + // Once ready, the full AST trees of this file. + struct ASTNode* Tree; +}; + + enum StorageScope { SC_GLOBAL = 1, // Global Scope SC_STRUCT, // Struct Definitions @@ -255,6 +318,7 @@ enum DataTypes { DAT_UNION, // Union Data DAT_ENUM, // Enum Data DAT_ALIAS, // Alias Definition + DAT_NONE, // No type, no work needed. 
}; /* @@ -278,11 +342,11 @@ enum StructureType { char* Suffixate(char* String, char Suffix); -char* Compile(char* InputFile); +void Compile(struct FileData* InputFile); -char* Assemble(char* InputFile); +void Assemble(struct FileData* InputFile); -void Link(char* Output, char* Objects[]); +void Link(char* Output, struct FileData* Objects[]); void DisplayUsage(char* ProgName); @@ -291,7 +355,6 @@ void DisplayUsage(char* ProgName); * * * * * * * * * L E X I N G * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - void Tokenise(); void VerifyToken(int Type, char* TokenExpected); @@ -302,6 +365,8 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit); static int ReadKeyword(char* Str); +void ImportModule(); + /* * * * * * * * * * * * * * * * * * * * * * * * * T Y P E S * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -331,8 +396,7 @@ struct ASTNode* ConstructASTNode(int Operation, int Type, struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue); -struct ASTNode* -ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue); +struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * @@ -483,7 +547,6 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry); int AsNewString(char* Value); - int AsLoadString(int ID); int AsEqual(int Left, int Right); diff --git a/src/Assembler.c b/src/Assembler.c index fdea8ea..d232e09 100644 --- a/src/Assembler.c +++ b/src/Assembler.c @@ -161,7 +161,7 @@ int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) { case OP_RET: printf("\tReturning from %s\n", Node->Symbol->Name); - AsReturn(FunctionEntry, LeftVal); + AsReturn(CurrentFile->FunctionEntry, LeftVal); return -1; case OP_EQUAL: @@ -856,13 +856,13 @@ void AsGlobalSymbol(struct SymbolTableEntry* Entry) { 
switch (Size) { case 1: - fprintf(OutputFile, "\t.byte\t0\r\n", Entry->Name); + fprintf(OutputFile, "\t.byte\t0\r\n"); break; case 4: - fprintf(OutputFile, "\t.long\t0\r\n", Entry->Name); + fprintf(OutputFile, "\t.long\t0\r\n"); break; case 8: - fprintf(OutputFile, "\t.quad\t0\r\n", Entry->Name); + fprintf(OutputFile, "\t.quad\t0\r\n"); break; default: for (int i = 0; i < Size; i++) diff --git a/src/Delegate.c b/src/Delegate.c index 764bca2..8eab91f 100644 --- a/src/Delegate.c +++ b/src/Delegate.c @@ -62,19 +62,19 @@ char* Suffixate(char* String, char Suffix) { * For Erythro code, this is .er * The generated assembly will have the extension .s * - * @param InputFile: The filename of the Erythro Source code to compile + * @param InputFile: A pointer to the data that we should use to compile this file. * @return the filename of the generated PECOFF32+ assembly */ -char* Compile(char* InputFile) { +void Compile(struct FileData* InputFile) { char* OutputName; - OutputName = Suffixate(InputFile, 's'); + OutputName = Suffixate(InputFile->SourceName, 's'); if (OutputName == NULL) { - fprintf(stderr, "%s must have a suffix.\r\n", InputFile); + fprintf(stderr, "%s must have a suffix.\r\n", InputFile->SourceName); exit(1); } - if ((SourceFile = fopen(InputFile, "r")) == NULL) { - fprintf(stderr, "Unable to open %s: %s\n", InputFile, strerror(errno)); + if ((InputFile->Stream = fopen(InputFile->SourceName, "r")) == NULL) { + fprintf(stderr, "Unable to open %s: %s\n", InputFile->SourceName, strerror(errno)); exit(1); } @@ -83,13 +83,15 @@ char* Compile(char* InputFile) { exit(1); } - Line = 1; + InputFile->AssemblyName = OutputName; + CurrentFile = InputFile; + + CurrentFile->CurrentLine = 1; Overread = '\n'; - CurrentGlobal = 0; - CurrentLocal = SYMBOLS - 1; + if (OptVerboseOutput) - printf("Compiling %s\r\n", InputFile); + printf("Compiling %s\r\n", CurrentFile->SourceName); Tokenise(); @@ -97,8 +99,9 @@ char* Compile(char* InputFile) { ParseGlobals(); + // Output.Tree = 
ParseGlobals(); + fclose(OutputFile); - return OutputName; } /* @@ -115,27 +118,27 @@ char* Compile(char* InputFile) { * */ -char* Assemble(char* InputFile) { +void Assemble(struct FileData* InputFile) { char Command[TEXTLEN]; int Error; char* OutputName; - OutputName = Suffixate(InputFile, 'o'); + OutputName = Suffixate(InputFile->AssemblyName, 'o'); if (OutputName == NULL) { - fprintf(stderr, "%s must have a suffix.\r\n", InputFile); + fprintf(stderr, "%s must have a suffix.\r\n", InputFile->AssemblyName); exit(1); } - snprintf(Command, TEXTLEN, "%s %s %s", "as -o ", OutputName, InputFile); + snprintf(Command, TEXTLEN, "%s %s %s", "as -o ", OutputName, InputFile->AssemblyName); if (OptVerboseOutput) printf("%s\n", Command); Error = system(Command); if (Error != 0) { - fprintf(stderr, "Assembling of %s failed with code %d\n", InputFile, Error); + fprintf(stderr, "Assembling of %s failed with error code %d\n", InputFile->AssemblyName, Error); exit(1); } - return OutputName; + InputFile->ObjectName = OutputName; // Link() and main()'s cleanup read the object file name from here, now that nothing is returned } /* @@ -150,7 +153,7 @@ char* Assemble(char* InputFile) { * */ -void Link(char* Output, char* Objects[]) { +void Link(char* Output, struct FileData* Objects[]) { int Count, Size = TEXTLEN, Error; char Command[TEXTLEN], * CommandPtr; @@ -160,7 +163,7 @@ void Link(char* Output, char* Objects[]) { Size -= Count; while (*Objects != NULL) { - Count = snprintf(CommandPtr, Size, "%s ", *Objects); + Count = snprintf(CommandPtr, Size, "%s ", (*Objects)->ObjectName); CommandPtr += Count; Size -= Count; Objects++; diff --git a/src/Dump.c b/src/Dump.c index 1ee5d00..6c7cf21 100644 --- a/src/Dump.c +++ b/src/Dump.c @@ -107,10 +107,7 @@ void DumpTree(struct ASTNode* Node, int level) { fprintf(stdout, "TERM_STRLITERAL rval L%d\n", Node->IntValue); return; case REF_IDENT: - if (Node->RVal) - fprintf(stdout, "REF_IDENT rval %s\n", Node->Symbol->Name); - else - fprintf(stdout, "REF_IDENT %s\n", Node->Symbol->Name); + fprintf(stdout, "REF_IDENT%s %s\n", Node->RVal ? 
" rval" : "", Node->Symbol->Name); return; case OP_ASSIGN: fprintf(stdout, "OP_ASSIGN\n"); diff --git a/src/Importer.c b/src/Importer.c new file mode 100644 index 0000000..653a478 --- /dev/null +++ b/src/Importer.c @@ -0,0 +1,75 @@ + +/*************/ +/*GEMWIRE */ +/* ERYTHRO*/ +/*************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * The function of the importer is to read in definitions from a file, and store + * them into the symbol tables. + * + * The file to be imported is called a "module", which is Erythro terminology for C-like "headers". + * They contain extra metadata that allows for Erythro's enhanced debugging and error logging. + * + * Modules may also contain metadata about the contents within - allowing for multiple compile-time + * sourcesets with different arguments, all parsed at the same time as the source code. + * + * This allows Erythro to have first-class support for multiple-build-single-link situations, + * that would require the use of a build system like CMake in other languages. + * + */ + +/** + * Read in the information of a module, check that it is valid, and then read the module itself. + * Import syntax looks like: + * + * > import "file" + * + * The string is appended to the current working directory and is checked. + * If the resulting path exists and resolves to a file, then the file's declarations are added to the symbol tables. + * + * Modules may not contain definitions. Only declarations + * TODO: Module metadata as described above. + */ + void ImportModule() { + // Skip the import keyword + Tokenise(); + + // Make sure there's a string after the import. + if (CurrentFile->CurrentSymbol.type != LI_STR) + Die("Import statement must be followed by a compile-time constant string."); + + // Read in the string that we know must be there. 
+ char* Module = strdup(CurrentIdentifier); + + // Figure out the working directory + char CWD[PATH_MAX]; + + if (getcwd(CWD, sizeof(CWD)) == NULL) + DieMessage("Unable to find cwd when importing module", Module); + + // Join the working directory and the module name; the +2 covers the '/' separator and the terminating NUL + char* ModulePath = malloc(strlen(CWD) + strlen(Module) + 2); + if (ModulePath == NULL) DieMessage("Unable to allocate a path for the imported module", Module); + sprintf(ModulePath, "%s/%s", CWD, Module); + + printf("Scanning %s for module definitions.\n", ModulePath); + + // Stat the file to see if it exists + struct stat FileInfo; + if (stat(ModulePath, &FileInfo) != 0) + DieMessage("Unable to access the imported module", ModulePath); + + // At this point, the file exists and we have the path. + // Pass it to the lexer and have at it. + + } \ No newline at end of file diff --git a/src/Lexer.c b/src/Lexer.c index 1b3adae..c499bdd 100644 --- a/src/Lexer.c +++ b/src/Lexer.c @@ -43,10 +43,10 @@ static int NextChar(void) { return Char; } - Char = fgetc(SourceFile); + Char = fgetc(CurrentFile->Stream); if (Char == '\n') - Line++; + CurrentFile->CurrentLine++; return Char; } @@ -91,10 +91,10 @@ static int FindDigitFromPos(char* String, char Char) { */ void VerifyToken(int Type, char* TokenExpected) { - if (CurrentToken.type == Type) + if (CurrentFile->CurrentSymbol.type == Type) Tokenise(); else { - printf("Expected %s on line %d\n", TokenExpected, Line); + printf("Expected %s on line %ld\n", TokenExpected, CurrentFile->CurrentLine); exit(1); } } @@ -170,7 +170,7 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) { // This defines the valid chars in a keyword/variable/function. 
while (isalpha(Char) || isdigit(Char) || Char == '_') { if (ind >= Limit - 1) { - printf("Identifier too long: %d\n", Line); + printf("Identifier too long: %ld\n", CurrentFile->CurrentLine); exit(1); } else { Buffer[ind++] = Char; @@ -319,6 +319,9 @@ static int ReadKeyword(char* Str) { if (!strcmp(Str, "int")) return TY_INT; + if (!strcmp(Str, "import")) + return KW_IMPORT; + if (!strcmp(Str, "if")) return KW_IF; @@ -384,7 +387,7 @@ static int ReadKeyword(char* Str) { */ void Tokenise() { int Char, TokenType; - struct Token* Token = &CurrentToken; + struct Token* Token = &CurrentFile->CurrentSymbol; if (RejectedToken != NULL) { Token = RejectedToken; @@ -580,7 +583,7 @@ void Tokenise() { Char == '_') { // This is what defines what a variable/function/keyword can START with. ReadIdentifier(Char, CurrentIdentifier, TEXTLEN); - if (TokenType = ReadKeyword(CurrentIdentifier)) { + if ((TokenType = ReadKeyword(CurrentIdentifier))) { Token->type = TokenType; break; } diff --git a/src/Main.c b/src/Main.c index 32c7c12..2878c92 100644 --- a/src/Main.c +++ b/src/Main.c @@ -4,13 +4,10 @@ /*************/ #include - #define extern_ - #include - #undef extern_ - +#include int TypeSizes[5] = {0, 1, 4, 8, 0}; // in BYTES @@ -105,7 +102,6 @@ int main(int argc, char* argv[]) { OptVerboseOutput = false; // Temporary .o storage and counter - char* ObjectFiles[100]; int ObjectCount = 0; // Parse command line arguments. @@ -134,7 +130,7 @@ int main(int argc, char* argv[]) { OptLinkFiles = false; break; case 'S': // aSsemble only - OptAssembleFiles = false; + OptAssembleFiles = true; OptKeepAssembly = true; OptLinkFiles = false; break; @@ -151,40 +147,45 @@ int main(int argc, char* argv[]) { if (i >= argc) DisplayUsage(argv[0]); + // Allocate a zeroed, NULL-terminated table with one pointer slot per remaining source file; Link() and the cleanup loop walk it from index 0 until NULL + Files = calloc((size_t) (argc - i) + 1, sizeof(struct FileData*)); + int FileCount = 0; // For the rest of the files specified, we can iterate them right to left. 
while (i < argc) { + // Prepare the source metadata before we start compiling; zeroed so every unset field reads as NULL/0 + struct FileData* Source = calloc(1, sizeof(struct FileData)); + Source->SourceName = argv[i]; + Files[FileCount++] = Source; // Compile the file by invoking the Delegate - CurrentASMFile = Compile(argv[i]); + Compile(Source); if (OptLinkFiles || OptAssembleFiles) { // If we need to assemble (or link, which requires assembly) // then we invoke the Delegate again - CurrentObjectFile = Assemble(CurrentASMFile); + Assemble(Source); // We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too. if (ObjectCount == 98) { fprintf(stderr, "Too many inputs"); return 1; // We use return because we're in main, rather than invoking Die. } - // Move the ObjectCount forward. - ObjectFiles[ObjectCount++] = CurrentObjectFile; - // Clear the new, forwarded index - ObjectFiles[ObjectCount] = NULL; } if (!OptKeepAssembly) // unlink = delete - unlink(CurrentASMFile); + unlink(Source->AssemblyName); i++; } if (OptLinkFiles) { // If needed, invoke the Delegate one last time. - Link(OutputFileName, ObjectFiles); + Link(OutputFileName, Files); if (!OptAssembleFiles) { // Even though we need to assemble to link, we can respect the user's options and delete the intermediary files. - for (i = 0; ObjectFiles[i] != NULL; i++) - unlink(ObjectFiles[i]); + for (i = 0; Files[i] != NULL; i++) { + unlink(Files[i]->AssemblyName); + unlink(Files[i]->ObjectName); + } } } @@ -198,7 +199,7 @@ int main(int argc, char* argv[]) { */ void Die(char* Error) { - fprintf(stderr, "%s on line %d\n", Error, Line); + fprintf(stderr, "%s on line %ld\n", Error, CurrentFile->CurrentLine); fclose(OutputFile); unlink(OutputFileName); exit(1); @@ -208,7 +209,7 @@ void Die(char* Error) { * A variant of Die with an extra String attached. 
*/ void DieMessage(char* Error, char* Reason) { - fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line); + fprintf(stderr, "%s: %s on line %ld\n", Error, Reason, CurrentFile->CurrentLine); fclose(OutputFile); unlink(OutputFileName); exit(1); @@ -218,19 +219,7 @@ void DieMessage(char* Error, char* Reason) { * A variant of Die with an extra integer attached. */ void DieDecimal(char* Error, int Number) { - fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line); - fclose(OutputFile); - unlink(OutputFileName); - exit(1); -} - -/** - * A variant of Die that prints the int in binary. - */ -void DieBinary(char* Error, int Number) { - char buf[33]; - itoa(Number, buf, 2); - printf("%s: %s\n", Error, buf); + fprintf(stderr, "%s: %d on line %ld\n", Error, Number, CurrentFile->CurrentLine); fclose(OutputFile); unlink(OutputFileName); exit(1); @@ -240,7 +229,7 @@ void DieBinary(char* Error, int Number) { * A variant of Die with an extra character attached. */ void DieChar(char* Error, int Char) { - fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line); + fprintf(stderr, "%s: %c on line %ld\n", Error, Char, CurrentFile->CurrentLine); fclose(OutputFile); unlink(OutputFileName); exit(1); diff --git a/src/Parser.c b/src/Parser.c index 77cfc72..c063286 100644 --- a/src/Parser.c +++ b/src/Parser.c @@ -230,13 +230,13 @@ struct ASTNode* ParsePrimary(void) { struct ASTNode* Node; int ID; - switch (CurrentToken.type) { + switch (CurrentFile->CurrentSymbol.type) { case LI_INT: - if ((CurrentToken.value >= 0) && (CurrentToken.value < 256)) - Node = ConstructASTLeaf(TERM_INTLITERAL, RET_CHAR, NULL, CurrentToken.value); + if ((CurrentFile->CurrentSymbol.value >= 0) && (CurrentFile->CurrentSymbol.value < 256)) + Node = ConstructASTLeaf(TERM_INTLITERAL, RET_CHAR, NULL, CurrentFile->CurrentSymbol.value); else - Node = ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, CurrentToken.value); + Node = ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, CurrentFile->CurrentSymbol.value); 
break; case LI_STR: @@ -288,7 +288,7 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { LeftNode = PrefixStatement(); - NodeType = CurrentToken.type; + NodeType = CurrentFile->CurrentSymbol.type; if (NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) { LeftNode->RVal = 1; @@ -298,7 +298,7 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { while ((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) { Tokenise(); - if (CurrentToken.type == LI_RPARE) + if (CurrentFile->CurrentSymbol.type == LI_RPARE) break; RightNode = ParsePrecedenceASTNode(Precedence[NodeType]); @@ -331,7 +331,7 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { RightTemp = NULL; LeftTemp = NULL; } else { - printf("\t\tAttempting to handle a %d in Binary Expression parsing\r\n", CurrentToken.type); + printf("\t\tAttempting to handle a %d in Binary Expression parsing\r\n", CurrentFile->CurrentSymbol.type); LeftNode->RVal = 1; RightNode->RVal = 1; @@ -370,7 +370,7 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode->ExprType, LeftNode, NULL, RightNode, NULL, 0); - NodeType = CurrentToken.type; + NodeType = CurrentFile->CurrentSymbol.type; if (NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS) { LeftNode->RVal = 1; return LeftNode; @@ -435,13 +435,13 @@ struct ASTNode* GetExpressionList() { struct ASTNode* Tree = NULL, * Child = NULL; int Count; - while (CurrentToken.type != LI_RPARE) { + while (CurrentFile->CurrentSymbol.type != LI_RPARE) { Child = ParsePrecedenceASTNode(0); Count++; Tree = ConstructASTNode(OP_COMP, PointerTo(RET_VOID), Tree, NULL, Child, NULL, Count); - switch (CurrentToken.type) { + switch (CurrentFile->CurrentSymbol.type) { case LI_COM: Tokenise(); break; @@ -475,8 +475,8 @@ struct 
ASTNode* GetExpressionList() { struct ASTNode* ParseStatement(void) { int Type; - printf("\t\tBranch leads to here, type %s/%d\r\n", TokenNames[CurrentToken.type], CurrentToken.type); - switch (CurrentToken.type) { + printf("\t\tBranch leads to here, type %s/%d\r\n", TokenNames[CurrentFile->CurrentSymbol.type], CurrentFile->CurrentSymbol.type); + switch (CurrentFile->CurrentSymbol.type) { case TY_CHAR: case TY_LONG: case TY_INT: @@ -544,7 +544,7 @@ struct ASTNode* ParseCompound() { Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, NULL, 0); } - if (CurrentToken.type == LI_RBRAC) { + if (CurrentFile->CurrentSymbol.type == LI_RBRAC) { VerifyToken(LI_RBRAC, "}"); return Left; } @@ -577,28 +577,32 @@ void ParseGlobals() { // We loop early if there's a struct, and since a struct may be the last // thing in a file, we need to check for eof before anything else - if (CurrentToken.type == LI_EOF) + if (CurrentFile->CurrentSymbol.type == LI_EOF) break; printf("New definition incoming..\r\n\n"); Type = ParseOptionalPointer(&Composite); //TODO: converge pathways on this block? - if (CurrentToken.type == KW_FUNC) { + if (CurrentFile->CurrentSymbol.type == KW_FUNC) { VerifyToken(KW_FUNC, "::"); FunctionComing = 1; } // Structs are parsed fully in ParseOptionalPointer // TODO: FIX THAT!! - if ((Type == DAT_STRUCT || Type == DAT_UNION || Type == DAT_ENUM || Type == DAT_ALIAS) && CurrentToken.type == LI_SEMIC) { + if ((Type == DAT_STRUCT || Type == DAT_UNION || Type == DAT_ENUM || Type == DAT_ALIAS) && CurrentFile->CurrentSymbol.type == LI_SEMIC) { Tokenise(); continue; } + // If we read metadata or an import, then skip all processing. 
+ if (Type == DAT_NONE) + continue; + VerifyToken(TY_IDENTIFIER, "ident"); - if (FunctionComing && CurrentToken.type == LI_LPARE) { + if (FunctionComing && CurrentFile->CurrentSymbol.type == LI_LPARE) { printf("\tParsing function\n"); Tree = ParseFunction(Type); if (Tree) { diff --git a/src/Pointers.c b/src/Pointers.c index 31b7cee..5b920bc 100644 --- a/src/Pointers.c +++ b/src/Pointers.c @@ -75,7 +75,10 @@ int ParseOptionalPointer(struct SymbolTableEntry** Composite) { int Type; - switch (CurrentToken.type) { + switch (CurrentFile->CurrentSymbol.type) { + case KW_IMPORT: + ImportModule(); + return DAT_NONE; // an import declares no type; returning here avoids handing back an uninitialised Type, and ParseGlobals skips on DAT_NONE case TY_VOID: Type = RET_VOID; Tokenise(); @@ -97,13 +100,13 @@ int ParseOptionalPointer(struct SymbolTableEntry** Composite) { break; case KW_ALIAS: Type = ReadAlias(Composite); - if (CurrentToken.type == LI_SEMIC) + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = DAT_ALIAS; break; case KW_ENUM: Type = RET_INT; BeginEnumDeclaration(); - if (CurrentToken.type == LI_SEMIC) + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) Type = DAT_ENUM; break; case KW_STRUCT: @@ -115,15 +118,19 @@ int ParseOptionalPointer(struct SymbolTableEntry** Composite) { *Composite = BeginCompositeDeclaration(Type); break; default: - DieDecimal("Illegal type for pointerisation", CurrentToken.type); + DieDecimal("Illegal type for pointerisation", CurrentFile->CurrentSymbol.type); } // Recursively scan more *s // This makes things like: // x = **y; // possible. while (1) { - printf("\t\t\tType on parsing is %d\n", CurrentToken.type); - if (CurrentToken.type != AR_STAR) + // But, skip parsing if we're looking at an import. 
+ if (CurrentFile->CurrentSymbol.type == KW_IMPORT) + break; + + printf("\t\t\tType on parsing is %d\n", CurrentFile->CurrentSymbol.type); + if (CurrentFile->CurrentSymbol.type != AR_STAR) break; Type = PointerTo(Type); @@ -164,7 +171,7 @@ struct ASTNode* AccessArray() { if (!TypeIsInt(RightNode->ExprType)) Die("Array index is not integer"); - printf("\t\tPreparing types - RightNode of type %s must be mutated to LeftNode type %s\r\n", (RightNode->ExprType), + printf("\t\tPreparing types - RightNode of type %s must be mutated to LeftNode type %s\r\n", TypeNames(RightNode->ExprType), TypeNames(LeftNode->ExprType)); RightNode = MutateType(RightNode, LeftNode->ExprType, OP_ADD); diff --git a/src/Statements.c b/src/Statements.c index f789e02..f91ad6f 100644 --- a/src/Statements.c +++ b/src/Statements.c @@ -33,7 +33,7 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor if (FunctionSymbol != NULL) PrototypePointer = FunctionSymbol->Start; - while (CurrentToken.type != End) { + while (CurrentFile->CurrentSymbol.type != End) { TokenType = ParseOptionalPointer(&Composite); VerifyToken(TY_IDENTIFIER, "identifier"); @@ -48,10 +48,10 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor } ParamCount++; - if ((CurrentToken.type != LI_COM) && (CurrentToken.type != End)) - DieDecimal("Unexpected token in parameter", CurrentToken.type); + if ((CurrentFile->CurrentSymbol.type != LI_COM) && (CurrentFile->CurrentSymbol.type != End)) + DieDecimal("Unexpected token in parameter", CurrentFile->CurrentSymbol.type); - if (CurrentToken.type == LI_COM) + if (CurrentFile->CurrentSymbol.type == LI_COM) Tokenise(); } @@ -80,12 +80,12 @@ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) { Tokenise(); - if (CurrentToken.type == TY_IDENTIFIER) { + if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Composite = Type == DAT_STRUCT ? 
FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier); Tokenise(); } - if (CurrentToken.type != LI_LBRAC) { + if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Composite == NULL) DieMessage("Unknown Struct", CurrentIdentifier); return Composite; @@ -131,14 +131,14 @@ void BeginEnumDeclaration() { Tokenise(); // enum name - if (CurrentToken.type == TY_IDENTIFIER) { + if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) { Type = FindEnum(CurrentIdentifier); Name = strdup(CurrentIdentifier); Tokenise(); } // enum name {? if not, enum name var. - if (CurrentToken.type != LI_LBRAC) { + if (CurrentFile->CurrentSymbol.type != LI_LBRAC) { if (Type == NULL) DieMessage("Undeclared Enum", Name); @@ -162,19 +162,19 @@ void BeginEnumDeclaration() { DieMessage("Attempting to redeclare enum value", Name); // Parse equality - if (CurrentToken.type == LI_EQUAL) { + if (CurrentFile->CurrentSymbol.type == LI_EQUAL) { Tokenise(); // Expect a number after the equals - if (CurrentToken.type != LI_INT) + if (CurrentFile->CurrentSymbol.type != LI_INT) Die("Expected integer to assign enum value to"); - Value = CurrentToken.value; + Value = CurrentFile->CurrentSymbol.value; Tokenise(); } Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL); // Break on right brace - if (CurrentToken.type == LI_RBRAC) + if (CurrentFile->CurrentSymbol.type == LI_RBRAC) break; VerifyToken(LI_COM, "Comma"); @@ -214,10 +214,10 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEn DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier); } - if (CurrentToken.type == LI_LBRAS) { + if (CurrentFile->CurrentSymbol.type == LI_LBRAS) { Tokenise(); - if (CurrentToken.type == LI_INT) { + if (CurrentFile->CurrentSymbol.type == LI_INT) { switch (Scope) { case SC_GLOBAL: Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL); @@ -284,12 +284,12 @@ struct ASTNode* ParseFunction(int Type) { Params = ParamsEnd = NULL; 
- if (CurrentToken.type == LI_SEMIC) { + if (CurrentFile->CurrentSymbol.type == LI_SEMIC) { Tokenise(); return NULL; } - FunctionEntry = OldFunction; + CurrentFile->FunctionEntry = OldFunction; Tree = ParseCompound(); @@ -318,7 +318,7 @@ struct ASTNode* ReturnStatement() { int ReturnType; - if (FunctionEntry->Type == RET_VOID) + if (CurrentFile->FunctionEntry->Type == RET_VOID) Die("Attempt to return from void function"); VerifyToken(KW_RETURN, "return"); @@ -327,14 +327,14 @@ struct ASTNode* ReturnStatement() { Tree = ParsePrecedenceASTNode(0); - Tree = MutateType(Tree, FunctionEntry->Type, 0); + Tree = MutateType(Tree, CurrentFile->FunctionEntry->Type, 0); if (Tree == NULL) Die("Returning a value of incorrect type for function"); - Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0); + Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, CurrentFile->FunctionEntry, 0); - printf("\t\tReturning from function %s\n", FunctionEntry->Name); + printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name); VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE! @@ -385,7 +385,7 @@ struct ASTNode* IfStatement() { True = ParseCompound(); - if (CurrentToken.type == KW_ELSE) { + if (CurrentFile->CurrentSymbol.type == KW_ELSE) { Tokenise(); False = ParseCompound(); } @@ -573,10 +573,10 @@ struct ASTNode* PostfixStatement() { Tokenise(); - if (CurrentToken.type == LI_LPARE) + if (CurrentFile->CurrentSymbol.type == LI_LPARE) return CallFunction(); - if (CurrentToken.type == LI_LBRAS) + if (CurrentFile->CurrentSymbol.type == LI_LBRAS) return AccessArray(); // If we get here, we must be a variable. @@ -591,7 +591,7 @@ struct ASTNode* PostfixStatement() { // Here we check for postincrement and postdecrement. 
- switch (CurrentToken.type) { + switch (CurrentFile->CurrentSymbol.type) { case LI_DOT: return AccessMember(false); case LI_ARROW: @@ -640,7 +640,7 @@ struct ASTNode* PostfixStatement() { struct ASTNode* PrefixStatement() { struct ASTNode* Tree; - switch (CurrentToken.type) { + switch (CurrentFile->CurrentSymbol.type) { case BOOL_INVERT: Tokenise(); Tree = PrefixStatement(); diff --git a/src/Symbols.c b/src/Symbols.c index d215d18..a080795 100644 --- a/src/Symbols.c +++ b/src/Symbols.c @@ -17,8 +17,8 @@ void DumpAllLists() { printf("\nLocal symbols:\n"); DumpList(Locals); printf("\nParameters:\n"); - if (FunctionEntry != NULL && FunctionEntry->Start != NULL) - DumpList(FunctionEntry->Start); + if (CurrentFile->FunctionEntry != NULL && CurrentFile->FunctionEntry->Start != NULL) + DumpList(CurrentFile->FunctionEntry->Start); DumpList(Params); printf("\nStructs:\n"); DumpList(Structs); @@ -67,8 +67,8 @@ static struct SymbolTableEntry* SearchList(char* Name, struct SymbolTableEntry* struct SymbolTableEntry* FindSymbol(char* Symbol) { struct SymbolTableEntry* Node; - if (FunctionEntry) { - Node = SearchList(Symbol, FunctionEntry->Start); + if (CurrentFile->FunctionEntry) { + Node = SearchList(Symbol, CurrentFile->FunctionEntry->Start); if (Node) return Node; } @@ -90,8 +90,8 @@ struct SymbolTableEntry* FindSymbol(char* Symbol) { struct SymbolTableEntry* FindLocal(char* Symbol) { struct SymbolTableEntry* Node; - if (FunctionEntry) { - Node = SearchList(Symbol, FunctionEntry->Start); + if (CurrentFile->FunctionEntry) { + Node = SearchList(Symbol, CurrentFile->FunctionEntry->Start); if (Node) return Node; } @@ -210,7 +210,7 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail void FreeLocals() { Locals = LocalsEnd = NULL; Params = ParamsEnd = NULL; - FunctionEntry = NULL; + CurrentFile->FunctionEntry = NULL; } diff --git a/src/Types.c b/src/Types.c index e5dcbe2..5e868f6 100644 --- a/src/Types.c +++ b/src/Types.c @@ -51,7 +51,7 @@ int 
PrimitiveSize(int Type) { case RET_LONG: return 8; default: - DieBinary("Bad type in PrimitiveSize", Type); + DieDecimal("Bad type in PrimitiveSize", Type); } return 0; }