Major refactoring

Comments added to the parser and lexer, more to come
This commit is contained in:
Curle 2021-01-20 19:22:15 +00:00
parent eb118db872
commit 18b5da209d
No known key found for this signature in database
GPG Key ID: 58A5C4688ECE6E7C
9 changed files with 666 additions and 319 deletions

View File

@ -18,6 +18,11 @@
extern_ struct SymbolTableEntry* Globals, *GlobalsEnd; extern_ struct SymbolTableEntry* Globals, *GlobalsEnd;
extern_ struct SymbolTableEntry* Locals, *LocalsEnd; extern_ struct SymbolTableEntry* Locals, *LocalsEnd;
extern_ struct SymbolTableEntry* Params, *ParamsEnd; extern_ struct SymbolTableEntry* Params, *ParamsEnd;
extern_ struct SymbolTableEntry* Structs, *StructsEnd;
extern_ struct SymbolTableEntry* StructMembers, *StructMembersEnd;
extern_ struct SymbolTableEntry* Unions, *UnionsEnd;
extern_ struct SymbolTableEntry* Enums, *EnumsEnd;
extern_ bool OptDumpTree; extern_ bool OptDumpTree;
extern_ bool OptKeepAssembly; extern_ bool OptKeepAssembly;

View File

@ -92,7 +92,8 @@ enum TokenTypes {
KW_ELSE, KW_ELSE,
KW_WHILE, KW_WHILE,
KW_FOR, KW_FOR,
KW_RETURN KW_RETURN,
KW_STRUCT
}; };
/* /*
@ -178,7 +179,6 @@ struct ASTNode {
union { union {
int Size; // OP_SCALE's linear representation int Size; // OP_SCALE's linear representation
int IntValue; // TERM_INTLIT's Value int IntValue; // TERM_INTLIT's Value
int ID; // LV_IDENT's Symbols[] index.
}; };
}; };
@ -215,6 +215,9 @@ struct SymbolTableEntry {
enum StorageScope { enum StorageScope {
SC_GLOBAL = 1, // Global Scope SC_GLOBAL = 1, // Global Scope
SC_STRUCT, // Struct Definitions
SC_ENUM, // Enum Definitions
SC_MEMBER, // The members of Structs or Enums
//SC_CLASS, // Class-local definitions //SC_CLASS, // Class-local definitions
//SC_STATIC, // Static storage definitions //SC_STATIC, // Static storage definitions
SC_PARAM, // Function parameters SC_PARAM, // Function parameters
@ -274,7 +277,7 @@ void DisplayUsage(char* ProgName);
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
int Tokenise(struct Token* Token); void Tokenise();
void VerifyToken(int Type, char* TokenExpected); void VerifyToken(int Type, char* TokenExpected);
void RejectToken(struct Token* Token); void RejectToken(struct Token* Token);
@ -355,14 +358,15 @@ struct ASTNode* PrintStatement(void);
struct SymbolTableEntry* FindSymbol(char* Symbol); struct SymbolTableEntry* FindSymbol(char* Symbol);
struct SymbolTableEntry* FindLocal(char* Symbol); struct SymbolTableEntry* FindLocal(char* Symbol);
struct SymbolTableEntry* FindGlobal(char* Symbol); struct SymbolTableEntry* FindGlobal(char* Symbol);
struct SymbolTableEntry* FindStruct(char* Symbol);
struct SymbolTableEntry* FindMember(char* Symbol);
void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node); void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node);
void FreeLocals(); void FreeLocals();
void ClearTables(); void ClearTables();
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset); struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O N T R O L S T A T U S * * * * * * * * C O N T R O L S T A T U S * * * *
@ -460,7 +464,7 @@ void AsFunctionEpilogue(struct SymbolTableEntry* Entry);
* * * * D E C L A R A T I O N * * * * * * * * D E C L A R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope); struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
struct ASTNode* ParseIdentifier(void); struct ASTNode* ParseIdentifier(void);
struct ASTNode* IfStatement(); struct ASTNode* IfStatement();

View File

@ -52,7 +52,7 @@ char* Compile(char* InputFile) {
if(OptVerboseOutput) if(OptVerboseOutput)
printf("Compiling %s\r\n", InputFile); printf("Compiling %s\r\n", InputFile);
Tokenise(&CurrentToken); Tokenise();
AssemblerPreamble(); AssemblerPreamble();

View File

@ -11,10 +11,29 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * C H A R S T R E AM * * * * * * * * * * * * C H A R S T R E AM * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* The Lexer holds a "stream" of characters.
* You may read a character from the stream, and if it is not
* the desired character, it may be placed into an "overread" buffer.
* The overread buffer is checked before the source file is read any further.
* This provides an effective way to "un-read" a character.
*
* @param Char: The character to "un-read"
*
*/
static void ReturnCharToStream(int Char) { static void ReturnCharToStream(int Char) {
Overread = Char; Overread = Char;
} }
/*
* NextChar allows you to ask the Lexer for the next useful character.
* As mentioned above, it checks the overread buffer first.
*
* @return the character as int
*
*/
static int NextChar(void) { static int NextChar(void) {
int Char; int Char;
@ -32,6 +51,10 @@ static int NextChar(void) {
return Char; return Char;
} }
/*
* Searches for the next useful character, skipping whitespace.
* @return the character as int.
*/
static int FindChar() { static int FindChar() {
int Char; int Char;
@ -45,14 +68,31 @@ static int FindChar() {
return Char; return Char;
} }
/*
* Allows the conversion between ASCII, hex and numerals.
* @param String: The set of all valid results
* @param Char: The ASCII character to convert
* @return the zero-based position of Char within String, if it is in the set of valid results. -1 if not.
*/
static int FindDigitFromPos(char* String, char Char) { static int FindDigitFromPos(char* String, char Char) {
char* Result = strchr(String, Char); char* Result = strchr(String, Char);
return(Result ? Result - String : -1); return(Result ? Result - String : -1);
} }
/*
* Facilitates the easy checking of expected tokens.
* NOTE: there is (soon to be) an optional variant of this function that
* reads a token but does not consume it ( via Tokenise )
*
* @param Type: The expected token, in terms of value of the TokenTypes enum.
* @param TokenExpected: A string to output when the token is not found.
*
*/
void VerifyToken(int Type, char* TokenExpected) { void VerifyToken(int Type, char* TokenExpected) {
if(CurrentToken.type == Type) if(CurrentToken.type == Type)
Tokenise(&CurrentToken); Tokenise();
else { else {
printf("Expected %s on line %d\n", TokenExpected, Line); printf("Expected %s on line %d\n", TokenExpected, Line);
exit(1); exit(1);
@ -61,6 +101,12 @@ void VerifyToken(int Type, char* TokenExpected) {
static struct Token* RejectedToken = NULL; static struct Token* RejectedToken = NULL;
/*
* Rejected Tokens and the Overread Stream are identical concepts.
* This was implemented first, but it is no longer used.
* TODO: Refactor this function out.
*/
void RejectToken(struct Token* Token) { void RejectToken(struct Token* Token) {
if(RejectedToken != NULL) if(RejectedToken != NULL)
Die("Cannot reject two tokens in a row!"); Die("Cannot reject two tokens in a row!");
@ -72,6 +118,21 @@ void RejectToken(struct Token* Token) {
* * * * L I T E R A L S A N D I D E N T I F I E R S * * * * * * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Facilitates the parsing of integer literals from the file.
* Currently only supports the decimal numbers, despite the
* FindDigitFromPos function allowing conversion.
*
* The function loops over the characters, multiplying by 10 and adding
* the new value on top, until a non-numeric character is found.
* At that point, it returns the non-numeric character to the Overread Stream
* and returns the calculated number.
*
* @param Char: The first number to scan.
* @return the full parsed number as an int.
*
*/
static int ReadInteger(int Char) { static int ReadInteger(int Char) {
int CurrentChar = 0; int CurrentChar = 0;
int IntegerValue = 0; int IntegerValue = 0;
@ -86,7 +147,23 @@ static int ReadInteger(int Char) {
return IntegerValue; return IntegerValue;
} }
// Variable identifier, keyword, function. /*
* An Identifier can be any of:
* * A function name
* * A variable name
* * A struct name
* / A class name
* / An annotation name
*
* This function allows a full name to be read into a buffer, with a defined
* start character and a defined maximum text size limit.
*
* @param Char: The first char of the Identifier.
* @param Buffer: The location to store the Identifier. (usually CurrentIdentifier, a compiler global defined for this purpose)
* @param Limit: The maximum Identifier length.
* @return the length of the parsed identifier
*
*/
static int ReadIdentifier(int Char, char* Buffer, int Limit) { static int ReadIdentifier(int Char, char* Buffer, int Limit) {
int ind = 0; int ind = 0;
@ -108,6 +185,17 @@ static int ReadIdentifier(int Char, char* Buffer, int Limit) {
return ind; return ind;
} }
/*
* Char literals appear as 'x'
*
* They are bounded by two apostrophes.
* They can contain any 1-byte ASCII character, as well as some
* predefined, standard escape codes.
* This function attempts to get the character from the file, with escape codes intact.
*
* @return the character as an int
*
*/
static int ReadCharLiteral() { static int ReadCharLiteral() {
int Char; int Char;
Char = NextChar(); Char = NextChar();
@ -131,7 +219,20 @@ static int ReadCharLiteral() {
return Char; return Char;
} }
/*
* String literals appear as "hello world"
*
* They are bounded by two quotation marks.
* They can contain an arbitrary length of text.
* They are backed by an array of chars (hence the char* type) and thus
* have a practically unlimited length.
*
* To read a String Literal, it is a simple matter of reading Char Literals until
* the String termination token is identified - the last quotation mark.
*
* @param Buffer: The buffer into which to write the string. (usually CurrentIdentifier, a compiler global defined for this purpose)
*
*/
static int ReadStringLiteral(char* Buffer) { static int ReadStringLiteral(char* Buffer) {
int Char; int Char;
@ -148,9 +249,18 @@ static int ReadStringLiteral(char* Buffer) {
} }
/* /*
* This function is what defines the valid keywords for the language * Keywords are source-code tokens / strings that are reserved for the compiler.
* //TODO: move this to a static list? * They cannot be used as identifiers on their own.
* //TODO: More optimisations? *
* This function is where all of the keywords are added, and where most aliases are going to be stored.
*
* It uses a switch on the first character of the input string as an optimisation - rather than checking each
* keyword against the String individually, it only needs to compare a single number. This can be optimised into
* a jump table by the compiler for further optimisation, making this one of the fastest ways to switch
* on a full string.
*
* @param Str: The keyword input to try to parse
* @return the token expressed in terms of values of the TokenTypes enum
* *
*/ */
static int ReadKeyword(char* Str) { static int ReadKeyword(char* Str) {
@ -203,7 +313,6 @@ static int ReadKeyword(char* Str) {
break; break;
case 'p': case 'p':
// This is a huge optimisation once we have as many keywords as a fully featured language.
if(!strcmp(Str, "print")) if(!strcmp(Str, "print"))
return KW_PRINT; return KW_PRINT;
break; break;
@ -213,6 +322,11 @@ static int ReadKeyword(char* Str) {
return KW_RETURN; return KW_RETURN;
break; break;
case 's':
if(!strcmp(Str, "struct"))
return KW_STRUCT;
break;
case 'v': case 'v':
if(!strcmp(Str, "void")) if(!strcmp(Str, "void"))
return TY_VOID; return TY_VOID;
@ -234,8 +348,21 @@ static int ReadKeyword(char* Str) {
* * * * T O K E N I S E R * * * * * * * * T O K E N I S E R * * * *
* * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * */
int Tokenise(struct Token* Token) { /*
* Handles the majority of the work of reading tokens into the stream.
* It reads chars with FindChar, categorizing individual characters or small
* strings into their proper expression (as a value of the TokenTypes enum)
*
* It also defers the reading of numeric literals and char literals to the proper functions.
*
* If needed, it can also read Identifiers, for variable or function naming.
*
* This function may be the main bottleneck in the lexer.
*
*/
void Tokenise() {
int Char, TokenType; int Char, TokenType;
struct Token* Token = &CurrentToken;
if(RejectedToken != NULL) { if(RejectedToken != NULL) {
Token = RejectedToken; Token = RejectedToken;
@ -439,7 +566,5 @@ int Tokenise(struct Token* Token) {
DieChar("Unrecognized character", Char); DieChar("Unrecognized character", Char);
} }
return 1;
} }

View File

@ -73,48 +73,54 @@ char* TokenNames[] = {
"While keyword", "While keyword",
"For keyword", "For keyword",
"Return keyword" "Return keyword",
"Struct keyword"
}; };
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
/* Line = 1; // Option initialisers
Overread = '\n';
CurrentGlobal = 0;
struct ASTNode* Node;
CurrentLocal = SYMBOLS - 1; */
OptDumpTree = false; OptDumpTree = false;
OptKeepAssembly = false; OptKeepAssembly = false;
OptAssembleFiles = false; OptAssembleFiles = false;
OptLinkFiles = true; OptLinkFiles = true;
OptVerboseOutput = false; OptVerboseOutput = false;
// Temporary .o storage and counter
char* ObjectFiles[100]; char* ObjectFiles[100];
int ObjectCount = 0; int ObjectCount = 0;
// Parse command line arguments.
int i; int i;
for(i = 1; i < argc; i++) { for(i = 1/*skip 0*/; i < argc; i++) {
if(*argv[i] != '-') // not a flag // If we're not a flag, we can skip.
// We only care about flags in rows.
// ie. erc >> -v -T -o << test.exe src/main.er
if(*argv[i] != '-')
break; break;
// Once we identify a flag, we need to make sure it's not just a minus in-place.
for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) { for(int j = 1; (*argv[i] == '-') && argv[i][j]; j++) {
// Finally, identify what option is being invoked.
switch(argv[i][j]) { switch(argv[i][j]) {
case 'o': case 'o': // output
OutputFileName = argv[++i]; OutputFileName = argv[++i];
break; break;
case 'T': case 'T': // Debug
OptDumpTree = true; OptDumpTree = true;
break; break;
case 'c': case 'c': // Compile only
OptAssembleFiles = true; OptAssembleFiles = true;
OptKeepAssembly = false; OptKeepAssembly = false;
OptLinkFiles = false; OptLinkFiles = false;
break; break;
case 'S': case 'S': // Emit assembly only (no assembling or linking)
OptAssembleFiles = false; OptAssembleFiles = false;
OptKeepAssembly = true; OptKeepAssembly = true;
OptLinkFiles = false; OptLinkFiles = false;
break; break;
case 'v': case 'v': // Verbose output
OptVerboseOutput = true; OptVerboseOutput = true;
break; break;
default: default:
@ -123,29 +129,42 @@ int main(int argc, char* argv[]) {
} }
} }
// If we didn't provide anything other than flags, we need to show how to use the program.
if(i >= argc) if(i >= argc)
DisplayUsage(argv[0]); DisplayUsage(argv[0]);
// For the rest of the files specified, we can iterate them left to right.
while(i < argc) { while(i < argc) {
// Compile the file by invoking the Delegate
CurrentASMFile = Compile(argv[i]); CurrentASMFile = Compile(argv[i]);
if(OptLinkFiles || OptAssembleFiles) { if(OptLinkFiles || OptAssembleFiles) {
// If we need to assemble (or link, which requires assembly)
// then we invoke the Delegate again
CurrentObjectFile = Assemble(CurrentASMFile); CurrentObjectFile = Assemble(CurrentASMFile);
// We can only keep track of 99 objects, so we should crash at 98 to ensure we have enough room for the output file too.
if(ObjectCount == 98) { if(ObjectCount == 98) {
fprintf(stderr, "Too many inputs"); fprintf(stderr, "Too many inputs");
return 1; return 1; // We use return because we're in main, rather than invoking Die.
} }
// Move the ObjectCount forward.
ObjectFiles[ObjectCount++] = CurrentObjectFile; ObjectFiles[ObjectCount++] = CurrentObjectFile;
// Clear the new, forwarded index
ObjectFiles[ObjectCount] = NULL; ObjectFiles[ObjectCount] = NULL;
} }
if(!OptKeepAssembly) if(!OptKeepAssembly)
// unlink = delete
unlink(CurrentASMFile); unlink(CurrentASMFile);
i++; i++;
} }
if(OptLinkFiles) { if(OptLinkFiles) {
// If needed, invoke the Delegate one last time.
Link(OutputFileName, ObjectFiles); Link(OutputFileName, ObjectFiles);
if(!OptAssembleFiles) { if(!OptAssembleFiles) {
// Even though we need to assemble to link, we can respect the user's options and delete the intermediary files.
for(i = 0; ObjectFiles[i] != NULL; i++) for(i = 0; ObjectFiles[i] != NULL; i++)
unlink(ObjectFiles[i]); unlink(ObjectFiles[i]);
} }
@ -155,6 +174,11 @@ int main(int argc, char* argv[]) {
} }
/*
* Akin to a Halt and Catch Fire method.
* Simply prints an error, cleans up handles, and closes.
*/
void Die(char* Error) { void Die(char* Error) {
fprintf(stderr, "%s on line %d\n", Error, Line); fprintf(stderr, "%s on line %d\n", Error, Line);
fclose(OutputFile); fclose(OutputFile);
@ -162,6 +186,9 @@ void Die(char* Error) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra String attached.
*/
void DieMessage(char* Error, char* Reason) { void DieMessage(char* Error, char* Reason) {
fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line); fprintf(stderr, "%s: %s on line %d\n", Error, Reason, Line);
fclose(OutputFile); fclose(OutputFile);
@ -169,6 +196,9 @@ void DieMessage(char* Error, char* Reason) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra integer attached.
*/
void DieDecimal(char* Error, int Number) { void DieDecimal(char* Error, int Number) {
fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line); fprintf(stderr, "%s: %d on line %d\n", Error, Number, Line);
fclose(OutputFile); fclose(OutputFile);
@ -176,6 +206,9 @@ void DieDecimal(char* Error, int Number) {
exit(1); exit(1);
} }
/*
* A variant of Die with an extra character attached.
*/
void DieChar(char* Error, int Char) { void DieChar(char* Error, int Char) {
fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line); fprintf(stderr, "%s: %c on line %d\n", Error, Char, Line);
fclose(OutputFile); fclose(OutputFile);

View File

@ -10,12 +10,10 @@
#include "Data.h" #include "Data.h"
/* /*
* Precedence is directly related to Token Type. * The Precedence of an operator is directly related to Token Type.
* * Precedence determines how soon the operator and its surrounding values
* enum TokenTypes { * will be calculated and aliased.
* LI_EOF, AR_PLUS, AR_MINUS, AR_STAR, AR_SLASH, LI_INT * This allows for things like the common Order of Operations.
* };
*
*/ */
static int Precedence[] = { static int Precedence[] = {
0, 10, // EOF, ASSIGN 0, 10, // EOF, ASSIGN
@ -30,6 +28,13 @@ static int Precedence[] = {
110 // / 110 // /
}; };
/*
* Handles gathering the precedence of an operator from its token,
* in terms of values of the TokenTypes enum.
*
* Error handling is also done here, so that EOF or non-operators are not executed.
*
*/
static int OperatorPrecedence(int Token) { static int OperatorPrecedence(int Token) {
int Prec = Precedence[Token]; int Prec = Precedence[Token];
@ -40,6 +45,13 @@ static int OperatorPrecedence(int Token) {
return Prec; return Prec;
} }
/*
* If the value is a right-expression, or in other words is right associative,
* then it can be safely calculated beforehand and aliased to a value.
* In this case, we can try to alias (or constant fold) everything on the right side
* of an assignment.
*/
static int IsRightExpr(int Token) { static int IsRightExpr(int Token) {
return (Token == LI_EQUAL); return (Token == LI_EQUAL);
} }
@ -48,6 +60,29 @@ static int IsRightExpr(int Token) {
* * * N O D E C O N S T R U C T I O N * * * * * * N O D E C O N S T R U C T I O N * * *
* * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * */
/*
* ASTNodes form the structure of the language that moves the bulk of
* data around within the compiler.
* They contain:
* * An Operation (usually 1:1 with an input token),
* * A Type (to identify the size of data it contains),
* * Two more Left and Right ASTNodes (the child branches that form the tree)
* * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop)
* * A Symbol Table Entry
* * An Integer Value
* * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval
*
* This is the only function where they are constructed.
*
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Left: The Node that is attached to the left side branch of this root.
* @param Middle: The Node that is attached to the middle of this root, if applicable.
* @param Right: The Node that is attached to the right side branch of this root.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTNode(int Operation, int Type, struct ASTNode* ConstructASTNode(int Operation, int Type,
struct ASTNode* Left, struct ASTNode* Left,
struct ASTNode* Middle, struct ASTNode* Middle,
@ -75,10 +110,28 @@ struct ASTNode* ConstructASTNode(int Operation, int Type,
} }
/*
* AST Leaves are categorized by their lack of child nodes.
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) { struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) {
return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue); return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue);
} }
/*
* AST Branches are categorized by having only one child node.
* These are sometimes called Unary Branches.
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
* @param Left: The Node that is attached to the left side branch of this root.
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
* @param IntValue: The integer value encoded by this Node, if applicable.
* @return a newly constructed AST Node
*/
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) { struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) {
return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue); return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue);
} }
@ -89,10 +142,10 @@ struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left
* * * * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * * * */
/* /*
* Take a Token Type, and convert it to an AST-Node Operation. * TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that
* * these are synchronized well.
* TokenTypes and SyntaxOps are synchronized to make this easy. * This allows the parsing operation to be little more than a bounds check.
* * Otherwise, this would be a gigantic switch statement.
*/ */
int ParseTokenToOperation(int Token) { int ParseTokenToOperation(int Token) {
@ -103,11 +156,13 @@ int ParseTokenToOperation(int Token) {
} }
/* /*
* Parse a primary (terminal) expression. * Primary expressions may be any one of:
* This currently handles literal expressions, constructing a leaf node * * A terminal integer literal
* and handing control back up the chain. * * A terminal string literal
* * * A variable
* * A collection of expressions bounded by parentheses.
* *
* @return the AST Node that represents this expression
*/ */
struct ASTNode* ParsePrimary(void) { struct ASTNode* ParsePrimary(void) {
@ -134,7 +189,7 @@ struct ASTNode* ParsePrimary(void) {
case LI_LPARE: case LI_LPARE:
// Starting a ( expr ) block // Starting a ( expr ) block
Tokenise(&CurrentToken); Tokenise();
Node = ParsePrecedenceASTNode(0); Node = ParsePrecedenceASTNode(0);
@ -144,12 +199,26 @@ struct ASTNode* ParsePrimary(void) {
} }
Tokenise(&CurrentToken); Tokenise();
return Node; return Node;
} }
/*
* Parse a single binary expression.
* It ensures that these expressions are parsed to their full extent, that
* the order of operations is upheld, that the precedence of the prior
* iteration is considered, and that every error is handled.
*
* This is where all of the right-associative statements are folded, where
* type mismatches and widening are handled properly, and where all parsing
* is finished by the time the end tokens ") } ] ;" are encountered.
*
* @param PreviousTokenPrecedence: The precedence of the operator to the left.
* @return the AST Node corresponding to this block.
*
*/
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) { struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
struct ASTNode* LeftNode, *RightNode; struct ASTNode* LeftNode, *RightNode;
struct ASTNode* LeftTemp, *RightTemp; struct ASTNode* LeftTemp, *RightTemp;
@ -159,25 +228,19 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode = PrefixStatement(); LeftNode = PrefixStatement();
NodeType = CurrentToken.type; NodeType = CurrentToken.type;
//printf("%d\r\n", CurrentToken.type);
if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) { if(NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM) {
//printf("Current token matches ; ) ]\r\n");
LeftNode->RVal = 1; return LeftNode; LeftNode->RVal = 1; return LeftNode;
} }
//printf("Current token has value %d, type %s\n", CurrentToken.value, TokenNames[CurrentToken.type]);
while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) { while((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) || (IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) {
//printf("inside while\n"); Tokenise();
Tokenise(&CurrentToken);
if(CurrentToken.type == LI_RPARE) if(CurrentToken.type == LI_RPARE)
break; break;
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]); RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
/*
LeftType = LeftNode->ExprType;
RightType = RightNode->ExprType;
*/
/** /**
* While parsing this node, we may need to widen some types. * While parsing this node, we may need to widen some types.
* This requires a few functions and checks. * This requires a few functions and checks.
@ -194,9 +257,6 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
if(LeftNode == NULL) if(LeftNode == NULL)
Die("Incompatible Expression encountered in assignment"); Die("Incompatible Expression encountered in assignment");
//printf("\tAssigning variable: %s value %d\n", Symbols[FindSymbol(CurrentIdentifier)].Name, RightNode->Value.IntValue);
// LeftNode holds the target, the target variable in this case // LeftNode holds the target, the target variable in this case
printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name); printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name);
@ -212,11 +272,9 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
LeftNode->RVal = 1; LeftNode->RVal = 1;
RightNode->RVal = 1; RightNode->RVal = 1;
//printf("mutate left\r\n");
LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType); LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType);
//printf("mutate right\r\n");
RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType); RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType);
//printf("mutate right over\r\n");
/** /**
* If both are null, the types are incompatible. * If both are null, the types are incompatible.
*/ */
@ -267,105 +325,21 @@ struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
} }
/* struct ASTNode* ParseMultiplicativeASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParsePrimary();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
Tokenise(&CurrentToken);
RightNode = ParsePrimary();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* struct ASTNode* ParseAdditiveASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParseMultiplicativeASTNode();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while(1) {
Tokenise(&CurrentToken);
RightNode = ParseMultiplicativeASTNode();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * * I N T E R P R E T A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
int ParseAST(struct ASTNode* Node) {
int LeftVal, RightVal;
if(Node->Left)
LeftVal = ParseAST(Node->Left);
if(Node->Right)
RightVal = ParseAST(Node->Right);
/*
if(Node->Operation == TERM_INTLITERAL)
printf("int %d\n", Node->IntValue);
else
printf("%d %s %d\n", LeftVal, TokenStrings[Node->Operation], RightVal);
switch(Node->Operation) {
case OP_ADD:
return (LeftVal + RightVal);
case OP_SUBTRACT:
return (LeftVal - RightVal);
case OP_MULTIPLY:
return (LeftVal * RightVal);
case OP_DIVIDE:
return (LeftVal / RightVal);
case REF_IDENT:
case TERM_INTLITERAL:
return Node->Value.IntValue;
default:
fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
exit(1);
}
}
*/
/* * * * * * * * * * * * * * * * * * * * * /* * * * * * * * * * * * * * * * * * * * *
* * * * F U N C T I O N S * * * * * * * * F U N C T I O N S * * * *
* * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * */
/*
* Handles the logic for calling a function.
* This is invoked by an identifier being recognized, followed by a "(.*)" string.
*
* It simply checks that the function exists, that the parameters given are valid,
* and generates the AST Node for calling it.
*
* @return the AST Node for calling the function stored in CurrentIdentifier
*
*/
struct ASTNode* CallFunction() { struct ASTNode* CallFunction() {
struct ASTNode* Tree; struct ASTNode* Tree;
struct SymbolTableEntry* Function; struct SymbolTableEntry* Function;
@ -385,6 +359,21 @@ struct ASTNode* CallFunction() {
return Tree; return Tree;
} }
/*
* An expression list is used:
* * In the call to a function
*
* It is parsed by seeking a left parenthesis "(", parsing binary expressions
* until either a comma or a right parenthesis is found.
*
* The former will cause another expression to be parsed, the latter will cause
* parsing to stop.
*
* @return the AST Node representing every expression in the list, glued end to
* end with a COMPOSITE operation.
*
*/
struct ASTNode* GetExpressionList() { struct ASTNode* GetExpressionList() {
struct ASTNode* Tree = NULL, *Child = NULL; struct ASTNode* Tree = NULL, *Child = NULL;
int Count; int Count;
@ -397,7 +386,7 @@ struct ASTNode* GetExpressionList() {
switch(CurrentToken.type) { switch(CurrentToken.type) {
case LI_COM: case LI_COM:
Tokenise(&CurrentToken); Tokenise();
break; break;
case LI_RPARE: case LI_RPARE:
break; break;
@ -414,6 +403,18 @@ struct ASTNode* GetExpressionList() {
* * * * S T A T E M E N T S * * * * * * * * S T A T E M E N T S * * * *
* * * * * * * * * * * * * * * * * * * * * */ * * * * * * * * * * * * * * * * * * * * * */
/*
* Handles parsing an individual statement.
*
* It serves as a wrapper around:
* * If Statement
* * While Statement
* * For Statement
* * Return Statement
* * Numeric literals and variables
* * Binary Expressions
* @return the AST Node representing this single statement
*/
struct ASTNode* ParseStatement(void) { struct ASTNode* ParseStatement(void) {
int Type; int Type;
@ -425,18 +426,10 @@ struct ASTNode* ParseStatement(void) {
printf("\t\tNew Variable: %s\n", CurrentIdentifier); printf("\t\tNew Variable: %s\n", CurrentIdentifier);
Type = ParseOptionalPointer(); Type = ParseOptionalPointer();
VerifyToken(TY_IDENTIFIER, "ident"); VerifyToken(TY_IDENTIFIER, "ident");
BeginVariableDeclaration(Type, SC_LOCAL); BeginVariableDeclaration(Type, NULL, SC_LOCAL);
VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment? VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment?
return NULL; return NULL;
/*case TY_IDENTIFIER:
if(Symbols[FindSymbol(CurrentIdentifier)].Structure == ST_FUNC)
printf("\t\tCalling Function: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
else
printf("\t\tAssigning variable: %s\n", Symbols[FindSymbol(CurrentIdentifier)].Name);
return ParseIdentifier();
*/
case KW_IF: case KW_IF:
return IfStatement(); return IfStatement();
@ -451,11 +444,26 @@ struct ASTNode* ParseStatement(void) {
default: default:
ParsePrecedenceASTNode(0); ParsePrecedenceASTNode(0);
//DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
} }
} }
/*
* Handles parsing multiple statements or expressions in a row.
* These are typically grouped together with the Compound tokens "{ }"
* and separated by the semicolon ";".
*
* Single Statements are parsed until a semicolon is reached, at which
* point another statement will be parsed, or until a Right Compound
* token is reached ("}"), at which point parsing will stop.
*
* It is useful for:
* * Tightly identifying related blocks of code
* * Containing the many statements of functions
*
* @return the AST Node representing this compound statement
*
*/
struct ASTNode* ParseCompound() { struct ASTNode* ParseCompound() {
struct ASTNode* Left = NULL, *Tree; struct ASTNode* Left = NULL, *Tree;
@ -486,6 +494,21 @@ struct ASTNode* ParseCompound() {
} }
} }
/*
* This is the entry point to the parser/lexer.
*
* By definition, Global definitions are accessible anywhere.
* As of right now (20/01/2021), classes are unimplemented.
* This means that all functions and all function prototypes are globally scoped.
*
* You may also define variables, constants, preprocessor directives and other text
* in the global scope.
*
* The function itself loops, parsing either variables or functions, until it
* reaches the end of the file.
*
*/
void ParseGlobals() { void ParseGlobals() {
struct ASTNode* Tree; struct ASTNode* Tree;
int Type, FunctionComing; int Type, FunctionComing;
@ -516,7 +539,7 @@ void ParseGlobals() {
} }
} else { } else {
printf("\tParsing global variable declaration\n"); printf("\tParsing global variable declaration\n");
BeginVariableDeclaration(Type, SC_GLOBAL); BeginVariableDeclaration(Type, NULL, SC_GLOBAL);
VerifyToken(LI_SEMIC, ";"); VerifyToken(LI_SEMIC, ";");
} }

View File

@ -22,22 +22,30 @@ int ValueAt(int Type) {
return (Type - 1); return (Type - 1);
} }
int ParseOptionalPointer() { int ParseOptionalPointer(struct SymbolTableEntry** Composite) {
int Type; int Type;
switch(CurrentToken.type) { switch(CurrentToken.type) {
case TY_VOID: case TY_VOID:
Type = RET_VOID; Type = RET_VOID;
Tokenise();
break; break;
case TY_CHAR: case TY_CHAR:
Type = RET_CHAR; Type = RET_CHAR;
Tokenise();
break; break;
case TY_INT: case TY_INT:
Type = RET_INT; Type = RET_INT;
Tokenise();
break; break;
case TY_LONG: case TY_LONG:
Type = RET_LONG; Type = RET_LONG;
Tokenise();
break;
case KW_STRUCT:
Type = DAT_STRUCT;
*Composite = BeginStructDeclaration();
break; break;
default: default:
DieDecimal("Illegal type for pointerisation", CurrentToken.type); DieDecimal("Illegal type for pointerisation", CurrentToken.type);
@ -47,12 +55,13 @@ int ParseOptionalPointer() {
// x = **y; // x = **y;
// possible. // possible.
while(1) { while(1) {
Tokenise(&CurrentToken); Tokenise();
printf("\t\t\tType on parsing is %d\n", CurrentToken.type); printf("\t\t\tType on parsing is %d\n", CurrentToken.type);
if(CurrentToken.type != AR_STAR) if(CurrentToken.type != AR_STAR)
break; break;
Type = PointerTo(Type); Type = PointerTo(Type);
Tokenise();
} }
return Type; return Type;
@ -68,7 +77,7 @@ struct ASTNode* AccessArray() {
LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0); LeftNode = ConstructASTLeaf(OP_ADDRESS, Entry->Type, Entry, 0);
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]); //printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
Tokenise(&CurrentToken); Tokenise();
//printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]); //printf("\t\tCurrent token: %s\r\n", TokenNames[CurrentToken.type]);
RightNode = ParsePrecedenceASTNode(0); RightNode = ParsePrecedenceASTNode(0);

View File

@ -8,9 +8,27 @@
#include <Data.h> #include <Data.h>
#include <stdbool.h> #include <stdbool.h>
static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) { /*
* Handles reading in a comma-separated list of declarations.
* Erythro treats structs, enums and function parameters the same in this regard -
* comma separated.
*
* C and C++ tend to treat enums and structs differently - the former separated by commas,
* the latter separated by semicolons.
*
* Note that since functions are read in through parentheses, and structs/enums are read in
* through brackets, the end character is configurable.
*
* @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
* @param Storage: The Storage Scope of this declaration list.
* @param End: The end token, in terms of TokenTypes enum values.
* @return the amount of declarations read in.
*
*/
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
int TokenType, ParamCount = 0; int TokenType, ParamCount = 0;
struct SymbolTableEntry* PrototypePointer = NULL; struct SymbolTableEntry* PrototypePointer = NULL, *Composite;
if(FunctionSymbol != NULL) if(FunctionSymbol != NULL)
PrototypePointer = FunctionSymbol->Start; PrototypePointer = FunctionSymbol->Start;
@ -24,19 +42,15 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
DieDecimal("Function paramater of invalid type at index", ParamCount + 1); DieDecimal("Function paramater of invalid type at index", ParamCount + 1);
PrototypePointer=PrototypePointer->NextSymbol; PrototypePointer=PrototypePointer->NextSymbol;
} else { } else {
BeginVariableDeclaration(TokenType, SC_PARAM); BeginVariableDeclaration(TokenType, Composite, Storage);
} }
ParamCount++; ParamCount++;
switch(CurrentToken.type) { if((CurrentToken.type != LI_COM) && (CurrentToken.type != End))
case LI_COM: DieDecimal("Unexpected token in parameter", CurrentToken.type);
Tokenise(&CurrentToken);
break; if(CurrentToken.type == LI_COM)
case LI_RPARE: Tokenise();
break;
default:
DieDecimal("Unexpected token in parameter", CurrentToken.type);
}
} }
if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length)) if((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length))
@ -45,6 +59,61 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
return ParamCount; return ParamCount;
} }
/*
 * Handles the declaration of, or a reference to, a struct.
 *    struct thisStct { int x, int y, int z };
 *
 * On entry the current token is the "struct" keyword; it is skipped here.
 * An optional identifier follows, which is looked up in the Structs table.
 *
 *  * If no opening token (LI_LBRAC) follows, this is a reference to an
 *     existing struct: the entry that was found is returned, and an
 *     unknown name is reported through DieMessage.
 *  * Otherwise this is a definition: a name that already exists in the
 *     Structs table is reported as a redefinition, a new SC_STRUCT symbol
 *     is added, the member list is read, and every member is given its
 *     offset within the struct.
 *
 * The member list accumulated in StructMembers is handed over to the new
 * struct entry (Composite->Start) and the global list is reset, ready for
 * the next composite declaration.
 *
 * @return the Symbol Table entry of the struct that was referenced or defined.
 */
struct SymbolTableEntry* BeginStructDeclaration() {
    struct SymbolTableEntry* Composite = NULL, *Member;
    int Offset;

    // Step over the "struct" keyword.
    Tokenise();

    // A name is optional; if present, see whether we already know it.
    if(CurrentToken.type == TY_IDENTIFIER) {
        Composite = FindStruct(CurrentIdentifier);
        Tokenise();
    }

    // No body follows: this can only be a use of an existing struct.
    if(CurrentToken.type != LI_LBRAC) {
        if(Composite == NULL)
            DieMessage("Unknown Struct", CurrentIdentifier);
        return Composite;
    }

    // A body follows, so the name must not already be defined.
    if(Composite)
        DieMessage("Redefinition of struct", CurrentIdentifier);

    Composite = AddSymbol(CurrentIdentifier, DAT_STRUCT, 0, SC_STRUCT, 0, 0, NULL);
    Tokenise();

    // NOTE(review): the body is opened by LI_LBRAC but closed by LI_RBRAS,
    // which the array-parsing code pairs with "]". This looks like it should
    // be the closing counterpart of LI_LBRAC - confirm against the token enum.
    ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAS);
    VerifyToken(LI_RBRAS, "]");

    // Take ownership of the members that were just read.
    Composite->Start = StructMembers;
    StructMembers = StructMembersEnd = NULL;

    // An empty body leaves no members to lay out; the struct occupies no space.
    Member = Composite->Start;
    if(Member == NULL) {
        Composite->Length = 0;
        return Composite;
    }

    // The first member always sits at the start of the struct.
    Member->SinkOffset = 0;
    Offset = TypeSize(Member->Type, Member->CompositeType);

    for(Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
        // Presumably AsAlignMemory yields Offset adjusted to suit this
        // member's type - TODO confirm.
        Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1);

        // The next free byte follows this member's real position, so any
        // padding inserted by the alignment above is accounted for.
        Offset = Member->SinkOffset + TypeSize(Member->Type, Member->CompositeType);
    }

    // The total size is the first free byte after the last member.
    Composite->Length = Offset;
    return Composite;
}
/* /*
* Handles the declaration of a type of a variable. * Handles the declaration of a type of a variable.
* int newVar; * int newVar;
@ -52,11 +121,12 @@ static int ReadParameters(struct SymbolTableEntry* FunctionSymbol) {
* It verifies that we have a type keyword followed by a * It verifies that we have a type keyword followed by a
* unique, non-keyword identifier. * unique, non-keyword identifier.
* *
* It then stores this variable into the symbol table, * It then stores this variable into the appropriate symbol table,
* and returns the new item. * and returns the new item.
* *
* @return the Symbol Table entry of this new variable.
*/ */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) { struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
struct SymbolTableEntry* Symbol = NULL; struct SymbolTableEntry* Symbol = NULL;
switch(Scope) { switch(Scope) {
@ -66,33 +136,50 @@ struct SymbolTableEntry* BeginVariableDeclaration(int Type, int Scope) {
case SC_LOCAL: case SC_LOCAL:
case SC_PARAM: case SC_PARAM:
if(FindLocal(CurrentIdentifier) != NULL) if(FindLocal(CurrentIdentifier) != NULL)
DieMessage("Invalid redelcaration of local variable", CurrentIdentifier); DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
case SC_MEMBER:
if(FindMember(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
} }
if(CurrentToken.type == LI_LBRAS) { if(CurrentToken.type == LI_LBRAS) {
Tokenise(&CurrentToken); Tokenise();
if(CurrentToken.type == LI_INT) { if(CurrentToken.type == LI_INT) {
switch(Scope) { switch(Scope) {
case SC_GLOBAL: case SC_GLOBAL:
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0); Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
break; break;
case SC_LOCAL: case SC_LOCAL:
case SC_PARAM: case SC_PARAM:
case SC_MEMBER:
Die("Local arrays are unimplemented"); Die("Local arrays are unimplemented");
} }
} }
Tokenise(&CurrentToken); Tokenise();
VerifyToken(LI_RBRAS, "]"); VerifyToken(LI_RBRAS, "]");
} else { } else {
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0); Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
} }
return Symbol; return Symbol;
} }
/*
* Handles the declaration of a new function.
* Verifies that the identifier is not taken (excluding the case
* where there is a declaration but no definition)
* Parses the list of parameters if present
* Saves the function prototype if there is no body
* Generates and saves the break-out point label
*
* @param Type: The return type of the function
* @return the AST for this function
*
*/
struct ASTNode* ParseFunction(int Type) { struct ASTNode* ParseFunction(int Type) {
struct ASTNode* Tree; struct ASTNode* Tree;
struct ASTNode* FinalStatement; struct ASTNode* FinalStatement;
@ -104,7 +191,7 @@ struct ASTNode* ParseFunction(int Type) {
OldFunction = NULL; OldFunction = NULL;
if(OldFunction == NULL) { if(OldFunction == NULL) {
BreakLabel = NewLabel(); BreakLabel = NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0); NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
} }
VerifyToken(LI_LPARE, "("); VerifyToken(LI_LPARE, "(");
@ -120,7 +207,7 @@ struct ASTNode* ParseFunction(int Type) {
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
if(CurrentToken.type == LI_SEMIC) { if(CurrentToken.type == LI_SEMIC) {
Tokenise(&CurrentToken); Tokenise();
return NULL; return NULL;
} }
@ -149,7 +236,6 @@ struct ASTNode* ParseFunction(int Type) {
* //TODO: No brackets * //TODO: No brackets
* //TODO: Type inference * //TODO: Type inference
* *
*
*/ */
struct ASTNode* ReturnStatement() { struct ASTNode* ReturnStatement() {
@ -166,19 +252,10 @@ struct ASTNode* ReturnStatement() {
Tree = ParsePrecedenceASTNode(0); Tree = ParsePrecedenceASTNode(0);
/*
ReturnType = Tree->ExprType;
FunctionType = Symbols[CurrentFunction].Type;
*/
Tree = MutateType(Tree, FunctionEntry->Type, 0); Tree = MutateType(Tree, FunctionEntry->Type, 0);
if(Tree == NULL) if(Tree == NULL)
Die("Returning a value of incorrect type for function"); Die("Returning a value of incorrect type for function");
/*
if(ReturnType)
Tree = ConstructASTBranch(ReturnType, FunctionType, Tree, 0);
*/
Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0); Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, FunctionEntry, 0);
@ -189,59 +266,33 @@ struct ASTNode* ReturnStatement() {
return Tree; return Tree;
} }
/* /*
* Handles Identifiers. * Handles the surrounding logic for If statements.
* *
* This is called for any of: * If statements have the basic form:
* - Calling a function * * if (condition) body
* - Assigning an lvalue variable * * if (condition)
* - Performing arithmetic on a variable * body
* - Performing arithmetic with the return values of function calls * * if (condition) {
* body
* }
* *
* For the case where you're assigning an l-value; * Conditions may be any truthy statement (such as a pointer,
* You can assign with another assignment, * object, integer), as conditions not recognized are auto-
* a statement, a function or a literal. * matically converted to booleans.
*
* This meaning, any object that can be resolved to 0 or NULL
* can be placed as the condition and used as a check.
*
* For example:
* struct ASTNode* Node = NULL;
* if(Node) {
* // This will not run, as Node is ((void*)0)
* }
* *
*/ */
/*
struct ASTNode* ParseIdentifier() {
struct ASTNode* Left, *Right, *Tree;
int LeftType, RightType;
int ID;
VerifyToken(TY_IDENTIFIER, "ident");
printf("\t\tAfter parsing, the identifier name is %s, id %d in the symbol table.\n", CurrentIdentifier, FindSymbol(CurrentIdentifier));
if(CurrentToken.type == LI_LPARE)
return CallFunction();
if((ID = FindSymbol(CurrentIdentifier)) == -1) {
printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier, Symbols[0].Name, Symbols[1].Name);
DieMessage("Undeclared Variable ", CurrentIdentifier);
}
Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);
VerifyToken(LI_EQUAL, "=");
Left = ParsePrecedenceASTNode(0);
LeftType = Left->ExprType;
RightType = Right->ExprType;
Left = MutateType(Left, RightType, 0);
if(!Left)
Die("Incompatible types in assignment");
if(LeftType)
Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);
Tree = ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
return Tree;
}*/
struct ASTNode* IfStatement() { struct ASTNode* IfStatement() {
struct ASTNode* Condition, *True, *False = NULL; struct ASTNode* Condition, *True, *False = NULL;
@ -261,13 +312,39 @@ struct ASTNode* IfStatement() {
True = ParseCompound(); True = ParseCompound();
if(CurrentToken.type == KW_ELSE) { if(CurrentToken.type == KW_ELSE) {
Tokenise(&CurrentToken); Tokenise();
False = ParseCompound(); False = ParseCompound();
} }
return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0); return ConstructASTNode(OP_IF, RET_NONE, Condition, True, False, NULL, 0);
} }
/*
* Handles the surrounding logic for While loops.
*
* While loops have the basic form:
* while ( condition ) { body }
*
* When reaching the condition (which, like an If statement,
* can be any truthy value), if it resolves to true:
* The body is executed, and immediately the condition is checked
* again.
* This repeats until the condition resolves false, at which point
* the loop executes no more.
*
* This can be prototyped as the following pseudo-assembler:
*
* cond:
* check <condition>
* jne exit
* <body>
* jump cond
* exit:
* <more code>
*
* @return the AST of this statement
*
*/
struct ASTNode* WhileStatement() { struct ASTNode* WhileStatement() {
struct ASTNode* Condition, *Body; struct ASTNode* Condition, *Body;
@ -287,12 +364,36 @@ struct ASTNode* WhileStatement() {
return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0); return ConstructASTNode(OP_LOOP, RET_NONE, Condition, NULL, Body, NULL, 0);
} }
/*
* Handles the surrounding logic for For loops.
*
* They have the basic form of:
* for ( init ; condition; iterator) { body }
*
* The initialiser is run only once upon reaching the for loop.
* Then the condition is checked, and if true, the body is executed.
* After execution of the body, the iterator is run and the condition
* checked again.
*
* It can be prototyped as the following pseudo-assembler code:
*
* for:
* <init>
* cond:
* check <condition>
* jne exit
* <body>
* <iterator>
* jump cond
* exit:
* <loop exit>
*
* In the case of the implementation, "init" is the preoperator,
* "iterator" is the postoperator.
*
* @return the AST of this statement
*/
struct ASTNode* ForStatement() { struct ASTNode* ForStatement() {
// for (preop; condition; postop) {
// body
//}
struct ASTNode* Condition, *Body; struct ASTNode* Condition, *Body;
struct ASTNode* Preop, *Postop; struct ASTNode* Preop, *Postop;
@ -326,6 +427,18 @@ struct ASTNode* ForStatement() {
return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0); return ConstructASTNode(OP_COMP, RET_NONE, Preop, NULL, Tree, NULL, 0);
} }
/*
* Handles the surrounding logic for the Print statement.
*
* This is a legacy hold-over from the early testing, and it
* serves merely as a wrapper around the C standard library printf function.
*
* It does, however (//TODO), attempt to guess the type that you
* want to print, which takes a lot of the guesswork out of printing.
*
* @return the AST of this statement
*/
struct ASTNode* PrintStatement(void) { struct ASTNode* PrintStatement(void) {
struct ASTNode* Tree; struct ASTNode* Tree;
int LeftType, RightType; int LeftType, RightType;
@ -342,7 +455,7 @@ struct ASTNode* PrintStatement(void) {
DieDecimal("Attempting to print an invalid type:", RightType); DieDecimal("Attempting to print an invalid type:", RightType);
if(RightType) if(RightType)
Tree = ConstructASTBranch(RightType, RET_INT, Tree, NULL, 0); Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);
Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0); Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);
@ -352,16 +465,33 @@ struct ASTNode* PrintStatement(void) {
} }
/*
* Handles the surrounding logic for all of the logical and semantic
* postfixes.
*
* Postfixes are tokens that are affixed to the end of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of postfixes:
* * (): Call a function
* * []: Index or define an array.
* * ++: Increment a variable AFTER it is returned
* NOTE: there is a prefix variant of this for incrementing BEFOREhand.
* * --: Decrement a variable AFTER it is returned
* NOTE: there is a prefix variant of this for decrementing BEFOREhand.
*
* Planned postfixes:
* * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
* NOTE: there is a prefix variant of this for shifting left - multiplying by two.
*
* @return the AST of the statement plus its postfix
*/
struct ASTNode* PostfixStatement() { struct ASTNode* PostfixStatement() {
struct ASTNode* Tree; struct ASTNode* Tree;
struct SymbolTableEntry* Entry; struct SymbolTableEntry* Entry;
Tokenise(&CurrentToken); Tokenise();
// If we get here, we're one of three things:
// - Function
// - Array
// - Variable
if(CurrentToken.type == LI_LPARE) if(CurrentToken.type == LI_LPARE)
return CallFunction(); return CallFunction();
@ -370,8 +500,8 @@ struct ASTNode* PostfixStatement() {
return AccessArray(); return AccessArray();
// If we get here, we must be a variable. // If we get here, we must be a variable.
// There's no guarantees that the variable is in // (as functions have been called and arrays have been indexed)
// the symbol table, though. // Check that the variable is recognized..
if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR) if((Entry = FindSymbol(CurrentIdentifier)) == NULL || Entry->Structure != ST_VAR)
DieMessage("Unknown Variable", CurrentIdentifier); DieMessage("Unknown Variable", CurrentIdentifier);
@ -380,11 +510,11 @@ struct ASTNode* PostfixStatement() {
switch(CurrentToken.type) { switch(CurrentToken.type) {
case PPMM_PLUS: case PPMM_PLUS:
Tokenise(&CurrentToken); Tokenise();
Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0); Tree = ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0);
break; break;
case PPMM_MINUS: case PPMM_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0); Tree = ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0);
break; break;
default: default:
@ -395,33 +525,58 @@ struct ASTNode* PostfixStatement() {
} }
/*
* Handles the surrounding logic for all of the logical and semantic
* prefixes.
*
* Prefixes are tokens that are affixed to the start of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of prefixes:
* * !: Invert the boolean result of a statement or truthy value.
* * ~: Invert the individual bits in a number
* * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
* * ++: Increment a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for incrementing AFTER the fact.
* * --: Decrement a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for decrementing AFTER the fact.
* &: Take the address of the following object (Get the address that contains it)
* *: Dereference - get the object pointed at by the address following
*
* Planned prefixes:
* * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
* NOTE: there is a postfix variant of this for shifting right - dividing by two.
*
* @return the AST of this statement, plus its prefixes and any postfixes.
*/
struct ASTNode* PrefixStatement() { struct ASTNode* PrefixStatement() {
struct ASTNode* Tree; struct ASTNode* Tree;
switch (CurrentToken.type) { switch (CurrentToken.type) {
case BOOL_INVERT: case BOOL_INVERT:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree->RVal = 1; Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0);
break; break;
case BIT_NOT: case BIT_NOT:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree->RVal = 1; Tree->RVal = 1;
Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0);
break; break;
case AR_MINUS: case AR_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0); Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0);
break; break;
case PPMM_PLUS: case PPMM_PLUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT) if(Tree->Operation != REF_IDENT)
@ -430,7 +585,7 @@ struct ASTNode* PrefixStatement() {
break; break;
case PPMM_MINUS: case PPMM_MINUS:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();
if(Tree->Operation != REF_IDENT) if(Tree->Operation != REF_IDENT)
@ -440,7 +595,7 @@ struct ASTNode* PrefixStatement() {
break; break;
case BIT_AND: case BIT_AND:
Tokenise(&CurrentToken); Tokenise();
// To allow things like: // To allow things like:
// x = &&y; // x = &&y;
@ -454,7 +609,7 @@ struct ASTNode* PrefixStatement() {
Tree->ExprType = PointerTo(Tree->ExprType); Tree->ExprType = PointerTo(Tree->ExprType);
break; break;
case AR_STAR: case AR_STAR:
Tokenise(&CurrentToken); Tokenise();
Tree = PrefixStatement(); Tree = PrefixStatement();

View File

@ -78,6 +78,28 @@ struct SymbolTableEntry* FindGlobal(char* Symbol) {
return SearchList(Symbol, Globals); return SearchList(Symbol, Globals);
} }
/*
 * A narrowed variant of FindSymbol.
 * Only the list of defined Structs is searched.
 *
 * @param Symbol: The string name of the struct to look up.
 * @return the result of SearchList over the Structs list -
 *          a pointer to the node if found, else NULL
 *
 */
struct SymbolTableEntry* FindStruct(char* Symbol) {
    struct SymbolTableEntry* Found = SearchList(Symbol, Structs);
    return Found;
}
/*
 * A narrowed variant of FindSymbol.
 * Only the list of Struct & Enum Members currently being
 * collected (StructMembers) is searched.
 *
 * @param Symbol: The string name of the member to look up.
 * @return the result of SearchList over the StructMembers list -
 *          a pointer to the node if found, else NULL
 *
 */
struct SymbolTableEntry* FindMember(char* Symbol) {
    struct SymbolTableEntry* Found = SearchList(Symbol, StructMembers);
    return Found;
}
/* /*
* Given a particular linked list, * Given a particular linked list,
* Take Node and append it to the Tail. * Take Node and append it to the Tail.
@ -112,6 +134,7 @@ void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail
void FreeLocals() { void FreeLocals() {
Locals = LocalsEnd = NULL; Locals = LocalsEnd = NULL;
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
FunctionEntry = NULL;
} }
@ -122,6 +145,8 @@ void ClearTables() {
Globals = GlobalsEnd = NULL; Globals = GlobalsEnd = NULL;
Locals = LocalsEnd = NULL; Locals = LocalsEnd = NULL;
Params = ParamsEnd = NULL; Params = ParamsEnd = NULL;
StructMembers = StructMembersEnd = NULL;
Structs = StructsEnd = NULL;
} }
@ -136,34 +161,7 @@ void ClearTables() {
* *
* @return The SymbolTableEntry* pointer that corresponds to this newly constructed node. * @return The SymbolTableEntry* pointer that corresponds to this newly constructed node.
*/ */
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset) { struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset, struct SymbolTableEntry* CompositeType) {
/* int TableSlot;
int SinkOffset = 0;
if((TableSlot = FindSymbolImpl(Name, Storage)) != -1)
return -1;
// Instaed of spliting this up into AddLocalSymbol and AddGlobalSymbol,
// we can use this switch to avoid duplicated code.
switch(Storage) {
case SC_PARAM:
// Instead of special casing parameters, we can just add these to the symbol lists and be done with it.
printf("\tPreparing new parameter %s of type %s\r\n", Name, TypeNames[Type]);
TableSlot = AddSymbol(Name, Type, Structure, SC_GLOBAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the global process
TableSlot = AddSymbol(Name, Type, Structure, SC_LOCAL, 88, 1);
Symbols[TableSlot].Storage = SC_PARAM; // Fix the parameter after running the local process
return TableSlot;
case SC_GLOBAL:
TableSlot = NewGlobalSymbol();
break;
case SC_LOCAL:
printf("\tCreating new local symbol %s\r\n", Name);
TableSlot = NewLocalSymbol();
SinkOffset = AsCalcOffset(Type);
break;
} */
struct SymbolTableEntry* Node = struct SymbolTableEntry* Node =
(struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry)); (struct SymbolTableEntry*) malloc(sizeof(struct SymbolTableEntry));
@ -174,33 +172,28 @@ struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Stor
Node->Storage = Storage; Node->Storage = Storage;
Node->Length = Length; Node->Length = Length;
Node->SinkOffset = SinkOffset; Node->SinkOffset = SinkOffset;
Node->CompositeType = CompositeType;
switch(Storage) { switch(Storage) {
case SC_GLOBAL: case SC_GLOBAL:
AppendSymbol(&Globals, &GlobalsEnd, Node); AppendSymbol(&Globals, &GlobalsEnd, Node);
// We don't want to generate a static block for functions.
if(Structure != ST_FUNC) AsGlobalSymbol(Node); if(Structure != ST_FUNC) AsGlobalSymbol(Node);
break; break;
case SC_STRUCT:
AppendSymbol(&Structs, &StructsEnd, Node);
break;
case SC_MEMBER:
AppendSymbol(&StructMembers, &StructMembersEnd, Node);
case SC_LOCAL: case SC_LOCAL:
AppendSymbol(&Locals, &LocalsEnd, Node); AppendSymbol(&Locals, &LocalsEnd, Node);
break; break;
case SC_PARAM: case SC_PARAM:
AppendSymbol(&Params, &ParamsEnd, Node); AppendSymbol(&Params, &ParamsEnd, Node);
break; break;
} }
/* // NOTE: Generating global symbol names must happen AFTER the name and type are declared.
switch(Storage) {
case SC_GLOBAL:
printf("\tCreating new global symbol %s into slot %d\r\n", Name, TableSlot);
if(Structure != ST_FUNC && EndLabel != 88) { // Magic keyword so that we don't generate ASM globals for parameters
printf("\t\tGenerating data symbol.\r\n");
AsGlobalSymbol(TableSlot);
}
break;
case SC_LOCAL:
break;
} */
//printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
return Node; return Node;
} }