Refactor to allow inline initialization of variables.

This commit is contained in:
Curle 2023-04-24 20:41:49 +01:00
parent 96f6773904
commit e42a2cfd8d
9 changed files with 623 additions and 463 deletions

View File

@ -55,6 +55,7 @@ extern_ struct AssemblerModule* Assembler;
// The names of each token in the language, synchronized to the TokenTypes enum.
extern_ char* TokenNames[];
extern_ char* OperationNames[];
// The names of the storage scopes.
extern_ char* ScopeNames[];

View File

@ -433,19 +433,13 @@ struct ASTNode* PostfixStatement();
void ParseGlobals();
struct ASTNode* ParseFunction(int Type);
int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbool, int TerminateSymbol);
struct ASTNode* ParseCompound();
struct SymbolTableEntry* BeginCompositeDeclaration(int Type);
void BeginEnumDeclaration();
int ReadAlias(struct SymbolTableEntry** Composite);
int ParseAlias(char* Name, struct SymbolTableEntry** Composite);
struct ASTNode* GetExpressionList();
struct ASTNode* ParseExpressionList(int terminateToken);
struct ASTNode* CallFunction();
@ -455,8 +449,6 @@ struct ASTNode* BreakStatement();
struct ASTNode* ContinueStatement();
int ReadTypeOrKeyword(struct SymbolTableEntry** Composite);
int ValueAt(int Type);
int PointerTo(int Type);
@ -467,9 +459,6 @@ struct ASTNode* AccessMember(bool Deref);
int ParseTokenToOperation(int Token);
struct ASTNode* PrintStatement(void);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * S Y M B O L T A B L E * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
@ -628,8 +617,6 @@ void RegisterJVM();
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
struct ASTNode* ParseIdentifier(void);
struct ASTNode* IfStatement();
struct ASTNode* WhileStatement();

View File

@ -47,14 +47,6 @@ void DumpTree(struct ASTNode* Node, int level) {
DumpTree(Node->Left, level + 2);
DumpTree(Node->Right, level + 2);
return;
case OP_SWITCH:
for (int i = 0; i < level; i++)
fprintf(stdout, " ");
fprintf(stdout, "SWITCH\n");
DumpTree(Node->Left, level + 2);
DumpTree(Node->Right, level + 2);
return;
}
// If current node is a compound, we treat it as if we didn't just enter a loop.
@ -197,13 +189,14 @@ void DumpTree(struct ASTNode* Node, int level) {
case OP_DEFAULT:
fprintf(stdout, "OP_DEFAULT\n");
DumpTree(Node->Left, level + 2);
return;
case OP_CASE:
fprintf(stdout, "OP_CASE %d\n", Node->IntValue);
DumpTree(Node->Left, level + 2);
DumpTree(Node->Right, level);
return;
case OP_SWITCH:
fprintf(stdout, "SWITCH\n");
return;
default:

View File

@ -8,7 +8,7 @@
#include <stdarg.h>
// Snapshot the current column of the file being parsed into CurrentSafeColumn.
// printErrorLine uses CurrentSafeColumn as the start of the span it highlights.
void Safe() {
// NOTE(review): two assignments appear back to back in this listing; the second one
// (CurrentColumn - 1) overwrites the first, so it is the value that takes effect.
CurrentFile->CurrentSafeColumn = CurrentFile->CurrentColumn;
CurrentFile->CurrentSafeColumn = CurrentFile->CurrentColumn - 1;
}
void printLine(FILE* file, int ln) {
@ -20,11 +20,17 @@ void printLine(FILE* file, int ln) {
/*
 * Print one source line with the offending span highlighted in red.
 *
 * The line is consumed from the stream in up to three pieces:
 *  * the text before CurrentSafeColumn,
 *  * the span between CurrentSafeColumn and CurrentColumn (printed in red),
 *  * the rest of the line.
 *
 * @param file the stream to read the line from (presumably already positioned
 *             at the start of that line - verify against the caller)
 * @param ln   the line index; it is printed as ln + 1
 */
void printErrorLine(FILE* file, int ln) {
    // Every buffer starts out as an empty string: each read below can be skipped
    // or can fail, and the buffers are all handed to printf("%s") regardless.
    char firstBuffer[256] = "", problemBuffer[256] = "", tailBuffer[256] = "";
    const int capacity = (int) sizeof(firstBuffer);

    int safeColumn = CurrentFile->CurrentSafeColumn;
    int column = CurrentFile->CurrentColumn;
    int problemLength = column > safeColumn ? column - safeColumn : column;

    // If highlight starts at column 0, don't try to print anything before it.
    // Sizes are clamped so a long line can never overrun the fixed buffers.
    if (safeColumn > 0)
        fgets(firstBuffer, safeColumn < capacity ? safeColumn : capacity, file);

    // Print the safe column up to current column
    if (problemLength > 0)
        fgets(problemBuffer, problemLength < capacity ? problemLength : capacity, file);

    // Print the current column to the end of the line
    if (column > safeColumn)
        fgets(tailBuffer, capacity, file);

    // Line number, leading text, highlighted span (ANSI red), trailing text
    printf("%03d|%s\033[0;31m%s\033[0m%s", ln + 1, firstBuffer, problemBuffer, tailBuffer);
}
@ -34,6 +40,7 @@ void printHelpLine(int line, char* message) {
}
void ErrorReport(char* message, ...) {
fflush(stdout);
char strbuf[256];
// Resolve varargs to a string
@ -84,5 +91,5 @@ void ErrorReport(char* message, ...) {
printLine(file, line + 2);
}
exit(1);
}

View File

@ -92,6 +92,65 @@ char* TokenNames[] = {
"Import keyword"
};
// Printable names of the AST operations.
// NOTE(review): presumably indexed by the operation enum, so the order here must match it - verify.
char* OperationNames[] = {
"OP_ASSIGN", // Assign an l-value
"OP_BOOLOR", // Boolean OR two statements
"OP_BOOLAND", // Boolean AND two statements
"OP_BITOR", // Bitwise OR a number
"OP_BITXOR", // Bitwise XOR a number
"OP_BITAND", // Bitwise AND a number
"OP_EQUAL", // Compare equality
"OP_INEQ", // Compare inequality
"OP_LESS", // Less than?
"OP_GREAT", // Greater than?
"OP_LESSE", // Less than or Equal to?
"OP_GREATE", // Greater than or Equal to?
"OP_SHIFTL", // Arithmetic Shift Left (Multiply by 2)
"OP_SHIFTR", // Arithmetic Shift Right (Divide by 2)
"OP_ADD", // Add two numbers.
"OP_SUBTRACT", // Subtract two numbers.
"OP_MULTIPLY", // Multiply two numbers.
"OP_DIVIDE", // Divide two numbers.
"OP_PREINC", // Increment var before reference.
"OP_PREDEC", // Decrement var before reference.
"OP_POSTINC", // Increment var after reference.
"OP_POSTDEC", // Decrement var after reference.
"OP_BITNOT", // Invert a number bitwise
"OP_BOOLNOT", // Invert a statement logically
"OP_NEGATE", // Negate a number (turn a positive number negative)
"OP_BOOLCONV", // Convert an expression to a boolean.
"OP_ADDRESS", // Fetch the address of a var
"OP_DEREF", // Get the value of the address in a pointer
"TERM_INTLITERAL", // Integer Literal. This is a virtual operation, so it's a terminal.
"TERM_STRLITERAL", // String Literal. Also terminal.
"REF_IDENT", // Reference (read) an identifier (variable).
"OP_WIDEN", // Something contains a type that needs to be casted up
"OP_SCALE", // We have a pointer that needs to be scaled!
"OP_CALL", // Call a function
"OP_RET", // Return from a function
"OP_COMP", // Compound statements need a way to be 'glued' together. This is one of those mechanisms
"OP_IF", // If statement
"OP_LOOP", // FOR, WHILE
"OP_PRINT", // Print statement
"OP_FUNC", // Define a function
"OP_BREAK", // Break out of the loop
"OP_CONTINUE", // Continue the loop
"OP_SWITCH", // Switch statement
"OP_DEFAULT", // Default case
"OP_CASE" // Case
};
char* ScopeNames[] = {
"INVALID",
"GLOBAL",

View File

@ -161,64 +161,6 @@ int ParseTokenToOperation(int Token) {
DieDecimal("ParseToken: Unknown token", Token);
}
/*
 * Handles an "alias" declaration, which introduces a new name that is accepted
 * wherever the aliased type (or a chain of aliases) would be.
 *
 * The tokens are read in this order:
 *  * the "alias" keyword
 *  * the type being aliased (any valid primary type)
 *  * the new name
 *
 * The new name is stored as an SC_ALIAS symbol; redefining an existing alias is fatal.
 *
 * @param Composite out: filled in by ReadTypeOrKeyword when the aliased type is composite
 * @return the aliased type
 */
int ReadAlias(struct SymbolTableEntry** Composite) {
    // Step past the "alias" keyword.
    Tokenise();

    int AliasedType = ReadTypeOrKeyword(Composite);

    if (FindAlias(CurrentIdentifier) != NULL)
        DieMessage("Redefinition of type", CurrentIdentifier);

    AddSymbol(CurrentIdentifier, AliasedType, ST_VAR, SC_ALIAS, 0, 0, *Composite);

    // Step past the new name.
    Tokenise();
    return AliasedType;
}
/**
 * Resolve an alias name to the type it finally stands for.
 * The name must be a known alias. While the alias carries a composite type whose
 * own name is also an alias, the chain is followed to the next entry.
 * The current token is consumed once the chain has been resolved.
 *
 * @param Name The name of the (initial) alias to check
 * @param Composite out: receives the composite type of the final alias entry
 * @return The type of the final alias entry.
 */
int ParseAlias(char* Name, struct SymbolTableEntry** Composite) {
    struct SymbolTableEntry* Alias = FindAlias(Name);

    // The first link of the chain has to exist.
    if (Alias == NULL)
        DieMessage("Unknown alias", Name);

    // Walk the chain until an entry has no composite, or its composite is not itself an alias.
    while (Alias->CompositeType != NULL) {
        struct SymbolTableEntry* Next = FindAlias(Alias->CompositeType->Name);
        if (Next == NULL)
            break;
        Alias = Next;
    }

    Tokenise();

    *Composite = Alias->CompositeType;
    return Alias->Type;
}
/*
* Primary expressions may be any one of:
* * A terminal integer literal
@ -414,7 +356,7 @@ struct ASTNode* CallFunction() {
VerifyToken(LI_LPARE, "(");
Tree = GetExpressionList();
Tree = ParseExpressionList(LI_RPARE);
Tree = ConstructASTBranch(OP_CALL, Function->Type, Tree, Function, 0);
@ -438,25 +380,22 @@ struct ASTNode* CallFunction() {
* end with a COMPOSITE operation.
*
*/
struct ASTNode* GetExpressionList() {
struct ASTNode* ParseExpressionList(int terminate) {
struct ASTNode* Tree = NULL, * Child = NULL;
int Count = 0;
while (CurrentFile->CurrentSymbol.type != LI_RPARE) {
Safe();
while (CurrentFile->CurrentSymbol.type != terminate) {
Child = ParsePrecedenceASTNode(0);
Count++;
Safe();
Tree = ConstructASTNode(OP_COMP, PointerTo(RET_VOID), Tree, NULL, Child, NULL, Count);
switch (CurrentFile->CurrentSymbol.type) {
case LI_COM:
Tokenise();
break;
case LI_RPARE:
break;
default:
Die("Unexpected token in argument list");
}
if (CurrentFile->CurrentSymbol.type == terminate)
break;
VerifyToken(LI_COM, ",");
Safe();
}
return Tree;
@ -484,8 +423,8 @@ struct ASTNode* GetExpressionList() {
* @return the AST Node representing this single statement
*/
struct ASTNode* ParseStatement(void) {
int Type;
struct ASTNode* Node;
struct SymbolTableEntry* Composite;
printf("\t\tBranch leads to here, type %s/%d\r\n", TokenNames[CurrentFile->CurrentSymbol.type], CurrentFile->CurrentSymbol.type);
switch (CurrentFile->CurrentSymbol.type) {
@ -497,11 +436,14 @@ struct ASTNode* ParseStatement(void) {
case TY_CHAR:
case TY_LONG:
case TY_INT:
case KW_STRUCT:
case KW_UNION:
case KW_ENUM:
case KW_ALIAS:
printf("\t\tNew Variable: %s\n", CurrentIdentifier);
Type = ReadTypeOrKeyword(NULL);
VerifyToken(TY_IDENTIFIER, "ident");
BeginVariableDeclaration(Type, NULL, SC_LOCAL);
VerifyToken(LI_SEMIC, ";"); // TODO: single line assignment?
ParseDeclarationList(&Composite, SC_LOCAL, LI_SEMIC, LI_EOF);
VerifyToken(LI_SEMIC, ";");
Safe();
return NULL;
case KW_SWITCH:
@ -526,58 +468,13 @@ struct ASTNode* ParseStatement(void) {
return ContinueStatement();
default:
return ParsePrecedenceASTNode(0);
}
}
/*
* Handles parsing multiple statements or expressions in a row.
* These are typically grouped together with the Compound tokens "{ }"
* and separated by the semicolon ";".
*
* Single Statements are parsed until a semicolon is reached, at which
* point another statement will be parsed, or until a Right Compound
* token is reached ("}"), at which point parsing will stop.
*
* It is useful for:
* * Tightly identifying related blocks of code
* * Containing the many statements of functions
*
* @return the AST Node representing this compound statement
*
*/
struct ASTNode* ParseCompound() {
struct ASTNode* Left = NULL, * Tree;
while (1) {
printf("\tNew branch in compound\n");
Tree = ParseStatement();
if (Tree && (Tree->Operation == OP_PRINT || Tree->Operation == OP_ASSIGN
|| Tree->Operation == OP_RET || Tree->Operation == OP_CALL
|| Tree->Operation == OP_BREAK || Tree->Operation == OP_CONTINUE))
Node = ParsePrecedenceASTNode(0);
VerifyToken(LI_SEMIC, ";");
if (Tree) {
if (Left == NULL)
Left = Tree;
else
Left = ConstructASTNode(OP_COMP, RET_NONE, Left, NULL, Tree, NULL, 0);
}
if (CurrentFile->CurrentSymbol.type == LI_RBRAC) {
fflush(stdout);
return Left;
}
if (CurrentFile->SwitchStatement && (CurrentFile->CurrentSymbol.type == KW_CASE || CurrentFile->CurrentSymbol.type == KW_DEFAULT)) {
return Left;
}
return Node;
}
}
/*
* This is the entry point to the parser/lexer.
*
@ -594,56 +491,16 @@ struct ASTNode* ParseCompound() {
*/
void ParseGlobals() {
struct ASTNode* Tree;
struct SymbolTableEntry* Composite;
int Type, FunctionComing;
printf("Parsing global definitions\r\n");
while (1) {
// We loop early if there's a struct, and since a struct may be the last
// thing in a file, we need to check for eof before anything else
if (CurrentFile->CurrentSymbol.type == LI_EOF)
break;
printf("New definition incoming..\r\n\n");
Type = ReadTypeOrKeyword(&Composite);
//TODO: converge pathways on this block?
if (CurrentFile->CurrentSymbol.type == KW_FUNC) {
VerifyToken(KW_FUNC, "::");
FunctionComing = 1;
}
// Structs are parsed fully in ParseOptionalPointer
// TODO: FIX THAT!!
if ((Type == DAT_STRUCT || Type == DAT_UNION || Type == DAT_ENUM || Type == DAT_ALIAS) && CurrentFile->CurrentSymbol.type == LI_SEMIC) {
Tokenise();
continue;
}
// If we read metadata or an import, then skip all processing.
if (Type == DAT_NONE)
continue;
VerifyToken(TY_IDENTIFIER, "ident");
if (FunctionComing && CurrentFile->CurrentSymbol.type == LI_LPARE) {
printf("\tParsing function\n");
Tree = ParseFunction(Type);
if (Tree && CurrentFile->AllowDefinitions) {
printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name);
Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0);
FreeLocals();
} else {
printf("\nFunction prototype saved\r\n");
}
} else {
printf("\tParsing global variable declaration\n");
BeginVariableDeclaration(Type, Composite, SC_GLOBAL);
VerifyToken(LI_SEMIC, ";");
}
while (CurrentFile->CurrentSymbol.type != LI_EOF) {
// Read in a declaration, or list thereof
ParseDeclarationList(&Composite, SC_GLOBAL, LI_SEMIC, LI_EOF);
// Consume semicolons if present
OptionallyConsume(LI_SEMIC);
}
}

View File

@ -57,92 +57,6 @@ int ValueAt(int Type) {
return (Type - 1);
}
/*
 * Type declarations may be raw, they may be pointers.
 * If they are pointers, we need to be able to check
 * how many levels of indirection.
 * However, being a pointer is optional.
 *
 * This can parse in just a lone type specifier, or
 * any valid level of indirection thereof.
 *
 * Besides the primitive types, this also dispatches imports, aliases,
 * enum declarations and struct/union declarations.
 *
 * @param Composite: out: written with the composite symbol for struct/union
 *                   declarations, and handed to ParseAlias/ReadAlias to fill in.
 * @return the parsed DataType, with any indirection. DAT_NONE for an import;
 *         DAT_ALIAS / DAT_ENUM when an alias / enum declaration is directly
 *         followed by a semicolon.
 *
 */
int ReadTypeOrKeyword(struct SymbolTableEntry** Composite) {
int Type;
switch (CurrentFile->CurrentSymbol.type) {
case KW_IMPORT:
// Imports carry no type of their own
Type = DAT_NONE;
ImportModule();
break;
case TY_VOID:
Type = RET_VOID;
Tokenise();
break;
case TY_CHAR:
Type = RET_CHAR;
Tokenise();
break;
case TY_INT:
Type = RET_INT;
Tokenise();
break;
case TY_LONG:
Type = RET_LONG;
Tokenise();
break;
case TY_IDENTIFIER:
// A bare identifier in type position is looked up as an alias
Type = ParseAlias(CurrentIdentifier, Composite);
break;
case KW_ALIAS:
Type = ReadAlias(Composite);
// A semicolon straight after means this was only a declaration
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = DAT_ALIAS;
break;
case KW_ENUM:
Type = RET_INT;
BeginEnumDeclaration();
// A semicolon straight after means this was only a declaration
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = DAT_ENUM;
break;
case KW_STRUCT:
Type = DAT_STRUCT;
*Composite = BeginCompositeDeclaration(Type);
break;
case KW_UNION:
Type = DAT_UNION;
*Composite = BeginCompositeDeclaration(Type);
break;
default:
// NOTE(review): Type is never assigned on this path; this relies on DieDecimal not returning - confirm.
DieDecimal("Illegal type for pointerisation", CurrentFile->CurrentSymbol.type);
}
// Recursively scan more *s
// This makes things like:
// x = **y;
// possible.
while (1) {
// But, skip parsing if we're looking at an import.
if (CurrentFile->CurrentSymbol.type == KW_IMPORT)
break;
printf("\t\t\tType on parsing is %d\n", CurrentFile->CurrentSymbol.type);
if (CurrentFile->CurrentSymbol.type != AR_STAR)
break;
// Each star adds one level of indirection
Type = PointerTo(Type);
Tokenise();
// Tokenise(); TODO: is this skipping pointers?
}
return Type;
}
/*
* Array Accesses come in the form of x[y].
*

View File

@ -8,59 +8,505 @@
#include <Data.h>
#include <stdbool.h>
static void ParseEnumDeclaration();
static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage);
static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType);
/*
 * Parses a run of statements, as found between the Compound tokens "{ }".
 *
 * Statements are read one after another via ParseStatement. Every statement
 * that produces a tree is glued onto the result with an OP_COMP node;
 * statements that produce no tree are skipped.
 *
 * Parsing stops, without consuming the token, when:
 *  * a Right Compound token ("}") is reached, or
 *  * a "case" / "default" keyword is reached while inside a switch statement.
 *
 * It is useful for:
 *  * Tightly identifying related blocks of code
 *  * Containing the many statements of functions
 *
 * @return the AST Node representing this compound statement; NULL if no
 *         statement produced a tree
 *
 */
struct ASTNode* ParseCompound() {
    struct ASTNode* Chain = NULL;

    for (;;) {
        printf("\tNew branch in compound\n");

        struct ASTNode* Statement = ParseStatement();
        Safe();

        // Glue the new statement onto what we have so far.
        if (Statement != NULL)
            Chain = (Chain == NULL)
                    ? Statement
                    : ConstructASTNode(OP_COMP, RET_NONE, Chain, NULL, Statement, NULL, 0);

        int Token = CurrentFile->CurrentSymbol.type;

        if (Token == LI_RBRAC) {
            fflush(stdout);
            return Chain;
        }

        // Inside a switch, the next case label also ends this compound.
        if (CurrentFile->SwitchStatement && (Token == KW_CASE || Token == KW_DEFAULT))
            return Chain;
    }
}
/*
* Resolve a typename to a type struct.
* Short circuit on the case where a definition is present, as definitions are typeless.
*/
static int ParseType(struct SymbolTableEntry** CompositeType, int* Scope) {
int Type = -1, Extern = 1;
while (Extern) {
switch (CurrentFile->CurrentSymbol.type) {
default:
Extern = 0;
}
}
switch (CurrentFile->CurrentSymbol.type) {
case KW_IMPORT:
Type = -1;
ImportModule();
break;
case TY_VOID:
Type = RET_VOID;
Tokenise();
break;
case TY_CHAR:
Type = RET_CHAR;
Tokenise();
break;
case TY_INT:
Type = RET_INT;
Tokenise();
break;
case TY_LONG:
Type = RET_LONG;
Tokenise();
break;
case TY_IDENTIFIER:
case KW_ALIAS:
Type = ParseAliasDeclaration(CompositeType);
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = -1;
break;
case KW_ENUM:
Type = RET_INT;
ParseEnumDeclaration();
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = -1;
break;
case KW_STRUCT:
Type = DAT_STRUCT;
*CompositeType = BeginCompositeDeclaration(Type);
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = -1;
break;
case KW_UNION:
Type = DAT_UNION;
*CompositeType = BeginCompositeDeclaration(Type);
if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
Type = -1;
break;
default:
ErrorReport("Illegal type on token %s\n", CurrentFile->CurrentSymbol.type);
}
return Type;
}
/*
 * Given a Type passed by ParseType, consume any stars that follow and return
 * the type with one level of indirection added per star.
 *
 * @param Type the base type
 * @return the base type, wrapped by PointerTo once for every star consumed
 */
static int ParsePointerType(int Type) {
    for (;;) {
        int Token = CurrentFile->CurrentSymbol.type;

        // But, skip parsing if we're looking at an import.
        if (Token == KW_IMPORT)
            break;

        printf("\t\t\tType on parsing is %s\n", TokenNames[Token]);

        // Anything other than a star ends the pointer chain.
        if (Token != AR_STAR)
            break;

        Type = PointerTo(Type);
        Tokenise();
    }

    return Type;
}
/*
 * Parse a declaration of an array - the [ <int> ] part.
 * Only global arrays are accepted; every other storage class is an error.
 *
 * @param name the name of the array
 * @param Type the type of the array, if scalar
 * @param CompositeType the type of the array, if composite
 * @param Storage the storage class of the array
 * @return the defined array symbol
 */
static struct SymbolTableEntry* ParseArrayDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
    struct SymbolTableEntry* Symbol = NULL;

    // Step over the opening token to reach the size.
    Tokenise();
    Safe();

    // Without a literal size there is nothing to declare. Reporting it here stops
    // a NULL symbol from being handed back to callers that dereference it.
    if (CurrentFile->CurrentSymbol.type != LI_INT)
        ErrorReport("Array size must be an integer literal.\n");

    switch (Storage) {
        case SC_GLOBAL:
            Symbol = AddSymbol(name, PointerTo(Type), ST_ARR, Storage, CurrentFile->CurrentSymbol.value, 0, CompositeType);
            break;
        case SC_LOCAL:
        case SC_PARAM:
        case SC_MEMBER:
        default:
            ErrorReport("Local array definitions not permitted.\n");
            break;
    }

    // Step over the size.
    Tokenise();

    // NOTE(review): elsewhere in this file LI_RBRAC is verified against "}"; confirm
    // this is the intended token for the closing "]".
    VerifyToken(LI_RBRAC, "]");
    Safe();

    return Symbol;
}
// Registers a scalar (ST_VAR) symbol with a length of 1 in the table for the given
// storage class, and hands back the new entry.
static struct SymbolTableEntry* ParseScalarDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
    struct SymbolTableEntry* Scalar = AddSymbol(name, Type, ST_VAR, Storage, 1, 0, CompositeType);
    return Scalar;
}
/*
* Handles reading in a comma-separated list of declarations.
* Erythro treats structs, enums and function parameters the same in this regard -
* comma separated.
*
*
* C and C++ tend to treat enums and structs differently - the former separated by commas,
* the latter separated by semicolons.
*
*
* Note that since functions are read in through parentheses, and structs/enums are read in
* through brackets, the end character is configurable.
*
* @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
* @param Storage: The Storage Scope of this declaration list.
* @param End: The end token, in terms of TokenTypes enum values.
* @return the amount of declarations read in.
*
*
* Parse declarations, including lists thereof, until the Terminate symbol is encountered.
* Will first parse a type name, then parse the identifier using ParseDeclarationSymbol.
* Declaration lists must be separated by a comma or terminated with the StatementEndSymbol.
*
* @param CompositeType out: the type of the declaration list.
* @param ClassType the type of the class
* @param StatementEndSymbool the symbol that marks the end of the declaration list
* @param TerminateSymbol the symbol that marks the end of parsing
* @return the type of the declaration
*
*/
int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbool, int TerminateSymbol) {
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
int initType, type;
struct SymbolTableEntry* symbol;
fflush(stdout);
if ((initType = ParseType(CompositeType, &ClassType)) == -1)
return initType;
while (1) {
type = ParsePointerType(initType);
symbol = ParseDeclarationSymbol(type, *CompositeType, ClassType);
printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, type, ScopeNames[ClassType]);
// Lists of function declarations are not valid.
if (symbol->Type == ST_FUNC) {
if (ClassType != SC_GLOBAL)
ErrorReport("Function definition not at global scope\n");
return type;
}
// Terminate at either symbol
if (CurrentFile->CurrentSymbol.type == StatementEndSymbool || CurrentFile->CurrentSymbol.type == TerminateSymbol)
return type;
// We must be continuing the list, so parse a comma
VerifyToken(LI_COM, ",");
}
}
/*
* Parse the full list of parameter declarations.
* Each has a type, a name, may be a pointer, or an array.
*
* @param FunctionDeclaration the type of the declaration of the function, if declared already.
* @param FunctionDefinition the type of the definition of the function, which we are parsing
* @return the number of parameters parsed
*/
static int ParseParameterDeclarationList(struct SymbolTableEntry* FunctionDeclaration, struct SymbolTableEntry* FunctionDefinition) {
int TokenType, ParamCount = 0;
struct SymbolTableEntry* PrototypePointer = NULL, * Composite;
if (FunctionSymbol != NULL)
PrototypePointer = FunctionSymbol->Start;
if (FunctionDeclaration != NULL)
PrototypePointer = FunctionDeclaration->Start;
while (CurrentFile->CurrentSymbol.type != End) {
TokenType = ReadTypeOrKeyword(&Composite);
VerifyToken(TY_IDENTIFIER, "identifier");
while (CurrentFile->CurrentSymbol.type != LI_RPARE) {
// Doing int x, y, float z is valid, so parse a list of declarations per parameter.
TokenType = ParseDeclarationList(&Composite, SC_PARAM, LI_COM, LI_RPARE);
if (TokenType == -1)
ErrorReport("Bad type in parameter list");
printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, TokenType, ScopeNames[Storage]);
printf("\tReading a new parameter: %s of type %d\n", CurrentIdentifier, TokenType);
if (PrototypePointer != NULL) {
if (TokenType != PrototypePointer->Type)
DieDecimal("Function parameter of invalid type at index", ParamCount + 1);
ErrorReport("Function parameter has invalid type at index %d\n", ParamCount + 1);
PrototypePointer = PrototypePointer->NextSymbol;
} else {
BeginVariableDeclaration(TokenType, Composite, Storage);
}
Safe();
ParamCount++;
if ((CurrentFile->CurrentSymbol.type != LI_COM) && (CurrentFile->CurrentSymbol.type != End))
DieDecimal("Unexpected token in parameter", CurrentFile->CurrentSymbol.type);
if (CurrentFile->CurrentSymbol.type == LI_RPARE)
break;
if (CurrentFile->CurrentSymbol.type == LI_COM)
Tokenise();
VerifyToken(LI_COM, ",");
Safe();
}
if ((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Length))
DieMessage("Invalid number of parameters in prototyped function", FunctionSymbol->Name);
if ((FunctionDeclaration != NULL) && (ParamCount != FunctionDeclaration->Length))
ErrorReport("Function definition has different number of parameters than the function declaration (%d vs %d).\n", ParamCount, FunctionDeclaration->Length);
return ParamCount;
}
/*
* Parse a function declaration, and optionally definition.
* <type> <identifier> ( parameter(,?)* ) ;
* <type> <identiier> ( parameter(,?)* ) compound ;
*
* @param name the name of the function
* @param Type the type of the function, if primitive
* @param CompositeType the type of the function, if composite
* @param Storage the scope of the function
* @return the new symbol table entry for the function
*/
static struct SymbolTableEntry* ParseFunctionDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
struct ASTNode* Tree;
struct ASTNode* FinalStatement;
struct SymbolTableEntry* OldFunction, * NewFunction = NULL;
int BreakLabel = 0, ParamCount = 0;
VerifyToken(KW_FUNC, "::");
Safe();
VerifyToken(TY_IDENTIFIER, "Identifier");
Safe();
if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL)
if (OldFunction->Storage != ST_FUNC)
OldFunction = NULL;
if (OldFunction == NULL) {
BreakLabel = Assembler->vtable->NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
}
VerifyToken(LI_LPARE, "(");
Safe();
ParamCount = ParseParameterDeclarationList(OldFunction, NewFunction);
VerifyToken(LI_RPARE, ")");
Safe();
printf("\nIdentified%sfunction %s of return type %s, end label %d\n",
(OldFunction == NULL) ? " new " : " overloaded ",
(OldFunction == NULL) ? NewFunction->Name : OldFunction->Name,
TypeNames(Type), BreakLabel);
if (NewFunction) {
NewFunction->Elements = ParamCount;
NewFunction->Start = Params;
NewFunction->Type = RET_LONG;
OldFunction = NewFunction;
}
Params = ParamsEnd = NULL;
if (CurrentFile->CurrentSymbol.type == LI_SEMIC) {
return OldFunction;
}
CurrentFile->FunctionEntry = OldFunction;
CurrentFile->CurrentLoopDepth = 0;
VerifyToken(LI_LBRAC, "{");
Safe();
Tree = ParseCompound();
Safe();
VerifyToken(LI_RBRAC, "}");
if (Type != RET_VOID) {
// Functions with one statement have no composite node, so we have to check
FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree;
if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) {
ErrorReport("Function with non-void type does not return");
}
}
Tree = ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel);
if (Tree && CurrentFile->AllowDefinitions) {
printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name);
if (OptDumpTree) {
DumpTree(Tree, 0);
fprintf(stdout, "\n\n");
}
// Emit the function now
Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0);
FreeLocals();
} else {
printf("\nFunction prototype saved\r\n");
}
Safe();
return OldFunction;
}
/*
 * Handles an "alias" declaration, which introduces a new name that is accepted
 * in lieu of another type (or a chain of other aliases).
 *
 * The tokens are read in this order:
 *  * The "alias" keyword
 *  * The thing to alias (any valid primary type, optionally followed by stars)
 *  * The new name
 *
 * The new name is stored as an SC_ALIAS symbol.
 *
 * @param CompositeType out: filled in by ParseType when the aliased type is composite
 * @return the aliased type, including any indirection
 */
static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType) {
    int Scope = 0;

    // "alias"
    Tokenise();
    Safe();

    int Aliased = ParseType(CompositeType, &Scope);

    if (Scope != 0)
        ErrorReport("Cannot extern an alias definition.\n");

    if (FindAlias(CurrentIdentifier) != NULL)
        ErrorReport("Duplicate type alias.\n");

    // It may be a pointer definition
    Aliased = ParsePointerType(Aliased);

    AddSymbol(CurrentIdentifier, Aliased, ST_VAR, SC_ALIAS, 0, 0, *CompositeType);

    // Step past the new name.
    Tokenise();
    Safe();

    return Aliased;
}
/*
 * Look up an alias by name and report the type it stands for.
 * An unknown name is reported as an error. The current token is consumed.
 *
 * @param name the name of the typedef
 * @param CompositeType out: the composite type recorded on the alias entry
 * @return the scalar type recorded on the alias entry
 */
static int GetTypedef(char* name, struct SymbolTableEntry** CompositeType) {
    struct SymbolTableEntry* alias = FindAlias(name);

    if (alias == NULL)
        ErrorReport("Unknown alias type");

    Tokenise();
    Safe();

    *CompositeType = alias->CompositeType;
    return alias->Type;
}
/*
 * Parse an array initialization - everything after the =.
 *
 * Not implemented yet: the body only reports that array initialization is not
 * permitted, and none of the parameters are read.
 *
 * @param Symbol the symbol of the array we're initializing
 * @param Type the type of the array, if primitive
 * @param CompositeType the type of the array, if composite
 * @param Storage the storage class of the array we're initializing
 */
static void ParseArrayInitialization(struct SymbolTableEntry* Symbol, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
ErrorReport("Array initialization not permitted.\n");
}
/*
 * Parse a name symbol for a declaration.
 * Calls out to parse functions, arrays and scalars alike.
 * Also recognises an inline initialization if present; every form of
 * initialization is currently rejected with an error.
 *
 * @param Type the type of the declaration, if primitive
 * @param CompositeType a reference to the type, if composite (struct)
 * @param Storage the storage class of the declaration
 * @return the symbol table entry to the new symbol
 */
static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage) {
    struct SymbolTableEntry* symbol = NULL;
    // Copy the name now, before consuming the identifier token below.
    char* variableName = strdup(CurrentIdentifier);
    int structureType = ST_VAR;

    Safe();

    // The function marker hands the whole declaration to the function parser.
    if (CurrentFile->CurrentSymbol.type == KW_FUNC)
        return ParseFunctionDeclaration(variableName, Type, CompositeType, Storage);

    VerifyToken(TY_IDENTIFIER, "Identifier");

    // Check for duplicate declarations, in the table that matches the storage class only.
    // Each case ends in a break so that, for example, a global is not also checked
    // against the local and member tables.
    switch (Storage) {
        case SC_GLOBAL:
            if (FindGlobal(variableName) != NULL)
                ErrorReport("Duplicate global declaration\n");
            break;
        case SC_LOCAL:
        case SC_PARAM:
            if (FindLocal(variableName) != NULL)
                ErrorReport("Duplicate local declaration\n");
            break;
        case SC_MEMBER:
            if (FindMember(variableName) != NULL)
                ErrorReport("Duplicate member declaration\n");
            break;
        default:
            break;
    }

    // Determine whether this is an array or scalar.
    // NOTE(review): elsewhere in this file LI_LBRAC is verified against "{"; confirm
    // this is the intended token for the opening "[" of an array.
    if (CurrentFile->CurrentSymbol.type == LI_LBRAC) {
        symbol = ParseArrayDeclaration(variableName, Type, CompositeType, Storage);
        structureType = ST_ARR;
    } else {
        symbol = ParseScalarDeclaration(variableName, Type, CompositeType, Storage);
    }

    // Determine whether we're initializing immediately
    if (CurrentFile->CurrentSymbol.type == LI_EQUAL) {
        // TODO: Default parameters
        if (Storage == SC_PARAM)
            ErrorReport("Initialization of parameter not permitted.\n");

        // TODO: Enum initialization
        if (Storage == SC_MEMBER)
            ErrorReport("Initialization of a member not permitted.\n");

        // Step past the "=".
        Tokenise();

        if (structureType == ST_ARR) {
            ParseArrayInitialization(symbol, Type, CompositeType, Storage);
        } else {
            // TODO: Inline initialization
            ErrorReport("Initialization of a scalar not permitted.\n");
        }
    }

    return symbol;
}
/*
* Handles the declaration of a new composite type.
* For example, a struct is a composite of multiple different named positions:
@ -75,10 +521,12 @@ static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Stor
*/
struct SymbolTableEntry* BeginCompositeDeclaration(int Type) {
struct SymbolTableEntry* Composite = NULL, * Member;
struct SymbolTableEntry* Composite = NULL, *Member;
int Offset = 0, Largest = 0;
// "struct" / "union"
Tokenise();
Safe();
if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) {
Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier);
@ -87,19 +535,35 @@ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) {
if (CurrentFile->CurrentSymbol.type != LI_LBRAC) {
if (Composite == NULL)
DieMessage("Unknown Struct", CurrentIdentifier);
ErrorReport("Use of undefined composite");
return Composite;
}
if (Composite)
DieMessage("Redefinition of composite", CurrentIdentifier);
ErrorReport("Redefinition of composite");
Composite = AddSymbol(CurrentIdentifier, Type, ST_RUCT, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL);
Tokenise();
Safe();
printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union");
ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAC);
while (1) {
Type = ParseDeclarationList(&Member, SC_MEMBER, LI_SEMIC,LI_RBRAC);
if (Type == -1)
ErrorReport("Bad type in member list of composite\n");
OptionallyConsume(LI_SEMIC);
Safe();
if (CurrentFile->CurrentSymbol.type == LI_RBRAC)
break;
}
VerifyToken(LI_RBRAC, "}");
if (CompositeMembers == NULL)
ErrorReport("No members in struct.\n");
Composite->Start = CompositeMembers;
CompositeMembers = CompositeMembersEnd = NULL;
@ -123,12 +587,14 @@ struct SymbolTableEntry* BeginCompositeDeclaration(int Type) {
return Composite;
}
void BeginEnumDeclaration() {
static void ParseEnumDeclaration() {
struct SymbolTableEntry* Type = NULL;
char* Name;
int Value = 0;
// "enum"
Tokenise();
Safe();
// enum name
if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) {
@ -137,19 +603,20 @@ void BeginEnumDeclaration() {
Tokenise();
}
// enum name {? if not, enum name var.
// We're expecting to declare an enum, so make sure the content follows.
if (CurrentFile->CurrentSymbol.type != LI_LBRAC) {
if (Type == NULL)
DieMessage("Undeclared Enum", Name);
ErrorReport("Enum used but not yet declared.\n");
return;
}
// Skip the { that we have
Tokenise();
Safe();
if (Type != NULL)
DieMessage("Attempting to redefine enum", Type->Name);
ErrorReport("Enum redeclared.\n");
else
Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUM, 0, 0, NULL);
@ -159,187 +626,58 @@ void BeginEnumDeclaration() {
Type = FindEnumMember(Name);
if (Type != NULL)
DieMessage("Attempting to redeclare enum value", Name);
ErrorReport("Enum value already declared\n");
Safe();
// Parse equality
if (CurrentFile->CurrentSymbol.type == LI_EQUAL) {
Tokenise();
// Expect a number after the equals
if (CurrentFile->CurrentSymbol.type != LI_INT)
Die("Expected integer to assign enum value to");
ErrorReport("Expected integer in enum assignment\n");
Value = CurrentFile->CurrentSymbol.value;
// int
Tokenise();
Safe();
}
Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL);
// Break on right brace
if (CurrentFile->CurrentSymbol.type == LI_RBRAC)
break;
VerifyToken(LI_COM, "Comma");
Safe();
}
// Skip right brace
Tokenise();
}
/*
 * Handles the declaration of a single variable, for example:
 *     int newVar;
 *
 * The name of the variable is read from CurrentIdentifier.
 *
 * The name is first checked against the symbol list that matches the
 * requested scope, and a redeclaration is reported through DieMessage.
 * Each scope is only checked against its own list; the cases deliberately
 * do not fall through into one another.
 *
 * If the current token is '[', the declaration is treated as an array.
 * Only global arrays are supported; any other scope is reported via Die.
 * Otherwise a plain variable is added to the symbol table.
 *
 * @param Type:      The data type of the variable being declared
 * @param Composite: Handed to AddSymbol for non-array declarations
 * @param Scope:     One of SC_GLOBAL, SC_LOCAL, SC_PARAM or SC_MEMBER
 * @return the Symbol Table entry of this new variable. NULL is returned when
 *         an array declaration does not produce a symbol (for example, when
 *         '[' is not followed by an integer literal).
 */
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
    struct SymbolTableEntry* Symbol = NULL;

    // Reject names that already exist in the scope being declared into.
    switch (Scope) {
        case SC_GLOBAL:
            if (FindGlobal(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of global variable", CurrentIdentifier);
            break;

        case SC_LOCAL:
        case SC_PARAM:
            if (FindLocal(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
            break;

        case SC_MEMBER:
            if (FindMember(CurrentIdentifier) != NULL)
                DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
            break;

        default:
            break;
    }

    if (CurrentFile->CurrentSymbol.type == LI_LBRAS) {
        // Skip the '['
        Tokenise();

        if (CurrentFile->CurrentSymbol.type == LI_INT) {
            switch (Scope) {
                case SC_GLOBAL:
                    // NOTE(review): the literal's value is not forwarded to AddSymbol here;
                    // confirm whether the fifth argument is meant to carry the array length.
                    Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
                    break;

                case SC_LOCAL:
                case SC_PARAM:
                case SC_MEMBER:
                    Die("Local arrays are unimplemented");
                    break;

                default:
                    break;
            }
        }

        // Step over the token inside the brackets, then require the ']'
        Tokenise();
        VerifyToken(LI_RBRAS, "]");
    } else {
        Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
    }

    return Symbol;
}
/*
 * Parses a function declaration or definition. The function's name is read
 * from CurrentIdentifier, and parsing starts at the '(' of the parameter list.
 *
 *  - Looks the name up; an existing symbol only counts as a previous
 *    declaration when it passes the ST_FUNC check below.
 *  - For a name that has not been declared as a function, a fresh end label
 *    is requested from the assembler and a new global symbol is added.
 *  - Reads the parameter list between the parentheses.
 *  - If a ';' follows, this was only a prototype and nothing is generated.
 *  - Otherwise the body is parsed, and a function with a non-void return
 *    type must end in a return statement.
 *
 * @param Type: The return type of the function
 * @return the AST for this function, or NULL when only a prototype was read
 *
 */
struct ASTNode* ParseFunction(int Type) {
    struct SymbolTableEntry* Prototype = FindSymbol(CurrentIdentifier);
    struct SymbolTableEntry* Fresh = NULL;
    struct ASTNode* Body;
    struct ASTNode* LastStatement;
    int EndLabel = 0;
    int ParameterTotal = 0;
    int FirstSighting;

    // NOTE(review): this compares the Storage field against an ST_ constant,
    // while AddSymbol below is given ST_FUNC in its third argument and an SC_ constant in its fourth - confirm the intended field.
    if (Prototype != NULL && Prototype->Storage != ST_FUNC) {
        Prototype = NULL;
    }

    FirstSighting = (Prototype == NULL);

    if (FirstSighting) {
        EndLabel = Assembler->vtable->NewLabel();
        Fresh = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, EndLabel, 0, NULL);
    }

    VerifyToken(LI_LPARE, "(");
    ParameterTotal = ReadDeclarationList(Prototype, SC_PARAM, LI_RPARE);
    VerifyToken(LI_RPARE, ")");

    printf("\nIdentified%sfunction %s of return type %s, end label %d\n",
           FirstSighting ? " new " : " overloaded ",
           FirstSighting ? Fresh->Name : Prototype->Name,
           TypeNames(Type), EndLabel);

    if (Fresh) {
        // Attach the parameters that were just read to the new symbol.
        Fresh->Elements = ParameterTotal;
        Fresh->Start = Params;
        // NOTE(review): the stored type is overwritten with RET_LONG regardless of Type - confirm this is intended.
        Fresh->Type = RET_LONG;
        Prototype = Fresh;
    }

    // The parameter list has been handed over (or discarded); start afresh.
    Params = ParamsEnd = NULL;

    // A semicolon here means there is no body to parse.
    if (CurrentFile->CurrentSymbol.type == LI_SEMIC) {
        Tokenise();
        return NULL;
    }

    CurrentFile->FunctionEntry = Prototype;
    CurrentFile->CurrentLoopDepth = 0;

    VerifyToken(LI_LBRAC, "{");
    Body = ParseCompound();
    VerifyToken(LI_RBRAC, "}");

    if (Type != RET_VOID) {
        // A body with a single statement has no composite node wrapping it,
        // so the statement to inspect is either the right child or the body itself.
        if (Body->Operation == OP_COMP) {
            LastStatement = Body->Right;
        } else {
            LastStatement = Body;
        }

        if (LastStatement == NULL || LastStatement->Operation != OP_RET) {
            Die("Function with non-void type does not return");
        }
    }

    return ConstructASTBranch(OP_FUNC, Body->ExprType, Body, Prototype, EndLabel);
}
/*
 * Handles the logic for return.
 * Accepts both forms:
 *     return <expression>;
 *     return (<expression>);
 *
 * The returned expression is passed through MutateType against the type of
 * the function currently being parsed (CurrentFile->FunctionEntry); a NULL
 * result is reported as a type error.
 *
 * Problems are reported through ErrorReport, in line with the rest of the
 * parser, and each check is reported exactly once.
 *
 * //TODO: Type inference
 *
 * @return the OP_RET AST node wrapping the returned expression
 */
struct ASTNode* ReturnStatement() {
    struct ASTNode* Tree;

    VerifyToken(KW_RETURN, "return");

    if (CurrentFile->FunctionEntry->Type == RET_VOID) {
        ErrorReport("Attempt to return from void function");
    }

    // The parentheses around the expression are optional; remember whether
    // one was opened so that the matching ')' can be required afterwards.
    bool bracketed = OptionallyConsume(LI_LPARE);
    Safe();

    Tree = ParsePrecedenceASTNode(0);

    Tree = MutateType(Tree, CurrentFile->FunctionEntry->Type, 0);
    if (Tree == NULL) {
        // Only report a mismatch when the mutation actually failed.
        ErrorReport("Returning a value of incorrect type for function. Expected %s.\n", TypeNames(CurrentFile->FunctionEntry->Type));
    }

    Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, CurrentFile->FunctionEntry, 0);

    printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name);

    if (bracketed) {
        VerifyToken(LI_RPARE, ")");
    }
    Safe();
    VerifyToken(LI_SEMIC, ";");

    return Tree;
}
@ -480,7 +818,7 @@ struct ASTNode* ForStatement() {
VerifyToken(KW_FOR, "for");
VerifyToken(LI_LPARE, "(");
Preop = ParseStatement();
Preop = ParseExpressionList(LI_SEMIC);
VerifyToken(LI_SEMIC, ";");
Condition = ParsePrecedenceASTNode(0);
@ -490,7 +828,7 @@ struct ASTNode* ForStatement() {
VerifyToken(LI_SEMIC, ";");