Erythro/src/Statements.c
2023-12-08 03:44:42 +00:00

1339 lines
41 KiB
C

/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
#include <stdbool.h>
static void ParseEnumDeclaration();
static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree);
static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType);
/*
* Handles parsing multiple statements or expressions in a row.
* These are typically grouped together with the Compound tokens "{ }"
* and separated by the semicolon ";".
*
* Single Statements are parsed until a semicolon is reached, at which
* point another statement will be parsed, or until a Right Compound
* token is reached ("}"), at which point parsing will stop.
*
* It is useful for:
* * Tightly identifying related blocks of code
* * Containing the many statements of functions
*
* @return the AST Node representing this compound statement
*
*/
struct ASTNode* ParseCompound() {
    // Left-deep chain of every statement parsed so far; NULL until the first one.
    struct ASTNode* Chain = NULL;

    for (;;) {
        printf("\tNew branch in compound\n");
        struct ASTNode* Statement = ParseStatement();
        Safe();

        // Statements that yield no AST are skipped; the rest are glued on with OP_COMP.
        if (Statement != NULL) {
            Chain = (Chain == NULL)
                ? Statement
                : ConstructASTNode(OP_COMP, RET_NONE, Chain, NULL, Statement, NULL, 0);
        }

        int Next = CurrentFile->CurrentSymbol.type;

        // A closing brace ends the block. It is left for the caller to consume.
        if (Next == LI_RBRAC) {
            fflush(stdout);
            break;
        }

        // Inside a switch, the next case/default label also ends the current body.
        if (CurrentFile->SwitchStatement && (Next == KW_CASE || Next == KW_DEFAULT))
            break;
    }

    return Chain;
}
/*
* Read a literal of the given type.
* @param Type integer or char*
* @return the integer literal, or label value of the string.
*/
int ParseLiteral(int Type) {
    // A string literal assigned to a char* is emitted through the assembler backend,
    // and whatever AsNewString returns is handed back in place of a numeric value.
    if ((Type == PointerTo(RET_CHAR)) && (CurrentFile->CurrentSymbol.type == LI_STR))
        return Assembler->vtable->AsNewString(CurrentIdentifier);

    if (CurrentFile->CurrentSymbol.type != LI_INT) {
        ErrorReport("Expecting an integer literal or char array.\n");
    } else if (Type == RET_CHAR) {
        // chars only accept literals in the range 0..255
        int Value = CurrentFile->CurrentSymbol.value;
        if (Value < 0 || Value > 255)
            ErrorReport("Integer literal value too big for char\n");
    } else if (Type != RET_INT && Type != RET_LONG) {
        ErrorReport("Type Mismatch. Integer Literal vs Variable.\n");
    }

    return CurrentFile->CurrentSymbol.value;
}
/*
* Get the type that a typedef declaration aliases.
* @param name the name of the typedef
* @param CompositeType out: the type if composite
* @return the type if scalar
*/
static int GetTypedef(char* name, struct SymbolTableEntry** CompositeType) {
    struct SymbolTableEntry* Alias = FindAlias(name);

    if (!Alias)
        ErrorReport("Unknown alias type: %s\n", name);

    // Step past the alias name before handing the aliased type back.
    Tokenise();
    Safe();

    *CompositeType = Alias->CompositeType;
    return Alias->Type;
}
/*
* Resolve a typename to a type struct.
* Short circuit on the case where a definition is present, as definitions are typeless.
*/
static int ParseType(struct SymbolTableEntry** CompositeType, int* Scope) {
    int Token = CurrentFile->CurrentSymbol.type;
    int Type = -1;

    // Scope is accepted for storage-class handling, but no storage-class
    // keyword is recognised here, so it is neither read nor written.
    (void) Scope;

    switch (Token) {
        case KW_IMPORT:
            // Imports carry no type at all.
            ImportModule();
            return -1;

        // Primitive type names: consume the keyword and report the matching type.
        case TY_VOID:
            Tokenise();
            return RET_VOID;
        case TY_CHAR:
            Tokenise();
            return RET_CHAR;
        case TY_INT:
            Tokenise();
            return RET_INT;
        case TY_LONG:
            Tokenise();
            return RET_LONG;

        case KW_ALIAS:
            Type = ParseAliasDeclaration(CompositeType);
            break;

        case KW_ENUM:
            ParseEnumDeclaration();
            Type = RET_INT;
            break;

        case KW_STRUCT:
        case KW_UNION:
            Type = (Token == KW_STRUCT) ? DAT_STRUCT : DAT_UNION;
            *CompositeType = BeginCompositeDeclaration(Type);
            break;

        case TY_IDENTIFIER:
            return GetTypedef(CurrentIdentifier, CompositeType);

        default:
            ErrorReport("Illegal type on token %s\n", TokenNames[Token]);
            return -1;
    }

    // alias / enum / struct / union followed directly by ";" only defined a type:
    // there is no variable to declare, so report "no type" to the caller.
    if (CurrentFile->CurrentSymbol.type == LI_SEMIC)
        return -1;

    return Type;
}
/*
* Given a Type passed by ParseType, read following dereferences and return pointer type.
*/
static int ParsePointerType(int Type) {
    // Nothing to do when we are looking at an import.
    while (CurrentFile->CurrentSymbol.type != KW_IMPORT) {
        printf("\t\t\tType on parsing is %s\n", TokenNames[CurrentFile->CurrentSymbol.type]);

        // Stop at the first token that is not a "*".
        if (CurrentFile->CurrentSymbol.type != AR_STAR)
            break;

        // Each "*" adds one level of indirection.
        Type = PointerTo(Type);
        Tokenise();
    }

    return Type;
}
/*
* Parse a declaration of an array - the [ <int> ] part.
*
* @param name the name of the array
* @param Type the type of the array, if scalar
* @param CompositeType the type of the array, if composite
* @param Storage the storage class of the array
* @return the defined array symbol
*/
static struct SymbolTableEntry* ParseArrayDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
    struct SymbolTableEntry* Symbol = NULL;
    int Elems = -1, MaxElems, *InitialList, i = 0, j = 0;

    // Step over the "[" that brought us here.
    Tokenise();
    Safe();

    // The element count is optional; Elems stays -1 when it is omitted and is
    // then taken from the number of initializers below.
    if (CurrentFile->CurrentSymbol.type == LI_INT) {
        if (CurrentFile->CurrentSymbol.value <= 0)
            ErrorReport("Array size in definition cannot be negative.\n");
        Elems = CurrentFile->CurrentSymbol.value;
        Tokenise();
    }

    VerifyToken(LI_RBRAS, "]");
    Safe();

    // Only global arrays can be declared.
    switch (Storage) {
        case SC_GLOBAL:
            Symbol = AddSymbol(name, PointerTo(Type), ST_ARR, SC_GLOBAL, 0, 0, CompositeType);
            break;
        default:
            ErrorReport("Local array declaration not supported\n");
    }

    // Optional "= { literal, literal, ... }" initializer list.
    if (CurrentFile->CurrentSymbol.type == LI_EQUAL) {
        if (Storage != SC_GLOBAL)
            ErrorReport("Non-global array cannot be initialized.\n");
        Tokenise();
        Safe();
        VerifyToken(LI_LBRAC, "{");

        // With a declared size the buffer is exactly that big; otherwise it
        // starts at 10 entries and grows in steps of 10.
        if (Elems != -1)
            MaxElems = Elems;
        else
            MaxElems = 10;
        InitialList = (int*) malloc(MaxElems * sizeof(int));

        while (1) {
            if (Elems != -1 && i == MaxElems)
                ErrorReport("Too many items in initializer list\n");

            InitialList[i++] = ParseLiteral(Type);
            Tokenise();
            Safe();

            // Unsized array: make room before the next literal is stored.
            if (Elems == -1 && i == MaxElems) {
                MaxElems += 10;
                InitialList = (int*) realloc(InitialList, MaxElems * sizeof(int));
            }

            if (CurrentFile->CurrentSymbol.type == LI_RBRAC) {
                Tokenise();
                break;
            }

            VerifyToken(LI_COM, ",");
            Safe();
        }

        // Zero every declared element the initializer did not cover.
        // The bound has to be Elems: Symbol->Length is only assigned after this
        // block, so using it here left the tail of the buffer uninitialised.
        // For an unsized array Elems is -1 and the loop does not run.
        for (j = i; j < Elems; j++)
            InitialList[j] = 0;

        // An unsized array takes its length from the initializer count.
        if (i > Elems)
            Elems = i;

        Symbol->InitialValues = InitialList;
    }

    Symbol->Length = Elems;
    Symbol->Size = Symbol->Length * TypeSize(Type, CompositeType);

    if (Storage == SC_GLOBAL)
        Assembler->vtable->AsGlobalSymbol(Symbol);

    return Symbol;
}
/*
 * Add a scalar definition to the symbol tables and parse an optional "= value" initializer.
 *
 * Globals accept a single literal, stored in the symbol's InitialValues.
 * Locals accept any expression; the resulting assignment is handed back through Tree.
 *
 * @param name the name of the variable
 * @param Type the type of the variable, if primitive
 * @param CompositeType the type of the variable, if composite
 * @param Storage the storage class of the variable
 * @param Tree out: the OP_ASSIGN tree for an initialised local; left untouched otherwise
 * @return the new symbol table entry
 */
static struct SymbolTableEntry* ParseScalarDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree) {
    struct SymbolTableEntry* sym = AddSymbol(name, Type, ST_VAR, Storage, 1, 0, CompositeType);
    struct ASTNode* var, *expr;

    // Being assigned.
    if (CurrentFile->CurrentSymbol.type == LI_EQUAL) {
        if (Storage != SC_GLOBAL && Storage != SC_LOCAL)
            ErrorReport("Non-static, non-local variable cannot be initialized.\n");
        Tokenise();
        Safe();

        if (Storage == SC_GLOBAL) {
            // Globals are initialised from one literal.
            sym->InitialValues = (int*) malloc(sizeof(int));
            sym->InitialValues[0] = ParseLiteral(Type);
            Tokenise();
        }
        else if (Storage == SC_LOCAL) {
            // Locals become an assignment: <expression> -> <variable>.
            var = ConstructASTLeaf(REF_IDENT, sym->Type, sym, 0);
            expr = ParsePrecedenceASTNode(0);
            expr->RVal = 1;

            // Remember the expression's own type: MutateType hands back NULL on a
            // mismatch, and the diagnostic must not read through that NULL.
            int exprType = expr->ExprType;
            expr = MutateType(expr, var->ExprType, 0);

            if (expr == NULL)
                ErrorReport("Incompatible types in assignment: %s, %s\n", TypeNames(exprType), TypeNames(var->ExprType));
            else
                *Tree = ConstructASTNode(OP_ASSIGN, expr->ExprType, expr, NULL, var, NULL, 0);
        }
    }

    if (Storage == SC_GLOBAL)
        Assembler->vtable->AsGlobalSymbol(sym);

    return sym;
}
/*
* Handles reading in a comma-or-semicolon separated list of declarations.
* Erythro treats structs, enums and function parameters the same in this regard -
* comma separated.
*
* C and C++ tend to treat enums and structs differently - the former separated by commas,
* the latter separated by semicolons.
*
* Note that since functions are read in through parentheses, and structs/enums are read in
* through brackets, the end character is configurable.
*
* Parse declarations, including lists thereof, until the Terminate symbol is encountered.
* Will first parse a type name, then parse the identifier using ParseDeclarationSymbol.
* Declaration lists must be separated by a comma or terminated with the StatementEndSymbol.
*
* @param CompositeType out: the type of the declaration list.
* @param ClassType the type of the class
* @param StatementEndSymbol the symbol that marks the end of the declaration list
* @param TerminateSymbol the symbol that marks the end of parsing
* @return the type of the declaration
*
*/
int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType, int StatementEndSymbol, int TerminateSymbol, struct ASTNode** CompoundTree) {
    int initType, type;
    struct SymbolTableEntry* symbol;
    struct ASTNode* Tree;
    // Initialiser trees gathered so far, mirrored into *CompoundTree when the caller wants them.
    struct ASTNode* Combined = NULL;

    // CompoundTree is optional: callers with no use for initialiser trees
    // (composite member lists pass NULL) must not cause a write through NULL.
    if (CompoundTree != NULL)
        *CompoundTree = NULL;

    // ParseType only fills this in for composite and aliased types; give it a
    // defined value so scalar declarations do not pass an indeterminate pointer on.
    *CompositeType = NULL;

    fflush(stdout);

    if ((initType = ParseType(CompositeType, &ClassType)) == -1)
        return initType;

    while (1) {
        type = ParsePointerType(initType);

        // ParseDeclarationSymbol only writes Tree for an initialised local,
        // so it needs a known value beforehand.
        Tree = NULL;
        symbol = ParseDeclarationSymbol(type, *CompositeType, ClassType, &Tree);
        printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, type, ScopeNames[ClassType]);

        // Lists of function declarations are not valid.
        // Whether a symbol is a function is recorded in Structure (ST_*), not in Type.
        if (symbol->Structure == ST_FUNC) {
            if (ClassType != SC_GLOBAL)
                ErrorReport("Function definition not at global scope\n");
            return type;
        }

        // Chain this declaration's initialiser (if it produced one) onto the others.
        if (Tree != NULL) {
            if (Combined == NULL)
                Combined = Tree;
            else
                Combined = ConstructASTNode(OP_COMP, RET_NONE, Combined, NULL, Tree, NULL, 0);

            if (CompoundTree != NULL)
                *CompoundTree = Combined;
        }

        // Terminate at either symbol
        if (CurrentFile->CurrentSymbol.type == StatementEndSymbol || CurrentFile->CurrentSymbol.type == TerminateSymbol)
            return type;

        // We must be continuing the list, so parse a comma
        VerifyToken(LI_COM, ",");
    }
}
/*
* Parse the full list of parameter declarations.
* Each has a type, a name, may be a pointer, or an array.
*
* @param FunctionDeclaration the type of the declaration of the function, if declared already.
* @param FunctionDefinition the type of the definition of the function, which we are parsing
* @return the number of parameters parsed
*/
static int ParseParameterDeclarationList(struct SymbolTableEntry* FunctionDeclaration, struct SymbolTableEntry* FunctionDefinition) {
    int TokenType, ParamCount = 0;
    struct SymbolTableEntry* PrototypePointer = NULL;

    // FunctionDefinition is part of the interface but is not consulted here.
    (void) FunctionDefinition;

    // With an earlier declaration present, walk its parameter list in step with ours.
    if (FunctionDeclaration != NULL)
        PrototypePointer = FunctionDeclaration->Start;

    while (CurrentFile->CurrentSymbol.type != LI_RPARE) {
        // Fresh out-parameters for every declaration: the callee only fills the
        // composite in for composite types, and its value is read either way.
        struct SymbolTableEntry* Composite = NULL;
        struct ASTNode* empty = NULL;

        // Doing int x, y, float z is valid, so parse a list of declarations per parameter.
        TokenType = ParseDeclarationList(&Composite, SC_PARAM, LI_COM, LI_RPARE, &empty);
        if (TokenType == -1)
            ErrorReport("Bad type in parameter list");

        printf("\tReading a new parameter: %s of type %d\n", CurrentIdentifier, TokenType);

        // Each parameter must have the same type as the one declared earlier.
        if (PrototypePointer != NULL) {
            if (TokenType != PrototypePointer->Type)
                ErrorReport("Function parameter has invalid type at index %d\n", ParamCount + 1);
            PrototypePointer = PrototypePointer->NextSymbol;
        }

        Safe();
        ParamCount++;

        if (CurrentFile->CurrentSymbol.type == LI_RPARE)
            break;

        VerifyToken(LI_COM, ",");
        Safe();
    }

    if ((FunctionDeclaration != NULL) && (ParamCount != FunctionDeclaration->Length))
        ErrorReport("Function definition has different number of parameters than the function declaration (%d vs %d).\n", ParamCount, FunctionDeclaration->Length);

    return ParamCount;
}
/*
* Parse a function declaration, and optionally definition.
* <type> <identifier> ( parameter(,?)* ) ;
* <type> <identifier> ( parameter(,?)* ) compound ;
*
* @param name the name of the function
* @param Type the type of the function, if primitive
* @param CompositeType the type of the function, if composite
* @param Storage the scope of the function
* @return the new symbol table entry for the function
*/
static struct SymbolTableEntry* ParseFunctionDeclaration(char* name, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
struct ASTNode* Tree;
struct ASTNode* FinalStatement;
struct SymbolTableEntry* OldFunction, * NewFunction = NULL;
int BreakLabel = 0, ParamCount = 0;
VerifyToken(KW_FUNC, "::");
Safe();
VerifyToken(TY_IDENTIFIER, "Identifier");
Safe();
if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL)
if (OldFunction->Storage != ST_FUNC)
OldFunction = NULL;
if (OldFunction == NULL) {
BreakLabel = Assembler->vtable->NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
}
VerifyToken(LI_LPARE, "(");
Safe();
ParamCount = ParseParameterDeclarationList(OldFunction, NewFunction);
VerifyToken(LI_RPARE, ")");
Safe();
printf("\nIdentified%sfunction %s of return type %s, end label %d\n",
(OldFunction == NULL) ? " new " : " overloaded ",
(OldFunction == NULL) ? NewFunction->Name : OldFunction->Name,
TypeNames(Type), BreakLabel);
if (NewFunction) {
NewFunction->Length = ParamCount;
NewFunction->Start = Params;
NewFunction->Type = RET_LONG;
OldFunction = NewFunction;
}
Params = ParamsEnd = NULL;
if (CurrentFile->CurrentSymbol.type == LI_SEMIC) {
return OldFunction;
}
CurrentFile->FunctionEntry = OldFunction;
CurrentFile->CurrentLoopDepth = 0;
VerifyToken(LI_LBRAC, "{");
Safe();
Tree = ParseCompound();
Safe();
VerifyToken(LI_RBRAC, "}");
if (Type != RET_VOID) {
// Functions with one statement have no composite node, so we have to check
FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree;
if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) {
ErrorReport("Function with non-void type does not return");
}
}
Tree = ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel);
if (Tree && CurrentFile->AllowDefinitions) {
printf("\nBeginning assembler creation of new function %s\n", Tree->Symbol->Name);
if (OptDumpTree) {
DumpTree(Tree, 0);
fprintf(stdout, "\n\n");
}
// Emit the function now
Assembler->vtable->AssembleTree(Tree, -1, -1, -1, 0);
FreeLocals();
} else {
printf("\nFunction prototype saved\r\n");
}
Safe();
return OldFunction;
}
/*
* The "alias" keyword allows one to create a new keyword that is accepted in lieu of another (or a chain of another)
* It does this by reading in sequence:
* * The "alias" keyword
* * The thing to alias (any valid primary type)
* * The new name
*
* They are stored in a separate symbol table and can be used anywhere the original is valid.
*/
static int ParseAliasDeclaration(struct SymbolTableEntry** CompositeType) {
    int Type, Storage = 0;

    // "alias"
    Tokenise();
    Safe();

    // The type being aliased.
    Type = ParseType(CompositeType, &Storage);
    if (Storage != 0)
        ErrorReport("Cannot extern an alias definition.\n");

    // It may be a pointer definition. The stars have to be consumed before the
    // new name is looked at: until then the current token is "*" and
    // CurrentIdentifier still holds an earlier identifier.
    Type = ParsePointerType(Type);

    // The current token is now expected to be the new name.
    if (FindAlias(CurrentIdentifier) != NULL)
        ErrorReport("Duplicate type alias.\n");

    AddSymbol(CurrentIdentifier, Type, ST_VAR, SC_ALIAS, 0, 0, *CompositeType);

    // Step past the new name.
    Tokenise();
    Safe();

    return Type;
}
/*
* Parse an array initialization.
* Everything after the =, for example.
* Every element must match the type of the array, and the number of elements must match the size of the array.
* @param Symbol the symbol of the array we're initializing
* @param Type the type of the array, if primitive
* @param CompositeType the type of the array, if composite
* @param Storage the storage class of the array we're initializing
*/
static void ParseArrayInitialization(struct SymbolTableEntry* Symbol, int Type, struct SymbolTableEntry* CompositeType, int Storage) {
    // Placeholder: none of the arguments are examined, the request is always rejected.
    (void) Symbol;
    (void) Type;
    (void) CompositeType;
    (void) Storage;

    ErrorReport("Array initialization not permitted.\n");
}
/*
 * Duplicate a NUL-terminated string into freshly allocated memory.
 *
 * @param str the string to copy; may be NULL
 * @return a heap copy the caller owns (release with free), or NULL when
 *         str is NULL or the allocation fails
 */
static char* copyString(char* str) {
    if (str == NULL)
        return NULL; // Nothing to copy; strlen(NULL) would be undefined

    size_t len = strlen(str);
    char* dst = malloc(len + 1); // Space for the characters plus the terminator
    if (dst == NULL)
        return NULL; // No memory

    memcpy(dst, str, len + 1); // Copies the terminator as well
    return dst;
}
/*
* Parse a name symbol for a declaration.
* Calls out to parse functions, arrays and scalars alike.
* Also parses an inline initialization if present.
*
* @param Type the type of the declaration, if primitive
* @param CompositeType a reference to the type, if composite (struct)
* @param Storage the storage class of the declaration
* @return the symbol table entry to the new symbol
*/
static struct SymbolTableEntry* ParseDeclarationSymbol(int Type, struct SymbolTableEntry* CompositeType, int Storage, struct ASTNode** Tree) {
    struct SymbolTableEntry* symbol = NULL;
    char* variableName;

    Safe();

    // Functions are introduced by "::" and read their own name afterwards.
    // ParseFunctionDeclaration does not use its name argument, so no heap copy
    // is made on this path (the copy used to be leaked here).
    if (CurrentFile->CurrentSymbol.type == KW_FUNC)
        return ParseFunctionDeclaration(CurrentIdentifier, Type, CompositeType, Storage);

    // Copy the name before VerifyToken moves past the identifier token.
    variableName = copyString(CurrentIdentifier);
    VerifyToken(TY_IDENTIFIER, "Identifier");

    // Check for duplicate declarations.
    // NOTE(review): the cases fall through, so a global is also checked against
    // locals and members, and a local/parameter against members. Kept as-is;
    // confirm whether each case was meant to end in a break.
    switch (Storage) {
        case SC_GLOBAL:
            if (FindGlobal(variableName) != NULL)
                ErrorReport("Duplicate global declaration\n");
            /* fallthrough */
        case SC_LOCAL:
        case SC_PARAM:
            if (FindLocal(variableName) != NULL)
                ErrorReport("Duplicate local declaration\n");
            /* fallthrough */
        case SC_MEMBER:
            if (FindMember(variableName) != NULL)
                ErrorReport("Duplicate member declaration\n");
            /* fallthrough */
        default:
            break;
    }

    // Determine whether this is an array or scalar.
    if (CurrentFile->CurrentSymbol.type == LI_LBRAS)
        symbol = ParseArrayDeclaration(variableName, Type, CompositeType, Storage);
    else
        symbol = ParseScalarDeclaration(variableName, Type, CompositeType, Storage, Tree);

    return symbol;
}
/*
* Handles the declaration of a new composite type.
* For example, a struct is a composite of multiple different named positions:
* struct thisStct { int x, int y, int z };
*
* Verifies that the current identifier is not used,
* verifies that this is not a redefinition (excluding
* the case where there is a declaration but no definition)
* and then saves it into the appropriate symbol table.
*
* @return the Symbol Table entry of this new composite.
*/
struct SymbolTableEntry* BeginCompositeDeclaration(int Type) {
    struct SymbolTableEntry* Composite = NULL, *Member = NULL;
    // Sink for initialiser trees: ParseDeclarationList writes through this pointer.
    struct ASTNode* Unused = NULL;
    int Offset = 0, Largest = 0, MemberType;

    // "struct" / "union"
    Tokenise();
    Safe();

    // Optional tag name: see whether a composite of that name already exists.
    if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) {
        Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier);
        Tokenise();
    }

    // No "{" means an existing composite is being used, not defined.
    if (CurrentFile->CurrentSymbol.type != LI_LBRAC) {
        if (Composite == NULL)
            ErrorReport("Use of undefined composite");
        return Composite;
    }

    if (Composite)
        ErrorReport("Redefinition of composite");

    Composite = AddSymbol(CurrentIdentifier, Type, ST_RUCT, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL);

    // Skip the "{"
    Tokenise();
    Safe();

    printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union");

    // Read member declarations up to the closing brace.
    // The result goes into its own variable: Type must keep saying
    // struct-or-union for the layout pass below.
    while (1) {
        Member = NULL;
        MemberType = ParseDeclarationList(&Member, SC_MEMBER, LI_SEMIC, LI_RBRAC, &Unused);
        if (MemberType == -1)
            ErrorReport("Bad type in member list of composite\n");

        OptionallyConsume(LI_SEMIC);
        Safe();

        if (CurrentFile->CurrentSymbol.type == LI_RBRAC)
            break;
    }

    VerifyToken(LI_RBRAC, "}");

    if (CompositeMembers == NULL)
        ErrorReport("No members in struct.\n");

    // Hand the collected member list to the composite and reset the shared list.
    Composite->Start = CompositeMembers;
    CompositeMembers = CompositeMembersEnd = NULL;

    // The first member always sits at offset 0.
    Member = Composite->Start;
    printf("\tSetting first entry in composite to %s\r\n", Member->Name);
    Member->SinkOffset = 0;
    Offset = TypeSize(Member->Type, Member->CompositeType);
    // The first member takes part in the "largest member" computation too.
    Largest = Offset;

    for (Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
        int CurrentSize;

        if (Type == DAT_STRUCT) {
            // Struct members follow each other. The running offset continues
            // from where this member ends, so padding added by AsAlignMemory
            // is not lost (assumes it returns an offset >= Offset - TODO confirm).
            Member->SinkOffset = Assembler->vtable->AsAlignMemory(Member->Type, Offset, 1);
            CurrentSize = TypeSize(Member->Type, Member->CompositeType);
            Offset = Member->SinkOffset + CurrentSize;
        } else {
            // Union members all start at offset 0.
            Member->SinkOffset = 0;
            CurrentSize = TypeSize(Member->Type, Member->CompositeType);
            Offset += CurrentSize;
        }

        Largest = CurrentSize > Largest ? CurrentSize : Largest;
    }

    Composite->Length = Type == DAT_STRUCT ? Offset : Largest;
    // NOTE(review): for unions this is the sum of the member sizes, as before - confirm.
    Composite->Size = Offset;

    return Composite;
}
/*
 * Parse "enum [name]" optionally followed by "{ entry [= int], ... }".
 *
 * Without a brace block the enum must already exist and nothing is added.
 * With one, a symbol is added for the enum itself and one for every entry.
 * Entries count up from 0; "= int" restarts the count at the given value.
 */
static void ParseEnumDeclaration() {
    struct SymbolTableEntry* Type = NULL;
    char* Name = NULL;
    int Value = 0;

    // "enum"
    Tokenise();
    Safe();

    // enum name
    if (CurrentFile->CurrentSymbol.type == TY_IDENTIFIER) {
        Type = FindEnum(CurrentIdentifier);
        Name = strdup(CurrentIdentifier);
        Tokenise();
    }

    // We're expecting to declare an enum, so make sure the content follows.
    if (CurrentFile->CurrentSymbol.type != LI_LBRAC) {
        if (Type == NULL)
            ErrorReport("Enum used but not yet declared.\n");
        // Only a reference to an existing enum: the copied name is not needed.
        free(Name);
        return;
    }

    // Skip the { that we have
    Tokenise();
    Safe();

    if (Type != NULL) {
        ErrorReport("Enum redeclared.\n");
    } else {
        // An enum without a tag has no name to copy. Register it under an
        // empty name rather than an uninitialised pointer.
        // NOTE(review): assumes AddSymbol accepts an empty name - confirm.
        if (Name == NULL)
            Name = strdup("");
        Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUM, 0, 0, NULL);
    }

    while (1) {
        VerifyToken(TY_IDENTIFIER, "Enum Entry");
        Name = strdup(CurrentIdentifier);

        Type = FindEnumMember(Name);
        if (Type != NULL)
            ErrorReport("Enum value already declared\n");
        Safe();

        // Parse equality
        if (CurrentFile->CurrentSymbol.type == LI_EQUAL) {
            Tokenise();
            // Expect a number after the equals
            if (CurrentFile->CurrentSymbol.type != LI_INT)
                ErrorReport("Expected integer in enum assignment\n");
            Value = CurrentFile->CurrentSymbol.value;
            // int
            Tokenise();
            Safe();
        }

        Type = AddSymbol(Name, DAT_ENUM, ST_ENUM, SC_ENUMENTRY, Value++, 0, NULL);

        // Break on right brace
        if (CurrentFile->CurrentSymbol.type == LI_RBRAC)
            break;

        VerifyToken(LI_COM, "Comma");
        Safe();
    }

    // Skip right brace
    Tokenise();
}
struct ASTNode* ReturnStatement() {
    struct ASTNode* Value;
    bool Parenthesised;

    VerifyToken(KW_RETURN, "return");

    // A value is always parsed below, so void functions cannot use "return".
    if (CurrentFile->FunctionEntry->Type == RET_VOID)
        ErrorReport("Attempt to return from void function");

    // The value may optionally be wrapped in parentheses: return (x);
    Parenthesised = OptionallyConsume(LI_LPARE);
    Safe();

    // Parse the value and coerce it to the function's return type.
    Value = ParsePrecedenceASTNode(0);
    Value = MutateType(Value, CurrentFile->FunctionEntry->Type, 0);
    if (!Value)
        ErrorReport("Returning a value of incorrect type for function. Expected %s.\n", TypeNames(CurrentFile->FunctionEntry->Type));

    Value = ConstructASTBranch(OP_RET, RET_NONE, Value, CurrentFile->FunctionEntry, 0);
    printf("\t\tReturning from function %s\n", CurrentFile->FunctionEntry->Name);

    // An opening parenthesis needs its partner.
    if (Parenthesised)
        VerifyToken(LI_RPARE, ")");
    Safe();

    VerifyToken(LI_SEMIC, ";");
    return Value;
}
/*
* Handles the surrounding logic for If statements.
*
* If statements have the basic form:
* * if (condition) body
* * if (condition)
* body
* * if (condition) {
* body
* }
*
* Conditions may be any truthy statement (such as a pointer,
* object, integer), as conditions not recognized are auto-
* matically converted to booleans.
*
* This meaning, any object that can be resolved to 0 or NULL
* can be placed as the condition and used as a check.
*
* For example:
* struct ASTNode* Node = NULL;
* if(Node) {
* // This will not run, as Node is ((void*)0)
* }
*
*/
struct ASTNode* IfStatement() {
    struct ASTNode* Test;
    struct ASTNode* ThenBranch;
    struct ASTNode* ElseBranch = NULL;
    bool IsComparison;

    VerifyToken(KW_IF, "if");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Operations in the OP_EQUAL..OP_GREATE range are used as the condition directly;
    // anything else is wrapped in an explicit boolean conversion.
    IsComparison = Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE;
    if (!IsComparison)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);

    VerifyToken(LI_RPARE, ")");

    ThenBranch = ParseStatement();

    // Optional else branch
    if (CurrentFile->CurrentSymbol.type == KW_ELSE) {
        Tokenise();
        ElseBranch = ParseStatement();
    }

    return ConstructASTNode(OP_IF, RET_NONE, Test, ThenBranch, ElseBranch, NULL, 0);
}
/*
* Handles the surrounding logic for While loops.
*
* While loops have the basic form:
* while ( condition ) { body }
*
* When reaching the condition (which, as in an If statement,
* can be any truthy value), if it resolves to true:
* The body is executed, and immediately the condition is checked
* again.
* This repeats until the condition resolves false, at which point
* the loop executes no more.
*
* This can be prototyped as the following pseudo-assembler:
*
* cond:
* check <condition>
* jne exit
* <body>
* jump cond
* exit:
* <more code>
*
* @return the AST of this statement
*
*/
struct ASTNode* WhileStatement() {
    struct ASTNode* Test;
    struct ASTNode* LoopBody;
    bool IsComparison;

    VerifyToken(KW_WHILE, "while");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Conditions outside the OP_EQUAL..OP_GREATE range get a boolean conversion.
    IsComparison = Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE;
    if (!IsComparison)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);

    VerifyToken(LI_RPARE, ")");

    // The loop depth is raised only while the body is parsed.
    CurrentFile->CurrentLoopDepth++;
    LoopBody = ParseStatement();
    CurrentFile->CurrentLoopDepth--;

    return ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, LoopBody, NULL, 0);
}
/*
* Handles the surrounding logic for For loops.
*
* They have the basic form of:
* for ( init ; condition; iterator) { body }
*
* The initialiser is run only once upon reaching the for loop.
* Then the condition is checked, and if true, the body is executed.
* After execution of the body, the iterator is run and the condition
* checked again.
*
* It can be prototyped as the following pseudo-assembler code:
*
* for:
* <init>
* cond:
* check <condition>
* jne exit
* <body>
* <iterator>
* jump cond
* exit:
* <loop exit>
*
* In the case of the implementation, "init" is the preoperator,
* "iterator" is the postoperator.
*
* @return the AST of this statement
*/
struct ASTNode* ForStatement() {
    struct ASTNode* Init;
    struct ASTNode* Test;
    struct ASTNode* Step;
    struct ASTNode* LoopBody;
    struct ASTNode* BodyThenStep;
    struct ASTNode* Loop;
    bool IsComparison;

    VerifyToken(KW_FOR, "for");
    VerifyToken(LI_LPARE, "(");

    // init ;
    Init = ParseExpressionList(LI_SEMIC);
    VerifyToken(LI_SEMIC, ";");

    // condition ; - with a boolean conversion unless it is in the OP_EQUAL..OP_GREATE range
    Test = ParsePrecedenceASTNode(0);
    IsComparison = Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE;
    if (!IsComparison)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);
    VerifyToken(LI_SEMIC, ";");

    // iterator )
    Step = ParseExpressionList(LI_RPARE);
    VerifyToken(LI_RPARE, ")");

    // The loop depth is raised only while the body is parsed.
    CurrentFile->CurrentLoopDepth++;
    LoopBody = ParseStatement();
    CurrentFile->CurrentLoopDepth--;

    // Body and iterator are grouped, so one node holds everything that repeats.
    BodyThenStep = ConstructASTNode(OP_COMP, RET_NONE, LoopBody, NULL, Step, NULL, 0);

    // The condition guards that group.
    Loop = ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, BodyThenStep, NULL, 0);

    // Finally the initialiser is placed in front of the whole loop.
    return ConstructASTNode(OP_COMP, RET_NONE, Init, NULL, Loop, NULL, 0);
}
/*
* Handles the surrounding logic for the Print statement.
*
* This is a legacy hold-over from the early testing, and it
* serves merely as a wrapper around the C standard library printf function.
*
* It does, however (//TODO), attempt to guess the type that you
* want to print, which takes a lot of the guesswork out of printing.
*
* @return the AST of this statement
*/
struct ASTNode* PrintStatement(void) {
    struct ASTNode* Tree;
    int RightType;

    VerifyToken(KW_PRINT, "print");

    // The expression to print.
    Tree = ParsePrecedenceASTNode(0);
    RightType = Tree->ExprType;

    Tree = MutateType(Tree, RightType, 0);
    if (!Tree)
        DieDecimal("Attempting to print an invalid type:", RightType);

    // The extra wrapper takes its operation from the right child, so it can
    // only be built when there is one; a leaf expression has no right child
    // and used to be dereferenced through NULL here.
    // NOTE(review): the purpose of this wrapper is unclear from this file - confirm.
    if (RightType && Tree->Right != NULL)
        Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);

    Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);
    return Tree;
}
/*
 * Handles the surrounding logic for switch statements.
 *
 * They have the basic form of:
 *    switch ( expression ) { case <int>: body ... default: body }
 *
 * The switch expression must be of integer type, case values must be integer
 * literals and unique, and "default" (if present) must come last.
 *
 * @return an OP_SWITCH node: Left is the switch expression, Right the chain of
 *         OP_CASE / OP_DEFAULT nodes, IntValue the number of cases.
 */
struct ASTNode* SwitchStatement() {
    struct ASTNode* left, *root, *c, *casetree=NULL, *casetail;
    int looping=1, cases=0;
    int defaultpresent=0;
    // casevalue starts defined: a "default" with no "case" before it passes it on unset otherwise.
    int ASTOp, casevalue=0;
    // Remember whether we are nested inside another switch, so the flag can be
    // put back on exit instead of being forced to false.
    bool outerswitch = CurrentFile->SwitchStatement;

    printf("\tParsing switch statement\n");
    CurrentFile->SwitchStatement = true;

    // Skip switch(
    Tokenise();
    VerifyToken(LI_LPARE, "(");

    printf("\tSwitch: Reading switch expression\n");
    // Fetch switch expression
    left = ParsePrecedenceASTNode(0);

    // Consume ) {
    VerifyToken(LI_RPARE, ")");
    VerifyToken(LI_LBRAC, "{");

    // Verify the switch expression (must be integer-compatible)
    if (!TypeIsInt(left->ExprType))
        ErrorReport("Switch expression is not of integer type, instead %s.\n", TypeNames(left->ExprType));
    Safe();

    // Create the root Switch node
    root = ConstructASTBranch(OP_SWITCH, 0, left, NULL, 0);

    // Iterate down the switch node, generating cases
    while (looping) {
        switch (CurrentFile->CurrentSymbol.type) {
            case LI_RBRAC:
                if (cases == 0)
                    Die("No cases in switch statement");
                looping = 0;
                break;
            case KW_CASE:
                if (defaultpresent)
                    Die("Case present after default in switch.");
                ASTOp = OP_CASE;
                Safe();
                Tokenise();

                // Parse case value
                left = ParsePrecedenceASTNode(0);
                if (left->Operation != TERM_INTLITERAL)
                    Die("Expecting integer literal for case value");
                casevalue = left->IntValue;
                printf("\t\tSwitch case %d found\n", casevalue);

                // Make sure nothing resolves to the same case value
                for (c = casetree; c != NULL; c = c->Right)
                    if (casevalue == c->IntValue)
                        Die("Duplicate case ID in switch statement");
                // Fallthrough so that we get the case tree logic deduplicated
                /* fallthrough */
            case KW_DEFAULT:
                if (defaultpresent)
                    Die("Duplicate default entries in switch");

                // Duplicate check because CASE falls through into this block
                if (CurrentFile->CurrentSymbol.type == KW_DEFAULT) {
                    ASTOp = OP_DEFAULT;
                    defaultpresent = true;
                    Tokenise();
                    printf("\t\tSwitch default case found\n");
                }

                VerifyToken(LI_COLON, ":");
                Safe();

                // The body runs up to the next case/default label or the closing brace.
                left = ParseCompound();
                OptionallyConsume(LI_SEMIC);
                cases++;
                Safe();

                // Append this new case to the tree
                if (casetree == NULL) {
                    casetree = casetail = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue);
                } else {
                    casetail->Right = ConstructASTBranch(ASTOp, 0, left, NULL, casevalue);
                    casetail = casetail->Right;
                }
                break;
            default:
                ErrorReport("Unexpected token in switch statement: %s\n", TokenNames[CurrentFile->CurrentSymbol.type]);
                exit(1);
        }
    }

    root->IntValue = cases;
    root->Right = casetree;

    // Consume the right brace immediately
    VerifyToken(LI_RBRAC, "}");

    // Back to whatever the enclosing context was (still true inside an outer switch).
    CurrentFile->SwitchStatement = outerswitch;
    return root;
}
/**
* Handles the surrounding logic for break statements
*
* They have the basic form of:
* break;
*
* If there is a loop currently being evaluated, break will insert an immediate jump to the end of the loop.
* All locals inside the loop will lose their binding at this point.
*
* It can be prototyped as the following pseudo-assembler code:
*
* while:
* check <condition>
* jne exit
* <body>
* <break>: jump exit
* jump while
* exit:
* <loop exit>
*
*
* @return the AST of this statement
*/
struct ASTNode* BreakStatement() {
    bool InsideLoop = CurrentFile->CurrentLoopDepth != 0;

    // break needs something to break out of: a loop or a switch.
    if (!(InsideLoop || CurrentFile->SwitchStatement))
        Die("Unable to break without a loop or switch statement");

    // Step over "break", then require the ";" that ends the statement.
    Tokenise();
    Safe();
    VerifyToken(LI_SEMIC, ";");
    Safe();

    return ConstructASTLeaf(OP_BREAK, 0, NULL, 0);
}
/**
* Handles the surrounding logic for continue statements
*
* They have the basic form of:
* continue;
*
* If there is a loop currently being evaluated, continue will insert an immediate jump to the start of the loop.
*
* It can be prototyped as the following pseudo-assembler code:
*
* while:
* check <condition>
* jne exit
* <body>
* <continue>: jump while
* jump while
* exit:
* <loop exit>
*
*
* @return the AST of this statement
*/
struct ASTNode* ContinueStatement() {
    // continue is only meaningful inside a loop.
    if (CurrentFile->CurrentLoopDepth == 0)
        Die("Unable to continue without a loop");

    // Step over "continue".
    // NOTE(review): unlike BreakStatement, the trailing ";" is not consumed
    // here - confirm that the caller deals with it.
    Tokenise();

    return ConstructASTLeaf(OP_CONTINUE, 0, NULL, 0);
}
/*
* Handles the surrounding logic for all of the logical and semantic
* postfixes.
*
* Postfixes are tokens that are affixed to the end of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of postfixes:
* * (): Call a function
* * []: Index or define an array.
* * ++: Increment a variable AFTER it is returned
* NOTE: there is a prefix variant of this for incrementing BEFOREhand.
* * --: Decrement a variable AFTER it is returned
* NOTE: there is a prefix variant of this for decrementing BEFOREhand.
*
* Planned postfixes:
* * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
* NOTE: there is a prefix variant of this for shifting left - multiplying by two.
*
* @return the AST of the statement plus its postfix
*/
struct ASTNode* PostfixStatement() {
    struct SymbolTableEntry* Entry = FindEnumMember(CurrentIdentifier);
    int Token;

    // Enum members are replaced by their integer value straight away.
    if (Entry != NULL) {
        Tokenise();
        return ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, Entry->IntValue);
    }

    // Step over the identifier and look at what follows it.
    Tokenise();

    if (CurrentFile->CurrentSymbol.type == LI_LPARE)
        return CallFunction();

    if (CurrentFile->CurrentSymbol.type == LI_LBRAS)
        return AccessArray();

    // Not a call and not an index, so the name has to be a known variable (or function).
    Entry = FindSymbol(CurrentIdentifier);
    if (!(Entry != NULL && (Entry->Structure == ST_VAR || Entry->Structure == ST_FUNC))) {
        DumpAllLists();
        DieMessage("Unknown Variable", CurrentIdentifier);
    }

    Token = CurrentFile->CurrentSymbol.type;

    // Member access: "." on a value, "->" through a pointer.
    if (Token == LI_DOT)
        return AccessMember(false);
    if (Token == LI_ARROW)
        return AccessMember(true);

    // Post-increment / post-decrement: consume the operator and mark the leaf.
    if (Token == PPMM_PLUS || Token == PPMM_MINUS) {
        Tokenise();
        return ConstructASTLeaf(Token == PPMM_PLUS ? OP_POSTINC : OP_POSTDEC, Entry->Type, Entry, 0);
    }

    // No postfix at all: a plain reference to the variable.
    return ConstructASTLeaf(REF_IDENT, Entry->Type, Entry, 0);
}
/*
* Handles the surrounding logic for all of the logical and semantic
* prefixes.
*
* Prefixes are tokens that are affixed to the start of another, and
* change behaviour in some way. These can be added calculations,
* some form of transformation, or other.
*
* A current list of prefixes:
* * !: Invert the boolean result of a statement or truthy value.
* * ~: Invert the individual bits in a number
* * -: Invert the number around the axis of 0 (negative->positive, positive->negative)
* * ++: Increment a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for incrementing AFTER the fact.
* * --: Decrement a variable BEFORE it is returned.
* NOTE: there is a postfix variant of this for decrementing AFTER the fact.
* &: Take the address of the following object (get a pointer to it)
* *: Dereference the following pointer (get the object it points at)
*
* Planned prefixes:
* * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
* NOTE: there is a postfix variant of this for shifting right - dividing by two.
*
* @return the AST of this statement, plus its prefixes and any postfixes.
*/
struct ASTNode* PrefixStatement() {
    struct ASTNode* Operand;
    int Prefix = CurrentFile->CurrentSymbol.type;

    switch (Prefix) {
        // ! and ~ : the operand is marked as an rvalue and wrapped in the matching operation.
        case BOOL_INVERT:
        case BIT_NOT:
            Tokenise();
            Operand = PrefixStatement();
            Operand->RVal = 1;
            return ConstructASTBranch(Prefix == BOOL_INVERT ? OP_BOOLNOT : OP_BITNOT,
                                      Operand->ExprType, Operand, NULL, 0);

        // Unary minus
        case AR_MINUS:
            Tokenise();
            Operand = PrefixStatement();
            return ConstructASTBranch(OP_NEGATE, Operand->ExprType, Operand, NULL, 0);

        // ++x and --x only apply directly to identifiers.
        case PPMM_PLUS:
        case PPMM_MINUS:
            Tokenise();
            Operand = PrefixStatement();
            if (Operand->Operation != REF_IDENT)
                Die(Prefix == PPMM_PLUS ? "++ not followed by identifier" : "-- not followed by identifier");
            return ConstructASTBranch(Prefix == PPMM_PLUS ? OP_PREINC : OP_PREDEC,
                                      Operand->ExprType, Operand, NULL, 0);

        // Address-of: prefixes are parsed recursively, then the identifier
        // node is turned into an OP_ADDRESS node in place.
        case BIT_AND:
            Tokenise();
            Operand = PrefixStatement();
            if (Operand->Operation != REF_IDENT)
                Die("& must be followed by another & or an identifier.");
            Operand->Operation = OP_ADDRESS;
            Operand->ExprType = PointerTo(Operand->ExprType);
            return Operand;

        // Dereference: may be stacked (**x), so another OP_DEREF is also accepted.
        case AR_STAR:
            Tokenise();
            Operand = PrefixStatement();
            if (Operand->Operation != REF_IDENT && Operand->Operation != OP_DEREF)
                Die("* must be followed by another * or an identifier.");
            return ConstructASTBranch(OP_DEREF, ValueAt(Operand->ExprType), Operand, NULL, 0);

        // No prefix operator: hand over to primary-expression parsing.
        default:
            return ParsePrimary();
    }
}