/*
 * Erythro/src/Statements.c
 * 646 lines, 19 KiB, C
 */

/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
#include <stdbool.h>
/*
 * Handles reading in a comma-separated list of declarations.
 * Erythro treats structs, enums and function parameters the same in this regard -
 *  comma separated.
 *
 * C and C++ tend to treat enums and structs differently - the former separated by commas,
 *  the latter separated by semicolons.
 *
 * Note that since functions are read in through parentheses, and structs/enums are read in
 *  through brackets, the end character is configurable.
 *
 * When FunctionSymbol is given, each element is only type-checked against the
 *  matching entry of the existing parameter list (FunctionSymbol->Start); no new
 *  symbols are created for those elements. Elements beyond the end of that list,
 *  and every element when FunctionSymbol is NULL, are declared through
 *  BeginVariableDeclaration.
 *
 * The End token itself is not consumed; the caller is expected to verify it.
 *
 * @param FunctionSymbol: The Symbol Table Entry of the current function, if applicable.
 * @param Storage: The Storage Scope of this declaration list.
 * @param End: The end token, in terms of TokenTypes enum values.
 * @return the amount of declarations read in.
 *
 */
static int ReadDeclarationList(struct SymbolTableEntry* FunctionSymbol, int Storage, int End) {
    int TokenType, ParamCount = 0;
    struct SymbolTableEntry* PrototypePointer = NULL;

    if (FunctionSymbol != NULL)
        PrototypePointer = FunctionSymbol->Start;

    while (CurrentToken.type != End) {
        // Reset for every element, so that neither an indeterminate pointer nor the
        // composite type of the previous element can leak into this declaration
        // if ParseOptionalPointer leaves it untouched.
        struct SymbolTableEntry* Composite = NULL;

        TokenType = ParseOptionalPointer(&Composite);
        VerifyToken(TY_IDENTIFIER, "identifier");

        printf("\tReading a new element: %s of type %d, scope %s\n", CurrentIdentifier, TokenType, ScopeNames[Storage]);

        if (PrototypePointer != NULL) {
            // An earlier declaration exists: the type at this position must match it.
            if (TokenType != PrototypePointer->Type)
                DieDecimal("Function parameter of invalid type at index", ParamCount + 1);
            PrototypePointer = PrototypePointer->NextSymbol;
        } else {
            BeginVariableDeclaration(TokenType, Composite, Storage);
        }

        ParamCount++;

        // Each element must be followed by either a separator or the end token.
        if ((CurrentToken.type != LI_COM) && (CurrentToken.type != End))
            DieDecimal("Unexpected token in parameter", CurrentToken.type);

        if (CurrentToken.type == LI_COM)
            Tokenise();
    }

    // ParseFunction records the parameter count of a function in Elements,
    // so that is the field the count read here has to agree with.
    if ((FunctionSymbol != NULL) && (ParamCount != FunctionSymbol->Elements))
        DieMessage("Invalid number of parameters in prototyped function", FunctionSymbol->Name);

    return ParamCount;
}
/*
* Handles the declaration of a new composite type.
* For example, a struct is a composite of multiple different named positions:
* struct thisStct { int x, int y, int z };
*
* Verifies that the current identifier is not used,
* verifies that this is not a redefinition (excluding
* the case where there is a declaration but no definition)
* and then saves it into the appropriate symbol table.
*
* @return the Symbol Table entry of this new composite.
*/
struct SymbolTableEntry* BeginCompositeDeclaration(int Type) {
struct SymbolTableEntry* Composite = NULL, * Member;
int Offset = 0, Largest = 0;
Tokenise();
if (CurrentToken.type == TY_IDENTIFIER) {
Composite = Type == DAT_STRUCT ? FindStruct(CurrentIdentifier) : FindUnion(CurrentIdentifier);
Tokenise();
}
if (CurrentToken.type != LI_LBRAC) {
if (Composite == NULL)
DieMessage("Unknown Struct", CurrentIdentifier);
return Composite;
}
if (Composite)
DieMessage("Redefinition of composite", CurrentIdentifier);
Composite = AddSymbol(CurrentIdentifier, Type, 0, Type == DAT_STRUCT ? SC_STRUCT : SC_UNION, 0, 0, NULL);
Tokenise();
printf("Reading a composite declaration.. Type is %s\n", Type == DAT_STRUCT ? "struct" : "union");
2021-01-22 01:01:53 +00:00
ReadDeclarationList(NULL, SC_MEMBER, LI_RBRAC);
VerifyToken(LI_RBRAC, "}");
Composite->Start = CompositeMembers;
CompositeMembers = CompositeMembersEnd = NULL;
Member = Composite->Start;
printf("\tSetting first entry in composite to %s\r\n", Member->Name);
Member->SinkOffset = 0;
Offset = TypeSize(Member->Type, Member->CompositeType);
for (Member = Member->NextSymbol; Member != NULL; Member = Member->NextSymbol) {
if (Type == DAT_STRUCT)
Member->SinkOffset = AsAlignMemory(Member->Type, Offset, 1);
else
Member->SinkOffset = 0;
int CurrentSize = TypeSize(Member->Type, Member->CompositeType);
Offset += CurrentSize;
Largest = CurrentSize > Largest ? CurrentSize : Largest;
}
Composite->Length = Type == DAT_STRUCT ? Offset : Largest;
return Composite;
}
/*
* Handles the declaration of a type of a variable.
* int newVar;
*
* It verifies that we have a type keyword followed by a
* unique, non-keyword identifier.
*
* It then stores this variable into the appropriate symbol table,
* and returns the new item.
*
* @return the Symbol Table entry of this new variable.
*/
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope) {
struct SymbolTableEntry* Symbol = NULL;
switch (Scope) {
case SC_GLOBAL:
if (FindGlobal(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of global variable", CurrentIdentifier);
case SC_LOCAL:
case SC_PARAM:
if (FindLocal(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of local variable", CurrentIdentifier);
case SC_MEMBER:
if (FindMember(CurrentIdentifier) != NULL)
DieMessage("Invalid redeclaration of Enum/Struct member", CurrentIdentifier);
}
if (CurrentToken.type == LI_LBRAS) {
Tokenise();
if (CurrentToken.type == LI_INT) {
switch (Scope) {
case SC_GLOBAL:
Symbol = AddSymbol(CurrentIdentifier, PointerTo(Type), ST_ARR, Scope, 1, 0, NULL);
break;
case SC_LOCAL:
case SC_PARAM:
case SC_MEMBER:
Die("Local arrays are unimplemented");
}
}
Tokenise();
2020-11-18 20:49:08 +00:00
VerifyToken(LI_RBRAS, "]");
} else {
Symbol = AddSymbol(CurrentIdentifier, Type, ST_VAR, Scope, 1, 0, Composite);
}
2020-11-18 20:49:08 +00:00
return Symbol;
}
/*
* Handles the declaration of a new function.
* Verifies that the identifier is not taken (excluding the case
* where there is a declaration but no definition)
* Parses the list of parameters if present
* Saves the function prototype if there is no body
* Generates and saves the break-out point label
*
* @param Type: The return type of the function
* @return the AST for this function
*
*/
struct ASTNode* ParseFunction(int Type) {
struct ASTNode* Tree;
struct ASTNode* FinalStatement;
struct SymbolTableEntry* OldFunction, * NewFunction = NULL;
int SymbolSlot, BreakLabel, ParamCount, ID;
if ((OldFunction = FindSymbol(CurrentIdentifier)) != NULL)
if (OldFunction->Storage != ST_FUNC)
OldFunction = NULL;
if (OldFunction == NULL) {
BreakLabel = NewLabel();
NewFunction = AddSymbol(CurrentIdentifier, Type, ST_FUNC, SC_GLOBAL, BreakLabel, 0, NULL);
}
VerifyToken(LI_LPARE, "(");
ParamCount = ReadDeclarationList(OldFunction, SC_PARAM, LI_RPARE);
VerifyToken(LI_RPARE, ")");
printf("\nIdentified%sfunction %s of return type %s, end label %d\n",
(OldFunction == NULL) ? " new " : " overloaded ",
(OldFunction == NULL) ? NewFunction->Name : OldFunction->Name,
TypeNames(Type), BreakLabel);
2021-02-22 17:43:06 +00:00
if (NewFunction) {
NewFunction->Elements = ParamCount;
NewFunction->Start = Params;
NewFunction->Type = RET_LONG;
OldFunction = NewFunction;
}
Params = ParamsEnd = NULL;
if (CurrentToken.type == LI_SEMIC) {
Tokenise();
return NULL;
}
FunctionEntry = OldFunction;
Tree = ParseCompound();
if (Type != RET_VOID) {
// Functions with one statement have no composite node, so we have to check
FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree;
if (FinalStatement == NULL || FinalStatement->Operation != OP_RET) {
Die("Function with non-void type does not return");
}
}
return ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, OldFunction, BreakLabel);
}
/*
 * Handles the logic for return.
 *
 * The expected form is:
 *   return ( expression )
 *
 * Returning from a function whose type is RET_VOID is an error, as is
 *  an expression that MutateType cannot convert to the function's type.
 *
 * //TODO: No brackets
 * //TODO: Type inference
 *
 * @return the AST of this statement
 */
struct ASTNode* ReturnStatement() {
    struct ASTNode* Value;
    struct ASTNode* Statement;

    if (FunctionEntry->Type == RET_VOID)
        Die("Attempt to return from void function");

    VerifyToken(KW_RETURN, "return");
    VerifyToken(LI_LPARE, "("); // TODO: Make optional! Reject?

    // Parse the returned expression, then coerce it to the type of the enclosing function.
    Value = ParsePrecedenceASTNode(0);
    Value = MutateType(Value, FunctionEntry->Type, 0);
    if (Value == NULL)
        Die("Returning a value of incorrect type for function");

    Statement = ConstructASTBranch(OP_RET, RET_NONE, Value, FunctionEntry, 0);

    printf("\t\tReturning from function %s\n", FunctionEntry->Name);

    VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE!

    return Statement;
}
/*
 * Handles the surrounding logic for If statements.
 *
 * If statements have the basic form:
 *  * if (condition) body
 *  * if (condition)
 *        body
 *  * if (condition) {
 *        body
 *    }
 *
 * Each of these may be followed by an else and a second body.
 *
 * Conditions may be any truthy expression (such as a pointer,
 *  object, integer): a condition whose operation lies outside the
 *  OP_EQUAL..OP_GREATE range is wrapped in a boolean conversion.
 *
 * This means that any object that can be resolved to 0 or NULL
 *  can be placed as the condition and used as a check.
 *
 * For example:
 *  struct ASTNode* Node = NULL;
 *  if(Node) {
 *      // This will not run, as Node is ((void*)0)
 *  }
 *
 * @return the AST of this statement
 */
struct ASTNode* IfStatement() {
    struct ASTNode* Test;
    struct ASTNode* ThenBranch;
    struct ASTNode* ElseBranch = NULL;
    int InComparisonRange;

    VerifyToken(KW_IF, "if");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Operations between OP_EQUAL and OP_GREATE (presumably the comparisons) are
    // used directly; everything else goes through an explicit boolean conversion.
    // TODO: limit if(x) to the comparisons only - no null checking, no arithmetic, no functions.
    InComparisonRange = (Test->Operation >= OP_EQUAL) && (Test->Operation <= OP_GREATE);
    if (!InComparisonRange)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);

    VerifyToken(LI_RPARE, ")");

    ThenBranch = ParseCompound();

    // The else branch is optional; it stays NULL when absent.
    if (CurrentToken.type == KW_ELSE) {
        Tokenise();
        ElseBranch = ParseCompound();
    }

    return ConstructASTNode(OP_IF, RET_NONE, Test, ThenBranch, ElseBranch, NULL, 0);
}
/*
 * Handles the surrounding logic for While loops.
 *
 * While loops have the basic form:
 *  while ( condition ) { body }
 *
 * When reaching the condition (which, like that of an If statement,
 *  can be any truthy value), if it resolves to true:
 *   The body is executed, and immediately the condition is checked
 *    again.
 *   This repeats until the condition resolves false, at which point
 *    the loop executes no more.
 *
 * This can be prototyped as the following pseudo-assembler:
 *
 *  cond:
 *      check <condition>
 *      jne exit
 *      <body>
 *      jump cond
 *  exit:
 *      <more code>
 *
 * @return the AST of this statement
 *
 */
struct ASTNode* WhileStatement() {
    struct ASTNode* Test;
    struct ASTNode* LoopBody;
    int InComparisonRange;

    VerifyToken(KW_WHILE, "while");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Anything outside the OP_EQUAL..OP_GREATE range is wrapped in a boolean conversion.
    InComparisonRange = (Test->Operation >= OP_EQUAL) && (Test->Operation <= OP_GREATE);
    if (!InComparisonRange)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);

    VerifyToken(LI_RPARE, ")");

    LoopBody = ParseCompound();

    // The condition is the left child of the loop node, the body its right child.
    return ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, LoopBody, NULL, 0);
}
/*
 * Handles the surrounding logic for For loops.
 *
 * They have the basic form of:
 *   for ( init ; condition; iterator) { body }
 *
 * The initialiser is run only once upon reaching the for loop.
 * Then the condition is checked, and if true, the body is executed.
 * After execution of the body, the iterator is run and the condition
 *  checked again.
 *
 * It can be prototyped as the following pseudo-assembler code:
 *
 * for:
 *      <init>
 * cond:
 *      check <condition>
 *      jne exit
 *      <body>
 *      <iterator>
 *      jump cond
 * exit:
 *      <loop exit>
 *
 * In the case of the implementation, "init" is the preoperator,
 *  "iterator" is the postoperator.
 *
 * The resulting tree has the shape:
 *   COMP( init, LOOP( condition, COMP( body, iterator ) ) )
 *
 * @return the AST of this statement
 */
struct ASTNode* ForStatement() {
    struct ASTNode* Init;
    struct ASTNode* Test;
    struct ASTNode* Step;
    struct ASTNode* Body;
    struct ASTNode* BodyAndStep;
    struct ASTNode* Loop;
    int InComparisonRange;

    VerifyToken(KW_FOR, "for");
    VerifyToken(LI_LPARE, "(");

    Init = ParseStatement();
    VerifyToken(LI_SEMIC, ";");

    Test = ParsePrecedenceASTNode(0);
    // Anything outside the OP_EQUAL..OP_GREATE range is wrapped in a boolean conversion.
    InComparisonRange = (Test->Operation >= OP_EQUAL) && (Test->Operation <= OP_GREATE);
    if (!InComparisonRange)
        Test = ConstructASTBranch(OP_BOOLCONV, Test->ExprType, Test, NULL, 0);
    VerifyToken(LI_SEMIC, ";");

    Step = ParseStatement();
    VerifyToken(LI_RPARE, ")");

    Body = ParseCompound();

    // The body and the iterator are skipped over together, so they form one group.
    BodyAndStep = ConstructASTNode(OP_COMP, RET_NONE, Body, NULL, Step, NULL, 0);

    // The loop jumps back to the condition, and falls through into that group.
    Loop = ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, BodyAndStep, NULL, 0);

    // The initialiser runs once, ahead of the loop, to form the final for loop.
    return ConstructASTNode(OP_COMP, RET_NONE, Init, NULL, Loop, NULL, 0);
}
/*
 * Handles the surrounding logic for the Print statement.
 *
 * This is a legacy hold-over from the early testing.
 *
 * It parses the expression following the print keyword, and wraps
 *  it in an OP_PRINT node.
 *
 * //TODO: guess the type that is to be printed.
 *
 * @return the AST of this statement
 */
struct ASTNode* PrintStatement(void) {
    struct ASTNode* Tree;
    int RightType;

    VerifyToken(KW_PRINT, "print");

    Tree = ParsePrecedenceASTNode(0);
    RightType = Tree->ExprType;

    // NOTE(review): this asks MutateType to convert the expression to the type
    // it already has - presumably a conversion to RET_INT was intended. Kept as-is.
    Tree = MutateType(Tree, RightType, 0);
    if (!Tree)
        DieDecimal("Attempting to print an invalid type:", RightType);

    // NOTE(review): the operation of the wrapping node is taken from the right
    // child of the expression, and the intent is unclear. A node without a right
    // child (such as a lone identifier or literal) was dereferenced through a NULL
    // pointer here; it is now passed to OP_PRINT unwrapped.
    if (RightType && Tree->Right != NULL)
        Tree = ConstructASTBranch(Tree->Right->Operation, RET_INT, Tree, NULL, 0);

    Tree = ConstructASTBranch(OP_PRINT, RET_NONE, Tree, NULL, 0);

    return Tree;
}
/*
 * Handles the surrounding logic for all of the logical and semantic
 *  postfixes.
 *
 * Postfixes are tokens that are affixed to the end of another, and
 *  change behaviour in some way. These can be added calculations,
 *  some form of transformation, or other.
 *
 * A current list of postfixes:
 *  * (): Call a function
 *  * []: Index an array.
 *  * . : Access a member of a composite
 *  * ->: Access a member of a composite through a pointer
 *  * ++: Increment a variable AFTER it is returned
 *          NOTE: there is a prefix variant of this for incrementing BEFOREhand.
 *  * --: Decrement a variable AFTER it is returned
 *          NOTE: there is a prefix variant of this for decrementing BEFOREhand.
 *
 * Without any postfix, a plain reference to the identifier is produced.
 *
 * Planned postfixes:
 *  * >>: Arithmetic-Shift-Right a variable by one (Divide by two)
 *          NOTE: there is a prefix variant of this for shifting left - multiplying by two.
 *
 * @return the AST of the statement plus its postfix
 */
struct ASTNode* PostfixStatement() {
    struct SymbolTableEntry* Entry;
    int IsKnown;

    // Move on to the token that follows (presumably the one after the identifier,
    // whose text stays available in CurrentIdentifier).
    Tokenise();

    if (CurrentToken.type == LI_LPARE)
        return CallFunction();

    if (CurrentToken.type == LI_LBRAS)
        return AccessArray();

    // If we get here, we must be a variable.
    // (as functions have been called and arrays have been indexed)
    // Check that the variable is recognized..
    Entry = FindSymbol(CurrentIdentifier);
    IsKnown = (Entry != NULL) && (Entry->Structure == ST_VAR || Entry->Structure == ST_FUNC);
    if (!IsKnown) {
        DumpAllLists();
        DieMessage("Unknown Variable", CurrentIdentifier);
    }

    // Member access is handed off entirely; the flag selects access through a pointer.
    if (CurrentToken.type == LI_DOT)
        return AccessMember(false);

    if (CurrentToken.type == LI_ARROW)
        return AccessMember(true);

    // Here we check for postincrement and postdecrement.
    if (CurrentToken.type == PPMM_PLUS) {
        Tokenise();
        return ConstructASTLeaf(OP_POSTINC, Entry->Type, Entry, 0);
    }

    if (CurrentToken.type == PPMM_MINUS) {
        Tokenise();
        return ConstructASTLeaf(OP_POSTDEC, Entry->Type, Entry, 0);
    }

    // No postfix at all: a plain reference to the identifier.
    return ConstructASTLeaf(REF_IDENT, Entry->Type, Entry, 0);
}
/*
 * Handles the surrounding logic for all of the logical and semantic
 *  prefixes.
 *
 * Prefixes are tokens that are affixed to the start of another, and
 *  change behaviour in some way. These can be added calculations,
 *  some form of transformation, or other.
 *
 * Prefixes are parsed recursively, so that they can be stacked (such as !!x or **p).
 *
 * A current list of prefixes:
 *  * !:  Invert the boolean result of a statement or truthy value.
 *  * ~:  Invert the individual bits in a number
 *  * -:  Invert the number around the axis of 0 (negative->positive, positive->negative)
 *  * ++: Increment a variable BEFORE it is returned.
 *          NOTE: there is a postfix variant of this for incrementing AFTER the fact.
 *  * --: Decrement a variable BEFORE it is returned.
 *          NOTE: there is a postfix variant of this for decrementing AFTER the fact.
 *  * &:  Take the address of the following identifier
 *  * *:  Dereference: get the object pointed at by the following identifier,
 *          or by another dereference
 *
 * Planned prefixes:
 *  * <<: Arithmetic-Shift-Left a variable by one (Multiply by two)
 *          NOTE: there is a postfix variant of this for shifting right - dividing by two.
 *
 * @return the AST of this statement, plus its prefixes and any postfixes.
 */
struct ASTNode* PrefixStatement() {
    struct ASTNode* Tree;

    switch (CurrentToken.type) {
        case BOOL_INVERT:
            Tokenise();
            Tree = PrefixStatement();
            // The operand is read, not written to.
            Tree->RVal = 1;
            Tree = ConstructASTBranch(OP_BOOLNOT, Tree->ExprType, Tree, NULL, 0);
            break;

        case BIT_NOT:
            Tokenise();
            Tree = PrefixStatement();
            Tree->RVal = 1;
            Tree = ConstructASTBranch(OP_BITNOT, Tree->ExprType, Tree, NULL, 0);
            break;

        case AR_MINUS:
            Tokenise();
            Tree = PrefixStatement();
            // Negation reads its operand just like ! and ~ do, so the operand
            // is marked as an rvalue here as well.
            Tree->RVal = 1;
            Tree = ConstructASTBranch(OP_NEGATE, Tree->ExprType, Tree, NULL, 0);
            break;

        case PPMM_PLUS:
            Tokenise();
            Tree = PrefixStatement();
            if (Tree->Operation != REF_IDENT)
                Die("++ not followed by identifier");
            Tree = ConstructASTBranch(OP_PREINC, Tree->ExprType, Tree, NULL, 0);
            break;

        case PPMM_MINUS:
            Tokenise();
            Tree = PrefixStatement();
            if (Tree->Operation != REF_IDENT)
                Die("-- not followed by identifier");
            Tree = ConstructASTBranch(OP_PREDEC, Tree->ExprType, Tree, NULL, 0);
            break;

        case BIT_AND:
            Tokenise();
            // Only an identifier has an address that can be taken: the result of
            // another & is rejected by the check below.
            Tree = PrefixStatement();
            if (Tree->Operation != REF_IDENT)
                Die("& must be followed by an identifier.");
            // No new node is created; the identifier reference itself is
            // turned into an address-of node of pointer type.
            Tree->Operation = OP_ADDRESS;
            Tree->ExprType = PointerTo(Tree->ExprType);
            break;

        case AR_STAR:
            Tokenise();
            // Dereferences can be chained, so another dereference is allowed as the operand.
            Tree = PrefixStatement();
            if (Tree->Operation != REF_IDENT && Tree->Operation != OP_DEREF)
                Die("* must be followed by another * or an identifier.");
            Tree = ConstructASTBranch(OP_DEREF, ValueAt(Tree->ExprType), Tree, NULL, 0);
            break;

        default:
            // Not a prefix at all: parse a primary expression.
            Tree = ParsePrimary();
            break;
    }

    return Tree;
}