519 lines
16 KiB
C
519 lines
16 KiB
C
|
|
/*************/
|
|
/*GEMWIRE */
|
|
/* ERYTHRO*/
|
|
/*************/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "Defs.h"
|
|
#include "Data.h"
|
|
|
|
#if defined(__GNUC__) || defined(APPLE)
|
|
#include <errno.h>
|
|
#endif
|
|
|
|
/*
|
|
* The Precedence of an operator is directly related to Token Type.
|
|
* Precedence determines how soon the operator and its surrounding values
|
|
* will be calculated and aliased.
|
|
* This allows for things like the common Order of Operations.
|
|
*/
|
|
static int Precedence[] = {
|
|
0, 10, // EOF, ASSIGN
|
|
20, 30, // || &&
|
|
40, 50, // | ^
|
|
60, 70, // & =?
|
|
70, 80, // != <
|
|
80, 80, // > <=
|
|
80, 90, // => <<
|
|
90, 100, // >> +
|
|
100, 110, // - *
|
|
110 // /
|
|
};
|
|
|
|
/*
|
|
* Handles gathering the precedence of an operator from its token,
|
|
* in terms of values of the TokenTypes enum.
|
|
*
|
|
* Error handling is also done here, so that EOF or non-operators are not executed.
|
|
*
|
|
*/
|
|
static int OperatorPrecedence(int Token) {
|
|
int Prec = Precedence[Token];
|
|
|
|
if (Prec == 0 || Token >= PPMM_PLUS) {
|
|
if (Token == TY_IDENTIFIER) {
|
|
ErrorReport("Attempting to determine operator precedence of identifier %s\n", CurrentIdentifier);
|
|
exit(1);
|
|
}
|
|
|
|
ErrorReport("Attempting to determine operator precedence of an EOF or INT literal: %s\n", TokenNames[Token]);
|
|
exit(1);
|
|
}
|
|
|
|
return Prec;
|
|
}
|
|
|
|
/*
|
|
* If the value is a right-expression, or in other words is right associative,
|
|
* then it can be safely calculated beforehand and aliased to a value.
|
|
* In this case, we can try to alias (or constant fold) everything on the right side
|
|
* of an assignment.
|
|
*/
|
|
|
|
static int IsRightExpr(int Token) {
|
|
return (Token == LI_EQUAL);
|
|
}
|
|
|
|
/* * * * * * * * * * * * * * * * * * * * * * * *
|
|
* * * N O D E C O N S T R U C T I O N * * *
|
|
* * * * * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/*
|
|
* ASTNodes form the structure of the language that moves the bulk of
|
|
* data around within the compiler.
|
|
* They contain:
|
|
* * An Operation (usually 1:1 with an input token),
|
|
* * A Type (to identify the size of data it contains),
|
|
* * Two more Left and Right ASTNodes (to form a doubly-linked list)
|
|
* * An extra Middle ASTNode in case it is needed (typically in the middle case of a For loop)
|
|
* * A Symbol Table Entry
|
|
* * An Integer Value
|
|
* * A flag to determine whether this node (and its sub-nodes) contain a right associative or Rval
|
|
*
|
|
* This is the only function where they are constructed.
|
|
*
|
|
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
|
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
|
* @param Left: The Node that is attached to the left side branch of this root.
|
|
* @param Middle: The Node that is attached to the middle of this root, if applicable.
|
|
* @param Right: The Node that is attached to the right side branch of this root.
|
|
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
|
* @param IntValue: The integer value encoded by this Node, if applicable.
|
|
* @return a newly constructed AST Node
|
|
*/
|
|
struct ASTNode* ConstructASTNode(int Operation, int Type,
|
|
struct ASTNode* Left,
|
|
struct ASTNode* Middle,
|
|
struct ASTNode* Right,
|
|
struct SymbolTableEntry* Symbol,
|
|
int IntValue) {
|
|
|
|
struct ASTNode* Node;
|
|
|
|
Node = (struct ASTNode*) malloc(sizeof(struct ASTNode));
|
|
|
|
if (!Node)
|
|
ErrorReport("Unable to allocate node: %s\n", errno);
|
|
|
|
|
|
Node->Operation = Operation;
|
|
Node->ExprType = Type;
|
|
Node->Left = Left;
|
|
Node->Middle = Middle;
|
|
Node->Right = Right;
|
|
Node->Symbol = Symbol;
|
|
Node->IntValue = IntValue;
|
|
|
|
return Node;
|
|
}
|
|
|
|
|
|
/*
|
|
* AST Leaves are categorized by their lack of child nodes.
|
|
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
|
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
|
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
|
* @param IntValue: The integer value encoded by this Node, if applicable.
|
|
* @return a newly constructed AST Node
|
|
*/
|
|
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue) {
|
|
return ConstructASTNode(Operation, Type, NULL, NULL, NULL, Symbol, IntValue);
|
|
}
|
|
|
|
/*
|
|
* AST Branches are categorized by having only one child node.
|
|
* These are sometimes called Unary Branches.
|
|
* @param Operation: The input Op of this Node, in terms of values of the SyntaxOps enum
|
|
* @param Type: The data type of this Node, in terms of values of the DataTypes enum.
|
|
* @param Left: The Node that is attached to the left side branch of this root.
|
|
* @param Symbol: The Symbol Table Entry that represents this Node, if applicable.
|
|
* @param IntValue: The integer value encoded by this Node, if applicable.
|
|
* @return a newly constructed AST Node
|
|
*/
|
|
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue) {
|
|
return ConstructASTNode(Operation, Type, Left, NULL, NULL, Symbol, IntValue);
|
|
}
|
|
|
|
|
|
/* * * * * * * * * * * * * * * * * * * * * * * *
|
|
* * * * T O K E N P A R S I N G * * * *
|
|
* * * * * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/*
|
|
* TokenTypes and SyntaxOps are mostly 1:1, so some minor effort can ensure that
|
|
* these are synchronized well.
|
|
* This allows the parsing operation to be little more than a bounds check.
|
|
* Otherwise, this would be a gigantic switch statement.
|
|
*/
|
|
|
|
int ParseTokenToOperation(int Token) {
|
|
if (Token > LI_EOF && Token < LI_INT)
|
|
return Token;
|
|
|
|
ErrorReport("ParseToken: Unknown token %s\n", TokenNames[Token]);
|
|
}
|
|
|
|
/*
|
|
* Primary expressions may be any one of:
|
|
* * A terminal integer literal
|
|
* * A terminal string literal
|
|
* * A variable
|
|
* * A collection of expressions bounded by parentheses.
|
|
*
|
|
* @return the AST Node that represents this expression
|
|
*/
|
|
|
|
struct ASTNode* ParsePrimary(void) {
|
|
struct ASTNode* Node;
|
|
int ID;
|
|
|
|
switch (CurrentFile->CurrentSymbol.type) {
|
|
case LI_INT:
|
|
|
|
if ((CurrentFile->CurrentSymbol.value >= 0) && (CurrentFile->CurrentSymbol.value < 256))
|
|
Node = ConstructASTLeaf(TERM_INTLITERAL, RET_CHAR, NULL, CurrentFile->CurrentSymbol.value);
|
|
else
|
|
Node = ConstructASTLeaf(TERM_INTLITERAL, RET_INT, NULL, CurrentFile->CurrentSymbol.value);
|
|
break;
|
|
|
|
case LI_STR:
|
|
|
|
ID = Assembler->vtable->AsNewString(CurrentIdentifier);
|
|
Node = ConstructASTLeaf(TERM_STRLITERAL, PointerTo(RET_CHAR), NULL, ID);
|
|
break;
|
|
|
|
case TY_IDENTIFIER:
|
|
return PostfixStatement();
|
|
|
|
case LI_LPARE:
|
|
// Starting a ( expr ) block
|
|
Tokenise();
|
|
|
|
Node = ParsePrecedenceASTNode(0);
|
|
|
|
// Make sure we close
|
|
VerifyToken(LI_RPARE, ")");
|
|
return Node;
|
|
}
|
|
|
|
Tokenise();
|
|
Safe();
|
|
return Node;
|
|
}
|
|
|
|
|
|
/*
|
|
* Parse a single binary expression.
|
|
* It ensures that these expressions are parsed to their full extent, that
|
|
* the order of operations is upheld, that the precedence of the prior
|
|
* iteration is considered, and that every error is handled.
|
|
*
|
|
* This is where all the right-associative statements are folded, where
|
|
* type mismatches and widening are handled properly, and that all parsing
|
|
* is over by the time the end tokens ") } ] ;" are encountered.
|
|
*
|
|
* @param PreviousTokenPrecedence: The precedence of the operator to the left.
|
|
* @return the AST Node corresponding to this block.
|
|
*
|
|
*/
|
|
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
|
|
struct ASTNode* LeftNode, * RightNode;
|
|
struct ASTNode* LeftTemp, * RightTemp;
|
|
// int LeftType, RightType;
|
|
int NodeType, OpType;
|
|
|
|
Safe();
|
|
LeftNode = PrefixStatement();
|
|
|
|
NodeType = CurrentFile->CurrentSymbol.type;
|
|
|
|
if (NodeType == LI_SEMIC || NodeType == LI_COLON || NodeType == LI_RPARE || NodeType == LI_RBRAS || NodeType == LI_COM || NodeType == LI_INT) {
|
|
LeftNode->RVal = 1;
|
|
return LeftNode;
|
|
}
|
|
|
|
Safe();
|
|
|
|
while ((OperatorPrecedence(NodeType) > PreviousTokenPrecedence) ||
|
|
(IsRightExpr(OpType) && OperatorPrecedence(OpType) == PreviousTokenPrecedence)) {
|
|
Tokenise();
|
|
if (CurrentFile->CurrentSymbol.type == LI_RPARE)
|
|
break;
|
|
|
|
Safe();
|
|
|
|
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
|
|
|
|
Safe();
|
|
/**
|
|
* While parsing this node, we may need to widen some types.
|
|
* This requires a few functions and checks.
|
|
*/
|
|
|
|
OpType = ParseTokenToOperation(NodeType);
|
|
|
|
if (OpType == OP_ASSIGN) {
|
|
printf("\tParsePrecedenceASTNode: Assignment statement\r\n");
|
|
RightNode->RVal = 1;
|
|
LeftNode->RVal = 0;
|
|
|
|
RightNode = MutateType(RightNode, LeftNode->ExprType, 0);
|
|
if (RightNode == NULL)
|
|
ErrorReport("Incompatible types encountered in assignment: %s, %s\n", TypeNames(RightNode->ExprType), TypeNames(LeftNode->ExprType));
|
|
|
|
// LeftNode holds the target, the target variable in this case
|
|
printf("\t\tAssigning variable: %s\n", LeftNode->Symbol->Name);
|
|
|
|
LeftTemp = LeftNode;
|
|
LeftNode = RightNode;
|
|
RightNode = LeftTemp;
|
|
|
|
// Clear temps as insurance
|
|
RightTemp = NULL;
|
|
LeftTemp = NULL;
|
|
} else {
|
|
printf("\t\tAttempting to handle a %s in Binary Expression parsing\r\n", TokenNames[CurrentFile->CurrentSymbol.type]);
|
|
LeftNode->RVal = 1;
|
|
RightNode->RVal = 1;
|
|
|
|
LeftTemp = MutateType(LeftNode, RightNode->ExprType, OpType);
|
|
|
|
RightTemp = MutateType(RightNode, LeftNode->ExprType, OpType);
|
|
/**
|
|
* If both are null, the types are incompatible.
|
|
*/
|
|
|
|
if (LeftTemp == NULL && RightTemp == NULL)
|
|
ErrorReport("Incompatible types in parsing nodes: %s, %s\n", TypeNames(LeftNode->ExprType), TypeNames(RightNode->ExprType));
|
|
|
|
/**
|
|
* If the left was valid, or valid for
|
|
* expansion, then it will be non-null.
|
|
*
|
|
* If it was valid, then this will be
|
|
* equivalent to LeftNode = LeftNode
|
|
*/
|
|
|
|
|
|
if (LeftTemp != NULL)
|
|
LeftNode = LeftTemp;
|
|
|
|
/**
|
|
* Same here, but there is a higher chance
|
|
* for the right node to be incompatible due
|
|
* to the nature of widening types.
|
|
*/
|
|
|
|
if (RightTemp != NULL)
|
|
RightNode = RightTemp;
|
|
|
|
}
|
|
|
|
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode->ExprType, LeftNode, NULL, RightNode,
|
|
NULL, 0);
|
|
NodeType = CurrentFile->CurrentSymbol.type;
|
|
if (NodeType == LI_SEMIC || NodeType == LI_RPARE || NodeType == LI_RBRAS) {
|
|
LeftNode->RVal = 1;
|
|
return LeftNode;
|
|
}
|
|
|
|
}
|
|
LeftNode->RVal = 1;
|
|
return LeftNode;
|
|
}
|
|
|
|
|
|
|
|
/* * * * * * * * * * * * * * * * * * * * *
|
|
* * * * F U N C T I O N S * * * *
|
|
* * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/*
|
|
* Handles the logic for calling a function.
|
|
* This is invoked by an identifier being recognized, followed by a "(.*)" string.
|
|
*
|
|
* It simply checks that the function exists, that the parameters given are valid,
|
|
* and generates the AST Node for calling it.
|
|
*
|
|
* @return the AST Node for calling the function stored in CurrentIdentifer
|
|
*
|
|
*/
|
|
struct ASTNode* CallFunction() {
|
|
struct ASTNode* Tree;
|
|
struct SymbolTableEntry* Function;
|
|
|
|
//TODO: Test structural type!
|
|
if ((Function = FindSymbol(CurrentIdentifier)) == NULL || (Function->Structure != ST_FUNC))
|
|
ErrorReport("Undeclared function: %s\n", CurrentIdentifier);
|
|
|
|
VerifyToken(LI_LPARE, "(");
|
|
|
|
Tree = ParseExpressionList(LI_RPARE);
|
|
|
|
Tree = ConstructASTBranch(OP_CALL, Function->Type, Tree, Function, 0);
|
|
|
|
VerifyToken(LI_RPARE, ")");
|
|
|
|
return Tree;
|
|
}
|
|
|
|
|
|
/*
|
|
* An expression list is used:
|
|
* * In the call to a function
|
|
*
|
|
* It is parsed by seeking left parentheses "(", parsing binary expressions
|
|
* until either a comma or a right parentheses is found.
|
|
*
|
|
* The former will cause another expression to be parsed, the latter will cause
|
|
* parsing to stop.
|
|
*
|
|
* @return the AST Node representing every expression in the list, glued end to
|
|
* end with a COMPOSITE operation.
|
|
*
|
|
*/
|
|
struct ASTNode* ParseExpressionList(int terminate) {
|
|
struct ASTNode* Tree = NULL, * Child = NULL;
|
|
int Count = 0;
|
|
|
|
Safe();
|
|
while (CurrentFile->CurrentSymbol.type != terminate) {
|
|
// TODO: for(int x = 0;
|
|
Child = ParsePrecedenceASTNode(0);
|
|
printf("\nFunction parameter %d is type %s.\n", Count, TypeNames(Child->ExprType));
|
|
Count++;
|
|
Tree = ConstructASTNode(OP_COMP, PointerTo(RET_VOID), Tree, NULL, Child, NULL, Count);
|
|
|
|
if (CurrentFile->CurrentSymbol.type == terminate)
|
|
break;
|
|
|
|
VerifyToken(LI_COM, ",");
|
|
Safe();
|
|
}
|
|
|
|
return Tree;
|
|
}
|
|
|
|
|
|
/* * * * * * * * * * * * * * * * * * * * * *
|
|
* * * * S T A T E M E N T S * * * *
|
|
* * * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/*
|
|
* Handles parsing an individual statement.
|
|
* This has the ability to parse Compounds in the case it starts with a left brace.
|
|
* This solves the dangling else problem for the parser.
|
|
*
|
|
* It serves as a wrapper around:
|
|
* * If Statement
|
|
* * While Statement
|
|
* * For Statement
|
|
* * Switch Statement
|
|
* * Return Statement
|
|
* * Numeric literals and variables
|
|
* * Binary Expressions
|
|
*
|
|
* @return the AST Node representing this single statement
|
|
*/
|
|
struct ASTNode* ParseStatement(void) {
|
|
struct ASTNode* Node;
|
|
struct SymbolTableEntry* Composite;
|
|
|
|
printf("\t\tBranch leads to type %s/%d\r\n", TokenNames[CurrentFile->CurrentSymbol.type], CurrentFile->CurrentSymbol.type);
|
|
switch (CurrentFile->CurrentSymbol.type) {
|
|
case LI_LBRAC:
|
|
Tokenise();
|
|
Node = ParseCompound();
|
|
VerifyToken(LI_RBRAC, "}");
|
|
return Node;
|
|
case TY_IDENTIFIER:
|
|
if (FindAlias(CurrentIdentifier) == NULL) {
|
|
Node = ParsePrecedenceASTNode(0);
|
|
VerifyToken(LI_SEMIC, ";");
|
|
return Node;
|
|
}
|
|
case TY_CHAR:
|
|
case TY_LONG:
|
|
case TY_INT:
|
|
case KW_STRUCT:
|
|
case KW_UNION:
|
|
case KW_ENUM:
|
|
case KW_ALIAS:
|
|
printf("\t\tTrying new Variable: %s\n", CurrentIdentifier);
|
|
ParseDeclarationList(&Composite, SC_LOCAL, LI_SEMIC, LI_EOF, &Node);
|
|
VerifyToken(LI_SEMIC, ";");
|
|
Safe();
|
|
return NULL;
|
|
|
|
case KW_SWITCH:
|
|
return SwitchStatement();
|
|
|
|
case KW_IF:
|
|
return IfStatement();
|
|
|
|
case KW_WHILE:
|
|
return WhileStatement();
|
|
|
|
case KW_FOR:
|
|
return ForStatement();
|
|
|
|
case KW_RETURN:
|
|
return ReturnStatement();
|
|
|
|
case KW_BREAK:
|
|
return BreakStatement();
|
|
|
|
case KW_CONTINUE:
|
|
return ContinueStatement();
|
|
|
|
default:
|
|
Node = ParsePrecedenceASTNode(0);
|
|
VerifyToken(LI_SEMIC, ";");
|
|
return Node;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* This is the entry point to the parser/lexer.
|
|
*
|
|
* By definition, Global definitions are accessible anywhere.
|
|
* As of right now (20/01/2021), classe are unimplemented.
|
|
* This means that all functions and all function prototypes are globally scoped.
|
|
*
|
|
* You may also define variables, constants, preprocessor directives and other text
|
|
* in the global scope.
|
|
*
|
|
* The function itself loops, parsing either variables or functions, until it
|
|
* reaches the end of the file.
|
|
*
|
|
*/
|
|
|
|
void ParseGlobals() {
|
|
struct SymbolTableEntry* Composite;
|
|
struct ASTNode* empty;
|
|
printf("Parsing global definitions\r\n");
|
|
|
|
while (CurrentFile->CurrentSymbol.type != LI_EOF) {
|
|
// Read in a declaration, or list thereof
|
|
ParseDeclarationList(&Composite, SC_GLOBAL, LI_SEMIC, LI_EOF, &empty);
|
|
|
|
// Consume semicolons if present
|
|
OptionallyConsume(LI_SEMIC);
|
|
}
|
|
}
|
|
|
|
|