First files.

Currently has two bugs.

First, every function reference is resolved to symbol table index 0 (currently PrintInteger), regardless of which function was named.

Second, the register that receives a function's return value is freed immediately, so the next register allocation reuses and overwrites it. As a result, expressions that add function return values together compute the wrong result.
This commit is contained in:
Curle 2020-09-10 01:56:16 +01:00
commit 430829e4ec
20 changed files with 2330 additions and 0 deletions

30
include/Data.h Normal file
View File

@ -0,0 +1,30 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#pragma once
#include <stdio.h>
#include <Defs.h>
// Every global below is declared through extern_. A file that defines
// extern_ as empty before including this header (Main.c does) turns these
// declarations into the definitions; everywhere else they stay 'extern'.
#ifndef extern_
#define extern_ extern
#endif
#define TEXTLEN 512 // Limit handed to ReadIdentifier for identifier length
#define SYMBOLS 1024 // Number of slots in the Symbols table
extern_ struct SymbolTable Symbols[SYMBOLS]; // Symbol table, indexed by symbol ID
extern_ char* TokenStrings[]; // Short operator spellings (defined in Main.c)
extern_ char* TokenNames[]; // Human-readable token names (defined in Main.c)
extern_ int CurrentFunction; // Symbol ID handed to AsReturn when assembling OP_RET
extern_ int Line; // Current source line, bumped by the lexer on '\n'
extern_ int Overread; // One pushed-back character; 0 means "nothing pending"
extern_ FILE* SourceFile; // Input being compiled
extern_ FILE* OutputFile; // Assembly output
extern_ struct Token CurrentToken; // Most recently scanned token
extern_ char CurrentIdentifier[TEXTLEN + 1]; // Text of the last identifier/keyword read

320
include/Defs.h Normal file
View File

@ -0,0 +1,320 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/*
* ARithmetic tokens are prefixed AR.
* LIteral tokens are prefixed LI.
* KeyWords are prefixed KW.
* TYpes are prefixed TY.
* CoMParisons are prefixed CMP.
*
* NOTE: Tokens are different from Syntax Operations!
*
* Tokens should represent the characters that invoke them,
* not the actions they perform.
*
*/
// The numeric order is load-bearing: Precedence[] (Parser.c) and TokenNames[]
// (Main.c) are indexed by these values, and AR_PLUS..LI_EQUAL must stay
// aligned with OP_ADD..OP_ASSIGN in SyntaxOps.
enum TokenTypes {
LI_EOF, // End of input
AR_PLUS, // Arithmetic +
AR_MINUS, // Arithmetic -
AR_STAR, // Arithmetic *
AR_SLASH, // Arithmetic /
CMP_EQUAL, // =?
CMP_INEQ, // !=
CMP_LT, // <
CMP_GT, // >
CMP_LTE, // <=
CMP_GTE, // =>
LI_EQUAL, // =
LI_INT, // Integer literal
LI_SEMIC, // ;
LI_LBRAC, // {
LI_RBRAC, // }
LI_LPARE, // (
LI_RPARE, // )
TY_IDENTIFIER, // Identifier name. Variable, function, etc.
TY_NONE, // No return type. Literal void.
TY_CHAR, // "char" type keyword
TY_INT, // "int" type keyword
TY_LONG, // "long" type keyword
TY_VOID, // "void" type keyword
KW_FUNC, // :: function name incoming
KW_PRINT, // "print" keyword
KW_IF, // "if" keyword
KW_ELSE, // "else" keyword
KW_WHILE, // "while" keyword
KW_FOR, // "for" keyword
KW_RETURN // "return" keyword
};
/*
* All Syntax Operations are prefixed OP.
* Terminal Operations are prefixed TERM.
* L-Values are prefixed LV.
* Reference Operations are prefixed REF.
*
* These represent the actions that a token will perform.
* These are used exclusively in AST construction.
*
* It is important that Tokens and Operations are logically separated,
* but that the Operation's index is the same as the Token that invokes it.
*/
// Starts at 1 so that OP_ADD..TERM_INTLITERAL carry the same values as
// AR_PLUS..LI_INT in TokenTypes; ParseTokenToOperation relies on that.
enum SyntaxOps {
OP_ADD = 1, // Add two numbers.
OP_SUBTRACT, // Subtract two numbers.
OP_MULTIPLY, // Multiply two numbers.
OP_DIVIDE, // Divide two numbers.
OP_EQUAL, // Compare equality
OP_INEQ, // Compare inequality
OP_LESS, // Less than?
OP_GREAT, // Greater than?
OP_LESSE, // Less than or Equal to?
OP_GREATE, // Greater than or Equal to?
OP_ASSIGN, // Assign an l-value
TERM_INTLITERAL, // Integer Literal. This is a virtual operation, so it's a terminal.
REF_IDENT, // Reference (read) an identifier (variable).
LV_IDENT, // Write an identifier in the form of an l-value.
OP_WIDEN, // Something contains a type that needs to be cast up
OP_CALL, // Call a function
OP_RET, // Return from a function
OP_COMP, // Compound statements need a way to be "glued" together. This is one of those mechanisms
OP_IF, // If statement
OP_LOOP, // FOR, WHILE
OP_PRINT, // Print statement
OP_FUNC, // Define a function
};
// A node in the tree that forms the syntax of Erythro.
// Despite the "binary tree" heritage each node has three child slots;
// Middle is only read by the if-statement assembler (the true block).
struct ASTNode {
int Operation; // One of SyntaxOps
int ExprType; // DataTypes entry describing the value this node yields
struct ASTNode* Left;
struct ASTNode* Middle;
struct ASTNode* Right;
union {
int IntValue; // TERM_INTLITERAL's value
int ID; // Symbols[] index for REF_IDENT, LV_IDENT, OP_CALL and OP_FUNC nodes
} Value;
};
// A single lexical token as filled in by Tokenise.
struct Token {
int type; // One of TokenTypes
int value; // Numeric value; only set when type is LI_INT
};
/*
* One entry of the Symbol Table, used for variables, functions and
* assorted goodies. Entries live in the global Symbols[] array and are
* referred to everywhere else by their index.
*/
struct SymbolTable {
char* Name; // Identifier text; also used verbatim as the assembly label
int Type; // An entry in DataTypes, referring to the type of this data
int Structure; // An entry in StructureType - metadata on how to process the data
int EndLabel; // The number of the label to jump to, in order to exit this function (if applicable)
};
/*
* The primitive data types for the language.
* The Types[] name table in Assembler.c is indexed by these values,
* so the two must be kept in the same order.
* //TODO: Move back into TokenTypes
*/
enum DataTypes {
RET_NONE, // No return type. Literal void.
RET_CHAR, // "char" type keyword
RET_INT, // "int" type keyword
RET_LONG, // "long" type keyword
RET_VOID, // "void" type keyword
};
/*
* The type of the structure of data being examined.
* Stored in SymbolTable.Structure.
* //TODO: move into TokenTypes?
*/
enum StructureType {
ST_VAR, // This is variable
ST_FUNC // This is a function
// Not yet implemented: This is an enum
// Not yet implemented: This is a struct
// Not yet implemented: This is a typedef
};
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * L E X I N G * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Public lexer interface.
// ReadIdentifier and ReadKeyword are 'static' helpers private to Lexer.c and
// are defined there before their first use. Declaring them here gave every
// other file that includes this header an internal-linkage function that is
// never defined, so they are intentionally not listed.
int Tokenise(struct Token* Token); // Scan the next token; returns 0 at end of input
int TypesCompatible(int* Left, int* Right, int STRICT);
void VerifyToken(int Type, char* TokenExpected); // Require the current token type, then advance
void RejectToken(struct Token* Token); // Push one token back for the next Tokenise call
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * S Y N T A X T R E E * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Allocate a node with all three child slots given explicitly.
struct ASTNode* ConstructASTNode(int Operation, int Type,
struct ASTNode* Left,
struct ASTNode* Middle,
struct ASTNode* Right,
int IntValue);
// Convenience wrapper: a node with no children.
struct ASTNode* ConstructASTLeaf(int Operation, int Type, int IntValue);
// Convenience wrapper: a node with only a Left child.
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, int IntValue);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * P A R S I N G * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// The commented-out prototypes below belong to earlier parser iterations.
//struct ASTNode* ParseNewASTNode(void);
//struct ASTNode* ParseAdditiveASTNode(void);
// Precedence-climbing expression parser.
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence);
int ParseAST(struct ASTNode* Node);
//void ParseStatements(void);
struct ASTNode* ParseStatement(void);
struct ASTNode* ParseFunction();
struct ASTNode* ParseCompound();
struct ASTNode* CallFunction();
struct ASTNode* ReturnStatement();
int ParseType(int Token);
// Map an operator token onto the SyntaxOps value of the same number.
int ParseTokenToOperation(int Token);
struct ASTNode* PrintStatement(void);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * S Y M B O L T A B L E * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Look a name up in Symbols[]; callers treat a result of -1 as "not found".
int FindSymbol(char* Symbol);
int AddSymbol(char* Name, int Type, int Structure);
// As AddSymbol, additionally taking the label used to exit the function.
int AddFunctionSymbol(char* Name, int Type, int Structure, int EndLabel);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O N T R O L S T A T U S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Fatal-error helpers: each prints the message plus the current source line
// to stderr and terminates the process with exit status 1.
void Die(char* Error);
void DieMessage(char* Error, char* Reason); // Appends a string detail
void DieDecimal(char* Error, int Number); // Appends a decimal detail
void DieChar(char* Error, int Char); // Appends a character detail
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O D E G E N E R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Tree walker: emits assembly for Node and returns the register index that
// holds its value, or -1 when the node produces no value.
int AssembleTree(struct ASTNode* Node, int Register, int ParentOp);
// Register pool management. Registers are referred to by index.
void DeallocateAllRegisters();
int RetrieveRegister();
void DeallocateRegister(int Register);
int PrimitiveSize(int Type);
// Arithmetic and data movement. Each returns the register holding the result.
int AsLoad(int Value);
int AsAdd(int Left, int Right);
int AsMul(int Left, int Right);
int AsSub(int Left, int Right);
int AsDiv(int Left, int Right);
int AsLdVar(int ID);
int AsStrVar(int Register, int ID);
void AsNewSymb(int ID);
// Comparison wrappers; each forwards to AsCompare with the matching operation.
int AsEqual(int Left, int Right);
int AsIneq(int Left, int Right);
int AsLess(int Left, int Right);
int AsGreat(int Left, int Right);
int AsLessE(int Left, int Right);
int AsGreatE(int Left, int Right);
// Compare and jump to Label when the comparison is false.
int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label);
// Compare and leave a 0/1 result in a register.
int AsCompare(int Operation, int RegisterLeft, int RegisterRight);
// Control flow.
int AsIf(struct ASTNode* Node);
int NewLabel(void);
void AsJmp(int Label);
void AsLabel(int Label);
int AsReturn(int Register, int FuncID);
int AsCall(int Register, int FuncID);
int AsWhile(struct ASTNode* Node);
// Output scaffolding.
void AssemblerPrint(int Register);
void AssemblerPreamble();
void AsFunctionPreamble(int ID);
void AsFunctionEpilogue(int ID);
/* * * * * * * * * * * * * * * * * * * * * * *
* * * * D E C L A R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * */
// Statement-level parsers. Those returning a node hand back the AST for
// the construct they parsed.
void BeginVariableDeclaration(void);
struct ASTNode* ParseIdentifier(void);
struct ASTNode* IfStatement();
struct ASTNode* WhileStatement();
struct ASTNode* ForStatement();

519
src/Assembler.c Normal file
View File

@ -0,0 +1,519 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
/*
* If the entry in UsedRegisters
* that correlates to the position of a register in Registers
* is 1
* then that register is classed as used -
* it has useful data inside it.
*
* if the entry is 0, then it is free.
*/
// Allocation flags, parallel to the three register-name tables below.
static int UsedRegisters[4];
// The same four registers spelled at 64-bit, 32-bit and 8-bit width.
static char* Registers[4] = { "%r8", "%r9", "%r10", "%r11" };
static char* DoubleRegisters[4] = { "%r8d", "%r9d", "%r10d", "%r11d" };
static char* ByteRegisters[4] = { "%r8b", "%r9b", "%r10b", "%r11b" };
// Both comparison tables are indexed by (Operation - OP_EQUAL), i.e. in the
// order OP_EQUAL, OP_INEQ, OP_LESS, OP_GREAT, OP_LESSE, OP_GREATE.
// Comparisons holds the set-byte instruction for each test...
static char* Comparisons[6] = { "sete", "setne", "setl", "setg", "setle", "setge" };
// ...and InvComparisons the jump taken when that same test is FALSE.
static char* InvComparisons[6] = { "jne", "je", "jge", "jle", "jg", "jl"};
// Printable names for DataTypes, indexed by the enum value.
static char* Types[5] = { "none", "char", "int", "long", "void" };
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * R O O T O F A S S E M B L E R * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Recursively emit assembly for an AST.
 *
 * Node:     the subtree to assemble.
 * Register: context passed down from the parent. For a comparison directly
 *           under OP_IF / OP_LOOP it is the label to jump to when the test
 *           fails; for LV_IDENT it is the register holding the value to store.
 *           -1 means "nothing".
 * ParentOp: the Operation of the parent node (0 at the root).
 *
 * Returns the index of the register holding the node's value, or -1 when the
 * node does not yield a value.
 */
int AssembleTree(struct ASTNode* Node, int Register, int ParentOp) {
    // Default both to the "no register" marker so a missing child can never
    // feed an indeterminate value into the code below.
    int LeftVal = -1, RightVal = -1;

    // Statements that drive the assembly of their own children.
    switch(Node->Operation) {
        case OP_IF:
            return AsIf(Node);

        case OP_LOOP:
            return AsWhile(Node);

        case OP_COMP:
            // Each half of a compound is a complete statement, so no register
            // survives from one half to the next.
            AssembleTree(Node->Left, -1, Node->Operation);
            DeallocateAllRegisters();
            AssembleTree(Node->Right, -1, Node->Operation);
            DeallocateAllRegisters();
            return -1;

        case OP_FUNC:
            AsFunctionPreamble(Node->Value.ID);
            AssembleTree(Node->Left, -1, Node->Operation);
            AsFunctionEpilogue(Node->Value.ID);
            return -1;
    }

    // Everything else: evaluate the children first. The left result is handed
    // to the right child so that an LV_IDENT knows which register to store.
    if(Node->Left)
        LeftVal = AssembleTree(Node->Left, -1, Node->Operation);

    if(Node->Right)
        RightVal = AssembleTree(Node->Right, LeftVal, Node->Operation);

    switch(Node->Operation) {
        case OP_ADD:
            return AsAdd(LeftVal, RightVal);

        case OP_SUBTRACT:
            return AsSub(LeftVal, RightVal);

        case OP_MULTIPLY:
            return AsMul(LeftVal, RightVal);

        case OP_DIVIDE:
            return AsDiv(LeftVal, RightVal);

        case OP_ASSIGN:
            // The store itself was emitted by the LV_IDENT child.
            return RightVal;

        case OP_WIDEN:
            // Registers are always full width, so widening emits nothing.
            return LeftVal;

        case OP_RET:
            AsReturn(LeftVal, CurrentFunction);
            return -1;

        case OP_CALL:
            printf("\tReturning from %s, %d\n", Symbols[Node->Value.ID].Name, Node->Value.ID);
            return AsCall(LeftVal, Node->Value.ID);

        case OP_EQUAL:
        case OP_INEQ:
        case OP_LESS:
        case OP_GREAT:
        case OP_LESSE:
        case OP_GREATE:
            // Under a branch the comparison jumps (Register carries the
            // label); anywhere else it produces a 0/1 value.
            if(ParentOp == OP_IF || ParentOp == OP_LOOP)
                return AsCompareJmp(Node->Operation, LeftVal, RightVal, Register);
            else
                return AsCompare(Node->Operation, LeftVal, RightVal);

        case REF_IDENT:
            return AsLdVar(Node->Value.ID);

        case LV_IDENT:
            return AsStrVar(Register, Node->Value.ID);

        case TERM_INTLITERAL:
            return AsLoad(Node->Value.IntValue);

        case OP_PRINT:
            AssemblerPrint(LeftVal);
            DeallocateAllRegisters();
            return -1;

        default:
            DieDecimal("Unknown ASM Operation", Node->Operation);
    }

    // DieDecimal does not return; this keeps every path returning a value.
    return -1;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * R E G I S T E R M A N A G E M E N T * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Mark every register in the pool as free.
void DeallocateAllRegisters() {
    for(int Slot = 0; Slot < 4; Slot++)
        UsedRegisters[Slot] = 0;
}
// Claim the lowest-numbered free register and return its index.
// Terminates the program when all four registers are in use.
int RetrieveRegister() {
    int Candidate = 0;

    // Skip over registers that are already taken.
    while(Candidate < 4 && UsedRegisters[Candidate] != 0)
        Candidate++;

    if(Candidate == 4) {
        fprintf(stderr, "Out of registers!\n");
        exit(1);
    }

    UsedRegisters[Candidate] = 1;
    return Candidate;
}
/*
 * Release a single register back to the pool.
 *
 * Register: index previously returned by RetrieveRegister.
 *
 * Terminates the program if the index is outside the pool (for example the
 * -1 "no register" marker) or if the register is not currently allocated.
 */
void DeallocateRegister(int Register) {
    const int PoolSize = (int)(sizeof(UsedRegisters) / sizeof(UsedRegisters[0]));

    // The range test must come first: indexing with an invalid value would
    // read outside the UsedRegisters array.
    if(Register < 0 || Register >= PoolSize || UsedRegisters[Register] != 1) {
        fprintf(stderr, "Error trying to free register %d\n", Register);
        exit(1);
    }

    UsedRegisters[Register] = 0;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O D E G E N E R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Hand out label numbers 1, 2, 3, ... - a fresh one on every call.
int NewLabel(void) {
    static int LastIssued = 0;
    LastIssued += 1;
    return LastIssued;
}
/*
 * Assemble an if statement.
 * Node->Left is the condition, Node->Middle the true block and
 * Node->Right the optional else block. Always returns -1.
 */
int AsIf(struct ASTNode* Node) {
    const int HasElse = (Node->Right != NULL);
    // The false label is always needed; the end label only with an else.
    int FalseLabel = NewLabel();
    int EndLabel = 0;
    if(HasElse)
        EndLabel = NewLabel();

    // Condition: jumps to FalseLabel when it does not hold.
    AssembleTree(Node->Left, FalseLabel, Node->Operation);
    DeallocateAllRegisters();

    // True block.
    AssembleTree(Node->Middle, -1, Node->Operation);
    DeallocateAllRegisters();

    if(!HasElse) {
        AsLabel(FalseLabel);
        return -1;
    }

    // With an else: the true block must hop over it.
    AsJmp(EndLabel);
    AsLabel(FalseLabel);
    AssembleTree(Node->Right, -1, Node->Operation);
    DeallocateAllRegisters();
    AsLabel(EndLabel);
    return -1;
}
/*
 * Compare two registers and leave 1 (true) or 0 (false) in a register.
 *
 * Operation:     one of OP_EQUAL..OP_GREATE.
 * RegisterLeft:  register holding the left operand; freed on return.
 * RegisterRight: register holding the right operand; reused for the result.
 *
 * Returns RegisterRight.
 */
int AsCompare(int Operation, int RegisterLeft, int RegisterRight) {
    printf("Comparing registers %d & %d\n", RegisterLeft, RegisterRight);

    if(Operation < OP_EQUAL || Operation > OP_GREATE)
        Die("Bad Operation in AsCompare");

    // AT&T operand order: this sets the flags for (Left - Right).
    fprintf(OutputFile, "\tcmpq\t%s, %s\n", Registers[RegisterRight], Registers[RegisterLeft]);
    // setXX writes only the low byte of the result register...
    fprintf(OutputFile, "\t%s\t\t%s\n", Comparisons[Operation - OP_EQUAL], ByteRegisters[RegisterRight]);
    // ...so zero-extend that byte over the SAME register, which is the one
    // returned to the caller. Extending into RegisterLeft (freed below) would
    // leave stale upper bits in the result.
    fprintf(OutputFile, "\tmovzbq\t%s, %s\n", ByteRegisters[RegisterRight], Registers[RegisterRight]);

    DeallocateRegister(RegisterLeft);
    return RegisterRight;
}
/*
 * Compare two registers and jump to Label when the comparison is FALSE.
 * Used for the conditions of if and loop statements. Frees every register
 * and returns -1.
 */
int AsCompareJmp(int Operation, int RegisterLeft, int RegisterRight, int Label) {
    if(!(Operation >= OP_EQUAL && Operation <= OP_GREATE))
        Die("Bad Operation in AsCompareJmp");

    const int Slot = Operation - OP_EQUAL;
    const char* LeftName = Registers[RegisterLeft];
    const char* RightName = Registers[RegisterRight];

    printf("\tBranching on comparison of registers %d & %d, with operation %s\n\n", RegisterLeft, RegisterRight, Comparisons[Slot]);

    fprintf(OutputFile, "\tcmpq\t%s, %s\n", RightName, LeftName);
    // Inverted jump: taken when the condition does not hold.
    fprintf(OutputFile, "\t%s\tL%d\n", InvComparisons[Slot], Label);

    DeallocateAllRegisters();
    return -1;
}
// Emit an unconditional jump to label L<Label>, logging it to stdout.
void AsJmp(int Label) {
    const int Target = Label;
    printf("\t\tJumping to label %d\n", Target);
    fprintf(OutputFile, "\tjmp\tL%d\n", Target);
}
// Emit the definition of label L<Label>, logging it to stdout.
void AsLabel(int Label) {
    const int Number = Label;
    printf("\tCreating label %d\n", Number);
    fprintf(OutputFile, "L%d:\n", Number);
}
/*
 * Assemble a loop.
 * Node->Left is the condition and Node->Right the body. Always returns -1.
 */
int AsWhile(struct ASTNode* Node) {
    const int LoopTop = NewLabel();
    const int LoopExit = NewLabel();

    printf("\tInitiating loop between labels %d and %d\n", LoopTop, LoopExit);

    // Top of the loop: the condition is re-tested on every iteration.
    AsLabel(LoopTop);

    // The condition jumps to LoopExit as soon as it fails.
    AssembleTree(Node->Left, LoopExit, Node->Operation);
    DeallocateAllRegisters();

    // Loop body.
    AssembleTree(Node->Right, -1, Node->Operation);
    DeallocateAllRegisters();

    // Go round again, then place the exit label after the loop.
    AsJmp(LoopTop);
    AsLabel(LoopExit);

    return -1;
}
int AsLoad(int Value) {
int Register = RetrieveRegister();
printf("\tStoring value %d into %s\n", Value, Registers[Register]);
fprintf(OutputFile, "\tmovq\t$%d, %s\n", Value, Registers[Register]);
return Register;
}
// Right += Left. The Left register is freed; the sum is returned in Right.
int AsAdd(int Left, int Right) {
    const char* Source = Registers[Left];
    const char* Accumulator = Registers[Right];

    printf("\tAdding Registers %s, %s\n", Source, Accumulator);
    fprintf(OutputFile, "\taddq\t%s, %s\n", Source, Accumulator);

    DeallocateRegister(Left);
    return Right;
}
// Right *= Left. The Left register is freed; the product is returned in Right.
int AsMul(int Left, int Right) {
    const char* Source = Registers[Left];
    const char* Accumulator = Registers[Right];

    printf("\tMultiplying Registers %s, %s\n", Source, Accumulator);
    fprintf(OutputFile, "\timulq\t%s, %s\n", Source, Accumulator);

    DeallocateRegister(Left);
    return Right;
}
// Left -= Right. The Right register is freed; the difference is returned in
// Left (subtraction is not commutative, so the roles are the reverse of AsAdd).
int AsSub(int Left, int Right) {
    const char* Minuend = Registers[Left];
    const char* Subtrahend = Registers[Right];

    printf("\tSubtracting Registers %s, %s\n", Minuend, Subtrahend);
    fprintf(OutputFile, "\tsubq\t%s, %s\n", Subtrahend, Minuend);

    DeallocateRegister(Right);
    return Left;
}
// Left /= Right (signed). The Right register is freed; the quotient is
// returned in Left. The emitted sequence goes through %rax, and cqo/idivq
// also overwrite %rdx.
int AsDiv(int Left, int Right) {
    const char* Dividend = Registers[Left];
    const char* Divisor = Registers[Right];

    printf("\tDividing Registers %s, %s\n", Dividend, Divisor);

    // idivq divides %rdx:%rax, so move the dividend in and sign-extend it.
    fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Dividend);
    fprintf(OutputFile, "\tcqo\n");
    fprintf(OutputFile, "\tidivq\t%s\n", Divisor);
    // The quotient comes back in %rax.
    fprintf(OutputFile, "\tmovq\t%%rax, %s\n", Dividend);

    DeallocateRegister(Right);
    return Left;
}
/*
 * Load a global variable into a freshly allocated register.
 *
 * ID: index of the variable in Symbols[].
 *
 * Returns the register holding the value, extended to the full 64 bits.
 * Terminates the program when the symbol's type cannot be loaded.
 */
int AsLdVar(int ID) {
    int Reg = RetrieveRegister();

    printf("\tStoring %s's contents into %s\n", Symbols[ID].Name, Registers[Reg]);

    switch(Symbols[ID].Type) {
        case RET_CHAR:
            // movzbq zeroes, then moves a byte into the quad register
            fprintf(OutputFile, "\tmovzbq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
            break;

        case RET_INT:
            // An int is four bytes: sign-extend all of them into the quad
            // register. (movzbl would read a single byte, and cannot take a
            // 64-bit destination in the first place.)
            fprintf(OutputFile, "\tmovslq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
            break;

        case RET_LONG:
            fprintf(OutputFile, "\tmovq\t%s(%%rip), %s\n", Symbols[ID].Name, Registers[Reg]);
            break;

        default:
            DieMessage("Bad type for loading", Types[Symbols[ID].Type]);
    }

    return Reg;
}
/*
 * Store the value held in a register into a global variable.
 *
 * Register: register holding the value to store.
 * ID:       index of the destination variable in Symbols[].
 *
 * Returns Register, which still holds the value after the store.
 * Terminates the program when the symbol's type cannot be stored.
 */
int AsStrVar(int Register, int ID) {
    printf("\tStoring contents of %s into %s\n", Registers[Register], Symbols[ID].Name);

    // The source must be the register that was passed in. Allocating a new
    // register here would store whatever that empty register happened to
    // contain, and would leak it as well.
    switch(Symbols[ID].Type) {
        case RET_CHAR:
            // Only the low byte is written
            fprintf(OutputFile, "\tmovb\t%s, %s(%%rip)\n", ByteRegisters[Register], Symbols[ID].Name);
            break;

        case RET_INT:
            fprintf(OutputFile, "\tmovl\t%s, %s(%%rip)\n", DoubleRegisters[Register], Symbols[ID].Name);
            break;

        case RET_LONG:
            fprintf(OutputFile, "\tmovq\t%s, %s(%%rip)\n", Registers[Register], Symbols[ID].Name);
            break;

        default:
            DieMessage("Bad type for saving", Types[Symbols[ID].Type]);
    }

    return Register;
}
// Reserve storage for a global symbol with a .comm directive, using the
// symbol's primitive size as both its size and its alignment.
void AsNewSymb(int ID) {
    const int Bytes = PrimitiveSize(Symbols[ID].Type);
    fprintf(OutputFile, "\t.comm\t%s, %d, %d\n", Symbols[ID].Name, Bytes, Bytes);
}
/*
 * Call a function with a single argument.
 *
 * Register: register holding the argument; freed once the call is emitted.
 * FuncID:   index of the callee in Symbols[].
 *
 * Returns a newly allocated register holding the function's return value.
 * That register stays allocated: releasing it here would let the very next
 * allocation overwrite the result before the caller can use it.
 *
 * NOTE(review): %r8-%r11 are not preserved across calls, so any other live
 * value held in the pool may be clobbered by the callee.
 */
int AsCall(int Register, int FuncID) {
    int OutRegister = RetrieveRegister();

    printf("\t\tCalling function %s with parameter %s\n", Symbols[FuncID].Name, Registers[Register]);
    printf("\t\t\tFunction returns into %s\n", Registers[OutRegister]);

    // The argument travels in %rcx and the result comes back in %rax.
    fprintf(OutputFile, "\tmovq\t%s, %%rcx\n", Registers[Register]);
    fprintf(OutputFile, "\tcall\t%s\n", Symbols[FuncID].Name);
    fprintf(OutputFile, "\tmovq\t%%rax, %s\n", Registers[OutRegister]);

    // The argument has been consumed; the result has not.
    DeallocateRegister(Register);
    return OutRegister;
}
/*
 * Emit a return: move the value into the accumulator at the width of the
 * function's declared type, then jump to the function's end label.
 *
 * Register: register holding the value to return.
 * FuncID:   index of the enclosing function in Symbols[].
 *
 * Returns -1, as a return statement yields no register.
 * Terminates the program when the function's type cannot be returned.
 */
int AsReturn(int Register, int FuncID) {
    printf("\t\tCreating return for function %s\n", Symbols[FuncID].Name);

    switch(Symbols[FuncID].Type) {
        case RET_CHAR:
            fprintf(OutputFile, "\tmovzbl\t%s, %%eax\n", ByteRegisters[Register]);
            break;

        case RET_INT:
            fprintf(OutputFile, "\tmovl\t%s, %%eax\n", DoubleRegisters[Register]);
            break;

        case RET_LONG:
            fprintf(OutputFile, "\tmovq\t%s, %%rax\n", Registers[Register]);
            break;

        default:
            DieMessage("Bad function type in generating return", Types[Symbols[FuncID].Type]);
    }

    AsJmp(Symbols[FuncID].EndLabel);

    // The function is declared int, so it must not fall off the end.
    return -1;
}
// Yield 1 when Left =? Right holds, 0 otherwise.
int AsEqual(int Left, int Right) {
    const int Result = AsCompare(OP_EQUAL, Left, Right);
    return Result;
}
// Yield 1 when Left != Right holds, 0 otherwise.
int AsIneq(int Left, int Right) {
    const int Result = AsCompare(OP_INEQ, Left, Right);
    return Result;
}
// Yield 1 when Left < Right holds, 0 otherwise.
int AsLess(int Left, int Right) {
    const int Result = AsCompare(OP_LESS, Left, Right);
    return Result;
}
// Yield 1 when Left > Right holds, 0 otherwise.
int AsGreat(int Left, int Right) {
    const int Result = AsCompare(OP_GREAT, Left, Right);
    return Result;
}
// Yield 1 when Left <= Right holds, 0 otherwise.
int AsLessE(int Left, int Right) {
    const int Result = AsCompare(OP_LESSE, Left, Right);
    return Result;
}
// Yield 1 when Left => Right holds (Erythro spells greater-or-equal "=>"),
// 0 otherwise.
int AsGreatE(int Left, int Right) {
    const int Result = AsCompare(OP_GREATE, Left, Right);
    return Result;
}
// Emit a call to PrintInteger with the register's value as its argument
// (passed in %rcx), then free the register.
void AssemblerPrint(int Register) {
    const char* Source = Registers[Register];

    printf("\t\tPrinting Register %s\n", Source);

    fprintf(OutputFile, "\tmovq\t%s, %%rcx\n", Source);
    fprintf(OutputFile, "\tcall\tPrintInteger\n");

    DeallocateRegister(Register);
}
// Start the output file: reset the register pool and open the text section.
void AssemblerPreamble() {
    static const char Header[] = "\t.text\n";

    DeallocateAllRegisters();
    fputs(Header, OutputFile);
}
/*
 * Emit the opening of a function: section and symbol directives, the label,
 * a frame-pointer prologue and a 32-byte stack reservation.
 * For "main" a call to __main is emitted as well.
 */
void AsFunctionPreamble(int FunctionID) {
    char* Label = Symbols[FunctionID].Name;
    const int IsEntryPoint = (strcmp(Label, "main") == 0);

    fprintf(OutputFile,
            "\t.text\n"
            "\t.globl\t%s\n"
            "\t.def\t%s; .scl 2; .type 32; .endef\n"
            "%s:\n"
            "\tpushq\t%%rbp\n"
            "\tmovq\t%%rsp, %%rbp\n"
            "\tsubq\t$32, %%rsp\n", Label, Label, Label);

    // PECOFF requires we call the global initialisers
    if(IsEntryPoint)
        fprintf(OutputFile, "\tcall\t__main\n");
}
/*
 * Emit the end of a function: the label that return statements jump to,
 * followed by the mirror image of the prologue.
 *
 * FunctionID: index of the function in Symbols[].
 */
void AsFunctionEpilogue(int FunctionID) {
    AsLabel(Symbols[FunctionID].EndLabel);

    // Undo the prologue in reverse order. The 32 reserved bytes sit on top of
    // the saved %rbp, so they have to be released BEFORE the pop; popping
    // first would load %rbp from the reserved area and hand the caller a
    // corrupted frame pointer.
    fputs(
        "\taddq\t$32, %rsp\n"
        "\tpopq\t%rbp\n"
        "\tret\n",
        OutputFile);
}

322
src/Lexer.c Normal file
View File

@ -0,0 +1,322 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * C H A R S T R E AM * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Push one character back so that the next NextChar() call returns it.
// Only a single character can be pending at a time.
static void ReturnCharToStream(int Char) {
    const int Pending = Char;
    Overread = Pending;
}
// Return the next character of input: the pushed-back character if there is
// one, otherwise a fresh read from SourceFile. Fresh newlines advance Line.
static int NextChar(void) {
    if(Overread != 0) {
        const int Pending = Overread;
        Overread = 0;
        return Pending;
    }

    const int Fresh = fgetc(SourceFile);
    Line += (Fresh == '\n');

    return Fresh;
}
// Return the next character that is not a space, tab, newline or
// carriage return.
static int FindChar() {
    for(;;) {
        const int Char = NextChar();

        switch(Char) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                // Whitespace: keep looking.
                continue;

            default:
                return Char;
        }
    }
}
/*
 * Find the position of a character inside a string.
 *
 * String: the characters to search, e.g. "0123456789".
 * Char:   the character to look for.
 *
 * Returns the zero-based index of the first match, or -1 when Char does not
 * occur in String.
 */
static int FindDigitFromPos(char* String, char Char) {
    // strchr treats the terminating NUL as part of the string, so searching
    // for '\0' would "succeed" and report an index one past the last real
    // character (a NUL byte in the input would lex as the digit 10).
    if(Char == '\0')
        return -1;

    char* Result = strchr(String, Char);
    return Result ? (int)(Result - String) : -1;
}
// Require the current token to be of the given type and advance past it.
// Otherwise report what was expected (TokenExpected) and exit.
void VerifyToken(int Type, char* TokenExpected) {
    if(CurrentToken.type != Type) {
        printf("Expected %s on line %d\n", TokenExpected, Line);
        exit(1);
    }

    Tokenise(&CurrentToken);
}
// The token handed back by the parser, if any. Tokenise serves it up again
// on its next call instead of reading new input.
static struct Token* RejectedToken = NULL;

// Hand a token back to the lexer. Only one token can be pending.
void RejectToken(struct Token* Token) {
    if(RejectedToken)
        Die("Cannot reject two tokens in a row!");

    RejectedToken = Token;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * L I T E R A L S A N D I D E N T I F I E R S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Read a decimal integer literal whose first character is Char.
// The first non-digit character is pushed back onto the stream.
static int ReadInteger(int Char) {
    int Total = 0;

    for(;;) {
        const int Digit = FindDigitFromPos("0123456789", Char);
        if(Digit < 0)
            break;

        Total = Total * 10 + Digit;
        Char = NextChar();
    }

    ReturnCharToStream(Char);
    return Total;
}
// Read an identifier (variable, keyword or function name) starting with Char
// into Buffer, NUL-terminated. At most Limit - 1 characters are accepted;
// anything longer is a fatal error. Returns the identifier's length.
static int ReadIdentifier(int Char, char* Buffer, int Limit) {
    int Length = 0;

    // Letters, digits and underscores may appear inside an identifier.
    for(; isalnum(Char) || Char == '_'; Char = NextChar()) {
        if(Length >= Limit - 1) {
            printf("Identifier too long: %d\n", Line);
            exit(1);
        }

        Buffer[Length] = Char;
        Length++;
    }

    // Char is the first character after the identifier: give it back.
    ReturnCharToStream(Char);
    Buffer[Length] = '\0';

    return Length;
}
/*
* This function is what defines the valid keywords for the language.
* Returns the keyword's token type, or 0 when Str is not a keyword.
* //TODO: More optimisations?
*
*/
static int ReadKeyword(char* Str) {
    static const struct {
        const char* Text;
        int Token;
    } Keywords[] = {
        { "::",     KW_FUNC   },
        { "char",   TY_CHAR   },
        { "else",   KW_ELSE   },
        { "for",    KW_FOR    },
        { "int",    TY_INT    },
        { "if",     KW_IF     },
        { "long",   TY_LONG   },
        { "print",  KW_PRINT  },
        { "return", KW_RETURN },
        { "void",   TY_VOID   },
        { "while",  KW_WHILE  },
    };
    const size_t Count = sizeof(Keywords) / sizeof(Keywords[0]);

    for(size_t Index = 0; Index < Count; Index++) {
        if(!strcmp(Str, Keywords[Index].Text))
            return Keywords[Index].Token;
    }

    return 0;
}
/* * * * * * * * * * * * * * * * * * * * *
* * * * T O K E N I S E R * * * *
* * * * * * * * * * * * * * * * * * * * */
/*
 * Scan the next token from the input into *Token.
 *
 * If a token was handed back with RejectToken, that token is delivered
 * instead and no input is consumed.
 *
 * Returns 0 when the end of input is reached (Token->type is LI_EOF) and 1
 * otherwise. Characters that cannot start a token are a fatal error.
 */
int Tokenise(struct Token* Token) {
    int Char, TokenType;

    if(RejectedToken != NULL) {
        // Copy the rejected token out to the caller. Assigning only the local
        // pointer would leave *Token untouched whenever the caller's storage
        // differs from the rejected token's.
        *Token = *RejectedToken;
        RejectedToken = NULL;
        return 1;
    }

    Char = FindChar();

    switch(Char) {
        case EOF:
            Token->type = LI_EOF;
            return 0;

        case '+':
            Token->type = AR_PLUS;
            break;

        case '-':
            Token->type = AR_MINUS;
            break;

        case '*':
            Token->type = AR_STAR;
            break;

        case '/':
            Token->type = AR_SLASH;
            break;

        case '=':
            Char = NextChar();
            // If the next char is ?, we have =?, the compare equality token.
            if(Char == '?') {
                Token->type = CMP_EQUAL;
            // If the next char is >, we have =>, the greater than or equal token.
            } else if(Char == '>') {
                Token->type = CMP_GTE;
            // If none of the above match, we have = and an extra char. Return the char and set the token
            } else {
                ReturnCharToStream(Char);
                Token->type = LI_EQUAL;
            }
            break;

        case '!':
            Char = NextChar();
            // If the next char is =, we have !=, the compare inequality operator.
            if(Char == '=') {
                Token->type = CMP_INEQ;
            // A lone ! is not a token. Reject it rather than hand back a
            // token whose type was never set.
            } else {
                DieChar("Unrecognized character", '!');
            }
            break;

        case '<':
            Char = NextChar();
            // If the next char is =, we have <=, the less than or equal comparator.
            if(Char == '=') {
                Token->type = CMP_LTE;
            } else {
                ReturnCharToStream(Char);
                Token->type = CMP_LT;
            }
            break;

        case '>':
            // There is no special casing for >. Greater than or equal is =>
            Token->type = CMP_GT;
            break;

        case ';':
            Token->type = LI_SEMIC;
            break;

        case '(':
            Token->type = LI_LPARE;
            break;

        case ')':
            Token->type = LI_RPARE;
            break;

        case '{':
            Token->type = LI_LBRAC;
            break;

        case '}':
            Token->type = LI_RBRAC;
            break;

        case ':':
            Char = NextChar();
            // :: introduces a function. A lone : is not a token.
            if(Char == ':') {
                Token->type = KW_FUNC;
            } else {
                DieChar("Unrecognized character", ':');
            }
            break;

        default:
            if(isdigit(Char)) {
                Token->value = ReadInteger(Char);
                Token->type = LI_INT;
                break;
            } else if(isalpha(Char) || Char == '_') { // This is what defines what a variable/function/keyword can START with.
                ReadIdentifier(Char, CurrentIdentifier, TEXTLEN);
                // ReadKeyword yields 0 for anything that is not a keyword.
                TokenType = ReadKeyword(CurrentIdentifier);
                Token->type = TokenType ? TokenType : TY_IDENTIFIER;
                break;
            }

            DieChar("Unrecognized character", Char);
    }

    return 1;
}

130
src/Main.c Normal file
View File

@ -0,0 +1,130 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#define extern_
#include <Data.h>
#undef extern_
#include <errno.h>
// Spellings of a handful of operators.
// NOTE(review): this table is NOT aligned with enum TokenTypes - it has only
// five entries and index 0 is "+", whereas token type 0 is LI_EOF.
char* TokenStrings[] = { "+", "-", "*", "/", "int" };
// Human-readable name of every token type, in the same order as
// enum TokenTypes, so it can be indexed directly by a token's type.
char* TokenNames[] = {
"End of file",
"Addition",
"Subtraction",
"Multiplication",
"Division",
"Equality Check",
"Inequality Check",
"Less Than",
"Greater Than",
"Less Than or Equal",
"Greater Than or Equal",
"Assignment",
"Integer literal",
"Statement End",
"Compound Block Start",
"Compound Block End",
"Logical Block Start",
"Logical Block End",
"Identifier",
"None Type",
"Char Type",
"Int Type",
"Long Type",
"Void Type",
"Function keyword",
"Print Keyword",
"If keyword",
"Else keyword",
"While keyword",
"For keyword",
"Return keyword"
};
static void TokeniseFile() {
struct Token Token;
while(Tokenise(&Token)) {
printf("Token %s", TokenStrings[Token.type]);
if(Token.type == LI_INT) {
printf(", value %d", Token.value);
}
printf("\n");
}
}
/*
 * Compiler entry point.
 *
 * Usage: <program> <source file> <output file>
 *
 * Parses the source file one function at a time and writes the generated
 * assembly to the output file. Exits with status 0 on success and 1 when
 * the arguments are missing or either file cannot be opened.
 */
int main(int argc, char* argv[]) {
    struct ASTNode* Node;

    // Both paths are required; without this check a missing argument would
    // hand a null pointer to fopen.
    if(argc < 3) {
        fprintf(stderr, "Usage: %s <source file> <output file>\n", argc > 0 ? argv[0] : "erythro");
        exit(1);
    }

    Line = 1;
    Overread = '\n';

    if((SourceFile = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "Unable to open %s: %s\n", argv[1], strerror(errno));
        exit(1);
    }

    if((OutputFile = fopen(argv[2], "w")) == NULL) {
        fprintf(stderr, "Unable to open %s: %s\n", argv[2], strerror(errno));
        exit(1);
    }

    // The print statement is compiled into a call to this function, so it
    // has to be known before any user code is parsed.
    AddFunctionSymbol("PrintInteger", RET_CHAR, ST_FUNC, 0);

    // Prime the parser with the first token.
    Tokenise(&CurrentToken);

    AssemblerPreamble();

    while(1) {
        Node = ParseFunction();
        printf("\nBeginning assembler creation of new function %s\n", Symbols[Node->Value.ID].Name);
        AssembleTree(Node, -1, 0);

        if(CurrentToken.type == LI_EOF)
            break;
    }

    fclose(SourceFile);
    fclose(OutputFile);

    exit(0);
}
// Report a fatal error together with the current source line, then exit
// with status 1.
void Die(char* Error) {
    const int Where = Line;
    fprintf(stderr, "%s on line %d\n", Error, Where);
    exit(1);
}
// Report a fatal error with a string detail and the current source line,
// then exit with status 1.
void DieMessage(char* Error, char* Reason) {
    const int Where = Line;
    fprintf(stderr, "%s:%s on line %d\n", Error, Reason, Where);
    exit(1);
}
// Report a fatal error with a decimal detail and the current source line,
// then exit with status 1.
void DieDecimal(char* Error, int Number) {
    const int Where = Line;
    fprintf(stderr, "%s:%d on line %d\n", Error, Number, Where);
    exit(1);
}
// Report a fatal error with a character detail and the current source line,
// then exit with status 1.
void DieChar(char* Error, int Char) {
    const int Where = Line;
    fprintf(stderr, "%s:%c on line %d\n", Error, Char, Where);
    exit(1);
}

433
src/Parser.c Normal file
View File

@ -0,0 +1,433 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <stdio.h>
#include <stdlib.h>
#include "Defs.h"
#include "Data.h"
/*
* Precedence is directly related to Token Type.
*
* The Precedence table below is indexed by enum TokenTypes value and
* covers the first eleven entries, LI_EOF through CMP_GTE. Later token
* types have no entry in the table.
*
*/
// Binding strength of each operator token, indexed by token type.
// A higher number binds more tightly; 0 marks "not an operator".
static int Precedence[] =
    { 0,        // EOF
      10, 10,   // + -
      20, 20,   // * /
      30, 30,   // =? !=
      40, 40,   // < >
      40, 40};  // <= =>

/*
 * Look up the precedence of an operator token.
 *
 * Token: a TokenTypes value.
 *
 * Returns the token's precedence. Terminates the program when the token is
 * not an operator, which includes every token type beyond the end of the
 * table.
 */
static int OperatorPrecedence(int Token) {
    const int Known = (int)(sizeof(Precedence) / sizeof(Precedence[0]));
    int Prec = 0;

    // The table only covers the operator tokens. Any other token type
    // (identifiers, braces, keywords, ...) would index past its end.
    if(Token >= 0 && Token < Known)
        Prec = Precedence[Token];

    if(Prec == 0) {
        Die("Attempting to determine operator precedence of an EOF or INT literal.");
    }

    return Prec;
}
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * N O D E C O N S T R U C T I O N * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Allocate and fill in a new AST node.
 * IntValue is stored in the node's Value union, so it doubles as the symbol
 * ID for identifier nodes. Exits the program if the allocation fails.
 */
struct ASTNode* ConstructASTNode(int Operation, int Type,
                                 struct ASTNode* Left,
                                 struct ASTNode* Middle,
                                 struct ASTNode* Right,
                                 int IntValue) {
    struct ASTNode* Fresh = (struct ASTNode*) malloc(sizeof(struct ASTNode));

    if(Fresh == NULL) {
        fprintf(stderr, "Unable to allocate node!");
        exit(1);
    }

    // Children first...
    Fresh->Left = Left;
    Fresh->Middle = Middle;
    Fresh->Right = Right;

    // ...then the node's own data.
    Fresh->Operation = Operation;
    Fresh->ExprType = Type;
    Fresh->Value.IntValue = IntValue;

    return Fresh;
}
// Build a node that has no children.
struct ASTNode* ConstructASTLeaf(int Operation, int Type, int IntValue) {
    struct ASTNode* const NoChild = NULL;
    return ConstructASTNode(Operation, Type, NoChild, NoChild, NoChild, IntValue);
}
// Build a node that has only a Left child.
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, int IntValue) {
    struct ASTNode* const NoChild = NULL;
    return ConstructASTNode(Operation, Type, Left, NoChild, NoChild, IntValue);
}
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * * T O K E N P A R S I N G * * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
* Take a Token Type, and convert it to an AST-Node Operation.
*
* TokenTypes and SyntaxOps are synchronized to make this easy.
*
*/
// Returns the SyntaxOps value for an operator token (AR_PLUS..LI_EQUAL),
// which is numerically the token itself. Any other token is a fatal error.
int ParseTokenToOperation(int Token) {
    if(Token > LI_EOF && Token < LI_INT)
        return Token;

    DieDecimal("ParseToken: Unknown token", Token);

    // DieDecimal does not return; this keeps every path returning a value.
    return -1;
}
/*
* Parse a primary (terminal) expression.
* Handles integer literals, variable references and function calls,
* constructing a leaf node (or delegating to CallFunction) and handing
* control back up the chain with the following token already fetched.
*
*/
static struct ASTNode* ParsePrimary(void) {
    struct ASTNode* Leaf;
    int SymbolIndex;

    if(CurrentToken.type == LI_INT) {
        // Literals in 0..255 are typed as char, everything else as int.
        const int Literal = CurrentToken.value;

        if((Literal >= 0) && (Literal < 256))
            Leaf = ConstructASTLeaf(TERM_INTLITERAL, RET_CHAR, Literal);
        else
            Leaf = ConstructASTLeaf(TERM_INTLITERAL, RET_INT, Literal);
    } else if(CurrentToken.type == TY_IDENTIFIER) {
        // A variable or a function? Peek at the token after the name.
        Tokenise(&CurrentToken);

        // An opening parenthesis means this is a function call.
        if(CurrentToken.type == LI_LPARE)
            return CallFunction();

        // Otherwise the peeked token is handed back; the Tokenise call at
        // the bottom of this function picks it up again.
        RejectToken(&CurrentToken);

        // It's a variable, so find the symbol and construct a leaf for it.
        SymbolIndex = FindSymbol(CurrentIdentifier);
        if(SymbolIndex == -1)
            DieMessage("Unknown Variable", CurrentIdentifier);

        Leaf = ConstructASTLeaf(REF_IDENT, Symbols[SymbolIndex].Type, SymbolIndex);
    } else {
        DieDecimal("Syntax Error", CurrentToken.type);
    }

    Tokenise(&CurrentToken);
    return Leaf;
}
// Precedence-unaware expression parser: builds a right-leaning tree of
// "primary operator rest-of-expression" until the end of input.
struct ASTNode* ParseNewASTNode(void) {
    struct ASTNode* Lhs = ParsePrimary(); // Also fetches the next token.

    // A lone primary at end of input is the whole tree.
    if(CurrentToken.type == LI_EOF)
        return Lhs;

    const int Op = ParseTokenToOperation(CurrentToken.type);

    // Step past the operator and parse everything after it.
    Tokenise(&CurrentToken);
    struct ASTNode* Rhs = ParseNewASTNode();

    return ConstructASTNode(Op, Lhs->ExprType, Lhs, NULL, Rhs, 0);
}
/*
* Parse an expression by precedence climbing.
*
* PreviousTokenPrecedence is the precedence of the operator to the left of
* this sub-expression; only operators that bind more tightly than it are
* consumed here. Parsing stops at a ';' or ')' (left as the current token).
*
* Returns the root of the tree built for the expression.
*/
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence) {
struct ASTNode* LeftNode, *RightNode;
int LeftType, RightType;
int NodeType;
// The left operand; ParsePrimary leaves the following token in CurrentToken.
LeftNode = ParsePrimary();
NodeType = CurrentToken.type;
// A lone primary: nothing more to combine.
if(NodeType == LI_SEMIC || NodeType == LI_RPARE)
return LeftNode;
//printf("Current token has value %d, type %d\n", CurrentToken.value, CurrentToken.type);
// OperatorPrecedence is fatal for tokens that are not operators.
while(OperatorPrecedence(NodeType) > PreviousTokenPrecedence) {
//printf("inside while\n");
// Step past the operator, then parse everything that binds tighter than it.
Tokenise(&CurrentToken);
RightNode = ParsePrecedenceASTNode(Precedence[NodeType]);
LeftType = LeftNode->ExprType;
RightType = RightNode->ExprType;
// NOTE(review): TypesCompatible is defined elsewhere. Judging by the use
// below it presumably rewrites LeftType/RightType into the operation needed
// to convert that side (0 for none) - confirm against its definition.
if(!TypesCompatible(&LeftType, &RightType, 0))
Die("Assignment between incompatible types");
// A non-zero result is used as the Operation of a wrapper node around that
// operand. Order matters: the right wrapper reads LeftNode->ExprType after
// LeftNode may already have been wrapped.
if(LeftType)
LeftNode = ConstructASTBranch(LeftType, RightNode->ExprType, LeftNode, 0);
if(RightType)
RightNode = ConstructASTBranch(RightType, LeftNode->ExprType, RightNode, 0);
// Join both operands under the operator; the result becomes the new left
// operand for any operator that follows.
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode->ExprType, LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_SEMIC || NodeType == LI_RPARE)
return LeftNode;
}
return LeftNode;
}
/* struct ASTNode* ParseMultiplicativeASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParsePrimary();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while((NodeType == AR_STAR) || (NodeType == AR_SLASH)) {
Tokenise(&CurrentToken);
RightNode = ParsePrimary();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* struct ASTNode* ParseAdditiveASTNode(void) {
struct ASTNode* LeftNode, * RightNode;
int NodeType;
LeftNode = ParseMultiplicativeASTNode();
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
return LeftNode;
while(1) {
Tokenise(&CurrentToken);
RightNode = ParseMultiplicativeASTNode();
LeftNode = ConstructASTNode(ParseTokenToOperation(NodeType), LeftNode, NULL, RightNode, 0);
NodeType = CurrentToken.type;
if(NodeType == LI_EOF)
break;
}
return LeftNode;
}
*/
/* * * * * * * * * * * * * * * * * * * * * * * *
* * * * I N T E R P R E T A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Evaluate an AST directly, and return the integer result.
 *
 * Both children are evaluated first (left, then right), and the results
 *  are combined according to the operation of this node.
 *
 * Operands start at zero, so that a node with a missing child
 *  does not read an uninitialised value.
 * Division by zero is reported and ends the program, in the same manner
 *  as an unknown operation.
 *
 * NOTE(review): REF_IDENT returns the node's stored integer value as-is.
 *  ParsePrimary builds those leaves with a symbol index as the value, so this
 *  is probably not the value of the variable - confirm before relying on it.
 *
 * @param Node the root of the tree to evaluate. Must not be NULL.
 * @return the computed value
 */
int ParseAST(struct ASTNode* Node) {
    int LeftVal = 0, RightVal = 0;

    if(Node->Left)
        LeftVal = ParseAST(Node->Left);

    if(Node->Right)
        RightVal = ParseAST(Node->Right);

    switch(Node->Operation) {
        case OP_ADD:
            return (LeftVal + RightVal);
        case OP_SUBTRACT:
            return (LeftVal - RightVal);
        case OP_MULTIPLY:
            return (LeftVal * RightVal);
        case OP_DIVIDE:
            // Integer division by zero is undefined; fail loudly instead.
            if(RightVal == 0) {
                fprintf(stderr, "Division by zero\n");
                exit(1);
            }
            return (LeftVal / RightVal);

        case REF_IDENT:
        case TERM_INTLITERAL:
            return Node->Value.IntValue;

        default:
            fprintf(stderr, "Unknown syntax token: %d\n", Node->Operation);
            exit(1);
    }
}
/* * * * * * * * * * * * * * * * * * * * *
* * * * F U N C T I O N S * * * *
* * * * * * * * * * * * * * * * * * * * */
/*
 * Parse a call to the function named by CurrentIdentifier.
 *
 * Expects the current token to be the '(' that follows the function name.
 * A single expression is parsed as the argument, followed by ')'.
 *
 * The name must be present in the symbol table, and must be a function
 *  (ST_FUNC). Anything else is reported as an undeclared function.
 *
 * @return an OP_CALL node, typed as the function's return type,
 *          with the argument expression as its child
 *          and the function's symbol index as its value.
 */
struct ASTNode* CallFunction() {
    struct ASTNode* Tree;
    int FuncID;

    // The lookup result must be stored before it is tested.
    // Comparing it against the (uninitialised) FuncID left every
    // call pointing at whatever FuncID happened to contain.
    FuncID = FindSymbol(CurrentIdentifier);

    // The -1 test comes first, so the table is never indexed with it.
    if(FuncID == -1 || Symbols[FuncID].Structure != ST_FUNC)
        DieMessage("Undeclared function", CurrentIdentifier);

    VerifyToken(LI_LPARE, "(");

    Tree = ParsePrecedenceASTNode(0);
    Tree = ConstructASTBranch(OP_CALL, Symbols[FuncID].Type, Tree, FuncID);

    VerifyToken(LI_RPARE, ")");

    return Tree;
}
/* * * * * * * * * * * * * * * * * * * * * *
* * * * S T A T E M E N T S * * * *
* * * * * * * * * * * * * * * * * * * * * */
/*
 * Parse a single statement, chosen by the type of the current token.
 *
 *  - print, if, while, for and return are passed to their own parsers.
 *  - A type keyword starts a variable declaration. Declarations produce
 *     no AST node, so NULL is returned for them.
 *  - An identifier is passed to ParseIdentifier(), which handles both
 *     assignments and function calls.
 *  - Anything else is a syntax error.
 *
 * @return the AST for the statement, or NULL for a declaration.
 */
struct ASTNode* ParseStatement(void) {
    int ID;

    switch(CurrentToken.type) {
        case KW_PRINT:
            return PrintStatement();

        case TY_CHAR:
        case TY_LONG:
        case TY_INT:
            printf("\t\tNew Variable: %s\n", CurrentIdentifier);
            BeginVariableDeclaration();
            return NULL;

        case TY_IDENTIFIER:
            // Look the name up once. Unknown names (-1) must never be used
            // to index the table; ParseIdentifier reports them properly.
            ID = FindSymbol(CurrentIdentifier);
            if(ID != -1) {
                if(Symbols[ID].Structure == ST_FUNC)
                    printf("\t\tCalling Function: %s\n", Symbols[ID].Name);
                else
                    printf("\t\tAssigning variable: %s\n", Symbols[ID].Name);
            }

            return ParseIdentifier();

        case KW_IF:
            return IfStatement();

        case KW_WHILE:
            return WhileStatement();

        case KW_FOR:
            return ForStatement();

        case KW_RETURN:
            return ReturnStatement();

        default:
            DieDecimal("Syntax Error in single-statement parsing. Token:", CurrentToken.type);
    }

    // Only reachable if DieDecimal ever returns; every path now yields a value.
    return NULL;
}
/*
 * Parse a compound statement: one or more statements inside { a bracket block }.
 *
 * Statements that produce no node (declarations) are skipped.
 * The rest are chained together with OP_COMP nodes, where the left child
 *  is everything parsed so far, and the right child is the newest statement.
 *
 * print, assignment, return and call statements must be followed by a ';'.
 *
 * @return the chained statements. A single statement is returned bare,
 *          and a block that produced no nodes at all returns NULL.
 */
struct ASTNode* ParseCompound() {
    struct ASTNode* Chain = NULL, *Statement;
    int Op;

    VerifyToken(LI_LBRAC, "{");

    for(;;) {
        printf("\tNew branch in compound\n");
        Statement = ParseStatement();

        if(Statement) {
            // Only the simple statements are terminated by a semicolon.
            Op = Statement->Operation;
            if(Op == OP_PRINT || Op == OP_ASSIGN || Op == OP_RET || Op == OP_CALL)
                VerifyToken(LI_SEMIC, ";");

            // The first statement starts the chain; later ones are glued on.
            Chain = Chain
                ? ConstructASTNode(OP_COMP, RET_NONE, Chain, NULL, Statement, 0)
                : Statement;
        }

        // The closing bracket ends the block.
        if(CurrentToken.type == LI_RBRAC) {
            VerifyToken(LI_RBRAC, "}");
            return Chain;
        }
    }
}
/* void ParseStatements() {
while(1) {
switch(CurrentToken.type) {
case KW_PRINT:
PrintStatement();
break;
case TY_INT:
BeginVariableDeclaration();
break;
case TY_IDENTIFIER:
AssignVariable();
break;
case LI_EOF:
return;
default:
DieDecimal("Syntax error; Token", CurrentToken.type);
}
}
} */

386
src/Statements.c Normal file
View File

@ -0,0 +1,386 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
/*
* Turn a token type into its appropriate
* primitive type.
*
* This is where we do redirections like:
* short -> s16
* long -> s64
* int -> s32
* char -> u8
*
*/
// Printable names of the primitive types.
// Presumably indexed by the RET_* type IDs, in enum order - confirm against Defs.h.
static char* Types[5] = { "none", "char", "int", "long", "void" };
// Size of each primitive type, indexed by type ID; read by PrimitiveSize().
// TypesCompatible() treats the zero-sized entries as incompatible with everything else.
static int TypeSize[5] = { 0, 1, 4, 8, 0}; // in BYTES
// Map a type keyword token (TY_*) to its primitive type ID (RET_*).
// Tokens that do not name a supported type are reported through DieDecimal.
// NOTE(review): ParseStatement accepts TY_LONG as the start of a declaration,
//  but there is no case for it here - confirm whether that is intended.
int ParseType(int Token) {
    if(Token == TY_CHAR)
        return RET_CHAR;

    if(Token == TY_VOID)
        return RET_VOID;

    if(Token == TY_INT)
        return RET_INT;

    DieDecimal("Illegal variable type", Token);
}
/*
 * Look up the size, in bytes, of a primitive type.
 *
 * Type IDs outside of RET_NONE..RET_VOID are rejected.
 * The bad ID is reported as a number: it is out of range by definition,
 *  so it must not be used as an index to look up a name for the message.
 *
 * @param Type the type ID to measure
 * @return the size of the type, in bytes
 */
int PrimitiveSize(int Type) {
    if(Type < RET_NONE || Type > RET_VOID)
        DieDecimal("Checking size of bad data type", Type);

    return TypeSize[Type];
}
/*
 * Given two types, determine if they are compatible, and report
 *  which side (if either) needs to be widened first.
 *
 * Left and Right are both inputs and outputs.
 * Going in, they hold the two type IDs.
 * When the types are compatible, each comes back as either 0 (leave that
 *  side alone) or OP_WIDEN (wrap that side in a widening node).
 * When the types are not compatible, both are left untouched.
 *
 * The rules, in order:
 *  - Identical types are compatible; nothing is widened.
 *  - If either type has a size of 0, they are not compatible.
 *  - If the left is smaller than the right, the left is widened.
 *  - If the left is larger than the right, the right is widened,
 *     unless STRICT is set, in which case they are not compatible.
 *     ie. with STRICT, you cannot `char a; int b; b = 15000; a = b;`
 *  - Different types of the same size are compatible; nothing is widened.
 *
 * @return 1 if the types are compatible, 0 if not.
 */
int TypesCompatible(int* Left, int* Right, int STRICT) {
    int LeftBytes, RightBytes;

    if(*Left != *Right) {
        LeftBytes = PrimitiveSize(*Left);
        RightBytes = PrimitiveSize(*Right);

        // Zero-sized types cannot be combined with anything else.
        if(LeftBytes == 0 || RightBytes == 0)
            return 0;

        // The left is the narrower side, so it is the one to widen.
        if(LeftBytes < RightBytes) {
            *Left = OP_WIDEN;
            *Right = 0;
            return 1;
        }

        // The right is the narrower side.
        // STRICT refuses this outright; otherwise the right is widened.
        if(LeftBytes > RightBytes) {
            if(STRICT)
                return 0;

            *Left = 0;
            *Right = OP_WIDEN;
            return 1;
        }
    }

    // Same type, or same size: nothing needs to change.
    *Left = *Right = 0;
    return 1;
}
/*
 * Handles the declaration of a variable.
 *      int newVar;
 *
 * The current token must be the type keyword.
 * It must be followed by an identifier, and then a semicolon.
 *
 * The variable is stored in the symbol table as an ST_VAR,
 *  and the resulting slot is passed to AsNewSymb().
 *
 * //TODO: int i = 5;
 *
 */
void BeginVariableDeclaration(void) {
    int DeclaredType, Slot;

    DeclaredType = ParseType(CurrentToken.type);
    Tokenise(&CurrentToken);

    // The name is read back out of CurrentIdentifier once it has been verified.
    VerifyToken(TY_IDENTIFIER, "ident");

    Slot = AddSymbol(CurrentIdentifier, DeclaredType, ST_VAR);
    AsNewSymb(Slot);

    VerifyToken(LI_SEMIC, ";");
}
struct ASTNode* ParseFunction() {
struct ASTNode* Tree;
struct ASTNode* FinalStatement;
int SymbolSlot, BreakLabel, Type;
Type = ParseType(CurrentToken.type);
Tokenise(&CurrentToken);
VerifyToken(KW_FUNC, "::");
VerifyToken(TY_IDENTIFIER, "ident");
printf("\nIdentified function %s\n", CurrentIdentifier);
BreakLabel = NewLabel();
SymbolSlot = AddFunctionSymbol(CurrentIdentifier, Type, ST_FUNC, BreakLabel);
CurrentFunction = SymbolSlot;
VerifyToken(LI_LPARE, "(");
VerifyToken(LI_RPARE, ")");
Tree = ParseCompound();
if(Type != RET_VOID) {
// Functions with one statement have no composite node, so we have to check
FinalStatement = (Tree->Operation == OP_COMP) ? Tree->Right : Tree;
if(FinalStatement == NULL || FinalStatement->Operation != OP_RET) {
Die("Function with non-void type does not return");
}
}
return ConstructASTBranch(OP_FUNC, Tree->ExprType, Tree, SymbolSlot);
}
/*
 * Handles the logic for return.
 *      return (expression)
 *
 * Returning from a void function is an error.
 * The returned expression must be compatible with the type of the
 *  current function, and is widened to that type if it is narrower.
 *
 * The trailing semicolon is not consumed here.
 *
 * //TODO: No brackets
 * //TODO: Type inference
 *
 * @return an OP_RET node, with the (possibly widened) expression as its child.
 */
struct ASTNode* ReturnStatement() {
    struct ASTNode* Tree;
    int ReturnType, FunctionType;

    if(Symbols[CurrentFunction].Type == RET_VOID)
        Die("Attempt to return from void function");

    VerifyToken(KW_RETURN, "return");
    VerifyToken(LI_LPARE, "("); // TODO: Make optional! Reject?

    Tree = ParsePrecedenceASTNode(0);

    ReturnType = Tree->ExprType;
    FunctionType = Symbols[CurrentFunction].Type;

    if(!TypesCompatible(&ReturnType, &FunctionType, 0))
        Die("Returning a value of incorrect type for function");

    // TypesCompatible has overwritten both locals by now; FunctionType
    // no longer holds a type. The widened value takes the function's
    // actual type, read back from the symbol table.
    if(ReturnType)
        Tree = ConstructASTBranch(ReturnType, Symbols[CurrentFunction].Type, Tree, 0);

    Tree = ConstructASTBranch(OP_RET, RET_NONE, Tree, 0);

    printf("\t\tReturning from function %s\n", Symbols[CurrentFunction].Name);

    VerifyToken(LI_RPARE, ")"); // TODO: OPTIONALISE!

    return Tree;
}
/*
 * Handles a statement that begins with an identifier.
 *
 * If the identifier is followed by a '(', it is a function call,
 *  and is passed to CallFunction().
 *
 * Otherwise, it is an assignment:
 *      name = expression
 *
 * The variable must already be declared.
 * The expression may be anything ParsePrecedenceASTNode accepts, and is
 *  checked against the type of the variable in STRICT mode: a narrower
 *  value is widened, but a wider value is rejected.
 *
 * @return the OP_CALL node for a call, or an OP_ASSIGN node with the
 *          expression on the left and the variable on the right.
 */
struct ASTNode* ParseIdentifier() {
    struct ASTNode* Left, *Right;
    int LeftType, RightType;
    int ID;

    VerifyToken(TY_IDENTIFIER, "ident");

    if(CurrentToken.type == LI_LPARE)
        return CallFunction();

    if((ID = FindSymbol(CurrentIdentifier)) == -1) {
        // Table slots that have never been filled have no name to print,
        // and a null pointer must not be handed to %s.
        printf("Symbol %s not in table. Table contents: %s, %s\n", CurrentIdentifier,
                Symbols[0].Name ? Symbols[0].Name : "(null)",
                Symbols[1].Name ? Symbols[1].Name : "(null)");
        DieMessage("Undeclared Variable ", CurrentIdentifier);
    }

    // The destination of the assignment.
    Right = ConstructASTLeaf(LV_IDENT, Symbols[ID].Type, ID);

    VerifyToken(LI_EQUAL, "=");

    // The value being assigned.
    Left = ParsePrecedenceASTNode(0);

    LeftType = Left->ExprType;
    RightType = Right->ExprType;

    if(!TypesCompatible(&LeftType, &RightType, 1))
        Die("Incompatible variable types");

    if(LeftType)
        Left = ConstructASTBranch(LeftType, Right->ExprType, Left, 0);

    return ConstructASTNode(OP_ASSIGN, RET_INT, Left, NULL, Right, 0);
}
/*
 * Parse an if statement, with an optional else.
 *      if (comparison) { block } else { block }
 *
 * The condition is limited to operations in the range OP_EQUAL..OP_GREATE.
 *
 * @return an OP_IF node; the condition on the left, the true block in the
 *          middle, and the false block (or NULL) on the right.
 */
struct ASTNode* IfStatement() {
    struct ASTNode* Test, *Then, *Else;

    VerifyToken(KW_IF, "if");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);

    // Only the comparisons are valid here.
    if(!(Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE))
        Die("Invalid Comparison in if statement");

    VerifyToken(LI_RPARE, ")");

    Then = ParseCompound();

    // The else block only exists if the keyword is there to start it.
    Else = NULL;
    if(CurrentToken.type == KW_ELSE) {
        Tokenise(&CurrentToken);
        Else = ParseCompound();
    }

    return ConstructASTNode(OP_IF, RET_NONE, Test, Then, Else, 0);
}
/*
 * Parse a while loop.
 *      while (comparison) { block }
 *
 * The condition is limited to operations in the range OP_EQUAL..OP_GREATE.
 *
 * @return an OP_LOOP node; the condition on the left, the body on the right.
 */
struct ASTNode* WhileStatement() {
    struct ASTNode* Test, *Block;

    VerifyToken(KW_WHILE, "while");
    VerifyToken(LI_LPARE, "(");

    Test = ParsePrecedenceASTNode(0);
    if(!(Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE))
        Die("Bad Comparison inside while()");

    VerifyToken(LI_RPARE, ")");

    Block = ParseCompound();

    return ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, Block, 0);
}
/*
 * Parse a for loop.
 *      for (preop; comparison; postop) { block }
 *
 * The loop is rewritten in terms of the existing node types:
 *
 *      OP_COMP( preop, OP_LOOP( comparison, OP_COMP( block, postop ) ) )
 *
 * The condition is limited to operations in the range OP_EQUAL..OP_GREATE.
 *
 * @return the root of the rewritten tree
 */
struct ASTNode* ForStatement() {
    struct ASTNode* Init, *Test, *Step;
    struct ASTNode* Block, *Loop;

    VerifyToken(KW_FOR, "for");
    VerifyToken(LI_LPARE, "(");

    Init = ParseStatement();
    VerifyToken(LI_SEMIC, ";");

    Test = ParsePrecedenceASTNode(0);
    if(!(Test->Operation >= OP_EQUAL && Test->Operation <= OP_GREATE))
        Die("Bad comparison in for");
    VerifyToken(LI_SEMIC, ";");

    Step = ParseStatement();
    VerifyToken(LI_RPARE, ")");

    Block = ParseCompound();

    // The postop runs at the end of every pass, so it is glued to the body.
    Loop = ConstructASTNode(OP_COMP, RET_NONE, Block, NULL, Step, 0);

    // The condition guards the body and the postop together.
    Loop = ConstructASTNode(OP_LOOP, RET_NONE, Test, NULL, Loop, 0);

    // The preop runs once, ahead of the whole loop.
    return ConstructASTNode(OP_COMP, RET_NONE, Init, NULL, Loop, 0);
}
/*
 * Parse a print statement.
 *      print expression
 *
 * The expression is checked against RET_INT,
 *  and is widened to it if it is narrower.
 *
 * The trailing semicolon is not consumed here.
 *
 * @return an OP_PRINT node, with the (possibly widened) expression as its child.
 */
struct ASTNode* PrintStatement(void) {
    struct ASTNode* Expr;
    int Wanted, Given;

    VerifyToken(KW_PRINT, "print");

    Expr = ParsePrecedenceASTNode(0);

    Wanted = RET_INT;
    Given = Expr->ExprType;

    if(!TypesCompatible(&Wanted, &Given, 0))
        DieDecimal("Attempting to print an invalid type:", Given);

    // After a successful check, Given is either 0 or the operation to wrap the value in.
    if(Given)
        Expr = ConstructASTBranch(Given, RET_INT, Expr, 0);

    return ConstructASTBranch(OP_PRINT, RET_NONE, Expr, 0);
}

68
src/Symbols.c Normal file
View File

@ -0,0 +1,68 @@
/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#include <Defs.h>
#include <Data.h>
// How many slots of the Symbols table have been handed out so far.
// FindSymbol() searches the slots below this, and NewSymbol() increments it.
static int GlobalSymbols = 0;
// Printable names of the types.
// The only use visible here is a commented-out debug printf in AddSymbol().
// NOTE(review): this has four entries and no "long", unlike the five-entry
//  table of the same name in Statements.c - confirm which one matches the type IDs.
static char* Types[4] = { "none", "char", "int", "void" };
/*
 * Search the symbol table for a name.
 * Only the slots that have been handed out so far are searched.
 *
 * @param Symbol the name to search for
 * @Return the index into the symbol table if found,
 *          -1 if not found.
 */
int FindSymbol(char* Symbol) {
    int Slot;

    for(Slot = 0; Slot < GlobalSymbols; Slot++) {
        // Names with a different first character can be skipped without a full compare.
        if(Symbols[Slot].Name[0] != Symbol[0])
            continue;

        if(strcmp(Symbol, Symbols[Slot].Name) == 0)
            return Slot;
    }

    return -1;
}
/*
 * Hand out the next free slot in the table of global symbols.
 * The count of used slots goes up by one on every call.
 *
 * @Return the index of the new slot
 *
 * Will kill the program if the table is already full.
 * //TODO: Dump symbols on death?
 */
static int NewSymbol(void) {
    int Slot = GlobalSymbols;

    GlobalSymbols += 1;

    if(Slot >= SYMBOLS)
        Die("Too many symbols");

    return Slot;
}
/*
 * Add a symbol to the table through AddSymbol(),
 *  then record the given end label against it.
 *
 * @Return the index of the symbol's slot
 */
int AddFunctionSymbol(char* Name, int Type, int Structure, int EndLabel) {
    int Index = AddSymbol(Name, Type, Structure);

    Symbols[Index].EndLabel = EndLabel;

    return Index;
}
/*
 * Add a symbol to the table, unless the name is already there.
 *
 * If the name is already in the table, its existing slot is returned,
 *  and the stored type and structure are left as they were.
 * Otherwise, a new slot is filled with a copy of the name,
 *  along with the given type and structure.
 *
 * @param Name the name of the symbol; copied, so the caller keeps ownership
 * @param Type the type ID to store for a new symbol
 * @param Structure the structural type (ST_VAR, ST_FUNC) to store for a new symbol
 * @Return the index of the symbol's slot
 */
int AddSymbol(char* Name, int Type, int Structure) {
    int TableSlot;

    // The lookup is stored on its own line, and then tested.
    // Written as one expression without the inner brackets, the comparison
    // binds first, and the slot becomes 1 for every name that already exists.
    TableSlot = FindSymbol(Name);
    if(TableSlot != -1)
        return TableSlot;

    TableSlot = NewSymbol();

    Symbols[TableSlot].Name = strdup(Name);
    Symbols[TableSlot].Type = Type;
    Symbols[TableSlot].Structure = Structure;

    //printf("Adding new variable %s of type %s to the table at %d\n", CurrentIdentifier, Types[Type], TableSlot);
    return TableSlot;
}

23
tests/comparison1 Normal file
View File

@ -0,0 +1,23 @@
int x;
x = 1 < 2;
print x;
x = 1 <= 2;
print x;
x = 1 > 2;
print x;
x = 1 => 2;
print x;
x = 1 != 2;
print x;
x = 1 =? 2;
print x;
x = 18 =? 18;
print x;

13
tests/for Normal file
View File

@ -0,0 +1,13 @@
{
int x;
x = 0;
while (x < 10) {
print x;
x = x + 1;
}
for(x = 20; x > 10; x = x - 1) {
print x;
}
}

8
tests/funcs Normal file
View File

@ -0,0 +1,8 @@
void :: main () {
int x;
for(x = 20; x > 10; x = x - 1) {
print x;
}
}

15
tests/funcs2 Normal file
View File

@ -0,0 +1,15 @@
int :: Testings() {
return (40);
}
void :: main() {
int Result;
PrintInteger(10);
Result = Testings(10);
PrintInteger(Result);
PrintInteger(Testings(10) + 10);
}

13
tests/if Normal file
View File

@ -0,0 +1,13 @@
{
int hey;
int sup;
hey = 15;
sup = 20;
if(hey < sup) {
print hey;
} else {
print sup;
}
}

6
tests/parser1 Normal file
View File

@ -0,0 +1,6 @@
print 12 * 3;
print
18 - 2
* 4; print
1 + 2 +
9 - 5/2 + 3*5;

1
tests/tokeniser1 Normal file
View File

@ -0,0 +1 @@
2 + 3 * 5 - 8 / 3

8
tests/tokeniser2 Normal file
View File

@ -0,0 +1,8 @@
251 +
32 -
531321323 *
8123
/
3

1
tests/tokeniser3 Normal file
View File

@ -0,0 +1 @@
2+3*5-8/3

17
tests/types Normal file
View File

@ -0,0 +1,17 @@
void :: main() {
int x;
char y;
x = 20; print x;
y = 10; print y;
for(x = 1; x <= 5; x = x + 1) {
print x;
}
for(y = 253; y != 2; y = y + 1) {
print y;
}
}

9
tests/vars1 Normal file
View File

@ -0,0 +1,9 @@
int first;
int second;
int third;
first = 5;
second = 8;
third = first * 5 + second / 4;
print third - 10;

8
tests/while Normal file
View File

@ -0,0 +1,8 @@
{
int i;
while (i <= 10) {
print i;
i = i + 1;
}
}