/*
 * Erythro/include/Defs.h
 * (repository listing metadata: 642 lines, 19 KiB, C)
 */

/*************/
/*GEMWIRE */
/* ERYTHRO*/
/*************/
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdbool.h>
/*
* ARithmetic tokens are prefixed AR.
* LIteral tokens are prefixed LI.
* KeyWords are prefixed KW.
* TYpes are prefixed TY.
* CoMParisons are prefixed CMP.
* BOOLean maths is prefixed BOOL.
* BITwise maths is prefixed BIT.
* Arithmetic SHifts are prefixed SH.
* PlusPlusMinusMinus operators are prefixed PPMM.
*
*
* NOTE: Tokens are different from Syntax Operations!
*
* Tokens should represent the characters that invoke them,
* not the actions they perform.
*
*/
/*
 * Every token the lexer can hand to the parser, grouped by family.
 *
 * The first member of each family carries its numeric value explicitly so the
 * layout can be checked at a glance. The values of the operator tokens matter:
 * the SyntaxOps enum documents that an operation shares its index with the
 * token that invokes it.
 */
enum TokenTypes {
    /* End of input */
    LI_EOF = 0,

    /* Assignment */
    LI_EQUAL = 1,           // =

    /* Boolean logic */
    BOOL_OR = 2,            // ||
    BOOL_AND,               // &&

    /* Bitwise logic */
    BIT_OR = 4,             // |
    BIT_XOR,                // ^
    BIT_AND,                // &

    /* Comparisons */
    CMP_EQUAL = 7,          // =?
    CMP_INEQ,               // !=
    CMP_LT,                 // <
    CMP_GT,                 // >
    CMP_LTE,                // <=
    CMP_GTE,                // => (spelling as originally documented; NOTE(review): confirm against the lexer)

    /* Shifts */
    SH_LEFT = 13,           // <<
    SH_RIGHT,               // >>

    /* Arithmetic */
    AR_PLUS = 15,           // +
    AR_MINUS,               // -
    AR_STAR,                // *
    AR_SLASH,               // /

    /* Increment / decrement */
    PPMM_PLUS = 19,         // ++
    PPMM_MINUS,             // --

    /* Unary inversions */
    BOOL_INVERT = 21,       // !
    BIT_NOT,                // ~

    /* Literals */
    LI_INT = 23,            // Integer literal
    LI_STR,                 // String literal

    /* Punctuation */
    LI_SEMIC = 25,          // ;
    LI_COLON,               // :
    LI_LBRAC,               // {
    LI_RBRAC,               // }
    LI_LBRAS,               // [
    LI_RBRAS,               // ]
    LI_LPARE,               // (
    LI_RPARE,               // )
    LI_COM,                 // ,
    LI_DOT,                 // .
    LI_ARROW,               // ->

    /* Identifiers and type keywords */
    TY_IDENTIFIER = 36,     // Name of a variable, function, etc.
    TY_NONE,                // No return type. Literal void.
    TY_CHAR,                // "char"
    TY_INT,                 // "int"
    TY_LONG,                // "long"
    TY_VOID,                // "void"

    /* Keywords */
    KW_FUNC = 42,           // :: (a function name follows)
    KW_BREAK,               // "break"
    KW_CONTINUE,            // "continue"
    KW_SWITCH,              // "switch"
    KW_DEFAULT,             // "default"
    KW_CASE,                // "case"
    KW_PRINT,
    KW_IF,
    KW_ELSE,
    KW_WHILE,
    KW_FOR,
    KW_RETURN,
    KW_STRUCT,
    KW_UNION,
    KW_ENUM,
    KW_ALIAS,
    KW_IMPORT
};
/*
* All Syntax Operations are prefixed OP.
* Terminal Operations are prefixed TERM.
* L-Values are prefixed LV.
* Reference Operations are prefixed REF.
*
* These represent the actions that a token will perform.
* These are used exclusively in AST construction.
*
* It is important that Tokens and Operations are logically separated,
* but that the Operation's index is the same as the Token that invokes it.
*
* Every five elements, an index is assigned. These are the natural indices.
* They are marked to make navigation of the Syntax Tree easier.
*/
/*
 * Operations stored in AST nodes. Every enumerator is numbered explicitly so
 * that the index of any operation can be read straight off the source.
 * Numbering starts at 1; there is no operation with index 0.
 */
enum SyntaxOps {
    /* Assignment and logic */
    OP_ASSIGN       = 1,    // Assign an l-value
    OP_BOOLOR       = 2,    // Boolean OR two statements
    OP_BOOLAND      = 3,    // Boolean AND two statements
    OP_BITOR        = 4,    // Bitwise OR a number
    OP_BITXOR       = 5,    // Bitwise XOR a number
    OP_BITAND       = 6,    // Bitwise AND a number

    /* Comparisons */
    OP_EQUAL        = 7,    // Compare equality
    OP_INEQ         = 8,    // Compare inequality
    OP_LESS         = 9,    // Less than?
    OP_GREAT        = 10,   // Greater than?
    OP_LESSE        = 11,   // Less than or equal to?
    OP_GREATE       = 12,   // Greater than or equal to?

    /* Shifts and arithmetic */
    OP_SHIFTL       = 13,   // Arithmetic shift left (multiply by 2)
    OP_SHIFTR       = 14,   // Arithmetic shift right (divide by 2)
    OP_ADD          = 15,   // Add two numbers
    OP_SUBTRACT     = 16,   // Subtract two numbers
    OP_MULTIPLY     = 17,   // Multiply two numbers
    OP_DIVIDE       = 18,   // Divide two numbers

    /* Increment / decrement */
    OP_PREINC       = 19,   // Increment var before reference
    OP_PREDEC       = 20,   // Decrement var before reference
    OP_POSTINC      = 21,   // Increment var after reference
    OP_POSTDEC      = 22,   // Decrement var after reference

    /* Unary operations */
    OP_BITNOT       = 23,   // Invert a number bitwise
    OP_BOOLNOT      = 24,   // Invert a statement logically
    OP_NEGATE       = 25,   // Negate a number (turn a positive number negative)
    OP_BOOLCONV     = 26,   // Convert an expression to a boolean
    OP_ADDRESS      = 27,   // Fetch the address of a var
    OP_DEREF        = 28,   // Get the value of the address in a pointer

    /* Terminals and references */
    TERM_INTLITERAL = 29,   // Integer literal; a virtual operation, hence a terminal
    TERM_STRLITERAL = 30,   // String literal; also a terminal
    REF_IDENT       = 31,   // Reference (read) an identifier (variable)

    /* Type adjustment */
    OP_WIDEN        = 32,   // Something contains a type that needs to be cast up
    OP_SCALE        = 33,   // A pointer that needs to be scaled

    /* Functions and statements */
    OP_CALL         = 34,   // Call a function
    OP_RET          = 35,   // Return from a function
    OP_COMP         = 36,   // Glue that joins compound statements together
    OP_IF           = 37,   // If statement
    OP_LOOP         = 38,   // FOR, WHILE
    OP_PRINT        = 39,   // Print statement
    OP_FUNC         = 40,   // Define a function
    OP_BREAK        = 41,   // Break out of the loop
    OP_CONTINUE     = 42,   // Continue the loop
    OP_SWITCH       = 43,   // Switch statement
    OP_DEFAULT      = 44,   // Default case
    OP_CASE         = 45    // Case
};
/**
* The way syntax is stored by the parser and assembled into a usable file.
* Each ASTNode links to up to three child nodes (Left, Middle and Right).
*
* Thus, you can traverse down a tree of ASTNodes easily; there is no parent link for walking back up.
*
* Walking the tree is as simple as reading the Operation and recursively reading the Left, Middle and Right nodes as called for.
* For example, an if-else statement uses all three subnodes.
*
* This means that AST Nodes aren't exactly a binary tree, but a syntax tree nonetheless.
*/
/*
 * One node of the syntax tree: an operation, its type information, up to
 * three children, an optional symbol and a small integer payload.
 */
struct ASTNode {
    /* What this node does and what it yields. */
    int Operation;                      // Index into SyntaxOps
    int ExprType;                       // Data type of this node's value
    int RVal;                           // Non-zero for an r-value, zero for an l-value

    /* Children; which of them are used depends on Operation. */
    struct ASTNode *Left;
    struct ASTNode *Middle;
    struct ASTNode *Right;

    /* Symbol table entry attached to this node. */
    struct SymbolTableEntry *Symbol;

    /* Integer payload; both names share the same storage. */
    union {
        int Size;                       // Used by OP_SCALE: its linear representation
        int IntValue;                   // Used by integer literal terminals: the value
    };
};
/**
* Describes the basic unit of syntax in the language.
* A token has a type (an index into the TokenTypes enum) and a value.
*
* The value represents the numerical value of an integer literal, etc.
*/
/* A lexed token: which kind it is, plus an integer payload. */
struct Token {
    int type;       // Index into the TokenTypes enum
    int value;      // Numerical value, e.g. of an integer literal
};
/*
* The Symbol Table, used for variables, functions and
* assorted goodies.
*/
/*
 * One entry of a symbol table list. Which members of the two anonymous
 * unions are meaningful depends on what kind of symbol the entry describes.
 */
struct SymbolTableEntry {
    char *Name;

    int Type;                                   // A DataTypes value: the type of this data
    struct SymbolTableEntry *CompositeType;     // Start of the symbol list describing a composite type
    int Structure;                              // A StructureType value: how the data is to be processed
    int Storage;                                // The symbol's scope; decides when it is discarded

    /* First overlay: one int whose meaning depends on the symbol. */
    union {
        int EndLabel;                           // Function: label number to jump to when leaving it (if applicable)
        int Length;                             // Array: length in units of one element
        int IntValue;                           // Enum entry: its value
    };

    /* Second overlay. */
    union {
        int SinkOffset;                         // Variable: how far rbp must be sunk to reach it on the stack
        int Elements;                           // Function: number of parameters
    };

    /* List linkage. */
    struct SymbolTableEntry *NextSymbol;        // Next symbol in a list
    struct SymbolTableEntry *Start;             // First member in a list
};
/**
* Information about a given source file.
*
* A file that starts the parsing of another file will never confuse the parser.
*
* It is the end goal that the parser will be multithreaded, operating on a single file at a time.
*
* Note that files do not contain their own symbol tables - these are global.
*/
/* Per-file state: where the file lives, where the parser is in it, and its results. */
struct FileData {
    bool AllowDefinitions;                  // Whether this file will accept definitions of functions

    FILE *Stream;                           // Stream the file is read from

    /* File names for each stage. */
    char *SourceName;                       // Source code
    char *AssemblyName;                     // Assembly output
    char *ObjectName;                       // Assembled object code

    /* Position tracking. */
    long CurrentLine;                       // Line being worked on; -1 once the file is finished
    long CurrentColumn;                     // Column being worked on; -1 once the file is finished
    long CurrentLoopDepth;                  // Depth of the loop currently being parsed
    long CurrentSafeColumn;                 // Last column marked "valid"; start of the error block on failure

    bool SwitchStatement;                   // Set while parsing a switch statement; changes how compound statements behave

    struct Token CurrentSymbol;             // Symbol currently being lexed: TokenTypes index and integer value

    struct SymbolTableEntry *FunctionEntry; // Function being parsed; null in global scope or when finished

    struct ASTNode *Tree;                   // Once ready, the full AST of this file
};
/*
 * Where a symbol lives. Numbering starts at 1 and every value is spelled out.
 * Function scope is the deepest scope there is.
 */
enum StorageScope {
    SC_GLOBAL    = 1,   // Global scope
    SC_STRUCT    = 2,   // Struct definitions
    SC_UNION     = 3,   // Union definitions
    SC_ENUM      = 4,   // Enum definitions
    SC_ENUMENTRY = 5,   // Enum entry names
    SC_ALIAS     = 6,   // Typedef aliases
    SC_MEMBER    = 7,   // Members of structs or unions
    // Not enabled: SC_CLASS (class-local definitions), SC_STATIC (static storage definitions)
    SC_PARAM     = 8,   // Function parameters
    SC_LOCAL     = 9    // Function-local scope
};
/*
* The types of data being held in memory.
* The lowest 4 bits of these enum values
* encode a nested pointer type.
*
* This means a single enum value can hold
* up to ***************int (15 levels of pointer) types.
* Should be enough for everyone, right?
*/
/*
 * Base data types, with every value written out.
 *
 * NOTE(review): the primitive types and DAT_STRUCT are multiples of 16, which
 * leaves their low four bits clear. DAT_UNION through DAT_NONE (81..84) do
 * not, so they share values with DAT_STRUCT + 1..4. If the low four bits are
 * read as a pointer depth, as the comment on this enum describes, these would
 * be indistinguishable from pointers to a struct. Confirm against the type
 * helpers before relying on either reading.
 */
enum DataTypes {
    RET_NONE   = 0,     // No return type. Literal void.
    RET_CHAR   = 16,    // "char" type keyword
    RET_INT    = 32,    // "int" type keyword
    RET_LONG   = 48,    // "long" type keyword
    RET_VOID   = 64,    // "void" type keyword
    DAT_STRUCT = 80,    // Struct data
    DAT_UNION  = 81,    // Union data
    DAT_ENUM   = 82,    // Enum data
    DAT_ALIAS  = 83,    // Alias definition
    DAT_NONE   = 84     // No type, no work needed
};
/*
* The type of the structure of data being examined
* //TODO: move into TokenTypes?
*/
/* How a symbol's data is structured; values are spelled out, starting at 0. */
enum StructureType {
    ST_VAR  = 0,    // A variable
    ST_FUNC = 1,    // A function
    ST_ARR  = 2,    // An array
    ST_RUCT = 3,    // A struct
    ST_ENUM = 4     // An enum
    // NOTE(review): the original list ended with a "This is a typedef" comment
    // but no enumerator for it; presumably one was planned.
};
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * A R G U M E N T S * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Driver-level entry points.
// NOTE(review): only the signatures are visible in this header; the notes
// below are inferred from names and parameter types and must be confirmed
// against the definitions.

// Takes a string and a single suffix character and returns a string.
// Presumably used to derive output file names -- TODO confirm, including who
// owns the returned pointer.
char* Suffixate(char* String, char Suffix);
// Presumably compiles the source described by InputFile -- TODO confirm.
void Compile(struct FileData* InputFile);
// Presumably assembles the output produced for InputFile -- TODO confirm.
void Assemble(struct FileData* InputFile);
// Takes an output name plus an array of FileData pointers and the number of
// entries in that array. Presumably links their object files -- TODO confirm.
void Link(char* Output, struct FileData* Objects[], int ObjectsLength);
// Takes the program name; presumably prints usage help -- TODO confirm.
void DisplayUsage(char* ProgName);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * L E X I N G * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Lexer interface.
// NOTE(review): behavior notes are inferred from names; confirm against the
// lexer source.

// Presumably advances the lexer to the next token -- TODO confirm.
// The empty parameter list leaves the parameters unspecified before C23, so
// arguments are not checked at call sites.
void Tokenise();
// Takes a TokenTypes value and a description of the expected token.
// Presumably checks the current token against Type -- TODO confirm.
void VerifyToken(int Type, char* TokenExpected);
// Takes a TokenTypes value and returns a bool. Presumably reports whether a
// token of that type was consumed -- TODO confirm.
bool OptionallyConsume(int Type);
// NOTE(review): the next two prototypes are declared `static` in a shared
// header. Each translation unit that includes this file therefore gets its own
// internal-linkage declaration, and units that never define them may warn
// about a static function that is declared but not defined. Consider moving
// them into the source file that defines them.
static int ReadIdentifier(int Char, char* Buffer, int Limit);
static int ReadKeyword(char* Str);
// Presumably handles an import of another module -- TODO confirm.
void ImportModule();
/* * * * * * * * * * * * * * * * * * * *
* * * * * T Y P E S * * * * * *
* * * * * * * * * * * * * * * * * * * */
// Type helpers. The int "Type" parameters are presumably DataTypes values
// (optionally with a pointer depth in the low bits) -- TODO confirm.

// Takes a tree, a right-hand type and an operation and returns a tree.
// NOTE(review): what a failed conversion returns is not visible here.
struct ASTNode* MutateType(struct ASTNode* Tree, int RightType, int Operation);
// Presumably non-zero when Type is an integer type -- TODO confirm.
int TypeIsInt(int Type);
// Presumably non-zero when Type is a pointer type -- TODO confirm.
int TypeIsPtr(int Type);
// Returns a string for Type; ownership of the returned pointer is not visible here.
char* TypeNames(int Type);
// Takes a type and a composite symbol; presumably returns the size of that
// type, consulting Composite for struct/union types -- TODO confirm.
int TypeSize(int Type, struct SymbolTableEntry* Composite);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * S Y N T A X T R E E * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// AST node constructors. The parameter names line up with members of
// struct ASTNode (Operation, Left, Middle, Right, Symbol, IntValue).
// NOTE(review): allocation and ownership of the returned node are not visible
// in this header.

// Full form: takes all three children.
struct ASTNode* ConstructASTNode(int Operation, int Type,
struct ASTNode* Left,
struct ASTNode* Middle,
struct ASTNode* Right,
struct SymbolTableEntry* Symbol,
int IntValue);
// Leaf form: takes no child nodes.
struct ASTNode* ConstructASTLeaf(int Operation, int Type, struct SymbolTableEntry* Symbol, int IntValue);
// Branch form: takes only a Left child.
struct ASTNode* ConstructASTBranch(int Operation, int Type, struct ASTNode* Left, struct SymbolTableEntry* Symbol, int IntValue);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * P A R S I N G * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Parser interface. Most routines return the AST built for the construct they
// parse.
// NOTE(review): several prototypes below use an empty parameter list `()`
// while others use `(void)`. Before C23 the empty form leaves the parameters
// unspecified, so call sites are not checked. Behavior notes are inferred from
// names; confirm against the parser source.

// Expression parsing. Takes the precedence of the previous token.
struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence);
struct ASTNode* ParsePrimary(void);
struct ASTNode* ParseStatement(void);
struct ASTNode* PrefixStatement();
struct ASTNode* PostfixStatement();
// Top-level declarations and function bodies.
void ParseGlobals();
struct ASTNode* ParseFunction(int Type);
struct ASTNode* ParseCompound();
// Composite (struct/union), enum and alias declarations. The Composite
// out-parameter presumably receives the composite type's symbol entry -- TODO confirm.
struct SymbolTableEntry* BeginCompositeDeclaration(int Type);
void BeginEnumDeclaration();
int ReadAlias(struct SymbolTableEntry** Composite);
int ParseAlias(char* Name, struct SymbolTableEntry** Composite);
// Calls and control-transfer statements.
struct ASTNode* GetExpressionList();
struct ASTNode* CallFunction();
struct ASTNode* ReturnStatement();
struct ASTNode* BreakStatement();
struct ASTNode* ContinueStatement();
int ReadTypeOrKeyword(struct SymbolTableEntry** Composite);
// Pointer type arithmetic: presumably ValueAt removes one level of
// indirection from Type and PointerTo adds one -- TODO confirm.
int ValueAt(int Type);
int PointerTo(int Type);
// Array and member access. Deref presumably selects access through a pointer
// instead of direct access -- TODO confirm.
struct ASTNode* AccessArray();
struct ASTNode* AccessMember(bool Deref);
// Takes a token value and returns an int; presumably the matching SyntaxOps value -- TODO confirm.
int ParseTokenToOperation(int Token);
struct ASTNode* PrintStatement(void);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * S Y M B O L T A B L E * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Symbol table interface.
// NOTE(review): behavior notes are inferred from names; confirm against the
// symbol table source. What the Find* routines return when nothing matches is
// not visible in this header.

// Debug dumps of the symbol lists.
void DumpAllLists();
void DumpList(struct SymbolTableEntry* List);
// Lookups by name. Each presumably searches the list its name refers to -- TODO confirm.
struct SymbolTableEntry* FindSymbol(char* Symbol);
struct SymbolTableEntry* FindLocal(char* Symbol);
struct SymbolTableEntry* FindGlobal(char* Symbol);
struct SymbolTableEntry* FindStruct(char* Symbol);
struct SymbolTableEntry* FindAlias(char* Symbol);
struct SymbolTableEntry* FindEnum(char* Symbol);
struct SymbolTableEntry* FindEnumMember(char* Symbol);
struct SymbolTableEntry* FindUnion(char* Symbol);
struct SymbolTableEntry* FindMember(char* Symbol);
// Takes pointers to a list's head and tail pointers plus the node to add.
void AppendSymbol(struct SymbolTableEntry** Head, struct SymbolTableEntry** Tail, struct SymbolTableEntry* Node);
void FreeLocals();
void ClearTables();
// Parameter names line up with members of struct SymbolTableEntry (Name,
// Type, Structure, Storage, Length, SinkOffset, CompositeType).
struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure, int Storage, int Length, int SinkOffset,
struct SymbolTableEntry* CompositeType);
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O N T R O L S T A T U S * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Error reporting.
// NOTE(review): judging by the names, the Die* family presumably reports the
// error and terminates; confirm against the definitions.

void Die(char* Error);
// Variants that take one extra detail alongside the error text: a reason
// string, a number, or a character.
void DieMessage(char* Error, char* Reason);
void DieDecimal(char* Error, int Number);
void DieChar(char* Error, int Char);
void DieBinary(char* Error, int Number);
// Variadic. NOTE(review): presumably printf-style formatting; the caller must
// then make sure the arguments match the format -- TODO confirm.
void ErrorReport(char* message, ...);
// NOTE(review): possibly related to FileData.CurrentSafeColumn -- TODO confirm.
void Safe();
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * C O D E G E N E R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Takes one int and returns an int. Presumably maps a primitive DataTypes
// value to its size -- TODO confirm the unit and how unknown types are handled.
int PrimitiveSize (int);
/**
* All of the functions required to be implemented by an Assembler Module.
*/
/*
 * Table of function pointers an assembler module fills in.
 *
 * Members declared with an empty parameter list `()` leave their parameters
 * unspecified (pre-C23 meaning). The int parameters and results carry no
 * names here; see each module's implementation for what they stand for.
 */
struct AssemblerVtable {
    /* ---- Entry point ---- */
    int  (*AssembleTree)(struct ASTNode *, int, int, int, int);

    /* ---- Register management ---- */
    void (*DeallocateAllRegisters)();
    int  (*RetrieveRegister)();
    void (*DeallocateRegister)(int);

    /* ---- Alignment ---- */
    int  (*AsAlignMemory)(int, int, int);
    int  (*AsCalcOffset)(int);
    void (*AsNewStackFrame)();

    /* ---- Basic operations ---- */
    int  (*AsLoad)(int);
    int  (*AsAdd)(int, int);
    int  (*AsMul)(int, int);
    int  (*AsSub)(int, int);
    int  (*AsDiv)(int, int);
    int  (*AsLdGlobalVar)(struct SymbolTableEntry *, int);
    int  (*AsLdLocalVar)(struct SymbolTableEntry *, int);
    int  (*AsStrGlobalVar)(struct SymbolTableEntry *, int);
    int  (*AsStrLocalVar)(struct SymbolTableEntry *, int);
    int  (*AsDeref)(int, int);
    int  (*AsStrDeref)(int, int, int);
    int  (*AsAddr)(struct SymbolTableEntry *);
    int  (*AsNewString)(char *);
    int  (*AsLoadString)(int);

    /* ---- Comparisons ---- */
    int  (*AsEqual)(int, int);
    int  (*AsIneq)(int, int);
    int  (*AsLess)(int, int);
    int  (*AsGreat)(int, int);
    int  (*AsLessE)(int, int);
    int  (*AsGreatE)(int, int);

    /* ---- Binary operations ---- */
    int  (*AsBitwiseAND)(int, int);
    int  (*AsBitwiseOR)(int, int);
    int  (*AsBitwiseXOR)(int, int);
    int  (*AsNegate)(int);
    int  (*AsInvert)(int);
    int  (*AsBooleanNOT)(int);
    int  (*AsShiftLeft)(int, int);
    int  (*AsShiftRight)(int, int);

    /* ---- Comparisons (second group: boolean conversion and compare helpers) ---- */
    int  (*AsBooleanConvert)(int, int, int);
    int  (*AsCompareJmp)(int, int, int, int);
    int  (*AsCompare)(int, int, int);

    /* ---- Loops and jumps ---- */
    int  (*AsIf)(struct ASTNode *, int, int);
    int  (*AsWhile)(struct ASTNode *);
    int  (*AsSwitch)(struct ASTNode *);
    void (*AsSwitchTable)(int, int, int, int *, int *, int);
    int  (*NewLabel)();
    void (*AsJmp)(int);
    void (*AsLabel)(int);

    /* ---- Call and return ---- */
    /* NOTE(review): AsShl sits in this group although its name suggests a
     * shift, alongside AsShiftLeft above -- confirm whether both are needed. */
    int  (*AsShl)(int, int);
    int  (*AsReturn)(struct SymbolTableEntry *, int);
    int  (*AsCallWrapper)(struct ASTNode *);
    void (*AsCopyArgs)(int, int);
    int  (*AsCall)(struct SymbolTableEntry *, int);
    void (*AssemblerPrint)(int);

    /* ---- Preamble and epilogue ---- */
    void (*AsGlobalSymbol)(struct SymbolTableEntry *);
    void (*AssemblerPreamble)();
    void (*AsFunctionPreamble)(struct SymbolTableEntry *);
    void (*AsFunctionEpilogue)(struct SymbolTableEntry *);
};
/* Pairs a module's name with the table of functions it provides. */
struct AssemblerModule {
    char *name;                             // Name of the module
    const struct AssemblerVtable *vtable;   // The module's function table (read-only through this pointer)
};
// Assembler module registration.
// NOTE(review): behavior notes are inferred from names; confirm against the
// definitions.

// Takes a module descriptor and returns an int; the meaning of the result is
// not visible in this header.
int RegisterModule(struct AssemblerModule*);
// Presumably calls each per-module Register* function listed below -- TODO confirm.
void RegisterAllModules();
// Module List
void RegisterQBE();
void RegisterWin32ASM();
void RegisterJVM();
/* * * * * * * * * * * * * * * * * * * * * * *
* * * * D E C L A R A T I O N * * * *
* * * * * * * * * * * * * * * * * * * * * * */
// Declarations and statement parsers.
// NOTE(review): behavior notes are inferred from names; confirm against the
// definitions.

// Takes a type, a composite symbol and a scope (presumably a StorageScope
// value -- TODO confirm) and returns a symbol table entry.
struct SymbolTableEntry* BeginVariableDeclaration(int Type, struct SymbolTableEntry* Composite, int Scope);
struct ASTNode* ParseIdentifier(void);
// One parser per control-flow statement; each returns an AST.
struct ASTNode* IfStatement();
struct ASTNode* WhileStatement();
struct ASTNode* ForStatement();
struct ASTNode* SwitchStatement();
// Takes a node and a level; presumably prints the tree below node, with level
// as the current nesting depth -- TODO confirm.
void DumpTree(struct ASTNode* node, int level);