/*************/
/*GEMWIRE    */
/*    ERYTHRO*/
/*************/

#pragma once

/*
 * NOTE(review): the header names on these five directives were missing.
 * <stdio.h> (FILE) and <stdbool.h> (bool) are required by the declarations
 *  below; the remaining three are assumed - confirm against the sources
 *  that include this header.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdbool.h>
#include <string.h>

/*
 * ARithmetic tokens are prefixed AR.
 * LIteral tokens are prefixed LI.
 * KeyWords are prefixed KW.
 * TYpes are prefixed TY.
 * CoMParisons are prefixed CMP.
 * BOOLean maths is prefixed BOOL.
 * BITwise maths is prefixed BIT.
 * Arithmetic SHifts are prefixed SH.
 * PlusPlusMinusMinus operators are prefixed PPMM.
 *
 *
 * NOTE: Tokens are different from Syntax Operations!
 *
 * Tokens should represent the characters that invoke them,
 *  not the actions they perform.
 *
 * The order of the first entries (LI_EQUAL through AR_SLASH) matters:
 *  each one shares its index with the SyntaxOps entry it invokes.
 */

enum TokenTypes {
    LI_EOF,
    LI_EQUAL,       // =

    BOOL_OR,        // Boolean OR (||)
    BOOL_AND,       // Boolean AND (&&)

    BIT_OR,         // Bitwise OR (|)
    BIT_XOR,        // Bitwise XOR (^)
    BIT_AND,        // Bitwise AND (&)

    CMP_EQUAL,      // =?
    CMP_INEQ,       // !=
    CMP_LT,         // <
    CMP_GT,         // >
    CMP_LTE,        // <=
    CMP_GTE,        // =>

    SH_LEFT,        // Left Shift (<<)
    SH_RIGHT,       // Right Shift (>>)

    AR_PLUS,        // Arithmetic +
    AR_MINUS,       // Arithmetic -
    AR_STAR,        // Arithmetic *
    AR_SLASH,       // Arithmetic /

    PPMM_PLUS,      // PPMM Increment (++)
    PPMM_MINUS,     // PPMM Decrement (--)

    BOOL_INVERT,    // Boolean Invert (!)

    BIT_NOT,        // Bitwise NOT (~)

    LI_INT,         // Integer literal
    LI_STR,         // String literal
    LI_SEMIC,       // ;
    LI_COLON,       // :

    LI_LBRAC,       // {
    LI_RBRAC,       // }

    LI_LBRAS,       // [
    LI_RBRAS,       // ]

    LI_LPARE,       // (
    LI_RPARE,       // )

    LI_COM,         // ,
    LI_DOT,         // .
    LI_ARROW,       // ->

    TY_IDENTIFIER,  // Identifier name. Variable, function, etc.
    TY_NONE,        // No return type. Literal void.
    TY_CHAR,        // "char" type keyword
    TY_INT,         // "int" type keyword
    TY_LONG,        // "long" type keyword
    TY_VOID,        // "void" type keyword

    KW_FUNC,        // :: function name incoming

    KW_BREAK,       // "break" keyword
    KW_CONTINUE,    // "continue" keyword

    KW_SWITCH,      // "switch" keyword
    KW_DEFAULT,     // "default" keyword
    KW_CASE,        // "case" keyword

    KW_PRINT,
    KW_IF,
    KW_ELSE,
    KW_WHILE,
    KW_FOR,
    KW_RETURN,
    KW_STRUCT,
    KW_UNION,
    KW_ENUM,
    KW_ALIAS,
    KW_IMPORT
};

/*
 * All Syntax Operations are prefixed OP.
 * Terminal Operations are prefixed TERM.
 * L-Values are prefixed LV.
 * Reference Operations are prefixed REF.
 *
 * These represent the actions that a token will perform.
 * These are used exclusively in AST construction.
 *
 * It is important that Tokens and Operations are logically separated,
 *  but that the Operation's index is the same as the Token that invokes it.
 *
 * Every five elements, an index is assigned. These are the natural indices.
 * They are marked to make navigation of the Syntax Tree easier.
 */

enum SyntaxOps {
    OP_ASSIGN = 1,          // Assign an l-value

    OP_BOOLOR,              // Boolean OR two statements
    OP_BOOLAND,             // Boolean AND two statements

    OP_BITOR,               // Bitwise OR a number
    OP_BITXOR = 5,          // Bitwise XOR a number
    OP_BITAND,              // Bitwise AND a number

    OP_EQUAL,               // Compare equality
    OP_INEQ,                // Compare inequality
    OP_LESS,                // Less than?
    OP_GREAT = 10,          // Greater than?
    OP_LESSE,               // Less than or Equal to?
    OP_GREATE,              // Greater than or Equal to?

    OP_SHIFTL,              // Arithmetic Shift Left (Multiply by 2)
    OP_SHIFTR,              // Arithmetic Shift Right (Divide by 2)

    OP_ADD = 15,            // Add two numbers.
    OP_SUBTRACT,            // Subtract two numbers.
    OP_MULTIPLY,            // Multiply two numbers.
    OP_DIVIDE,              // Divide two numbers.

    OP_PREINC,              // Increment var before reference.
    OP_PREDEC = 20,         // Decrement var before reference.
    OP_POSTINC,             // Increment var after reference.
    OP_POSTDEC,             // Decrement var after reference.

    OP_BITNOT,              // Invert a number bitwise
    OP_BOOLNOT,             // Invert a statement logically
    OP_NEGATE = 25,         // Negate a number (turn a positive number negative)

    OP_BOOLCONV,            // Convert an expression to a boolean.

    OP_ADDRESS,             // Fetch the address of a var
    OP_DEREF,               // Get the value of the address in a pointer

    TERM_INTLITERAL,        // Integer Literal. This is a virtual operation, so it's a terminal.
    TERM_STRLITERAL = 30,   // String Literal. Also terminal.

    REF_IDENT,              // Reference (read) an identifier (variable).

    OP_WIDEN,               // Something contains a type that needs to be casted up
    OP_SCALE,               // We have a pointer that needs to be scaled!

    OP_CALL,                // Call a function
    OP_RET = 35,            // Return from a function

    OP_COMP,                // Compound statements need a way to be "glued" together. This is one of those mechanisms
    OP_IF,                  // If statement
    OP_LOOP,                // FOR, WHILE
    OP_PRINT,               // Print statement

    OP_FUNC = 40,           // Define a function
    OP_BREAK,               // Break out of the loop
    OP_CONTINUE,            // Continue the loop

    OP_SWITCH,              // Switch statement
    OP_DEFAULT,             // Default case
    OP_CASE = 45            // Case
};

/**
 * The way syntax is stored by the parser and assembled into a usable file.
 * An ASTNode forms an item in a linked list.
 *
 * Thus, you can traverse up and down a tree of ASTNodes easily.
 *
 * Walking the tree is as simple as reading the Operation and recursively
 *  reading the Left, Middle and Right nodes as called for.
 * For example, an if-else statement uses all three subnodes.
 *
 * This means that AST Nodes aren't exactly a binary tree, but a syntax tree nonetheless.
 */
struct ASTNode {
    int Operation;  // SyntaxOps Index
    int ExprType;   // Value->IntValue's DataType
    int RVal;       // True if this node is an Rval, false if Lval
    struct ASTNode* Left;
    struct ASTNode* Middle;
    struct ASTNode* Right;
    struct SymbolTableEntry* Symbol;
    // Both members share storage; which one is meaningful depends on Operation.
    union {
        int Size;       // OP_SCALE's linear representation
        int IntValue;   // TERM_INTLITERAL's Value
    };
};

/**
 * Describes the basic unit of syntax in the language.
 * A token has a type (an index into the TokenTypes enum) and a value.
 *
 * The value represents the numerical value of an integer literal, etc.
 */
struct Token {
    int type;
    int value;
};

/*
 * The Symbol Table, used for variables, functions and
 *  assorted goodies.
 */
struct SymbolTableEntry {
    char* Name;
    int Type;       // An entry in DataTypes, referring to the type of this data
    struct SymbolTableEntry* CompositeType; // A pointer to the start of a Symbol Table list that represents a certain Composite type
    int Structure;  // An entry in StructureType - metadata on how to process the data
    int Storage;    // The scope of this symbol - decides when it is discarded.

    union {
        int EndLabel;   // For a function - The number of the label to jump to, in order to exit this function (if applicable)
        int Length;     // For an array - The length of the symbol in units of 1 element -- the size of an array, for example.
        int IntValue;   // For an enum - The value of an Enum entry
    };

    union {
        int SinkOffset; // For a variable - How many times must we sink the rbp to get to this symbol in the stack?
        int Elements;   // For a function - How many parameters?
    };

    struct SymbolTableEntry* NextSymbol;    // The next symbol in a list
    struct SymbolTableEntry* Start;         // The first member in a list
};

/**
 * Information about a given source file.
 *
 * A file that starts the parsing of another file will never confuse the parser.
 *
 * It is the end goal that the parser will be multithreaded, operating on a single file at a time.
 *
 * Note that files do not contain their own symbol tables - these are global.
 */
struct FileData {
    // Whether or not this file will accept definitions of functions.
    bool AllowDefinitions;

    // A FILE stream that we can read the file from.
    FILE* Stream;

    // The filename of the source code
    char* SourceName;
    // The filename of the assembly output
    char* AssemblyName;
    // The filename of the assembled object code
    char* ObjectName;

    // The line of the file we are currently working on, -1 if it is finished
    long CurrentLine;
    // The column of the file we are currently working on, -1 if it is finished
    long CurrentColumn;
    // The depth of the loop currently being parsed.
    long CurrentLoopDepth;
    // The column that was last marked as "valid", the start of the error block if something goes wrong.
    long CurrentSafeColumn;

    // Whether or not we are currently parsing a switch statement - changes the behavior of compound statements!
    bool SwitchStatement;

    // The symbol currently being lexed - TokenTypes index and integer value.
    struct Token CurrentSymbol;

    // The function currently being parsed - null if in global scope or if finished.
    struct SymbolTableEntry* FunctionEntry;

    // Once ready, the full AST trees of this file.
    struct ASTNode* Tree;
};

enum StorageScope {
    SC_GLOBAL = 1,  // Global Scope
    SC_STRUCT,      // Struct Definitions
    SC_UNION,       // Union Definitions
    SC_ENUM,        // Enum Definitions
    SC_ENUMENTRY,   // Enum Entry Names
    SC_ALIAS,       // Typedef aliases
    SC_MEMBER,      // The members of Structs or Unions
    //SC_CLASS,     // Class-local definitions
    //SC_STATIC,    // Static storage definitions

    SC_PARAM,       // Function parameters
    SC_LOCAL        // Function-local scope.
                    // There is no deeper scope than function.
};

/*
 * The types of data being held in memory.
 * The lowest 4 bits of these enum values
 *  encode a nested pointer type.
 *
 * Four bits give room for up to 15 levels of indirection per base type.
 * Should be enough for everyone, right?
 *
 * Because of that encoding, every base type must be a multiple of 16.
 * The composite entries are therefore numbered explicitly; relying on the
 *  implicit "previous + 1" would make DAT_UNION equal DAT_STRUCT + 1, which
 *  is the same value as a DAT_STRUCT with one level of pointer nesting.
 */
enum DataTypes {
    RET_NONE   = 0,     // No return type. Literal void.
    RET_CHAR   = 16,    // "char" type keyword
    RET_INT    = 32,    // "int" type keyword
    RET_LONG   = 48,    // "long" type keyword
    RET_VOID   = 64,    // "void" type keyword

    DAT_STRUCT = 80,    // Struct Data
    DAT_UNION  = 96,    // Union Data
    DAT_ENUM   = 112,   // Enum Data
    DAT_ALIAS  = 128,   // Alias Definition
    DAT_NONE   = 144,   // No type, no work needed.
};

/*
 * The type of the structure of data being examined
 * //TODO: move into TokenTypes?
 */
enum StructureType {
    ST_VAR,     // This is a variable
    ST_FUNC,    // This is a function
    ST_ARR,     // This is an array
    ST_RUCT,    // This is a struct
    ST_ENUM,    // This is an enum
    // NOTE(review): a "This is a typedef" entry was described here,
    //  but no enumerator is declared for it.
};

/* * * * * * * * * * * * * * * * * * * *
 * * * *    A R G U M E N T S    * * * *
 * * * * * * * * * * * * * * * * * * * */

char* Suffixate(char* String, char Suffix);

void Compile(struct FileData* InputFile);

void Assemble(struct FileData* InputFile);

void Link(char* Output, struct FileData* Objects[], int ObjectsLength);

void DisplayUsage(char* ProgName);

/* * * * * * * * * * * * * * * * * * * *
 * * * * * *   L E X I N G   * * * * * *
 * * * * * * * * * * * * * * * * * * * */

void Tokenise();

void VerifyToken(int Type, char* TokenExpected);

bool OptionallyConsume(int Type);

// NOTE(review): these two are declared static in a shared header, so every
//  file that includes it gets its own internal-linkage declaration - confirm
//  they are meant to live here.
static int ReadIdentifier(int Char, char* Buffer, int Limit);

static int ReadKeyword(char* Str);

void ImportModule();

/* * * * * * * * * * * * * * * * * * * *
 * * * * * *    T Y P E S    * * * * * *
 * * * * * * * * * * * * * * * * * * * */

struct ASTNode* MutateType(struct ASTNode* Tree, int RightType, int Operation);

int TypeIsInt(int Type);

int TypeIsPtr(int Type);

char* TypeNames(int Type);

int TypeSize(int Type, struct SymbolTableEntry* Composite);

/* * * * * * * * * * * * * * * * * * * *
 * * * *  S Y N T A X   T R E E  * * * *
 * * * * * * * * * * * * * * * * * * * */

struct ASTNode* ConstructASTNode(int Operation, int Type,
                                 struct ASTNode* Left,
                                 struct ASTNode* Middle,
                                 struct ASTNode* Right,
                                 struct SymbolTableEntry* Symbol,
                                 int IntValue);

struct ASTNode* ConstructASTLeaf(int Operation, int Type,
                                 struct SymbolTableEntry* Symbol,
                                 int IntValue);

struct ASTNode* ConstructASTBranch(int Operation, int Type,
                                   struct ASTNode* Left,
                                   struct SymbolTableEntry* Symbol,
                                   int IntValue);

/* * * * * * * * * * * * * * * * * * * *
 * * * * *   P A R S I N G   * * * * * *
 * * * * * * * * * * * * * * * * * * * */

struct ASTNode* ParsePrecedenceASTNode(int PreviousTokenPrecedence);

struct ASTNode* ParsePrimary(void);

struct ASTNode* ParseStatement(void);

struct ASTNode* PrefixStatement();

struct ASTNode* PostfixStatement();

void ParseGlobals();

int ParseDeclarationList(struct SymbolTableEntry** CompositeType, int ClassType,
                         int StatementEndSymbool, int TerminateSymbol);

struct ASTNode* ParseCompound();

struct SymbolTableEntry* BeginCompositeDeclaration(int Type);

struct ASTNode* ParseExpressionList(int terminateToken);

struct ASTNode* CallFunction();

struct ASTNode* ReturnStatement();

struct ASTNode* BreakStatement();

struct ASTNode* ContinueStatement();

int ValueAt(int Type);

int PointerTo(int Type);

struct ASTNode* AccessArray();

struct ASTNode* AccessMember(bool Deref);

int ParseTokenToOperation(int Token);

/* * * * * * * * * * * * * * * * * * * *
 * * *  S Y M B O L   T A B L E  * * * *
 * * * * * * * * * * * * * * * * * * * */

void DumpAllLists();

void DumpList(struct SymbolTableEntry* List);

struct SymbolTableEntry* FindSymbol(char* Symbol);

struct SymbolTableEntry* FindLocal(char* Symbol);

struct SymbolTableEntry* FindGlobal(char* Symbol);

struct SymbolTableEntry* FindStruct(char* Symbol);

struct SymbolTableEntry* FindAlias(char* Symbol);

struct SymbolTableEntry* FindEnum(char* Symbol);

struct SymbolTableEntry* FindEnumMember(char* Symbol);

struct SymbolTableEntry* FindUnion(char* Symbol);

struct SymbolTableEntry* FindMember(char* Symbol);

void AppendSymbol(struct SymbolTableEntry** Head,
                  struct SymbolTableEntry** Tail,
                  struct SymbolTableEntry* Node);

void FreeLocals();

void ClearTables();

struct SymbolTableEntry* AddSymbol(char* Name, int Type, int Structure,
                                   int Storage, int Length, int SinkOffset,
                                   struct SymbolTableEntry* CompositeType);

/* * * * * * * * * * * * * * * * * * * *
 * *  C O N T R O L   S T A T U S  * * *
 * * * * * * * * * * * * * * * * * * * */

void Die(char* Error);

void DieMessage(char* Error, char* Reason);

void DieDecimal(char* Error, int Number);

void DieChar(char* Error, int Char);

void DieBinary(char* Error, int Number);

void ErrorReport(char* message, ...);

void Safe();

/* * * * * * * * * * * * * * * * * * * *
 * *  C O D E   G E N E R A T I O N  * *
 * * * * * * * * * * * * * * * * * * * */

int PrimitiveSize (int);

/**
 * All of the functions required to be implemented by an Assembler Module.
 *
 * The member order is part of the layout; keep it stable so that any
 *  positional initialisers of this struct stay valid.
 */
struct AssemblerVtable {
    // Entry Point
    int (*AssembleTree)(struct ASTNode*, int, int, int, int);

    // Register management
    void (*DeallocateAllRegisters)();
    int (*RetrieveRegister)();
    void (*DeallocateRegister)(int);

    // Alignment
    int (*AsAlignMemory)(int, int, int);
    int (*AsCalcOffset)(int);
    void (*AsNewStackFrame)();

    // Basic operations
    int (*AsLoad)(int);
    int (*AsAdd)(int, int);
    int (*AsMul)(int, int);
    int (*AsSub)(int, int);
    int (*AsDiv)(int, int);
    int (*AsLdGlobalVar)(struct SymbolTableEntry*, int);
    int (*AsLdLocalVar)(struct SymbolTableEntry*, int);
    int (*AsStrGlobalVar)(struct SymbolTableEntry*, int);
    int (*AsStrLocalVar)(struct SymbolTableEntry*, int);
    int (*AsDeref)(int, int);
    int (*AsStrDeref)(int, int, int);
    int (*AsAddr)(struct SymbolTableEntry*);
    int (*AsNewString)(char*);
    int (*AsLoadString)(int);

    // Comparisons
    int (*AsEqual)(int, int);
    int (*AsIneq)(int, int);
    int (*AsLess)(int, int);
    int (*AsGreat)(int, int);
    int (*AsLessE)(int, int);
    int (*AsGreatE)(int, int);

    // Binary operations
    int (*AsBitwiseAND)(int, int);
    int (*AsBitwiseOR)(int, int);
    int (*AsBitwiseXOR)(int, int);
    int (*AsNegate)(int);
    int (*AsInvert)(int);
    int (*AsBooleanNOT)(int);
    int (*AsShiftLeft)(int, int);
    int (*AsShiftRight)(int, int);

    // Boolean conversion and compare-and-jump
    int (*AsBooleanConvert)(int, int, int);
    int (*AsCompareJmp)(int, int, int, int);
    int (*AsCompare)(int, int, int);

    // Loops and jumps
    int (*AsIf)(struct ASTNode*, int, int);
    int (*AsWhile)(struct ASTNode*);
    int (*AsSwitch)(struct ASTNode*);
    void (*AsSwitchTable)(int, int, int, int*, int*, int);
    int (*NewLabel)();
    void (*AsJmp)(int);
    void (*AsLabel)(int);

    // Call and return
    int (*AsShl)(int, int);
    int (*AsReturn)(struct SymbolTableEntry*, int);
    int (*AsCallWrapper)(struct ASTNode*);
    void (*AsCopyArgs)(int, int);
    int (*AsCall)(struct SymbolTableEntry*, int);
    void (*AssemblerPrint)(int);

    // Preamble and epilogue
    void (*AsGlobalSymbol)(struct SymbolTableEntry*);
    void (*AssemblerPreamble)();
    void (*AsFunctionPreamble)(struct SymbolTableEntry*);
    void (*AsFunctionEpilogue)(struct SymbolTableEntry*);
};

/*
 * A named Assembler Module: a name paired with the table of
 *  functions that implements it.
 */
struct AssemblerModule {
    char* name;
    const struct AssemblerVtable* vtable;
};

int RegisterModule(struct AssemblerModule*);

void RegisterAllModules();

// Module List
void RegisterQBE();
void RegisterWin32ASM();
void RegisterJVM();

/* * * * * * * * * * * * * * * * * * * *
 * * * *  D E C L A R A T I O N  * * * *
 * * * * * * * * * * * * * * * * * * * */

struct SymbolTableEntry* BeginVariableDeclaration(int Type,
                                                  struct SymbolTableEntry* Composite,
                                                  int Scope);

struct ASTNode* IfStatement();

struct ASTNode* WhileStatement();

struct ASTNode* ForStatement();

struct ASTNode* SwitchStatement();

void DumpTree(struct ASTNode* node, int level);