323 lines
7.5 KiB
C
323 lines
7.5 KiB
C
|
|
||
|
/*************/
|
||
|
/*GEMWIRE */
|
||
|
/* ERYTHRO*/
|
||
|
/*************/
|
||
|
|
||
|
#include <limits.h>

#include <Defs.h>
#include <Data.h>
|
||
|
|
||
|
|
||
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||
|
* * * * * *   C H A R   S T R E A M   * * * * * *
|
||
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||
|
static void ReturnCharToStream(int Char) {
|
||
|
Overread = Char;
|
||
|
}
|
||
|
|
||
|
static int NextChar(void) {
|
||
|
int Char;
|
||
|
|
||
|
if(Overread) {
|
||
|
Char = Overread;
|
||
|
Overread = 0;
|
||
|
return Char;
|
||
|
}
|
||
|
|
||
|
Char = fgetc(SourceFile);
|
||
|
|
||
|
if(Char == '\n')
|
||
|
Line++;
|
||
|
|
||
|
return Char;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Return the next character that is not a space, tab, newline or
 * carriage return, consuming any such whitespace on the way.
 */
static int FindChar() {
    for(;;) {
        int Candidate = NextChar();

        switch(Candidate) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                // Whitespace: keep reading.
                continue;

            default:
                return Candidate;
        }
    }
}
|
||
|
|
||
|
/*
 * Locate Char inside String.
 *
 * Returns the zero-based index of the first occurrence of Char in
 * String, or -1 when Char does not occur.
 *
 * strchr treats the terminating NUL as part of the string, so searching
 * for '\0' would "succeed" and yield strlen(String). A NUL is never a
 * member of the set, so it is rejected up front.
 */
static int FindDigitFromPos(char* String, char Char) {
    char* Result;

    if(Char == '\0')
        return -1;

    Result = strchr(String, Char);

    return Result ? (int) (Result - String) : -1;
}
|
||
|
|
||
|
void VerifyToken(int Type, char* TokenExpected) {
|
||
|
if(CurrentToken.type == Type)
|
||
|
Tokenise(&CurrentToken);
|
||
|
else {
|
||
|
printf("Expected %s on line %d\n", TokenExpected, Line);
|
||
|
exit(1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Token handed back by RejectToken and not yet re-delivered by Tokenise;
// NULL when nothing is pending.
static struct Token* RejectedToken = NULL;

/*
 * Hand a token back to the tokeniser so that the next Tokenise call
 * takes the rejected-token path instead of reading new input.
 *
 * Only one token may be pending at a time; a second rejection before the
 * pending one has been consumed is reported through Die.
 */
void RejectToken(struct Token* Token) {
    if(RejectedToken) {
        Die("Cannot reject two tokens in a row!");
    }

    RejectedToken = Token;
}
|
||
|
|
||
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||
|
* * * *   L I T E R A L S   A N D   I D E N T I F I E R S   * * * *
|
||
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||
|
|
||
|
static int ReadInteger(int Char) {
|
||
|
int CurrentChar = 0;
|
||
|
int IntegerValue = 0;
|
||
|
|
||
|
while((CurrentChar = FindDigitFromPos("0123456789", Char)) >= 0) {
|
||
|
IntegerValue = IntegerValue * 10 + CurrentChar;
|
||
|
Char = NextChar();
|
||
|
}
|
||
|
|
||
|
ReturnCharToStream(Char);
|
||
|
|
||
|
return IntegerValue;
|
||
|
}
|
||
|
|
||
|
// Variable identifier, keyword, function.
|
||
|
static int ReadIdentifier(int Char, char* Buffer, int Limit) {
|
||
|
int ind = 0;
|
||
|
|
||
|
// This defines the valid chars in a keyword/variable/function.
|
||
|
while(isalpha(Char) || isdigit(Char) || Char == '_') {
|
||
|
if (ind >= Limit - 1) {
|
||
|
printf("Identifier too long: %d\n", Line);
|
||
|
exit(1);
|
||
|
} else {
|
||
|
Buffer[ind++] = Char;
|
||
|
}
|
||
|
|
||
|
Char = NextChar();
|
||
|
}
|
||
|
|
||
|
// At this point, we've reached a non-keyword character
|
||
|
ReturnCharToStream(Char);
|
||
|
Buffer[ind] = '\0';
|
||
|
return ind;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* This function is what defines the valid keywords for the language
|
||
|
* //TODO: move this to a static list?
|
||
|
* //TODO: More optimisations?
|
||
|
*
|
||
|
*/
|
||
|
static int ReadKeyword(char* Str) {
|
||
|
// First, scan with reference intact.
|
||
|
switch(*Str) {
|
||
|
// This lets us case against the first char:
|
||
|
case ':':
|
||
|
if(!strcmp(Str, "::"))
|
||
|
return KW_FUNC;
|
||
|
break;
|
||
|
|
||
|
case 'c':
|
||
|
if(!strcmp(Str, "char"))
|
||
|
return TY_CHAR;
|
||
|
break;
|
||
|
|
||
|
case 'e':
|
||
|
if(!strcmp(Str, "else"))
|
||
|
return KW_ELSE;
|
||
|
|
||
|
break;
|
||
|
|
||
|
case 'f':
|
||
|
if(!strcmp(Str, "for"))
|
||
|
return KW_FOR;
|
||
|
break;
|
||
|
|
||
|
case 'i':
|
||
|
|
||
|
if(!strcmp(Str, "int"))
|
||
|
return TY_INT;
|
||
|
|
||
|
if(!strcmp(Str, "if"))
|
||
|
return KW_IF;
|
||
|
|
||
|
break;
|
||
|
|
||
|
case 'l':
|
||
|
if(!strcmp(Str, "long"))
|
||
|
return TY_LONG;
|
||
|
|
||
|
break;
|
||
|
|
||
|
case 'p':
|
||
|
// This is a huge optimisation once we have as many keywords as a fully featured language.
|
||
|
if(!strcmp(Str, "print"))
|
||
|
return KW_PRINT;
|
||
|
break;
|
||
|
|
||
|
case 'r':
|
||
|
if(!strcmp(Str, "return"))
|
||
|
return KW_RETURN;
|
||
|
break;
|
||
|
|
||
|
case 'v':
|
||
|
if(!strcmp(Str, "void"))
|
||
|
return TY_VOID;
|
||
|
break;
|
||
|
|
||
|
case 'w':
|
||
|
if(!strcmp(Str, "while"))
|
||
|
return KW_WHILE;
|
||
|
break;
|
||
|
|
||
|
|
||
|
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* * * * * * * * * * * * * * * * * * * * *
|
||
|
* * * * T O K E N I S E R * * * *
|
||
|
* * * * * * * * * * * * * * * * * * * * */
|
||
|
|
||
|
int Tokenise(struct Token* Token) {
|
||
|
int Char, TokenType;
|
||
|
|
||
|
if(RejectedToken != NULL) {
|
||
|
Token = RejectedToken;
|
||
|
RejectedToken = NULL;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
Char = FindChar();
|
||
|
|
||
|
switch(Char) {
|
||
|
case EOF:
|
||
|
Token->type = LI_EOF;
|
||
|
return 0;
|
||
|
|
||
|
case '+':
|
||
|
Token->type = AR_PLUS;
|
||
|
break;
|
||
|
|
||
|
case '-':
|
||
|
Token->type = AR_MINUS;
|
||
|
break;
|
||
|
|
||
|
case '*':
|
||
|
Token->type = AR_STAR;
|
||
|
break;
|
||
|
|
||
|
case '/':
|
||
|
Token->type = AR_SLASH;
|
||
|
break;
|
||
|
|
||
|
case '=':
|
||
|
Char = NextChar();
|
||
|
// If the next char is =, we have ==, the compare equality token.
|
||
|
if(Char == '?') {
|
||
|
Token->type = CMP_EQUAL;
|
||
|
// if the next char is >, we have =>, the greater than or equal token.
|
||
|
} else if(Char == '>') {
|
||
|
Token->type = CMP_GTE;
|
||
|
// If none of the above match, we have = and an extra char. Return the char and set the token
|
||
|
} else {
|
||
|
ReturnCharToStream(Char);
|
||
|
Token->type = LI_EQUAL;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case '!':
|
||
|
Char = NextChar();
|
||
|
// If the next char is =, we have !=, the compare inequality operator.
|
||
|
if(Char == '=') {
|
||
|
Token->type = CMP_INEQ;
|
||
|
// Otherwise, we have a spare char
|
||
|
} else {
|
||
|
ReturnCharToStream(Char);
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case '<':
|
||
|
Char = NextChar();
|
||
|
// If the next char is =, we have <=, the less than or equal comparator.
|
||
|
if(Char == '=') {
|
||
|
Token->type = CMP_LTE;
|
||
|
} else {
|
||
|
ReturnCharToStream(Char);
|
||
|
Token->type = CMP_LT;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case '>':
|
||
|
// There is no special casing for >. Less than or equal is =>
|
||
|
Token->type = CMP_GT;
|
||
|
break;
|
||
|
|
||
|
case ';':
|
||
|
Token->type = LI_SEMIC;
|
||
|
break;
|
||
|
|
||
|
case '(':
|
||
|
Token->type = LI_LPARE;
|
||
|
break;
|
||
|
|
||
|
case ')':
|
||
|
Token->type = LI_RPARE;
|
||
|
break;
|
||
|
|
||
|
case '{':
|
||
|
Token->type = LI_LBRAC;
|
||
|
break;
|
||
|
|
||
|
case '}':
|
||
|
Token->type = LI_RBRAC;
|
||
|
break;
|
||
|
|
||
|
case ':':
|
||
|
Char = NextChar();
|
||
|
|
||
|
if(Char == ':') {
|
||
|
Token->type = KW_FUNC;
|
||
|
} else {
|
||
|
ReturnCharToStream(Char);
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
if(isdigit(Char)) {
|
||
|
|
||
|
Token->value = ReadInteger(Char);
|
||
|
Token->type = LI_INT;
|
||
|
break;
|
||
|
|
||
|
} else if(isalpha(Char) || Char == '_') { // This is what defines what a variable/function/keyword can START with.
|
||
|
ReadIdentifier(Char, CurrentIdentifier, TEXTLEN);
|
||
|
|
||
|
if(TokenType = ReadKeyword(CurrentIdentifier)) {
|
||
|
Token->type = TokenType;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
Token->type = TY_IDENTIFIER;
|
||
|
break;
|
||
|
//printf("Line %d: Unrecognized symbol %s\n", CurrentIdentifier, Line);
|
||
|
//exit(1);
|
||
|
}
|
||
|
|
||
|
|
||
|
DieChar("Unrecognized character", Char);
|
||
|
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
}
|
||
|
|