/*************/
/*GEMWIRE    */
/*    ERYTHRO*/
/*************/

#include <limits.h>

#include <Defs.h>
#include <Data.h>


/* * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * * * * * *    C H A R       S T R E A M    * * * * * *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Push one character back so that the next NextChar() call returns it
 * instead of reading from the source file.
 *
 * Only a single character is remembered: calling this again before the
 * next NextChar() replaces the character that was pending. NextChar()
 * treats an Overread of 0 as "nothing pending", so a pushed-back 0 is
 * never returned.
 */
static void ReturnCharToStream(int Char) {
    Overread = Char;
}

/*
 * Fetch the next character of input.
 *
 * A character previously handed back through Overread takes priority and
 * clears the pending slot. Otherwise one character is read from SourceFile
 * with fgetc(), and Line is advanced whenever that character is a newline.
 * A pushed-back character never touches Line.
 *
 * Returns the character, or whatever fgetc() reports (EOF at end of input).
 */
static int NextChar(void) {
    int Pending = Overread;

    if(Pending != 0) {
        Overread = 0;
        return Pending;
    }

    int Fresh = fgetc(SourceFile);

    if(Fresh == '\n') {
        Line++;
    }

    return Fresh;
}


/*
 * Skip whitespace and return the first character that is not a space,
 * tab, newline or carriage return. The returned character has been
 * consumed from the stream.
 */
static int FindChar() {
    int Candidate;

    do {
        Candidate = NextChar();
    } while(Candidate == ' ' || Candidate == '\t' || Candidate == '\n' || Candidate == '\r');

    return Candidate;
}

/*
 * Locate Char inside String.
 *
 * Returns the zero-based index of the first occurrence of Char in String,
 * or -1 when Char does not occur.
 *
 * strchr() counts the terminating '\0' as part of the string, so a NUL
 * argument would otherwise "match" at index strlen(String). It is rejected
 * up front so that a NUL is never mistaken for a valid entry.
 */
static int FindDigitFromPos(char* String, char Char) {
    if(Char == '\0')
        return -1;

    char* Result = strchr(String, Char);
    return Result ? (int)(Result - String) : -1;
}

/*
 * Require that the current token has the given type, then advance.
 *
 * Type          - the token type CurrentToken must have.
 * TokenExpected - human-readable name of that token, used in the error.
 *
 * On a match the next token is read into CurrentToken. On a mismatch an
 * "Expected ..." message with the current line is printed and the program
 * exits with status 1.
 */
void VerifyToken(int Type, char* TokenExpected) {
    if(CurrentToken.type != Type) {
        printf("Expected %s on line %d\n", TokenExpected, Line);
        exit(1);
    }

    Tokenise(&CurrentToken);
}

/* The token handed back by RejectToken(), or NULL when none is pending. */
static struct Token* RejectedToken = NULL;

/*
 * Hand a token back to the lexer so that the next Tokenise() call
 * delivers it again. Only one token may be pending at a time; a second
 * rejection before the first has been consumed is reported through Die().
 */
void RejectToken(struct Token* Token) {
    if(RejectedToken) {
        Die("Cannot reject two tokens in a row!");
    }

    RejectedToken = Token;
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * * * *     L I T E R A L S   A N D   I D E N T I F I E R S     * * * *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Read an unsigned decimal integer literal.
 *
 * Char is the first character of the literal, already taken from the
 * stream; the remaining digits are pulled with NextChar(). The first
 * character that is not a decimal digit is handed back with
 * ReturnCharToStream() so the tokeniser sees it next.
 *
 * Returns the accumulated value. A literal that does not fit in an int is
 * reported through Die() before the accumulation would overflow, because
 * signed overflow is undefined behaviour in C.
 * NOTE(review): this relies on Die() not returning - confirm.
 */
static int ReadInteger(int Char) {
    int Digit = 0;
    int IntegerValue = 0;

    while((Digit = FindDigitFromPos("0123456789", Char)) >= 0) {
        // IntegerValue * 10 + Digit <= INT_MAX, rearranged so the check
        // itself cannot overflow.
        if(IntegerValue > (INT_MAX - Digit) / 10)
            Die("Integer literal too large");

        IntegerValue = IntegerValue * 10 + Digit;
        Char = NextChar();
    }

    ReturnCharToStream(Char);

    return IntegerValue;
}

/*
 * Read an identifier-shaped word (variable name, keyword or function name).
 *
 * Char   - first character of the word, already taken from the stream.
 * Buffer - receives the NUL-terminated word.
 * Limit  - capacity of Buffer in bytes, terminator included.
 *
 * Letters, digits and '_' are accepted. The first character that is none
 * of these is handed back to the stream. A word that would not fit in
 * Buffer prints an error with the current line and exits with status 1.
 *
 * Returns the number of characters stored, not counting the terminator.
 */
static int ReadIdentifier(int Char, char* Buffer, int Limit) {
    int Length = 0;

    for(; isalnum(Char) || Char == '_'; Char = NextChar()) {
        // Always keep one byte free for the terminator.
        if(Length >= Limit - 1) {
            printf("Identifier too long: %d\n", Line);
            exit(1);
        }

        Buffer[Length++] = Char;
    }

    // Char now holds the character that ended the word.
    ReturnCharToStream(Char);
    Buffer[Length] = '\0';

    return Length;
}

/*
 * Read one character of a character or string literal, decoding a
 * backslash escape if one is present.
 *
 * A character other than '\\' is returned unchanged. After a backslash the
 * following character selects the escape: a b f n r t v, backslash, double
 * quote and single quote are recognised. Any other escape is reported
 * through DieChar(); should that call return, the raw character after the
 * backslash is returned.
 */
static int ReadCharLiteral() {
    int First = NextChar();

    if(First != '\\')
        return First;

    int Escape = NextChar();
    int Decoded;

    switch(Escape) {
        case 'a':  Decoded = '\a'; break;
        case 'b':  Decoded = '\b'; break;
        case 'f':  Decoded = '\f'; break;
        case 'n':  Decoded = '\n'; break;
        case 'r':  Decoded = '\r'; break;
        case 't':  Decoded = '\t'; break;
        case 'v':  Decoded = '\v'; break;
        case '\\': Decoded = '\\'; break;
        case '"':  Decoded = '"';  break;
        case '\'': Decoded = '\''; break;
        default:
            DieChar("Unknown Escape: ", Escape);
            Decoded = Escape;
            break;
    }

    return Decoded;
}


/*
 * Read the body of a string literal into Buffer.
 *
 * Tokenise() calls this after it has consumed the opening '"'. Characters
 * are copied, with backslash escapes decoded by ReadCharLiteral(), until
 * an unescaped '"' closes the literal. Buffer is NUL-terminated and must
 * hold at least TEXTLEN bytes.
 *
 * The closing quote is detected on the raw character, before escape
 * decoding, so that an escaped quote (\") is stored as part of the string
 * instead of ending it. Reaching end of input inside the literal is
 * reported through Die() rather than filling the buffer with EOF.
 *
 * Returns the number of characters stored, not counting the terminator.
 */
static int ReadStringLiteral(char* Buffer) {
    int Char;

    for(int i = 0; i < TEXTLEN - 1; i++) {
        Char = NextChar();

        if(Char == '"') {
            Buffer[i] = '\0';
            return i;
        }

        if(Char == EOF)
            Die("Unterminated String Literal");

        if(Char == '\\') {
            // Hand the backslash back so ReadCharLiteral() sees the whole
            // escape sequence and decodes it.
            ReturnCharToStream(Char);
            Char = ReadCharLiteral();
        }

        Buffer[i] = Char;
    }

    Die("String Literal Too Long");
    return 0;
}

/*
 * Map a word to its keyword/type token.
 *
 * Str is compared against the table of reserved spellings below; this
 * table is what defines the valid keywords of the language.
 *
 * Returns the matching token constant, or 0 when Str is not a keyword.
 */
static int ReadKeyword(char* Str) {
    // Kept as an automatic array so the initialisers are not required to be
    // constant expressions.
    const struct {
        const char* Spelling;
        int TokenType;
    } Keywords[] = {
        { "::",     KW_FUNC   },
        { "char",   TY_CHAR   },
        { "else",   KW_ELSE   },
        { "for",    KW_FOR    },
        { "int",    TY_INT    },
        { "if",     KW_IF     },
        { "long",   TY_LONG   },
        { "print",  KW_PRINT  },
        { "return", KW_RETURN },
        { "void",   TY_VOID   },
        { "while",  KW_WHILE  },
    };
    const int Count = (int)(sizeof Keywords / sizeof Keywords[0]);
    const char First = *Str;

    for(int i = 0; i < Count; i++) {
        // Compare the first character before paying for a full strcmp().
        if(Keywords[i].Spelling[0] == First && strcmp(Str, Keywords[i].Spelling) == 0)
            return Keywords[i].TokenType;
    }

    return 0;
}

/* * * * * * * * * * * * * * * * * * * * *
 * * * *      T O K E N I S E R    * * * *
 * * * * * * * * * * * * * * * * * * * * */

/*
 * Read the next token from the source stream into *Token.
 *
 * If a token was handed back with RejectToken(), its contents are copied
 * into *Token and no input is read. Otherwise leading whitespace is
 * skipped and one token is recognised:
 *   - Token->type is set for every recognised token;
 *   - Token->value is set for integer and character literals;
 *   - CurrentIdentifier receives the text of identifiers, keywords and
 *     string literals.
 *
 * Characters that cannot start a token are reported through DieChar().
 *
 * Returns 0 when the end of input is reached (Token->type is LI_EOF),
 * 1 otherwise.
 */
int Tokenise(struct Token* Token) {
    int Char, TokenType;

    if(RejectedToken != NULL) {
        // Copy the contents: assigning the pointer would only rebind the
        // local parameter and the caller would never see the token. When
        // both pointers refer to the same object this is a no-op.
        *Token = *RejectedToken;
        RejectedToken = NULL;
        return 1;
    }

    Char = FindChar();

    switch(Char) {
        case EOF:
            Token->type = LI_EOF;
            return 0;

        case '+':
            // + can be either "+" or "++".
            Char = NextChar();
            if(Char == '+') {
                Token->type = PPMM_PLUS;
            } else {
                Token->type = AR_PLUS;
                ReturnCharToStream(Char);
            }
            break;

        case '-':
            // - can be either "-" or "--".
            Char = NextChar();
            if(Char == '-') {
                Token->type = PPMM_MINUS;
            } else {
                Token->type = AR_MINUS;
                ReturnCharToStream(Char);
            }
            break;

        case '*':
            Token->type = AR_STAR;
            break;

        case '/':
            Token->type = AR_SLASH;
            break;

        case '&':
            // & can be either "&" (bitwise) or "&&" (boolean).
            Char = NextChar();
            if(Char == '&') {
                Token->type = BOOL_AND;
            } else {
                Token->type = BIT_AND;
                ReturnCharToStream(Char);
            }
            break;

        case '|':
            // | can be either "|" (bitwise) or "||" (boolean).
            Char = NextChar();
            if(Char == '|') {
                Token->type = BOOL_OR;
            } else {
                Token->type = BIT_OR;
                ReturnCharToStream(Char);
            }
            break;

        case '^':
            Token->type = BIT_XOR;
            break;

        case '~':
            Token->type = BIT_NOT;
            break;

        case ',':
            Token->type = LI_COM;
            break;

        case '=':
            Char = NextChar();
            // "=?" is the compare-equality token.
            // NOTE(review): an earlier comment described this operator as
            // "==", but the character matched is '?' - confirm "=?" is the
            // intended spelling.
            if(Char == '?') {
                Token->type = CMP_EQUAL;
            // "=>" is the greater-than-or-equal token.
            } else if(Char == '>') {
                Token->type = CMP_GTE;
            // Anything else is a plain "=" followed by an unrelated
            // character, which goes back to the stream.
            } else {
                ReturnCharToStream(Char);
                Token->type = LI_EQUAL;
            }
            break;

        case '!':
            Char = NextChar();
            // "!=" is the compare-inequality token.
            if(Char == '=') {
                Token->type = CMP_INEQ;
            // Otherwise it is a lone "!" and the extra character goes back.
            } else {
                Token->type = BOOL_INVERT;
                ReturnCharToStream(Char);
            }
            break;

        case '<':
            Char = NextChar();
            // "<=" is less-than-or-equal, "<<" is shift-left.
            if(Char == '=') {
                Token->type = CMP_LTE;
            } else if(Char == '<') {
                Token->type = SH_LEFT;
            } else {
                ReturnCharToStream(Char);
                Token->type = CMP_LT;
            }
            break;

        case '>':
            // Greater-than-or-equal is spelled "=>" and is handled under
            // '=', so only ">>" (shift-right) needs checking here.
            Char = NextChar();
            if(Char == '>') {
                Token->type = SH_RIGHT;
            } else {
                Token->type = CMP_GT;
                ReturnCharToStream(Char);
            }
            break;

        case ';':
            Token->type = LI_SEMIC;
            break;

        case '(':
            Token->type = LI_LPARE;
            break;

        case ')':
            Token->type = LI_RPARE;
            break;

        case '{':
            Token->type = LI_LBRAC;
            break;

        case '}':
            Token->type = LI_RBRAC;
            break;

        case '[':
            Token->type = LI_LBRAS;
            break;

        case ']':
            Token->type = LI_RBRAS;
            break;

        case ':':
            Char = NextChar();

            if(Char == ':') {
                Token->type = KW_FUNC;
            } else {
                // A lone ':' is not a token. Report it instead of returning
                // success with Token->type left over from an earlier token.
                ReturnCharToStream(Char);
                DieChar("Unrecognized character", ':');
            }
            break;

        case '\'':
            // Character literals are delivered as integer literals.
            Token->value = ReadCharLiteral();
            Token->type = LI_INT;

            if(NextChar() != '\'')
                Die("Expected '\\'' at the end of a character.");
            break;

        case '"':
            ReadStringLiteral(CurrentIdentifier);
            Token->type = LI_STR;
            break;

        default:
            if(isdigit(Char)) {

                Token->value = ReadInteger(Char);
                Token->type = LI_INT;
                break;

            } else if(isalpha(Char) || Char == '_') { // This is what defines what a variable/function/keyword can START with.
                ReadIdentifier(Char, CurrentIdentifier, TEXTLEN);

                // ReadKeyword() returns 0 for anything that is not a
                // reserved word; such words are plain identifiers.
                TokenType = ReadKeyword(CurrentIdentifier);
                Token->type = (TokenType != 0) ? TokenType : TY_IDENTIFIER;
                break;
            }

            DieChar("Unrecognized character", Char);

    }

    return 1;
}