174 lines
5.8 KiB
C++
174 lines
5.8 KiB
C++
#include "tokenizer.h"
|
|
#include "error.h"
|
|
|
|
// Returns true when the two NUL-terminated strings `a` and `b` have exactly
// the same contents (same length, same characters), false otherwise.
static bool streq(const char* a, const char* b) {
    for (;;) {
        const char lhs = *a++;
        const char rhs = *b++;
        // Covers both a differing character and one string ending early.
        if (lhs != rhs) { return false; }
        // Both characters are equal here, so both strings ended together.
        if (lhs == 0) { return true; }
    }
}
|
|
|
|
static const char* ptr;
|
|
static t_tokentype current_token = TOKEN_ERROR;
|
|
static char identifier[255];
|
|
static int value;
|
|
static int line = 0;
|
|
static int row = 0;
|
|
static int current_tokenline = 0;
|
|
static int current_tokenrow = 0;
|
|
|
|
struct t_token_op {
|
|
const char* str;
|
|
t_tokentype tokentype;
|
|
};
|
|
|
|
// Reserved words recognised by the tokenizer. A scanned identifier is
// lowercased and compared for exact equality against each `str` in turn;
// the first match decides the token type. All spellings are distinct.
t_token_op tkn_tokens[] = {
    { "let",            TOKEN_LET },
    { "if",             TOKEN_IF },
    { "then",           TOKEN_THEN },
    { "else",           TOKEN_ELSE },
    { "end",            TOKEN_END },
    { "for",            TOKEN_FOR },
    { "to",             TOKEN_TO },
    { "break",          TOKEN_BREAK },
    { "continue",       TOKEN_CONTINUE },
    { "goto",           TOKEN_GOTO },
    { "gosub",          TOKEN_GOSUB },
    { "return",         TOKEN_RETURN },
    { "rem",            TOKEN_REM },
    { "quit",           TOKEN_QUIT },
    { "and",            TOKEN_AND },
    { "or",             TOKEN_OR },
    { "not",            TOKEN_NOT },
    { "mod",            TOKEN_MOD },
    { "dim",            TOKEN_DIM },
    { "const",          TOKEN_CONST },
    { "inc",            TOKEN_INC },
    { "dec",            TOKEN_DEC },
    { "strleft",        TOKEN_STRLEFT },
    { "strlen",         TOKEN_STRLEN },
    { "debug",          TOKEN_DEBUG },
    { "chr",            TOKEN_CHR },
    { "str",            TOKEN_STR },
    { "setsprite",      TOKEN_SETSPRITE },
    { "putsprite",      TOKEN_PUTSPRITE },
    { "locate",         TOKEN_LOCATE },
    { "print",          TOKEN_PRINT },
    { "putchar",        TOKEN_PUTCHAR },
    { "setchar",        TOKEN_SETCHAR },
    { "keypressed",     TOKEN_KEYPRESSED },
    { "anykey",         TOKEN_ANYKEY },
    { "updatescr",      TOKEN_UPDATESCR },
    { "color",          TOKEN_COLOR },
    { "border",         TOKEN_BORDER },
    { "cls",            TOKEN_CLS },
    { "wait",           TOKEN_WAIT },
    { "setmusic",       TOKEN_SETMUSIC },
    { "playmusic",      TOKEN_PLAYMUSIC },
    { "stopmusic",      TOKEN_STOPMUSIC },
    { "ismusicplaying", TOKEN_ISMUSICPLAYING },
};
|
|
|
|
// Converts one hexadecimal digit character to its numeric value (0-15).
// Accepts '0'-'9', 'A'-'F' and 'a'-'f'. The caller must have validated the
// character; the result for anything else is meaningless.
static char hex_digit(const char digit) {
    if (digit <= '9') { return (char)(digit - '0'); }
    // Clearing bit 5 folds 'a'-'f' onto 'A'-'F' and leaves 'A'-'F' unchanged.
    return (char)((digit & ~32) - 'A' + 10);
}
|
|
|
|
// Character currently under the read cursor. Parenthesised so it behaves as
// a single operand in any surrounding expression.
#define CCHR (*ptr)

// Advances the read cursor by one character and keeps the position counters
// in step: consuming a line feed (10) starts a new line and resets `row`,
// any other character just bumps `row`.
// Wrapped in do { } while (0) so that `NEXT;` is exactly one statement and
// stays correct inside an unbraced if/else or loop body.
#define NEXT do { if (*ptr == 10) { line++; row = 0; } else { row++; } ptr++; } while (0)
|
|
|
|
static void tkn_do_next() {
|
|
identifier[0] = 0;
|
|
value = 0;
|
|
char* id = identifier;
|
|
// Ignore whitespace
|
|
while (CCHR != 0 && CCHR != 10 && CCHR <= 32) { NEXT; }
|
|
// If zero, we are at the end of the file
|
|
current_tokenline = line; current_tokenrow = row;
|
|
if (CCHR == 0) { current_token = TOKEN_ENDFILE; return; }
|
|
// if 13, we are at the end of the line
|
|
if (CCHR == 10) { NEXT; current_token = TOKEN_ENDLINE; return; }
|
|
// if 34, grab the string
|
|
if (CCHR == 34) {
|
|
NEXT;
|
|
while (CCHR != 0 && CCHR != 34) { *id = CCHR; id++; NEXT; }
|
|
if (CCHR == 0) { current_token = TOKEN_ERROR; error_raise("Unexpected end of file."); return; }
|
|
*id = 0; NEXT;
|
|
current_token = TOKEN_STRING; return;
|
|
}
|
|
// If digit, grab the number
|
|
if (CCHR >= 48 && CCHR <= 57) {
|
|
do { value = value * 10 + (CCHR - 48); NEXT; } while (CCHR >= 48 && CCHR <= 57);
|
|
current_token = TOKEN_NUMBER; return;
|
|
}
|
|
// if letter, grab the identifier
|
|
if ((CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122)) {
|
|
do { *id = CCHR; id++; NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122) || CCHR == 95);
|
|
*id = 0;
|
|
int i = 0; while (identifier[i] != 0) { if (identifier[i] < 95) { identifier[i] |= 32; }; i++; } // to lowecase
|
|
// if it ends with ':', it's a label
|
|
if (CCHR == ':') { NEXT; current_token = TOKEN_LABEL; return; }
|
|
// If it matches a keyword, it's a keyword
|
|
for (auto token : tkn_tokens) {
|
|
if (streq(token.str, identifier)) {
|
|
current_token = token.tokentype;
|
|
if (current_token == TOKEN_REM) { while (CCHR != 0 && CCHR != 13) { NEXT; } }
|
|
return;
|
|
}
|
|
}
|
|
// else, it's an identifier
|
|
current_token = TOKEN_IDENTIFIER; return;
|
|
}
|
|
// Check if it's any of the operators
|
|
if (CCHR == '+') { NEXT; current_token = TOKEN_PLUS; return; }
|
|
if (CCHR == '-') { NEXT; current_token = TOKEN_MINUS; return; }
|
|
if (CCHR == '*') { NEXT; current_token = TOKEN_ASTERISC; return; }
|
|
if (CCHR == '/') { NEXT; current_token = TOKEN_SLASH; return; }
|
|
if (CCHR == '(') { NEXT; current_token = TOKEN_LPAR; return; }
|
|
if (CCHR == ')') { NEXT; current_token = TOKEN_RPAR; return; }
|
|
if (CCHR == ':') { NEXT; current_token = TOKEN_COLON; return; }
|
|
if (CCHR == ',') { NEXT; current_token = TOKEN_COMMA; return; }
|
|
if (CCHR == '<') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_LEQ; } else { current_token = TOKEN_LT; } return; }
|
|
if (CCHR == '>') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_GEQ; } else { current_token = TOKEN_GT; } return; }
|
|
if (CCHR == '=') { NEXT; current_token = TOKEN_EQ; return; }
|
|
if (CCHR == '%') { NEXT; current_token = TOKEN_MOD; return; }
|
|
if (CCHR == 39) { NEXT; current_token = TOKEN_REM; while (CCHR != 0 && CCHR != 10) { NEXT; }; return; }
|
|
|
|
if (CCHR == '&') {
|
|
NEXT;
|
|
if ((CCHR | 32) == 'h') {
|
|
NEXT;
|
|
if ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70)) {
|
|
do { value = (value << 4) + hex_digit(CCHR); NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70));
|
|
current_token = TOKEN_NUMBER; return;
|
|
}
|
|
}
|
|
if ((CCHR | 32) == 'b') {
|
|
NEXT;
|
|
if (CCHR == 48 || CCHR == 49) {
|
|
do { value = (value << 1) + (CCHR-48); NEXT; } while (CCHR == 48 || CCHR == 49);
|
|
current_token = TOKEN_NUMBER; return;
|
|
}
|
|
}
|
|
current_token = TOKEN_ERROR;
|
|
error_raise("Syntax error");
|
|
return;
|
|
}
|
|
|
|
// if no match, it's an error
|
|
current_token = TOKEN_ERROR; return;
|
|
}
|
|
|
|
// Resets the tokenizer and points it at the start of `buffer`.
// The buffer is read in place (only the pointer is stored) and a zero byte
// marks its end. No token is scanned here: until the first tkn_next() call
// the current token is TOKEN_ERROR, with empty text, value 0 and position 0,0.
void tkn_init(const char* buffer) {
    // Clear the payload of any previous token
    identifier[0] = 0;
    value = 0;

    // Rewind the cursor position and the recorded token position
    line = 0;
    row = 0;
    current_tokenline = 0;
    current_tokenrow = 0;

    current_token = TOKEN_ERROR;
    ptr = buffer;
}
|
|
|
|
// Advances to the next significant token: scans raw tokens one after another
// and stops at the first that is neither a comment (TOKEN_REM) nor an
// end-of-line marker (TOKEN_ENDLINE).
void tkn_next() {
    for (;;) {
        tkn_do_next();
        if (current_token != TOKEN_REM && current_token != TOKEN_ENDLINE) { break; }
    }
}
|
|
|
|
// Returns the type of the current token (TOKEN_ERROR right after tkn_init(),
// before any token has been scanned).
t_tokentype tkn_get_token() { return current_token; }
|
|
// Returns the text of the current token: the contents of a string literal,
// or the lowercased name of an identifier, label or keyword; empty for other
// tokens. The pointer refers to the tokenizer's internal buffer, which is
// cleared and reused on every scan, so copy the text if it must outlive the
// next tkn_next() call.
char* tkn_get_string() { return identifier; }
|
|
// Returns the numeric value of the current token; it is reset to 0 at the
// start of every scan and only filled in by number literals.
int tkn_get_value() { return value; }
|
|
// Returns the line on which the current token starts, counted from 0
// (one per line feed consumed since tkn_init()).
int tkn_get_line() { return current_tokenline; }
|
|
// Returns the position within its line at which the current token starts,
// counted from 0 (characters consumed since the last line feed).
int tkn_get_row() { return current_tokenrow; }
|