#include "tokenizer.h"
#include "error.h"
// NOTE(review): the original listed a third #include whose header name was
// lost. Nothing in this file visibly needs a standard header; restore it if
// the build turns out to require one.

// Returns true when the two NUL-terminated strings are identical.
static bool streq(const char* a, const char* b) {
    while (*a != 0 && *b != 0) {
        if (*a != *b) return false;
        a++;
        b++;
    }
    return *a == *b;
}

// ---------------------------------------------------------------------------
// Scanner state (one global tokenizer; re-armed by tkn_init).
// ---------------------------------------------------------------------------
static const char* ptr;                          // next unread character
static t_tokentype current_token = TOKEN_ERROR;  // last token produced
static char identifier[255];                     // text of last string/identifier
static unsigned char array[255];                 // returned by tkn_get_array(); never filled here
static float value;                              // value of last number
static int line = 0;                             // position of `ptr` (0-based)
static int row = 0;
static int current_tokenline = 0;                // position where the last token started
static int current_tokenrow = 0;

// Maximum number of characters (excluding the NUL) that fit in `identifier`.
static const int TEXT_CAPACITY = (int)sizeof(identifier) - 1;

// Pairs a keyword spelling (lowercase) with its token type.
struct t_token_op {
    const char* str;
    t_tokentype tokentype;
};

// Keyword table. Identifiers are lowercased before lookup, so keywords are
// case-insensitive. Left with external linkage as in the original.
t_token_op tkn_tokens[] = {
    { "struct", TOKEN_STRUCT },
    { "as", TOKEN_AS },
    { "end", TOKEN_END },
    { "function", TOKEN_FUNCTION },
    { "var", TOKEN_VAR },
    { "array", TOKEN_ARRAY },
    { "of", TOKEN_OF },
    { "const", TOKEN_CONST },
    { "if", TOKEN_IF },
    { "then", TOKEN_THEN },
    { "else", TOKEN_ELSE },
    { "while", TOKEN_WHILE },
    { "do", TOKEN_DO },
    { "repeat", TOKEN_REPEAT },
    { "until", TOKEN_UNTIL },
    { "for", TOKEN_FOR },
    { "to", TOKEN_TO },
    { "step", TOKEN_STEP },
    { "return", TOKEN_RETURN },
    { "rem", TOKEN_REM },
    { "and", TOKEN_AND },
    { "or", TOKEN_OR },
    { "not", TOKEN_NOT },
    { "mod", TOKEN_MOD }
};

//static char hex_digit(const char digit) { return digit - (digit <= 57 ? 48 : 55); }

// ---------------------------------------------------------------------------
// Character helpers
// ---------------------------------------------------------------------------
static inline bool is_digit(char c) {
    return c >= '0' && c <= '9';
}

static inline bool is_letter(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// Consumes one character, keeping the line/row counters in sync.
static inline void consume_char() {
    if (*ptr == '\n') {
        line++;
        row = 0;
    } else {
        row++;
    }
    ptr++;
}

// Skips up to (but not including) the next '\n' or the end of the input.
static void skip_to_end_of_line() {
    while (*ptr != 0 && *ptr != '\n') {
        consume_char();
    }
}

// Consumes one character and reports `type` as the current token.
static void emit_single(t_tokentype type) {
    consume_char();
    current_token = type;
}

// ---------------------------------------------------------------------------
// Token scanners. Each one is entered with *ptr on the token's first char.
// ---------------------------------------------------------------------------

// Scans a double-quoted string into `identifier` (quotes not stored).
// Raises an error on a missing closing quote or when the text does not fit.
static void scan_string() {
    int len = 0;
    bool too_long = false;

    consume_char();  // opening quote
    while (*ptr != 0 && *ptr != '"') {
        if (len < TEXT_CAPACITY) {
            identifier[len++] = *ptr;
        } else {
            too_long = true;  // keep consuming so the scanner stays in sync
        }
        consume_char();
    }
    identifier[len] = 0;

    if (*ptr == 0) {
        current_token = TOKEN_ERROR;
        error_raise("Unexpected end of file.");
        return;
    }
    consume_char();  // closing quote

    if (too_long) {
        current_token = TOKEN_ERROR;
        error_raise("String too long.");
        return;
    }
    current_token = TOKEN_STRING;
}

// Scans a decimal number with an optional fractional part into `value`.
static void scan_number() {
    do {
        value = value * 10 + (*ptr - '0');
        consume_char();
    } while (is_digit(*ptr));

    if (*ptr == '.') {
        float scale = 0.1f;
        consume_char();
        // Only digits are consumed after the dot; a non-digit (or the end of
        // the input) is left for the next token.
        while (is_digit(*ptr)) {
            value = value + (float(*ptr - '0') * scale);
            scale *= 0.1f;
            consume_char();
        }
    }
    current_token = TOKEN_NUMBER;
}

// Scans an identifier or keyword. The text is lowercased into `identifier`.
// A `rem` keyword also discards the rest of the line.
static void scan_word() {
    int len = 0;
    bool too_long = false;

    do {
        if (len < TEXT_CAPACITY) {
            identifier[len++] = *ptr;
        } else {
            too_long = true;  // keep consuming so the scanner stays in sync
        }
        consume_char();
    } while (is_digit(*ptr) || is_letter(*ptr) || *ptr == '_');
    identifier[len] = 0;

    if (too_long) {
        current_token = TOKEN_ERROR;
        error_raise("Identifier too long.");
        return;
    }

    // To lowercase (only 'A'..'Z' are touched; digits and '_' are unchanged).
    for (int i = 0; identifier[i] != 0; i++) {
        if (identifier[i] >= 'A' && identifier[i] <= 'Z') {
            identifier[i] |= 32;
        }
    }

    // If it matches a keyword, it's a keyword
    for (const auto& keyword : tkn_tokens) {
        if (streq(keyword.str, identifier)) {
            current_token = keyword.tokentype;
            if (current_token == TOKEN_REM) {
                skip_to_end_of_line();
            }
            return;
        }
    }

    // else, it's an identifier
    current_token = TOKEN_IDENTIFIER;
}

// Scans operators and punctuation. An unrecognised character yields
// TOKEN_ERROR without being consumed.
static void scan_operator() {
    switch (*ptr) {
    case '+': emit_single(TOKEN_PLUS); return;
    case '-': emit_single(TOKEN_MINUS); return;
    case '*': emit_single(TOKEN_ASTERISC); return;
    case '/': emit_single(TOKEN_SLASH); return;
    case '.': emit_single(TOKEN_DOT); return;
    case '(': emit_single(TOKEN_LPAR); return;
    case ')': emit_single(TOKEN_RPAR); return;
    case '[': emit_single(TOKEN_LBRACKET); return;
    case ']': emit_single(TOKEN_RBRACKET); return;
    case ',': emit_single(TOKEN_COMMA); return;
    case '=': emit_single(TOKEN_EQ); return;
    case '%': emit_single(TOKEN_MOD); return;

    case '<':  // "<", "<=" or "<>"
        consume_char();
        if (*ptr == '=') {
            emit_single(TOKEN_LEQ);
        } else if (*ptr == '>') {
            emit_single(TOKEN_NEQ);
        } else {
            current_token = TOKEN_LT;
        }
        return;

    case '>':  // ">" or ">="
        consume_char();
        if (*ptr == '=') {
            emit_single(TOKEN_GEQ);
        } else {
            current_token = TOKEN_GT;
        }
        return;

    case '!':  // "!" or "!="
        consume_char();
        if (*ptr == '=') {
            emit_single(TOKEN_NEQ);
        } else {
            current_token = TOKEN_NOT;
        }
        return;

    case '\'':  // apostrophe starts a comment that runs to the end of the line
        consume_char();
        current_token = TOKEN_REM;
        skip_to_end_of_line();
        return;

    default:
        break;
    }

    // To read numbers in hexadecimal or binary, inherited from PaCO; not sure
    // whether JailBasic needs it.
    /*if (CCHR == '&') {
        NEXT;
        if ((CCHR | 32) == 'h') {
            NEXT;
            if ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70)) {
                do { value = (value << 4) + hex_digit(CCHR); NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70));
                current_token = TOKEN_NUMBER; return;
            }
        }
        if ((CCHR | 32) == 'b') {
            NEXT;
            if (CCHR == 48 || CCHR == 49) {
                do { value = (value << 1) + (CCHR-48); NEXT; } while (CCHR == 48 || CCHR == 49);
                current_token = TOKEN_NUMBER; return;
            }
        }
        current_token = TOKEN_ERROR; error_raise("Syntax error"); return;
    }*/

    // if no match, it's an error
    current_token = TOKEN_ERROR;
}

// Reads exactly one raw token (including TOKEN_REM and TOKEN_ENDLINE) and
// stores it in the scanner state.
static void tkn_do_next() {
    identifier[0] = 0;
    value = 0;

    // Ignore whitespace: every character <= 32 except '\n', which is a token.
    while (*ptr != 0 && *ptr != '\n' && *ptr <= ' ') {
        consume_char();
    }

    // Remember where this token starts, for tkn_get_line()/tkn_get_row().
    current_tokenline = line;
    current_tokenrow = row;

    const char c = *ptr;

    // If zero, we are at the end of the file
    if (c == 0) {
        current_token = TOKEN_ENDFILE;
        return;
    }

    // If '\n' (10), we are at the end of the line
    if (c == '\n') {
        emit_single(TOKEN_ENDLINE);
        return;
    }

    if (c == '"') {
        scan_string();
    } else if (is_digit(c)) {
        scan_number();
    } else if (is_letter(c)) {
        scan_word();
    } else {
        scan_operator();
    }
}

// Points the tokenizer at a NUL-terminated source buffer and resets all
// scanner state. The buffer is not copied; it must outlive the scan.
void tkn_init(const char* buffer) {
    ptr = buffer;
    current_token = TOKEN_ERROR;
    value = 0;
    line = 0;
    row = 0;
    current_tokenline = 0;
    current_tokenrow = 0;
    identifier[0] = 0;
    array[0] = 0;
}

// Advances to the next significant token, skipping comments and line ends.
void tkn_next() {
    do {
        tkn_do_next();
    } while (current_token == TOKEN_REM || current_token == TOKEN_ENDLINE);
}

// Type of the current token.
t_tokentype tkn_get_token() { return current_token; }

// Text of the current string or (lowercased) identifier token.
char* tkn_get_string() { return identifier; }

// Internal byte array buffer (not written by the scanner itself).
unsigned char* tkn_get_array() { return array; }

// Numeric value of the current number token.
float tkn_get_value() { return value; }

// Line (0-based) on which the current token starts.
int tkn_get_line() { return current_tokenline; }

// Row (0-based column) at which the current token starts.
int tkn_get_row() { return current_tokenrow; }