First commit, porting from PaCO

2021-04-27 19:02:20 +02:00
commit f1f590735b
11 changed files with 1283 additions and 0 deletions
--- a/tokenizer.cpp
+++ b/tokenizer.cpp
@@ -0,0 +1,161 @@
+#include "tokenizer.h"
+#include "error.h"
+
+// Returns true when the NUL-terminated strings `a` and `b` have exactly the same contents.
+static bool streq(const char* a, const char* b) {
+	// Walk both strings in lockstep for as long as they agree and `a` has not ended.
+	while (*a != 0 && *a == *b) {
+		a++;
+		b++;
+	}
+	// We stopped either on a mismatch or at the end of `a`; the strings are equal only
+	// if `b` ended at the very same position.
+	return *a == *b;
+}
+
+static const char* ptr; // Read cursor into the source buffer handed to tkn_init()
+static t_tokentype current_token = TOKEN_ERROR; // Type of the token most recently scanned by tkn_do_next()
+static char identifier[255]; // Text of the last string literal, or of the last identifier/keyword (lowercased)
+static unsigned char array[255]; // Returned by tkn_get_array(); within this file only array[0] is ever written (by tkn_init)
+static float value; // Value of the last number token; reset to 0 at the start of every scan
+static int line = 0; // Line of the read cursor, incremented each time a line feed (10) is consumed
+static int row = 0; // Position of the read cursor within its line, reset to 0 after a line feed
+static int current_tokenline = 0; // `line` captured at the first character of the current token
+static int current_tokenrow = 0; // `row` captured at the first character of the current token
+
+struct t_token_op { // One keyword table entry: a spelling and the token type it maps to
+	const char* str; // Keyword spelling, compared with streq() against the scanned identifier
+	t_tokentype tokentype; // Token type reported when the spelling matches
+};
+
+t_token_op tkn_tokens[] = { // Reserved words; spellings are lowercase because identifiers are lowercased before the lookup
+	{ "struct", TOKEN_STRUCT },
+	{ "as", TOKEN_AS },
+	{ "end", TOKEN_END },
+	{ "function", TOKEN_FUNCTION },
+	{ "var", TOKEN_VAR },
+	{ "array", TOKEN_ARRAY },
+	{ "of", TOKEN_OF },
+	{ "const", TOKEN_CONST },
+	{ "if", TOKEN_IF },
+	{ "then", TOKEN_THEN },
+	{ "else", TOKEN_ELSE },
+	{ "while", TOKEN_WHILE },
+	{ "do", TOKEN_DO },
+	{ "repeat", TOKEN_REPEAT },
+	{ "until", TOKEN_UNTIL },
+	{ "for", TOKEN_FOR },
+	{ "to", TOKEN_TO },
+	{ "step", TOKEN_STEP },
+	{ "return", TOKEN_RETURN },
+	{ "rem", TOKEN_REM }, // Comment keyword: tkn_do_next() discards the text that follows it
+	{ "and", TOKEN_AND },
+	{ "or", TOKEN_OR },
+	{ "not", TOKEN_NOT }, // Also produced by a lone '!' in tkn_do_next()
+	{ "mod", TOKEN_MOD } // Also produced by '%' in tkn_do_next()
+};
+
+//static char hex_digit(const char digit) { return digit - (digit <= 57 ? 48 : 55); }
+
+#define CCHR *ptr
+#define NEXT if(*ptr==10){line++;row=0;}else{row++;};ptr++
+
+// True for the ASCII decimal digits '0'..'9'.
+static bool tkn_is_digit(char c) { return c >= '0' && c <= '9'; }
+// True for the ASCII letters 'A'..'Z' and 'a'..'z'.
+static bool tkn_is_letter(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); }
+
+// Scans a single raw token starting at the read cursor and publishes it through the file-level state:
+//   - current_token                        type of the token (TOKEN_ERROR when nothing matches)
+//   - identifier                           text of a string literal, or lowercased text of an identifier/keyword
+//   - value                                value of a number token (0 for every other token)
+//   - current_tokenline/current_tokenrow   cursor position of the token's first character
+// Comments (TOKEN_REM) and line ends (TOKEN_ENDLINE) are returned like any other token.
+// Calls error_raise() for an unterminated string and for text that does not fit in `identifier`.
+static void tkn_do_next() {
+	// Number of characters `identifier` can hold once room for the terminator is set aside.
+	const int max_len = (int)sizeof(identifier) - 1;
+	int len = 0;
+	bool too_long = false;
+	identifier[0] = 0;
+	value = 0;
+	// Skip blanks and control characters, but stop at the line feed: it is a token of its own.
+	while (CCHR != 0 && CCHR != 10 && CCHR <= 32) { NEXT; }
+	// Record where the token starts; this is what tkn_get_line()/tkn_get_row() return.
+	current_tokenline = line; current_tokenrow = row;
+	// A NUL terminator means the end of the source buffer.
+	if (CCHR == 0) { current_token = TOKEN_ENDFILE; return; }
+	// A line feed (10) is the end of the line.
+	if (CCHR == 10) { NEXT; current_token = TOKEN_ENDLINE; return; }
+	// A double quote opens a string literal that runs up to the next double quote.
+	if (CCHR == '"') {
+		NEXT;
+		while (CCHR != 0 && CCHR != '"') {
+			// Never write past the buffer; keep consuming so the cursor still ends after the literal.
+			if (len < max_len) { identifier[len++] = CCHR; } else { too_long = true; }
+			NEXT;
+		}
+		identifier[len] = 0;
+		if (CCHR == 0) { current_token = TOKEN_ERROR; error_raise("Unexpected end of file."); return; }
+		NEXT; // closing quote
+		if (too_long) { current_token = TOKEN_ERROR; error_raise("String too long."); return; }
+		current_token = TOKEN_STRING; return;
+	}
+	// A digit starts a decimal number with an optional fractional part.
+	if (tkn_is_digit(CCHR)) {
+		do { value = value * 10 + (CCHR - '0'); NEXT; } while (tkn_is_digit(CCHR));
+		if (CCHR == '.') {
+			NEXT; // the decimal point itself carries no value
+			// Build the fraction in floating point: dividing a single digit by an integer power of ten
+			// with integer arithmetic would truncate every fractional digit to 0.
+			double fraction = 0;
+			double scale = 1;
+			while (tkn_is_digit(CCHR)) { fraction = fraction * 10 + (CCHR - '0'); scale *= 10; NEXT; }
+			value = (float)(value + fraction / scale);
+		}
+		current_token = TOKEN_NUMBER; return;
+	}
+	// A letter starts an identifier or keyword: letters, digits and underscores may follow.
+	if (tkn_is_letter(CCHR)) {
+		do {
+			if (len < max_len) { identifier[len++] = CCHR; } else { too_long = true; }
+			NEXT;
+		} while (tkn_is_digit(CCHR) || tkn_is_letter(CCHR) || CCHR == '_');
+		identifier[len] = 0;
+		if (too_long) { current_token = TOKEN_ERROR; error_raise("Identifier too long."); return; }
+		// Identifiers and keywords are case-insensitive: fold everything to lowercase.
+		for (int i = 0; i < len; i++) {
+			if (identifier[i] >= 'A' && identifier[i] <= 'Z') { identifier[i] = (char)(identifier[i] + ('a' - 'A')); }
+		}
+		// A reserved word is reported with its own token type.
+		for (const auto& keyword : tkn_tokens) {
+			if (streq(keyword.str, identifier)) {
+				current_token = keyword.tokentype;
+				// `rem` comments out the rest of the line. Stop at the line feed (10), the same terminator
+				// used for line ends and for ' comments; waiting for a carriage return would swallow the
+				// remainder of any source that uses bare line feeds.
+				if (current_token == TOKEN_REM) { while (CCHR != 0 && CCHR != 10) { NEXT; } }
+				return;
+			}
+		}
+		// Anything else is a plain identifier.
+		current_token = TOKEN_IDENTIFIER; return;
+	}
+	// Operators and punctuation.
+	// Note: the `&h..` (hexadecimal) and `&b..` (binary) number literals inherited from PaCO are not supported;
+	// it is unclear whether JailScript needs them, and the old shift-based code cannot work on a float `value`.
+	switch (CCHR) {
+	case '+': NEXT; current_token = TOKEN_PLUS; return;
+	case '-': NEXT; current_token = TOKEN_MINUS; return;
+	case '*': NEXT; current_token = TOKEN_ASTERISC; return;
+	case '/': NEXT; current_token = TOKEN_SLASH; return;
+	case '.': NEXT; current_token = TOKEN_DOT; return;
+	case '(': NEXT; current_token = TOKEN_LPAR; return;
+	case ')': NEXT; current_token = TOKEN_RPAR; return;
+	case '[': NEXT; current_token = TOKEN_LBRACKET; return;
+	case ']': NEXT; current_token = TOKEN_RBRACKET; return;
+	case ',': NEXT; current_token = TOKEN_COMMA; return;
+	case '=': NEXT; current_token = TOKEN_EQ; return;
+	case '%': NEXT; current_token = TOKEN_MOD; return;
+	case '<': // "<", "<=" or "<>"
+		NEXT;
+		if (CCHR == '=') { NEXT; current_token = TOKEN_LEQ; }
+		else if (CCHR == '>') { NEXT; current_token = TOKEN_NEQ; }
+		else { current_token = TOKEN_LT; }
+		return;
+	case '>': // ">" or ">="
+		NEXT;
+		if (CCHR == '=') { NEXT; current_token = TOKEN_GEQ; }
+		else { current_token = TOKEN_GT; }
+		return;
+	case '!': // "!=" or a lone "!" meaning NOT
+		NEXT;
+		if (CCHR == '=') { NEXT; current_token = TOKEN_NEQ; }
+		else { current_token = TOKEN_NOT; }
+		return;
+	case '\'': // an apostrophe comments out the rest of the line
+		NEXT;
+		current_token = TOKEN_REM;
+		while (CCHR != 0 && CCHR != 10) { NEXT; }
+		return;
+	default:
+		// Nothing matched: report an error token. The offending character is left unconsumed.
+		current_token = TOKEN_ERROR;
+		return;
+	}
+}
+
+// Resets all tokenizer state and points the read cursor at the start of `buffer`.
+// No token is scanned here: the current token stays TOKEN_ERROR until tkn_next() is called.
+void tkn_init(const char* buffer) {
+	// Cursor and token positions go back to the origin.
+	current_tokenrow = 0;
+	current_tokenline = 0;
+	row = 0;
+	line = 0;
+	// Clear the payload of whatever token was scanned last.
+	value = 0;
+	array[0] = 0;
+	identifier[0] = 0;
+	current_token = TOKEN_ERROR;
+	// Finally, start reading from the new buffer.
+	ptr = buffer;
+}
+
+// Advances to the next significant token: comments (TOKEN_REM) and line ends (TOKEN_ENDLINE)
+// produced by tkn_do_next() are skipped, every other token type ends the search.
+void tkn_next() {
+	for (;;) {
+		tkn_do_next();
+		const bool skippable = (current_token == TOKEN_REM) || (current_token == TOKEN_ENDLINE);
+		if (!skippable) { break; }
+	}
+}
+
+// Returns the type of the token most recently scanned.
+t_tokentype tkn_get_token() {
+	return current_token;
+}
+// Returns the tokenizer's internal text buffer (string literal or lowercased identifier text).
+// The buffer is shared and rewritten by every scan, so copy it if it must outlive the next token.
+char* tkn_get_string() {
+	return identifier;
+}
+// Returns the tokenizer's internal byte array buffer.
+unsigned char* tkn_get_array() {
+	return array;
+}
+// Returns the numeric value of the last scanned token (0 unless it was a number).
+float tkn_get_value() {
+	return value;
+}
+// Returns the line on which the current token starts.
+int tkn_get_line() {
+	return current_tokenline;
+}
+// Returns the position within its line at which the current token starts.
+int tkn_get_row() {
+	return current_tokenrow;
+}