commit f1f590735b0aa69ba7e0a438bdedec0cc4a648c7 Author: Raimon Zamora Date: Tue Apr 27 19:02:20 2021 +0200 First commit, porting from PaCO diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b15602d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ + +.vscode diff --git a/error.cpp b/error.cpp new file mode 100644 index 0000000..13ae601 --- /dev/null +++ b/error.cpp @@ -0,0 +1,22 @@ +#include "error.h" +#include +#include "tokenizer.h" + +static char errormsg[255]; +static bool raised = false; + + +void error_raise(const char* msg) { + if (!raised) { + raised = true; + sprintf(errormsg, "ERROR AT SOURCE:%s at %d:%d.", msg, tkn_get_line() + 1, tkn_get_row() + 1); + } +} + +bool error_raised() { return raised; } + +void error_print(unsigned char* mem) { + //char* msg = errormsg; + //while (*msg != 0) *mem++ = *msg++; + printf("%s", errormsg); +} diff --git a/error.h b/error.h new file mode 100644 index 0000000..b753dfe --- /dev/null +++ b/error.h @@ -0,0 +1,5 @@ +#pragma once + +void error_raise(const char* msg); +bool error_raised(); +void error_print(unsigned char* mem); diff --git a/heap.cpp b/heap.cpp new file mode 100644 index 0000000..8ac1b6f --- /dev/null +++ b/heap.cpp @@ -0,0 +1,48 @@ +#include "heap.h" +#include + +static int heap_size = 0; +static int heap_address = 0; +static char* heap_mem = NULL; + +void heap_init() { + heap_size = 0; + heap_address = 0; + if (heap_mem != NULL) { free(heap_mem); heap_mem = NULL; } +} + +const int heap_get_address() { + return heap_address; +} + +void heap_reserve(const int size) { + heap_size += size; + heap_address += size; +} + +const int heap_get_size() { + return heap_size; +} + +void heap_alloc() { + if (heap_mem == NULL) heap_mem = (char*)malloc(heap_size); +} + +const float heap_get_float(const int address) { + return (float)heap_mem[address]; +} + +const char* heap_get_string(const int address) { + return &heap_mem[address]; +} + +void heap_set_float(const int address, const float value) { + 
*(float*)&heap_mem[address] = value; + //float *f = (float*)&heap_mem[address]; + //*f = value; +} + +void heap_set_string(const int address, const char* value) { + const int size = *value; + for (int i = 0; i <= size; i++) { heap_mem[address+i] = value[i]; } +} diff --git a/heap.h b/heap.h new file mode 100644 index 0000000..48db50e --- /dev/null +++ b/heap.h @@ -0,0 +1,11 @@ +#pragma once + +void heap_init(); +const int heap_get_address(); +void heap_reserve(const int size); +const int heap_get_size(); +void heap_alloc(); +const float heap_get_float(const int address); +const char* heap_get_string(const int address); +void heap_set_float(const int address, const float value); +void heap_set_string(const int address, const char* value); diff --git a/parser.cpp b/parser.cpp new file mode 100644 index 0000000..d962963 --- /dev/null +++ b/parser.cpp @@ -0,0 +1,826 @@ +#include "parser.h" +#include "tokenizer.h" +#include "error.h" +#include +#include +#include + +#define MAX_CODE_SIZE 65536 +#define MAX_LABELS 256 +#define MAX_FUNCTIONS 256 +#define MAX_EXTERNAL_FUNCTIONS 256 +#define MAX_IDENTIFIER_LENGTH 40 +#define MAX_CONSTANTS 256 +#define MAX_STRUCT_MEMBERS 10 + +struct t_variable { + char name[MAX_IDENTIFIER_LENGTH]; + int type; + int length; + int address; +}; + +struct t_struct { + char name[MAX_IDENTIFIER_LENGTH]; + int num_members; + t_variable members[MAX_STRUCT_MEMBERS]; +}; + +struct t_scope { + +}; + +struct t_label { + char name[MAX_IDENTIFIER_LENGTH]; + word address; +}; + +/*struct t_variable { + char name[MAX_IDENTIFIER_LENGTH]; + int index = 0; + int size = 1; + t_variable* next{ nullptr }; +}; + +struct t_external_function { + char name[MAX_IDENTIFIER_LENGTH]; + int num_parameters = 0; +}; + +struct t_string { + unsigned char string[256]; + word references[256]; + int num_references = 1; +}; + +struct t_constant { + char name[MAX_IDENTIFIER_LENGTH]; + byte value; +};*/ + +static byte* code; // [MAX_CODE_SIZE]; +static word codepos = 0; +static 
word lines[32768]; +static word current_line; + +//static t_label known_labels[MAX_LABELS]; +//static int num_known_labels = 0; +static t_label unknown_labels[MAX_LABELS]; +static int num_unknown_labels = 0; +static int num_anonymous_labels = 0; + +static char* breaklabel = nullptr; +static char* contlabel = nullptr; + +static t_variable* variables = nullptr; +static int num_variables = 0; +static bool variable_is_array = false; + +static t_external_function external_functions[MAX_EXTERNAL_FUNCTIONS]; +static int num_external_functions = 0; + +static t_string strings[256]; +static int num_strings = 0; + +static bool parser_finished = false; + +static t_constant constants[MAX_CONSTANTS]; +static int num_constants; + +/****************************************************************************************/ +/* GENERIC FUNCTIONS */ +/****************************************************************************************/ + +static void int_to_string(int value, char* label) { + for (int i = 0; i < 7; i++) label[i] = '0'; + label[7] = 0; + int i = 6; + while (value > 0) { + label[i] = 48 + (value % 10); + value = value / 10; + i--; + } +} + +/****************************************************************************************/ +/* BYTECODE GENERATOR */ +/****************************************************************************************/ + +static void emmit(const byte value) { + lines[codepos] = current_line; + code[codepos++] = value; +} + +static void emmit_w(const word value) { + lines[codepos] = current_line; + code[codepos++] = value & 255; + lines[codepos] = current_line; + code[codepos++] = value >> 8; +} + +static void emmit_f(const float value) { + byte *v = (byte*)&value; + lines[codepos] = current_line; code[codepos++] = *(v++); + lines[codepos] = current_line; code[codepos++] = *(v++); + lines[codepos] = current_line; code[codepos++] = *(v++); + lines[codepos] = current_line; code[codepos++] = *(v++); +} + +static void patch(const word address, 
const word dest) { + code[address] = dest & 255; + code[address + 1] = dest >> 8; +} + +static const word get_current_address() { + return codepos; +} + +/****************************************************************************************/ +/* VARIABLE MANAGEMENT */ +/****************************************************************************************/ + +static int get_variable_index(const char* string, const int array_size = 1) { + t_variable* variable = variables; + t_variable* last = variables; + while (variable != nullptr) { + if (strcmp(variable->name, string) == 0) { variable_is_array = (variable->size > 1); return variable->index + 0xC000; } + last = variable; + variable = variable->next; + } + t_variable* newvar = (t_variable*)malloc(sizeof(t_variable)); + //newvar->name = (char*)malloc(strlen(string) + 1); + strcpy(newvar->name, string); + newvar->index = num_variables; + num_variables += array_size; + newvar->size = array_size; + variable_is_array = (newvar->size > 1); + newvar->next = nullptr; + if (last != nullptr) { + last->next = newvar; + } + else { + variables = newvar; + } + return newvar->index + 0xC000; +} + +static bool is_variable_array() { return variable_is_array; } + +/****************************************************************************************/ +/* CONSTANT MANAGEMENT */ +/****************************************************************************************/ +void parser_register_constant(const char* name, const unsigned char value) { + strcpy(constants[num_constants].name, name); + constants[num_constants++].value = value; +} + +static const int get_constant(const char* name) { + for (int i = 0; i < num_constants; i++) { + if (strcmp(constants[i].name, name) == 0) + return constants[i].value; + } + return -1; +} + +/****************************************************************************************/ +/* LABEL MANAGEMENT */ 
+/****************************************************************************************/ + +static const word get_label_address(const char* string) { + for (int i = 0; i < num_known_labels; i++) { if (strcmp(known_labels[i].name, string) == 0) return known_labels[i].address; } + //unknown_labels[num_unknown_labels].name = (char*)malloc(strlen(string) + 1); + strcpy(unknown_labels[num_unknown_labels].name, string); + unknown_labels[num_unknown_labels].address = get_current_address(); + num_unknown_labels++; + return 0; +} + +static void register_label_address(const char* string) { + // If the label already exists, vomit an error + for (int i = 0; i < num_known_labels; i++) { + if (strcmp(known_labels[i].name, string) == 0) { + parser_finished = true; + error_raise("Duplicate label"); + return; + } + } + // register new label + //known_labels[num_known_labels].name = (char*)malloc(strlen(string) + 1); + strcpy(known_labels[num_known_labels].name, string); + known_labels[num_known_labels++].address = get_current_address(); + // patch and remove any references awaiting for this label + int i = 0; + while (i < num_unknown_labels) { + if (strcmp(unknown_labels[i].name, string) == 0) { + patch(unknown_labels[i].address, get_current_address()); + if (i < num_unknown_labels - 1) { + //free(unknown_labels[i].name); + unknown_labels[i].address = unknown_labels[num_unknown_labels - 1].address; + strcpy(unknown_labels[i].name, unknown_labels[num_unknown_labels - 1].name); + //unknown_labels[i].name = unknown_labels[num_unknown_labels - 1].name; + } + num_unknown_labels--; + } + else { + i++; + } + } + //num_known_labels++; +} + +static void generate_anonymous_labelname(char* dest) { + int_to_string(num_anonymous_labels++, dest); +} + +/****************************************************************************************/ +/* STRING MANAGEMENT */ +/****************************************************************************************/ +static const bool same_string(const 
unsigned char* a, const unsigned char*b) { + for (int i = 0; i <= a[0]; i++) if (a[i] != b[i]) return false; + return true; +} +static void copy_string(unsigned char* dest, const unsigned char* src) { + memcpy(dest, src, src[0] + 1); + //for (int i = 0; i <= src[0]; i++) dest[i] = src[i]; +} +static void to_basic_string(unsigned char* dest, const char* src) { + int len = strlen(src); + *dest = len; dest++; + memcpy(dest, src, len); +} + +static int register_array(const unsigned char* string) { + for (int i = 0; i < num_strings; i++) { + if (same_string(strings[i].string, string)) { strings[i].references[strings[i].num_references++] = get_current_address(); return 0; } + } + copy_string(strings[num_strings].string, string); + strings[num_strings++].references[0] = get_current_address(); + return 0; +} + +static int register_string(const char* string) { + unsigned char new_str[255]; to_basic_string(new_str, string); + for (int i = 0; i < num_strings; i++) { + if (same_string(strings[i].string, new_str)) { strings[i].references[strings[i].num_references++] = get_current_address(); return 0; } + } + copy_string(strings[num_strings].string, new_str); + strings[num_strings++].references[0] = get_current_address(); + return 0; +} + +static void append_strings() { + code[0] = codepos & 255; + code[1] = codepos >> 8; + emmit(num_strings); + for (int i = 0; i < num_strings; i++) { + for (int j = 0; j < strings[i].num_references; j++) { patch(strings[i].references[j], get_current_address()); } + //char len = strings[i].string[0]; + //emmit(len); + for (int j = 0; j <= strings[i].string[0]; j++) { emmit(strings[i].string[j]); } + } +} + +/****************************************************************************************/ +/* PARSER */ +/****************************************************************************************/ + +static void parse_expression(); +static void parse_statements(); + +/* [RZC 27/04/2021] No usat en JailScript +//static void parse_strleft(); 
+static void parse_str(); +static void parse_chr(); + +static void parse_strlen(); +static void parse_keypressed(); +static void parse_anykey(); +static void parse_getchar(); +static void parse_getcolor(); +*/ + +#define EXPECT(X, Y) if (tkn_get_token() != X) { parser_finished = true; error_raise(Y); return; } + +static void parse_concat_atom() { + if (tkn_get_token() == TOKEN_STRING) { + emmit(OP_SETX); + emmit_w(register_string(tkn_get_string())); + emmit(OP_JSR); + emmit_w(get_label_address("_sys_string_load")); + tkn_next(); + return; + } + if (tkn_get_token() == TOKEN_HEXSTRING) { + emmit(OP_SETX); + emmit_w(register_array(tkn_get_array())); + emmit(OP_JSR); + emmit_w(get_label_address("_sys_string_load")); + tkn_next(); + return; + } + if (tkn_get_token() == TOKEN_IDENTIFIER) { + int ivar = get_variable_index(tkn_get_string()); + if (!is_variable_array()) { parser_finished = true; error_raise("Expected string constant or variable"); return; } + emmit(OP_SETX); + emmit_w(ivar); + emmit(OP_JSR); + emmit_w(get_label_address("_sys_string_load")); + tkn_next(); + return; + } + if (tkn_get_token() == TOKEN_STRLEFT) { tkn_next(); parse_strleft(); return; } + if (tkn_get_token() == TOKEN_STR) { tkn_next(); parse_str(); return; } + if (tkn_get_token() == TOKEN_CHR) { tkn_next(); parse_chr(); return; } + parser_finished = true; error_raise("Syntax error"); + return; +} + +static void parse_concatenation() { + parse_concat_atom(); + while (tkn_get_token() == TOKEN_COMMA) { + tkn_next(); parse_concat_atom(); + emmit(OP_CONCAT); + } +} + +static void parse_expr_atom() { + // NUM, VAR, UNARY, PAREN + if (tkn_get_token() == TOKEN_NUMBER) { + emmit(OP_PUSH); + emmit_f(tkn_get_value()); + tkn_next(); + return; + } + if (tkn_get_token() == TOKEN_IDENTIFIER) { + // [RZC 27/04/2021] Per ara llevem les constants + /*int constvalue = get_constant(tkn_get_string()); + if (constvalue != -1) { + emmit(OP_PUSH); + emmit(constvalue); + tkn_next(); + return; + }*/ + + // [RZC 
27/04/2021] Per ara llevem les variables + /*int ivar = get_variable_index(tkn_get_string()); + if (is_variable_array()) { + tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); + tkn_next(); parse_expr_atom(); + EXPECT(TOKEN_RPAR, "Expected ')'"); + emmit(OP_SETY); + emmit(OP_LOADI); + emmit_w(ivar); + } else { + emmit(OP_LOAD); + emmit_w(ivar); + }*/ + // [RZC 27/04/2021] Atenció! Ara les variables han de estar declarades abans, pel que pot haver error ací si no s'ha trobat. + tkn_next(); + return; + } + if (tkn_get_token() == TOKEN_MINUS) { + tkn_next(); + parse_expr_atom(); + emmit(OP_NEG); + return; + } + if (tkn_get_token() == TOKEN_NOT) { + tkn_next(); + parse_expr_atom(); + emmit(OP_NOT); + return; + } + if (tkn_get_token() == TOKEN_LPAR) { + tkn_next(); + parse_expression(); + EXPECT(TOKEN_RPAR, "Expected ')'"); + tkn_next(); + return; + } + // [RZC 27/04/2021] No usat en JailScript, pero hi haurà que afegir les funcions + /* + if (tkn_get_token() == TOKEN_STRLEN) { tkn_next(); parse_strlen(); return; } + if (tkn_get_token() == TOKEN_KEYPRESSED) { tkn_next(); parse_keypressed(); return; } + if (tkn_get_token() == TOKEN_ANYKEY) { tkn_next(); parse_anykey(); return; } + if (tkn_get_token() == TOKEN_GETCHAR) { tkn_next(); parse_getchar(); return; } + if (tkn_get_token() == TOKEN_GETCOLOR) { tkn_next(); parse_getcolor(); return; } + */ + parser_finished = true; error_raise("Syntax error"); + return; +} + +static void parse_expr_mul() { + parse_expr_atom(); + while (tkn_get_token() == TOKEN_ASTERISC || tkn_get_token() == TOKEN_SLASH || tkn_get_token() == TOKEN_MOD) { + t_tokentype operat = tkn_get_token(); + tkn_next(); + parse_expr_atom(); + switch (operat) { + case TOKEN_ASTERISC: emmit(OP_MUL); break; + case TOKEN_SLASH: emmit(OP_DIV); break; + case TOKEN_MOD: emmit(OP_MOD); break; + default: /* Impossible */ break; + } + } +} +static void parse_expr_sum() { + parse_expr_mul(); + while (tkn_get_token() == TOKEN_PLUS || tkn_get_token() == TOKEN_MINUS) { + 
t_tokentype operat = tkn_get_token(); + tkn_next(); + parse_expr_mul(); + switch (operat) { + case TOKEN_PLUS: emmit(OP_ADD); break; + case TOKEN_MINUS: emmit(OP_SUB); break; + default: /* Impossible */ break; + } + } +} +static void parse_expr_bool() { + parse_expr_sum(); + while (tkn_get_token() == TOKEN_AND || tkn_get_token() == TOKEN_OR) { + t_tokentype operat = tkn_get_token(); + tkn_next(); + parse_expr_sum(); + switch (operat) { + case TOKEN_AND: emmit(OP_AND); break; + case TOKEN_OR: emmit(OP_OR); break; + default: /* Impossible */ break; + } + } +} +static void parse_expression() { + parse_expr_bool(); + while (tkn_get_token() == TOKEN_EQ || tkn_get_token() == TOKEN_NEQ || tkn_get_token() == TOKEN_LT || tkn_get_token() == TOKEN_GT || tkn_get_token() == TOKEN_LEQ || tkn_get_token() == TOKEN_GEQ) { + t_tokentype operat = tkn_get_token(); + tkn_next(); + parse_expr_bool(); + switch (operat) { + case TOKEN_EQ: emmit(OP_EQ); break; + case TOKEN_NEQ: emmit(OP_NEQ); break; + case TOKEN_LT: emmit(OP_LT); break; + case TOKEN_GT: emmit(OP_GT); break; + case TOKEN_LEQ: emmit(OP_LEQ); break; + case TOKEN_GEQ: emmit(OP_GEQ); break; + default: /* Impossible */ break; + } + } +} + +static void parse_const() { + EXPECT(TOKEN_IDENTIFIER, "Expected identifier"); + char const_name[40]; + strcpy(const_name, tkn_get_string()); + tkn_next(); EXPECT(TOKEN_AS, "Expected 'as'"); + tkn_next(); EXPECT(TOKEN_IDENTIFIER, "Expected type identifier"); + char type_name[40]; + strcpy(type_name, tkn_get_string()); + tkn_next(); EXPECT(TOKEN_EQ, "Expected '='"); + tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant"); + int constvalue = tkn_get_value(); + parser_register_constant(constname, constvalue); + tkn_next(); +} + +static void parse_dim() { + EXPECT(TOKEN_IDENTIFIER, "Expected variable"); + char varname[40]; + strcpy(varname, tkn_get_string()); + tkn_next(); EXPECT(TOKEN_LPAR, "Expected array size"); + tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant as array 
size"); + get_variable_index(varname, tkn_get_value()); // register variable with size, no need to keep ival + tkn_next(); EXPECT(TOKEN_RPAR, "Expected ')'"); + tkn_next(); +} +// [RZC 27/04/2021] Desactivat INC i DEC per ara +/*static void parse_inc() { + EXPECT(TOKEN_IDENTIFIER, "Expected variable"); + int ivar = get_variable_index(tkn_get_string()); + if (is_variable_array()) { + tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); + tkn_next(); parse_expr_atom(); + EXPECT(TOKEN_RPAR, "Expected ')'"); + emmit(OP_SETY); + emmit(OP_LOADI); + emmit_w(ivar); + emmit(OP_INC); + emmit(OP_STOREI); + emmit_w(ivar); + } else { + emmit(OP_LOAD); + emmit_w(ivar); + emmit(OP_INC); + emmit(OP_STORE); + emmit_w(ivar); + } + tkn_next(); +} + +static void parse_dec() { + EXPECT(TOKEN_IDENTIFIER, "Expected variable"); + int ivar = get_variable_index(tkn_get_string()); + if (is_variable_array()) { + tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); + tkn_next(); parse_expr_atom(); + EXPECT(TOKEN_RPAR, "Expected ')'"); + emmit(OP_SETY); + emmit(OP_LOADI); + emmit_w(ivar); + emmit(OP_DEC); + emmit(OP_STOREI); + emmit_w(ivar); + } + else { + emmit(OP_LOAD); + emmit_w(ivar); + emmit(OP_DEC); + emmit(OP_STORE); + emmit_w(ivar); + } + tkn_next(); +}*/ + +// [RZC 27/04/2021] Desactivat, no se molt be ni què es +/*static void parse_let() { + EXPECT(TOKEN_IDENTIFIER, "Expected variable"); + int ivar = get_variable_index(tkn_get_string()); + if (is_variable_array()) { + tkn_next(); + if (tkn_get_token() == TOKEN_COLON) { + tkn_next(); parse_concatenation(); + emmit(OP_SETX); + emmit_w(ivar); + emmit(OP_JSR); + emmit_w(get_label_address("_sys_string_store")); + return; + } else { + EXPECT(TOKEN_LPAR, "Expected ':' or '('"); + tkn_next(); parse_expr_atom(); + EXPECT(TOKEN_RPAR, "Expected ')'"); + emmit(OP_SETY); + } + } + tkn_next(); + EXPECT(TOKEN_EQ, "Expected '='"); + tkn_next(); parse_expression(); + if (is_variable_array()) { + emmit(OP_STOREI); + emmit_w(ivar); + } else { + 
emmit(OP_STORE); + emmit_w(ivar); + } +}*/ + +static void parse_if() { + char* previous_breaklabel = breaklabel; + parse_expression(); + EXPECT(TOKEN_THEN, "Expected 'THEN'"); + tkn_next(); + emmit(OP_JNT); + char elselabel[8]; + generate_anonymous_labelname(elselabel); + char endlabel[8]; + generate_anonymous_labelname(endlabel); + breaklabel = endlabel; + bool else_visited = false; + emmit_w(get_label_address(elselabel)); + parse_statements(); + if (tkn_get_token() == TOKEN_ELSE) { + else_visited = true; + emmit(OP_JMP); + emmit_w(get_label_address(endlabel)); + register_label_address(elselabel); + tkn_next(); + parse_statements(); + } + if (tkn_get_token() == TOKEN_END) { + if (!else_visited) register_label_address(elselabel); + register_label_address(endlabel); + tkn_next(); + breaklabel = previous_breaklabel; + return; + } + parser_finished = true; + if (!else_visited) error_raise("Expected 'ELSE', 'END' or a statement"); else error_raise("Expected 'END' or a statement"); +} + +static void parse_for() { + char* previous_breaklabel = breaklabel; + char* previous_contlabel = contlabel; + EXPECT(TOKEN_IDENTIFIER, "Expected variable"); + int ivar = get_variable_index(tkn_get_string()); + tkn_next(); + EXPECT(TOKEN_EQ, "Expected '='"); + tkn_next(); + parse_expression(); + emmit(OP_STORE); + emmit_w(ivar); + char forlabel[8]; + generate_anonymous_labelname(forlabel); + char endlabel[8]; + generate_anonymous_labelname(endlabel); + char continuelabel[8]; + generate_anonymous_labelname(continuelabel); + breaklabel = endlabel; + contlabel = continuelabel; + register_label_address(forlabel); + EXPECT(TOKEN_TO, "Expected 'TO'"); + tkn_next(); + parse_expression(); + emmit(OP_LOAD); + emmit_w(ivar); + emmit(OP_EQ); + emmit(OP_JTR); + emmit_w(get_label_address(endlabel)); + //int endLabel = GetCurrentAddress(); + parse_statements(); + EXPECT(TOKEN_END, "Expected 'END'"); + tkn_next(); + register_label_address(continuelabel); + emmit(OP_LOAD); + emmit_w(ivar); + 
emmit(OP_INC); + emmit(OP_STORE); + emmit_w(ivar); + emmit(OP_JMP); + emmit_w(get_label_address(forlabel)); + register_label_address(endlabel); + //Patch(endLabel, GetCurrentAddress()); + breaklabel = previous_breaklabel; + contlabel = previous_contlabel; +} + +static void parse_break() { + if (breaklabel == nullptr) { parser_finished = true; error_raise("Can't break outside of a loop or condition"); return; } + emmit(OP_JMP); + emmit_w(get_label_address(breaklabel)); +} + +static void parse_continue() { + if (contlabel == nullptr) { parser_finished = true; error_raise("Can't continue outside of a loop"); return; } + emmit(OP_JMP); + emmit_w(get_label_address(contlabel)); +} + +// [RZC 27/04/2021] Ni GOTO ni GOSUB +/*static void parse_goto() { + emmit(OP_JMP); + EXPECT(TOKEN_IDENTIFIER, "Expected label name"); + emmit_w(get_label_address(tkn_get_string())); + tkn_next(); +} + +static void parse_gosub() { + emmit(OP_JSR); + EXPECT(TOKEN_IDENTIFIER, "Expected label name"); + emmit_w(get_label_address(tkn_get_string())); + tkn_next(); +}*/ + +static void parse_return() { + // [RZC 27/04/2021] [TODO] Falta afegir paràmetre de retorn + emmit(OP_RET); +} + +// [RZC 27/04/2021] No hi ha labels +/*static void parse_label() { + register_label_address(tkn_get_string()); + tkn_next(); +}*/ + +static void parse_call() { + for (int i = 0; i < num_external_functions; i++) { + if (strcmp(external_functions[i].name, tkn_get_string()) == 0) { + if (external_functions[i].num_parameters == -1) { + tkn_next(); + EXPECT(TOKEN_STRING, "Expected string"); + emmit(OP_PUSH); + emmit(register_string(tkn_get_string())); + } else { + tkn_next(); + for (int j = 0; j < external_functions[i].num_parameters; j++) { + //if (j != 0) EXPECT(TOKEN_COMMA, "Expected comma"); + parse_expression(); + } + } + emmit(OP_CALL); + emmit(i); + } + } +} + +static void parse_statements() { + while (!parser_finished) { + current_line = tkn_get_line(); + switch (tkn_get_token()) { + case TOKEN_CONST: + tkn_next(); 
parse_const(); break; + case TOKEN_DIM: + tkn_next(); parse_dim(); break; + case TOKEN_LET: + tkn_next(); parse_let(); break; + case TOKEN_INC: + tkn_next(); parse_inc(); break; + case TOKEN_DEC: + tkn_next(); parse_dec(); break; + case TOKEN_IF: + tkn_next(); parse_if(); break; + case TOKEN_FOR: + tkn_next(); parse_for(); break; + /*case TOKEN_BREAK: + tkn_next(); parse_break(); break; + case TOKEN_CONTINUE: + tkn_next(); parse_continue(); break;*/ + case TOKEN_RETURN: + tkn_next(); parse_return(); break; + case TOKEN_REM: + tkn_next(); break; + + case TOKEN_IDENTIFIER: + for (int i = 0; i < num_external_functions; i++) { + if (strcmp(external_functions[i].name, tkn_get_string()) == 0) { parse_call(); break; } + } + parse_let(); break; + default: + return; + } + } +} + +static void parse_global_statements() { + while (!parser_finished) { + current_line = tkn_get_line(); + switch (tkn_get_token()) { + case TOKEN_CONST: + tkn_next(); parse_const(); break; + case TOKEN_VAR: + tkn_next(); parse_var(); break; + case TOKEN_STRUCT: + tkn_next(); parse_struct(); break; + case TOKEN_FUNCTION: + tkn_next(); parse_function(); break; + case TOKEN_REM: + tkn_next(); break; + default: + return; + } + } +} + +// [RZC 27/04/2021] No usat en JailScript +/*static void include_labels() { + FILE *f = fopen("rom.lbl", "rb"); + byte num_labels; + fread(&num_labels, 1, 1, f); + for (int i = 0; i < num_labels; i++) { + word address; fread(&address, 2, 1, f); + byte len; fread(&len, 1, 1, f); + char label[50]; + fread(label, len, 1, f); + label[len] = 0; + strcpy(known_labels[num_known_labels].name, label); + known_labels[num_known_labels++].address = address; + } + fclose(f); + +}*/ + +/****************************************************************************************/ +/* INTERFACE */ +/****************************************************************************************/ + +void parser_parse(const char* buffer, byte* mem) { + + tkn_init(buffer); + parser_finished = false; + 
//include_labels(); + code = mem; + codepos = 2; // [RZC 27/04/2021] [TODO] Perquè 2? + tkn_next(); + + parse_global_statements(); + append_strings(); // [RZC 27/04/2021] [TODO] Encara vull açò? si el bytecode no ha de ser independent, no veig que faça falta + + if (error_raised()) { // [RZC 27/04/2021] Res, per ara que printf el error i pete + error_print(NULL); + //codepos = 0xA000; + //emmit(OP_JMP); + //emmit_w(0xA000); + } + + //FILE *f = fopen("test.bin", "wb"); + //fwrite(mem, codepos, 1, f); + //fclose(f); + + //return code; +} + +const int parser_get_codesize() { return codepos; } +const int parser_get_memory_usage() { return num_variables; } +unsigned short* parser_get_lines() { return lines; } + +void parser_register_external_function(const char* name, const char* parameters, void (*fun)(void)) { + strcpy(external_functions[num_external_functions].name, name); + external_functions[num_external_functions++].num_parameters = num_parameters; +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..969b872 --- /dev/null +++ b/parser.h @@ -0,0 +1,85 @@ +#pragma once + +enum OPS { + OP_NOP = 0, + OP_PUSH, + OP_POP, + OP_DUP, + OP_SWAP, + + OP_LOAD, + //OP_LOADI, + OP_STORE, + //OP_STOREI, +/* + OP_LOADXY, + OP_STOREXY, + OP_SETX, + OP_SETY, + OP_SETZ, + OP_GETY, + OP_GETZ, + OP_INCX, + OP_DECX, + OP_INCY, + OP_DECY, + OP_INCZ, + OP_DECZ, +*/ + OP_JMP, + OP_JNT, + OP_JTR, + //OP_JSR, + OP_RET, + OP_CALL, + OP_CALLEX, // AFEGIT PER A JAILSCRIPT +/* + OP_RJ, + OP_RB, + OP_RJZ, + OP_RJN, + OP_RBZ, + OP_RBN, + + OP_RJYZ, + OP_RJYN, + OP_RBYZ, + OP_RBYN, + OP_RJZZ, + OP_RJZN, + OP_RBZZ, + OP_RBZN, +*/ + OP_ADD, + OP_SUB, + OP_MUL, + OP_DIV, + OP_MOD, + OP_AND, + OP_OR, + OP_NOT, + OP_NEG, + OP_INC, + OP_DEC, + OP_CONCAT, + + OP_EQ, + OP_NEQ, + OP_LT, + OP_GT, + OP_LEQ, + OP_GEQ, + + //OP_IN, + //OP_OUT, + OP_SLEEP, +}; + +void parser_parse(const char* buffer, unsigned char* mem); +const int parser_get_codesize(); +const int parser_get_memory_usage(); 
+unsigned short* parser_get_lines(); + +void parser_register_external_function(const char* name, const char* parameters, void (*fun)(void)); + +void parser_register_constant(const char* name, const unsigned char value); \ No newline at end of file diff --git a/scope.cpp b/scope.cpp new file mode 100644 index 0000000..2991042 --- /dev/null +++ b/scope.cpp @@ -0,0 +1,50 @@ +#include "scope.h" +#include +#include "heap.h" + +struct t_variable { + //char name[MAX_IDENTIFIER_LENGTH]; + std::string name; + int type; + int length; + int address; +}; + +struct t_struct { + //char name[MAX_IDENTIFIER_LENGTH]; + std::string name; + std::vector members; +}; + +struct t_scope { + std::vector variables; + t_scope* parent_scope; +}; + +static t_scope scope_global; +static t_scope* scope_current = &scope_global; + +void scope_init() { + scope_current = &scope_global; + scope_global.variables.clear(); +} + +void scope_open_local() { + t_scope* new_scope = new t_scope; + new_scope->parent_scope = scope_current; + scope_current = new_scope; +} + +void scope_close_local() { + t_scope* closing_scope = scope_current; + scope_current = scope_current->parent_scope; + delete closing_scope; +} + +const int scope_declare_variable(const std::string name, const int type, const int length) { + for (t_variable v : scope_current->variables) { if (v.name == name) return -1; } + const int address = heap_get_address(); + const t_variable var {name, type, length, address}; + scope_current->variables.push_back(var); + return address; +} diff --git a/scope.h b/scope.h new file mode 100644 index 0000000..b634e16 --- /dev/null +++ b/scope.h @@ -0,0 +1,8 @@ +#pragma once +#include + +void scope_init(); +void scope_open_local(); +void scope_close_local(); +const int scope_declare_variable(const std::string name, const int type, const int length); +//scope_get_variable... 
\ No newline at end of file diff --git a/tokenizer.cpp b/tokenizer.cpp new file mode 100644 index 0000000..f2a12a8 --- /dev/null +++ b/tokenizer.cpp @@ -0,0 +1,161 @@ +#include "tokenizer.h" +#include "error.h" + +static bool streq(const char* a, const char* b) { + while (*a != 0 && *b != 0) { + if (*a != *b) return false; + a++; b++; + } + return *a == *b; +} + +static const char* ptr; +static t_tokentype current_token = TOKEN_ERROR; +static char identifier[255]; +static unsigned char array[255]; +static float value; +static int line = 0; +static int row = 0; +static int current_tokenline = 0; +static int current_tokenrow = 0; + +struct t_token_op { + const char* str; + t_tokentype tokentype; +}; + +t_token_op tkn_tokens[] = { + { "struct", TOKEN_STRUCT }, + { "as", TOKEN_AS }, + { "end", TOKEN_END }, + { "function", TOKEN_FUNCTION }, + { "var", TOKEN_VAR }, + { "array", TOKEN_ARRAY }, + { "of", TOKEN_OF }, + { "const", TOKEN_CONST }, + { "if", TOKEN_IF }, + { "then", TOKEN_THEN }, + { "else", TOKEN_ELSE }, + { "while", TOKEN_WHILE }, + { "do", TOKEN_DO }, + { "repeat", TOKEN_REPEAT }, + { "until", TOKEN_UNTIL }, + { "for", TOKEN_FOR }, + { "to", TOKEN_TO }, + { "step", TOKEN_STEP }, + { "return", TOKEN_RETURN }, + { "rem", TOKEN_REM }, + { "and", TOKEN_AND }, + { "or", TOKEN_OR }, + { "not", TOKEN_NOT }, + { "mod", TOKEN_MOD } +}; + +//static char hex_digit(const char digit) { return digit - (digit <= 57 ? 
48 : 55); }
+
+// Character currently under the read cursor.
+#define CCHR (*ptr)
+// Advance the cursor by one character, keeping the line/row counters in sync:
+// a line feed (10) starts a new line at row 0, anything else moves one row on.
+// Wrapped in do/while(0) so that `NEXT;` is a single statement wherever it is used.
+#define NEXT do { if (*ptr == 10) { line++; row = 0; } else { row++; } ptr++; } while (0)
+
+// Scans exactly one raw token starting at the cursor and stores it in
+// `current_token`. Text of strings, identifiers and keywords is left in
+// `identifier`, numeric literals in `value`, and the position where the token
+// starts in `current_tokenline` / `current_tokenrow`.
+// Comments (TOKEN_REM) and line ends (TOKEN_ENDLINE) are reported as tokens here;
+// tkn_next() is the one that skips them.
+static void tkn_do_next() {
+    identifier[0] = 0;
+    value = 0;
+    char* id = identifier;
+    // Skip blanks and control characters, but stop at a line feed: it is a token of its own
+    while (CCHR != 0 && CCHR != 10 && CCHR <= 32) { NEXT; }
+    // Remember where the token starts; error_raise() reports this position
+    current_tokenline = line; current_tokenrow = row;
+    // If zero, we are at the end of the file
+    if (CCHR == 0) { current_token = TOKEN_ENDFILE; return; }
+    // If 10 (line feed), we are at the end of the line
+    if (CCHR == 10) { NEXT; current_token = TOKEN_ENDLINE; return; }
+    // If 34 (double quote), grab the string verbatim, without the quotes
+    // NOTE(review): nothing bounds this copy (nor the identifier copy below) against
+    // the size of `identifier`, which is declared outside this hunk -- confirm the
+    // buffer size and add a length check.
+    if (CCHR == 34) {
+        NEXT;
+        while (CCHR != 0 && CCHR != 34) { *id = CCHR; id++; NEXT; }
+        if (CCHR == 0) { current_token = TOKEN_ERROR; error_raise("Unexpected end of file."); return; }
+        *id = 0; NEXT;
+        current_token = TOKEN_STRING; return;
+    }
+    // If digit, grab the number
+    if (CCHR >= 48 && CCHR <= 57) {
+        do { value = value * 10 + (CCHR - 48); NEXT; } while (CCHR >= 48 && CCHR <= 57);
+        if (CCHR == '.') {
+            // Step over the decimal point first: it is not a digit and must not be
+            // fed into the accumulation below.
+            NEXT;
+            // Divide in floating point. With an int divisor every digit/scale term
+            // truncated to 0 and the fractional part was silently dropped.
+            float scale = 10.0f;
+            while (CCHR >= 48 && CCHR <= 57) { value = value + (CCHR - 48) / scale; scale *= 10.0f; NEXT; }
+        }
+        current_token = TOKEN_NUMBER; return;
+    }
+    // If letter, grab the identifier (letters, digits and '_' may follow)
+    if ((CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122)) {
+        do { *id = CCHR; id++; NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122) || CCHR == 95);
+        *id = 0;
+        // To lowercase: setting bit 5 maps 'A'..'Z' onto 'a'..'z' and leaves digits as
+        // they are; '_' (95) and lowercase letters are excluded by the < 95 test.
+        for (int i = 0; identifier[i] != 0; i++) {
+            if (identifier[i] < 95) { identifier[i] |= 32; }
+        }
+        // If it matches a keyword, it's a keyword
+        for (auto token : tkn_tokens) {
+            if (streq(token.str, identifier)) {
+                current_token = token.tokentype;
+                // A REM keyword comments out the rest of the line. Stop at the line
+                // feed (10), like the apostrophe comment below; stopping at 13 ran to
+                // the end of the file on sources without carriage returns.
+                if (current_token == TOKEN_REM) { while (CCHR != 0 && CCHR != 10) { NEXT; } }
+                return;
+            }
+        }
+        // else, it's an identifier
+        current_token = TOKEN_IDENTIFIER; return;
+    }
+    // Check if it's any of the operators
+    if (CCHR == '+') { NEXT; current_token = TOKEN_PLUS; return; }
+    if (CCHR == '-') { NEXT; current_token = TOKEN_MINUS; return; }
+    if (CCHR == '*') { NEXT; current_token = TOKEN_ASTERISC; return; }
+    if (CCHR == '/') { NEXT; current_token = TOKEN_SLASH; return; }
+    if (CCHR == '.') { NEXT; current_token = TOKEN_DOT; return; }
+    if (CCHR == '(') { NEXT; current_token = TOKEN_LPAR; return; }
+    if (CCHR == ')') { NEXT; current_token = TOKEN_RPAR; return; }
+    if (CCHR == '[') { NEXT; current_token = TOKEN_LBRACKET; return; }
+    if (CCHR == ']') { NEXT; current_token = TOKEN_RBRACKET; return; }
+    if (CCHR == ',') { NEXT; current_token = TOKEN_COMMA; return; }
+    // '<' may be followed by '=' (<=) or '>' (<>, not equal)
+    if (CCHR == '<') {
+        NEXT;
+        if (CCHR == '=') { NEXT; current_token = TOKEN_LEQ; }
+        else if (CCHR == '>') { NEXT; current_token = TOKEN_NEQ; }
+        else { current_token = TOKEN_LT; }
+        return;
+    }
+    if (CCHR == '>') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_GEQ; } else { current_token = TOKEN_GT; } return; }
+    if (CCHR == '=') { NEXT; current_token = TOKEN_EQ; return; }
+    // '!' alone is NOT, '!=' is a second spelling of not-equal
+    if (CCHR == '!') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_NEQ; } else { current_token = TOKEN_NOT; } return; }
+    if (CCHR == '%') { NEXT; current_token = TOKEN_MOD; return; }
+    // Apostrophe (39) starts a comment that runs up to, but not including, the line feed
+    if (CCHR == 39) { NEXT; current_token = TOKEN_REM; while (CCHR != 0 && CCHR != 10) { NEXT; }; return; }
+
+    // For reading hexadecimal or binary numbers; inherited from PaCO, not sure whether JailScript needs it
+    /*if (CCHR == '&') {
+        NEXT;
+        if ((CCHR | 32) == 'h') {
+            NEXT;
+            if ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70)) {
+                do { value = (value << 4) + hex_digit(CCHR); NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70));
+                current_token = TOKEN_NUMBER; return;
+            }
+        }
+        if ((CCHR | 32) == 'b') {
+            NEXT;
+            if (CCHR == 48 || CCHR == 49) {
+                do { value = (value << 1) + (CCHR-48); NEXT; } while (CCHR == 48 || CCHR == 49);
+                current_token = TOKEN_NUMBER; return;
+            }
+        }
+        current_token = TOKEN_ERROR;
+        error_raise("Syntax error");
+        return;
+    }*/
+
+    // If no match, it's an error. The cursor is left on the offending character.
+    current_token = TOKEN_ERROR; return;
+}
+
+// Points the tokenizer at a zero-terminated source buffer and resets all state.
+// Line and row counters restart at 0; no token is read until tkn_next() is called.
+void tkn_init(const char* buffer) {
+    ptr = buffer;
+    current_token = TOKEN_ERROR;
+    value = line = row = current_tokenline = current_tokenrow = 0;
+    identifier[0] = 0; array[0] = 0;
+}
+
+// Advances to the next meaningful token, skipping comments and line ends.
+void tkn_next() { do { tkn_do_next(); } while (current_token == TOKEN_REM || current_token == TOKEN_ENDLINE); }
+
+// Type of the current token.
+t_tokentype tkn_get_token() { return current_token; }
+// Text of the current string literal, or the lowercased spelling of the current identifier/keyword.
+char* tkn_get_string() { return identifier; }
+// The `array` buffer. NOTE(review): nothing in this part of the file fills it; only tkn_init() clears it.
+unsigned char* tkn_get_array() { return array; }
+// Value of the current numeric literal.
+float tkn_get_value() { return value; }
+// Zero-based line on which the current token starts.
+int tkn_get_line() { return current_tokenline; }
+// Zero-based row (column) at which the current token starts.
+int tkn_get_row() { return current_tokenrow; }
diff --git a/tokenizer.h b/tokenizer.h
new file mode 100644
index 0000000..5d2e5ee
--- /dev/null
+++ b/tokenizer.h
@@ -0,0 +1,65 @@
+#pragma once
+
+typedef unsigned char byte;
+typedef unsigned short word;
+
+enum t_tokentype {
+    TOKEN_ERROR,
+    TOKEN_NUMBER,
+    TOKEN_STRING,
+
+    TOKEN_IDENTIFIER,
+
+    TOKEN_STRUCT,
+    TOKEN_AS,
+    TOKEN_END,
+    TOKEN_FUNCTION,
+    TOKEN_VAR,
+    TOKEN_ARRAY,
+    TOKEN_OF,
+    TOKEN_CONST,
+    TOKEN_IF,
+    TOKEN_THEN,
+    TOKEN_ELSE,
+    TOKEN_WHILE,
+    TOKEN_DO,
+    TOKEN_REPEAT,
+    TOKEN_UNTIL,
+    TOKEN_FOR,
+    TOKEN_TO,
+    TOKEN_STEP,
+    TOKEN_RETURN,
+    TOKEN_REM,
+
+    TOKEN_AND,
+    TOKEN_OR,
+    TOKEN_NOT,
+    TOKEN_MOD,
+    TOKEN_PLUS,
+    TOKEN_MINUS,
+    TOKEN_ASTERISC,
+    TOKEN_SLASH,
+    TOKEN_DOT,
+    TOKEN_LPAR,
+    TOKEN_RPAR,
+    TOKEN_LBRACKET,
+    TOKEN_RBRACKET,
+    TOKEN_COMMA,
+    TOKEN_LT,
+    TOKEN_GT,
+    TOKEN_LEQ,
+    TOKEN_GEQ,
+    TOKEN_EQ,
+    TOKEN_NEQ,
+    TOKEN_ENDLINE,
+    TOKEN_ENDFILE
+};
+
+void tkn_init(const char* buffer);
+void tkn_next();
+t_tokentype tkn_get_token();
+char* tkn_get_string();
+unsigned char* tkn_get_array();
+float tkn_get_value();
+int tkn_get_line();
+int tkn_get_row();