From f1f590735b0aa69ba7e0a438bdedec0cc4a648c7 Mon Sep 17 00:00:00 2001
From: Raimon Zamora
Date: Tue, 27 Apr 2021 19:02:20 +0200
Subject: [PATCH] First commit, porting from PaCO

---
 .gitignore    |   2 +
 error.cpp     |  22 ++
 error.h       |   5 +
 heap.cpp      |  48 +++
 heap.h        |  11 +
 parser.cpp    | 826 ++++++++++++++++++++++++++++++++++++++++++++++++++
 parser.h      |  85 ++++++
 scope.cpp     |  50 +++
 scope.h       |   8 +
 tokenizer.cpp | 161 ++++++++++
 tokenizer.h   |  65 ++++
 11 files changed, 1283 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 error.cpp
 create mode 100644 error.h
 create mode 100644 heap.cpp
 create mode 100644 heap.h
 create mode 100644 parser.cpp
 create mode 100644 parser.h
 create mode 100644 scope.cpp
 create mode 100644 scope.h
 create mode 100644 tokenizer.cpp
 create mode 100644 tokenizer.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b15602d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+
+.vscode
diff --git a/error.cpp b/error.cpp
new file mode 100644
index 0000000..13ae601
--- /dev/null
+++ b/error.cpp
@@ -0,0 +1,22 @@
+#include "error.h"
+#include <stdio.h>
+#include "tokenizer.h"
+
+static char errormsg[255];
+static bool raised = false;
+
+// Records the first error only; later calls are ignored while 'raised' is set.
+void error_raise(const char* msg) {
+    if (!raised) {
+        raised = true;
+        snprintf(errormsg, sizeof(errormsg), "ERROR AT SOURCE:%s at %d:%d.", msg, tkn_get_line() + 1, tkn_get_row() + 1); // bounded: msg may be longer than the buffer
+    }
+}
+
+bool error_raised() { return raised; }
+
+void error_print(unsigned char* mem) {
+    //char* msg = errormsg;
+    //while (*msg != 0) *mem++ = *msg++;
+    printf("%s", errormsg); // 'mem' is currently unused (see the disabled copy above)
+}
diff --git a/error.h b/error.h
new file mode 100644
index 0000000..b753dfe
--- /dev/null
+++ b/error.h
@@ -0,0 +1,5 @@
+#pragma once
+
+void error_raise(const char* msg);
+bool error_raised();
+void error_print(unsigned char* mem);
diff --git a/heap.cpp b/heap.cpp
new file mode 100644
index 0000000..8ac1b6f
--- /dev/null
+++ b/heap.cpp
@@ -0,0 +1,48 @@
+#include "heap.h"
+#include <stdlib.h>
+#include <string.h>
+static int heap_size = 0;
+static int heap_address = 0;
+static char* heap_mem = NULL;
+
+void heap_init() {
+    heap_size = 0;
+    heap_address = 0;
+    if (heap_mem != NULL) { free(heap_mem); heap_mem = NULL; }
+}
+
+const int heap_get_address() {
+    return heap_address;
+}
+
+void heap_reserve(const int size) {
+    heap_size += size;
+    heap_address += size;
+}
+
+const int heap_get_size() {
+    return heap_size;
+}
+// Allocates heap_size bytes once; a second call is a no-op until heap_init() frees the block.
+void heap_alloc() {
+    if (heap_mem == NULL) heap_mem = (char*)malloc(heap_size); // NOTE(review): result is not checked for NULL
+}
+// Reads back the float stored by heap_set_float() at 'address'.
+const float heap_get_float(const int address) {
+    float value; memcpy(&value, &heap_mem[address], sizeof(value)); return value; // all sizeof(float) bytes, not a single char converted to float
+}
+
+const char* heap_get_string(const int address) {
+    return &heap_mem[address];
+}
+// Stores the raw bytes of 'value' at 'address'.
+void heap_set_float(const int address, const float value) {
+    memcpy(&heap_mem[address], &value, sizeof(value)); // byte copy: 'address' is not guaranteed to be float-aligned
+    //float *f = (float*)&heap_mem[address];
+    //*f = value;
+}
+// Copies a length-prefixed string: value[0] is the length, followed by that many bytes.
+void heap_set_string(const int address, const char* value) {
+    const int size = (unsigned char)*value; // read the length byte as unsigned so lengths above 127 are not negative
+    for (int i = 0; i <= size; i++) { heap_mem[address+i] = value[i]; }
+}
diff --git a/heap.h b/heap.h
new file mode 100644
index 0000000..48db50e
--- /dev/null
+++ b/heap.h
@@ -0,0 +1,11 @@
+#pragma once
+
+void heap_init();
+const int heap_get_address();
+void heap_reserve(const int size);
+const int heap_get_size();
+void heap_alloc();
+const float heap_get_float(const int address);
+const char* heap_get_string(const int address);
+void heap_set_float(const int address, const float value);
+void heap_set_string(const int address, const char* value);
diff --git a/parser.cpp b/parser.cpp
new file mode 100644
index 0000000..d962963
--- /dev/null
+++ b/parser.cpp
@@ -0,0 +1,826 @@
+#include "parser.h"
+#include "tokenizer.h"
+#include "error.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_CODE_SIZE 65536
+#define MAX_LABELS 256
+#define MAX_FUNCTIONS 256
+#define MAX_EXTERNAL_FUNCTIONS 256
+#define MAX_IDENTIFIER_LENGTH 40
+#define MAX_CONSTANTS 256
+#define MAX_STRUCT_MEMBERS 10
+
+struct t_variable {
+    char name[MAX_IDENTIFIER_LENGTH];
+    int type;
+    int length;
+    int address;
+};
+
+struct t_struct {
+    char name[MAX_IDENTIFIER_LENGTH];
+    int num_members;
+    t_variable members[MAX_STRUCT_MEMBERS];
+};
+
+struct t_scope {
+
+};
+
+struct t_label {
+    char name[MAX_IDENTIFIER_LENGTH];
+    word address;
+};
+
+/*struct t_variable {
+    char name[MAX_IDENTIFIER_LENGTH];
+    int index = 0;
+    int size = 1;
+    t_variable* next{ nullptr };
+};
+
+struct t_external_function {
+    char name[MAX_IDENTIFIER_LENGTH];
+    int num_parameters = 0;
+};
+
+struct t_string {
+    unsigned char string[256];
+    word references[256];
+    int num_references = 1;
+};
+
+struct t_constant {
+    char name[MAX_IDENTIFIER_LENGTH];
+    byte value;
+};*/
+
+static byte* code; // [MAX_CODE_SIZE];
+static word codepos = 0;
+static word lines[MAX_CODE_SIZE]; // one source-line entry per code byte, so it must cover the whole code space
+static word current_line;
+
+//static t_label known_labels[MAX_LABELS];
+//static int num_known_labels = 0;
+static t_label unknown_labels[MAX_LABELS];
+static int num_unknown_labels = 0;
+static int num_anonymous_labels = 0;
+
+static char* breaklabel = nullptr;
+static char* contlabel = nullptr;
+
+static t_variable* variables = nullptr;
+static int num_variables = 0;
+static bool variable_is_array = false;
+
+static t_external_function external_functions[MAX_EXTERNAL_FUNCTIONS];
+static int num_external_functions = 0;
+
+static t_string strings[256];
+static int num_strings = 0;
+
+static bool parser_finished = false;
+
+static t_constant constants[MAX_CONSTANTS];
+static int num_constants;
+
+/****************************************************************************************/
+/* GENERIC FUNCTIONS */
+/****************************************************************************************/
+// Writes 'value' as a 7-digit, zero-padded decimal string into label (8 bytes including the terminator).
+static void int_to_string(int value, char* label) {
+    for (int i = 0; i < 7; i++) label[i] = '0';
+    label[7] = 0;
+    int i = 6;
+    while (value > 0 && i >= 0) { // stop at the first digit slot: never write before label[0]
+        label[i] = 48 + (value % 10);
+        value = value / 10;
+        i--;
+    }
+}
+
+/****************************************************************************************/
+/* BYTECODE GENERATOR */
+/****************************************************************************************/
+// Appends one byte to the code buffer and tags it with the source line being parsed.
+static void emmit(const byte value) {
+    if (codepos < sizeof(lines) / sizeof(lines[0])) lines[codepos] = current_line; // skip the tag when codepos is past the end of lines[]
+    code[codepos++] = value;
+}
+// Appends a 16-bit word to the code buffer.
+static void emmit_w(const word value) {
+    // Low byte first, then high byte. Both bytes go through emmit() so they
+    // get the same line tagging and bounds handling as single bytes.
+    emmit(value & 255);
+    emmit(value >> 8);
+}
+// Appends the four bytes of a float to the code buffer, in the order they sit in memory.
+static void emmit_f(const float value) {
+    const byte *v = (const byte*)&value;
+    emmit(v[0]);
+    emmit(v[1]);
+    emmit(v[2]);
+    emmit(v[3]);
+}
+// Overwrites the 16-bit word at 'address' with 'dest' (low byte first); codepos is not moved.
+static void patch(const word address, const word dest) {
+    code[address] = dest & 255;
+    code[address + 1] = dest >> 8;
+}
+
+static const word get_current_address() {
+    return codepos;
+}
+
+/****************************************************************************************/
+/* VARIABLE MANAGEMENT */
+/****************************************************************************************/
+// Returns 0xC000 + slot index of the named variable, registering it with 'array_size' slots on first use.
+static int get_variable_index(const char* string, const int array_size = 1) {
+    t_variable* variable = variables; // NOTE(review): index/size/next are declared only by the commented-out t_variable
+    t_variable* last = variables;
+    while (variable != nullptr) {
+        if (strncmp(variable->name, string, MAX_IDENTIFIER_LENGTH - 1) == 0) { variable_is_array = (variable->size > 1); return variable->index + 0xC000; }
+        last = variable;
+        variable = variable->next;
+    }
+    t_variable* newvar = (t_variable*)malloc(sizeof(t_variable));
+    if (newvar == nullptr) { parser_finished = true; error_raise("Out of memory"); return 0xC000; }
+    strncpy(newvar->name, string, MAX_IDENTIFIER_LENGTH - 1); newvar->name[MAX_IDENTIFIER_LENGTH - 1] = 0; // names are significant up to MAX_IDENTIFIER_LENGTH - 1 chars
+    newvar->index = num_variables;
+    num_variables += array_size;
+    newvar->size = array_size;
+    variable_is_array = (newvar->size > 1);
+    newvar->next = nullptr;
+    if (last != nullptr) {
+        last->next = newvar;
+    }
+    else {
+        variables = newvar;
+    }
+    return newvar->index + 0xC000;
+}
+
+static bool is_variable_array() { return variable_is_array; } // set by the most recent get_variable_index() call
+
+/****************************************************************************************/
+/* CONSTANT MANAGEMENT */
+/****************************************************************************************/
+void parser_register_constant(const char* name, const unsigned char value) {
+    strcpy(constants[num_constants].name, name); // NOTE(review): no bound on num_constants or on the length of 'name'
+    constants[num_constants++].value = value;
+}
+// Returns the value of the named constant, or -1 when no constant has that name.
+static const int get_constant(const char* name) {
+    for (int i = 0; i < num_constants; i++) {
+        if (strcmp(constants[i].name, name) == 0)
+            return constants[i].value;
+    }
+    return -1;
+}
+
+/****************************************************************************************/
+/* LABEL MANAGEMENT */
+/****************************************************************************************/
+// Returns the address of a known label; otherwise records the current address as a pending reference and returns 0.
+static const word get_label_address(const char* string) {
+    for (int i = 0; i < num_known_labels; i++) { if (strcmp(known_labels[i].name, string) == 0) return known_labels[i].address; } // NOTE(review): known_labels is only declared in a commented-out line
+    //unknown_labels[num_unknown_labels].name = (char*)malloc(strlen(string) + 1);
+    strcpy(unknown_labels[num_unknown_labels].name, string);
+    unknown_labels[num_unknown_labels].address = get_current_address();
+    num_unknown_labels++;
+    return 0;
+}
+// Binds 'string' to the current address and patches every pending reference to it.
+static void register_label_address(const char* string) {
+    // If the label already exists, vomit an error
+    for (int i = 0; i < num_known_labels; i++) {
+        if (strcmp(known_labels[i].name, string) == 0) {
+            parser_finished = true;
+            error_raise("Duplicate label");
+            return;
+        }
+    }
+    // register new label
+    //known_labels[num_known_labels].name = (char*)malloc(strlen(string) + 1);
+    strcpy(known_labels[num_known_labels].name, string);
+    known_labels[num_known_labels++].address = get_current_address();
+    // patch and remove any references awaiting for this label
+    int i = 0;
+    while (i < num_unknown_labels) {
+        if (strcmp(unknown_labels[i].name, string) == 0) {
+            patch(unknown_labels[i].address, get_current_address());
+            if (i < num_unknown_labels - 1) {
+                //free(unknown_labels[i].name);
+                unknown_labels[i].address = unknown_labels[num_unknown_labels - 1].address;
+                strcpy(unknown_labels[i].name, unknown_labels[num_unknown_labels - 1].name);
+                //unknown_labels[i].name = unknown_labels[num_unknown_labels - 1].name;
+            }
+            num_unknown_labels--;
+        }
+        else {
+            i++;
+        }
+    }
+    //num_known_labels++;
+}
+
+static void generate_anonymous_labelname(char* dest) {
+    int_to_string(num_anonymous_labels++, dest);
+}
+
+/****************************************************************************************/
+/* STRING MANAGEMENT */
+/****************************************************************************************/
+static const bool same_string(const unsigned char* a, const unsigned char*b) {
+    for (int i = 0; i <= a[0]; i++) if (a[i] != b[i]) return false; // byte 0 is the length, so it is compared too
+    return true;
+}
+static void copy_string(unsigned char* dest, const unsigned char* src) {
+    memcpy(dest, src, src[0] + 1);
+    //for (int i = 0; i <= src[0]; i++) dest[i] = src[i];
+}
+static void to_basic_string(unsigned char* dest, const char* src) {
+    int len = strlen(src); if (len > 255) len = 255; // the length prefix is a single byte: truncate longer strings
+    *dest = len; dest++;
+    memcpy(dest, src, len);
+}
+// Records a reference to a length-prefixed byte array at the current address; always returns 0 (placeholder patched by append_strings).
+static int register_array(const unsigned char* string) {
+    for (int i = 0; i < num_strings; i++) {
+        if (same_string(strings[i].string, string)) { strings[i].references[strings[i].num_references++] = get_current_address(); return 0; }
+    }
+    copy_string(strings[num_strings].string, string);
+    strings[num_strings++].references[0] = get_current_address();
+    return 0;
+}
+// Same as register_array(), for a NUL-terminated C string.
+static int register_string(const char* string) {
+    unsigned char new_str[256]; to_basic_string(new_str, string); // 1 length byte + up to 255 characters
+    for (int i = 0; i < num_strings; i++) {
+        if (same_string(strings[i].string, new_str)) { strings[i].references[strings[i].num_references++] = get_current_address(); return 0; }
+    }
+    copy_string(strings[num_strings].string, new_str);
+    strings[num_strings++].references[0] = get_current_address();
+    return 0;
+}
+// Writes the string table after the code and patches every recorded reference to point at its string.
+static void append_strings() {
+    code[0] = codepos & 255; // bytes 0-1 of the code hold the address of the string table
+    code[1] = codepos >> 8;
+    emmit(num_strings);
+    for (int i = 0; i < num_strings; i++) {
+        for (int j = 0; j < strings[i].num_references; j++) { patch(strings[i].references[j], get_current_address()); }
+        //char len = strings[i].string[0];
+        //emmit(len);
+        for (int j = 0; j <= strings[i].string[0]; j++) { emmit(strings[i].string[j]); }
+    }
+}
+
+/****************************************************************************************/
+/* PARSER */
+/****************************************************************************************/
+
+static void parse_expression();
+static void parse_statements();
+
+/* [RZC 27/04/2021] Not used in JailScript
+//static void parse_strleft();
+static void parse_str();
+static void parse_chr();
+
+static void parse_strlen();
+static void parse_keypressed();
+static void parse_anykey();
+static void parse_getchar();
+static void parse_getcolor();
+*/
+
+#define EXPECT(X, Y) if (tkn_get_token() != X) { parser_finished = true; error_raise(Y); return; }
+
+static void parse_concat_atom() {
+    if (tkn_get_token() == TOKEN_STRING) {
+        emmit(OP_SETX);
+        emmit_w(register_string(tkn_get_string()));
+        emmit(OP_JSR);
+        emmit_w(get_label_address("_sys_string_load"));
+        tkn_next();
+        return;
+    }
+    if (tkn_get_token() == TOKEN_HEXSTRING) {
+        emmit(OP_SETX);
+        emmit_w(register_array(tkn_get_array()));
+        emmit(OP_JSR);
+        emmit_w(get_label_address("_sys_string_load"));
+        tkn_next();
+        return;
+    }
+    if (tkn_get_token() == TOKEN_IDENTIFIER) {
+        int ivar = get_variable_index(tkn_get_string());
+        if (!is_variable_array()) { parser_finished = true; error_raise("Expected string constant or variable"); return; }
+        emmit(OP_SETX);
+        emmit_w(ivar);
+        emmit(OP_JSR);
+        emmit_w(get_label_address("_sys_string_load"));
+        tkn_next();
+        return;
+    }
+    if (tkn_get_token() == TOKEN_STRLEFT) { tkn_next(); parse_strleft(); return; }
+    if (tkn_get_token() == TOKEN_STR) { tkn_next(); parse_str(); return; }
+    if (tkn_get_token() == TOKEN_CHR) { tkn_next(); parse_chr(); return; }
+    parser_finished = true; error_raise("Syntax error");
+    return;
+}
+
+static void parse_concatenation() {
+    parse_concat_atom();
+    while (tkn_get_token() == TOKEN_COMMA) {
+        tkn_next(); parse_concat_atom();
+        emmit(OP_CONCAT);
+    }
+}
+
+static void parse_expr_atom() {
+    // NUM, VAR, UNARY, PAREN
+    if (tkn_get_token() == TOKEN_NUMBER) {
+        emmit(OP_PUSH);
+        emmit_f(tkn_get_value());
+        tkn_next();
+        return;
+    }
+    if (tkn_get_token() == TOKEN_IDENTIFIER) {
+        // [RZC 27/04/2021] Constants are removed for now
+        /*int constvalue = get_constant(tkn_get_string());
+        if (constvalue != -1) {
+            emmit(OP_PUSH);
+            emmit(constvalue);
+            tkn_next();
+            return;
+        }*/
+
+        // [RZC 27/04/2021] Variables are removed for now
+        /*int ivar = get_variable_index(tkn_get_string());
+        if (is_variable_array()) {
+            tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index");
+            tkn_next(); parse_expr_atom();
+            EXPECT(TOKEN_RPAR, "Expected ')'");
+            emmit(OP_SETY);
+            emmit(OP_LOADI);
+            emmit_w(ivar);
+        } else {
+            emmit(OP_LOAD);
+            emmit_w(ivar);
+        }*/
+        // [RZC 27/04/2021] Warning! Variables must now be declared beforehand, so an error can occur here if the variable is not found.
+        tkn_next();
+        return;
+    }
+    if (tkn_get_token() == TOKEN_MINUS) {
+        tkn_next();
+        parse_expr_atom();
+        emmit(OP_NEG);
+        return;
+    }
+    if (tkn_get_token() == TOKEN_NOT) {
+        tkn_next();
+        parse_expr_atom();
+        emmit(OP_NOT);
+        return;
+    }
+    if (tkn_get_token() == TOKEN_LPAR) {
+        tkn_next();
+        parse_expression();
+        EXPECT(TOKEN_RPAR, "Expected ')'");
+        tkn_next();
+        return;
+    }
+    // [RZC 27/04/2021] Not used in JailScript, but the functions will have to be added
+    /*
+    if (tkn_get_token() == TOKEN_STRLEN) { tkn_next(); parse_strlen(); return; }
+    if (tkn_get_token() == TOKEN_KEYPRESSED) { tkn_next(); parse_keypressed(); return; }
+    if (tkn_get_token() == TOKEN_ANYKEY) { tkn_next(); parse_anykey(); return; }
+    if (tkn_get_token() == TOKEN_GETCHAR) { tkn_next(); parse_getchar(); return; }
+    if (tkn_get_token() == TOKEN_GETCOLOR) { tkn_next(); parse_getcolor(); return; }
+    */
+    parser_finished = true; error_raise("Syntax error");
+    return;
+}
+
+static void parse_expr_mul() {
+    parse_expr_atom();
+    while (tkn_get_token() == TOKEN_ASTERISC || tkn_get_token() == TOKEN_SLASH || tkn_get_token() == TOKEN_MOD) {
+        t_tokentype operat = tkn_get_token();
+        tkn_next();
+        parse_expr_atom();
+        switch (operat) {
+            case TOKEN_ASTERISC: emmit(OP_MUL); break;
+            case TOKEN_SLASH: emmit(OP_DIV); break;
+            case TOKEN_MOD: emmit(OP_MOD); break;
+            default: /* Impossible */ break;
+        }
+    }
+}
+static void parse_expr_sum() {
+    parse_expr_mul();
+    while (tkn_get_token() == TOKEN_PLUS || tkn_get_token() == TOKEN_MINUS) {
+        t_tokentype operat = tkn_get_token();
+        tkn_next();
+        parse_expr_mul();
+        switch (operat) {
+            case TOKEN_PLUS: emmit(OP_ADD); break;
+            case TOKEN_MINUS: emmit(OP_SUB); break;
+            default: /* Impossible */ break;
+        }
+    }
+}
+static void parse_expr_bool() {
+    parse_expr_sum();
+    while (tkn_get_token() == TOKEN_AND || tkn_get_token() == TOKEN_OR) {
+        t_tokentype operat = tkn_get_token();
+        tkn_next();
+        parse_expr_sum();
+        switch (operat) {
+            case TOKEN_AND: emmit(OP_AND); break;
+            case TOKEN_OR: emmit(OP_OR); break;
+            default: /* Impossible */ break;
+        }
+    }
+}
+static void parse_expression() {
+    parse_expr_bool();
+    while (tkn_get_token() == TOKEN_EQ || tkn_get_token() == TOKEN_NEQ || tkn_get_token() == TOKEN_LT || tkn_get_token() == TOKEN_GT || tkn_get_token() == TOKEN_LEQ || tkn_get_token() == TOKEN_GEQ) {
+        t_tokentype operat = tkn_get_token();
+        tkn_next();
+        parse_expr_bool();
+        switch (operat) {
+            case TOKEN_EQ: emmit(OP_EQ); break;
+            case TOKEN_NEQ: emmit(OP_NEQ); break;
+            case TOKEN_LT: emmit(OP_LT); break;
+            case TOKEN_GT: emmit(OP_GT); break;
+            case TOKEN_LEQ: emmit(OP_LEQ); break;
+            case TOKEN_GEQ: emmit(OP_GEQ); break;
+            default: /* Impossible */ break;
+        }
+    }
+}
+// Parses: <identifier> as <type identifier> = <number>, and registers the constant.
+static void parse_const() {
+    EXPECT(TOKEN_IDENTIFIER, "Expected identifier");
+    char const_name[40];
+    strncpy(const_name, tkn_get_string(), sizeof(const_name) - 1); const_name[sizeof(const_name) - 1] = 0; // bounded: the token text may be longer than the buffer
+    tkn_next(); EXPECT(TOKEN_AS, "Expected 'as'");
+    tkn_next(); EXPECT(TOKEN_IDENTIFIER, "Expected type identifier");
+    char type_name[40];
+    strncpy(type_name, tkn_get_string(), sizeof(type_name) - 1); type_name[sizeof(type_name) - 1] = 0; // parsed but not used yet
+    tkn_next(); EXPECT(TOKEN_EQ, "Expected '='");
+    tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant");
+    int constvalue = tkn_get_value();
+    parser_register_constant(const_name, constvalue);
+    tkn_next();
+}
+// Parses: <identifier> ( <number> ), registering an array variable of that size.
+static void parse_dim() {
+    EXPECT(TOKEN_IDENTIFIER, "Expected variable");
+    char varname[40];
+    strncpy(varname, tkn_get_string(), sizeof(varname) - 1); varname[sizeof(varname) - 1] = 0; // bounded: the token text may be longer than the buffer
+    tkn_next(); EXPECT(TOKEN_LPAR, "Expected array size");
+    tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant as array size");
+    get_variable_index(varname, tkn_get_value()); // register variable with size, no need to keep ival
+    tkn_next(); EXPECT(TOKEN_RPAR, "Expected ')'");
+    tkn_next();
+}
+// [RZC 27/04/2021] INC and DEC disabled for now
+/*static void parse_inc() {
+    EXPECT(TOKEN_IDENTIFIER, "Expected variable");
+    int ivar = get_variable_index(tkn_get_string());
+    if (is_variable_array()) {
+        tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index");
+        tkn_next(); parse_expr_atom();
+        EXPECT(TOKEN_RPAR, "Expected ')'");
+        emmit(OP_SETY);
+        emmit(OP_LOADI);
+        emmit_w(ivar);
+        emmit(OP_INC);
+        emmit(OP_STOREI);
+        emmit_w(ivar);
+    } else {
+        emmit(OP_LOAD);
+        emmit_w(ivar);
+        emmit(OP_INC);
+        emmit(OP_STORE);
+        emmit_w(ivar);
+    }
+    tkn_next();
+}
+
+static void parse_dec() {
+    EXPECT(TOKEN_IDENTIFIER, "Expected variable");
+    int ivar = get_variable_index(tkn_get_string());
+    if (is_variable_array()) {
+        tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index");
+        tkn_next(); parse_expr_atom();
+        EXPECT(TOKEN_RPAR, "Expected ')'");
+        emmit(OP_SETY);
+        emmit(OP_LOADI);
+        emmit_w(ivar);
+        emmit(OP_DEC);
+        emmit(OP_STOREI);
+        emmit_w(ivar);
+    }
+    else {
+        emmit(OP_LOAD);
+        emmit_w(ivar);
+        emmit(OP_DEC);
+        emmit(OP_STORE);
+        emmit_w(ivar);
+    }
+    tkn_next();
+}*/
+
+// [RZC 27/04/2021] Disabled; I'm not quite sure what this even is
+/*static void parse_let() {
+    EXPECT(TOKEN_IDENTIFIER, "Expected variable");
+    int ivar = get_variable_index(tkn_get_string());
+    if (is_variable_array()) {
+        tkn_next();
+        if (tkn_get_token() == TOKEN_COLON) {
+            tkn_next(); parse_concatenation();
+            emmit(OP_SETX);
+            emmit_w(ivar);
+            emmit(OP_JSR);
+            emmit_w(get_label_address("_sys_string_store"));
+            return;
+        } else {
+            EXPECT(TOKEN_LPAR, "Expected ':' or '('");
+            tkn_next(); parse_expr_atom();
+            EXPECT(TOKEN_RPAR, "Expected ')'");
+            emmit(OP_SETY);
+        }
+    }
+    tkn_next();
+    EXPECT(TOKEN_EQ, "Expected '='");
+    tkn_next(); parse_expression();
+    if (is_variable_array()) {
+        emmit(OP_STOREI);
+        emmit_w(ivar);
+    } else {
+        emmit(OP_STORE);
+        emmit_w(ivar);
+    }
+}*/
+
+static void parse_if() {
+    char* previous_breaklabel = breaklabel;
+    parse_expression();
+    EXPECT(TOKEN_THEN, "Expected 'THEN'");
+    tkn_next();
+    emmit(OP_JNT);
+    char elselabel[8];
+    generate_anonymous_labelname(elselabel);
+    char endlabel[8];
+    generate_anonymous_labelname(endlabel);
+    breaklabel = endlabel;
+    bool else_visited = false;
+    emmit_w(get_label_address(elselabel));
+    parse_statements();
+    if (tkn_get_token() == TOKEN_ELSE) {
+        else_visited = true;
+        emmit(OP_JMP);
+        emmit_w(get_label_address(endlabel));
+        register_label_address(elselabel);
+        tkn_next();
+        parse_statements();
+    }
+    if (tkn_get_token() == TOKEN_END) {
+        if (!else_visited) register_label_address(elselabel);
+        register_label_address(endlabel);
+        tkn_next();
+        breaklabel = previous_breaklabel;
+        return;
+    }
+    parser_finished = true;
+    if (!else_visited) error_raise("Expected 'ELSE', 'END' or a statement"); else error_raise("Expected 'END' or a statement");
+}
+
+static void parse_for() {
+    char* previous_breaklabel = breaklabel;
+    char* previous_contlabel = contlabel;
+    EXPECT(TOKEN_IDENTIFIER, "Expected variable");
+    int ivar = get_variable_index(tkn_get_string());
+    tkn_next();
+    EXPECT(TOKEN_EQ, "Expected '='");
+    tkn_next();
+    parse_expression();
+    emmit(OP_STORE);
+    emmit_w(ivar);
+    char forlabel[8];
+    generate_anonymous_labelname(forlabel);
+    char endlabel[8];
+    generate_anonymous_labelname(endlabel);
+    char continuelabel[8];
+    generate_anonymous_labelname(continuelabel);
+    breaklabel = endlabel;
+    contlabel = continuelabel;
+    register_label_address(forlabel);
+    EXPECT(TOKEN_TO, "Expected 'TO'");
+    tkn_next();
+    parse_expression();
+    emmit(OP_LOAD);
+    emmit_w(ivar);
+    emmit(OP_EQ);
+    emmit(OP_JTR);
+    emmit_w(get_label_address(endlabel));
+    //int endLabel = GetCurrentAddress();
+    parse_statements();
+    EXPECT(TOKEN_END, "Expected 'END'");
+    tkn_next();
+    register_label_address(continuelabel);
+    emmit(OP_LOAD);
+    emmit_w(ivar);
+    emmit(OP_INC);
+    emmit(OP_STORE);
+    emmit_w(ivar);
+    emmit(OP_JMP);
+    emmit_w(get_label_address(forlabel));
+    register_label_address(endlabel);
+    //Patch(endLabel, GetCurrentAddress());
+    breaklabel = previous_breaklabel;
+    contlabel = previous_contlabel;
+}
+
+static void parse_break() {
+    if (breaklabel == nullptr) { parser_finished = true; error_raise("Can't break outside of a loop or condition"); return; }
+    emmit(OP_JMP);
+    emmit_w(get_label_address(breaklabel));
+}
+
+static void parse_continue() {
+    if (contlabel == nullptr) { parser_finished = true; error_raise("Can't continue outside of a loop"); return; }
+    emmit(OP_JMP);
+    emmit_w(get_label_address(contlabel));
+}
+
+// [RZC 27/04/2021] Neither GOTO nor GOSUB
+/*static void parse_goto() {
+    emmit(OP_JMP);
+    EXPECT(TOKEN_IDENTIFIER, "Expected label name");
+    emmit_w(get_label_address(tkn_get_string()));
+    tkn_next();
+}
+
+static void parse_gosub() {
+    emmit(OP_JSR);
+    EXPECT(TOKEN_IDENTIFIER, "Expected label name");
+    emmit_w(get_label_address(tkn_get_string()));
+    tkn_next();
+}*/
+
+static void parse_return() {
+    // [RZC 27/04/2021] [TODO] The return parameter still has to be added
+    emmit(OP_RET);
+}
+
+// [RZC 27/04/2021] There are no labels
+/*static void parse_label() {
+    register_label_address(tkn_get_string());
+    tkn_next();
+}*/
+
+static void parse_call() {
+    for (int i = 0; i < num_external_functions; i++) {
+        if (strcmp(external_functions[i].name, tkn_get_string()) == 0) {
+            if (external_functions[i].num_parameters == -1) {
+                tkn_next();
+                EXPECT(TOKEN_STRING, "Expected string");
+                emmit(OP_PUSH);
+                emmit(register_string(tkn_get_string()));
+            } else {
+                tkn_next();
+                for (int j = 0; j < external_functions[i].num_parameters; j++) {
+                    //if (j != 0) EXPECT(TOKEN_COMMA, "Expected comma");
+                    parse_expression();
+                }
+            }
+            emmit(OP_CALL);
+            emmit(i);
+        }
+    }
+}
+
+static void parse_statements() {
+    while (!parser_finished) {
+        current_line = tkn_get_line();
+        switch (tkn_get_token()) {
+            case TOKEN_CONST:
+                tkn_next(); parse_const(); break;
+            case TOKEN_DIM:
+                tkn_next(); parse_dim(); break;
+            case TOKEN_LET:
+                tkn_next(); parse_let(); break; // NOTE(review): parse_let/parse_inc/parse_dec exist only as commented-out code
+            case TOKEN_INC:
+                tkn_next(); parse_inc(); break;
+            case TOKEN_DEC:
+                tkn_next(); parse_dec(); break;
+            case TOKEN_IF:
+                tkn_next(); parse_if(); break;
+            case TOKEN_FOR:
+                tkn_next(); parse_for(); break;
+            /*case TOKEN_BREAK:
+                tkn_next(); parse_break(); break;
+            case TOKEN_CONTINUE:
+                tkn_next(); parse_continue(); break;*/
+            case TOKEN_RETURN:
+                tkn_next(); parse_return(); break;
+            case TOKEN_REM:
+                tkn_next(); break;
+
+            case TOKEN_IDENTIFIER: {
+                int fn = 0; while (fn < num_external_functions && strcmp(external_functions[fn].name, tkn_get_string()) != 0) fn++;
+                // A registered external function name starts a call; any other identifier is an assignment.
+                if (fn < num_external_functions) parse_call(); else parse_let();
+                break; }
+            default:
+                return;
+        }
+    }
+}
+
+static void parse_global_statements() {
+    while (!parser_finished) {
+        current_line = tkn_get_line();
+        switch (tkn_get_token()) {
+            case TOKEN_CONST:
+                tkn_next(); parse_const(); break;
+            case TOKEN_VAR:
+                tkn_next(); parse_var(); break; // NOTE(review): parse_var/parse_struct/parse_function are not defined in the visible source
+            case TOKEN_STRUCT:
+                tkn_next(); parse_struct(); break;
+            case TOKEN_FUNCTION:
+                tkn_next(); parse_function(); break;
+            case TOKEN_REM:
+                tkn_next(); break;
+            default:
+                return;
+        }
+    }
+}
+
+// [RZC 27/04/2021] Not used in JailScript
+/*static void include_labels() {
+    FILE *f = fopen("rom.lbl", "rb");
+    byte num_labels;
+    fread(&num_labels, 1, 1, f);
+    for (int i = 0; i < num_labels; i++) {
+        word address; fread(&address, 2, 1, f);
+        byte len; fread(&len, 1, 1, f);
+        char label[50];
+        fread(label, len, 1, f);
+        label[len] = 0;
+        strcpy(known_labels[num_known_labels].name, label);
+        known_labels[num_known_labels++].address = address;
+    }
+    fclose(f);
+
+}*/
+
+/****************************************************************************************/
+/* INTERFACE */
+/****************************************************************************************/
+
+void parser_parse(const char* buffer, byte* mem) {
+
+    tkn_init(buffer);
+    parser_finished = false;
+    //include_labels();
+    code = mem;
+    codepos = 2; // [RZC 27/04/2021] [TODO] Why 2? (append_strings() stores the string-table address in code[0..1])
+    tkn_next();
+
+    parse_global_statements();
+    append_strings(); // [RZC 27/04/2021] [TODO] Do I still want this? If the bytecode does not have to be self-contained, I don't see why it is needed
+
+    if (error_raised()) { // [RZC 27/04/2021] Nothing; for now just printf the error and crash
+        error_print(NULL);
+        //codepos = 0xA000;
+        //emmit(OP_JMP);
+        //emmit_w(0xA000);
+    }
+
+    //FILE *f = fopen("test.bin", "wb");
+    //fwrite(mem, codepos, 1, f);
+    //fclose(f);
+
+    //return code;
+}
+
+const int parser_get_codesize() { return codepos; }
+const int parser_get_memory_usage() { return num_variables; }
+unsigned short* parser_get_lines() { return lines; }
+
+void parser_register_external_function(const char* name, const char* parameters, void (*fun)(void)) {
+    strcpy(external_functions[num_external_functions].name, name);
+    external_functions[num_external_functions++].num_parameters = num_parameters; // NOTE(review): 'num_parameters' is not declared here (the argument is 'parameters'); 'fun' is unused
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..969b872
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,85 @@
+#pragma once
+
+enum OPS {
+    OP_NOP = 0,
+    OP_PUSH,
+    OP_POP,
+    OP_DUP,
+    OP_SWAP,
+
+    OP_LOAD,
+    //OP_LOADI,
+    OP_STORE,
+    //OP_STOREI,
+/*
+    OP_LOADXY,
+    OP_STOREXY,
+    OP_SETX,
+    OP_SETY,
+    OP_SETZ,
+    OP_GETY,
+    OP_GETZ,
+    OP_INCX,
+    OP_DECX,
+    OP_INCY,
+    OP_DECY,
+    OP_INCZ,
+    OP_DECZ,
+*/
+    OP_JMP,
+    OP_JNT,
+    OP_JTR,
+    //OP_JSR,
+    OP_RET,
+    OP_CALL,
+    OP_CALLEX, // ADDED FOR JAILSCRIPT
+/*
+    OP_RJ,
+    OP_RB,
+    OP_RJZ,
+    OP_RJN,
+    OP_RBZ,
+    OP_RBN,
+
+    OP_RJYZ,
+    OP_RJYN,
+    OP_RBYZ,
+    OP_RBYN,
+    OP_RJZZ,
+    OP_RJZN,
+    OP_RBZZ,
+    OP_RBZN,
+*/
+    OP_ADD,
+    OP_SUB,
+    OP_MUL,
+    OP_DIV,
+    OP_MOD,
+    OP_AND,
+    OP_OR,
+    OP_NOT,
+    OP_NEG,
+    OP_INC,
+    OP_DEC,
+    OP_CONCAT,
+
+    OP_EQ,
+    OP_NEQ,
+    OP_LT,
+    OP_GT,
+    OP_LEQ,
+    OP_GEQ,
+
+    //OP_IN,
+    //OP_OUT,
+    OP_SLEEP,
+};
+
+void parser_parse(const char* buffer, unsigned char* mem);
+const int parser_get_codesize();
+const int parser_get_memory_usage();
+unsigned short* parser_get_lines();
+
+void parser_register_external_function(const char* name, const char* parameters, void (*fun)(void));
+
+void parser_register_constant(const char* name, const unsigned char value);
\ No newline at end of file
diff --git a/scope.cpp b/scope.cpp
new file mode 100644
index 0000000..2991042
--- /dev/null
+++ b/scope.cpp
@@ -0,0 +1,50 @@
+#include "scope.h"
+#include <vector>
+#include "heap.h"
+
+struct t_variable {
+    //char name[MAX_IDENTIFIER_LENGTH];
+    std::string name;
+    int type;
+    int length;
+    int address;
+};
+
+struct t_struct {
+    //char name[MAX_IDENTIFIER_LENGTH];
+    std::string name;
+    std::vector<t_variable> members;
+};
+
+struct t_scope {
+    std::vector<t_variable> variables;
+    t_scope* parent_scope;
+};
+
+static t_scope scope_global;
+static t_scope* scope_current = &scope_global;
+
+void scope_init() {
+    scope_current = &scope_global; // NOTE(review): local scopes still open at this point are not freed
+    scope_global.variables.clear();
+}
+// Pushes a new, empty local scope whose parent is the current scope.
+void scope_open_local() {
+    t_scope* new_scope = new t_scope;
+    new_scope->parent_scope = scope_current;
+    scope_current = new_scope;
+}
+// Pops and frees the current local scope; does nothing when only the global scope is open.
+void scope_close_local() {
+    t_scope* closing_scope = scope_current; if (closing_scope == &scope_global) return; // the global scope is static and must never be deleted
+    scope_current = scope_current->parent_scope;
+    delete closing_scope;
+}
+// Declares a variable in the current scope; returns its heap address, or -1 if the name is already declared there.
+const int scope_declare_variable(const std::string name, const int type, const int length) {
+    for (const t_variable& v : scope_current->variables) { if (v.name == name) return -1; }
+    const int address = heap_get_address(); // NOTE(review): no heap_reserve() call here, so the heap address does not advance — confirm the caller reserves
+    const t_variable var {name, type, length, address};
+    scope_current->variables.push_back(var);
+    return address;
+}
diff --git a/scope.h b/scope.h
new file mode 100644
index 0000000..b634e16
--- /dev/null
+++ b/scope.h
@@ -0,0 +1,8 @@
+#pragma once
+#include <string>
+
+void scope_init();
+void scope_open_local();
+void scope_close_local();
+const int scope_declare_variable(const std::string name, const int type, const int length);
+//scope_get_variable...
\ No newline at end of file diff --git a/tokenizer.cpp b/tokenizer.cpp new file mode 100644 index 0000000..f2a12a8 --- /dev/null +++ b/tokenizer.cpp @@ -0,0 +1,161 @@ +#include "tokenizer.h" +#include "error.h" + +static bool streq(const char* a, const char* b) { + while (*a != 0 && *b != 0) { + if (*a != *b) return false; + a++; b++; + } + return *a == *b; +} + +static const char* ptr; +static t_tokentype current_token = TOKEN_ERROR; +static char identifier[255]; +static unsigned char array[255]; +static float value; +static int line = 0; +static int row = 0; +static int current_tokenline = 0; +static int current_tokenrow = 0; + +struct t_token_op { + const char* str; + t_tokentype tokentype; +}; + +t_token_op tkn_tokens[] = { + { "struct", TOKEN_STRUCT }, + { "as", TOKEN_AS }, + { "end", TOKEN_END }, + { "function", TOKEN_FUNCTION }, + { "var", TOKEN_VAR }, + { "array", TOKEN_ARRAY }, + { "of", TOKEN_OF }, + { "const", TOKEN_CONST }, + { "if", TOKEN_IF }, + { "then", TOKEN_THEN }, + { "else", TOKEN_ELSE }, + { "while", TOKEN_WHILE }, + { "do", TOKEN_DO }, + { "repeat", TOKEN_REPEAT }, + { "until", TOKEN_UNTIL }, + { "for", TOKEN_FOR }, + { "to", TOKEN_TO }, + { "step", TOKEN_STEP }, + { "return", TOKEN_RETURN }, + { "rem", TOKEN_REM }, + { "and", TOKEN_AND }, + { "or", TOKEN_OR }, + { "not", TOKEN_NOT }, + { "mod", TOKEN_MOD } +}; + +//static char hex_digit(const char digit) { return digit - (digit <= 57 ? 
48 : 55); } + +#define CCHR *ptr +#define NEXT if(*ptr==10){line++;row=0;}else{row++;};ptr++ + +static void tkn_do_next() { + identifier[0] = 0; + value = 0; + char* id = identifier; + // Ignore whitespace + while (CCHR != 0 && CCHR != 10 && CCHR <= 32) { NEXT; } + // If zero, we are at the end of the file + current_tokenline = line; current_tokenrow = row; + if (CCHR == 0) { current_token = TOKEN_ENDFILE; return; } + // if 13, we are at the end of the line + if (CCHR == 10) { NEXT; current_token = TOKEN_ENDLINE; return; } + // if 34, grab the string + if (CCHR == 34) { + NEXT; + while (CCHR != 0 && CCHR != 34) { *id = CCHR; id++; NEXT; } + if (CCHR == 0) { current_token = TOKEN_ERROR; error_raise("Unexpected end of file."); return; } + *id = 0; NEXT; + current_token = TOKEN_STRING; return; + } + // If digit, grab the number + if (CCHR >= 48 && CCHR <= 57) { + do { value = value * 10 + (CCHR - 48); NEXT; } while (CCHR >= 48 && CCHR <= 57); + if (CCHR == '.') { + int d = 10; + do { value = value + ((CCHR - 48) / d); d *= 10; NEXT; } while (CCHR >= 48 && CCHR <= 57); + } + current_token = TOKEN_NUMBER; return; + } + // if letter, grab the identifier + if ((CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122)) { + do { *id = CCHR; id++; NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 90) || (CCHR >= 97 && CCHR <= 122) || CCHR == 95); + *id = 0; + int i = 0; while (identifier[i] != 0) { if (identifier[i] < 95) { identifier[i] |= 32; }; i++; } // to lowecase + // If it matches a keyword, it's a keyword + for (auto token : tkn_tokens) { + if (streq(token.str, identifier)) { + current_token = token.tokentype; + if (current_token == TOKEN_REM) { while (CCHR != 0 && CCHR != 13) { NEXT; } } + return; + } + } + // else, it's an identifier + current_token = TOKEN_IDENTIFIER; return; + } + // Check if it's any of the operators + if (CCHR == '+') { NEXT; current_token = TOKEN_PLUS; return; } + if (CCHR == '-') { NEXT; current_token = TOKEN_MINUS; return; 
} + if (CCHR == '*') { NEXT; current_token = TOKEN_ASTERISC; return; } + if (CCHR == '/') { NEXT; current_token = TOKEN_SLASH; return; } + if (CCHR == '.') { NEXT; current_token = TOKEN_DOT; return; } + if (CCHR == '(') { NEXT; current_token = TOKEN_LPAR; return; } + if (CCHR == ')') { NEXT; current_token = TOKEN_RPAR; return; } + if (CCHR == '[') { NEXT; current_token = TOKEN_LBRACKET; return; } + if (CCHR == ']') { NEXT; current_token = TOKEN_RBRACKET; return; } + if (CCHR == ',') { NEXT; current_token = TOKEN_COMMA; return; } + if (CCHR == '<') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_LEQ; } else { if (CCHR == '>') { NEXT; current_token = TOKEN_NEQ; } else { current_token = TOKEN_LT; } } return; } + if (CCHR == '>') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_GEQ; } else { current_token = TOKEN_GT; } return; } + if (CCHR == '=') { NEXT; current_token = TOKEN_EQ; return; } + if (CCHR == '!') { NEXT; if (CCHR == '=') { NEXT; current_token = TOKEN_NEQ; } else { current_token = TOKEN_NOT; } return; } + if (CCHR == '%') { NEXT; current_token = TOKEN_MOD; return; } + if (CCHR == 39) { NEXT; current_token = TOKEN_REM; while (CCHR != 0 && CCHR != 10) { NEXT; }; return; } + + // Per a agafar numeros en hexadecimal o binari, heretat de PaCO, no se si li fa falta a JailScript + /*if (CCHR == '&') { + NEXT; + if ((CCHR | 32) == 'h') { + NEXT; + if ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70)) { + do { value = (value << 4) + hex_digit(CCHR); NEXT; } while ((CCHR >= 48 && CCHR <= 57) || (CCHR >= 65 && CCHR <= 70)); + current_token = TOKEN_NUMBER; return; + } + } + if ((CCHR | 32) == 'b') { + NEXT; + if (CCHR == 48 || CCHR == 49) { + do { value = (value << 1) + (CCHR-48); NEXT; } while (CCHR == 48 || CCHR == 49); + current_token = TOKEN_NUMBER; return; + } + } + current_token = TOKEN_ERROR; + error_raise("Syntax error"); + return; + }*/ + + // if no match, it's an error + current_token = TOKEN_ERROR; return; +} + +void tkn_init(const char* 
buffer) { + ptr = buffer; + current_token = TOKEN_ERROR; + value = line = row = current_tokenline = current_tokenrow = 0; + identifier[0] = 0; array[0] = 0; +} + +void tkn_next() { do { tkn_do_next(); } while (current_token == TOKEN_REM || current_token == TOKEN_ENDLINE); } + +t_tokentype tkn_get_token() { return current_token; } +char* tkn_get_string() { return identifier; } +unsigned char* tkn_get_array() { return array; } +float tkn_get_value() { return value; } +int tkn_get_line() { return current_tokenline; } +int tkn_get_row() { return current_tokenrow; } diff --git a/tokenizer.h b/tokenizer.h new file mode 100644 index 0000000..5d2e5ee --- /dev/null +++ b/tokenizer.h @@ -0,0 +1,65 @@ +#pragma once + +typedef unsigned char byte; +typedef unsigned short word; + +enum t_tokentype { + TOKEN_ERROR, + TOKEN_NUMBER, + TOKEN_STRING, + + TOKEN_IDENTIFIER, + + TOKEN_STRUCT, + TOKEN_AS, + TOKEN_END, + TOKEN_FUNCTION, + TOKEN_VAR, + TOKEN_ARRAY, + TOKEN_OF, + TOKEN_CONST, + TOKEN_IF, + TOKEN_THEN, + TOKEN_ELSE, + TOKEN_WHILE, + TOKEN_DO, + TOKEN_REPEAT, + TOKEN_UNTIL, + TOKEN_FOR, + TOKEN_TO, + TOKEN_STEP, + TOKEN_RETURN, + TOKEN_REM, + + TOKEN_AND, + TOKEN_OR, + TOKEN_NOT, + TOKEN_MOD, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_ASTERISC, + TOKEN_SLASH, + TOKEN_DOT, + TOKEN_LPAR, + TOKEN_RPAR, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_COMMA, + TOKEN_LT, + TOKEN_GT, + TOKEN_LEQ, + TOKEN_GEQ, + TOKEN_EQ, + TOKEN_NEQ, + TOKEN_ENDLINE, + TOKEN_ENDFILE +}; + +void tkn_init(const char* buffer); +void tkn_next(); +t_tokentype tkn_get_token(); +char* tkn_get_string(); +unsigned char* tkn_get_array(); +float tkn_get_value(); +int tkn_get_line(); +int tkn_get_row();