#include "parser.h"
#include "tokenizer.h"
#include "error.h"
// NOTE(review): the names of the three system headers were missing from this
// copy of the file. They are restored here from the library calls the file
// makes (fopen/fread/fwrite, malloc, strcpy/strcmp/strlen) -- confirm against
// the original source.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Capacity limits of the compiler tables.
#define MAX_CODE_SIZE 65536
#define MAX_LABELS 256
#define MAX_EXTERNAL_FUNCTIONS 256
#define MAX_IDENTIFIER_LENGTH 40
#define MAX_CONSTANTS 256

// A label name together with a code address. Used both for resolved labels
// (address of the label) and for pending references (address to patch).
struct t_label {
    char name[MAX_IDENTIFIER_LENGTH];
    word address;
};

// A program variable, kept in a singly linked list in declaration order.
struct t_variable {
    char name[MAX_IDENTIFIER_LENGTH];
    int index = 0;              // offset of the first cell of the variable
    int size = 1;               // number of cells; more than one means array
    t_variable* next{ nullptr };
};

// A function provided by the host. num_parameters == -1 is treated by the
// parser as "takes a single string literal".
struct t_external_function {
    char name[MAX_IDENTIFIER_LENGTH];
    int num_parameters = 0;
};

// A string literal plus every code address that refers to it.
struct t_string {
    char string[256];
    word references[256];
    int num_references = 1;
};

// A named byte constant.
struct t_constant {
    char name[MAX_IDENTIFIER_LENGTH];
    byte value;
};

static byte* code; // output buffer supplied by the caller of parser_parse
static word codepos = 0;
static word lines[32768];       // source line recorded for each emitted byte
static word current_line;

static t_label known_labels[MAX_LABELS];
static int num_known_labels = 0;
static t_label unknown_labels[MAX_LABELS];
static int num_unknown_labels = 0;
static int num_anonymous_labels = 0;

static char* breaklabel = nullptr;
static char* contlabel = nullptr;

static t_variable* variables = nullptr;
static int num_variables = 0;
static bool variable_is_array = false;

static t_external_function external_functions[MAX_EXTERNAL_FUNCTIONS];
static int num_external_functions = 0;

static t_string strings[256];
static int num_strings = 0;

static bool parser_finished = false;

static t_constant constants[MAX_CONSTANTS];
static int num_constants;

/****************************************************************************************/
/* GENERIC FUNCTIONS                                                                    */
/****************************************************************************************/

// Writes 'value' into 'label' as a zero-padded, 7-digit decimal string.
// 'label' must have room for 8 chars (7 digits plus the terminator).
// Values that are zero or negative produce "0000000"; digits beyond the
// seventh are dropped.
static void int_to_string(int value, char* label) {
    for (int i = 0; i < 7; i++) label[i] = '0';
    label[7] = 0;
    // Fill from the least significant digit backwards. The position has to
    // move left on every digit, otherwise all values >= 10 end up stored in
    // the last slot and collide with the single-digit labels.
    int pos = 6;
    while (value > 0 && pos >= 0) {
        label[pos--] = '0' + (value % 10);
        value = value / 10;
    }
}

/****************************************************************************************/
/* BYTECODE GENERATOR                                                                   */
/****************************************************************************************/ static void emmit(const byte value) { lines[codepos] = current_line; code[codepos++] = value; } static void emmit_w(const word value) { lines[codepos] = current_line; code[codepos++] = value & 255; lines[codepos] = current_line; code[codepos++] = value >> 8; } static void patch(const word address, const word dest) { code[address] = dest & 255; code[address + 1] = dest >> 8; } static const word get_current_address() { return codepos; } /****************************************************************************************/ /* VARIABLE MANAGEMENT */ /****************************************************************************************/ static int get_variable_index(const char* string, const int array_size = 1) { t_variable* variable = variables; t_variable* last = variables; while (variable != nullptr) { if (strcmp(variable->name, string) == 0) { variable_is_array = (variable->size > 1); return variable->index + 0xC000; } last = variable; variable = variable->next; } t_variable* newvar = (t_variable*)malloc(sizeof(t_variable)); //newvar->name = (char*)malloc(strlen(string) + 1); strcpy(newvar->name, string); newvar->index = num_variables; num_variables += array_size; newvar->size = array_size; variable_is_array = (newvar->size > 1); newvar->next = nullptr; if (last != nullptr) { last->next = newvar; } else { variables = newvar; } return newvar->index + 0xC000; } static bool is_variable_array() { return variable_is_array; } /****************************************************************************************/ /* CONSTANT MANAGEMENT */ /****************************************************************************************/ void parser_register_constant(const char* name, const unsigned char value) { strcpy(constants[num_constants].name, name); constants[num_constants++].value = value; } static const int get_constant(const char* name) { for (int i = 0; i < num_constants; 
i++) { if (strcmp(constants[i].name, name) == 0) return constants[i].value; } return -1; } /****************************************************************************************/ /* LABEL MANAGEMENT */ /****************************************************************************************/ static const word get_label_address(const char* string) { for (int i = 0; i < num_known_labels; i++) { if (strcmp(known_labels[i].name, string) == 0) return known_labels[i].address; } //unknown_labels[num_unknown_labels].name = (char*)malloc(strlen(string) + 1); strcpy(unknown_labels[num_unknown_labels].name, string); unknown_labels[num_unknown_labels].address = get_current_address(); num_unknown_labels++; return 0; } static void register_label_address(const char* string) { // If the label already exists, vomit an error for (int i = 0; i < num_known_labels; i++) { if (strcmp(known_labels[i].name, string) == 0) { parser_finished = true; error_raise("Duplicate label"); return; } } // register new label //known_labels[num_known_labels].name = (char*)malloc(strlen(string) + 1); strcpy(known_labels[num_known_labels].name, string); known_labels[num_known_labels++].address = get_current_address(); // patch and remove any references awaiting for this label int i = 0; while (i < num_unknown_labels) { if (strcmp(unknown_labels[i].name, string) == 0) { patch(unknown_labels[i].address, get_current_address()); if (i < num_unknown_labels - 1) { //free(unknown_labels[i].name); unknown_labels[i].address = unknown_labels[num_unknown_labels - 1].address; strcpy(unknown_labels[i].name, unknown_labels[num_unknown_labels - 1].name); //unknown_labels[i].name = unknown_labels[num_unknown_labels - 1].name; } num_unknown_labels--; } else { i++; } } //num_known_labels++; } static void generate_anonymous_labelname(char* dest) { int_to_string(num_anonymous_labels++, dest); } /****************************************************************************************/ /* STRING MANAGEMENT */ 
/****************************************************************************************/ static int register_string(const char* string) { for (int i = 0; i < num_strings; i++) { if (strcmp(strings[i].string, string) == 0) { strings[i].references[strings[i].num_references++] = get_current_address(); return 0; } } strcpy(strings[num_strings].string, string); strings[num_strings++].references[0] = get_current_address(); return 0; } static void append_strings() { code[0] = codepos & 255; code[1] = codepos >> 8; emmit(num_strings); for (int i = 0; i < num_strings; i++) { for (int j = 0; j < strings[i].num_references; j++) { patch(strings[i].references[j], get_current_address()); } char len = strlen(strings[i].string); emmit(len); for (int j = 0; j < len; j++) { emmit(strings[i].string[j]); } } } /****************************************************************************************/ /* PARSER */ /****************************************************************************************/ static void parse_expression(); static void parse_statements(); static void parse_strleft(); static void parse_str(); static void parse_chr(); static void parse_strlen(); static void parse_keypressed(); static void parse_anykey(); #define EXPECT(X, Y) if (tkn_get_token() != X) { parser_finished = true; error_raise(Y); return; } static void parse_concat_atom() { if (tkn_get_token() == TOKEN_STRING) { emmit(OP_SETX); emmit_w(register_string(tkn_get_string())); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_load")); tkn_next(); return; } if (tkn_get_token() == TOKEN_IDENTIFIER) { int ivar = get_variable_index(tkn_get_string()); if (!is_variable_array()) { parser_finished = true; error_raise("Expected string constant or variable"); return; } emmit(OP_SETX); emmit_w(ivar); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_load")); tkn_next(); return; } if (tkn_get_token() == TOKEN_STRLEFT) { tkn_next(); parse_strleft(); return; } if (tkn_get_token() == TOKEN_STR) { tkn_next(); 
parse_str(); return; } if (tkn_get_token() == TOKEN_CHR) { tkn_next(); parse_chr(); return; } parser_finished = true; error_raise("Syntax error"); return; } static void parse_concatenation() { parse_concat_atom(); while (tkn_get_token() == TOKEN_COMMA) { tkn_next(); parse_concat_atom(); emmit(OP_CONCAT); } } static void parse_expr_atom() { // NUM, VAR, UNARY, PAREN if (tkn_get_token() == TOKEN_NUMBER) { emmit(OP_PUSH); emmit(tkn_get_value()); tkn_next(); return; } if (tkn_get_token() == TOKEN_IDENTIFIER) { int constvalue = get_constant(tkn_get_string()); if (constvalue != -1) { emmit(OP_PUSH); emmit(constvalue); tkn_next(); return; } int ivar = get_variable_index(tkn_get_string()); if (is_variable_array()) { tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); tkn_next(); parse_expr_atom(); EXPECT(TOKEN_RPAR, "Expected ')'"); emmit(OP_SETY); emmit(OP_LOADI); emmit_w(ivar); } else { emmit(OP_LOAD); emmit_w(ivar); } tkn_next(); return; } if (tkn_get_token() == TOKEN_MINUS) { tkn_next(); parse_expr_atom(); emmit(OP_NEG); return; } if (tkn_get_token() == TOKEN_NOT) { tkn_next(); parse_expr_atom(); emmit(OP_NOT); return; } if (tkn_get_token() == TOKEN_LPAR) { tkn_next(); parse_expression(); EXPECT(TOKEN_RPAR, "Expected ')'"); tkn_next(); return; } if (tkn_get_token() == TOKEN_STRLEN) { tkn_next(); parse_strlen(); return; } if (tkn_get_token() == TOKEN_KEYPRESSED) { tkn_next(); parse_keypressed(); return; } if (tkn_get_token() == TOKEN_ANYKEY) { tkn_next(); parse_anykey(); return; } parser_finished = true; error_raise("Syntax error"); return; } static void parse_expr_mul() { parse_expr_atom(); while (tkn_get_token() == TOKEN_ASTERISC || tkn_get_token() == TOKEN_SLASH || tkn_get_token() == TOKEN_MOD) { t_tokentype operat = tkn_get_token(); tkn_next(); parse_expr_atom(); switch (operat) { case TOKEN_ASTERISC: emmit(OP_MUL); break; case TOKEN_SLASH: emmit(OP_DIV); break; case TOKEN_MOD: emmit(OP_MOD); break; } } } static void parse_expr_sum() { parse_expr_mul(); while 
(tkn_get_token() == TOKEN_PLUS || tkn_get_token() == TOKEN_MINUS) { t_tokentype operat = tkn_get_token(); tkn_next(); parse_expr_mul(); switch (operat) { case TOKEN_PLUS: emmit(OP_ADD); break; case TOKEN_MINUS: emmit(OP_SUB); break; } } } static void parse_expr_bool() { parse_expr_sum(); while (tkn_get_token() == TOKEN_AND || tkn_get_token() == TOKEN_OR) { t_tokentype operat = tkn_get_token(); tkn_next(); parse_expr_sum(); switch (operat) { case TOKEN_AND: emmit(OP_AND); break; case TOKEN_OR: emmit(OP_OR); break; } } } static void parse_expression() { parse_expr_bool(); while (tkn_get_token() == TOKEN_EQ || tkn_get_token() == TOKEN_LT || tkn_get_token() == TOKEN_GT || tkn_get_token() == TOKEN_LEQ || tkn_get_token() == TOKEN_GEQ) { t_tokentype operat = tkn_get_token(); tkn_next(); parse_expr_bool(); switch (operat) { case TOKEN_EQ: emmit(OP_EQ); break; case TOKEN_LT: emmit(OP_LT); break; case TOKEN_GT: emmit(OP_GT); break; case TOKEN_LEQ: emmit(OP_LEQ); break; case TOKEN_GEQ: emmit(OP_GEQ); break; } } } static void parse_const() { EXPECT(TOKEN_IDENTIFIER, "Expected identifier"); char constname[40]; strcpy(constname, tkn_get_string()); tkn_next(); EXPECT(TOKEN_EQ, "Expected '='"); tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant"); int constvalue = tkn_get_value(); parser_register_constant(constname, constvalue); tkn_next(); } static void parse_dim() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); char varname[40]; strcpy(varname, tkn_get_string()); tkn_next(); EXPECT(TOKEN_LPAR, "Expected array size"); tkn_next(); EXPECT(TOKEN_NUMBER, "Expected integer constant as array size"); int ivar = get_variable_index(varname, tkn_get_value()); tkn_next(); EXPECT(TOKEN_RPAR, "Expected ')'"); tkn_next(); } static void parse_inc() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); if (is_variable_array()) { tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); tkn_next(); parse_expr_atom(); EXPECT(TOKEN_RPAR, "Expected 
')'"); emmit(OP_SETY); emmit(OP_LOADI); emmit_w(ivar); emmit(OP_INC); emmit(OP_STOREI); emmit_w(ivar); } else { emmit(OP_LOAD); emmit_w(ivar); emmit(OP_INC); emmit(OP_STORE); emmit_w(ivar); } tkn_next(); } static void parse_dec() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); if (is_variable_array()) { tkn_next(); EXPECT(TOKEN_LPAR, "Expected array index"); tkn_next(); parse_expr_atom(); EXPECT(TOKEN_RPAR, "Expected ')'"); emmit(OP_SETY); emmit(OP_LOADI); emmit_w(ivar); emmit(OP_DEC); emmit(OP_STOREI); emmit_w(ivar); } else { emmit(OP_LOAD); emmit_w(ivar); emmit(OP_DEC); emmit(OP_STORE); emmit_w(ivar); } tkn_next(); } static void parse_let() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); if (is_variable_array()) { tkn_next(); if (tkn_get_token() == TOKEN_COLON) { tkn_next(); parse_concatenation(); emmit(OP_SETX); emmit_w(ivar); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_store")); return; } else { EXPECT(TOKEN_LPAR, "Expected ':' or '('"); tkn_next(); parse_expr_atom(); EXPECT(TOKEN_RPAR, "Expected ')'"); emmit(OP_SETY); } } tkn_next(); EXPECT(TOKEN_EQ, "Expected '='"); tkn_next(); parse_expression(); if (is_variable_array()) { emmit(OP_STOREI); emmit_w(ivar); } else { emmit(OP_STORE); emmit_w(ivar); } } static void parse_if() { char* previous_breaklabel = breaklabel; parse_expression(); EXPECT(TOKEN_THEN, "Expected 'THEN'"); tkn_next(); emmit(OP_JNT); char elselabel[8]; generate_anonymous_labelname(elselabel); char endlabel[8]; generate_anonymous_labelname(endlabel); breaklabel = endlabel; bool else_visited = false; emmit_w(get_label_address(elselabel)); parse_statements(); if (tkn_get_token() == TOKEN_ELSE) { else_visited = true; emmit(OP_JMP); emmit_w(get_label_address(endlabel)); register_label_address(elselabel); tkn_next(); parse_statements(); } if (tkn_get_token() == TOKEN_END) { if (!else_visited) register_label_address(elselabel); 
register_label_address(endlabel); tkn_next(); breaklabel = previous_breaklabel; return; } parser_finished = true; if (!else_visited) error_raise("Expected 'ELSE', 'END' or a statement"); else error_raise("Expected 'END' or a statement"); } static void parse_for() { char* previous_breaklabel = breaklabel; char* previous_contlabel = contlabel; EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); tkn_next(); EXPECT(TOKEN_EQ, "Expected '='"); tkn_next(); parse_expression(); emmit(OP_STORE); emmit_w(ivar); char forlabel[8]; generate_anonymous_labelname(forlabel); char endlabel[8]; generate_anonymous_labelname(endlabel); char continuelabel[8]; generate_anonymous_labelname(continuelabel); breaklabel = endlabel; contlabel = continuelabel; register_label_address(forlabel); EXPECT(TOKEN_TO, "Expected 'TO'"); tkn_next(); parse_expression(); emmit(OP_LOAD); emmit_w(ivar); emmit(OP_EQ); emmit(OP_JTR); emmit_w(get_label_address(endlabel)); //int endLabel = GetCurrentAddress(); parse_statements(); EXPECT(TOKEN_END, "Expected 'END'"); tkn_next(); register_label_address(continuelabel); emmit(OP_LOAD); emmit_w(ivar); emmit(OP_INC); emmit(OP_STORE); emmit_w(ivar); emmit(OP_JMP); emmit_w(get_label_address(forlabel)); register_label_address(endlabel); //Patch(endLabel, GetCurrentAddress()); breaklabel = previous_breaklabel; contlabel = previous_contlabel; } static void parse_break() { if (breaklabel == nullptr) { parser_finished = true; error_raise("Can't break outside of a loop or condition"); return; } emmit(OP_JMP); emmit_w(get_label_address(breaklabel)); } static void parse_continue() { if (contlabel == nullptr) { parser_finished = true; error_raise("Can't continue outside of a loop"); return; } emmit(OP_JMP); emmit_w(get_label_address(contlabel)); } static void parse_goto() { emmit(OP_JMP); EXPECT(TOKEN_IDENTIFIER, "Expected label name"); emmit_w(get_label_address(tkn_get_string())); tkn_next(); } static void parse_gosub() { 
emmit(OP_JSR); EXPECT(TOKEN_IDENTIFIER, "Expected label name"); emmit_w(get_label_address(tkn_get_string())); tkn_next(); } static void parse_return() { emmit(OP_RET); } static void parse_label() { register_label_address(tkn_get_string()); tkn_next(); } static void parse_call() { for (int i = 0; i < num_external_functions; i++) { if (strcmp(external_functions[i].name, tkn_get_string()) == 0) { if (external_functions[i].num_parameters == -1) { tkn_next(); EXPECT(TOKEN_STRING, "Expected string"); emmit(OP_PUSH); emmit(register_string(tkn_get_string())); } else { tkn_next(); for (int j = 0; j < external_functions[i].num_parameters; j++) { //if (j != 0) EXPECT(TOKEN_COMMA, "Expected comma"); parse_expression(); } } emmit(OP_CALL); emmit(i); } } } static void parse_strleft() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); tkn_next(); parse_expression(); emmit(OP_SETX); emmit_w(ivar); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_left")); } static void parse_strlen() { EXPECT(TOKEN_IDENTIFIER, "Expected variable"); int ivar = get_variable_index(tkn_get_string()); emmit(OP_LOAD); emmit_w(ivar); tkn_next(); } static void parse_debug() { parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_debug")); } static void parse_chr() { parse_expression(); emmit(OP_PUSH); emmit(1); } static void parse_str() { parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_string_str")); } static void parse_setsprite() { parse_expression(); parse_expression(); parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_setsprite")); } static void parse_putsprite() { parse_expression(); parse_expression(); parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_putsprite")); } static void parse_locate() { parse_expression(); parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_locate")); } static void parse_print() { parse_concatenation(); emmit(OP_JSR); 
emmit_w(get_label_address("_sys_vdp_print")); } static void parse_putchar() { parse_expression(); parse_expression(); parse_expression(); parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_putchar")); } static void parse_setchar() { parse_expression(); parse_concatenation(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_setchar")); } static void parse_keypressed() { parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_input_keypressed")); } static void parse_anykey() { emmit(OP_PUSH); emmit(0); emmit(OP_JSR); emmit_w(get_label_address("_sys_input_keypressed")); } static void parse_updatescr() { emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_flip")); } static void parse_color() { parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_color")); } static void parse_border() { parse_expression(); emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_border")); } static void parse_clrscr() { emmit(OP_JSR); emmit_w(get_label_address("_sys_vdp_clrscr")); } static void parse_wait() { parse_expression(); emmit(OP_SLEEP); } static void parse_statements() { while (!parser_finished) { current_line = tkn_get_line(); switch (tkn_get_token()) { case TOKEN_CONST: tkn_next(); parse_const(); break; case TOKEN_DIM: tkn_next(); parse_dim(); break; case TOKEN_LET: tkn_next(); parse_let(); break; case TOKEN_INC: tkn_next(); parse_inc(); break; case TOKEN_DEC: tkn_next(); parse_dec(); break; case TOKEN_IF: tkn_next(); parse_if(); break; case TOKEN_FOR: tkn_next(); parse_for(); break; case TOKEN_BREAK: tkn_next(); parse_break(); break; case TOKEN_CONTINUE: tkn_next(); parse_continue(); break; case TOKEN_GOTO: tkn_next(); parse_goto(); break; case TOKEN_GOSUB: tkn_next(); parse_gosub(); break; case TOKEN_RETURN: tkn_next(); parse_return(); break; case TOKEN_LABEL: parse_label(); break; case TOKEN_REM: tkn_next(); break; //case TOKEN_STRLEFT: <--- Parsed in parse_concat_atom // tkn_next(); parse_strleft(); break; //case TOKEN_STRLEN: 
<--- Parsed in parse_expr_atom // tkn_next(); parse_strlen(); break; case TOKEN_DEBUG: tkn_next(); parse_debug(); break; //case TOKEN_CHR: // tkn_next(); parse_chr(); break; <--- Parsed in parse_concat_atom //case TOKEN_STR: // tkn_next(); parse_str(); break; <--- Parsed in parse_concat_atom case TOKEN_SETSPRITE: tkn_next(); parse_setsprite(); break; case TOKEN_PUTSPRITE: tkn_next(); parse_putsprite(); break; case TOKEN_LOCATE: tkn_next(); parse_locate(); break; case TOKEN_PRINT: tkn_next(); parse_print(); break; case TOKEN_PUTCHAR: tkn_next(); parse_putchar(); break; case TOKEN_SETCHAR: tkn_next(); parse_setchar(); break; //case TOKEN_KEYPRESSED: <--- Parsed in parse_expr_atom // tkn_next(); parse_keypressed(); break; //case TOKEN_ANYKEY: <--- Parsed in parse_expr_atom // tkn_next(); parse_anykey(); break; case TOKEN_UPDATESCR: tkn_next(); parse_updatescr(); break; case TOKEN_COLOR: tkn_next(); parse_color(); break; case TOKEN_BORDER: tkn_next(); parse_border(); break; case TOKEN_CLRSCR: tkn_next(); parse_clrscr(); break; case TOKEN_WAIT: tkn_next(); parse_wait(); break; case TOKEN_IDENTIFIER: for (int i = 0; i < num_external_functions; i++) { if (strcmp(external_functions[i].name, tkn_get_string()) == 0) { parse_call(); break; } } parse_let(); break; default: return; } } } static void include_labels() { FILE *f = fopen("rom.lbl", "rb"); byte num_labels; fread(&num_labels, 1, 1, f); for (int i = 0; i < num_labels; i++) { word address; fread(&address, 2, 1, f); byte len; fread(&len, 1, 1, f); char label[50]; fread(label, len, 1, f); label[len] = 0; strcpy(known_labels[num_known_labels].name, label); known_labels[num_known_labels++].address = address; } fclose(f); } /****************************************************************************************/ /* INTERFACE */ /****************************************************************************************/ void parser_parse(const char* buffer, byte* mem) { tkn_init(buffer); parser_finished = false; 
include_labels(); code = mem; codepos = 2; tkn_next(); parse_statements(); append_strings(); FILE *f = fopen("test.bin", "wb"); fwrite(mem, codepos, 1, f); fclose(f); //return code; } const int parser_get_codesize() { return codepos; } const int parser_get_memory_usage() { return num_variables; } unsigned short* parser_get_lines() { return lines; } void parser_register_external_function(const char* name, const int num_parameters) { strcpy(external_functions[num_external_functions].name, name); external_functions[num_external_functions++].num_parameters = num_parameters; }