From 099bc7d9bf0ed227024388229bbade7e69412721 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sat, 19 Sep 2020 14:34:43 +1000 Subject: [PATCH] Add file structure tests --- corpus/source_file.txt | 30 ++++ grammar.js | 1 + src/grammar.json | 4 + src/node-types.json | 4 + src/parser.c | 394 +++++++++++++++++++---------------------- 5 files changed, 217 insertions(+), 216 deletions(-) diff --git a/corpus/source_file.txt b/corpus/source_file.txt index f220b70..6a3a5fe 100644 --- a/corpus/source_file.txt +++ b/corpus/source_file.txt @@ -1,3 +1,33 @@ +========== +Empty File +========== + +--- + +(source_file) + +================== +End with semicolon +================== + +a = b; + +--- + +(source_file + (assignment_item (identifier) (identifier))) + +===================== +End without semicolon +===================== + +a = b + +--- + +(source_file + (assignment_item (identifier) (identifier))) + ============== Block comments ============== diff --git a/grammar.js b/grammar.js index 77a0728..ff3d6ed 100644 --- a/grammar.js +++ b/grammar.js @@ -19,6 +19,7 @@ module.exports = grammar({ _expression: $ => choice( $._literal, + $.identifier, // TODO: Other expression types ), diff --git a/src/grammar.json b/src/grammar.json index 2d52e91..4f78bd8 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -91,6 +91,10 @@ { "type": "SYMBOL", "name": "_literal" + }, + { + "type": "SYMBOL", + "name": "identifier" } ] }, diff --git a/src/node-types.json b/src/node-types.json index e3d2430..70e2eec 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -10,6 +10,10 @@ { "type": "boolean_literal", "named": true + }, + { + "type": "identifier", + "named": true } ] }, diff --git a/src/parser.c b/src/parser.c index 94df08e..1106ceb 100644 --- a/src/parser.c +++ b/src/parser.c @@ -7,7 +7,7 @@ #define LANGUAGE_VERSION 11 #define STATE_COUNT 15 -#define LARGE_STATE_COUNT 2 +#define LARGE_STATE_COUNT 3 #define SYMBOL_COUNT 15 #define ALIAS_COUNT 0 #define TOKEN_COUNT 8 @@ -161,173 +161,136 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { eof = lexer->eof(lexer); switch (state) { case 0: - if (eof) ADVANCE(13); - if (lookahead == '%') ADVANCE(28); - if (lookahead == '/') ADVANCE(2); - if (lookahead == ';') ADVANCE(14); - if (lookahead == '=') ADVANCE(15); - if (lookahead == 'f') ADVANCE(20); - if (lookahead == 't') ADVANCE(24); + if (eof) ADVANCE(5); + if (lookahead == '%') ADVANCE(18); + if (lookahead == '/') ADVANCE(1); + if (lookahead == ';') ADVANCE(6); + if (lookahead == '=') ADVANCE(7); + if (lookahead == 'f') ADVANCE(10); + if (lookahead == 't') ADVANCE(14); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0) if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); case 1: - if (lookahead == '%') ADVANCE(28); - if (lookahead == '/') ADVANCE(2); - if (lookahead == 'f') ADVANCE(5); - if (lookahead == 't') ADVANCE(9); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') SKIP(1) + if (lookahead == '*') ADVANCE(3); END_STATE(); case 2: - if (lookahead == '*') ADVANCE(4); + if (lookahead == '*') ADVANCE(2); + if (lookahead == '/') ADVANCE(19); + if (lookahead != 0) ADVANCE(3); END_STATE(); case 3: - if (lookahead == '*') ADVANCE(3); - if (lookahead == '/') ADVANCE(29); - if (lookahead != 0) ADVANCE(4); + if (lookahead == '*') ADVANCE(2); + if (lookahead != 0) ADVANCE(3); END_STATE(); case 4: - if (lookahead == '*') ADVANCE(3); - if (lookahead != 0) ADVANCE(4); - END_STATE(); - case 5: - if (lookahead == 'a') ADVANCE(8); - END_STATE(); - case 6: - if (lookahead == 'e') ADVANCE(16); - END_STATE(); - case 7: - if (lookahead == 'e') ADVANCE(18); - END_STATE(); - case 8: - if (lookahead == 'l') ADVANCE(10); - END_STATE(); - case 9: - if (lookahead == 'r') ADVANCE(11); - END_STATE(); - case 10: - if (lookahead == 's') ADVANCE(7); - END_STATE(); - case 11: - if (lookahead == 'u') ADVANCE(6); - END_STATE(); - case 12: - if (eof) ADVANCE(13); - if (lookahead == '%') ADVANCE(28); - if (lookahead == '/') ADVANCE(2); - if (lookahead == ';') ADVANCE(14); + if (eof) ADVANCE(5); + if (lookahead == '%') ADVANCE(18); + if (lookahead == '/') ADVANCE(1); + if (lookahead == ';') ADVANCE(6); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || - lookahead == ' ') SKIP(12) + lookahead == ' ') SKIP(4) if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); - case 13: + case 5: ACCEPT_TOKEN(ts_builtin_sym_end); END_STATE(); - case 14: + case 6: ACCEPT_TOKEN(anon_sym_SEMI); END_STATE(); - case 15: + case 7: ACCEPT_TOKEN(anon_sym_EQ); END_STATE(); - case 16: + case 8: ACCEPT_TOKEN(anon_sym_true); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 9: + ACCEPT_TOKEN(anon_sym_false); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 10: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'a') ADVANCE(13); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('b' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 11: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'e') ADVANCE(8); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 12: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'e') ADVANCE(9); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 13: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'l') ADVANCE(15); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 14: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'r') ADVANCE(16); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 15: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 's') ADVANCE(12); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + END_STATE(); + case 16: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'u') ADVANCE(11); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); case 17: - ACCEPT_TOKEN(anon_sym_true); + ACCEPT_TOKEN(sym_identifier); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); case 18: - ACCEPT_TOKEN(anon_sym_false); - END_STATE(); - case 19: - ACCEPT_TOKEN(anon_sym_false); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 20: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'a') ADVANCE(23); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('b' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 21: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(17); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 22: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(19); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 23: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'l') ADVANCE(25); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 24: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'r') ADVANCE(26); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 25: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 's') ADVANCE(22); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 26: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'u') ADVANCE(21); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 27: - ACCEPT_TOKEN(sym_identifier); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); - END_STATE(); - case 28: ACCEPT_TOKEN(sym_line_comment); if (lookahead != 0 && - lookahead != '\n') ADVANCE(28); + lookahead != '\n') ADVANCE(18); END_STATE(); - case 29: + case 19: ACCEPT_TOKEN(sym_block_comment); END_STATE(); default: @@ -337,14 +300,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 12}, - [2] = {.lex_state = 1}, - [3] = {.lex_state = 12}, - [4] = {.lex_state = 12}, + [1] = {.lex_state = 4}, + [2] = {.lex_state = 0}, + [3] = {.lex_state = 4}, + [4] = {.lex_state = 4}, [5] = {.lex_state = 0}, [6] = {.lex_state = 0}, [7] = {.lex_state = 0}, - [8] = {.lex_state = 12}, + [8] = {.lex_state = 4}, [9] = {.lex_state = 0}, [10] = {.lex_state = 0}, [11] = {.lex_state = 0}, @@ -374,32 +337,20 @@ static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [sym_line_comment] = ACTIONS(3), [sym_block_comment] = ACTIONS(3), }, + [2] = { + [sym__expression] = STATE(10), + [sym__literal] = STATE(10), + [sym_boolean_literal] = STATE(10), + [anon_sym_true] = ACTIONS(11), + [anon_sym_false] = ACTIONS(11), + [sym_identifier] = ACTIONS(13), + [sym_line_comment] = ACTIONS(3), + [sym_block_comment] = ACTIONS(3), + }, }; static uint16_t ts_small_parse_table[] = { - [0] = 3, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - ACTIONS(11), 2, - anon_sym_true, - anon_sym_false, - STATE(10), 3, - sym__expression, - sym__literal, - sym_boolean_literal, - [14] = 4, - ACTIONS(9), 1, - sym_identifier, - ACTIONS(13), 1, - ts_builtin_sym_end, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - STATE(11), 2, - sym__items, - sym_assignment_item, - [29] = 4, + [0] = 4, ACTIONS(9), 1, sym_identifier, ACTIONS(15), 1, @@ -410,37 +361,48 @@ static uint16_t ts_small_parse_table[] = { STATE(11), 2, sym__items, sym_assignment_item, - [44] = 4, + [15] = 4, + ACTIONS(9), 1, + sym_identifier, ACTIONS(17), 1, ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + STATE(11), 2, + sym__items, + sym_assignment_item, + [30] = 4, ACTIONS(19), 1, + ts_builtin_sym_end, + ACTIONS(21), 1, anon_sym_SEMI, STATE(6), 1, aux_sym_source_file_repeat1, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [58] = 4, - ACTIONS(13), 1, + [44] = 4, + ACTIONS(15), 1, ts_builtin_sym_end, - ACTIONS(21), 1, - anon_sym_SEMI, - STATE(7), 1, - aux_sym_source_file_repeat1, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - [72] = 4, ACTIONS(23), 1, - ts_builtin_sym_end, - ACTIONS(25), 1, anon_sym_SEMI, STATE(7), 1, aux_sym_source_file_repeat1, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [86] = 3, + [58] = 4, + ACTIONS(25), 1, + ts_builtin_sym_end, + ACTIONS(27), 1, + anon_sym_SEMI, + STATE(7), 1, + aux_sym_source_file_repeat1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [72] = 3, ACTIONS(9), 1, sym_identifier, ACTIONS(3), 2, @@ -449,41 +411,41 @@ static uint16_t ts_small_parse_table[] = { STATE(11), 2, sym__items, sym_assignment_item, - [98] = 2, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - ACTIONS(28), 2, - ts_builtin_sym_end, - anon_sym_SEMI, - [107] = 2, + [84] = 2, ACTIONS(3), 2, sym_line_comment, sym_block_comment, ACTIONS(30), 2, ts_builtin_sym_end, anon_sym_SEMI, - [116] = 2, + [93] = 2, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - ACTIONS(23), 2, + ACTIONS(32), 2, ts_builtin_sym_end, anon_sym_SEMI, - [125] = 2, - ACTIONS(17), 1, + [102] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(25), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [111] = 2, + ACTIONS(19), 1, ts_builtin_sym_end, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [133] = 2, - ACTIONS(32), 1, + [119] = 2, + ACTIONS(34), 1, anon_sym_EQ, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [141] = 2, - ACTIONS(34), 1, + [127] = 2, + ACTIONS(36), 1, ts_builtin_sym_end, ACTIONS(3), 2, sym_line_comment, @@ -491,19 +453,18 @@ static uint16_t ts_small_parse_table[] = { }; static uint32_t ts_small_parse_table_map[] = { - [SMALL_STATE(2)] = 0, - [SMALL_STATE(3)] = 14, - [SMALL_STATE(4)] = 29, - [SMALL_STATE(5)] = 44, - [SMALL_STATE(6)] = 58, - [SMALL_STATE(7)] = 72, - [SMALL_STATE(8)] = 86, - [SMALL_STATE(9)] = 98, - [SMALL_STATE(10)] = 107, - [SMALL_STATE(11)] = 116, - [SMALL_STATE(12)] = 125, - [SMALL_STATE(13)] = 133, - [SMALL_STATE(14)] = 141, + [SMALL_STATE(3)] = 0, + [SMALL_STATE(4)] = 15, + [SMALL_STATE(5)] = 30, + [SMALL_STATE(6)] = 44, + [SMALL_STATE(7)] = 58, + [SMALL_STATE(8)] = 72, + [SMALL_STATE(9)] = 84, + [SMALL_STATE(10)] = 93, + [SMALL_STATE(11)] = 102, + [SMALL_STATE(12)] = 111, + [SMALL_STATE(13)] = 119, + [SMALL_STATE(14)] = 127, }; static TSParseActionEntry ts_parse_actions[] = { @@ -513,18 +474,19 @@ static TSParseActionEntry ts_parse_actions[] = { [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), - [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), - [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), - [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), - [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), - [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), - [23] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), - [25] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), - [28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), - [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), - [32] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [34] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(9), + [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(10), + [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), + [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), + [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [25] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), + [27] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), + [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), + [32] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), + [34] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [36] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; #ifdef __cplusplus