diff --git a/corpus/source_file.txt b/corpus/source_file.txt new file mode 100644 index 0000000..f220b70 --- /dev/null +++ b/corpus/source_file.txt @@ -0,0 +1,26 @@ +============== +Block comments +============== + +/* + * Block comments + */ + +/* Comment with asterisks **/ + +---- + +(source_file + (block_comment) + (block_comment)) + +============= +Line comments +============= + +% Comment + +---- + +(source_file + (line_comment)) diff --git a/grammar.js b/grammar.js index 597af1b..77a0728 100644 --- a/grammar.js +++ b/grammar.js @@ -1,6 +1,8 @@ module.exports = grammar({ name: 'minizinc', + extras: $ => [/\s/, $.line_comment, $.block_comment], + rules: { source_file: $ => seq(sepBy(';', $._items), optional(';')), @@ -34,6 +36,9 @@ module.exports = grammar({ identifier: $ => /[A-Za-z][A-Za-z0-9_]*/, + line_comment: $ => token(seq('%', /.*/)), + block_comment: $ => token(seq('/*', /[^*]*\*+([^/*][^*]*\*+)*/, '/')), + } }); diff --git a/src/grammar.json b/src/grammar.json index bd2d3f1..2d52e91 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -119,12 +119,56 @@ "identifier": { "type": "PATTERN", "value": "[A-Za-z][A-Za-z0-9_]*" + }, + "line_comment": { + "type": "TOKEN", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "%" + }, + { + "type": "PATTERN", + "value": ".*" + } + ] + } + }, + "block_comment": { + "type": "TOKEN", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "/*" + }, + { + "type": "PATTERN", + "value": "[^*]*\\*+([^/*][^*]*\\*+)*" + }, + { + "type": "STRING", + "value": "/" + } + ] + } } }, "extras": [ { "type": "PATTERN", "value": "\\s" + }, + { + "type": "SYMBOL", + "name": "line_comment" + }, + { + "type": "SYMBOL", + "name": "block_comment" } ], "conflicts": [], diff --git a/src/parser.c b/src/parser.c index c05098d..94df08e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -8,9 +8,9 @@ #define LANGUAGE_VERSION 11 #define STATE_COUNT 15 #define LARGE_STATE_COUNT 2 -#define SYMBOL_COUNT 13 +#define SYMBOL_COUNT 15 #define ALIAS_COUNT 0 -#define TOKEN_COUNT 6 +#define TOKEN_COUNT 8 #define EXTERNAL_TOKEN_COUNT 0 #define FIELD_COUNT 2 #define MAX_ALIAS_SEQUENCE_LENGTH 3 @@ -21,13 +21,15 @@ enum { anon_sym_true = 3, anon_sym_false = 4, sym_identifier = 5, - sym_source_file = 6, - sym__items = 7, - sym_assignment_item = 8, - sym__expression = 9, - sym__literal = 10, - sym_boolean_literal = 11, - aux_sym_source_file_repeat1 = 12, + sym_line_comment = 6, + sym_block_comment = 7, + sym_source_file = 8, + sym__items = 9, + sym_assignment_item = 10, + sym__expression = 11, + sym__literal = 12, + sym_boolean_literal = 13, + aux_sym_source_file_repeat1 = 14, }; static const char *ts_symbol_names[] = { @@ -37,6 +39,8 @@ static const char *ts_symbol_names[] = { [anon_sym_true] = "true", [anon_sym_false] = "false", [sym_identifier] = "identifier", + [sym_line_comment] = "line_comment", + [sym_block_comment] = "block_comment", [sym_source_file] = "source_file", [sym__items] = "_items", [sym_assignment_item] = "assignment_item", @@ -53,6 +57,8 @@ static TSSymbol ts_symbol_map[] = { [anon_sym_true] = anon_sym_true, [anon_sym_false] = anon_sym_false, [sym_identifier] = sym_identifier, + [sym_line_comment] = sym_line_comment, + [sym_block_comment] = sym_block_comment, [sym_source_file] = sym_source_file, [sym__items] = sym__items, [sym_assignment_item] = sym_assignment_item, @@ -87,6 +93,14 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = true, }, + [sym_line_comment] = { + .visible = true, + .named = true, + }, + [sym_block_comment] = { + .visible = true, + .named = true, + }, [sym_source_file] = { .visible = true, .named = true, @@ -147,148 +161,174 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { eof = lexer->eof(lexer); switch (state) { case 0: - if (eof) ADVANCE(10); - if (lookahead == ';') ADVANCE(11); - if (lookahead == '=') ADVANCE(12); - if (lookahead == 'f') ADVANCE(17); - if (lookahead == 't') ADVANCE(21); + if (eof) ADVANCE(13); + if (lookahead == '%') ADVANCE(28); + if (lookahead == '/') ADVANCE(2); + if (lookahead == ';') ADVANCE(14); + if (lookahead == '=') ADVANCE(15); + if (lookahead == 'f') ADVANCE(20); + if (lookahead == 't') ADVANCE(24); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0) if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 1: - if (lookahead == 'a') ADVANCE(5); + if (lookahead == '%') ADVANCE(28); + if (lookahead == '/') ADVANCE(2); + if (lookahead == 'f') ADVANCE(5); + if (lookahead == 't') ADVANCE(9); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(1) END_STATE(); case 2: - if (lookahead == 'e') ADVANCE(13); + if (lookahead == '*') ADVANCE(4); END_STATE(); case 3: - if (lookahead == 'e') ADVANCE(15); + if (lookahead == '*') ADVANCE(3); + if (lookahead == '/') ADVANCE(29); + if (lookahead != 0) ADVANCE(4); END_STATE(); case 4: - if (lookahead == 'f') ADVANCE(1); - if (lookahead == 't') ADVANCE(6); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') SKIP(4) + if (lookahead == '*') ADVANCE(3); + if (lookahead != 0) ADVANCE(4); END_STATE(); case 5: - if (lookahead == 'l') ADVANCE(7); + if (lookahead == 'a') ADVANCE(8); END_STATE(); case 6: - if (lookahead == 'r') ADVANCE(8); + if (lookahead == 'e') ADVANCE(16); END_STATE(); case 7: - if (lookahead == 's') ADVANCE(3); + if (lookahead == 'e') ADVANCE(18); END_STATE(); case 8: - if (lookahead == 'u') ADVANCE(2); + if (lookahead == 'l') ADVANCE(10); END_STATE(); case 9: - if (eof) ADVANCE(10); - if (lookahead == ';') ADVANCE(11); + if (lookahead == 'r') ADVANCE(11); + END_STATE(); + case 10: + if (lookahead == 's') ADVANCE(7); + END_STATE(); + case 11: + if (lookahead == 'u') ADVANCE(6); + END_STATE(); + case 12: + if (eof) ADVANCE(13); + if (lookahead == '%') ADVANCE(28); + if (lookahead == '/') ADVANCE(2); + if (lookahead == ';') ADVANCE(14); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || - lookahead == ' ') SKIP(9) + lookahead == ' ') SKIP(12) if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); - END_STATE(); - case 10: - ACCEPT_TOKEN(ts_builtin_sym_end); - END_STATE(); - case 11: - ACCEPT_TOKEN(anon_sym_SEMI); - END_STATE(); - case 12: - ACCEPT_TOKEN(anon_sym_EQ); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 13: - ACCEPT_TOKEN(anon_sym_true); + ACCEPT_TOKEN(ts_builtin_sym_end); END_STATE(); case 14: + ACCEPT_TOKEN(anon_sym_SEMI); + END_STATE(); + case 15: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 16: + ACCEPT_TOKEN(anon_sym_true); + END_STATE(); + case 17: ACCEPT_TOKEN(anon_sym_true); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); - END_STATE(); - case 15: - ACCEPT_TOKEN(anon_sym_false); - END_STATE(); - case 16: - ACCEPT_TOKEN(anon_sym_false); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); - END_STATE(); - case 17: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'a') ADVANCE(20); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('b' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 18: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(14); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ACCEPT_TOKEN(anon_sym_false); END_STATE(); case 19: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(16); + ACCEPT_TOKEN(anon_sym_false); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 20: ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'l') ADVANCE(22); + if (lookahead == 'a') ADVANCE(23); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('b' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 21: ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'r') ADVANCE(23); + if (lookahead == 'e') ADVANCE(17); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 22: ACCEPT_TOKEN(sym_identifier); - if (lookahead == 's') ADVANCE(19); + if (lookahead == 'e') ADVANCE(19); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 23: ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'u') ADVANCE(18); + if (lookahead == 'l') ADVANCE(25); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); END_STATE(); case 24: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'r') ADVANCE(26); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + END_STATE(); + case 25: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 's') ADVANCE(22); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + END_STATE(); + case 26: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'u') ADVANCE(21); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + END_STATE(); + case 27: ACCEPT_TOKEN(sym_identifier); if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(27); + END_STATE(); + case 28: + ACCEPT_TOKEN(sym_line_comment); + if (lookahead != 0 && + lookahead != '\n') ADVANCE(28); + END_STATE(); + case 29: + ACCEPT_TOKEN(sym_block_comment); END_STATE(); default: return false; @@ -297,14 +337,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 9}, - [2] = {.lex_state = 4}, - [3] = {.lex_state = 9}, - [4] = {.lex_state = 9}, + [1] = {.lex_state = 12}, + [2] = {.lex_state = 1}, + [3] = {.lex_state = 12}, + [4] = {.lex_state = 12}, [5] = {.lex_state = 0}, [6] = {.lex_state = 0}, [7] = {.lex_state = 0}, - [8] = {.lex_state = 9}, + [8] = {.lex_state = 12}, [9] = {.lex_state = 0}, [10] = {.lex_state = 0}, [11] = {.lex_state = 0}, @@ -321,126 +361,170 @@ static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_true] = ACTIONS(1), [anon_sym_false] = ACTIONS(1), [sym_identifier] = ACTIONS(1), + [sym_line_comment] = ACTIONS(3), + [sym_block_comment] = ACTIONS(3), }, [1] = { [sym_source_file] = STATE(14), [sym__items] = STATE(5), [sym_assignment_item] = STATE(5), - [ts_builtin_sym_end] = ACTIONS(3), - [anon_sym_SEMI] = ACTIONS(5), - [sym_identifier] = ACTIONS(7), + [ts_builtin_sym_end] = ACTIONS(5), + [anon_sym_SEMI] = ACTIONS(7), + [sym_identifier] = ACTIONS(9), + [sym_line_comment] = ACTIONS(3), + [sym_block_comment] = ACTIONS(3), }, }; static uint16_t ts_small_parse_table[] = { - [0] = 2, - ACTIONS(9), 2, + [0] = 3, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(11), 2, anon_sym_true, anon_sym_false, STATE(10), 3, sym__expression, sym__literal, sym_boolean_literal, - [10] = 3, - ACTIONS(7), 1, - sym_identifier, - ACTIONS(11), 1, - ts_builtin_sym_end, - STATE(11), 2, - sym__items, - sym_assignment_item, - [21] = 3, - ACTIONS(7), 1, + [14] = 4, + ACTIONS(9), 1, sym_identifier, ACTIONS(13), 1, ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, STATE(11), 2, sym__items, sym_assignment_item, - [32] = 3, + [29] = 4, + ACTIONS(9), 1, + sym_identifier, ACTIONS(15), 1, ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + STATE(11), 2, + sym__items, + sym_assignment_item, + [44] = 4, ACTIONS(17), 1, - anon_sym_SEMI, - STATE(6), 1, - aux_sym_source_file_repeat1, - [42] = 3, - ACTIONS(11), 1, ts_builtin_sym_end, ACTIONS(19), 1, anon_sym_SEMI, - STATE(7), 1, + STATE(6), 1, aux_sym_source_file_repeat1, - [52] = 3, - ACTIONS(21), 1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [58] = 4, + ACTIONS(13), 1, ts_builtin_sym_end, - ACTIONS(23), 1, + ACTIONS(21), 1, anon_sym_SEMI, STATE(7), 1, aux_sym_source_file_repeat1, - [62] = 2, - ACTIONS(7), 1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [72] = 4, + ACTIONS(23), 1, + ts_builtin_sym_end, + ACTIONS(25), 1, + anon_sym_SEMI, + STATE(7), 1, + aux_sym_source_file_repeat1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [86] = 3, + ACTIONS(9), 1, sym_identifier, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, STATE(11), 2, sym__items, sym_assignment_item, - [70] = 1, - ACTIONS(26), 2, - ts_builtin_sym_end, - anon_sym_SEMI, - [75] = 1, + [98] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, ACTIONS(28), 2, ts_builtin_sym_end, anon_sym_SEMI, - [80] = 1, - ACTIONS(21), 2, + [107] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(30), 2, ts_builtin_sym_end, anon_sym_SEMI, - [85] = 1, - ACTIONS(15), 1, + [116] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(23), 2, ts_builtin_sym_end, - [89] = 1, - ACTIONS(30), 1, - anon_sym_EQ, - [93] = 1, + anon_sym_SEMI, + [125] = 2, + ACTIONS(17), 1, + ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [133] = 2, ACTIONS(32), 1, + anon_sym_EQ, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [141] = 2, + ACTIONS(34), 1, ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, }; static uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(2)] = 0, - [SMALL_STATE(3)] = 10, - [SMALL_STATE(4)] = 21, - [SMALL_STATE(5)] = 32, - [SMALL_STATE(6)] = 42, - [SMALL_STATE(7)] = 52, - [SMALL_STATE(8)] = 62, - [SMALL_STATE(9)] = 70, - [SMALL_STATE(10)] = 75, - [SMALL_STATE(11)] = 80, - [SMALL_STATE(12)] = 85, - [SMALL_STATE(13)] = 89, - [SMALL_STATE(14)] = 93, + [SMALL_STATE(3)] = 14, + [SMALL_STATE(4)] = 29, + [SMALL_STATE(5)] = 44, + [SMALL_STATE(6)] = 58, + [SMALL_STATE(7)] = 72, + [SMALL_STATE(8)] = 86, + [SMALL_STATE(9)] = 98, + [SMALL_STATE(10)] = 107, + [SMALL_STATE(11)] = 116, + [SMALL_STATE(12)] = 125, + [SMALL_STATE(13)] = 133, + [SMALL_STATE(14)] = 141, }; static TSParseActionEntry ts_parse_actions[] = { [0] = {.entry = {.count = 0, .reusable = false}}, [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), - [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), - [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), - [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), - [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), - [11] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), - [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), - [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), - [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), - [21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), - [23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), - [26] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), - [28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), - [30] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [32] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), + [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), + [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [23] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), + [25] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), + [28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), + [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), + [32] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [34] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; #ifdef __cplusplus