From da4a36d60e872a1221d0c7ef77281f797f7e3fa2 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sat, 19 Sep 2020 14:05:43 +1000 Subject: [PATCH] Add initial structure for items/expressions --- corpus/expressions.txt | 12 ++ grammar.js | 41 ++++- src/grammar.json | 119 ++++++++++++- src/node-types.json | 61 ++++++- src/parser.c | 377 +++++++++++++++++++++++++++++++++++++---- 5 files changed, 569 insertions(+), 41 deletions(-) create mode 100644 corpus/expressions.txt diff --git a/corpus/expressions.txt b/corpus/expressions.txt new file mode 100644 index 0000000..3004b65 --- /dev/null +++ b/corpus/expressions.txt @@ -0,0 +1,12 @@ +================ +Boolean Literals +================ + +boolean_id = false; +boolean_id = true; + +--- + +(source_file + (assignment_item (identifier) (boolean_literal)) + (assignment_item (identifier) (boolean_literal))) diff --git a/grammar.js b/grammar.js index a73cbeb..597af1b 100644 --- a/grammar.js +++ b/grammar.js @@ -2,6 +2,45 @@ module.exports = grammar({ name: 'minizinc', rules: { - source_file: $ => 'constraint', + source_file: $ => seq(sepBy(';', $._items), optional(';')), + + _items: $ => choice( + $.assignment_item, + // TODO: Other statements types + ), + + assignment_item: $ => seq( + field('name', $.identifier), + '=', + field('expr', $._expression) + ), + + _expression: $ => choice( + $._literal, + // TODO: Other expression types + ), + + + _literal: $ => choice( + // TODO: absent, + $.boolean_literal, + // TODO: float_literal, + // TODO: integer_literal, + // TODO: string_literal, + ), + + boolean_literal: $ => choice('true', 'false'), + + + identifier: $ => /[A-Za-z][A-Za-z0-9_]*/, + } }); + +function sepBy1(sep, rule) { + return seq(rule, repeat(seq(sep, rule))) +} + +function sepBy(sep, rule) { + return optional(sepBy1(sep, rule)) +} diff --git a/src/grammar.json b/src/grammar.json index 638c308..bd2d3f1 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -2,8 +2,123 @@ "name": "minizinc", "rules": { "source_file": { - "type": "STRING", - "value": "constraint" + "type": "SEQ", + "members": [ + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "_items" + }, + { + "type": "REPEAT", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": ";" + }, + { + "type": "SYMBOL", + "name": "_items" + } + ] + } + } + ] + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": ";" + }, + { + "type": "BLANK" + } + ] + } + ] + }, + "_items": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "assignment_item" + } + ] + }, + "assignment_item": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "name", + "content": { + "type": "SYMBOL", + "name": "identifier" + } + }, + { + "type": "STRING", + "value": "=" + }, + { + "type": "FIELD", + "name": "expr", + "content": { + "type": "SYMBOL", + "name": "_expression" + } + } + ] + }, + "_expression": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "_literal" + } + ] + }, + "_literal": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "boolean_literal" + } + ] + }, + "boolean_literal": { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "true" + }, + { + "type": "STRING", + "value": "false" + } + ] + }, + "identifier": { + "type": "PATTERN", + "value": "[A-Za-z][A-Za-z0-9_]*" } }, "extras": [ diff --git a/src/node-types.json b/src/node-types.json index 82e92eb..e3d2430 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -1,11 +1,68 @@ [ { - "type": "source_file", + "type": "assignment_item", + "named": true, + "fields": { + "expr": { + "multiple": false, + "required": true, + "types": [ + { + "type": "boolean_literal", + "named": true + } + ] + }, + "name": { + "multiple": false, + "required": true, + "types": [ + { + "type": "identifier", + "named": true + } + ] + } + } + }, + { + "type": "boolean_literal", "named": true, "fields": {} }, { - "type": "constraint", + "type": "source_file", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { + "type": "assignment_item", + "named": true + } + ] + } + }, + { + "type": ";", + "named": false + }, + { + "type": "=", + "named": false + }, + { + "type": "false", + "named": false + }, + { + "type": "identifier", + "named": true + }, + { + "type": "true", "named": false } ] \ No newline at end of file diff --git a/src/parser.c b/src/parser.c index 26d7153..c05098d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,30 +6,60 @@ #endif #define LANGUAGE_VERSION 11 -#define STATE_COUNT 4 +#define STATE_COUNT 15 #define LARGE_STATE_COUNT 2 -#define SYMBOL_COUNT 3 +#define SYMBOL_COUNT 13 #define ALIAS_COUNT 0 -#define TOKEN_COUNT 2 +#define TOKEN_COUNT 6 #define EXTERNAL_TOKEN_COUNT 0 -#define FIELD_COUNT 0 -#define MAX_ALIAS_SEQUENCE_LENGTH 1 +#define FIELD_COUNT 2 +#define MAX_ALIAS_SEQUENCE_LENGTH 3 enum { - anon_sym_constraint = 1, - sym_source_file = 2, + anon_sym_SEMI = 1, + anon_sym_EQ = 2, + anon_sym_true = 3, + anon_sym_false = 4, + sym_identifier = 5, + sym_source_file = 6, + sym__items = 7, + sym_assignment_item = 8, + sym__expression = 9, + sym__literal = 10, + sym_boolean_literal = 11, + aux_sym_source_file_repeat1 = 12, }; static const char *ts_symbol_names[] = { [ts_builtin_sym_end] = "end", - [anon_sym_constraint] = "constraint", + [anon_sym_SEMI] = ";", + [anon_sym_EQ] = "=", + [anon_sym_true] = "true", + [anon_sym_false] = "false", + [sym_identifier] = "identifier", [sym_source_file] = "source_file", + [sym__items] = "_items", + [sym_assignment_item] = "assignment_item", + [sym__expression] = "_expression", + [sym__literal] = "_literal", + [sym_boolean_literal] = "boolean_literal", + [aux_sym_source_file_repeat1] = "source_file_repeat1", }; static TSSymbol ts_symbol_map[] = { [ts_builtin_sym_end] = ts_builtin_sym_end, - [anon_sym_constraint] = anon_sym_constraint, + [anon_sym_SEMI] = anon_sym_SEMI, + [anon_sym_EQ] = anon_sym_EQ, + [anon_sym_true] = anon_sym_true, + [anon_sym_false] = anon_sym_false, + [sym_identifier] = sym_identifier, [sym_source_file] = sym_source_file, + [sym__items] = sym__items, + [sym_assignment_item] = sym_assignment_item, + [sym__expression] = sym__expression, + [sym__literal] = sym__literal, + [sym_boolean_literal] = sym_boolean_literal, + [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, }; static const TSSymbolMetadata ts_symbol_metadata[] = { @@ -37,17 +67,78 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = true, }, - [anon_sym_constraint] = { + [anon_sym_SEMI] = { .visible = true, .named = false, }, + [anon_sym_EQ] = { + .visible = true, + .named = false, + }, + [anon_sym_true] = { + .visible = true, + .named = false, + }, + [anon_sym_false] = { + .visible = true, + .named = false, + }, + [sym_identifier] = { + .visible = true, + .named = true, + }, [sym_source_file] = { .visible = true, .named = true, }, + [sym__items] = { + .visible = false, + .named = true, + }, + [sym_assignment_item] = { + .visible = true, + .named = true, + }, + [sym__expression] = { + .visible = false, + .named = true, + }, + [sym__literal] = { + .visible = false, + .named = true, + }, + [sym_boolean_literal] = { + .visible = true, + .named = true, + }, + [aux_sym_source_file_repeat1] = { + .visible = false, + .named = false, + }, }; -static TSSymbol ts_alias_sequences[1][MAX_ALIAS_SEQUENCE_LENGTH] = { +enum { + field_expr = 1, + field_name = 2, +}; + +static const char *ts_field_names[] = { + [0] = NULL, + [field_expr] = "expr", + [field_name] = "name", +}; + +static const TSFieldMapSlice ts_field_map_slices[2] = { + [1] = {.index = 0, .length = 2}, +}; + +static const TSFieldMapEntry ts_field_map_entries[] = { + [0] = + {field_expr, 2}, + {field_name, 0}, +}; + +static TSSymbol ts_alias_sequences[2][MAX_ALIAS_SEQUENCE_LENGTH] = { [0] = {0}, }; @@ -57,44 +148,147 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { switch (state) { case 0: if (eof) ADVANCE(10); - if (lookahead == 'c') ADVANCE(5); + if (lookahead == ';') ADVANCE(11); + if (lookahead == '=') ADVANCE(12); + if (lookahead == 'f') ADVANCE(17); + if (lookahead == 't') ADVANCE(21); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0) + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); END_STATE(); case 1: - if (lookahead == 'a') ADVANCE(2); + if (lookahead == 'a') ADVANCE(5); END_STATE(); case 2: - if (lookahead == 'i') ADVANCE(4); + if (lookahead == 'e') ADVANCE(13); END_STATE(); case 3: - if (lookahead == 'n') ADVANCE(7); + if (lookahead == 'e') ADVANCE(15); END_STATE(); case 4: - if (lookahead == 'n') ADVANCE(9); + if (lookahead == 'f') ADVANCE(1); + if (lookahead == 't') ADVANCE(6); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(4) END_STATE(); case 5: - if (lookahead == 'o') ADVANCE(3); + if (lookahead == 'l') ADVANCE(7); END_STATE(); case 6: - if (lookahead == 'r') ADVANCE(1); + if (lookahead == 'r') ADVANCE(8); END_STATE(); case 7: - if (lookahead == 's') ADVANCE(8); + if (lookahead == 's') ADVANCE(3); END_STATE(); case 8: - if (lookahead == 't') ADVANCE(6); + if (lookahead == 'u') ADVANCE(2); END_STATE(); case 9: - if (lookahead == 't') ADVANCE(11); + if (eof) ADVANCE(10); + if (lookahead == ';') ADVANCE(11); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(9) + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); END_STATE(); case 10: ACCEPT_TOKEN(ts_builtin_sym_end); END_STATE(); case 11: - ACCEPT_TOKEN(anon_sym_constraint); + ACCEPT_TOKEN(anon_sym_SEMI); + END_STATE(); + case 12: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 13: + ACCEPT_TOKEN(anon_sym_true); + END_STATE(); + case 14: + ACCEPT_TOKEN(anon_sym_true); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 15: + ACCEPT_TOKEN(anon_sym_false); + END_STATE(); + case 16: + ACCEPT_TOKEN(anon_sym_false); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 17: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'a') ADVANCE(20); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('b' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 18: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'e') ADVANCE(14); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 19: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'e') ADVANCE(16); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 20: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'l') ADVANCE(22); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 21: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'r') ADVANCE(23); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 22: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 's') ADVANCE(19); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 23: + ACCEPT_TOKEN(sym_identifier); + if (lookahead == 'u') ADVANCE(18); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); + END_STATE(); + case 24: + ACCEPT_TOKEN(sym_identifier); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(24); END_STATE(); default: return false; @@ -103,42 +297,150 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 0}, - [2] = {.lex_state = 0}, - [3] = {.lex_state = 0}, + [1] = {.lex_state = 9}, + [2] = {.lex_state = 4}, + [3] = {.lex_state = 9}, + [4] = {.lex_state = 9}, + [5] = {.lex_state = 0}, + [6] = {.lex_state = 0}, + [7] = {.lex_state = 0}, + [8] = {.lex_state = 9}, + [9] = {.lex_state = 0}, + [10] = {.lex_state = 0}, + [11] = {.lex_state = 0}, + [12] = {.lex_state = 0}, + [13] = {.lex_state = 0}, + [14] = {.lex_state = 0}, }; static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [0] = { [ts_builtin_sym_end] = ACTIONS(1), - [anon_sym_constraint] = ACTIONS(1), + [anon_sym_SEMI] = ACTIONS(1), + [anon_sym_EQ] = ACTIONS(1), + [anon_sym_true] = ACTIONS(1), + [anon_sym_false] = ACTIONS(1), + [sym_identifier] = ACTIONS(1), }, [1] = { - [sym_source_file] = STATE(3), - [anon_sym_constraint] = ACTIONS(3), + [sym_source_file] = STATE(14), + [sym__items] = STATE(5), + [sym_assignment_item] = STATE(5), + [ts_builtin_sym_end] = ACTIONS(3), + [anon_sym_SEMI] = ACTIONS(5), + [sym_identifier] = ACTIONS(7), }, }; static uint16_t ts_small_parse_table[] = { - [0] = 1, - ACTIONS(5), 1, - ts_builtin_sym_end, - [4] = 1, + [0] = 2, + ACTIONS(9), 2, + anon_sym_true, + anon_sym_false, + STATE(10), 3, + sym__expression, + sym__literal, + sym_boolean_literal, + [10] = 3, ACTIONS(7), 1, + sym_identifier, + ACTIONS(11), 1, + ts_builtin_sym_end, + STATE(11), 2, + sym__items, + sym_assignment_item, + [21] = 3, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(13), 1, + ts_builtin_sym_end, + STATE(11), 2, + sym__items, + sym_assignment_item, + [32] = 3, + ACTIONS(15), 1, + ts_builtin_sym_end, + ACTIONS(17), 1, + anon_sym_SEMI, + STATE(6), 1, + aux_sym_source_file_repeat1, + [42] = 3, + ACTIONS(11), 1, + ts_builtin_sym_end, + ACTIONS(19), 1, + anon_sym_SEMI, + STATE(7), 1, + aux_sym_source_file_repeat1, + [52] = 3, + ACTIONS(21), 1, + ts_builtin_sym_end, + ACTIONS(23), 1, + anon_sym_SEMI, + STATE(7), 1, + aux_sym_source_file_repeat1, + [62] = 2, + ACTIONS(7), 1, + sym_identifier, + STATE(11), 2, + sym__items, + sym_assignment_item, + [70] = 1, + ACTIONS(26), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [75] = 1, + ACTIONS(28), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [80] = 1, + ACTIONS(21), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [85] = 1, + ACTIONS(15), 1, + ts_builtin_sym_end, + [89] = 1, + ACTIONS(30), 1, + anon_sym_EQ, + [93] = 1, + ACTIONS(32), 1, ts_builtin_sym_end, }; static uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(2)] = 0, - [SMALL_STATE(3)] = 4, + [SMALL_STATE(3)] = 10, + [SMALL_STATE(4)] = 21, + [SMALL_STATE(5)] = 32, + [SMALL_STATE(6)] = 42, + [SMALL_STATE(7)] = 52, + [SMALL_STATE(8)] = 62, + [SMALL_STATE(9)] = 70, + [SMALL_STATE(10)] = 75, + [SMALL_STATE(11)] = 80, + [SMALL_STATE(12)] = 85, + [SMALL_STATE(13)] = 89, + [SMALL_STATE(14)] = 93, }; static TSParseActionEntry ts_parse_actions[] = { [0] = {.entry = {.count = 0, .reusable = false}}, [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), - [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), - [7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), + [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [11] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), + [13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), + [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), + [23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), + [26] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), + [28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), + [30] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [32] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; #ifdef __cplusplus @@ -165,6 +467,9 @@ extern const TSLanguage *tree_sitter_minizinc(void) { .public_symbol_map = ts_symbol_map, .alias_sequences = (const TSSymbol *)ts_alias_sequences, .field_count = FIELD_COUNT, + .field_names = ts_field_names, + .field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices, + .field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries, .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, .lex_fn = ts_lex, .external_token_count = EXTERNAL_TOKEN_COUNT,