diff --git a/corpus/expressions.txt b/corpus/expressions.txt deleted file mode 100644 index 3004b65..0000000 --- a/corpus/expressions.txt +++ /dev/null @@ -1,12 +0,0 @@ -================ -Boolean Literals -================ - -boolean_id = false; -boolean_id = true; - ---- - -(source_file - (assignment_item (identifier) (boolean_literal)) - (assignment_item (identifier) (boolean_literal))) diff --git a/corpus/literals.txt b/corpus/literals.txt new file mode 100644 index 0000000..1434690 --- /dev/null +++ b/corpus/literals.txt @@ -0,0 +1,66 @@ +============== +Absent Literal +============== + +is_absent = <>; + +--- + +(source_file + (assignment_item (identifier) (absent))) + +================ +Boolean Literals +================ + +boolean_id = false; +boolean_id = true; + +--- + +(source_file + (assignment_item (identifier) (boolean_literal)) + (assignment_item (identifier) (boolean_literal))) + +=============== +Number Literals +=============== + +simple_float = 3.65; +exponent_float = 4.5e10; +simple_exponent_float = 5E-10; + +decimal_integer = 123; +hexadecimal_integer = 0xA23F; +octal_integer = 0o701; +binary_integer = 0b00110101; + + +--- + +(source_file + (assignment_item (identifier) (float_literal)) + (assignment_item (identifier) (float_literal)) + (assignment_item (identifier) (float_literal)) + (assignment_item (identifier) (integer_literal)) + (assignment_item (identifier) (integer_literal)) + (assignment_item (identifier) (integer_literal)) + (assignment_item (identifier) (integer_literal))) + +=============== +String Literals +=============== + +empty_string = ""; +small_string = "test"; +spaced_string = " Another test "; +escaped_string = "\t\n"; + +--- + +(source_file + (assignment_item (identifier) (string_literal)) + (assignment_item (identifier) (string_literal)) + (assignment_item (identifier) (string_literal)) + (assignment_item (identifier) (string_literal + (escape_sequence) (escape_sequence)))) diff --git a/grammar.js b/grammar.js index ff3d6ed..ecd84fb 100644 --- a/grammar.js +++ b/grammar.js @@ -3,6 +3,8 @@ module.exports = grammar({ extras: $ => [/\s/, $.line_comment, $.block_comment], + word: $ => $.identifier, + rules: { source_file: $ => seq(sepBy(';', $._items), optional(';')), @@ -25,15 +27,45 @@ module.exports = grammar({ _literal: $ => choice( - // TODO: absent, + $.absent, $.boolean_literal, - // TODO: float_literal, - // TODO: integer_literal, - // TODO: string_literal, + $.float_literal, + $.integer_literal, + $.string_literal, ), + absent: $ => '<>', boolean_literal: $ => choice('true', 'false'), + float_literal: $ => token(choice( + /\d+\.\d+/, + /\d+(\.\d+)?[Ee][+-]?\d+/, + // TODO: Hexadecimal floating point numbers + )), + integer_literal: $ => token(choice( + /[0-9]+/, + /0x[0-9a-fA-F]+/, + /0b[01]+/, + /0o[0-7]+/ + )), + string_literal: $ => seq( + '"', + repeat(choice( + token.immediate(prec(1, /[^"\n\\]+/)), + $.escape_sequence + )), + '"' + ), + escape_sequence: $ => token.immediate(seq( + '\\', + choice( + /[^xuU]/, + /\d{2,3}/, + /x[0-9a-fA-F]{2,}/, + /u[0-9a-fA-F]{4}/, + /U[0-9a-fA-F]{8}/ + ) + )), identifier: $ => /[A-Za-z][A-Za-z0-9_]*/, diff --git a/src/grammar.json b/src/grammar.json index 4f78bd8..f57b459 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -1,5 +1,6 @@ { "name": "minizinc", + "word": "identifier", "rules": { "source_file": { "type": "SEQ", @@ -101,12 +102,32 @@ "_literal": { "type": "CHOICE", "members": [ + { + "type": "SYMBOL", + "name": "absent" + }, { "type": "SYMBOL", "name": "boolean_literal" + }, + { + "type": "SYMBOL", + "name": "float_literal" + }, + { + "type": "SYMBOL", + "name": "integer_literal" + }, + { + "type": "SYMBOL", + "name": "string_literal" } ] }, + "absent": { + "type": "STRING", + "value": "<>" + }, "boolean_literal": { "type": "CHOICE", "members": [ @@ -120,6 +141,119 @@ } ] }, + "float_literal": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "PATTERN", + "value": "\\d+\\.\\d+" + }, + { + "type": "PATTERN", + "value": "\\d+(\\.\\d+)?[Ee][+-]?\\d+" + } + ] + } + }, + "integer_literal": { + "type": "TOKEN", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "PATTERN", + "value": "[0-9]+" + }, + { + "type": "PATTERN", + "value": "0x[0-9a-fA-F]+" + }, + { + "type": "PATTERN", + "value": "0b[01]+" + }, + { + "type": "PATTERN", + "value": "0o[0-7]+" + } + ] + } + }, + "string_literal": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "PREC", + "value": 1, + "content": { + "type": "PATTERN", + "value": "[^\"\\n\\\\]+" + } + } + }, + { + "type": "SYMBOL", + "name": "escape_sequence" + } + ] + } + }, + { + "type": "STRING", + "value": "\"" + } + ] + }, + "escape_sequence": { + "type": "IMMEDIATE_TOKEN", + "content": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\\" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "PATTERN", + "value": "[^xuU]" + }, + { + "type": "PATTERN", + "value": "\\d{2,3}" + }, + { + "type": "PATTERN", + "value": "x[0-9a-fA-F]{2,}" + }, + { + "type": "PATTERN", + "value": "u[0-9a-fA-F]{4}" + }, + { + "type": "PATTERN", + "value": "U[0-9a-fA-F]{8}" + } + ] + } + ] + } + }, "identifier": { "type": "PATTERN", "value": "[A-Za-z][A-Za-z0-9_]*" diff --git a/src/node-types.json b/src/node-types.json index 70e2eec..a9b377f 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -7,13 +7,29 @@ "multiple": false, "required": true, "types": [ + { + "type": "absent", + "named": true + }, { "type": "boolean_literal", "named": true }, + { + "type": "float_literal", + "named": true + }, { "type": "identifier", "named": true + }, + { + "type": "integer_literal", + "named": true + }, + { + "type": "string_literal", + "named": true } ] }, @@ -49,6 +65,25 @@ ] } }, + { + "type": "string_literal", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { + "type": "escape_sequence", + "named": true + } + ] + } + }, + { + "type": "\"", + "named": false + }, { "type": ";", "named": false @@ -57,14 +92,30 @@ "type": "=", "named": false }, + { + "type": "absent", + "named": true + }, + { + "type": "escape_sequence", + "named": true + }, { "type": "false", "named": false }, + { + "type": "float_literal", + "named": true + }, { "type": "identifier", "named": true }, + { + "type": "integer_literal", + "named": true + }, { "type": "true", "named": false diff --git a/src/parser.c b/src/parser.c index 1106ceb..06a6d45 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,39 +6,53 @@ #endif #define LANGUAGE_VERSION 11 -#define STATE_COUNT 15 +#define STATE_COUNT 20 #define LARGE_STATE_COUNT 3 -#define SYMBOL_COUNT 15 +#define SYMBOL_COUNT 23 #define ALIAS_COUNT 0 -#define TOKEN_COUNT 8 +#define TOKEN_COUNT 14 #define EXTERNAL_TOKEN_COUNT 0 #define FIELD_COUNT 2 #define MAX_ALIAS_SEQUENCE_LENGTH 3 enum { - anon_sym_SEMI = 1, - anon_sym_EQ = 2, - anon_sym_true = 3, - anon_sym_false = 4, - sym_identifier = 5, - sym_line_comment = 6, - sym_block_comment = 7, - sym_source_file = 8, - sym__items = 9, - sym_assignment_item = 10, - sym__expression = 11, - sym__literal = 12, - sym_boolean_literal = 13, - aux_sym_source_file_repeat1 = 14, + sym_identifier = 1, + anon_sym_SEMI = 2, + anon_sym_EQ = 3, + sym_absent = 4, + anon_sym_true = 5, + anon_sym_false = 6, + sym_float_literal = 7, + sym_integer_literal = 8, + anon_sym_DQUOTE = 9, + aux_sym_string_literal_token1 = 10, + sym_escape_sequence = 11, + sym_line_comment = 12, + sym_block_comment = 13, + sym_source_file = 14, + sym__items = 15, + sym_assignment_item = 16, + sym__expression = 17, + sym__literal = 18, + sym_boolean_literal = 19, + sym_string_literal = 20, + aux_sym_source_file_repeat1 = 21, + aux_sym_string_literal_repeat1 = 22, }; static const char *ts_symbol_names[] = { [ts_builtin_sym_end] = "end", + [sym_identifier] = "identifier", [anon_sym_SEMI] = ";", [anon_sym_EQ] = "=", + [sym_absent] = "absent", [anon_sym_true] = "true", [anon_sym_false] = "false", - [sym_identifier] = "identifier", + [sym_float_literal] = "float_literal", + [sym_integer_literal] = "integer_literal", + [anon_sym_DQUOTE] = "\"", + [aux_sym_string_literal_token1] = "string_literal_token1", + [sym_escape_sequence] = "escape_sequence", [sym_line_comment] = "line_comment", [sym_block_comment] = "block_comment", [sym_source_file] = "source_file", @@ -47,16 +61,24 @@ static const char *ts_symbol_names[] = { [sym__expression] = "_expression", [sym__literal] = "_literal", [sym_boolean_literal] = "boolean_literal", + [sym_string_literal] = "string_literal", [aux_sym_source_file_repeat1] = "source_file_repeat1", + [aux_sym_string_literal_repeat1] = "string_literal_repeat1", }; static TSSymbol ts_symbol_map[] = { [ts_builtin_sym_end] = ts_builtin_sym_end, + [sym_identifier] = sym_identifier, [anon_sym_SEMI] = anon_sym_SEMI, [anon_sym_EQ] = anon_sym_EQ, + [sym_absent] = sym_absent, [anon_sym_true] = anon_sym_true, [anon_sym_false] = anon_sym_false, - [sym_identifier] = sym_identifier, + [sym_float_literal] = sym_float_literal, + [sym_integer_literal] = sym_integer_literal, + [anon_sym_DQUOTE] = anon_sym_DQUOTE, + [aux_sym_string_literal_token1] = aux_sym_string_literal_token1, + [sym_escape_sequence] = sym_escape_sequence, [sym_line_comment] = sym_line_comment, [sym_block_comment] = sym_block_comment, [sym_source_file] = sym_source_file, @@ -65,7 +87,9 @@ static TSSymbol ts_symbol_map[] = { [sym__expression] = sym__expression, [sym__literal] = sym__literal, [sym_boolean_literal] = sym_boolean_literal, + [sym_string_literal] = sym_string_literal, [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, + [aux_sym_string_literal_repeat1] = aux_sym_string_literal_repeat1, }; static const TSSymbolMetadata ts_symbol_metadata[] = { @@ -73,6 +97,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = false, .named = true, }, + [sym_identifier] = { + .visible = true, + .named = true, + }, [anon_sym_SEMI] = { .visible = true, .named = false, @@ -81,6 +109,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = false, }, + [sym_absent] = { + .visible = true, + .named = true, + }, [anon_sym_true] = { .visible = true, .named = false, @@ -89,7 +121,23 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = false, }, - [sym_identifier] = { + [sym_float_literal] = { + .visible = true, + .named = true, + }, + [sym_integer_literal] = { + .visible = true, + .named = true, + }, + [anon_sym_DQUOTE] = { + .visible = true, + .named = false, + }, + [aux_sym_string_literal_token1] = { + .visible = false, + .named = false, + }, + [sym_escape_sequence] = { .visible = true, .named = true, }, @@ -125,10 +173,18 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { .visible = true, .named = true, }, + [sym_string_literal] = { + .visible = true, + .named = true, + }, [aux_sym_source_file_repeat1] = { .visible = false, .named = false, }, + [aux_sym_string_literal_repeat1] = { + .visible = false, + .named = false, + }, }; enum { @@ -161,137 +217,315 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { eof = lexer->eof(lexer); switch (state) { case 0: - if (eof) ADVANCE(5); - if (lookahead == '%') ADVANCE(18); - if (lookahead == '/') ADVANCE(1); - if (lookahead == ';') ADVANCE(6); - if (lookahead == '=') ADVANCE(7); - if (lookahead == 'f') ADVANCE(10); - if (lookahead == 't') ADVANCE(14); + if (eof) ADVANCE(23); + if (lookahead == '"') ADVANCE(34); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '/') ADVANCE(3); + if (lookahead == '0') ADVANCE(29); + if (lookahead == ';') ADVANCE(24); + if (lookahead == '<') ADVANCE(6); + if (lookahead == '=') ADVANCE(25); + if (lookahead == '\\') ADVANCE(7); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(22) + if (('1' <= lookahead && lookahead <= '9')) ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 1: + if (lookahead == '\n') SKIP(2) + if (lookahead == '"') ADVANCE(34); + if (lookahead == '%') ADVANCE(39); + if (lookahead == '/') ADVANCE(38); + if (lookahead == '\\') ADVANCE(7); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(35); + if (lookahead != 0) ADVANCE(39); + END_STATE(); + case 2: + if (lookahead == '"') ADVANCE(34); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '/') ADVANCE(3); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(2) + END_STATE(); + case 3: + if (lookahead == '*') ADVANCE(5); + END_STATE(); + case 4: + if (lookahead == '*') ADVANCE(4); + if (lookahead == '/') ADVANCE(45); + if (lookahead != 0) ADVANCE(5); + END_STATE(); + case 5: + if (lookahead == '*') ADVANCE(4); + if (lookahead != 0) ADVANCE(5); + END_STATE(); + case 6: + if (lookahead == '>') ADVANCE(26); + END_STATE(); + case 7: + if (lookahead == 'U') ADVANCE(21); + if (lookahead == 'u') ADVANCE(17); + if (lookahead == 'x') ADVANCE(15); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(42); + if (lookahead != 0) ADVANCE(40); + END_STATE(); + case 8: + if (lookahead == '+' || + lookahead == '-') ADVANCE(12); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(28); + END_STATE(); + case 9: + if (lookahead == '0' || + lookahead == '1') ADVANCE(31); + END_STATE(); + case 10: + if (('0' <= lookahead && lookahead <= '7')) ADVANCE(32); + END_STATE(); + case 11: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); + END_STATE(); + case 12: + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(28); + END_STATE(); + case 13: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(40); + END_STATE(); + case 14: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(33); + END_STATE(); + case 15: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(13); + END_STATE(); + case 16: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(15); + END_STATE(); + case 17: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(16); + END_STATE(); + case 18: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(17); + END_STATE(); + case 19: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(18); + END_STATE(); + case 20: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(19); + END_STATE(); + case 21: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(20); + END_STATE(); + case 22: + if (eof) ADVANCE(23); + if (lookahead == '"') ADVANCE(34); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '/') ADVANCE(3); + if (lookahead == '0') ADVANCE(29); + if (lookahead == ';') ADVANCE(24); + if (lookahead == '<') ADVANCE(6); + if (lookahead == '=') ADVANCE(25); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(22) + if (('1' <= lookahead && lookahead <= '9')) ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 23: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 24: + ACCEPT_TOKEN(anon_sym_SEMI); + END_STATE(); + case 25: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 26: + ACCEPT_TOKEN(sym_absent); + END_STATE(); + case 27: + ACCEPT_TOKEN(sym_float_literal); + if (lookahead == 'E' || + lookahead == 'e') ADVANCE(8); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); + END_STATE(); + case 28: + ACCEPT_TOKEN(sym_float_literal); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(28); + END_STATE(); + case 29: + ACCEPT_TOKEN(sym_integer_literal); + if (lookahead == '.') ADVANCE(11); + if (lookahead == 'b') ADVANCE(9); + if (lookahead == 'o') ADVANCE(10); + if (lookahead == 'x') ADVANCE(14); + if (lookahead == 'E' || + lookahead == 'e') ADVANCE(8); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(30); + END_STATE(); + case 30: + ACCEPT_TOKEN(sym_integer_literal); + if (lookahead == '.') ADVANCE(11); + if (lookahead == 'E' || + lookahead == 'e') ADVANCE(8); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(30); + END_STATE(); + case 31: + ACCEPT_TOKEN(sym_integer_literal); + if (lookahead == '0' || + lookahead == '1') ADVANCE(31); + END_STATE(); + case 32: + ACCEPT_TOKEN(sym_integer_literal); + if (('0' <= lookahead && lookahead <= '7')) ADVANCE(32); + END_STATE(); + case 33: + ACCEPT_TOKEN(sym_integer_literal); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(33); + END_STATE(); + case 34: + ACCEPT_TOKEN(anon_sym_DQUOTE); + END_STATE(); + case 35: + ACCEPT_TOKEN(aux_sym_string_literal_token1); + if (lookahead == '%') ADVANCE(39); + if (lookahead == '/') ADVANCE(38); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(35); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\\') ADVANCE(39); + END_STATE(); + case 36: + ACCEPT_TOKEN(aux_sym_string_literal_token1); + if (lookahead == '*') ADVANCE(36); + if (lookahead == '/') ADVANCE(39); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\\') ADVANCE(37); + END_STATE(); + case 37: + ACCEPT_TOKEN(aux_sym_string_literal_token1); + if (lookahead == '*') ADVANCE(36); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\\') ADVANCE(37); + END_STATE(); + case 38: + ACCEPT_TOKEN(aux_sym_string_literal_token1); + if (lookahead == '*') ADVANCE(37); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\\') ADVANCE(39); + END_STATE(); + case 39: + ACCEPT_TOKEN(aux_sym_string_literal_token1); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != '"' && + lookahead != '\\') ADVANCE(39); + END_STATE(); + case 40: + ACCEPT_TOKEN(sym_escape_sequence); + END_STATE(); + case 41: + ACCEPT_TOKEN(sym_escape_sequence); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(40); + END_STATE(); + case 42: + ACCEPT_TOKEN(sym_escape_sequence); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(41); + END_STATE(); + case 43: + ACCEPT_TOKEN(sym_identifier); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 44: + ACCEPT_TOKEN(sym_line_comment); + if (lookahead != 0 && + lookahead != '\n') ADVANCE(44); + END_STATE(); + case 45: + ACCEPT_TOKEN(sym_block_comment); + END_STATE(); + default: + return false; + } +} + +static bool ts_lex_keywords(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (lookahead == 'f') ADVANCE(1); + if (lookahead == 't') ADVANCE(2); if (lookahead == '\t' || lookahead == '\n' || lookahead == '\r' || lookahead == ' ') SKIP(0) - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); case 1: - if (lookahead == '*') ADVANCE(3); + if (lookahead == 'a') ADVANCE(3); END_STATE(); case 2: - if (lookahead == '*') ADVANCE(2); - if (lookahead == '/') ADVANCE(19); - if (lookahead != 0) ADVANCE(3); + if (lookahead == 'r') ADVANCE(4); END_STATE(); case 3: - if (lookahead == '*') ADVANCE(2); - if (lookahead != 0) ADVANCE(3); + if (lookahead == 'l') ADVANCE(5); END_STATE(); case 4: - if (eof) ADVANCE(5); - if (lookahead == '%') ADVANCE(18); - if (lookahead == '/') ADVANCE(1); - if (lookahead == ';') ADVANCE(6); - if (lookahead == '\t' || - lookahead == '\n' || - lookahead == '\r' || - lookahead == ' ') SKIP(4) - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); + if (lookahead == 'u') ADVANCE(6); END_STATE(); case 5: - ACCEPT_TOKEN(ts_builtin_sym_end); + if (lookahead == 's') ADVANCE(7); END_STATE(); case 6: - ACCEPT_TOKEN(anon_sym_SEMI); + if (lookahead == 'e') ADVANCE(8); END_STATE(); case 7: - ACCEPT_TOKEN(anon_sym_EQ); + if (lookahead == 'e') ADVANCE(9); END_STATE(); case 8: ACCEPT_TOKEN(anon_sym_true); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); END_STATE(); case 9: ACCEPT_TOKEN(anon_sym_false); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 10: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'a') ADVANCE(13); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('b' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 11: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(8); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 12: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'e') ADVANCE(9); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 13: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'l') ADVANCE(15); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 14: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'r') ADVANCE(16); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 15: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 's') ADVANCE(12); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 16: - ACCEPT_TOKEN(sym_identifier); - if (lookahead == 'u') ADVANCE(11); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 17: - ACCEPT_TOKEN(sym_identifier); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'Z') || - lookahead == '_' || - ('a' <= lookahead && lookahead <= 'z')) ADVANCE(17); - END_STATE(); - case 18: - ACCEPT_TOKEN(sym_line_comment); - if (lookahead != 0 && - lookahead != '\n') ADVANCE(18); - END_STATE(); - case 19: - ACCEPT_TOKEN(sym_block_comment); END_STATE(); default: return false; @@ -300,50 +534,65 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 4}, + [1] = {.lex_state = 0}, [2] = {.lex_state = 0}, - [3] = {.lex_state = 4}, - [4] = {.lex_state = 4}, + [3] = {.lex_state = 0}, + [4] = {.lex_state = 1}, [5] = {.lex_state = 0}, - [6] = {.lex_state = 0}, - [7] = {.lex_state = 0}, - [8] = {.lex_state = 4}, + [6] = {.lex_state = 1}, + [7] = {.lex_state = 1}, + [8] = {.lex_state = 0}, [9] = {.lex_state = 0}, [10] = {.lex_state = 0}, [11] = {.lex_state = 0}, [12] = {.lex_state = 0}, [13] = {.lex_state = 0}, [14] = {.lex_state = 0}, + [15] = {.lex_state = 0}, + [16] = {.lex_state = 0}, + [17] = {.lex_state = 0}, + [18] = {.lex_state = 0}, + [19] = {.lex_state = 0}, }; static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [0] = { [ts_builtin_sym_end] = ACTIONS(1), + [sym_identifier] = ACTIONS(1), [anon_sym_SEMI] = ACTIONS(1), [anon_sym_EQ] = ACTIONS(1), + [sym_absent] = ACTIONS(1), [anon_sym_true] = ACTIONS(1), [anon_sym_false] = ACTIONS(1), - [sym_identifier] = ACTIONS(1), + [sym_float_literal] = ACTIONS(1), + [sym_integer_literal] = ACTIONS(1), + [anon_sym_DQUOTE] = ACTIONS(1), + [sym_escape_sequence] = ACTIONS(1), [sym_line_comment] = ACTIONS(3), [sym_block_comment] = ACTIONS(3), }, [1] = { - [sym_source_file] = STATE(14), - [sym__items] = STATE(5), - [sym_assignment_item] = STATE(5), + [sym_source_file] = STATE(19), + [sym__items] = STATE(8), + [sym_assignment_item] = STATE(8), [ts_builtin_sym_end] = ACTIONS(5), - [anon_sym_SEMI] = ACTIONS(7), - [sym_identifier] = ACTIONS(9), + [sym_identifier] = ACTIONS(7), + [anon_sym_SEMI] = ACTIONS(9), [sym_line_comment] = ACTIONS(3), [sym_block_comment] = ACTIONS(3), }, [2] = { - [sym__expression] = STATE(10), - [sym__literal] = STATE(10), - [sym_boolean_literal] = STATE(10), - [anon_sym_true] = ACTIONS(11), - [anon_sym_false] = ACTIONS(11), - [sym_identifier] = ACTIONS(13), + [sym__expression] = STATE(13), + [sym__literal] = STATE(13), + [sym_boolean_literal] = STATE(13), + [sym_string_literal] = STATE(13), + [sym_identifier] = ACTIONS(11), + [sym_absent] = ACTIONS(13), + [anon_sym_true] = ACTIONS(15), + [anon_sym_false] = ACTIONS(15), + [sym_float_literal] = ACTIONS(13), + [sym_integer_literal] = ACTIONS(11), + [anon_sym_DQUOTE] = ACTIONS(17), [sym_line_comment] = ACTIONS(3), [sym_block_comment] = ACTIONS(3), }, @@ -351,101 +600,148 @@ static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { static uint16_t ts_small_parse_table[] = { [0] = 4, - ACTIONS(9), 1, + ACTIONS(7), 1, sym_identifier, - ACTIONS(15), 1, + ACTIONS(19), 1, ts_builtin_sym_end, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - STATE(11), 2, + STATE(14), 2, sym__items, sym_assignment_item, [15] = 4, - ACTIONS(9), 1, - sym_identifier, - ACTIONS(17), 1, - ts_builtin_sym_end, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - STATE(11), 2, - sym__items, - sym_assignment_item, - [30] = 4, - ACTIONS(19), 1, - ts_builtin_sym_end, ACTIONS(21), 1, - anon_sym_SEMI, + anon_sym_DQUOTE, STATE(6), 1, - aux_sym_source_file_repeat1, - ACTIONS(3), 2, + aux_sym_string_literal_repeat1, + ACTIONS(23), 2, + aux_sym_string_literal_token1, + sym_escape_sequence, + ACTIONS(25), 2, sym_line_comment, sym_block_comment, - [44] = 4, - ACTIONS(15), 1, - ts_builtin_sym_end, - ACTIONS(23), 1, - anon_sym_SEMI, - STATE(7), 1, - aux_sym_source_file_repeat1, - ACTIONS(3), 2, - sym_line_comment, - sym_block_comment, - [58] = 4, - ACTIONS(25), 1, - ts_builtin_sym_end, + [30] = 4, + ACTIONS(7), 1, + sym_identifier, ACTIONS(27), 1, - anon_sym_SEMI, + ts_builtin_sym_end, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + STATE(14), 2, + sym__items, + sym_assignment_item, + [45] = 4, + ACTIONS(29), 1, + anon_sym_DQUOTE, STATE(7), 1, + aux_sym_string_literal_repeat1, + ACTIONS(25), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(31), 2, + aux_sym_string_literal_token1, + sym_escape_sequence, + [60] = 4, + ACTIONS(33), 1, + anon_sym_DQUOTE, + STATE(7), 1, + aux_sym_string_literal_repeat1, + ACTIONS(25), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(35), 2, + aux_sym_string_literal_token1, + sym_escape_sequence, + [75] = 4, + ACTIONS(38), 1, + ts_builtin_sym_end, + ACTIONS(40), 1, + anon_sym_SEMI, + STATE(9), 1, aux_sym_source_file_repeat1, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [72] = 3, - ACTIONS(9), 1, + [89] = 4, + ACTIONS(19), 1, + ts_builtin_sym_end, + ACTIONS(42), 1, + anon_sym_SEMI, + STATE(10), 1, + aux_sym_source_file_repeat1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [103] = 4, + ACTIONS(44), 1, + ts_builtin_sym_end, + ACTIONS(46), 1, + anon_sym_SEMI, + STATE(10), 1, + aux_sym_source_file_repeat1, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + [117] = 3, + ACTIONS(7), 1, sym_identifier, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - STATE(11), 2, + STATE(14), 2, sym__items, sym_assignment_item, - [84] = 2, + [129] = 2, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - ACTIONS(30), 2, + ACTIONS(49), 2, ts_builtin_sym_end, anon_sym_SEMI, - [93] = 2, + [138] = 2, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - ACTIONS(32), 2, + ACTIONS(51), 2, ts_builtin_sym_end, anon_sym_SEMI, - [102] = 2, + [147] = 2, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - ACTIONS(25), 2, + ACTIONS(44), 2, ts_builtin_sym_end, anon_sym_SEMI, - [111] = 2, - ACTIONS(19), 1, + [156] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(53), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [165] = 2, + ACTIONS(3), 2, + sym_line_comment, + sym_block_comment, + ACTIONS(55), 2, + ts_builtin_sym_end, + anon_sym_SEMI, + [174] = 2, + ACTIONS(38), 1, ts_builtin_sym_end, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [119] = 2, - ACTIONS(34), 1, + [182] = 2, + ACTIONS(57), 1, anon_sym_EQ, ACTIONS(3), 2, sym_line_comment, sym_block_comment, - [127] = 2, - ACTIONS(36), 1, + [190] = 2, + ACTIONS(59), 1, ts_builtin_sym_end, ACTIONS(3), 2, sym_line_comment, @@ -456,15 +752,20 @@ static uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(3)] = 0, [SMALL_STATE(4)] = 15, [SMALL_STATE(5)] = 30, - [SMALL_STATE(6)] = 44, - [SMALL_STATE(7)] = 58, - [SMALL_STATE(8)] = 72, - [SMALL_STATE(9)] = 84, - [SMALL_STATE(10)] = 93, - [SMALL_STATE(11)] = 102, - [SMALL_STATE(12)] = 111, - [SMALL_STATE(13)] = 119, - [SMALL_STATE(14)] = 127, + [SMALL_STATE(6)] = 45, + [SMALL_STATE(7)] = 60, + [SMALL_STATE(8)] = 75, + [SMALL_STATE(9)] = 89, + [SMALL_STATE(10)] = 103, + [SMALL_STATE(11)] = 117, + [SMALL_STATE(12)] = 129, + [SMALL_STATE(13)] = 138, + [SMALL_STATE(14)] = 147, + [SMALL_STATE(15)] = 156, + [SMALL_STATE(16)] = 165, + [SMALL_STATE(17)] = 174, + [SMALL_STATE(18)] = 182, + [SMALL_STATE(19)] = 190, }; static TSParseActionEntry ts_parse_actions[] = { @@ -472,21 +773,32 @@ static TSParseActionEntry ts_parse_actions[] = { [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(), [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), - [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), - [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), - [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(9), - [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(10), - [15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), - [17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), - [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), - [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), - [25] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), - [27] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8), - [30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), - [32] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), - [34] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [36] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(18), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(13), + [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(12), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2), + [21] = {.entry = {.count = 1, .reusable = false}}, SHIFT(15), + [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [25] = {.entry = {.count = 1, .reusable = false}}, SHIFT_EXTRA(), + [27] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3), + [29] = {.entry = {.count = 1, .reusable = false}}, SHIFT(16), + [31] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [33] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_string_literal_repeat1, 2), + [35] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_string_literal_repeat1, 2), SHIFT_REPEAT(7), + [38] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [40] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [42] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5), + [44] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), + [46] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(11), + [49] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1), + [51] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1), + [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 2), + [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string_literal, 3), + [57] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [59] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), }; #ifdef __cplusplus @@ -518,6 +830,8 @@ extern const TSLanguage *tree_sitter_minizinc(void) { .field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries, .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, .lex_fn = ts_lex, + .keyword_lex_fn = ts_lex_keywords, + .keyword_capture_token = sym_identifier, .external_token_count = EXTERNAL_TOKEN_COUNT, }; return &language;