1
0

Add support for comments

This commit is contained in:
Jip J. Dekker 2020-09-19 14:21:27 +10:00
parent da4a36d60e
commit f7f9242c0a
4 changed files with 310 additions and 151 deletions

26
corpus/source_file.txt Normal file
View File

@ -0,0 +1,26 @@
==============
Block comments
==============
/*
* Block comments
*/
/* Comment with asterisks **/
----
(source_file
(block_comment)
(block_comment))
=============
Line comments
=============
% Comment
----
(source_file
(line_comment))

View File

@ -1,6 +1,8 @@
module.exports = grammar({
name: 'minizinc',
// Tokens allowed to appear between any other tokens: whitespace and both
// comment forms (line and block comments).
extras: $ => [/\s/, $.line_comment, $.block_comment],
rules: {
source_file: $ => seq(sepBy(';', $._items), optional(';')),
@ -34,6 +36,9 @@ module.exports = grammar({
// An identifier: one ASCII letter followed by any number of ASCII letters,
// digits, or underscores.
identifier: $ => /[A-Za-z][A-Za-z0-9_]*/,
// A line comment: '%' followed by the rest of the line (the regex `.` does
// not match a newline, so the newline itself is not part of the comment).
// token() makes the whole sequence a single lexical token.
line_comment: $ => token(seq('%', /.*/)),
// A block comment: '/*' ... '*/', lexed as a single token.
// The body pattern reads as:
//   [^*]*\*+              any non-'*' characters, then a run of one or more '*'
//   ([^/*][^*]*\*+)*      repeatedly: a character that is neither '/' nor '*',
//                         more non-'*' characters, then another run of '*'
// followed by the closing '/'. Because a '*' run inside the body may never be
// followed directly by '/', the token ends at the first '*/'; block comments
// therefore do not nest.
block_comment: $ => token(seq('/*', /[^*]*\*+([^/*][^*]*\*+)*/, '/')),
}
});

View File

@ -119,12 +119,56 @@
"identifier": {
"type": "PATTERN",
"value": "[A-Za-z][A-Za-z0-9_]*"
},
"line_comment": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "%"
},
{
"type": "PATTERN",
"value": ".*"
}
]
}
},
"block_comment": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "/*"
},
{
"type": "PATTERN",
"value": "[^*]*\\*+([^/*][^*]*\\*+)*"
},
{
"type": "STRING",
"value": "/"
}
]
}
}
},
"extras": [
{
"type": "PATTERN",
"value": "\\s"
},
{
"type": "SYMBOL",
"name": "line_comment"
},
{
"type": "SYMBOL",
"name": "block_comment"
}
],
"conflicts": [],

View File

@ -8,9 +8,9 @@
#define LANGUAGE_VERSION 11
#define STATE_COUNT 15
#define LARGE_STATE_COUNT 2
#define SYMBOL_COUNT 13
#define SYMBOL_COUNT 15
#define ALIAS_COUNT 0
#define TOKEN_COUNT 6
#define TOKEN_COUNT 8
#define EXTERNAL_TOKEN_COUNT 0
#define FIELD_COUNT 2
#define MAX_ALIAS_SEQUENCE_LENGTH 3
@ -21,13 +21,15 @@ enum {
anon_sym_true = 3,
anon_sym_false = 4,
sym_identifier = 5,
sym_source_file = 6,
sym__items = 7,
sym_assignment_item = 8,
sym__expression = 9,
sym__literal = 10,
sym_boolean_literal = 11,
aux_sym_source_file_repeat1 = 12,
sym_line_comment = 6,
sym_block_comment = 7,
sym_source_file = 8,
sym__items = 9,
sym_assignment_item = 10,
sym__expression = 11,
sym__literal = 12,
sym_boolean_literal = 13,
aux_sym_source_file_repeat1 = 14,
};
static const char *ts_symbol_names[] = {
@ -37,6 +39,8 @@ static const char *ts_symbol_names[] = {
[anon_sym_true] = "true",
[anon_sym_false] = "false",
[sym_identifier] = "identifier",
[sym_line_comment] = "line_comment",
[sym_block_comment] = "block_comment",
[sym_source_file] = "source_file",
[sym__items] = "_items",
[sym_assignment_item] = "assignment_item",
@ -53,6 +57,8 @@ static TSSymbol ts_symbol_map[] = {
[anon_sym_true] = anon_sym_true,
[anon_sym_false] = anon_sym_false,
[sym_identifier] = sym_identifier,
[sym_line_comment] = sym_line_comment,
[sym_block_comment] = sym_block_comment,
[sym_source_file] = sym_source_file,
[sym__items] = sym__items,
[sym_assignment_item] = sym_assignment_item,
@ -87,6 +93,14 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true,
.named = true,
},
[sym_line_comment] = {
.visible = true,
.named = true,
},
[sym_block_comment] = {
.visible = true,
.named = true,
},
[sym_source_file] = {
.visible = true,
.named = true,
@ -147,148 +161,174 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
eof = lexer->eof(lexer);
switch (state) {
case 0:
if (eof) ADVANCE(10);
if (lookahead == ';') ADVANCE(11);
if (lookahead == '=') ADVANCE(12);
if (lookahead == 'f') ADVANCE(17);
if (lookahead == 't') ADVANCE(21);
if (eof) ADVANCE(13);
if (lookahead == '%') ADVANCE(28);
if (lookahead == '/') ADVANCE(2);
if (lookahead == ';') ADVANCE(14);
if (lookahead == '=') ADVANCE(15);
if (lookahead == 'f') ADVANCE(20);
if (lookahead == 't') ADVANCE(24);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(0)
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 1:
if (lookahead == 'a') ADVANCE(5);
if (lookahead == '%') ADVANCE(28);
if (lookahead == '/') ADVANCE(2);
if (lookahead == 'f') ADVANCE(5);
if (lookahead == 't') ADVANCE(9);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(1)
END_STATE();
case 2:
if (lookahead == 'e') ADVANCE(13);
if (lookahead == '*') ADVANCE(4);
END_STATE();
case 3:
if (lookahead == 'e') ADVANCE(15);
if (lookahead == '*') ADVANCE(3);
if (lookahead == '/') ADVANCE(29);
if (lookahead != 0) ADVANCE(4);
END_STATE();
case 4:
if (lookahead == 'f') ADVANCE(1);
if (lookahead == 't') ADVANCE(6);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(4)
if (lookahead == '*') ADVANCE(3);
if (lookahead != 0) ADVANCE(4);
END_STATE();
case 5:
if (lookahead == 'l') ADVANCE(7);
if (lookahead == 'a') ADVANCE(8);
END_STATE();
case 6:
if (lookahead == 'r') ADVANCE(8);
if (lookahead == 'e') ADVANCE(16);
END_STATE();
case 7:
if (lookahead == 's') ADVANCE(3);
if (lookahead == 'e') ADVANCE(18);
END_STATE();
case 8:
if (lookahead == 'u') ADVANCE(2);
if (lookahead == 'l') ADVANCE(10);
END_STATE();
case 9:
if (eof) ADVANCE(10);
if (lookahead == ';') ADVANCE(11);
if (lookahead == 'r') ADVANCE(11);
END_STATE();
case 10:
if (lookahead == 's') ADVANCE(7);
END_STATE();
case 11:
if (lookahead == 'u') ADVANCE(6);
END_STATE();
case 12:
if (eof) ADVANCE(13);
if (lookahead == '%') ADVANCE(28);
if (lookahead == '/') ADVANCE(2);
if (lookahead == ';') ADVANCE(14);
if (lookahead == '\t' ||
lookahead == '\n' ||
lookahead == '\r' ||
lookahead == ' ') SKIP(9)
lookahead == ' ') SKIP(12)
if (('A' <= lookahead && lookahead <= 'Z') ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
END_STATE();
case 10:
ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE();
case 11:
ACCEPT_TOKEN(anon_sym_SEMI);
END_STATE();
case 12:
ACCEPT_TOKEN(anon_sym_EQ);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 13:
ACCEPT_TOKEN(anon_sym_true);
ACCEPT_TOKEN(ts_builtin_sym_end);
END_STATE();
case 14:
ACCEPT_TOKEN(anon_sym_SEMI);
END_STATE();
case 15:
ACCEPT_TOKEN(anon_sym_EQ);
END_STATE();
case 16:
ACCEPT_TOKEN(anon_sym_true);
END_STATE();
case 17:
ACCEPT_TOKEN(anon_sym_true);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
END_STATE();
case 15:
ACCEPT_TOKEN(anon_sym_false);
END_STATE();
case 16:
ACCEPT_TOKEN(anon_sym_false);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
END_STATE();
case 17:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'a') ADVANCE(20);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('b' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 18:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'e') ADVANCE(14);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
ACCEPT_TOKEN(anon_sym_false);
END_STATE();
case 19:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'e') ADVANCE(16);
ACCEPT_TOKEN(anon_sym_false);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 20:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'l') ADVANCE(22);
if (lookahead == 'a') ADVANCE(23);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('b' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 21:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'r') ADVANCE(23);
if (lookahead == 'e') ADVANCE(17);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 22:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 's') ADVANCE(19);
if (lookahead == 'e') ADVANCE(19);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 23:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'u') ADVANCE(18);
if (lookahead == 'l') ADVANCE(25);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 24:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'r') ADVANCE(26);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 25:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 's') ADVANCE(22);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 26:
ACCEPT_TOKEN(sym_identifier);
if (lookahead == 'u') ADVANCE(21);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 27:
ACCEPT_TOKEN(sym_identifier);
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(24);
('a' <= lookahead && lookahead <= 'z')) ADVANCE(27);
END_STATE();
case 28:
ACCEPT_TOKEN(sym_line_comment);
if (lookahead != 0 &&
lookahead != '\n') ADVANCE(28);
END_STATE();
case 29:
ACCEPT_TOKEN(sym_block_comment);
END_STATE();
default:
return false;
@ -297,14 +337,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
static TSLexMode ts_lex_modes[STATE_COUNT] = {
[0] = {.lex_state = 0},
[1] = {.lex_state = 9},
[2] = {.lex_state = 4},
[3] = {.lex_state = 9},
[4] = {.lex_state = 9},
[1] = {.lex_state = 12},
[2] = {.lex_state = 1},
[3] = {.lex_state = 12},
[4] = {.lex_state = 12},
[5] = {.lex_state = 0},
[6] = {.lex_state = 0},
[7] = {.lex_state = 0},
[8] = {.lex_state = 9},
[8] = {.lex_state = 12},
[9] = {.lex_state = 0},
[10] = {.lex_state = 0},
[11] = {.lex_state = 0},
@ -321,126 +361,170 @@ static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[anon_sym_true] = ACTIONS(1),
[anon_sym_false] = ACTIONS(1),
[sym_identifier] = ACTIONS(1),
[sym_line_comment] = ACTIONS(3),
[sym_block_comment] = ACTIONS(3),
},
[1] = {
[sym_source_file] = STATE(14),
[sym__items] = STATE(5),
[sym_assignment_item] = STATE(5),
[ts_builtin_sym_end] = ACTIONS(3),
[anon_sym_SEMI] = ACTIONS(5),
[sym_identifier] = ACTIONS(7),
[ts_builtin_sym_end] = ACTIONS(5),
[anon_sym_SEMI] = ACTIONS(7),
[sym_identifier] = ACTIONS(9),
[sym_line_comment] = ACTIONS(3),
[sym_block_comment] = ACTIONS(3),
},
};
static uint16_t ts_small_parse_table[] = {
[0] = 2,
ACTIONS(9), 2,
[0] = 3,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
ACTIONS(11), 2,
anon_sym_true,
anon_sym_false,
STATE(10), 3,
sym__expression,
sym__literal,
sym_boolean_literal,
[10] = 3,
ACTIONS(7), 1,
sym_identifier,
ACTIONS(11), 1,
ts_builtin_sym_end,
STATE(11), 2,
sym__items,
sym_assignment_item,
[21] = 3,
ACTIONS(7), 1,
[14] = 4,
ACTIONS(9), 1,
sym_identifier,
ACTIONS(13), 1,
ts_builtin_sym_end,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
STATE(11), 2,
sym__items,
sym_assignment_item,
[32] = 3,
[29] = 4,
ACTIONS(9), 1,
sym_identifier,
ACTIONS(15), 1,
ts_builtin_sym_end,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
STATE(11), 2,
sym__items,
sym_assignment_item,
[44] = 4,
ACTIONS(17), 1,
anon_sym_SEMI,
STATE(6), 1,
aux_sym_source_file_repeat1,
[42] = 3,
ACTIONS(11), 1,
ts_builtin_sym_end,
ACTIONS(19), 1,
anon_sym_SEMI,
STATE(7), 1,
STATE(6), 1,
aux_sym_source_file_repeat1,
[52] = 3,
ACTIONS(21), 1,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
[58] = 4,
ACTIONS(13), 1,
ts_builtin_sym_end,
ACTIONS(23), 1,
ACTIONS(21), 1,
anon_sym_SEMI,
STATE(7), 1,
aux_sym_source_file_repeat1,
[62] = 2,
ACTIONS(7), 1,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
[72] = 4,
ACTIONS(23), 1,
ts_builtin_sym_end,
ACTIONS(25), 1,
anon_sym_SEMI,
STATE(7), 1,
aux_sym_source_file_repeat1,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
[86] = 3,
ACTIONS(9), 1,
sym_identifier,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
STATE(11), 2,
sym__items,
sym_assignment_item,
[70] = 1,
ACTIONS(26), 2,
ts_builtin_sym_end,
anon_sym_SEMI,
[75] = 1,
[98] = 2,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
ACTIONS(28), 2,
ts_builtin_sym_end,
anon_sym_SEMI,
[80] = 1,
ACTIONS(21), 2,
[107] = 2,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
ACTIONS(30), 2,
ts_builtin_sym_end,
anon_sym_SEMI,
[85] = 1,
ACTIONS(15), 1,
[116] = 2,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
ACTIONS(23), 2,
ts_builtin_sym_end,
[89] = 1,
ACTIONS(30), 1,
anon_sym_EQ,
[93] = 1,
anon_sym_SEMI,
[125] = 2,
ACTIONS(17), 1,
ts_builtin_sym_end,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
[133] = 2,
ACTIONS(32), 1,
anon_sym_EQ,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
[141] = 2,
ACTIONS(34), 1,
ts_builtin_sym_end,
ACTIONS(3), 2,
sym_line_comment,
sym_block_comment,
};
static uint32_t ts_small_parse_table_map[] = {
[SMALL_STATE(2)] = 0,
[SMALL_STATE(3)] = 10,
[SMALL_STATE(4)] = 21,
[SMALL_STATE(5)] = 32,
[SMALL_STATE(6)] = 42,
[SMALL_STATE(7)] = 52,
[SMALL_STATE(8)] = 62,
[SMALL_STATE(9)] = 70,
[SMALL_STATE(10)] = 75,
[SMALL_STATE(11)] = 80,
[SMALL_STATE(12)] = 85,
[SMALL_STATE(13)] = 89,
[SMALL_STATE(14)] = 93,
[SMALL_STATE(3)] = 14,
[SMALL_STATE(4)] = 29,
[SMALL_STATE(5)] = 44,
[SMALL_STATE(6)] = 58,
[SMALL_STATE(7)] = 72,
[SMALL_STATE(8)] = 86,
[SMALL_STATE(9)] = 98,
[SMALL_STATE(10)] = 107,
[SMALL_STATE(11)] = 116,
[SMALL_STATE(12)] = 125,
[SMALL_STATE(13)] = 133,
[SMALL_STATE(14)] = 141,
};
static TSParseActionEntry ts_parse_actions[] = {
[0] = {.entry = {.count = 0, .reusable = false}},
[1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
[3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0),
[5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12),
[7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13),
[9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9),
[11] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2),
[13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3),
[15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[21] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2),
[23] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8),
[26] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1),
[28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1),
[30] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[32] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
[3] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(),
[5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0),
[7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12),
[9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13),
[11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9),
[13] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2),
[15] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3),
[17] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1),
[19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
[21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4),
[23] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2),
[25] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(8),
[28] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_boolean_literal, 1),
[30] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_assignment_item, 3, .production_id = 1),
[32] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
[34] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(),
};
#ifdef __cplusplus