From ca404580e6f3fc18dbcddfc4c28d303a02cac4e1 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sat, 19 Sep 2020 13:37:20 +1000 Subject: [PATCH] Add initial tree-sitter grammar setup --- .gitignore | 4 + binding.gyp | 18 +++ grammar.js | 7 ++ index.js | 13 +++ package.json | 29 +++++ src/binding.cc | 28 +++++ src/grammar.json | 20 ++++ src/node-types.json | 11 ++ src/parser.c | 176 +++++++++++++++++++++++++++++ src/tree_sitter/parser.h | 235 +++++++++++++++++++++++++++++++++++++++ 10 files changed, 541 insertions(+) create mode 100644 .gitignore create mode 100644 binding.gyp create mode 100644 grammar.js create mode 100644 index.js create mode 100644 package.json create mode 100644 src/binding.cc create mode 100644 src/grammar.json create mode 100644 src/node-types.json create mode 100644 src/parser.c create mode 100644 src/tree_sitter/parser.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad15b4f --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +node_modules +build +*.log +package-lock.json diff --git a/binding.gyp b/binding.gyp new file mode 100644 index 0000000..a78ac2f --- /dev/null +++ b/binding.gyp @@ -0,0 +1,18 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_minizinc_binding", + "include_dirs": [ + " 'constraint', + } +}); diff --git a/index.js b/index.js new file mode 100644 index 0000000..bd3ea98 --- /dev/null +++ b/index.js @@ -0,0 +1,13 @@ +try { /* Try the Release build of the native binding first. */ + module.exports = require("./build/Release/tree_sitter_minizinc_binding"); +} catch (error) { /* Release load failed: fall back to the Debug build. */ + try { + module.exports = require("./build/Debug/tree_sitter_minizinc_binding"); + } catch (_) { + throw error /* Re-throw the Release failure rather than the Debug one. */ + } +} + +try { /* Attach node type metadata when the JSON file can be loaded. */ + module.exports.nodeTypeInfo = require("./src/node-types.json"); +} catch (_) {} /* Best effort: a failed load of node-types.json is ignored. */ diff --git a/package.json b/package.json new file mode 100644 index 0000000..ffac1c1 --- /dev/null +++ b/package.json @@ -0,0 +1,29 @@ +{ + "name": "tree-sitter-minizinc", + "version": "0.1.0", + "description": "MiniZinc grammar for tree-sitter", + "main": "index.js", + 
"scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/Dekker1/tree-sitter-minizinc.git" + }, + "keywords": [ + "parser", + "minizinc" + ], + "author": "Jip J. Dekker (https://dekker.one/)", + "license": "MPL-2.0", + "bugs": { + "url": "https://github.com/Dekker1/tree-sitter-minizinc/issues" + }, + "homepage": "https://github.com/Dekker1/tree-sitter-minizinc#readme", + "dependencies": { + "nan": "^2.14.1" + }, + "devDependencies": { + "tree-sitter-cli": "^0.16.9" + } +} diff --git a/src/binding.cc b/src/binding.cc new file mode 100644 index 0000000..2d53847 --- /dev/null +++ b/src/binding.cc @@ -0,0 +1,28 @@ +#include "tree_sitter/parser.h" +#include <node.h> +#include "nan.h" + +using namespace v8; + +extern "C" TSLanguage * tree_sitter_minizinc(); /* Defined in src/parser.c; the name must match that symbol exactly. */ + +namespace { + +NAN_METHOD(New) {} /* Empty constructor callback for the "Language" function template. */ + +void Init(Local<Object> exports, Local<Object> module) { /* Module initializer: replaces module.exports with a "Language" instance wrapping the parser. */ + Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New); + tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(1); /* One internal slot, used below for the TSLanguage pointer. */ + + Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked(); + Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(instance, 0, tree_sitter_minizinc()); /* Store the language pointer in internal field 0. */ + + Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("MiniZinc").ToLocalChecked()); + Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); +} + +NODE_MODULE(tree_sitter_minizinc_binding, Init) + +} // namespace diff --git a/src/grammar.json b/src/grammar.json new file mode 100644 index 0000000..638c308 --- /dev/null +++ b/src/grammar.json @@ -0,0 +1,20 @@ +{ + "name": "minizinc", + "rules": { + "source_file": { + "type": "STRING", + "value": "constraint" + } + }, + "extras": [ + { + "type": "PATTERN", + "value": "\\s" + } + ], + "conflicts": [], + "externals": [], + "inline": [], + "supertypes": [] +} + diff --git a/src/node-types.json b/src/node-types.json new file mode 100644 
index 0000000..82e92eb --- /dev/null +++ b/src/node-types.json @@ -0,0 +1,11 @@ +[ + { + "type": "source_file", + "named": true, + "fields": {} + }, + { + "type": "constraint", + "named": false + } +] \ No newline at end of file diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..26d7153 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,176 @@ +#include <tree_sitter/parser.h> + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 11 +#define STATE_COUNT 4 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 3 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 2 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 0 +#define MAX_ALIAS_SEQUENCE_LENGTH 1 + +enum { /* Grammar symbol ids; id 0 is ts_builtin_sym_end from tree_sitter/parser.h. */ + anon_sym_constraint = 1, + sym_source_file = 2, +}; + +static const char *ts_symbol_names[] = { /* Name string for each symbol id. */ + [ts_builtin_sym_end] = "end", + [anon_sym_constraint] = "constraint", + [sym_source_file] = "source_file", +}; + +static TSSymbol ts_symbol_map[] = { /* Identity mapping: every symbol maps to itself. */ + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_constraint] = anon_sym_constraint, + [sym_source_file] = sym_source_file, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_constraint] = { + .visible = true, + .named = false, + }, + [sym_source_file] = { + .visible = true, + .named = true, + }, +}; + +static TSSymbol ts_alias_sequences[1][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { /* Character-level state machine; state 0 skips whitespace and starts matching "constraint". */ + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(10); + if (lookahead == 'c') ADVANCE(5); + if (lookahead == '\t' || + lookahead == '\n' || + lookahead == '\r' || + lookahead == ' ') SKIP(0) + END_STATE(); + case 1: + if (lookahead == 'a') ADVANCE(2); + END_STATE(); + case 2: + if (lookahead == 'i') ADVANCE(4); + END_STATE(); + case 3: + if (lookahead == 'n') ADVANCE(7); + 
END_STATE(); + case 4: + if (lookahead == 'n') ADVANCE(9); + END_STATE(); + case 5: + if (lookahead == 'o') ADVANCE(3); + END_STATE(); + case 6: + if (lookahead == 'r') ADVANCE(1); + END_STATE(); + case 7: + if (lookahead == 's') ADVANCE(8); + END_STATE(); + case 8: + if (lookahead == 't') ADVANCE(6); + END_STATE(); + case 9: + if (lookahead == 't') ADVANCE(11); + END_STATE(); + case 10: /* Reached from state 0 at end of input: emit the builtin end symbol. */ + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 11: /* The full keyword "constraint" has been matched. */ + ACCEPT_TOKEN(anon_sym_constraint); + END_STATE(); + default: + return false; + } +} + +static TSLexMode ts_lex_modes[STATE_COUNT] = { /* Every parse state uses lex state 0. */ + [0] = {.lex_state = 0}, + [1] = {.lex_state = 0}, + [2] = {.lex_state = 0}, + [3] = {.lex_state = 0}, +}; + +static uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_constraint] = ACTIONS(1), + }, + [1] = { + [sym_source_file] = STATE(3), + [anon_sym_constraint] = ACTIONS(3), + }, +}; + +static uint16_t ts_small_parse_table[] = { + [0] = 1, + ACTIONS(5), 1, + ts_builtin_sym_end, + [4] = 1, + ACTIONS(7), 1, + ts_builtin_sym_end, +}; + +static uint32_t ts_small_parse_table_map[] = { /* Offsets into ts_small_parse_table for states 2 and 3. */ + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 4, +}; + +static TSParseActionEntry ts_parse_actions[] = { /* Entries sit at the indices referenced through ACTIONS(n) in the tables above. */ + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), + [7] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef _WIN32 +#define extern __declspec(dllexport) /* On Windows, 'extern' on the function below expands to a DLL export. */ +#endif + +extern const TSLanguage *tree_sitter_minizinc(void) { /* Fills a function-local static TSLanguage with the tables defined above. */ + static TSLanguage language = { + .version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .symbol_metadata = ts_symbol_metadata, + .parse_table = 
(const unsigned short *)ts_parse_table, + .small_parse_table = (const uint16_t *)ts_small_parse_table, + .small_parse_table_map = (const uint32_t *)ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .lex_modes = ts_lex_modes, + .symbol_names = ts_symbol_names, + .public_symbol_map = ts_symbol_map, + .alias_sequences = (const TSSymbol *)ts_alias_sequences, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .lex_fn = ts_lex, + .external_token_count = EXTERNAL_TOKEN_COUNT, + }; + return &language; /* Address of the static instance, so every call returns the same pointer. */ +} +#ifdef __cplusplus +} +#endif diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h new file mode 100644 index 0000000..11bf4fc --- /dev/null +++ b/src/tree_sitter/parser.h @@ -0,0 +1,235 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 /* Symbol id the lexer emits at end of input. */ +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ /* These typedefs are skipped when TREE_SITTER_API_H_ is already defined. */ +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef uint16_t TSStateId; + +typedef struct { + bool visible : 1; + bool named : 1; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { /* Lexer interface used by the lex functions: current lookahead, result symbol and callbacks. */ + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef struct { + union { + struct { + TSStateId state; + bool extra : 1; + bool repetition : 1; + } 
shift; + struct { + TSSymbol symbol; + int16_t dynamic_precedence; + uint8_t child_count; + uint8_t production_id; + } reduce; + } params; + TSParseActionType type : 4; +} TSParseAction; /* One parse action: shift or reduce parameters plus a 4-bit type tag. */ + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { /* A parse-actions slot holds either an action or an entry header (count, reusable). */ + TSParseAction action; + struct { + uint8_t count; + bool reusable : 1; + } entry; +} TSParseActionEntry; + +struct TSLanguage { /* Counts, tables and callbacks for one grammar; src/parser.c fills in an instance. */ + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + const char **symbol_names; + const TSSymbolMetadata *symbol_metadata; + const uint16_t *parse_table; + const TSParseActionEntry *parse_actions; + const TSLexMode *lex_modes; + const TSSymbol *alias_sequences; + uint16_t max_alias_sequence_length; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { /* Hooks for an external scanner; src/parser.c leaves these unset. */ + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + uint32_t field_count; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const char **field_names; + uint32_t large_state_count; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSSymbol *public_symbol_map; +}; + +/* + * Lexer Macros + */ + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; /* The first pass jumps to 'start', so nothing is consumed before the first lookahead read. */ + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define 
ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; /* Returns the flag set by ACCEPT_TOKEN; it stays false when no token was accepted. */ + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) /* Fully parenthesized so the expansion is safe inside larger expressions. */ + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + { \ + { \ + .params = { \ + .shift = { \ + .state = state_value \ + } \ + }, \ + .type = TSParseActionTypeShift \ + } \ + } + +#define SHIFT_REPEAT(state_value) \ + { \ + { \ + .params = { \ + .shift = { \ + .state = state_value, \ + .repetition = true \ + } \ + }, \ + .type = TSParseActionTypeShift \ + } \ + } + +#define RECOVER() \ + { \ + { .type = TSParseActionTypeRecover } \ + } + +#define SHIFT_EXTRA() \ + { \ + { \ + .params = { \ + .shift = { \ + .extra = true \ + } \ + }, \ + .type = TSParseActionTypeShift \ + } \ + } + +#define REDUCE(symbol_val, child_count_val, ...) \ + { \ + { \ + .params = { \ + .reduce = { \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }, \ + .type = TSParseActionTypeReduce \ + } \ + } + +#define ACCEPT_INPUT() \ + { \ + { .type = TSParseActionTypeAccept } \ + } + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_