Skip to content

Commit

Permalink
Check for recovery mode and fix numeric list misparsing (#41)
Browse files Browse the repository at this point in the history
I'm making the external scanner aware of tree-sitter's error recovery mode (https://tree-sitter.github.io/tree-sitter/creating-parsers#other-external-scanner-details). This will hopefully help to move the parsing of text nodes to JS.

While doing this, I also found a bug:

https://github.com/stsewd/tree-sitter-rst/blob/c7c2b24918e231072af2d10c03b4893818de9d5d/src/tree_sitter_rst/parser.c#L135-L137

That code tries to parse the text as a numeric bullet even if the token isn't valid in that context. A check for ` && valid_symbols[T_NUMERIC_BULLET]` was added to fix this.

Fixes #31
  • Loading branch information
stsewd committed Jun 18, 2023
1 parent c7c2b24 commit a41a933
Show file tree
Hide file tree
Showing 10 changed files with 231 additions and 164 deletions.
Binary file modified docs/js/tree-sitter-rst.wasm
Binary file not shown.
2 changes: 2 additions & 0 deletions grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ module.exports = grammar({
$._directive_name,
$._substitution_mark,
$._empty_comment,

$._invalid_token,
],

extras: $ => [
Expand Down
4 changes: 4 additions & 0 deletions src/grammar.json
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,10 @@
{
"type": "SYMBOL",
"name": "_empty_comment"
},
{
"type": "SYMBOL",
"name": "_invalid_token"
}
],
"inline": [],
Expand Down
153 changes: 82 additions & 71 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
#define LANGUAGE_VERSION 14
#define STATE_COUNT 365
#define LARGE_STATE_COUNT 34
#define SYMBOL_COUNT 110
#define SYMBOL_COUNT 111
#define ALIAS_COUNT 9
#define TOKEN_COUNT 51
#define EXTERNAL_TOKEN_COUNT 40
#define TOKEN_COUNT 52
#define EXTERNAL_TOKEN_COUNT 41
#define FIELD_COUNT 3
#define MAX_ALIAS_SEQUENCE_LENGTH 4
#define PRODUCTION_ID_COUNT 27
Expand Down Expand Up @@ -67,74 +67,75 @@ enum {
sym__directive_name = 48,
sym__substitution_mark = 49,
sym__empty_comment = 50,
sym_document = 51,
sym_section = 52,
sym__overline_section = 53,
sym__underline_section = 54,
sym__transition_block = 55,
sym__body_element_block = 56,
sym_body = 57,
sym__body_element = 58,
sym_paragraph = 59,
sym__paragraph_line = 60,
sym__list = 61,
sym_bullet_list = 62,
sym__bullet_list_item = 63,
sym_enumerated_list = 64,
sym__numeric_list_item = 65,
sym_definition_list = 66,
sym__definition_list_item = 67,
aux_sym__classifiers = 68,
sym_field_list = 69,
sym_field = 70,
sym__literal_block = 71,
sym__indented_text_block = 72,
sym_line_block = 73,
sym_line = 74,
sym__block_quote_block = 75,
sym_block_quote = 76,
sym_attribution = 77,
sym_doctest_block = 78,
aux_sym__explicit_markup_block = 79,
sym__markup_block = 80,
sym_footnote = 81,
sym_citation = 82,
sym_target = 83,
sym__anonymous_target = 84,
sym_directive = 85,
sym__directive_body = 86,
sym_substitution_definition = 87,
sym__embedded_directive = 88,
sym_comment = 89,
sym__line = 90,
aux_sym__text_block = 91,
sym__text_line = 92,
sym__inline_markup = 93,
sym_interpreted_text = 94,
sym__default_role = 95,
sym__prefix_role = 96,
sym__suffix_role = 97,
aux_sym_document_repeat1 = 98,
aux_sym_body_repeat1 = 99,
aux_sym_paragraph_repeat1 = 100,
aux_sym__paragraph_line_repeat1 = 101,
aux_sym_bullet_list_repeat1 = 102,
aux_sym_enumerated_list_repeat1 = 103,
aux_sym_definition_list_repeat1 = 104,
aux_sym_field_list_repeat1 = 105,
aux_sym__indented_text_block_repeat1 = 106,
aux_sym_line_block_repeat1 = 107,
aux_sym_line_repeat1 = 108,
aux_sym__text_line_repeat1 = 109,
alias_sym_arguments = 110,
alias_sym_classifier = 111,
alias_sym_content = 112,
alias_sym_definition = 113,
alias_sym_field_body = 114,
alias_sym_field_name = 115,
alias_sym_options = 116,
alias_sym_term = 117,
alias_sym_title = 118,
sym__invalid_token = 51,
sym_document = 52,
sym_section = 53,
sym__overline_section = 54,
sym__underline_section = 55,
sym__transition_block = 56,
sym__body_element_block = 57,
sym_body = 58,
sym__body_element = 59,
sym_paragraph = 60,
sym__paragraph_line = 61,
sym__list = 62,
sym_bullet_list = 63,
sym__bullet_list_item = 64,
sym_enumerated_list = 65,
sym__numeric_list_item = 66,
sym_definition_list = 67,
sym__definition_list_item = 68,
aux_sym__classifiers = 69,
sym_field_list = 70,
sym_field = 71,
sym__literal_block = 72,
sym__indented_text_block = 73,
sym_line_block = 74,
sym_line = 75,
sym__block_quote_block = 76,
sym_block_quote = 77,
sym_attribution = 78,
sym_doctest_block = 79,
aux_sym__explicit_markup_block = 80,
sym__markup_block = 81,
sym_footnote = 82,
sym_citation = 83,
sym_target = 84,
sym__anonymous_target = 85,
sym_directive = 86,
sym__directive_body = 87,
sym_substitution_definition = 88,
sym__embedded_directive = 89,
sym_comment = 90,
sym__line = 91,
aux_sym__text_block = 92,
sym__text_line = 93,
sym__inline_markup = 94,
sym_interpreted_text = 95,
sym__default_role = 96,
sym__prefix_role = 97,
sym__suffix_role = 98,
aux_sym_document_repeat1 = 99,
aux_sym_body_repeat1 = 100,
aux_sym_paragraph_repeat1 = 101,
aux_sym__paragraph_line_repeat1 = 102,
aux_sym_bullet_list_repeat1 = 103,
aux_sym_enumerated_list_repeat1 = 104,
aux_sym_definition_list_repeat1 = 105,
aux_sym_field_list_repeat1 = 106,
aux_sym__indented_text_block_repeat1 = 107,
aux_sym_line_block_repeat1 = 108,
aux_sym_line_repeat1 = 109,
aux_sym__text_line_repeat1 = 110,
alias_sym_arguments = 111,
alias_sym_classifier = 112,
alias_sym_content = 113,
alias_sym_definition = 114,
alias_sym_field_body = 115,
alias_sym_field_name = 116,
alias_sym_options = 117,
alias_sym_term = 118,
alias_sym_title = 119,
};

static const char * const ts_symbol_names[] = {
Expand Down Expand Up @@ -189,6 +190,7 @@ static const char * const ts_symbol_names[] = {
[sym__directive_name] = "type",
[sym__substitution_mark] = "substitution",
[sym__empty_comment] = "comment",
[sym__invalid_token] = "_invalid_token",
[sym_document] = "document",
[sym_section] = "section",
[sym__overline_section] = "_overline_section",
Expand Down Expand Up @@ -311,6 +313,7 @@ static const TSSymbol ts_symbol_map[] = {
[sym__directive_name] = sym__directive_name,
[sym__substitution_mark] = sym__substitution_mark,
[sym__empty_comment] = sym_comment,
[sym__invalid_token] = sym__invalid_token,
[sym_document] = sym_document,
[sym_section] = sym_section,
[sym__overline_section] = sym__overline_section,
Expand Down Expand Up @@ -586,6 +589,10 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
.visible = true,
.named = true,
},
[sym__invalid_token] = {
.visible = false,
.named = true,
},
[sym_document] = {
.visible = true,
.named = true,
Expand Down Expand Up @@ -1958,6 +1965,7 @@ enum {
ts_external_token__directive_name = 37,
ts_external_token__substitution_mark = 38,
ts_external_token__empty_comment = 39,
ts_external_token__invalid_token = 40,
};

static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
Expand Down Expand Up @@ -2001,6 +2009,7 @@ static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
[ts_external_token__directive_name] = sym__directive_name,
[ts_external_token__substitution_mark] = sym__substitution_mark,
[ts_external_token__empty_comment] = sym__empty_comment,
[ts_external_token__invalid_token] = sym__invalid_token,
};

static const bool ts_external_scanner_states[45][EXTERNAL_TOKEN_COUNT] = {
Expand Down Expand Up @@ -2045,6 +2054,7 @@ static const bool ts_external_scanner_states[45][EXTERNAL_TOKEN_COUNT] = {
[ts_external_token__directive_name] = true,
[ts_external_token__substitution_mark] = true,
[ts_external_token__empty_comment] = true,
[ts_external_token__invalid_token] = true,
},
[2] = {
[ts_external_token__indent] = true,
Expand Down Expand Up @@ -2499,6 +2509,7 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
[sym__directive_name] = ACTIONS(1),
[sym__substitution_mark] = ACTIONS(1),
[sym__empty_comment] = ACTIONS(1),
[sym__invalid_token] = ACTIONS(1),
},
[1] = {
[sym_document] = STATE(356),
Expand Down
Loading

0 comments on commit a41a933

Please sign in to comment.