Skip to content

Commit 7ac095a

Browse files
committed
fixup! Allow to parse macro identifiers in variable decls
1 parent 3fe7a45 commit 7ac095a

File tree

1 file changed

+233
-0
lines changed

1 file changed

+233
-0
lines changed
Lines changed: 233 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,233 @@
1+
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2+
From: Diego Alonso <[email protected]>
3+
Date: Thu, 26 Jun 2025 14:04:51 +0200
4+
Subject: Allow to parse macro identifiers in variable decls
5+
6+
---
7+
grammar.js | 2 +
8+
src/scanner.c | 128 +++++++++++++++++++++++++++++++++++++++-----------
9+
2 files changed, 103 insertions(+), 27 deletions(-)
10+
11+
diff --git a/grammar.js b/grammar.js
12+
index 6e79004..40ac8b7 100644
13+
--- a/grammar.js
14+
+++ b/grammar.js
15+
@@ -67,6 +67,7 @@ module.exports = grammar({
16+
$._external_end_of_statement,
17+
$._preproc_unary_operator,
18+
$.hollerith_constant,
19+
+ $.macro_identifier,
20+
],
21+
22+
extras: $ => [
23+
@@ -870,6 +871,7 @@ module.exports = grammar({
24+
$.derived_type,
25+
alias($.procedure_declaration, $.procedure),
26+
$.declared_type,
27+
+ $.macro_identifier,
28+
)),
29+
optional(seq(',',
30+
commaSep1(
31+
diff --git a/src/scanner.c b/src/scanner.c
32+
index b768d99..e477df4 100644
33+
--- a/src/scanner.c
34+
+++ b/src/scanner.c
35+
@@ -1,4 +1,5 @@
36+
#include "tree_sitter/alloc.h"
37+
+#include "tree_sitter/array.h"
38+
#include "tree_sitter/parser.h"
39+
#include <ctype.h>
40+
#include <wctype.h>
41+
@@ -13,10 +14,12 @@ enum TokenType {
42+
END_OF_STATEMENT,
43+
PREPROC_UNARY_OPERATOR,
44+
HOLLERITH_CONSTANT,
45+
+ MACRO_IDENTIFIER,
46+
};
47+
48+
typedef struct {
49+
bool in_line_continuation;
50+
+ Array(char *) MacroIdentifiers;
51+
} Scanner;
52+
53+
typedef enum {
54+
@@ -301,31 +304,44 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
55+
return true;
56+
}
57+
58+
-static bool scan_string_literal_kind(TSLexer *lexer) {
59+
- // Strictly, it's allowed for the kind to be an integer literal, in
60+
- // practice I've not seen it
61+
+typedef Array(char) String;
62+
+
63+
+// Returns NULL on error, otherwise an allocated char array for an identifier
64+
+static String *scan_identifier(TSLexer *lexer) {
65+
if (!iswalpha(lexer->lookahead)) {
66+
+ return NULL;
67+
+ }
68+
+ String *possible_identifier = ts_calloc(1, sizeof(String));
69+
+ while (is_identifier_char(lexer->lookahead) && !lexer->eof(lexer)) {
70+
+ array_push(possible_identifier, lexer->lookahead);
71+
+ // Don't capture the trailing underscore as part of the kind identifier
72+
+ // If another user of this function wants to mark the end again after
73+
+ // the identifier they're free to do so
74+
+ if (lexer->lookahead == '_') {
75+
+ lexer->mark_end(lexer);
76+
+ }
77+
+ advance(lexer);
78+
+ }
79+
+ if (possible_identifier->size == 0) {
80+
+ array_delete(possible_identifier);
81+
+ ts_free(possible_identifier);
82+
+ return NULL;
83+
+ }
84+
+ return possible_identifier;
85+
+}
86+
+
87+
+static bool scan_string_literal_kind(TSLexer *lexer, String *identifier) {
88+
+ if (identifier->size == 0) {
89+
+ return false;
90+
+ }
91+
+
92+
+ char last_char = identifier->contents[identifier->size - 1];
93+
+ if ((last_char != '_') ||
94+
+ (lexer->lookahead != '"' && lexer->lookahead != '\'')) {
95+
return false;
96+
}
97+
98+
lexer->result_symbol = STRING_LITERAL_KIND;
99+
-
100+
- // We need two characters of lookahead to see `_"`
101+
- char current_char = '\0';
102+
-
103+
- while (is_identifier_char(lexer->lookahead) && !lexer->eof(lexer)) {
104+
- current_char = lexer->lookahead;
105+
- // Don't capture the trailing underscore as part of the kind identifier
106+
- if (lexer->lookahead == '_') {
107+
- lexer->mark_end(lexer);
108+
- }
109+
- advance(lexer);
110+
- }
111+
-
112+
- if ((current_char != '_') || (lexer->lookahead != '"' && lexer->lookahead != '\'')) {
113+
- return false;
114+
- }
115+
-
116+
return true;
117+
}
118+
119+
@@ -393,6 +409,28 @@ static bool scan_string_literal(TSLexer *lexer) {
120+
return false;
121+
}
122+
123+
+static bool scan_macro_identifier(Scanner *scanner, TSLexer *lexer,
124+
+ String *identifier) {
125+
+ unsigned num_macro_ids = scanner->MacroIdentifiers.size;
126+
+ if (num_macro_ids == 0) {
127+
+ return false;
128+
+ }
129+
+
130+
+ for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
131+
+ char *macro_id = *array_get(&scanner->MacroIdentifiers, i);
132+
+ unsigned macro_id_len = strlen(macro_id);
133+
+ if (identifier->size != macro_id_len) {
134+
+ continue;
135+
+ }
136+
+ if (strncmp(macro_id, identifier->contents, identifier->size) == 0) {
137+
+ lexer->mark_end(lexer);
138+
+ lexer->result_symbol = MACRO_IDENTIFIER;
139+
+ return true;
140+
+ }
141+
+ }
142+
+ return false;
143+
+}
144+
+
145+
/// Need an external scanner to catch '!' before its parsed as a comment
146+
static bool scan_preproc_unary_operator(TSLexer *lexer) {
147+
const char next_char = lexer->lookahead;
148+
@@ -467,19 +505,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
149+
return true;
150+
}
151+
152+
- if (valid_symbols[STRING_LITERAL_KIND]) {
153+
+ // These symbols both scan for an identifier, we need to combine the logic
154+
+ // and they always need to be the last to look for since we can't backtrack
155+
+ if (valid_symbols[STRING_LITERAL_KIND] || valid_symbols[MACRO_IDENTIFIER]) {
156+
+ String *identifier = scan_identifier(lexer);
157+
+ bool identifier_result = false;
158+
// This may need a lot of lookahead, so should (probably) always
159+
// be the last token to look for
160+
- if (scan_string_literal_kind(lexer)) {
161+
+ if (identifier && valid_symbols[STRING_LITERAL_KIND]) {
162+
+ if (scan_string_literal_kind(lexer, identifier)) {
163+
+ identifier_result = true;
164+
+ }
165+
+ }
166+
+ if (!identifier_result && identifier && valid_symbols[MACRO_IDENTIFIER]) {
167+
+ if (scan_macro_identifier(scanner, lexer, identifier)) {
168+
+ identifier_result = true;
169+
+ }
170+
+ }
171+
+ if (identifier) {
172+
+ ts_free(identifier);
173+
+ }
174+
+ if (identifier_result) {
175+
return true;
176+
}
177+
}
178+
-
179+
return false;
180+
}
181+
182+
void *tree_sitter_fortran_external_scanner_create() {
183+
- return ts_calloc(1, sizeof(bool));
184+
+ Scanner *result = (Scanner *)ts_calloc(1, sizeof(Scanner));
185+
+ char *macro_ids = getenv("CODEE_TS_MACRO_IDS");
186+
+ if (!macro_ids) {
187+
+ return result;
188+
+ }
189+
+ char *macro_id = strtok(macro_ids, ":");
190+
+ Array(char *) *macroIdsResult = &result->MacroIdentifiers;
191+
+ while (macro_id) {
192+
+ int length = strlen(macro_id);
193+
+ char *new_str = (char *)ts_malloc((length + 1) * sizeof(char));
194+
+ strncpy(new_str, macro_id, length);
195+
+ array_push(macroIdsResult, new_str);
196+
+ // Keep splitting
197+
+ macro_id = strtok(NULL, ":");
198+
+ }
199+
+ return result;
200+
}
201+
202+
bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
203+
@@ -491,8 +560,9 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
204+
unsigned tree_sitter_fortran_external_scanner_serialize(void *payload,
205+
char *buffer) {
206+
Scanner *scanner = (Scanner *)payload;
207+
- buffer[0] = (char)scanner->in_line_continuation;
208+
- return 1;
209+
+ unsigned size = sizeof(*scanner);
210+
+ memcpy(buffer, scanner, size);
211+
+ return size;
212+
}
213+
214+
void tree_sitter_fortran_external_scanner_deserialize(void *payload,
215+
@@ -500,11 +570,17 @@ void tree_sitter_fortran_external_scanner_deserialize(void *payload,
216+
unsigned length) {
217+
Scanner *scanner = (Scanner *)payload;
218+
if (length > 0) {
219+
- scanner->in_line_continuation = buffer[0];
220+
+ unsigned size = sizeof(*scanner);
221+
+ memcpy(scanner, buffer, size);
222+
}
223+
}
224+
225+
void tree_sitter_fortran_external_scanner_destroy(void *payload) {
226+
Scanner *scanner = (Scanner *)payload;
227+
+ for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
228+
+ char *str = *array_get(&scanner->MacroIdentifiers, i);
229+
+ ts_free(str);
230+
+ }
231+
+ array_delete(&scanner->MacroIdentifiers);
232+
ts_free(scanner);
233+
}

0 commit comments

Comments
 (0)