diff --git a/examples/arithmetics/src/language-server/generated/grammar.ts b/examples/arithmetics/src/language-server/generated/grammar.ts index 4d4f84210..bc5af5209 100644 --- a/examples/arithmetics/src/language-server/generated/grammar.ts +++ b/examples/arithmetics/src/language-server/generated/grammar.ts @@ -14,8 +14,8 @@ export const ArithmeticsGrammar = (): Grammar => loadedArithmeticsGrammar ?? (lo "rules": [ { "$type": "ParserRule", - "name": "Module", "entry": true, + "name": "Module", "definition": { "$type": "Group", "elements": [ diff --git a/examples/domainmodel/src/language-server/generated/grammar.ts b/examples/domainmodel/src/language-server/generated/grammar.ts index 6dd894e01..541155426 100644 --- a/examples/domainmodel/src/language-server/generated/grammar.ts +++ b/examples/domainmodel/src/language-server/generated/grammar.ts @@ -14,8 +14,8 @@ export const DomainModelGrammar = (): Grammar => loadedDomainModelGrammar ?? (lo "rules": [ { "$type": "ParserRule", - "name": "Domainmodel", "entry": true, + "name": "Domainmodel", "definition": { "$type": "Assignment", "feature": "elements", diff --git a/examples/requirements/src/language-server/generated/grammar.ts b/examples/requirements/src/language-server/generated/grammar.ts index 06c0f5cdc..4507c2b64 100644 --- a/examples/requirements/src/language-server/generated/grammar.ts +++ b/examples/requirements/src/language-server/generated/grammar.ts @@ -15,8 +15,8 @@ export const RequirementsGrammar = (): Grammar => loadedRequirementsGrammar ?? ( "rules": [ { "$type": "ParserRule", - "name": "RequirementModel", "entry": true, + "name": "RequirementModel", "definition": { "$type": "Group", "elements": [ @@ -321,8 +321,8 @@ export const TestsGrammar = (): Grammar => loadedTestsGrammar ?? (loadedTestsGra "rules": [ { "$type": "ParserRule", - "name": "TestModel", "entry": true, + "name": "TestModel", "definition": { "$type": "Group", "elements": [ @@ -519,8 +519,8 @@ export const TestsGrammar = (): Grammar => loadedTestsGrammar ?? (loadedTestsGra }, { "$type": "ParserRule", - "name": "RequirementModel", "entry": false, + "name": "RequirementModel", "definition": { "$type": "Group", "elements": [ diff --git a/examples/statemachine/src/language-server/generated/grammar.ts b/examples/statemachine/src/language-server/generated/grammar.ts index 48e4abe6d..2c26e6c7a 100644 --- a/examples/statemachine/src/language-server/generated/grammar.ts +++ b/examples/statemachine/src/language-server/generated/grammar.ts @@ -14,8 +14,8 @@ export const StatemachineGrammar = (): Grammar => loadedStatemachineGrammar ?? ( "rules": [ { "$type": "ParserRule", - "name": "Statemachine", "entry": true, + "name": "Statemachine", "definition": { "$type": "Group", "elements": [ diff --git a/packages/langium/src/grammar/generated/grammar.ts b/packages/langium/src/grammar/generated/grammar.ts index 067db4e0d..56d20c1f5 100644 --- a/packages/langium/src/grammar/generated/grammar.ts +++ b/packages/langium/src/grammar/generated/grammar.ts @@ -14,8 +14,8 @@ export const LangiumGrammarGrammar = (): Grammar => loadedLangiumGrammarGrammar "rules": [ { "$type": "ParserRule", - "name": "Grammar", "entry": true, + "name": "Grammar", "definition": { "$type": "Group", "elements": [ @@ -1348,8 +1348,8 @@ export const LangiumGrammarGrammar = (): Grammar => loadedLangiumGrammarGrammar }, { "$type": "ParserRule", - "name": "RuleNameAndParams", "fragment": true, + "name": "RuleNameAndParams", "definition": { "$type": "Group", "elements": [ diff --git a/packages/langium/src/parser/cst-node-builder.ts b/packages/langium/src/parser/cst-node-builder.ts index 0cc3d42b6..95a744ab6 100644 --- a/packages/langium/src/parser/cst-node-builder.ts +++ b/packages/langium/src/parser/cst-node-builder.ts @@ -18,7 +18,7 @@ export class CstNodeBuilder { private nodeStack: CompositeCstNodeImpl[] = []; private get current(): CompositeCstNodeImpl { - return this.nodeStack[this.nodeStack.length - 1]; + return this.nodeStack[this.nodeStack.length - 1] ?? this.rootNode; } buildRootNode(input: string): RootCstNode { diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index ba630176e..be07feaf2 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -94,7 +94,7 @@ export interface BaseParser { * Requires a unique index within the rule for a specific sub rule. * Arguments can be supplied to the rule invocation for semantic predicates */ - subrule(idx: number, rule: RuleResult, feature: AbstractElement, args: Args): void; + subrule(idx: number, rule: RuleResult, fragment: boolean, feature: AbstractElement, args: Args): void; /** * Executes a grammar action that modifies the currently active AST node */ @@ -161,7 +161,7 @@ export abstract class AbstractLangiumParser implements BaseParser { abstract rule(rule: ParserRule, impl: RuleImpl): RuleResult; abstract consume(idx: number, tokenType: TokenType, feature: AbstractElement): void; - abstract subrule(idx: number, rule: RuleResult, feature: AbstractElement, args: Args): void; + abstract subrule(idx: number, rule: RuleResult, fragment: boolean, feature: AbstractElement, args: Args): void; abstract action($type: string, action: Action): void; abstract construct(): unknown; @@ -251,7 +251,9 @@ export class LangiumParser extends AbstractLangiumParser { private startImplementation($type: string | symbol | undefined, implementation: RuleImpl): RuleImpl { return (args) => { - if (!this.isRecording()) { + // Only create a new AST node in case the calling rule is not a fragment rule + const createNode = !this.isRecording() && $type !== undefined; + if (createNode) { const node: any = { $type }; this.stack.push(node); if ($type === DatatypeSymbol) { @@ -264,7 +266,7 @@ export class LangiumParser extends AbstractLangiumParser { } catch (err) { result = undefined; } - if (!this.isRecording() && result === undefined) { + if (result === undefined && createNode) { result = this.construct(); } return result; @@ -300,9 +302,13 @@ export class LangiumParser extends AbstractLangiumParser { return !token.isInsertedInRecovery && !isNaN(token.startOffset) && typeof token.endOffset === 'number' && !isNaN(token.endOffset); } - subrule(idx: number, rule: RuleResult, feature: AbstractElement, args: Args): void { + subrule(idx: number, rule: RuleResult, fragment: boolean, feature: AbstractElement, args: Args): void { let cstNode: CompositeCstNode | undefined; - if (!this.isRecording()) { + if (!this.isRecording() && !fragment) { + // We only want to create a new CST node if the subrule actually creates a new AST node. + // In other cases like calls of fragment rules the current CST/AST is populated further. + // Note that skipping this initialization and leaving cstNode unassigned also skips the subrule assignment later on. + // This is intended, as fragment rules only enrich the current AST node cstNode = this.nodeBuilder.buildCompositeNode(feature); } const subruleResult = this.wrapper.wrapSubrule(idx, rule, args) as any; @@ -325,11 +331,7 @@ export class LangiumParser extends AbstractLangiumParser { if (isDataTypeNode(current)) { current.value += result.toString(); } else if (typeof result === 'object' && result) { - const resultKind = result.$type; const object = this.assignWithoutOverride(result, current); - if (resultKind) { - object.$type = resultKind; - } const newItem = object; this.stack.pop(); this.stack.push(newItem); @@ -592,7 +594,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser { } } - subrule(idx: number, rule: RuleResult, feature: AbstractElement, args: Args): void { + subrule(idx: number, rule: RuleResult, fragment: boolean, feature: AbstractElement, args: Args): void { this.before(feature); this.wrapper.wrapSubrule(idx, rule, args); this.after(feature); diff --git a/packages/langium/src/parser/parser-builder-base.ts b/packages/langium/src/parser/parser-builder-base.ts index 9c34d8081..38f7d3af6 100644 --- a/packages/langium/src/parser/parser-builder-base.ts +++ b/packages/langium/src/parser/parser-builder-base.ts @@ -99,14 +99,15 @@ function buildRuleCall(ctx: RuleContext, ruleCall: RuleCall): Method { const rule = ruleCall.rule.ref; if (isParserRule(rule)) { const idx = ctx.subrule++; + const fragment = rule.fragment; const predicate = ruleCall.arguments.length > 0 ? buildRuleCallPredicate(rule, ruleCall.arguments) : () => ({}); - return (args) => ctx.parser.subrule(idx, getRule(ctx, rule), ruleCall, predicate(args)); + return (args) => ctx.parser.subrule(idx, getRule(ctx, rule), fragment, ruleCall, predicate(args)); } else if (isTerminalRule(rule)) { const idx = ctx.consume++; const method = getToken(ctx, rule.name); return () => ctx.parser.consume(idx, method, ruleCall); } else if (!rule) { - throw new ErrorWithLocation(ruleCall.$cstNode, `Undefined rule type: ${ruleCall.$type}`); + throw new ErrorWithLocation(ruleCall.$cstNode, `Undefined rule: ${ruleCall.rule.$refText}`); } else { assertUnreachable(rule); } @@ -273,8 +274,10 @@ function buildCrossReference(ctx: RuleContext, crossRef: CrossReference, termina } return buildCrossReference(ctx, crossRef, assignTerminal); } else if (isRuleCall(terminal) && isParserRule(terminal.rule.ref)) { + // The terminal is a data type rule here. Everything else will result in a validation error. + const rule = terminal.rule.ref; const idx = ctx.subrule++; - return (args) => ctx.parser.subrule(idx, getRule(ctx, terminal.rule.ref as ParserRule), crossRef, args); + return (args) => ctx.parser.subrule(idx, getRule(ctx, rule), false, crossRef, args); } else if (isRuleCall(terminal) && isTerminalRule(terminal.rule.ref)) { const idx = ctx.consume++; const terminalRule = getToken(ctx, terminal.rule.ref.name); diff --git a/packages/langium/test/parser/langium-parser-builder.test.ts b/packages/langium/test/parser/langium-parser-builder.test.ts index 5ebd679bb..0ac83c330 100644 --- a/packages/langium/test/parser/langium-parser-builder.test.ts +++ b/packages/langium/test/parser/langium-parser-builder.test.ts @@ -4,9 +4,11 @@ * terms of the MIT License, which is available in the project root. ******************************************************************************/ +/* eslint-disable @typescript-eslint/no-explicit-any */ + import type { TokenType, TokenVocabulary } from 'chevrotain'; -import type { AstNode, CstNode, GenericAstNode, Grammar, GrammarAST, LangiumParser, ParseResult, TokenBuilderOptions } from 'langium'; -import { EmptyFileSystem, DefaultTokenBuilder, GrammarUtils, CstUtils } from 'langium'; +import type { AstNode, CstNode, GenericAstNode, Grammar, GrammarAST, LangiumParser, ParseResult, ReferenceInfo, Scope, TokenBuilderOptions } from 'langium'; +import { EmptyFileSystem, DefaultTokenBuilder, GrammarUtils, CstUtils, DefaultScopeProvider, URI } from 'langium'; import { describe, expect, test, onTestFailed, beforeAll } from 'vitest'; import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar'; import { expandToString } from 'langium/generate'; @@ -706,6 +708,66 @@ describe('Fragment rules', () => { expect(result.value).toHaveProperty('values', ['ab', 'cd', 'ef']); }); + for (const [name, content] of [ + ['Array in fragment', ` + Start: + element=[Def] + ArrayCallSignature?; + fragment ArrayCallSignature: + (isArray?='[' ']');`], + ['Reference in fragment', ` + Start: + ElementRef + (isArray?='[' ']')?; + fragment ElementRef: + element=[Def];`] + ]) { + test(`Fragment rules don't create AST elements during parsing - ${name}`, async () => { + // This test is mostly based on a bug report in a GitHub discussion: + // https://github.com/eclipse-langium/langium/discussions/1638 + // In particular, the issue was that fragment rules used to create AST elements with type "undefined" + // When passing those into references, the reference would fail to resolve because the created AST element was just a placeholder + // Eventually, the parser would assign the properties of the fragment rule to the actual AST element + // But the reference would still use the old reference + // The fixed implementation no longer creates these fake AST elements, thereby skipping any potential issues completely. + let resolved = false; + const services = await createServicesForGrammar({ + grammar: ` + grammar FragmentRuleOverride + entry Entry: items+=(MemberCall|Def)*; + Def: 'def' name=ID; + MemberCall: + Start ({infer MemberCall.previous=current} "." element=[Def])*; + + ${content} + + terminal ID: /[_a-zA-Z][\\w_]*/; + hidden terminal WS: /\\s+/; + `, module: { + references: { + ScopeProvider: (services: any) => new class extends DefaultScopeProvider { + override getScope(context: ReferenceInfo): Scope { + const item = context.container as any; + const previous = item.previous; + if (previous) { + const previousElement = previous.element.ref; + // Ensure that the reference can be resolved + resolved = Boolean(previousElement); + } + return super.getScope(context); + } + }(services) + } + } + }); + const document = services.shared.workspace.LangiumDocumentFactory.fromString('def a def b a[].b', URI.parse('file:///test')); + await services.shared.workspace.DocumentBuilder.build([document]); + expect(document.parseResult.lexerErrors).toHaveLength(0); + expect(document.parseResult.parserErrors).toHaveLength(0); + expect(resolved).toBe(true); + }); + } + }); describe('Unicode terminal rules', () => {