From b595e49123acca0031e16775c28e0cf2b37ce0f1 Mon Sep 17 00:00:00 2001 From: Saeed-Nourizadeh Date: Thu, 25 Jun 2026 18:54:39 +0300 Subject: [PATCH 1/3] fix(csharp): emit INHERITS edges from base_list clause tree-sitter-csharp wraps a class's `: Base, IFace` clause in a base_list node. _get_bases() handled C# in the shared java/kotlin branch, which only looks for superclass/super_interfaces nodes the C# grammar never emits, so every C# class produced zero INHERITS edges and inheritors_of / get_impact_radius returned empty for .cs files. Adds a csharp-specific branch that drills into base_list for the named base-type children (identifier / qualified_name / generic_name), extracting bare type names. Kotlin retains its existing handling. Extends tests/fixtures/Sample.cs with extends + implements, a qualified base (System.IDisposable) and a generic base (List), and adds inheritance assertions to TestCSharpParsing. --- code_review_graph/parser.py | 20 +++++++++++++++++++- tests/fixtures/Sample.cs | 21 +++++++++++++++++++++ tests/test_multilang.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index c55b2e8f..e3a8ed81 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -6085,7 +6085,25 @@ def _get_bases(self, node, language: str, source: bytes) -> list[str]: for ident in sub.children: if ident.type in ("type_identifier", "generic_type"): bases.append(ident.text.decode("utf-8", errors="replace")) - elif language in ("csharp", "kotlin"): + elif language == "csharp": + # C#: class_declaration carries a `base_list` child wrapping the + # `: Base, IFace, ...` clause. Its `.text` includes the leading + # colon and commas, so drill into the named base-type children + # (identifier / qualified_name / generic_name) for bare names. 
+ # Without this, the shared handler below looked for + # superclass/super_interfaces nodes the C# grammar never emits, + # so C# classes produced zero INHERITS edges and inheritors_of / + # get_impact_radius returned empty for .cs files. + for child in node.children: + if child.type == "base_list": + for sub in child.children: + if sub.type in ( + "identifier", "qualified_name", "generic_name", + ): + bases.append( + sub.text.decode("utf-8", errors="replace") + ) + elif language == "kotlin": # Look for superclass/interfaces in extends/implements clauses for child in node.children: if child.type in ( diff --git a/tests/fixtures/Sample.cs b/tests/fixtures/Sample.cs index 250331bd..a3f07228 100644 --- a/tests/fixtures/Sample.cs +++ b/tests/fixtures/Sample.cs @@ -45,4 +45,25 @@ public User GetUser(int id) return _repo.FindById(id); } } + + // Inheritance coverage for INHERITS edges (base_list clause). + // Extends a base class AND implements an interface (both bare identifiers). + public class CachedRepo : InMemoryRepo, IRepository + { + public new User FindById(int id) + { + return base.FindById(id); + } + } + + // Qualified base type name (qualified_name node). + public class DisposableService : System.IDisposable + { + public void Dispose() { } + } + + // Generic base type name (generic_name node). 
+ public class UserList : List + { + } } diff --git a/tests/test_multilang.py b/tests/test_multilang.py index afda355e..7914f872 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -389,6 +389,36 @@ def test_finds_methods(self): names = {f.name for f in funcs} assert "FindById" in names or "Save" in names + def test_finds_inheritance(self): + inherits = [e for e in self.edges if e.kind == "INHERITS"] + # InMemoryRepo : IRepository, CachedRepo : InMemoryRepo, IRepository + assert len(inherits) >= 3 + targets = {e.target for e in inherits} + assert "IRepository" in targets + assert "InMemoryRepo" in targets + + def test_inheritance_target_is_bare_name(self): + """INHERITS target must be the bare type name, not ': Foo' or 'Foo, Bar'. + + tree-sitter-csharp wraps the `: Base, IFace` clause in a base_list + node whose .text includes the colon and commas, so _get_bases reads + the named base-type children instead. Without the csharp-specific + branch no base is extracted and inheritors_of / get_impact_radius miss + .cs files. Qualified (System.IDisposable) and generic (List) base + names must be preserved. + """ + inherits = [e for e in self.edges if e.kind == "INHERITS"] + targets = {e.target for e in inherits} + assert "System.IDisposable" in targets # qualified_name preserved + assert "List" in targets # generic_name preserved + for e in inherits: + assert not e.target.startswith(":"), ( + f"INHERITS target should be a bare type name, got: {e.target!r}" + ) + assert "," not in e.target, ( + f"INHERITS target should be a single type, got: {e.target!r}" + ) + class TestRubyParsing: def setup_method(self): From 9ed04c3becaf6cac6ecc9bae38ecccb2a00bb7a4 Mon Sep 17 00:00:00 2001 From: Saeed-Nourizadeh Date: Fri, 26 Jun 2026 09:46:09 +0300 Subject: [PATCH 2/3] address review: records/structs, alias+qualified-generic bases, constraint safety - Parse record_declaration / record_struct_declaration as C# class-like nodes so record inheritance reaches _get_bases(). 
- Broaden the csharp base_list extraction to iterate *named* base entries (robust across grammar versions: identifier / qualified_name / generic_name / alias_qualified_name / type), and handle positional-record primary_constructor_base_type by taking its type (dropping the (args)). Generic constraints (where T : Base) live outside base_list and are never captured. - Tests: add record + interface, positional record, struct, nested-qualified generic base, and a constraint-non-capture assertion. --- code_review_graph/parser.py | 43 +++++++++++++++++++++++++++---------- tests/fixtures/Sample.cs | 32 +++++++++++++++++++++++++++ tests/test_multilang.py | 26 ++++++++++++++++++++++ 3 files changed, 90 insertions(+), 11 deletions(-) diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index e3a8ed81..2d387dea 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -195,6 +195,7 @@ class EdgeInfo: "csharp": [ "class_declaration", "interface_declaration", "enum_declaration", "struct_declaration", + "record_declaration", "record_struct_declaration", ], "ruby": ["class", "module"], "r": [], # Classes detected via call pattern-matching, not AST node types @@ -6086,23 +6087,43 @@ def _get_bases(self, node, language: str, source: bytes) -> list[str]: if ident.type in ("type_identifier", "generic_type"): bases.append(ident.text.decode("utf-8", errors="replace")) elif language == "csharp": - # C#: class_declaration carries a `base_list` child wrapping the - # `: Base, IFace, ...` clause. Its `.text` includes the leading - # colon and commas, so drill into the named base-type children - # (identifier / qualified_name / generic_name) for bare names. + # C#: class / record / struct / interface declarations carry a + # `base_list` child wrapping the `: Base, IFace, ...` clause. Its + # `.text` includes the leading colon and commas, so iterate the + # *named* base-type entries and take each one's bare text. 
Using + # is_named (rather than a fixed node-type allowlist) keeps this + # robust across tree-sitter-c-sharp versions, which variously emit + # identifier / qualified_name / generic_name / alias_qualified_name + # / type for a base entry. Punctuation (':' and ',') is unnamed and + # skipped. Generic parameter constraints (`where T : Base`) live in + # a sibling clause, not base_list, so they are never captured. # Without this, the shared handler below looked for # superclass/super_interfaces nodes the C# grammar never emits, - # so C# classes produced zero INHERITS edges and inheritors_of / + # so C# types produced zero INHERITS edges and inheritors_of / # get_impact_radius returned empty for .cs files. for child in node.children: - if child.type == "base_list": - for sub in child.children: - if sub.type in ( - "identifier", "qualified_name", "generic_name", - ): + if child.type != "base_list": + continue + for sub in child.children: + if not sub.is_named: + continue + if sub.type == "primary_constructor_base_type": + # positional record: `record R(...) : Base(args)` — + # keep the type, drop the constructor argument_list. + type_node = sub.child_by_field_name("type") + if type_node is not None: bases.append( - sub.text.decode("utf-8", errors="replace") + type_node.text.decode("utf-8", errors="replace") ) + else: + for t in sub.children: + if t.is_named and t.type != "argument_list": + bases.append( + t.text.decode("utf-8", errors="replace") + ) + break + continue + bases.append(sub.text.decode("utf-8", errors="replace")) elif language == "kotlin": # Look for superclass/interfaces in extends/implements clauses for child in node.children: diff --git a/tests/fixtures/Sample.cs b/tests/fixtures/Sample.cs index a3f07228..5af79eb9 100644 --- a/tests/fixtures/Sample.cs +++ b/tests/fixtures/Sample.cs @@ -66,4 +66,36 @@ public void Dispose() { } public class UserList : List { } + + // Nested-qualified generic base (qualified generic_name). 
+ public class ScopedUserList : System.Collections.Generic.List + { + } + + // Generic type parameter constraint — `where T : IRepository` is NOT a base + // and must NOT produce an INHERITS edge. ConstrainedHolder itself has no base. + public class ConstrainedHolder where T : IRepository + { + public T Value { get; set; } + } + + // record with a base class + interface (record_declaration must be parsed + // as a class-like node so its base_list is reached). + public record AuditedUser : User, IRepository + { + public User FindById(int id) { return null; } + public void Save(User user) { } + } + + // positional record with a primary-constructor base (drop the (args)). + public record TaggedUser(int Id, string Tag) : User + { + } + + // struct implementing an interface. + public struct Token : IRepository + { + public User FindById(int id) { return null; } + public void Save(User user) { } + } } diff --git a/tests/test_multilang.py b/tests/test_multilang.py index 7914f872..38c6a9ac 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -419,6 +419,32 @@ def test_inheritance_target_is_bare_name(self): f"INHERITS target should be a single type, got: {e.target!r}" ) + def test_inheritance_hard_cases(self): + """Records, structs and nested-qualified generics reach _get_bases, and + generic constraints (`where T : Base`) do NOT produce edges. + """ + inherits = [e for e in self.edges if e.kind == "INHERITS"] + by_source = {} + for e in inherits: + by_source.setdefault(e.source.rsplit("::", 1)[-1], set()).add(e.target) + + # record AuditedUser : User, IRepository (record_declaration parsed) + assert by_source.get("AuditedUser") == {"User", "IRepository"}, by_source.get( + "AuditedUser" + ) + # positional record TaggedUser(...) 
: User (drop the primary-ctor args) + assert by_source.get("TaggedUser") == {"User"}, by_source.get("TaggedUser") + # struct Token : IRepository + assert "IRepository" in by_source.get("Token", set()) + # nested-qualified generic base preserved verbatim + assert "System.Collections.Generic.List" in { + e.target for e in inherits + } + # `where T : IRepository` is a constraint, not a base — no edge for it. + assert "ConstrainedHolder" not in by_source, ( + "generic constraint must not produce an INHERITS edge" + ) + class TestRubyParsing: def setup_method(self): From 97518a0ed6945fe23544919b1bcbeede77ce2695 Mon Sep 17 00:00:00 2001 From: Saeed-Nourizadeh Date: Fri, 26 Jun 2026 10:18:02 +0300 Subject: [PATCH 3/3] review: don't emit bogus bases for primary-ctor args or enum underlying types - Skip argument_list children of base_list: `class C(int x) : Base(x)` lists the ctor args `(x)` as a named sibling of the base type; it must not become an INHERITS target. - Return no bases for enum_declaration: `enum E : byte` carries the underlying storage type in base_list, which is not inheritance. - Tests: primary-constructor class (assert base only, no '(args)') and enum underlying type (assert no edge). --- code_review_graph/parser.py | 11 ++++++++++- tests/fixtures/Sample.cs | 15 +++++++++++++++ tests/test_multilang.py | 12 ++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index 2d387dea..a27a61d4 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -6101,11 +6101,20 @@ def _get_bases(self, node, language: str, source: bytes) -> list[str]: # superclass/super_interfaces nodes the C# grammar never emits, # so C# types produced zero INHERITS edges and inheritors_of / # get_impact_radius returned empty for .cs files. + # + # Two things the base_list contains that are NOT base types: + # * `enum E : byte` — the base_list holds the enum's *underlying + # type*, not a base. 
Enums never inherit, so emit nothing. + # * `class C(int x) : Base(x)` — the primary-constructor arguments + # `(x)` appear as an `argument_list` sibling of the base type + # (and inside `primary_constructor_base_type` for records). + if node.type == "enum_declaration": + return bases for child in node.children: if child.type != "base_list": continue for sub in child.children: - if not sub.is_named: + if not sub.is_named or sub.type == "argument_list": continue if sub.type == "primary_constructor_base_type": # positional record: `record R(...) : Base(args)` — diff --git a/tests/fixtures/Sample.cs b/tests/fixtures/Sample.cs index 5af79eb9..37bc0837 100644 --- a/tests/fixtures/Sample.cs +++ b/tests/fixtures/Sample.cs @@ -98,4 +98,19 @@ public struct Token : IRepository public User FindById(int id) { return null; } public void Save(User user) { } } + + // primary-constructor class: the base ctor args `(seed)` appear as an + // argument_list sibling in base_list and must NOT become a base target. + public class SeededRepo(int seed) : InMemoryRepo + { + public int Seed { get; } = seed; + } + + // enum with an underlying type — `: byte` is the storage type, NOT a base. + // Must produce no INHERITS edge. + public enum Status : byte + { + Active, + Closed, + } } diff --git a/tests/test_multilang.py b/tests/test_multilang.py index 38c6a9ac..2d7cfd76 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -444,6 +444,18 @@ def test_inheritance_hard_cases(self): assert "ConstrainedHolder" not in by_source, ( "generic constraint must not produce an INHERITS edge" ) + # primary-constructor class: `: InMemoryRepo` is the base; the ctor + # args `(seed)` must NOT leak in as a bogus '(seed)' target. 
+ assert by_source.get("SeededRepo") == {"InMemoryRepo"}, by_source.get( + "SeededRepo" + ) + for e in inherits: + assert not e.target.startswith("("), ( + f"primary-ctor args must not become a base target: {e.target!r}" + ) + # enum underlying type (`enum Status : byte`) is not inheritance. + assert "Status" not in by_source, "enum underlying type must not inherit" + assert "byte" not in {e.target for e in inherits} class TestRubyParsing: