From e0af783558b81ba90ec55d2dd862c25277a6f347 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Sat, 23 May 2026 18:14:29 +0100 Subject: [PATCH] Detect Java in Gradle projects regardless of build script DSL The default extension scan stopped at depth 4, which misses app/src/main/java/org/example/ in the layout that gradle init produces, so **/*.java found nothing and the ecosystem gate then filtered out Gradle itself. At the same time *.kts in Kotlin's detect.files matched settings.gradle.kts at the root and reported the project as Kotlin. Replace the depth bound with a 10000-file cap (ScanDepth still applies when set explicitly), drop Gradle build script patterns from the Java and Kotlin language definitions, and add testdata to skipDirs so fixture source does not leak into the host project's language list. Fixes #84 --- cmd/brief/enrich.go | 2 +- cmd/brief/main.go | 2 +- cmd/brief/threat.go | 2 +- detect/detect.go | 32 ++++++----- detect/detect_test.go | 53 +++++++++++++++++++ knowledge/java/language.toml | 2 +- knowledge/kotlin/language.toml | 2 +- .../gradle-java-groovy-dsl/app/build.gradle | 7 +++ .../app/src/main/java/org/example/App.java | 7 +++ .../gradle-java-groovy-dsl/settings.gradle | 2 + .../app/build.gradle.kts | 7 +++ .../app/src/main/java/org/example/App.java | 7 +++ .../settings.gradle.kts | 2 + 13 files changed, 108 insertions(+), 19 deletions(-) create mode 100644 testdata/gradle-java-groovy-dsl/app/build.gradle create mode 100644 testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java create mode 100644 testdata/gradle-java-groovy-dsl/settings.gradle create mode 100644 testdata/gradle-java-kotlin-dsl/app/build.gradle.kts create mode 100644 testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java create mode 100644 testdata/gradle-java-kotlin-dsl/settings.gradle.kts diff --git a/cmd/brief/enrich.go b/cmd/brief/enrich.go index cdb13c2..0d6c000 100644 --- a/cmd/brief/enrich.go +++ b/cmd/brief/enrich.go @@ -30,7 +30,7 @@ func 
cmdEnrich(args []string) { keep := fs.Bool("keep", false, "Keep downloaded remote source") depth := fs.Int("depth", -1, "Git clone depth (0 = full clone, default shallow)") dir := fs.String("dir", "", "Directory to clone remote source into") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") _ = fs.Parse(args) diff --git a/cmd/brief/main.go b/cmd/brief/main.go index d564168..7efe2c4 100644 --- a/cmd/brief/main.go +++ b/cmd/brief/main.go @@ -70,7 +70,7 @@ func cmdScan(args []string) { keep := fs.Bool("keep", false, "Keep downloaded remote source") depth := fs.Int("depth", -1, "Git clone depth (0 = full clone, default shallow)") dir := fs.String("dir", "", "Directory to clone remote source into") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") tracked := fs.Bool("tracked", false, "Only consider files tracked by git") version := fs.Bool("version", false, "Print version and exit") diff --git a/cmd/brief/threat.go b/cmd/brief/threat.go index f2189ca..1dbe839 100644 --- a/cmd/brief/threat.go +++ b/cmd/brief/threat.go @@ -30,7 +30,7 @@ func runDetection(name string, args []string) (*detect.Engine, *brief.Report, ou jsonFlag := fs.Bool("json", false, "Force JSON output") humanFlag := fs.Bool("human", false, "Force human-readable output") markdownFlag := fs.Bool("markdown", false, "Force markdown output") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := 
fs.String("skip", "", "Additional directories to skip, comma-separated") _ = fs.Parse(args) diff --git a/detect/detect.go b/detect/detect.go index c781499..f3a3d4c 100644 --- a/detect/detect.go +++ b/detect/detect.go @@ -28,7 +28,7 @@ import ( ) const ( - defaultScanDepth = 4 // max directory depth for language detection + extScanFileLimit = 10000 // max files to visit when collecting extensions microsPerMS = 1000.0 // microseconds per millisecond globSplitParts = 2 // expected parts when splitting "**/" patterns @@ -41,7 +41,7 @@ const ( type Engine struct { KB *kb.KnowledgeBase Root string - ScanDepth int // max directory depth for recursive detection (0 = default 4) + ScanDepth int // optional max directory depth for recursive detection (0 = unlimited) SkipDirs []string // additional directories to skip during walks TrackedOnly bool // only consider files tracked by git filesChecked int @@ -109,6 +109,7 @@ var skipDirs = map[string]bool{ "third_party": true, "thirdparty": true, "external": true, + "testdata": true, "tmp": true, "temp": true, "cache": true, @@ -564,35 +565,34 @@ func (e *Engine) recursiveGlob(pattern string) bool { return found } -// loadFileExts walks the project to a bounded depth to collect file extensions. -// Cached for the lifetime of the engine. Default depth of 4 covers most layouts -// (src/main/java/*.java, lib/something/*.rb). +// loadFileExts walks the project to collect file extension counts. Cached for +// the lifetime of the engine. The walk is bounded by extScanFileLimit rather +// than directory depth so that deep source layouts such as +// app/src/main/java/org/example/ are reached; skipDirs already prunes the +// expensive vendor/build directories. // Uses WalkDir instead of Walk to avoid following symlinks into directories. 
func (e *Engine) loadFileExts() { if e.fileExts != nil { return } e.fileExts = make(map[string]int) - maxDepth := e.ScanDepth - if maxDepth == 0 { - maxDepth = defaultScanDepth - } rootLen := len(e.Root) + seen := 0 + errDone := errors.New("done") _ = filepath.WalkDir(e.Root, func(path string, d os.DirEntry, err error) error { if err != nil { return nil } + rel := strings.TrimPrefix(path[rootLen:], string(filepath.Separator)) if d.IsDir() { name := d.Name() if name != "." && e.shouldSkipDir(name) { return filepath.SkipDir } - rel := path[rootLen:] - depth := strings.Count(rel, string(filepath.Separator)) - if depth > maxDepth { + if e.ScanDepth > 0 && strings.Count(rel, string(filepath.Separator))+1 > e.ScanDepth { return filepath.SkipDir } - if !e.isTracked(strings.TrimPrefix(rel, string(filepath.Separator))) { + if !e.isTracked(rel) { return filepath.SkipDir } return nil @@ -600,13 +600,17 @@ func (e *Engine) loadFileExts() { if d.Type()&os.ModeSymlink != 0 { return nil } - if !e.isTracked(strings.TrimPrefix(path[rootLen:], string(filepath.Separator))) { + if !e.isTracked(rel) { return nil } ext := filepath.Ext(d.Name()) if ext != "" { e.fileExts[ext]++ } + seen++ + if seen >= extScanFileLimit { + return errDone + } return nil }) } diff --git a/detect/detect_test.go b/detect/detect_test.go index caf5a18..76546eb 100644 --- a/detect/detect_test.go +++ b/detect/detect_test.go @@ -609,6 +609,59 @@ func TestPythonProject(t *testing.T) { } } +func TestGradleJavaKotlinDSL(t *testing.T) { + // Regression for #84: a Java project that uses the Kotlin DSL for its + // Gradle build scripts must be reported as Java, not Kotlin. 
+ r := runOn(t, "../testdata/gradle-java-kotlin-dsl") + + if len(r.Languages) != 1 || r.Languages[0].Name != "Java" { + t.Fatalf("expected only Java language, got %v", languageNames(r)) + } + if !slices.Contains(packageManagerNames(r), "Gradle") { + t.Errorf("expected Gradle package manager, got %v", packageManagerNames(r)) + } +} + +func TestGradleJavaGroovyDSL(t *testing.T) { + // Regression for #84: with build.gradle in app/ and source under + // app/src/main/java/org/example/, both Java and Gradle must be detected. + r := runOn(t, "../testdata/gradle-java-groovy-dsl") + + if len(r.Languages) == 0 || r.Languages[0].Name != "Java" { + t.Fatalf("expected Java language, got %v", languageNames(r)) + } + for _, l := range r.Languages { + if l.Name == "Groovy" { + t.Errorf("did not expect Groovy language for build script only, got %v", languageNames(r)) + } + } + if !slices.Contains(packageManagerNames(r), "Gradle") { + t.Errorf("expected Gradle package manager, got %v", packageManagerNames(r)) + } +} + +func TestScanDepthOverride(t *testing.T) { + engine := New(loadKB(t), "../testdata/gradle-java-groovy-dsl") + engine.ScanDepth = 2 + r, err := engine.Run() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, l := range r.Languages { + if l.Name == "Java" { + t.Errorf("expected ScanDepth=2 to miss app/src/main/java/..., got %v", languageNames(r)) + } + } +} + +func languageNames(r *brief.Report) []string { + names := make([]string, 0, len(r.Languages)) + for _, l := range r.Languages { + names = append(names, l.Name) + } + return names +} + func writeProjectFile(t *testing.T, dir, path, content string) { t.Helper() full := filepath.Join(dir, path) diff --git a/knowledge/java/language.toml b/knowledge/java/language.toml index 7db4644..396f9eb 100644 --- a/knowledge/java/language.toml +++ b/knowledge/java/language.toml @@ -6,7 +6,7 @@ docs = "https://docs.oracle.com/en/java/" description = "General-purpose, class-based, object-oriented programming language" 
[detect] -files = ["pom.xml", "build.gradle", "build.gradle.kts", "*.java", "**/*.java"] +files = ["pom.xml", "*.java", "**/*.java"] ecosystems = ["java"] [taxonomy] diff --git a/knowledge/kotlin/language.toml b/knowledge/kotlin/language.toml index 49823bb..636f798 100644 --- a/knowledge/kotlin/language.toml +++ b/knowledge/kotlin/language.toml @@ -7,7 +7,7 @@ repo = "https://github.com/JetBrains/kotlin" description = "Modern, concise programming language for the JVM" [detect] -files = ["*.kt", "**/*.kt", "*.kts", "build.gradle.kts"] +files = ["*.kt", "**/*.kt"] ecosystems = ["kotlin"] [taxonomy] diff --git a/testdata/gradle-java-groovy-dsl/app/build.gradle b/testdata/gradle-java-groovy-dsl/app/build.gradle new file mode 100644 index 0000000..e611be9 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/app/build.gradle @@ -0,0 +1,7 @@ +plugins { + id 'application' +} + +application { + mainClass = 'org.example.App' +} diff --git a/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java b/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java new file mode 100644 index 0000000..1232146 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java @@ -0,0 +1,7 @@ +package org.example; + +public class App { + public static void main(String[] args) { + System.out.println("hello"); + } +} diff --git a/testdata/gradle-java-groovy-dsl/settings.gradle b/testdata/gradle-java-groovy-dsl/settings.gradle new file mode 100644 index 0000000..8cb2fb9 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/settings.gradle @@ -0,0 +1,2 @@ +rootProject.name = 'test-gradle' +include('app') diff --git a/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts b/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts new file mode 100644 index 0000000..c52ccb3 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts @@ -0,0 +1,7 @@ +plugins { + application +} + +application { + mainClass = "org.example.App" +} diff --git 
a/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java b/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java new file mode 100644 index 0000000..1232146 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java @@ -0,0 +1,7 @@ +package org.example; + +public class App { + public static void main(String[] args) { + System.out.println("hello"); + } +} diff --git a/testdata/gradle-java-kotlin-dsl/settings.gradle.kts b/testdata/gradle-java-kotlin-dsl/settings.gradle.kts new file mode 100644 index 0000000..27ee486 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/settings.gradle.kts @@ -0,0 +1,2 @@ +rootProject.name = "test-gradle" +include("app")