diff --git a/cmd/brief/enrich.go b/cmd/brief/enrich.go index cdb13c2..0d6c000 100644 --- a/cmd/brief/enrich.go +++ b/cmd/brief/enrich.go @@ -30,7 +30,7 @@ func cmdEnrich(args []string) { keep := fs.Bool("keep", false, "Keep downloaded remote source") depth := fs.Int("depth", -1, "Git clone depth (0 = full clone, default shallow)") dir := fs.String("dir", "", "Directory to clone remote source into") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") _ = fs.Parse(args) diff --git a/cmd/brief/main.go b/cmd/brief/main.go index d564168..7efe2c4 100644 --- a/cmd/brief/main.go +++ b/cmd/brief/main.go @@ -70,7 +70,7 @@ func cmdScan(args []string) { keep := fs.Bool("keep", false, "Keep downloaded remote source") depth := fs.Int("depth", -1, "Git clone depth (0 = full clone, default shallow)") dir := fs.String("dir", "", "Directory to clone remote source into") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") tracked := fs.Bool("tracked", false, "Only consider files tracked by git") version := fs.Bool("version", false, "Print version and exit") diff --git a/cmd/brief/threat.go b/cmd/brief/threat.go index f2189ca..1dbe839 100644 --- a/cmd/brief/threat.go +++ b/cmd/brief/threat.go @@ -30,7 +30,7 @@ func runDetection(name string, args []string) (*detect.Engine, *brief.Report, ou jsonFlag := fs.Bool("json", false, "Force JSON output") humanFlag := fs.Bool("human", false, "Force human-readable output") markdownFlag := fs.Bool("markdown", false, "Force markdown output") - scanDepth := fs.Int("scan-depth", 0, "Max directory depth 
for language detection (default 4)") + scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (0 = unlimited)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") _ = fs.Parse(args) diff --git a/detect/detect.go b/detect/detect.go index c781499..f3a3d4c 100644 --- a/detect/detect.go +++ b/detect/detect.go @@ -28,7 +28,7 @@ import ( ) const ( - defaultScanDepth = 4 // max directory depth for language detection + extScanFileLimit = 10000 // max files to visit when collecting extensions microsPerMS = 1000.0 // microseconds per millisecond globSplitParts = 2 // expected parts when splitting "**/" patterns @@ -41,7 +41,7 @@ const ( type Engine struct { KB *kb.KnowledgeBase Root string - ScanDepth int // max directory depth for recursive detection (0 = default 4) + ScanDepth int // optional max directory depth for recursive detection (0 = unlimited) SkipDirs []string // additional directories to skip during walks TrackedOnly bool // only consider files tracked by git filesChecked int @@ -109,6 +109,7 @@ var skipDirs = map[string]bool{ "third_party": true, "thirdparty": true, "external": true, + "testdata": true, "tmp": true, "temp": true, "cache": true, @@ -564,35 +565,34 @@ func (e *Engine) recursiveGlob(pattern string) bool { return found } -// loadFileExts walks the project to a bounded depth to collect file extensions. -// Cached for the lifetime of the engine. Default depth of 4 covers most layouts -// (src/main/java/*.java, lib/something/*.rb). +// loadFileExts walks the project to collect file extension counts. Cached for +// the lifetime of the engine. The walk is bounded by extScanFileLimit rather +// than directory depth so that deep source layouts such as +// app/src/main/java/<pkg>/ are reached; skipDirs already prunes the +// expensive vendor/build directories. // Uses WalkDir instead of Walk to avoid following symlinks into directories. 
func (e *Engine) loadFileExts() { if e.fileExts != nil { return } e.fileExts = make(map[string]int) - maxDepth := e.ScanDepth - if maxDepth == 0 { - maxDepth = defaultScanDepth - } rootLen := len(e.Root) + seen := 0 + errDone := errors.New("done") _ = filepath.WalkDir(e.Root, func(path string, d os.DirEntry, err error) error { if err != nil { return nil } + rel := strings.TrimPrefix(path[rootLen:], string(filepath.Separator)) if d.IsDir() { name := d.Name() if name != "." && e.shouldSkipDir(name) { return filepath.SkipDir } - rel := path[rootLen:] - depth := strings.Count(rel, string(filepath.Separator)) - if depth > maxDepth { + if e.ScanDepth > 0 && strings.Count(rel, string(filepath.Separator))+1 > e.ScanDepth { return filepath.SkipDir } - if !e.isTracked(strings.TrimPrefix(rel, string(filepath.Separator))) { + if !e.isTracked(rel) { return filepath.SkipDir } return nil @@ -600,13 +600,17 @@ func (e *Engine) loadFileExts() { if d.Type()&os.ModeSymlink != 0 { return nil } - if !e.isTracked(strings.TrimPrefix(path[rootLen:], string(filepath.Separator))) { + if !e.isTracked(rel) { return nil } ext := filepath.Ext(d.Name()) if ext != "" { e.fileExts[ext]++ } + seen++ + if seen >= extScanFileLimit { + return errDone + } return nil }) } diff --git a/detect/detect_test.go b/detect/detect_test.go index caf5a18..76546eb 100644 --- a/detect/detect_test.go +++ b/detect/detect_test.go @@ -609,6 +609,59 @@ func TestPythonProject(t *testing.T) { } } +func TestGradleJavaKotlinDSL(t *testing.T) { + // Regression for #84: a Java project that uses the Kotlin DSL for its + // Gradle build scripts must be reported as Java, not Kotlin. 
+ r := runOn(t, "../testdata/gradle-java-kotlin-dsl") + + if len(r.Languages) != 1 || r.Languages[0].Name != "Java" { + t.Fatalf("expected only Java language, got %v", languageNames(r)) + } + if !slices.Contains(packageManagerNames(r), "Gradle") { + t.Errorf("expected Gradle package manager, got %v", packageManagerNames(r)) + } +} + +func TestGradleJavaGroovyDSL(t *testing.T) { + // Regression for #84: with build.gradle in app/ and source under + // app/src/main/java/<pkg>/, both Java and Gradle must be detected. + r := runOn(t, "../testdata/gradle-java-groovy-dsl") + + if len(r.Languages) == 0 || r.Languages[0].Name != "Java" { + t.Fatalf("expected Java language, got %v", languageNames(r)) + } + for _, l := range r.Languages { + if l.Name == "Groovy" { + t.Errorf("did not expect Groovy language for build script only, got %v", languageNames(r)) + } + } + if !slices.Contains(packageManagerNames(r), "Gradle") { + t.Errorf("expected Gradle package manager, got %v", packageManagerNames(r)) + } +} + +func TestScanDepthOverride(t *testing.T) { + engine := New(loadKB(t), "../testdata/gradle-java-groovy-dsl") + engine.ScanDepth = 2 + r, err := engine.Run() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, l := range r.Languages { + if l.Name == "Java" { + t.Errorf("expected ScanDepth=2 to miss app/src/main/java/..., got %v", languageNames(r)) + } + } +} + +func languageNames(r *brief.Report) []string { + names := make([]string, 0, len(r.Languages)) + for _, l := range r.Languages { + names = append(names, l.Name) + } + return names +} + func writeProjectFile(t *testing.T, dir, path, content string) { t.Helper() full := filepath.Join(dir, path) diff --git a/knowledge/java/language.toml b/knowledge/java/language.toml index 7db4644..396f9eb 100644 --- a/knowledge/java/language.toml +++ b/knowledge/java/language.toml @@ -6,7 +6,7 @@ docs = "https://docs.oracle.com/en/java/" description = "General-purpose, class-based, object-oriented programming language" 
[detect] -files = ["pom.xml", "build.gradle", "build.gradle.kts", "*.java", "**/*.java"] +files = ["pom.xml", "*.java", "**/*.java"] ecosystems = ["java"] [taxonomy] diff --git a/knowledge/kotlin/language.toml b/knowledge/kotlin/language.toml index 49823bb..636f798 100644 --- a/knowledge/kotlin/language.toml +++ b/knowledge/kotlin/language.toml @@ -7,7 +7,7 @@ repo = "https://github.com/JetBrains/kotlin" description = "Modern, concise programming language for the JVM" [detect] -files = ["*.kt", "**/*.kt", "*.kts", "build.gradle.kts"] +files = ["*.kt", "**/*.kt"] ecosystems = ["kotlin"] [taxonomy] diff --git a/testdata/gradle-java-groovy-dsl/app/build.gradle b/testdata/gradle-java-groovy-dsl/app/build.gradle new file mode 100644 index 0000000..e611be9 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/app/build.gradle @@ -0,0 +1,7 @@ +plugins { + id 'application' +} + +application { + mainClass = 'org.example.App' +} diff --git a/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java b/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java new file mode 100644 index 0000000..1232146 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/app/src/main/java/org/example/App.java @@ -0,0 +1,7 @@ +package org.example; + +public class App { + public static void main(String[] args) { + System.out.println("hello"); + } +} diff --git a/testdata/gradle-java-groovy-dsl/settings.gradle b/testdata/gradle-java-groovy-dsl/settings.gradle new file mode 100644 index 0000000..8cb2fb9 --- /dev/null +++ b/testdata/gradle-java-groovy-dsl/settings.gradle @@ -0,0 +1,2 @@ +rootProject.name = 'test-gradle' +include('app') diff --git a/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts b/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts new file mode 100644 index 0000000..c52ccb3 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/app/build.gradle.kts @@ -0,0 +1,7 @@ +plugins { + application +} + +application { + mainClass = "org.example.App" +} diff --git 
a/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java b/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java new file mode 100644 index 0000000..1232146 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/app/src/main/java/org/example/App.java @@ -0,0 +1,7 @@ +package org.example; + +public class App { + public static void main(String[] args) { + System.out.println("hello"); + } +} diff --git a/testdata/gradle-java-kotlin-dsl/settings.gradle.kts b/testdata/gradle-java-kotlin-dsl/settings.gradle.kts new file mode 100644 index 0000000..27ee486 --- /dev/null +++ b/testdata/gradle-java-kotlin-dsl/settings.gradle.kts @@ -0,0 +1,2 @@ +rootProject.name = "test-gradle" +include("app")