// @ts-check
// Source file: actions/setup/js/glob_pattern_helpers.cjs

/**
 * Convert a glob pattern to an anchored RegExp.
 *
 * Token translation (everything else is copied into the regex unchanged):
 * - `**` becomes `.*`     (any characters, including `/`)
 * - `*`  becomes `[^/]*`  (any characters except `/`)
 * - `.`  becomes `\.`     (literal dot)
 * - `\`  becomes `\\`     (literal backslash)
 *
 * The result is wrapped in `^...$`, so the whole path must match.
 *
 * NOTE(review): only the four tokens above are translated. Other regex
 * metacharacters in the pattern (for example `+`, `?`, `(`, `[`) reach the
 * RegExp constructor as-is and keep their regex meaning; an unbalanced one
 * makes the constructor throw a SyntaxError.
 *
 * @param {string} pattern - Glob pattern (e.g., "*.json", "metrics/**", "data/**\/*.csv")
 * @returns {RegExp} - Regular expression that matches the pattern
 *
 * @example
 * const regex = globPatternToRegex("*.json");
 * regex.test("file.json"); // true
 * regex.test("file.txt"); // false
 *
 * @example
 * const regex = globPatternToRegex("metrics/**");
 * regex.test("metrics/data.json"); // true
 * regex.test("metrics/daily/data.json"); // true
 */
function globPatternToRegex(pattern) {
  // Translate every token in one left-to-right pass. Because `**` is listed
  // before `*` in the alternation it wins whenever two stars are adjacent, so
  // no temporary placeholder is needed to keep the two wildcards apart, and
  // text that has already been emitted is never scanned a second time.
  const regexPattern = pattern.replace(/\\|\.|\*\*|\*/g, token => {
    switch (token) {
      case "\\":
        return "\\\\"; // literal backslash
      case ".":
        return "\\."; // literal dot
      case "**":
        return ".*"; // crosses directory separators
      default:
        return "[^/]*"; // single "*": stays within one path segment
    }
  });
  return new RegExp(`^${regexPattern}$`);
}

/**
 * Parse a whitespace-separated list of glob patterns into RegExp objects.
 *
 * Leading/trailing whitespace is trimmed and runs of whitespace count as a
 * single separator, so an empty or blank filter yields an empty array.
 *
 * @param {string} fileGlobFilter - Space-separated glob patterns (e.g., "*.json *.jsonl *.csv *.md")
 * @returns {RegExp[]} - One regular expression per pattern, in input order
 *
 * @example
 * const patterns = parseGlobPatterns("*.json *.jsonl");
 * patterns[0].test("file.json"); // true
 * patterns[1].test("file.jsonl"); // true
 */
function parseGlobPatterns(fileGlobFilter) {
  return fileGlobFilter
    .trim()
    .split(/\s+/)
    .filter(Boolean) // "".split(/\s+/) yields [""]; drop that empty entry
    .map(pattern => globPatternToRegex(pattern));
}
+/** + * Check if a file path matches any of the provided glob patterns + * @param {string} filePath - File path to test (e.g., "data/file.json") + * @param {string} fileGlobFilter - Space-separated glob patterns + * @returns {boolean} - True if the file matches at least one pattern + * + * @example + * matchesGlobPattern("file.json", "*.json *.jsonl"); // true + * matchesGlobPattern("file.txt", "*.json *.jsonl"); // false + */ +function matchesGlobPattern(filePath, fileGlobFilter) { + const patterns = parseGlobPatterns(fileGlobFilter); + return patterns.some(pattern => pattern.test(filePath)); +} + +module.exports = { + globPatternToRegex, + parseGlobPatterns, + matchesGlobPattern, +}; diff --git a/actions/setup/js/glob_pattern_helpers.test.cjs b/actions/setup/js/glob_pattern_helpers.test.cjs new file mode 100644 index 0000000000..eb5f2b180b --- /dev/null +++ b/actions/setup/js/glob_pattern_helpers.test.cjs @@ -0,0 +1,361 @@ +import { describe, it, expect } from "vitest"; +import { globPatternToRegex, parseGlobPatterns, matchesGlobPattern } from "./glob_pattern_helpers.cjs"; + +describe("glob_pattern_helpers.cjs", () => { + describe("globPatternToRegex", () => { + describe("basic pattern matching", () => { + it("should match exact filenames without wildcards", () => { + const regex = globPatternToRegex("specific-file.txt"); + + expect(regex.test("specific-file.txt")).toBe(true); + expect(regex.test("specific-file.md")).toBe(false); + expect(regex.test("other-file.txt")).toBe(false); + }); + + it("should match files with * wildcard (single segment)", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.test("data.json")).toBe(true); + expect(regex.test("config.json")).toBe(true); + expect(regex.test("file.jsonl")).toBe(false); + expect(regex.test("dir/data.json")).toBe(false); // * doesn't cross directories + }); + + it("should match files with ** wildcard (multi-segment)", () => { + const regex = globPatternToRegex("metrics/**"); + + 
expect(regex.test("metrics/file.json")).toBe(true); + expect(regex.test("metrics/daily/file.json")).toBe(true); + expect(regex.test("metrics/daily/archive/file.json")).toBe(true); + expect(regex.test("data/file.json")).toBe(false); + }); + + it("should distinguish between * and **", () => { + const singleStar = globPatternToRegex("logs/*"); + const doubleStar = globPatternToRegex("logs/**"); + + // Single * should match direct children only + expect(singleStar.test("logs/error.log")).toBe(true); + expect(singleStar.test("logs/2024/error.log")).toBe(false); + + // Double ** should match nested paths + expect(doubleStar.test("logs/error.log")).toBe(true); + expect(doubleStar.test("logs/2024/error.log")).toBe(true); + expect(doubleStar.test("logs/2024/12/error.log")).toBe(true); + }); + }); + + describe("special character escaping", () => { + it("should escape dots correctly", () => { + const regex = globPatternToRegex("file.txt"); + + expect(regex.test("file.txt")).toBe(true); + expect(regex.test("filextxt")).toBe(false); // dot shouldn't act as wildcard + expect(regex.test("file_txt")).toBe(false); + }); + + it("should escape backslashes correctly", () => { + // Test pattern with backslash (though rare in file patterns) + const regex = globPatternToRegex("test\\.txt"); + + // The backslash should be escaped, making this match literally + expect(regex.source).toContain("\\\\"); + }); + + it("should handle patterns with multiple dots", () => { + const regex = globPatternToRegex("file.min.js"); + + expect(regex.test("file.min.js")).toBe(true); + expect(regex.test("filexminxjs")).toBe(false); + }); + }); + + describe("real-world patterns", () => { + it("should match .jsonl files (daily-code-metrics use case)", () => { + const regex = globPatternToRegex("*.jsonl"); + + expect(regex.test("history.jsonl")).toBe(true); + expect(regex.test("data.jsonl")).toBe(true); + expect(regex.test("metrics.jsonl")).toBe(true); + expect(regex.test("file.json")).toBe(false); + }); + + 
it("should match nested metrics files", () => { + const regex = globPatternToRegex("metrics/**/*.json"); + + // metrics/**/*.json = metrics/ + .* + / + [^/]*.json + // The ** matches any path (including empty), but literal / after ** must exist + expect(regex.test("metrics/daily/2024-12-26.json")).toBe(true); + expect(regex.test("metrics/subdir/another/file.json")).toBe(true); + + // This won't match because we need the / after ** even if ** matches empty + expect(regex.test("metrics/2024-12-26.json")).toBe(false); + expect(regex.test("data/metrics.json")).toBe(false); + + // To match both nested and direct children, use: metrics/** + const flexibleRegex = globPatternToRegex("metrics/**"); + expect(flexibleRegex.test("metrics/2024-12-26.json")).toBe(true); + expect(flexibleRegex.test("metrics/daily/file.json")).toBe(true); + }); + + it("should match campaign-specific patterns", () => { + const cursorRegex = globPatternToRegex("security-q1/cursor.json"); + const metricsRegex = globPatternToRegex("security-q1/metrics/**"); + + expect(cursorRegex.test("security-q1/cursor.json")).toBe(true); + expect(cursorRegex.test("security-q1/metrics/file.json")).toBe(false); + + expect(metricsRegex.test("security-q1/metrics/2024-12-29.json")).toBe(true); + expect(metricsRegex.test("security-q1/metrics/daily/snapshot.json")).toBe(true); + expect(metricsRegex.test("security-q1/cursor.json")).toBe(false); + }); + + it("should match multiple file extensions", () => { + const patterns = ["*.json", "*.jsonl", "*.csv", "*.md"].map(globPatternToRegex); + + const testCases = [ + { file: "data.json", shouldMatch: true }, + { file: "history.jsonl", shouldMatch: true }, + { file: "metrics.csv", shouldMatch: true }, + { file: "README.md", shouldMatch: true }, + { file: "script.js", shouldMatch: false }, + { file: "image.png", shouldMatch: false }, + ]; + + for (const { file, shouldMatch } of testCases) { + const matches = patterns.some(p => p.test(file)); + expect(matches).toBe(shouldMatch); + 
} + }); + }); + + describe("edge cases", () => { + it("should handle empty pattern", () => { + const regex = globPatternToRegex(""); + + expect(regex.test("")).toBe(true); + expect(regex.test("anything")).toBe(false); + }); + + it("should handle pattern with only wildcards", () => { + const singleWildcard = globPatternToRegex("*"); + const doubleWildcard = globPatternToRegex("**"); + + expect(singleWildcard.test("file.txt")).toBe(true); + expect(singleWildcard.test("dir/file.txt")).toBe(false); + + expect(doubleWildcard.test("file.txt")).toBe(true); + expect(doubleWildcard.test("dir/file.txt")).toBe(true); + }); + + it("should handle complex nested patterns", () => { + const regex = globPatternToRegex("data/**/archive/*.csv"); + + // data/**/archive/*.csv = data/ + .* + /archive/ + [^/]*.csv + // The ** matches any path, but literal /archive/ must follow + expect(regex.test("data/2024/archive/metrics.csv")).toBe(true); + expect(regex.test("data/2024/12/archive/metrics.csv")).toBe(true); + + // This won't match - ** matches empty but /archive/ must still be literal + expect(regex.test("data/archive/metrics.csv")).toBe(false); + + expect(regex.test("data/metrics.csv")).toBe(false); + expect(regex.test("data/archive/metrics.json")).toBe(false); + + // Test with a simpler pattern for direct match + const directRegex = globPatternToRegex("data/archive/*.csv"); + expect(directRegex.test("data/archive/metrics.csv")).toBe(true); + }); + + it("should handle patterns with hyphens and underscores", () => { + const regex = globPatternToRegex("test-file_name.json"); + + expect(regex.test("test-file_name.json")).toBe(true); + expect(regex.test("test_file-name.json")).toBe(false); + }); + + it("should be case-sensitive", () => { + const regex = globPatternToRegex("*.JSON"); + + expect(regex.test("file.JSON")).toBe(true); + expect(regex.test("file.json")).toBe(false); + }); + }); + + describe("regex output format", () => { + it("should return RegExp objects", () => { + const regex 
= globPatternToRegex("*.json"); + + expect(regex).toBeInstanceOf(RegExp); + }); + + it("should anchor patterns with ^ and $", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.source).toMatch(/^\^.*\$$/); + }); + + it("should convert * to [^/]* in regex source", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.source).toContain("[^/]*"); + }); + + it("should convert ** to .* in regex source", () => { + const regex = globPatternToRegex("data/**"); + + expect(regex.source).toContain(".*"); + }); + }); + }); + + describe("parseGlobPatterns", () => { + it("should parse space-separated patterns", () => { + const patterns = parseGlobPatterns("*.json *.jsonl *.csv"); + + expect(patterns).toHaveLength(3); + expect(patterns[0].test("file.json")).toBe(true); + expect(patterns[1].test("file.jsonl")).toBe(true); + expect(patterns[2].test("file.csv")).toBe(true); + }); + + it("should handle extra whitespace", () => { + const patterns = parseGlobPatterns(" *.json *.jsonl "); + + expect(patterns).toHaveLength(2); + expect(patterns[0].test("file.json")).toBe(true); + expect(patterns[1].test("file.jsonl")).toBe(true); + }); + + it("should handle empty string", () => { + const patterns = parseGlobPatterns(""); + + expect(patterns).toHaveLength(0); + }); + + it("should handle single pattern", () => { + const patterns = parseGlobPatterns("*.json"); + + expect(patterns).toHaveLength(1); + expect(patterns[0].test("file.json")).toBe(true); + }); + + it("should filter out empty patterns from multiple spaces", () => { + const patterns = parseGlobPatterns("*.json *.jsonl"); + + expect(patterns).toHaveLength(2); + }); + }); + + describe("matchesGlobPattern", () => { + it("should return true when file matches at least one pattern", () => { + expect(matchesGlobPattern("file.json", "*.json *.jsonl")).toBe(true); + expect(matchesGlobPattern("file.jsonl", "*.json *.jsonl")).toBe(true); + }); + + it("should return false when file matches no patterns", () 
=> { + expect(matchesGlobPattern("file.txt", "*.json *.jsonl")).toBe(false); + expect(matchesGlobPattern("script.js", "*.json *.jsonl *.csv")).toBe(false); + }); + + it("should work with complex patterns", () => { + expect(matchesGlobPattern("metrics/daily/2024.json", "metrics/**")).toBe(true); + expect(matchesGlobPattern("data/file.json", "metrics/**")).toBe(false); + }); + + it("should handle empty filter (no patterns)", () => { + expect(matchesGlobPattern("file.json", "")).toBe(false); + }); + + it("should handle the daily-code-metrics use case", () => { + const filter = "*.json *.jsonl *.csv *.md"; + + expect(matchesGlobPattern("history.jsonl", filter)).toBe(true); + expect(matchesGlobPattern("data.json", filter)).toBe(true); + expect(matchesGlobPattern("metrics.csv", filter)).toBe(true); + expect(matchesGlobPattern("README.md", filter)).toBe(true); + expect(matchesGlobPattern("script.js", filter)).toBe(false); + }); + + it("should work with campaign patterns", () => { + const filter = "security-q1/cursor.json security-q1/metrics/**"; + + expect(matchesGlobPattern("security-q1/cursor.json", filter)).toBe(true); + expect(matchesGlobPattern("security-q1/metrics/2024.json", filter)).toBe(true); + expect(matchesGlobPattern("security-q1/data.json", filter)).toBe(false); + }); + }); + + describe("security and correctness", () => { + it("should prevent ReDoS with reasonable patterns", () => { + // Test that the regex doesn't hang on pathological inputs + const regex = globPatternToRegex("**/*.json"); + const longPath = "a/".repeat(100) + "file.json"; + + // This should complete quickly + const start = Date.now(); + regex.test(longPath); + const duration = Date.now() - start; + + // Should complete in less than 100ms + expect(duration).toBeLessThan(100); + }); + + it("should correctly escape special regex characters", () => { + const regex = globPatternToRegex("file.txt"); + + // The dot should be escaped, not act as a wildcard + 
expect(regex.test("file.txt")).toBe(true); + expect(regex.test("fileXtxt")).toBe(false); + }); + + it("should handle backslash escaping securely", () => { + // This test verifies the security fix for proper escape order + const pattern = "test.txt"; + + // Correct: Escape backslashes first, then dots + const regexPattern = pattern.replace(/\\/g, "\\\\").replace(/\./g, "\\."); + const regex = new RegExp(`^${regexPattern}$`); + + expect(regex.test("test.txt")).toBe(true); + expect(regex.test("test_txt")).toBe(false); + }); + }); + + describe("integration with push_repo_memory", () => { + it("should validate file paths as used in push_repo_memory", () => { + const fileGlobFilter = "*.json *.jsonl *.csv *.md"; + const testFiles = [ + { path: "history.jsonl", shouldMatch: true }, + { path: "data.json", shouldMatch: true }, + { path: "metrics.csv", shouldMatch: true }, + { path: "README.md", shouldMatch: true }, + { path: "script.js", shouldMatch: false }, + { path: "image.png", shouldMatch: false }, + ]; + + for (const { path, shouldMatch } of testFiles) { + expect(matchesGlobPattern(path, fileGlobFilter)).toBe(shouldMatch); + } + }); + + it("should support subdirectory patterns", () => { + const patterns = parseGlobPatterns("metrics/** data/**"); + + expect(patterns.some(p => p.test("metrics/file.json"))).toBe(true); + expect(patterns.some(p => p.test("data/file.json"))).toBe(true); + expect(patterns.some(p => p.test("other/file.json"))).toBe(false); + }); + + it("should handle root-level files", () => { + const patterns = parseGlobPatterns("*.jsonl"); + + // Root level files (no subdirectory) + expect(patterns.some(p => p.test("history.jsonl"))).toBe(true); + expect(patterns.some(p => p.test("data.jsonl"))).toBe(true); + + // Should not match files in subdirectories + expect(patterns.some(p => p.test("dir/history.jsonl"))).toBe(false); + }); + }); +}); diff --git a/actions/setup/js/push_repo_memory.cjs b/actions/setup/js/push_repo_memory.cjs index 
86b184f753..12ea165084 100644 --- a/actions/setup/js/push_repo_memory.cjs +++ b/actions/setup/js/push_repo_memory.cjs @@ -5,6 +5,7 @@ const fs = require("fs"); const path = require("path"); const { execSync } = require("child_process"); const { getErrorMessage } = require("./error_helpers.cjs"); +const { globPatternToRegex } = require("./glob_pattern_helpers.cjs"); /** * Push repo-memory changes to git branch @@ -34,6 +35,14 @@ async function main() { const ghToken = process.env.GH_TOKEN; const githubRunId = process.env.GITHUB_RUN_ID || "unknown"; + // Log environment variable configuration for debugging + core.info("Environment configuration:"); + core.info(` MEMORY_ID: ${memoryId}`); + core.info(` MAX_FILE_SIZE: ${maxFileSize}`); + core.info(` MAX_FILE_COUNT: ${maxFileCount}`); + core.info(` FILE_GLOB_FILTER: ${fileGlobFilter ? `"${fileGlobFilter}"` : "(empty - all files accepted)"}`); + core.info(` FILE_GLOB_FILTER length: ${fileGlobFilter.length}`); + /** @param {unknown} value */ function isPlainObject(value) { return typeof value === "object" && value !== null && !Array.isArray(value); @@ -246,6 +255,15 @@ async function main() { let campaignCursorFound = false; let campaignMetricsCount = 0; + // Log the file glob filter configuration + if (fileGlobFilter) { + core.info(`File glob filter enabled: ${fileGlobFilter}`); + const patternCount = fileGlobFilter.trim().split(/\s+/).filter(Boolean).length; + core.info(`Number of patterns: ${patternCount}`); + } else { + core.info("No file glob filter - all files will be accepted"); + } + /** * Recursively scan directory and collect files * @param {string} dirPath - Directory to scan @@ -266,26 +284,28 @@ async function main() { // Validate file name patterns if filter is set if (fileGlobFilter) { - const patterns = fileGlobFilter - .trim() - .split(/\s+/) - .filter(Boolean) - .map(pattern => { - // Convert glob pattern to regex that supports directory wildcards - // ** matches any path segment (including /) - // * 
matches any characters except / - let regexPattern = pattern - .replace(/\\/g, "\\\\") // Escape backslashes - .replace(/\./g, "\\.") // Escape dots - .replace(/\*\*/g, "") // Temporarily replace ** - .replace(/\*/g, "[^/]*") // Single * matches non-slash chars - .replace(//g, ".*"); // ** matches everything including / - return new RegExp(`^${regexPattern}$`); - }); + const patterns = fileGlobFilter.trim().split(/\s+/).filter(Boolean).map(globPatternToRegex); - if (!patterns.some(pattern => pattern.test(relativeFilePath))) { + // Debug logging: Show what we're testing + core.debug(`Testing file: ${relativeFilePath}`); + core.debug(`File glob filter: ${fileGlobFilter}`); + core.debug(`Number of patterns: ${patterns.length}`); + + const matchResults = patterns.map((pattern, idx) => { + const matches = pattern.test(relativeFilePath); + const patternStr = fileGlobFilter.trim().split(/\s+/).filter(Boolean)[idx]; + core.debug(` Pattern ${idx + 1}: "${patternStr}" -> ${pattern.source} -> ${matches ? "✓ MATCH" : "✗ NO MATCH"}`); + return matches; + }); + + if (!matchResults.some(m => m)) { core.error(`File does not match allowed patterns: ${relativeFilePath}`); core.error(`Allowed patterns: ${fileGlobFilter}`); + core.error(`Pattern test results:`); + const patternStrs = fileGlobFilter.trim().split(/\s+/).filter(Boolean); + patterns.forEach((pattern, idx) => { + core.error(` ${patternStrs[idx]} -> regex: ${pattern.source} -> ${matchResults[idx] ? 
"MATCH" : "NO MATCH"}`); + }); core.setFailed("File pattern validation failed"); throw new Error("File pattern validation failed"); } @@ -323,6 +343,15 @@ async function main() { try { scanDirectory(sourceMemoryPath); + core.info(`Scan complete: Found ${filesToCopy.length} file(s) to copy`); + if (filesToCopy.length > 0 && filesToCopy.length <= 10) { + core.info("Files found:"); + filesToCopy.forEach(f => core.info(` - ${f.relativePath} (${f.size} bytes)`)); + } else if (filesToCopy.length > 10) { + core.info(`First 10 files:`); + filesToCopy.slice(0, 10).forEach(f => core.info(` - ${f.relativePath} (${f.size} bytes)`)); + core.info(` ... and ${filesToCopy.length - 10} more`); + } } catch (error) { core.setFailed(`Failed to scan artifact directory: ${getErrorMessage(error)}`); return; diff --git a/actions/setup/js/push_repo_memory.test.cjs b/actions/setup/js/push_repo_memory.test.cjs index 2e53fac3da..e499fcec3d 100644 --- a/actions/setup/js/push_repo_memory.test.cjs +++ b/actions/setup/js/push_repo_memory.test.cjs @@ -1,4 +1,212 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; +import { globPatternToRegex } from "./glob_pattern_helpers.cjs"; + +describe("push_repo_memory.cjs - globPatternToRegex helper", () => { + describe("basic pattern matching", () => { + it("should match exact filenames without wildcards", () => { + const regex = globPatternToRegex("specific-file.txt"); + + expect(regex.test("specific-file.txt")).toBe(true); + expect(regex.test("specific-file.md")).toBe(false); + expect(regex.test("other-file.txt")).toBe(false); + }); + + it("should match files with * wildcard (single segment)", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.test("data.json")).toBe(true); + expect(regex.test("config.json")).toBe(true); + expect(regex.test("file.jsonl")).toBe(false); + expect(regex.test("dir/data.json")).toBe(false); // * doesn't cross directories + }); + + it("should match files with ** wildcard (multi-segment)", () => 
{ + const regex = globPatternToRegex("metrics/**"); + + expect(regex.test("metrics/file.json")).toBe(true); + expect(regex.test("metrics/daily/file.json")).toBe(true); + expect(regex.test("metrics/daily/archive/file.json")).toBe(true); + expect(regex.test("data/file.json")).toBe(false); + }); + + it("should distinguish between * and **", () => { + const singleStar = globPatternToRegex("logs/*"); + const doubleStar = globPatternToRegex("logs/**"); + + // Single * should match direct children only + expect(singleStar.test("logs/error.log")).toBe(true); + expect(singleStar.test("logs/2024/error.log")).toBe(false); + + // Double ** should match nested paths + expect(doubleStar.test("logs/error.log")).toBe(true); + expect(doubleStar.test("logs/2024/error.log")).toBe(true); + expect(doubleStar.test("logs/2024/12/error.log")).toBe(true); + }); + }); + + describe("special character escaping", () => { + it("should escape dots correctly", () => { + const regex = globPatternToRegex("file.txt"); + + expect(regex.test("file.txt")).toBe(true); + expect(regex.test("filextxt")).toBe(false); // dot shouldn't act as wildcard + expect(regex.test("file_txt")).toBe(false); + }); + + it("should escape backslashes correctly", () => { + // Test pattern with backslash (though rare in file patterns) + const regex = globPatternToRegex("test\\.txt"); + + // The backslash should be escaped, making this match literally + expect(regex.source).toContain("\\\\"); + }); + + it("should handle patterns with multiple dots", () => { + const regex = globPatternToRegex("file.min.js"); + + expect(regex.test("file.min.js")).toBe(true); + expect(regex.test("filexminxjs")).toBe(false); + }); + }); + + describe("real-world patterns", () => { + it("should match .jsonl files (daily-code-metrics use case)", () => { + const regex = globPatternToRegex("*.jsonl"); + + expect(regex.test("history.jsonl")).toBe(true); + expect(regex.test("data.jsonl")).toBe(true); + expect(regex.test("metrics.jsonl")).toBe(true); + 
expect(regex.test("file.json")).toBe(false); + }); + + it("should match nested metrics files", () => { + const regex = globPatternToRegex("metrics/**/*.json"); + + // metrics/**/*.json = metrics/ + .* + / + [^/]*.json + // The ** matches any path (including empty), but literal / after ** must exist + expect(regex.test("metrics/daily/2024-12-26.json")).toBe(true); + expect(regex.test("metrics/subdir/another/file.json")).toBe(true); + + // This won't match because we need the / after ** even if ** matches empty + expect(regex.test("metrics/2024-12-26.json")).toBe(false); + expect(regex.test("data/metrics.json")).toBe(false); + + // To match both nested and direct children, use: metrics/** + const flexibleRegex = globPatternToRegex("metrics/**"); + expect(flexibleRegex.test("metrics/2024-12-26.json")).toBe(true); + expect(flexibleRegex.test("metrics/daily/file.json")).toBe(true); + }); + + it("should match campaign-specific patterns", () => { + const cursorRegex = globPatternToRegex("security-q1/cursor.json"); + const metricsRegex = globPatternToRegex("security-q1/metrics/**"); + + expect(cursorRegex.test("security-q1/cursor.json")).toBe(true); + expect(cursorRegex.test("security-q1/metrics/file.json")).toBe(false); + + expect(metricsRegex.test("security-q1/metrics/2024-12-29.json")).toBe(true); + expect(metricsRegex.test("security-q1/metrics/daily/snapshot.json")).toBe(true); + expect(metricsRegex.test("security-q1/cursor.json")).toBe(false); + }); + + it("should match multiple file extensions", () => { + const patterns = ["*.json", "*.jsonl", "*.csv", "*.md"].map(globPatternToRegex); + + const testCases = [ + { file: "data.json", shouldMatch: true }, + { file: "history.jsonl", shouldMatch: true }, + { file: "metrics.csv", shouldMatch: true }, + { file: "README.md", shouldMatch: true }, + { file: "script.js", shouldMatch: false }, + { file: "image.png", shouldMatch: false }, + ]; + + for (const { file, shouldMatch } of testCases) { + const matches = patterns.some(p 
=> p.test(file)); + expect(matches).toBe(shouldMatch); + } + }); + }); + + describe("edge cases", () => { + it("should handle empty pattern", () => { + const regex = globPatternToRegex(""); + + expect(regex.test("")).toBe(true); + expect(regex.test("anything")).toBe(false); + }); + + it("should handle pattern with only wildcards", () => { + const singleWildcard = globPatternToRegex("*"); + const doubleWildcard = globPatternToRegex("**"); + + expect(singleWildcard.test("file.txt")).toBe(true); + expect(singleWildcard.test("dir/file.txt")).toBe(false); + + expect(doubleWildcard.test("file.txt")).toBe(true); + expect(doubleWildcard.test("dir/file.txt")).toBe(true); + }); + + it("should handle complex nested patterns", () => { + const regex = globPatternToRegex("data/**/archive/*.csv"); + + // data/**/archive/*.csv = data/ + .* + /archive/ + [^/]*.csv + // The ** matches any path, but literal /archive/ must follow + expect(regex.test("data/2024/archive/metrics.csv")).toBe(true); + expect(regex.test("data/2024/12/archive/metrics.csv")).toBe(true); + + // This won't match - ** matches empty but /archive/ must still be literal + expect(regex.test("data/archive/metrics.csv")).toBe(false); + + expect(regex.test("data/metrics.csv")).toBe(false); + expect(regex.test("data/archive/metrics.json")).toBe(false); + + // To match data/archive/*.csv directly, use this pattern + const directRegex = globPatternToRegex("data/archive/*.csv"); + expect(directRegex.test("data/archive/metrics.csv")).toBe(true); + }); + + it("should handle patterns with hyphens and underscores", () => { + const regex = globPatternToRegex("test-file_name.json"); + + expect(regex.test("test-file_name.json")).toBe(true); + expect(regex.test("test_file-name.json")).toBe(false); + }); + + it("should be case-sensitive", () => { + const regex = globPatternToRegex("*.JSON"); + + expect(regex.test("file.JSON")).toBe(true); + expect(regex.test("file.json")).toBe(false); + }); + }); + + describe("regex output format", 
() => { + it("should return RegExp objects", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex).toBeInstanceOf(RegExp); + }); + + it("should anchor patterns with ^ and $", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.source).toMatch(/^\^.*\$$/); + }); + + it("should convert * to [^/]* in regex source", () => { + const regex = globPatternToRegex("*.json"); + + expect(regex.source).toContain("[^/]*"); + }); + + it("should convert ** to .* in regex source", () => { + const regex = globPatternToRegex("data/**"); + + expect(regex.source).toContain(".*"); + }); + }); +}); describe("push_repo_memory.cjs - glob pattern security tests", () => { describe("glob-to-regex conversion", () => { @@ -562,4 +770,140 @@ describe("push_repo_memory.cjs - glob pattern security tests", () => { } }); }); + + describe("debug logging for pattern matching", () => { + it("should log pattern matching details for debugging", () => { + // Test that debug logging provides helpful information + const fileGlobFilter = "*.json *.jsonl *.csv *.md"; + const testFile = "history.jsonl"; + + const patterns = fileGlobFilter + .trim() + .split(/\s+/) + .filter(Boolean) + .map(pattern => { + const regexPattern = pattern + .replace(/\\/g, "\\\\") + .replace(/\./g, "\\.") + .replace(/\*\*/g, "") + .replace(/\*/g, "[^/]*") + .replace(//g, ".*"); + return new RegExp(`^${regexPattern}$`); + }); + + // Log what we're testing + const matchResults = patterns.map((pattern, idx) => { + const matches = pattern.test(testFile); + const patternStr = fileGlobFilter.trim().split(/\s+/).filter(Boolean)[idx]; + return { patternStr, regex: pattern.source, matches }; + }); + + // Verify that history.jsonl matches the *.jsonl pattern + const jsonlMatch = matchResults.find(r => r.patternStr === "*.jsonl"); + expect(jsonlMatch).toBeDefined(); + expect(jsonlMatch.matches).toBe(true); + expect(jsonlMatch.regex).toBe("^[^/]*\\.jsonl$"); + + // Verify overall that at least one pattern 
matches + expect(matchResults.some(r => r.matches)).toBe(true); + }); + + it("should show which patterns match and which don't for a given file", () => { + // Test with a file that should only match one pattern + const fileGlobFilter = "*.json *.jsonl *.csv *.md"; + const testFile = "data.csv"; + + const patterns = fileGlobFilter + .trim() + .split(/\s+/) + .filter(Boolean) + .map(pattern => { + const regexPattern = pattern + .replace(/\\/g, "\\\\") + .replace(/\./g, "\\.") + .replace(/\*\*/g, "") + .replace(/\*/g, "[^/]*") + .replace(//g, ".*"); + return new RegExp(`^${regexPattern}$`); + }); + + const patternStrs = fileGlobFilter.trim().split(/\s+/).filter(Boolean); + const matchResults = patterns.map((pattern, idx) => ({ + pattern: patternStrs[idx], + regex: pattern.source, + matches: pattern.test(testFile), + })); + + // Should match *.csv but not others + expect(matchResults[0].matches).toBe(false); // *.json + expect(matchResults[1].matches).toBe(false); // *.jsonl + expect(matchResults[2].matches).toBe(true); // *.csv + expect(matchResults[3].matches).toBe(false); // *.md + }); + + it("should provide helpful error details when no patterns match", () => { + // Test with a file that doesn't match any pattern + const fileGlobFilter = "*.json *.jsonl *.csv *.md"; + const testFile = "script.js"; + + const patterns = fileGlobFilter + .trim() + .split(/\s+/) + .filter(Boolean) + .map(pattern => { + const regexPattern = pattern + .replace(/\\/g, "\\\\") + .replace(/\./g, "\\.") + .replace(/\*\*/g, "") + .replace(/\*/g, "[^/]*") + .replace(//g, ".*"); + return new RegExp(`^${regexPattern}$`); + }); + + const patternStrs = fileGlobFilter.trim().split(/\s+/).filter(Boolean); + const matchResults = patterns.map((pattern, idx) => ({ + pattern: patternStrs[idx], + regex: pattern.source, + matches: pattern.test(testFile), + })); + + // None should match + expect(matchResults.every(r => !r.matches)).toBe(true); + + // Error message should include pattern details + const 
errorDetails = matchResults.map(r => `${r.pattern} -> regex: ${r.regex} -> ${r.matches ? "MATCH" : "NO MATCH"}`); + + expect(errorDetails[0]).toContain("*.json -> regex: ^[^/]*\\.json$ -> NO MATCH"); + expect(errorDetails[1]).toContain("*.jsonl -> regex: ^[^/]*\\.jsonl$ -> NO MATCH"); + expect(errorDetails[2]).toContain("*.csv -> regex: ^[^/]*\\.csv$ -> NO MATCH"); + expect(errorDetails[3]).toContain("*.md -> regex: ^[^/]*\\.md$ -> NO MATCH"); + }); + + it("should correctly match files in the root directory (no subdirectories)", () => { + // The daily-code-metrics workflow writes history.jsonl to the root of repo memory + // Test that pattern matching works for root-level files + const fileGlobFilter = "*.json *.jsonl *.csv *.md"; + const rootFiles = ["history.jsonl", "data.json", "metrics.csv", "README.md"]; + + const patterns = fileGlobFilter + .trim() + .split(/\s+/) + .filter(Boolean) + .map(pattern => { + const regexPattern = pattern + .replace(/\\/g, "\\\\") + .replace(/\./g, "\\.") + .replace(/\*\*/g, "") + .replace(/\*/g, "[^/]*") + .replace(//g, ".*"); + return new RegExp(`^${regexPattern}$`); + }); + + // All root files should match at least one pattern + for (const file of rootFiles) { + const matches = patterns.some(p => p.test(file)); + expect(matches).toBe(true); + } + }); + }); });