diff --git a/.gitignore b/.gitignore index f6e459a..5838121 100644 --- a/.gitignore +++ b/.gitignore @@ -37,5 +37,6 @@ hypergraph CPU.* results +generated-datasets cosmos-export .vercel diff --git a/package.json b/package.json index cb442fc..7953cf7 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "benchmark1": "bun run --cpu-prof-md scripts/profiling/hg07-first10.ts", "benchmark:bus": "./benchmark-bus.sh", "benchmark:srj13": "./benchmark-srj13.sh", + "generate:srj18": "bun run scripts/benchmarking/generate-srj18.ts", "benchmark:section": "bun run scripts/benchmarking/hg07-section-pipeline.ts", "benchmark:section:profile": "bun run scripts/benchmarking/hg07-first40-section-profile.ts", "benchmark:port-point-pathing": "bun run scripts/benchmarking/port-point-pathing-section-pipeline.ts" @@ -34,6 +35,8 @@ "typescript": "^5" }, "dependencies": { - "@tsci/seveibar.dataset-srj13": "https://github.com/tscircuit/dataset-srj13#161a835ed40ce72a2fa18ffe5ba6ba6392fcc176" + "@tsci/seveibar.dataset-srj13": "https://github.com/tscircuit/dataset-srj13#161a835ed40ce72a2fa18ffe5ba6ba6392fcc176", + "@tscircuit/capacity-autorouter": "^0.0.557", + "dataset-srj18": "git+https://github.com/tscircuit/dataset-srj18.git#48cd2f8f651bc9213a339580fdf125d2a1dd9254" } } diff --git a/pages/dataset-srj18.page.tsx b/pages/dataset-srj18.page.tsx new file mode 100644 index 0000000..d1d1077 --- /dev/null +++ b/pages/dataset-srj18.page.tsx @@ -0,0 +1,181 @@ +import type { SerializedHyperGraph } from "@tscircuit/hypergraph" +import { type ChangeEvent, useEffect, useMemo, useState } from "react" +import { TinyHyperGraphSectionPipelineSolver } from "lib/index" +import { Debugger } from "./components/Debugger" + +type ImportGlob = ( + pattern: string, + options?: { import?: string }, +) => Record Promise> + +const SAMPLE_HASH_PARAM = "sample" +const SAMPLE_NAMES = Array.from( + { length: 16 }, + (_, index) => `sample${String(index + 1).padStart(3, "0")}`, +) +const srj18SampleLoaders = ( + import.meta as ImportMeta & { glob: ImportGlob } +).glob("../generated-datasets/srj18/*.hg.json", { + import: "default", +}) + +const clampSampleIndex = (sampleIndex: number) => + Math.min( + Math.max(Number.isFinite(sampleIndex) ? sampleIndex : 0, 0), + SAMPLE_NAMES.length - 1, + ) + +const getSampleIndexFromHash = () => { + if (typeof window === "undefined") return 0 + + const hashParams = new URLSearchParams(window.location.hash.slice(1)) + const sampleNumber = Number(hashParams.get(SAMPLE_HASH_PARAM)) + + if (!Number.isFinite(sampleNumber)) return 0 + return clampSampleIndex(sampleNumber - 1) +} + +const setSampleIndexInHash = (sampleIndex: number) => { + if (typeof window === "undefined") return + + const url = new URL(window.location.href) + const hashParams = new URLSearchParams(url.hash.slice(1)) + hashParams.set(SAMPLE_HASH_PARAM, String(sampleIndex + 1)) + url.hash = hashParams.toString() + + window.history.replaceState(window.history.state, "", url) +} + +const getSampleLoader = (sampleName: string) => + srj18SampleLoaders[`../generated-datasets/srj18/${sampleName}.hg.json`] + +export default function DatasetSrj18Page() { + const [selectedSampleIndex, setSelectedSampleIndex] = useState( + getSampleIndexFromHash, + ) + const [serializedHyperGraph, setSerializedHyperGraph] = + useState(null) + const [loadError, setLoadError] = useState(null) + + useEffect(() => { + const syncSelectedSampleFromHash = () => { + setSelectedSampleIndex(getSampleIndexFromHash()) + } + + window.addEventListener("hashchange", syncSelectedSampleFromHash) + return () => + window.removeEventListener("hashchange", syncSelectedSampleFromHash) + }, []) + + useEffect(() => { + setSampleIndexInHash(selectedSampleIndex) + }, [selectedSampleIndex]) + + const selectedSampleName = + SAMPLE_NAMES[selectedSampleIndex] ?? SAMPLE_NAMES[0]! + const generatedSampleNames = useMemo( + () => + Object.keys(srj18SampleLoaders) + .map((samplePath) => /\/(sample\d+)\.hg\.json$/.exec(samplePath)?.[1]) + .filter((sampleName): sampleName is string => Boolean(sampleName)) + .sort(), + [], + ) + + useEffect(() => { + let cancelled = false + setSerializedHyperGraph(null) + setLoadError(null) + + const loadSample = async () => { + const load = getSampleLoader(selectedSampleName) + if (!load) { + throw new Error(`Missing generated srj18 sample: ${selectedSampleName}`) + } + + const nextSerializedHyperGraph = await load() + if (!cancelled) { + setSerializedHyperGraph(nextSerializedHyperGraph) + } + } + + loadSample().catch((error) => { + if (!cancelled) { + setLoadError(error instanceof Error ? error.message : String(error)) + } + }) + + return () => { + cancelled = true + } + }, [selectedSampleName]) + + return ( +
+
+ + + +
+ {selectedSampleName} • srj18 • generated {generatedSampleNames.length} + /{SAMPLE_NAMES.length} +
+
+
+ {loadError ? ( +
+ {loadError} +
+ ) : serializedHyperGraph ? ( + + new TinyHyperGraphSectionPipelineSolver({ + serializedHyperGraph: nextSerializedHyperGraph, + }) + } + /> + ) : ( +
+ Loading {selectedSampleName} +
+ )} +
+
+ ) +} diff --git a/scripts/benchmarking/benchmark.ts b/scripts/benchmarking/benchmark.ts index a43dc22..3d3e895 100644 --- a/scripts/benchmarking/benchmark.ts +++ b/scripts/benchmarking/benchmark.ts @@ -4,7 +4,7 @@ import { stackGraphicsHorizontally, type GraphicsObject, } from "graphics-debug" -import { mkdir, readdir, writeFile } from "node:fs/promises" +import { mkdir, readFile, readdir, writeFile } from "node:fs/promises" import path from "node:path" import { loadSerializedHyperGraph } from "../../lib/compat/loadSerializedHyperGraph" import { @@ -54,14 +54,16 @@ type BenchmarkSampleResult = { } type SolverVariant = "core" | "poly" +type DatasetKey = "hg07" | "srj18" const IMPROVEMENT_EPSILON = 1e-9 const HELP_TEXT = `Usage: ./benchmark.sh [options] -Run the hg07 section-pipeline benchmark and write per-sample artifacts under ./results/runNNN/. +Run the section-pipeline benchmark and write per-sample artifacts under ./results/runNNN/. Options: + --dataset NAME Dataset to run: hg07 or 18/srj18. Defaults to hg07. --limit N Run the first N samples from the dataset. --sample NUM Run a specific sample by number or name (e.g. 2, 002, sample002). --solver NAME Solver variant: core or poly. Defaults to core. @@ -71,6 +73,7 @@ Options: Examples: ./benchmark.sh + ./benchmark.sh --dataset 18 ./benchmark.sh --limit 20 ./benchmark.sh --limit 20 --solver poly ./benchmark.sh --sample 2 @@ -163,6 +166,7 @@ const parseArgs = () => { let sampleName: string | null = null let candidateFamilies: TinyHyperGraphSectionCandidateFamily[] | null = null let solverVariant: SolverVariant = "core" + let datasetKey: DatasetKey = "hg07" for (let index = 0; index < process.argv.length; index += 1) { const arg = process.argv[index] @@ -185,6 +189,20 @@ const parseArgs = () => { continue } + if (arg === "--dataset") { + const rawValue = process.argv[index + 1] + if (rawValue === "hg07" || rawValue === "7") { + datasetKey = "hg07" + } else if (rawValue === "18" || rawValue === "srj18") { + datasetKey = "srj18" + } else { + usageError(`Invalid --dataset value: ${rawValue ?? ""}`) + } + + index += 1 + continue + } + if (arg === "--sample") { const rawValue = process.argv[index + 1] if (!rawValue) { @@ -227,7 +245,7 @@ const parseArgs = () => { usageError("Use either --limit or --sample, not both") } - return { limit, sampleName, candidateFamilies, solverVariant } + return { limit, sampleName, candidateFamilies, solverVariant, datasetKey } } const formatSeconds = (durationMs: number) => @@ -299,7 +317,7 @@ const getNextRunDirectory = async (resultsDir: string) => { } } -const loadDatasetModule = async (): Promise => { +const loadHg07DatasetModule = async (): Promise => { console.log("loading dataset=hg07") const datasetModule = (await import("dataset-hg07")) as DatasetModule console.log( @@ -308,6 +326,78 @@ const loadDatasetModule = async (): Promise => { return datasetModule } +const loadSrj18DatasetModule = async ( + cwd: string, + limit: number | null, + sampleName: string | null, +): Promise => { + const { + ensureSrj18DatasetGenerated, + getSrj18DatasetDir, + getSrj18SampleNames, + } = await import("./generate-srj18") + + const allSampleNames = getSrj18SampleNames() + if (sampleName && !allSampleNames.includes(sampleName)) { + usageError(`Unknown sample: ${sampleName}`) + } + + const requestedSampleNames = sampleName + ? [sampleName] + : allSampleNames.slice( + 0, + limit === null + ? allSampleNames.length + : Math.min(limit, allSampleNames.length), + ) + + await ensureSrj18DatasetGenerated(cwd, requestedSampleNames) + + const datasetDir = getSrj18DatasetDir(cwd) + console.log(`loading dataset=srj18 dir=${datasetDir}`) + + const datasetModule: DatasetModule = { + manifest: { + sampleCount: allSampleNames.length, + samples: allSampleNames.map((srj18SampleName) => ({ + sampleName: srj18SampleName, + circuitKey: "srj18", + circuitId: srj18SampleName, + stepsToPortPointSolve: 0, + })), + }, + } + + for (const srj18SampleName of requestedSampleNames) { + const serializedHyperGraph = JSON.parse( + await readFile( + path.join(datasetDir, `${srj18SampleName}.hg.json`), + "utf8", + ), + ) as SerializedHyperGraph + + datasetModule[srj18SampleName] = serializedHyperGraph + } + + console.log( + `loaded dataset=srj18 samples=${datasetModule.manifest.sampleCount}`, + ) + return datasetModule +} + +const loadDatasetModule = async ( + datasetKey: DatasetKey, + cwd: string, + limit: number | null, + sampleName: string | null, +): Promise => { + if (datasetKey === "srj18") { + return loadSrj18DatasetModule(cwd, limit, sampleName) + } + + return loadHg07DatasetModule() +} + const getSelectedSamples = ( datasetModule: DatasetModule, limit: number | null, @@ -360,9 +450,15 @@ const stringifyLogValue = (value: unknown) => typeof value === "string" ? value : JSON.stringify(value, null, 2) const main = async () => { - const { limit, sampleName, candidateFamilies, solverVariant } = parseArgs() - const datasetModule = await loadDatasetModule() const cwd = process.cwd() + const { limit, sampleName, candidateFamilies, solverVariant, datasetKey } = + parseArgs() + const datasetModule = await loadDatasetModule( + datasetKey, + cwd, + limit, + sampleName, + ) const resultsDir = path.join(cwd, "results") const { runName } = await getNextRunDirectory(resultsDir) const runDir = path.join(resultsDir, runName) @@ -375,7 +471,7 @@ const main = async () => { : TinyHyperGraphSectionPipelineSolver console.log( - `dataset=hg07 samples=${sampleMetas.length}/${datasetModule.manifest.sampleCount} run=${runName} solver=${solverVariant} families=${candidateFamilies?.join(",") ?? "default"}`, + `dataset=${datasetKey} samples=${sampleMetas.length}/${datasetModule.manifest.sampleCount} run=${runName} solver=${solverVariant} families=${candidateFamilies?.join(",") ?? "default"}`, ) for (const sampleMeta of sampleMetas) { diff --git a/scripts/benchmarking/generate-srj18.ts b/scripts/benchmarking/generate-srj18.ts new file mode 100644 index 0000000..bcb8334 --- /dev/null +++ b/scripts/benchmarking/generate-srj18.ts @@ -0,0 +1,255 @@ +import type { SerializedHyperGraph } from "@tscircuit/hypergraph" +import { + AutoroutingPipelineSolver7_MultiGraph as Pipeline7, + type SimpleRouteJson, +} from "@tscircuit/capacity-autorouter" +import { dataset as srj18Dataset } from "dataset-srj18" +import { mkdir, rename, rm, stat, writeFile } from "node:fs/promises" +import path from "node:path" + +type Srj18Dataset = Record + +type Pipeline7RuntimeShape = { + portPointPathingSolver?: { + tinyPipelineSolver?: { + inputProblem?: { + serializedHyperGraph?: SerializedHyperGraph + } + } + } +} + +const DEFAULT_MAX_PIPELINE_STEPS = 1_000_000 + +export const getSrj18DatasetDir = (cwd = process.cwd()) => + path.join(cwd, "generated-datasets", "srj18") + +const getSrj18SampleEntries = () => + Object.entries(srj18Dataset as Srj18Dataset) + .filter(([sampleName]) => /^sample\d+$/.test(sampleName)) + .sort(([leftSampleName], [rightSampleName]) => + leftSampleName.localeCompare(rightSampleName), + ) + +export const getSrj18SampleNames = () => + getSrj18SampleEntries().map(([sampleName]) => sampleName) + +const directoryExists = async (dir: string) => { + try { + return (await stat(dir)).isDirectory() + } catch (error) { + if ((error as { code?: string }).code === "ENOENT") { + return false + } + throw error + } +} + +const fileExists = async (filePath: string) => { + try { + return (await stat(filePath)).isFile() + } catch (error) { + if ((error as { code?: string }).code === "ENOENT") { + return false + } + throw error + } +} + +const assertSerializedHyperGraph = ( + value: unknown, + sampleName: string, +): SerializedHyperGraph => { + if ( + typeof value === "object" && + value !== null && + Array.isArray((value as SerializedHyperGraph).regions) && + Array.isArray((value as SerializedHyperGraph).ports) && + Array.isArray((value as SerializedHyperGraph).connections) + ) { + return value as SerializedHyperGraph + } + + throw new Error( + `Pipeline7 did not expose a serialized tiny-hypergraph input for ${sampleName}`, + ) +} + +const getTinyHyperGraphInputFromPipeline7 = ( + sampleName: string, + simpleRouteJson: SimpleRouteJson, + maxPipelineSteps: number, +) => { + const solver = new Pipeline7(structuredClone(simpleRouteJson), { + cacheProvider: null, + effort: 1, + }) + const introspectableSolver = solver as unknown as Pipeline7RuntimeShape + let stepCount = 0 + + while (solver.getCurrentPhase() !== "portPointPathingSolver") { + if (solver.failed) { + throw new Error( + `Pipeline7 failed before portPointPathingSolver for ${sampleName}: ${solver.error ?? "unknown error"}`, + ) + } + if (solver.solved) { + throw new Error( + `Pipeline7 solved before reaching portPointPathingSolver for ${sampleName}`, + ) + } + if (stepCount >= maxPipelineSteps) { + throw new Error( + `Pipeline7 exceeded ${maxPipelineSteps} steps before portPointPathingSolver for ${sampleName}`, + ) + } + + solver.step() + stepCount += 1 + } + + while ( + !introspectableSolver.portPointPathingSolver?.tinyPipelineSolver + ?.inputProblem?.serializedHyperGraph + ) { + if (solver.failed) { + throw new Error( + `Pipeline7 failed while creating tiny-hypergraph input for ${sampleName}: ${solver.error ?? "unknown error"}`, + ) + } + if (stepCount >= maxPipelineSteps) { + throw new Error( + `Pipeline7 exceeded ${maxPipelineSteps} steps while creating tiny-hypergraph input for ${sampleName}`, + ) + } + + solver.step() + stepCount += 1 + } + + const serializedHyperGraph = + introspectableSolver.portPointPathingSolver.tinyPipelineSolver.inputProblem + .serializedHyperGraph + + return { + serializedHyperGraph: assertSerializedHyperGraph( + serializedHyperGraph, + sampleName, + ), + stepCount, + } +} + +export const generateSrj18Dataset = async ({ + cwd = process.cwd(), + force = false, + maxPipelineSteps = DEFAULT_MAX_PIPELINE_STEPS, + sampleNames, +}: { + cwd?: string + force?: boolean + maxPipelineSteps?: number + sampleNames?: string[] +} = {}) => { + const outputDir = getSrj18DatasetDir(cwd) + const requestedSampleNames = new Set(sampleNames ?? getSrj18SampleNames()) + const sampleEntries = getSrj18SampleEntries().filter(([sampleName]) => + requestedSampleNames.has(sampleName), + ) + + const unknownSampleNames = [...requestedSampleNames].filter( + (sampleName) => + !sampleEntries.some( + ([candidateSampleName]) => candidateSampleName === sampleName, + ), + ) + if (unknownSampleNames.length > 0) { + throw new Error(`Unknown srj18 sample(s): ${unknownSampleNames.join(", ")}`) + } + + const missingSampleEntries = [] + for (const sampleEntry of sampleEntries) { + const [sampleName] = sampleEntry + const outputPath = path.join(outputDir, `${sampleName}.hg.json`) + if (force || !(await fileExists(outputPath))) { + missingSampleEntries.push(sampleEntry) + } + } + + if (missingSampleEntries.length === 0) { + return { outputDir, generated: false, sampleCount: sampleEntries.length } + } + + if (force && sampleNames === undefined) { + await rm(outputDir, { recursive: true, force: true }) + } + await mkdir(outputDir, { recursive: true }) + + for (const [sampleName, simpleRouteJson] of missingSampleEntries) { + const startedAt = performance.now() + const { serializedHyperGraph, stepCount } = + getTinyHyperGraphInputFromPipeline7( + sampleName, + simpleRouteJson, + maxPipelineSteps, + ) + const outputPath = path.join(outputDir, `${sampleName}.hg.json`) + const tempPath = `${outputPath}.tmp-${Date.now()}-${Math.random() + .toString(36) + .slice(2)}` + await writeFile(tempPath, `${JSON.stringify(serializedHyperGraph)}\n`) + await rename(tempPath, outputPath) + const durationSeconds = ((performance.now() - startedAt) / 1000).toFixed(2) + console.log( + `generated ${sampleName}.hg.json regions=${serializedHyperGraph.regions.length} ports=${serializedHyperGraph.ports.length} connections=${serializedHyperGraph.connections?.length ?? 0} pipelineSteps=${stepCount} duration=${durationSeconds}s`, + ) + } + + return { + outputDir, + generated: true, + sampleCount: missingSampleEntries.length, + } +} + +export const ensureSrj18DatasetGenerated = async ( + cwd = process.cwd(), + sampleNames?: string[], +) => { + const outputDir = getSrj18DatasetDir(cwd) + const requiredSampleNames = sampleNames ?? getSrj18SampleNames() + const missingSampleNames = [] + + for (const sampleName of requiredSampleNames) { + const outputPath = path.join(outputDir, `${sampleName}.hg.json`) + if (!(await fileExists(outputPath))) { + missingSampleNames.push(sampleName) + } + } + + if (missingSampleNames.length === 0) { + return { + outputDir, + generated: false, + sampleCount: requiredSampleNames.length, + } + } + + if (!(await directoryExists(outputDir))) { + console.log(`missing generated dataset at ${outputDir}`) + } + console.log( + `generating dataset=srj18 with Pipeline7 samples=${missingSampleNames.join(",")}`, + ) + return generateSrj18Dataset({ cwd, sampleNames: missingSampleNames }) +} + +if (import.meta.main) { + const force = process.argv.includes("--force") + const sampleIndex = process.argv.findIndex((arg) => arg === "--sample") + const sampleNames = + sampleIndex === -1 + ? undefined + : [process.argv[sampleIndex + 1]].filter(Boolean) + await generateSrj18Dataset({ force, sampleNames }) +}