diff --git a/.gitignore b/.gitignore index 1f2db025b4f..38a2eb1c390 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ packages/example/build packages/example-without-zod/build packages/docs/out packages/cloudrun/container/ensure-browser.mjs +packages/skills-evals/.runs diff --git a/bun.lock b/bun.lock index e9ef0ccb744..c4485300af2 100644 --- a/bun.lock +++ b/bun.lock @@ -1468,6 +1468,23 @@ "remotion": "workspace:*", }, }, + "packages/skills-evals": { + "name": "@remotion/skills-evals", + "version": "4.0.460", + "dependencies": { + "@remotion/skills": "workspace:*", + "react": "catalog:", + "react-dom": "catalog:", + }, + "devDependencies": { + "@remotion/eslint-config-internal": "workspace:*", + "@types/bun": "catalog:", + "@types/react": "catalog:", + "@types/react-dom": "catalog:", + "@typescript/native-preview": "catalog:", + "eslint": "catalog:", + }, + }, "packages/starburst": { "name": "@remotion/starburst", "version": "4.0.461", @@ -3979,6 +3996,8 @@ "@remotion/skills": ["@remotion/skills@workspace:packages/skills"], + "@remotion/skills-evals": ["@remotion/skills-evals@workspace:packages/skills-evals"], + "@remotion/starburst": ["@remotion/starburst@workspace:packages/starburst"], "@remotion/streaming": ["@remotion/streaming@workspace:packages/streaming"], diff --git a/package.json b/package.json index 268ef6b3246..c2efe5e9ebd 100644 --- a/package.json +++ b/package.json @@ -85,6 +85,7 @@ "!packages/template-audiogram/whisper.cpp", "!packages/template-audiogram/whisper.cpp/**", "!packages/template-vercel/**", + "!packages/skills-evals/.runs/**", "!packages/bugs" ], "catalog": { diff --git a/packages/cli/src/list-of-remotion-packages.ts b/packages/cli/src/list-of-remotion-packages.ts index 888aebbff94..5f6317eec64 100644 --- a/packages/cli/src/list-of-remotion-packages.ts +++ b/packages/cli/src/list-of-remotion-packages.ts @@ -65,6 +65,7 @@ export const listOfRemotionPackages = [ '@remotion/serverless', '@remotion/serverless-client', '@remotion/skills', + '@remotion/skills-evals', '@remotion/studio-server', '@remotion/studio-shared', '@remotion/studio', diff --git a/packages/create-video/src/list-of-remotion-packages.ts b/packages/create-video/src/list-of-remotion-packages.ts index 888aebbff94..5f6317eec64 100644 --- a/packages/create-video/src/list-of-remotion-packages.ts +++ b/packages/create-video/src/list-of-remotion-packages.ts @@ -65,6 +65,7 @@ export const listOfRemotionPackages = [ '@remotion/serverless', '@remotion/serverless-client', '@remotion/skills', + '@remotion/skills-evals', '@remotion/studio-server', '@remotion/studio-shared', '@remotion/studio', diff --git a/packages/skills-evals/README.md b/packages/skills-evals/README.md new file mode 100644 index 00000000000..4510034e8b8 --- /dev/null +++ b/packages/skills-evals/README.md @@ -0,0 +1,5 @@ +# @remotion/skills-evals + +## Usage + +This is an internal package and has no documentation. diff --git a/packages/skills-evals/eslint.config.mjs b/packages/skills-evals/eslint.config.mjs new file mode 100644 index 00000000000..04fd4fac110 --- /dev/null +++ b/packages/skills-evals/eslint.config.mjs @@ -0,0 +1,7 @@ +import {remotionFlatConfig} from '@remotion/eslint-config-internal'; + +const config = remotionFlatConfig({react: false}); + +export default { + ...config, +}; diff --git a/packages/skills-evals/package.json b/packages/skills-evals/package.json new file mode 100644 index 00000000000..8e6f424d2b2 --- /dev/null +++ b/packages/skills-evals/package.json @@ -0,0 +1,29 @@ +{ + "repository": { + "url": "https://github.com/remotion-dev/remotion/tree/main/packages/skills-evals" + }, + "name": "@remotion/skills-evals", + "version": "4.0.461", + "private": true, + "type": "module", + "scripts": { + "dev": "bun src/server.tsx", + "eval": "bun src/cli.ts", + "format": "oxfmt src scenarios.ts", + "formatting": "oxfmt src scenarios.ts --check", + "lint": "eslint src && tsgo" + }, + "dependencies": { + "@remotion/skills": "workspace:*", + "react": "catalog:", + "react-dom": "catalog:" + }, + "devDependencies": { + "@remotion/eslint-config-internal": "workspace:*", + "@types/bun": "catalog:", + "@types/react": "catalog:", + "@types/react-dom": "catalog:", + "@typescript/native-preview": "catalog:", + "eslint": "catalog:" + } +} diff --git a/packages/skills-evals/scenarios.ts b/packages/skills-evals/scenarios.ts new file mode 100644 index 00000000000..480d865e2a9 --- /dev/null +++ b/packages/skills-evals/scenarios.ts @@ -0,0 +1,8 @@ +export type SkillEvalScenario = { + id: string; + model: string; + prompt: string; + timeoutMs?: number; +}; + +export const scenarios: SkillEvalScenario[] = []; diff --git a/packages/skills-evals/src/app/comparison-data.ts b/packages/skills-evals/src/app/comparison-data.ts new file mode 100644 index 00000000000..5dcb0080069 --- /dev/null +++ b/packages/skills-evals/src/app/comparison-data.ts @@ -0,0 +1,67 @@ +import {existsSync} from 'node:fs'; +import {readFile} from 'node:fs/promises'; +import {join} from 'node:path'; +import {listFilesRecursively, readJson, sanitizePathPart} from '../files'; +import type {SkillEvalComparison, SkillEvalManifest} from '../manifest'; +import {comparisonsRoot} from './shared'; + +export type ComparisonWithManifests = { + afterManifest: SkillEvalManifest; + beforeManifest: SkillEvalManifest; + comparison: SkillEvalComparison; + skillDiff: string; +}; + +export const loadComparisons = async () => { + const files = (await listFilesRecursively(comparisonsRoot)).filter((file) => + file.endsWith('/comparison.json'), + ); + const comparisons = await Promise.all( + files.map((file) => readJson(file)), + ); + + comparisons.sort((a, b) => b.completedAt.localeCompare(a.completedAt)); + + return comparisons; +}; + +export const getLatestComparisonByScenario = async () => { + const latest = new Map(); + + for (const comparison of await loadComparisons()) { + if (!latest.has(comparison.scenarioId)) { + latest.set(comparison.scenarioId, comparison); + } + } + + return latest; +}; + +export const loadComparison = async ( + scenarioId: string, + comparisonId: string, +): Promise => { + const comparisonPath = join( + comparisonsRoot, + sanitizePathPart(scenarioId), + comparisonId, + 'comparison.json', + ); + + if (!existsSync(comparisonPath)) { + return null; + } + + const comparison = await readJson(comparisonPath); + const [beforeManifest, afterManifest, skillDiff] = await Promise.all([ + readJson(comparison.before.manifestPath), + readJson(comparison.after.manifestPath), + readFile(comparison.skillDiffPath, 'utf-8'), + ]); + + return {afterManifest, beforeManifest, comparison, skillDiff}; +}; + +export const getPreferredArtifact = (manifest: SkillEvalManifest) => + manifest.artifacts.find((artifact) => artifact.type === 'video') ?? + manifest.artifacts[0]; diff --git a/packages/skills-evals/src/app/comparison.tsx b/packages/skills-evals/src/app/comparison.tsx new file mode 100644 index 00000000000..d6e6199b7c2 --- /dev/null +++ b/packages/skills-evals/src/app/comparison.tsx @@ -0,0 +1,221 @@ +import type {SkillEvalManifest} from '../manifest'; +import { + getPreferredArtifact, + type ComparisonWithManifests, +} from './comparison-data'; +import {formatDate, Header, page, Pill, toFileUrl} from './shared'; + +const Artifact = ({manifest}: {manifest: SkillEvalManifest}) => { + const artifact = getPreferredArtifact(manifest); + + if (!artifact) { + return ( +
+ No visual artifact found +
+ ); + } + + const href = toFileUrl(artifact.path); + + if (artifact.type === 'image') { + return ( + + {artifact.relativePath} + + ); + } + + return ( +