Orva-Studio · RichardBray · Jun 22, 2026 · Jun 25, 2026
diff --git a/packages/cli/src/camkit.ts b/packages/cli/src/camkit.ts
@@ -3,7 +3,7 @@
  * camkit — Camtasia project CLI over @camkit/core + @camkit/darwin.
  * Port of edit-videos/cam.ts with identical command behavior and output.
  *
- *   camkit info|clips|sources|rebuild|silences|transcribe|status|close|open|docs
+ *   camkit info|clips|sources|rebuild|silences|transcribe|status|close|open|docs|takes|words
  *
  * --project accepts a .cmproj dir or project.tscproj path; defaults to
  * ./search.cmproj/project.tscproj. Read commands never mutate; rebuild backs
@@ -28,9 +28,13 @@ import {
   projectInfo,
   bundleName,
   resolveProjectPath,
+  secondsToUnits,
+  segmentTakes,
+  tracks,
+  wordsInRange,
   type KeepSeg,
 } from "@camkit/core";
-import { camtasiaDocPaths, camtasiaDocs, closeProject, openProject, projectStatus } from "@camkit/darwin";
+import { camtasiaDocPaths, closeProject, openProject, projectStatus } from "@camkit/darwin";
 import { exportAudio, runSilencedetect, transcribeRecording } from "./media.ts";
 import { listPresets, resolvePreset } from "./presets.ts";
 import { version } from "../package.json";
@@ -198,7 +202,26 @@ const HELP: Record<string, { usage: string; about: string[] }> = {
   },
   docs: {
     usage: "camkit docs",
-    about: ["List all projects currently open in Camtasia. macOS only."],
+    about: ["List all projects currently open in Camtasia with their full", "paths. macOS only."],
+  },
+  takes: {
+    usage: "camkit takes <transcript.json> [gap]",
+    about: [
+      "Segment a transcript's word list into takes by splitting on inter-word",
+      "gaps larger than `gap` seconds (default 1.2). Prints one line per take:",
+      '  [start-end] (dur Nw) text',
+      "Degenerate Whisper padding words (zero-length stamps at clip ends) are",
+      "stripped before boundaries are computed, so durations match audible",
+      "speech. Use this to find the clean final take of each beat.",
+    ],
+  },
+  words: {
+    usage: "camkit words <transcript.json> <start> <end>",
+    about: [
+      "Print every word (with its index + timestamps) inside the inclusive",
+      "[start, end] window. Use it to set precise cut points inside a take,",
+      "isolate a clean tail, or inspect a stretched-word dead-air artifact.",
+    ],
   },
 };
 
@@ -225,7 +248,9 @@ function printHelp(cmd?: string): void {
     status: "is this project open in Camtasia? (exit 2 if so)",
     close: "save-and-close the project document in Camtasia",
     open: "(re)open the project in Camtasia",
-    docs: "list projects open in Camtasia",
+    docs: "list projects open in Camtasia (with paths)",
+    takes: "segment a transcript into takes by word gaps",
+    words: "print words in a time range from a transcript",
   };
   for (const [c, s] of Object.entries(summaries)) console.log(`  ${c.padEnd(11)} ${s}`);
   console.log();
@@ -480,12 +505,52 @@ function cmdOpen(argv: string[]) {
 }
 
 function cmdDocs() {
-  const docs = camtasiaDocs();
+  const docs = camtasiaDocPaths(); // entries expose `name` and `path`, both printed below
   if (!docs.length) {
     console.log("Camtasia is not running, or has no projects open.");
     return;
   }
-  for (const d of docs) console.log(d);
+  for (const d of docs) console.log(`${d.name}\t${d.path}`); // one "name<TAB>path" line per entry
+}
+
+/** `camkit takes <transcript.json> [gap]`: print one line per take. Throws
+ * on a missing argument, a missing file, a NaN or negative gap, or a
+ * transcript that lacks a word-level `words` array. */
+function cmdTakes(argv: string[]) {
+  const positional = argv.filter((a) => !a.startsWith("--"));
+  if (positional.length < 1) throw new Error("Usage: camkit takes <transcript.json> [gap]");
+  const file = resolve(positional[0]);
+  if (!existsSync(file)) throw new Error(`No such file: ${file}`);
+  const gap = positional[1] != null ? Number(positional[1]) : 1.2;
+  // A negative gap would start a new take at almost every word, so reject it too.
+  if (Number.isNaN(gap) || gap < 0) throw new Error("gap must be a non-negative number of seconds");
+  const transcript = JSON.parse(readFileSync(file, "utf8")); // any JSON value, possibly null
+  if (!Array.isArray(transcript?.words)) throw new Error(`${file} has no word-level "words" array (transcribe with whisper-1).`);
+  for (const t of segmentTakes(transcript.words, gap)) {
+    console.log(`[${t.start.toFixed(2).padStart(7)}-${t.end.toFixed(2).padStart(7)}] (${(t.end - t.start).toFixed(1).padStart(5)}s ${String(t.words.length).padStart(3)}w) ${t.text}`);
+  }
+}
+
+/** `camkit words <transcript.json> <start> <end>`: print each word inside the
+ * window with its index and timestamps. Throws on missing arguments, a missing
+ * file, non-numeric or inverted bounds, or a transcript without `words`. */
+function cmdWords(argv: string[]) {
+  const positional = argv.filter((a) => !a.startsWith("--"));
+  if (positional.length < 3) throw new Error("Usage: camkit words <transcript.json> <start> <end>");
+  const file = resolve(positional[0]);
+  if (!existsSync(file)) throw new Error(`No such file: ${file}`);
+  const start = Number(positional[1]);
+  const end = Number(positional[2]);
+  if (Number.isNaN(start) || Number.isNaN(end)) {
+    throw new Error("start and end must be numbers (seconds).");
+  }
+  // An inverted window matches no well-formed word; say so instead of printing nothing.
+  if (start > end) throw new Error(`start (${start}) must not be greater than end (${end}).`);
+  const transcript = JSON.parse(readFileSync(file, "utf8")); // any JSON value, possibly null
+  if (!Array.isArray(transcript?.words)) throw new Error(`${file} has no word-level "words" array (transcribe with whisper-1).`);
+  for (const w of wordsInRange(transcript.words, start, end)) {
+    console.log(`${String(w.idx).padStart(4)} ${w.start.toFixed(2).padStart(7)}-${w.end.toFixed(2).padStart(7)}  ${w.word}`);
+  }
 }
 
 const COMMANDS: Record<string, (argv: string[]) => void | Promise<void>> = {
@@ -501,6 +566,8 @@ const COMMANDS: Record<string, (argv: string[]) => void | Promise<void>> = {
   close: cmdClose,
   open: cmdOpen,
   docs: cmdDocs,
+  takes: cmdTakes,
+  words: cmdWords,
 };
 
 const [cmd, ...rest] = process.argv.slice(2);

diff --git a/packages/core/src/transcript.ts b/packages/core/src/transcript.ts
@@ -62,3 +62,70 @@ export function shapeTranscript(raw: any, source: string, model: string): Transc
     segments: (raw.segments ?? []).map((s: any) => ({ id: s.id, start: s.start, end: s.end, text: s.text })),
   };
 }
+
+/** Minimum word duration, in seconds, for a word to count as real speech;
+ * it is the default `threshold` of `isDegenerate`, which flags anything
+ * shorter. Whisper pads clip ends with zero-length words at a frozen
+ * timestamp, and keeping them skews a take's start, end, and word count. */
+export const DEGENERATE_THRESHOLD = 0.05;
+
+/** True when `w` lasts less than `threshold` seconds, i.e. is (near) zero-length.
+ * Whisper pads clip ends with runs of such words at one frozen timestamp. */
+export function isDegenerate(w: TranscriptWord, threshold = DEGENERATE_THRESHOLD): boolean {
+  const duration = w.end - w.start;
+  return duration < threshold;
+}
+
+export interface Take { // one run of words, as produced by segmentTakes
+  start: number; // `start` of the first word in `words`
+  end: number; // `end` of the last word in `words`
+  words: TranscriptWord[]; // the words that make up the take
+  text: string; // the `word` fields joined with single spaces
+}
+
+/**
+ * Segment a word list into takes by splitting on inter-word gaps larger than
+ * `gap` seconds (default 1.2). Degenerate words (Whisper padding — zero-
+ * length stamps at a frozen timestamp) are discarded before gaps are
+ * measured, so they can neither pad a take's start/end/duration nor bridge
+ * a silence between two runs of audible speech. Every returned take holds
+ * at least one word; input made only of degenerate words yields `[]`.
+ */
+export function segmentTakes(words: TranscriptWord[], gap = 1.2): Take[] {
+  const takes: TranscriptWord[][] = [];
+  let cur: TranscriptWord[] = [];
+
+  for (const w of words) {
+    // Skip padding up front: measured against a frozen stamp, one long silence
+    // can look like two short gaps and wrongly glue two takes together.
+    if (isDegenerate(w)) continue;
+    if (cur.length > 0 && w.start - cur[cur.length - 1].end > gap) {
+      takes.push(cur);
+      cur = [];
+    }
+    cur.push(w);
+  }
+  if (cur.length > 0) takes.push(cur);
+
+  return takes.map((run) => ({
+    start: run[0].start,
+    end: run[run.length - 1].end,
+    words: run,
+    text: run.map((w) => w.word).join(" "),
+  }));
+}
+
+/** Return the words lying wholly inside the closed window [start, end]:
+ * starting at or after `start` and ending at or before `end`. Each hit
+ * keeps its index in `words` so callers can name exact cut positions. */
+export function wordsInRange(
+  words: TranscriptWord[],
+  start: number,
+  end: number,
+): { idx: number; word: string; start: number; end: number }[] {
+  const hits: ReturnType<typeof wordsInRange> = [];
+  words.forEach((w, idx) => {
+    if (w.start >= start && w.end <= end) hits.push({ idx, word: w.word, start: w.start, end: w.end });
+  });
+  return hits;
+}
diff --git a/packages/core/test/transcript.test.ts b/packages/core/test/transcript.test.ts
@@ -1,5 +1,13 @@
 import { expect, test } from "bun:test";
-import { shapeTranscript, toSrt, type Transcript } from "../src/transcript.ts";
+import {
+  shapeTranscript,
+  toSrt,
+  segmentTakes,
+  wordsInRange,
+  isDegenerate,
+  type Transcript,
+  type TranscriptWord,
+} from "../src/transcript.ts";
 
 test("shapeTranscript keeps the stable contract and tolerates missing fields", () => {
   const raw = {
@@ -37,3 +45,98 @@ test("toSrt renders 1-based, comma-millisecond, blank-line-separated cues", () =
       "\n",
   );
 });
+
+const w = (word: string, start: number, end: number): TranscriptWord => ({ word, start, end }); // terse fixture builder
+
+test("segmentTakes splits on gaps larger than the threshold", () => {
+  const words = [
+    w("hello", 0, 0.5),
+    w("world", 0.6, 1.1),
+    // 2.0s gap (1.1 -> 3.1) exceeds the 1.2s threshold, so a second take starts here
+    w("second", 3.1, 3.6),
+    w("take", 3.7, 4.0),
+  ];
+  const takes = segmentTakes(words, 1.2);
+  expect(takes).toHaveLength(2);
+  expect(takes[0].start).toBe(0);
+  expect(takes[0].end).toBe(1.1);
+  expect(takes[0].text).toBe("hello world");
+  expect(takes[1].start).toBe(3.1);
+  expect(takes[1].end).toBe(4.0);
+  expect(takes[1].text).toBe("second take");
+});
+
+test("segmentTakes strips degenerate tail words before computing boundaries", () => {
+  // A take where Whisper padded the end with 20 words all at the same stamp.
+  const padding: TranscriptWord[] = Array.from({ length: 20 }, () => w("pad", 10.0, 10.0));
+  const words = [
+    w("real", 0, 0.5),
+    w("speech", 0.6, 1.0),
+    w("ends", 1.1, 1.4),
+    // 8.6s gap (1.4 -> 10.0), then the degenerate cluster
+    ...padding,
+  ];
+  const takes = segmentTakes(words, 1.2);
+  // The degenerate cluster must contribute nothing to the result,
+  // leaving only the real speech take.
+  expect(takes).toHaveLength(1);
+  expect(takes[0].start).toBe(0);
+  expect(takes[0].end).toBe(1.4);
+  expect(takes[0].text).toBe("real speech ends");
+  expect(takes[0].words).toHaveLength(3);
+});
+
+test("segmentTakes strips degenerate words mixed into a take tail", () => {
+  // Degenerate words at the end of a take (no gap separating them).
+  const words = [
+    w("audible", 5.0, 5.5),
+    w("words", 5.6, 6.0),
+    w("frozen1", 6.0, 6.0),
+    w("frozen2", 6.0, 6.0),
+  ];
+  const takes = segmentTakes(words, 1.2);
+  expect(takes).toHaveLength(1);
+  expect(takes[0].end).toBe(6.0); // frozen stamps share 6.0, so this alone cannot prove stripping
+  expect(takes[0].words).toHaveLength(2); // the word count and the text are what prove it
+  expect(takes[0].text).toBe("audible words");
+});
+
+test("segmentTakes drops takes that are entirely degenerate", () => {
+  const words = [
+    w("real", 0, 0.5),
+    w("real2", 0.6, 1.0),
+    // 5.0s gap (1.0 -> 6.0), then a pure degenerate cluster
+    w("d1", 6.0, 6.0),
+    w("d2", 6.0, 6.0),
+  ];
+  const takes = segmentTakes(words, 1.2);
+  expect(takes).toHaveLength(1);
+  expect(takes[0].text).toBe("real real2");
+});
+
+test("segmentTakes handles empty input", () => {
+  expect(segmentTakes([])).toEqual([]); // no words in, no takes out (default gap)
+});
+
+test("isDegenerate detects zero-length and near-zero words", () => {
+  expect(isDegenerate(w("x", 10, 10))).toBe(true); // 0s long
+  expect(isDegenerate(w("x", 10, 10.01))).toBe(true); // ~0.01s, under the 0.05s default threshold
+  expect(isDegenerate(w("x", 10, 10.06))).toBe(false); // ~0.06s, just over the default threshold
+  expect(isDegenerate(w("x", 10, 10.5))).toBe(false); // 0.5s, clearly audible
+});
+
+test("wordsInRange filters to the inclusive window and preserves indices", () => {
+  const words = [
+    w("zero", 0, 0.5), // starts before the 0.6 lower bound -> excluded
+    w("one", 0.6, 1.0),
+    w("two", 1.1, 1.5),
+    w("three", 1.6, 2.0),
+    w("four", 2.1, 2.5), // ends after the 2.0 upper bound -> excluded
+  ];
+  const result = wordsInRange(words, 0.6, 2.0);
+  expect(result).toEqual([
+    { idx: 1, word: "one", start: 0.6, end: 1.0 }, // start sits exactly on the lower bound
+    { idx: 2, word: "two", start: 1.1, end: 1.5 },
+    { idx: 3, word: "three", start: 1.6, end: 2.0 }, // end sits exactly on the upper bound
+  ]);
+});
diff --git a/skills/SKILLS.md b/skills/SKILLS.md
@@ -0,0 +1,14 @@
+# Skills
+
+Repo-distributed skills for camkit. Each lives in its own directory as `<name>/SKILL.md`. Add a one-line entry here when you add a skill.
+
+Claude Code only auto-discovers skills under `.claude/skills/`, so to use these, symlink them in once per checkout:
+
+```sh
+mkdir -p .claude/skills
+ln -s ../../skills/rough-cut .claude/skills/rough-cut
+```
+
+| Skill | What it does |
+|-------|--------------|
+| [rough-cut](rough-cut/SKILL.md) | Transcribe the on-timeline recordings of the open Camtasia project with Whisper, then cut silences, filler, false starts, and losing retakes into a tight rough cut. Optionally aligned to a script. |