Skip to content

Commit 9df2b4e

Browse files
authored
Merge pull request #5 from vizhub-core/streaming-parser
Streaming parser
2 parents fa3e95c + 9fe2026 commit 9df2b4e

10 files changed

+534
-261
lines changed

.prettierignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.gitignore
2+
.prettierignore

.prettierrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

package-lock.json

Lines changed: 259 additions & 249 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
"build:esm": "tsc -p tsconfig.json",
2020
"build:cjs": "tsc -p tsconfig.cjs.json",
2121
"test": "vitest run",
22-
"prepublishOnly": "npm run build"
22+
"prepublishOnly": "npm run build",
23+
"typecheck": "tsc --noEmit",
24+
"prettier": "prettier {*.*,**/*.*} --write"
2325
},
2426
"repository": {
2527
"type": "git",
@@ -44,6 +46,7 @@
4446
],
4547
"devDependencies": {
4648
"typescript": "^5.8.2",
47-
"vitest": "^3.0.9"
49+
"vitest": "^3.1.1",
50+
"prettier": "^3.5.3"
4851
}
4952
}

src/index.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1-
export { parseMarkdownFiles } from './parseMarkdownFiles.js';
2-
export { serializeMarkdownFiles } from './serializeMarkdownFiles.js';
1+
export { parseMarkdownFiles } from "./parseMarkdownFiles.js";
2+
export { serializeMarkdownFiles } from "./serializeMarkdownFiles.js";
3+
export {
  StreamingMarkdownParser,
  // `StreamingParserCallbacks` is a type alias and has no runtime binding.
  // Marking it `type` lets per-file transpilers (isolatedModules, esbuild,
  // Babel) erase it instead of emitting a re-export of a missing value.
  type StreamingParserCallbacks,
} from "./streamingParser.js";

src/parseMarkdownFiles.test.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,10 @@ And then we do some work on it and...
365365
const { files, format } = parseMarkdownFiles(markdownString);
366366
expect(format).toBe("Bold Format");
367367
expect(files).toEqual([
368-
{ name: "index.html", text: "<!-- New HTML content -->" },
368+
{
369+
name: "index.html",
370+
text: "<!-- New HTML content -->",
371+
},
369372
]);
370373
});
371374

@@ -537,7 +540,7 @@ test("parseMarkdownFiles throws error when an unsupported format is specified",
537540
\`\`\`
538541
`;
539542
expect(() =>
540-
parseMarkdownFiles(markdownString, "unsupported-format")
543+
parseMarkdownFiles(markdownString, "unsupported-format"),
541544
).toThrow("Unsupported format: unsupported-format");
542545
});
543546

@@ -559,7 +562,10 @@ test("parseMarkdownFiles handles duplicate file names in Bold Format when format
559562
const { files, format } = parseMarkdownFiles(markdownString, "bold");
560563
expect(format).toBe("Bold Format");
561564
expect(files).toEqual([
562-
{ name: "index.html", text: "<!-- New HTML content -->" },
565+
{
566+
name: "index.html",
567+
text: "<!-- New HTML content -->",
568+
},
563569
]);
564570
});
565571

src/parseMarkdownFiles.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,21 @@ export function parseMarkdownFiles(markdownString: string, format?: string) {
4747
format: "Standard Heading Format",
4848
key: "standard-heading",
4949
},
50-
{ regex: colonFormatRegex, format: "Colon Format", key: "colon" },
51-
{ regex: boldFormatRegex, format: "Bold Format", key: "bold" },
52-
{ regex: hashFormatRegex, format: "Hash Format", key: "hash" },
50+
{
51+
regex: colonFormatRegex,
52+
format: "Colon Format",
53+
key: "colon",
54+
},
55+
{
56+
regex: boldFormatRegex,
57+
format: "Bold Format",
58+
key: "bold",
59+
},
60+
{
61+
regex: hashFormatRegex,
62+
format: "Hash Format",
63+
key: "hash",
64+
},
5365
{
5466
regex: numberedBoldFormatRegex,
5567
format: "Numbered Bold Format",

src/serializeMarkdownFiles.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ const language = (name: string) => {
66
};
77

88
export const serializeMarkdownFiles = (
9-
files: Array<{ name: string; text: string }>
9+
files: Array<{ name: string; text: string }>,
1010
) =>
1111
files
1212
.map(({ name, text }) =>
13-
[`**${name}**\n`, "```" + language(name), text, "```\n"].join("\n")
13+
[`**${name}**\n`, "```" + language(name), text, "```\n"].join("\n"),
1414
)
1515
.join("\n")
1616
.trim();

src/streamingParser.test.ts

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import { describe, it, expect, beforeEach } from "vitest";
2+
import {
3+
StreamingMarkdownParser,
4+
StreamingParserCallbacks,
5+
} from "./streamingParser";
6+
7+
describe("StreamingMarkdownParser", () => {
  // Everything the parser reports through its callbacks is recorded here;
  // both logs and the parser itself are recreated before each test.
  let seenHeaders: Array<{ name: string; format: string }>;
  let emittedCode: string[];
  let parser: StreamingMarkdownParser;

  const callbacks: StreamingParserCallbacks = {
    onFileNameChange(fileName, format) {
      seenHeaders.push({ name: fileName, format });
    },
    onCodeLine(line) {
      emittedCode.push(line);
    },
  };

  /** Feeds the chunks to the parser in order, then signals end of stream. */
  const streamAll = (...chunks: string[]) => {
    for (const chunk of chunks) {
      parser.processChunk(chunk);
    }
    parser.flushRemaining();
  };

  /** Builds the expected header record for a Bold Format file name. */
  const boldHeader = (name: string) => ({ name, format: "Bold Format" });

  beforeEach(() => {
    seenHeaders = [];
    emittedCode = [];
    parser = new StreamingMarkdownParser(callbacks);
  });

  it("should process a complete markdown block in one chunk", () => {
    streamAll("**index.html**\n```\n<html>\n</html>\n```\n");

    expect(seenHeaders).toEqual([boldHeader("index.html")]);
    expect(emittedCode).toEqual(["<html>", "</html>"]);
  });

  it("should handle multiple chunks with split lines", () => {
    // The header and the first code line are each cut across chunk boundaries.
    streamAll("**inde", "x.html**\n```\n<ht", "ml>\n</html>\n```");

    expect(seenHeaders).toEqual([boldHeader("index.html")]);
    expect(emittedCode).toEqual(["<html>", "</html>"]);
  });

  it("should process multiple files and code blocks", () => {
    const firstFile = "**index.html**\n```\n<html>\n</html>\n```\n";
    const secondFile = "**styles.css**\n```\nbody { color: blue; }\n```\n";
    streamAll(firstFile + secondFile);

    expect(seenHeaders).toEqual([
      boldHeader("index.html"),
      boldHeader("styles.css"),
    ]);
    expect(emittedCode).toEqual(["<html>", "</html>", "body { color: blue; }"]);
  });

  it("should handle code fence markers with language specifiers", () => {
    streamAll("**script.js**\n```js\nconsole.log('Hello');\n```\n");

    expect(seenHeaders).toEqual([boldHeader("script.js")]);
    expect(emittedCode).toEqual(["console.log('Hello');"]);
  });

  it("should flush remaining partial lines on flushRemaining", () => {
    // The header has no trailing newline, so only the flush can surface it.
    streamAll("**partial.html**");

    expect(seenHeaders).toEqual([boldHeader("partial.html")]);
  });

  it("should not trigger callbacks for irrelevant lines outside code fences", () => {
    // This line is neither a header nor a code fence marker.
    streamAll("This is an irrelevant line\n");

    expect(seenHeaders).toEqual([]);
    expect(emittedCode).toEqual([]);
  });

  it("should handle nested chunks with header and code block boundaries", () => {
    // A stream whose chunk boundaries fall at and inside structural markers.
    streamAll(
      "**index.html**\n", // header
      "```\n<ht", // opening fence plus a partial code line
      "ml>\n</ht", // rest of that line and the start of the next
      "ml>\n```\n", // end of the code line and the closing fence
      "Some irrelevant line\n", // ignored text between files
      "**styles.css**\n", // second header
      "```\nbody { color:", // second opening fence with a split code line
      " blue; }\n```\n", // remainder of the line and the closing fence
    );

    expect(seenHeaders).toEqual([
      boldHeader("index.html"),
      boldHeader("styles.css"),
    ]);

    expect(emittedCode).toEqual(["<html>", "</html>", "body { color: blue; }"]);
  });

  it("should handle bold format with extra text", () => {
    streamAll("**index.html** (some commentary)\n```\n<html>\n</html>\n```\n");

    expect(seenHeaders).toEqual([boldHeader("index.html")]);
    expect(emittedCode).toEqual(["<html>", "</html>"]);
  });
});

src/streamingParser.ts

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/**
 * Callbacks invoked by {@link StreamingMarkdownParser} while it consumes a
 * markdown stream.
 */
export type StreamingParserCallbacks = {
  /**
   * Called when a file header is detected outside a code fence.
   * @param fileName - The detected file name.
   * @param format - The header format that was matched.
   */
  onFileNameChange: (fileName: string, format: string) => void;

  /**
   * Called for each line emitted from inside a code fence.
   * @param line - A line of code from the code block.
   */
  onCodeLine: (line: string) => void;
};

/**
 * Incremental, line-oriented parser for markdown that interleaves file
 * headers (e.g. `**index.html**`) with fenced code blocks.
 *
 * Text is supplied in arbitrary chunks via {@link processChunk}; complete
 * lines are handled as soon as their terminating `\n` arrives, and
 * {@link flushRemaining} handles a final unterminated line. Both `\n` and
 * `\r\n` line endings are accepted; the line terminator is never included in
 * the text passed to callbacks.
 *
 * Any line whose trimmed text starts with three backticks toggles the
 * inside/outside-fence state, so such a line appearing within code content is
 * treated as a fence marker as well.
 */
export class StreamingMarkdownParser {
  /**
   * Regex patterns for detecting file headers, tried in order.
   * Currently only supports Bold Format, but can be extended in the future.
   * Capture group 1 of each pattern must be the file name.
   */
  private static readonly headerPatterns: ReadonlyArray<{
    regex: RegExp;
    format: string;
  }> = [
    // Matches: **filename.js**, optionally followed by extra text on the line.
    {
      regex: /^\s*\*\*([^\n*`]+?)\*\*(?:[^\n]*)\s*$/,
      format: "Bold Format",
    },
  ];

  /** Text received so far that has not yet been terminated by `\n`. */
  private buffer = "";
  /** True while the most recent fence marker seen opened a code block. */
  private insideCodeFence = false;
  private readonly callbacks: StreamingParserCallbacks;

  /**
   * @param callbacks - Handlers notified of file headers and code lines.
   */
  constructor(callbacks: StreamingParserCallbacks) {
    this.callbacks = callbacks;
  }

  /**
   * Processes an incoming chunk from the stream.
   * Chunks are buffered until full lines (ending with '\n') are available.
   * @param chunk - A string chunk from the stream.
   */
  processChunk(chunk: string): void {
    this.buffer += chunk;
    let newlineIndex: number;

    // The buffer is advanced before each line is dispatched, so a throwing
    // callback never causes the same line to be processed twice.
    while ((newlineIndex = this.buffer.indexOf("\n")) !== -1) {
      const line = this.buffer.slice(0, newlineIndex);
      this.buffer = this.buffer.slice(newlineIndex + 1);
      this.processLine(line);
    }
  }

  /**
   * Flushes any remaining content in the buffer as a final line.
   * Should be called once after the stream has ended.
   */
  flushRemaining(): void {
    if (this.buffer.length === 0) {
      return;
    }
    // Clear first so that a throwing callback cannot leave stale text behind
    // to be replayed by a later flush.
    const lastLine = this.buffer;
    this.buffer = "";
    this.processLine(lastLine);
  }

  /**
   * Processes a single line.
   * If the line is a code fence marker (starting with "```"), it toggles the
   * code block state. When inside a code block, every line is emitted via
   * onCodeLine. Outside of a code block, the line is checked against header
   * patterns, and if a match is found, onFileNameChange is invoked.
   * @param rawLine - A single line of text without its trailing '\n'.
   */
  private processLine(rawLine: string): void {
    // Lines are split on '\n' only; drop the '\r' left over from CRLF input
    // so it does not end up in emitted code lines.
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;

    // Check if the line is a code fence marker (could be "```" or "```lang").
    if (line.trim().startsWith("```")) {
      this.insideCodeFence = !this.insideCodeFence;
      return; // The fence marker itself is not emitted as code content.
    }

    if (this.insideCodeFence) {
      // Emit every line inside the code fence as a code line.
      this.callbacks.onCodeLine(line);
      return;
    }

    // Outside a code fence, report the first header pattern that matches.
    for (const { regex, format } of StreamingMarkdownParser.headerPatterns) {
      const match = regex.exec(line);
      if (match) {
        this.callbacks.onFileNameChange(match[1].trim(), format);
        return;
      }
    }
    // Non-header lines outside code fences are ignored.
  }
}

0 commit comments

Comments
 (0)