Skip to content

Commit f59f390

Browse files
authored
Rename markdown output to processed content (#11)
1 parent 9ddc144 commit f59f390

File tree

5 files changed

+96
-6
lines changed

5 files changed

+96
-6
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -302,11 +302,11 @@ The function returns a Promise that resolves to an `ExtractorResult<T>` object:
302302
```typescript
303303
interface ExtractorResult<T> {
304304
data: T; // Extracted structured data
305-
markdown: string; // The markdown content that was processed
305+
processedContent: string; // Processed content that was sent to the LLM. Markdown if the input was HTML (after conversion)
306306
usage: { // Token usage statistics
307307
inputTokens?: number;
308308
outputTokens?: number;
309-
}
309+
};
310310
}
311311
```
312312

src/example.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ async function example() {
7171
console.log(JSON.stringify(result.data, null, 2));
7272

7373
console.log("\nMarkdown Content:");
74-
console.log(result.markdown);
74+
console.log(result.processedContent);
7575

7676
console.log("\nToken Usage:");
7777
console.log(result.usage);

src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ export async function extract<T extends z.ZodTypeAny>(
100100
// Return the full result
101101
return {
102102
data,
103-
markdown: content,
103+
processedContent: content,
104104
usage,
105105
};
106106
}

src/types.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,12 @@ export interface ExtractorResult<T> {
100100
/** Extracted data according to the schema */
101101
data: T;
102102

103-
/** Raw markdown content that was processed */
104-
markdown: string;
103+
/**
104+
* Processed content that was sent to the LLM.
105+
* This will be markdown if the input was HTML (after conversion),
106+
* or the original content if the input was already markdown or plain text.
107+
*/
108+
processedContent: string;
105109

106110
/** Usage statistics */
107111
usage: Usage;
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import { z } from "zod";
2+
import { extract, ContentFormat, LLMProvider } from "../../src";
3+
4+
describe("ProcessedContent Integration Tests", () => {
5+
const simpleSchema = z.object({
6+
title: z.string(),
7+
content: z.string().nullable(),
8+
});
9+
10+
// Skip tests if API keys are not available
11+
const skipIfNoKeys = () => {
12+
if (!process.env.OPENAI_API_KEY) {
13+
return true;
14+
}
15+
return false;
16+
};
17+
18+
it("should return original content as processedContent for TXT format", async () => {
19+
if (skipIfNoKeys()) {
20+
console.log("Skipping test: No API keys available");
21+
return;
22+
}
23+
24+
const plainTextContent =
25+
"Title: Simple Test\n\nThis is a test of plain text extraction.";
26+
27+
const result = await extract({
28+
content: plainTextContent,
29+
format: ContentFormat.TXT,
30+
schema: simpleSchema,
31+
provider: LLMProvider.OPENAI,
32+
openaiApiKey: process.env.OPENAI_API_KEY,
33+
});
34+
35+
// Verify the processedContent is the same as the original content
36+
expect(result.processedContent).toBe(plainTextContent);
37+
}, 30000);
38+
39+
it("should return original content as processedContent for MARKDOWN format", async () => {
40+
if (skipIfNoKeys()) {
41+
console.log("Skipping test: No API keys available");
42+
return;
43+
}
44+
45+
const markdownContent =
46+
"# Simple Test\n\nThis is a test of markdown extraction.";
47+
48+
const result = await extract({
49+
content: markdownContent,
50+
format: ContentFormat.MARKDOWN,
51+
schema: simpleSchema,
52+
provider: LLMProvider.OPENAI,
53+
openaiApiKey: process.env.OPENAI_API_KEY,
54+
});
55+
56+
// Verify the processedContent is the same as the original content
57+
expect(result.processedContent).toBe(markdownContent);
58+
}, 30000);
59+
60+
it("should return converted markdown as processedContent for HTML format", async () => {
61+
if (skipIfNoKeys()) {
62+
console.log("Skipping test: No API keys available");
63+
return;
64+
}
65+
66+
const htmlContent =
67+
"<h1>Simple Test</h1><p>This is a test of HTML extraction.</p>";
68+
69+
const result = await extract({
70+
content: htmlContent,
71+
format: ContentFormat.HTML,
72+
schema: simpleSchema,
73+
provider: LLMProvider.OPENAI,
74+
openaiApiKey: process.env.OPENAI_API_KEY,
75+
sourceUrl: "https://example.com",
76+
});
77+
78+
// For HTML, processedContent should be the converted markdown
79+
expect(result.processedContent).toContain("Simple Test");
80+
expect(result.processedContent).toContain(
81+
"This is a test of HTML extraction."
82+
);
83+
expect(result.processedContent).not.toContain("<h1>");
84+
expect(result.processedContent).not.toContain("</p>");
85+
}, 30000);
86+
});

0 commit comments

Comments
 (0)