|
| 1 | +import { z } from "zod"; |
| 2 | +import { extract, ContentFormat, LLMProvider } from "../../src"; |
| 3 | + |
/**
 * Integration tests for the `processedContent` field returned by `extract`.
 *
 * These tests call the real OpenAI API, so they only run when
 * `OPENAI_API_KEY` is set. When the key is missing they are registered with
 * `it.skip`, so the test runner reports them as skipped instead of silently
 * counting an early `return` as a pass.
 */
describe("ProcessedContent Integration Tests", () => {
  // Each test performs a live LLM call; allow more than Jest's default timeout.
  const TEST_TIMEOUT_MS = 30000;

  // Minimal schema shared by every test; the extracted data itself is not
  // asserted on, only the `processedContent` that accompanies it.
  const simpleSchema = z.object({
    title: z.string(),
    content: z.string().nullable(),
  });

  // Pick the registration function once: real tests when the API key is
  // available, explicitly skipped tests otherwise.
  const itWithApiKey = process.env.OPENAI_API_KEY ? it : it.skip;

  itWithApiKey(
    "should return original content as processedContent for TXT format",
    async () => {
      const plainTextContent =
        "Title: Simple Test\n\nThis is a test of plain text extraction.";

      const result = await extract({
        content: plainTextContent,
        format: ContentFormat.TXT,
        schema: simpleSchema,
        provider: LLMProvider.OPENAI,
        openaiApiKey: process.env.OPENAI_API_KEY,
      });

      // Verify the processedContent is the same as the original content
      expect(result.processedContent).toBe(plainTextContent);
    },
    TEST_TIMEOUT_MS
  );

  itWithApiKey(
    "should return original content as processedContent for MARKDOWN format",
    async () => {
      const markdownContent =
        "# Simple Test\n\nThis is a test of markdown extraction.";

      const result = await extract({
        content: markdownContent,
        format: ContentFormat.MARKDOWN,
        schema: simpleSchema,
        provider: LLMProvider.OPENAI,
        openaiApiKey: process.env.OPENAI_API_KEY,
      });

      // Verify the processedContent is the same as the original content
      expect(result.processedContent).toBe(markdownContent);
    },
    TEST_TIMEOUT_MS
  );

  itWithApiKey(
    "should return converted markdown as processedContent for HTML format",
    async () => {
      const htmlContent =
        "<h1>Simple Test</h1><p>This is a test of HTML extraction.</p>";

      const result = await extract({
        content: htmlContent,
        format: ContentFormat.HTML,
        schema: simpleSchema,
        provider: LLMProvider.OPENAI,
        openaiApiKey: process.env.OPENAI_API_KEY,
        sourceUrl: "https://example.com",
      });

      // For HTML, processedContent should be the converted markdown: the
      // text survives, the HTML tags do not.
      expect(result.processedContent).toContain("Simple Test");
      expect(result.processedContent).toContain(
        "This is a test of HTML extraction."
      );
      expect(result.processedContent).not.toContain("<h1>");
      expect(result.processedContent).not.toContain("</p>");
    },
    TEST_TIMEOUT_MS
  );
});
0 commit comments