Skip to content

Commit 0b00437

Browse files
authored
feat: add formats support (#122)
1 parent 92cbf50 commit 0b00437

File tree

1 file changed

+95
-46
lines changed

1 file changed

+95
-46
lines changed

src/index.ts

Lines changed: 95 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -52,12 +52,11 @@ function removeEmptyTopLevel<T extends Record<string, any>>(
5252
}
5353

5454
class ConsoleLogger implements Logger {
55-
private shouldLog = (
55+
private shouldLog =
5656
process.env.CLOUD_SERVICE === 'true' ||
5757
process.env.SSE_LOCAL === 'true' ||
58-
process.env.HTTP_STREAMABLE_SERVER === 'true'
59-
);
60-
58+
process.env.HTTP_STREAMABLE_SERVER === 'true';
59+
6160
debug(...args: unknown[]): void {
6261
if (this.shouldLog) {
6362
console.debug('[DEBUG]', new Date().toISOString(), ...args);
@@ -90,7 +89,9 @@ const server = new FastMCP<SessionData>({
9089
version: '3.0.0',
9190
logger: new ConsoleLogger(),
9291
roots: { enabled: false },
93-
authenticate: async (request: { headers: IncomingHttpHeaders }): Promise<SessionData> => {
92+
authenticate: async (request: {
93+
headers: IncomingHttpHeaders;
94+
}): Promise<SessionData> => {
9495
if (process.env.CLOUD_SERVICE === 'true') {
9596
const apiKey = extractApiKey(request.headers);
9697

@@ -101,7 +102,9 @@ const server = new FastMCP<SessionData>({
101102
} else {
102103
// For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
103104
if (!process.env.FIRECRAWL_API_KEY && !process.env.FIRECRAWL_API_URL) {
104-
console.error('Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided');
105+
console.error(
106+
'Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided'
107+
);
105108
process.exit(1);
106109
}
107110
return { firecrawlApiKey: process.env.FIRECRAWL_API_KEY };
@@ -122,12 +125,12 @@ function createClient(apiKey?: string): FirecrawlApp {
122125
apiUrl: process.env.FIRECRAWL_API_URL,
123126
}),
124127
};
125-
128+
126129
// Only add apiKey if it's provided (required for cloud, optional for self-hosted)
127130
if (apiKey) {
128131
config.apiKey = apiKey;
129132
}
130-
133+
131134
return new FirecrawlApp(config);
132135
}
133136

@@ -144,12 +147,17 @@ function getClient(session?: SessionData): FirecrawlApp {
144147
}
145148
return createClient(session.firecrawlApiKey);
146149
}
147-
150+
148151
// For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
149-
if (!process.env.FIRECRAWL_API_URL && (!session || !session.firecrawlApiKey)) {
150-
throw new Error('Unauthorized: API key is required when not using a self-hosted instance');
152+
if (
153+
!process.env.FIRECRAWL_API_URL &&
154+
(!session || !session.firecrawlApiKey)
155+
) {
156+
throw new Error(
157+
'Unauthorized: API key is required when not using a self-hosted instance'
158+
);
151159
}
152-
160+
153161
return createClient(session?.firecrawlApiKey);
154162
}
155163

@@ -162,7 +170,13 @@ function asText(data: unknown): string {
162170

163171
// Define safe action types
164172
const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'] as const;
165-
const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'] as const;
173+
const otherActions = [
174+
'click',
175+
'write',
176+
'press',
177+
'executeJavascript',
178+
'generatePDF',
179+
] as const;
166180
const allActionTypes = [...safeActionTypes, ...otherActions] as const;
167181

168182
// Use appropriate action types based on safe mode
@@ -198,26 +212,39 @@ const scrapeParamsSchema = z.object({
198212
])
199213
)
200214
.optional(),
215+
parsers: z
216+
.array(
217+
z.union([
218+
z.enum(['pdf']),
219+
z.object({
220+
type: z.enum(['pdf']),
221+
maxPages: z.number().int().min(1).max(10000).optional(),
222+
}),
223+
])
224+
)
225+
.optional(),
201226
onlyMainContent: z.boolean().optional(),
202227
includeTags: z.array(z.string()).optional(),
203228
excludeTags: z.array(z.string()).optional(),
204229
waitFor: z.number().optional(),
205-
...(SAFE_MODE ? {} : {
206-
actions: z
207-
.array(
208-
z.object({
209-
type: z.enum(allowedActionTypes),
210-
selector: z.string().optional(),
211-
milliseconds: z.number().optional(),
212-
text: z.string().optional(),
213-
key: z.string().optional(),
214-
direction: z.enum(['up', 'down']).optional(),
215-
script: z.string().optional(),
216-
fullPage: z.boolean().optional(),
217-
})
218-
)
219-
.optional(),
220-
}),
230+
...(SAFE_MODE
231+
? {}
232+
: {
233+
actions: z
234+
.array(
235+
z.object({
236+
type: z.enum(allowedActionTypes),
237+
selector: z.string().optional(),
238+
milliseconds: z.number().optional(),
239+
text: z.string().optional(),
240+
key: z.string().optional(),
241+
direction: z.enum(['up', 'down']).optional(),
242+
script: z.string().optional(),
243+
fullPage: z.boolean().optional(),
244+
})
245+
)
246+
.optional(),
247+
}),
221248
mobile: z.boolean().optional(),
222249
skipTlsVerification: z.boolean().optional(),
223250
removeBase64Images: z.boolean().optional(),
@@ -254,18 +281,28 @@ This is the most powerful, fastest and most reliable scraper tool, if available
254281
\`\`\`
255282
**Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
256283
**Returns:** Markdown, HTML, or other formats as specified.
257-
${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
284+
${
285+
SAFE_MODE
286+
? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.'
287+
: ''
288+
}
258289
`,
259290
parameters: scrapeParamsSchema,
260291
execute: async (
261292
args: unknown,
262293
{ session, log }: { session?: SessionData; log: Logger }
263294
): Promise<string> => {
264-
const { url, ...options } = args as { url: string } & Record<string, unknown>;
295+
const { url, ...options } = args as { url: string } & Record<
296+
string,
297+
unknown
298+
>;
265299
const client = getClient(session);
266300
const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
267301
log.info('Scraping URL', { url: String(url) });
268-
const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN } as any);
302+
const res = await client.scrape(String(url), {
303+
...cleaned,
304+
origin: ORIGIN,
305+
} as any);
269306
return asText(res);
270307
},
271308
});
@@ -302,11 +339,17 @@ Map a website to discover all indexed URLs on the site.
302339
args: unknown,
303340
{ session, log }: { session?: SessionData; log: Logger }
304341
): Promise<string> => {
305-
const { url, ...options } = args as { url: string } & Record<string, unknown>;
342+
const { url, ...options } = args as { url: string } & Record<
343+
string,
344+
unknown
345+
>;
306346
const client = getClient(session);
307347
const cleaned = removeEmptyTopLevel(options as Record<string, unknown>);
308348
log.info('Mapping URL', { url: String(url) });
309-
const res = await client.map(String(url), { ...cleaned, origin: ORIGIN } as any);
349+
const res = await client.map(String(url), {
350+
...cleaned,
351+
origin: ORIGIN,
352+
} as any);
310353
return asText(res);
311354
},
312355
});
@@ -424,7 +467,11 @@ server.addTool({
424467
}
425468
\`\`\`
426469
**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
427-
${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
470+
${
471+
SAFE_MODE
472+
? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.'
473+
: ''
474+
}
428475
`,
429476
parameters: z.object({
430477
url: z.string(),
@@ -439,17 +486,19 @@ server.addTool({
439486
crawlEntireDomain: z.boolean().optional(),
440487
delay: z.number().optional(),
441488
maxConcurrency: z.number().optional(),
442-
...(SAFE_MODE ? {} : {
443-
webhook: z
444-
.union([
445-
z.string(),
446-
z.object({
447-
url: z.string(),
448-
headers: z.record(z.string(), z.string()).optional(),
449-
}),
450-
])
451-
.optional(),
452-
}),
489+
...(SAFE_MODE
490+
? {}
491+
: {
492+
webhook: z
493+
.union([
494+
z.string(),
495+
z.object({
496+
url: z.string(),
497+
headers: z.record(z.string(), z.string()).optional(),
498+
}),
499+
])
500+
.optional(),
501+
}),
453502
deduplicateSimilarURLs: z.boolean().optional(),
454503
ignoreQueryParameters: z.boolean().optional(),
455504
scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),

0 commit comments

Comments
 (0)