forked from BuilderIO/gpt-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.ts
41 lines (40 loc) · 1.05 KB
/
config.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import type { Page } from "playwright";
/**
 * Settings for a crawl: where it starts, which links it follows, what it
 * extracts from each page, and the file the collected data ends up in.
 */
export type Config = {
  /**
   * Address the crawl begins at.
   * @example "https://www.builder.io/c/docs/developers"
   * @default ""
   */
  url: string;

  /**
   * Pattern (or list of patterns) that links found on a page are matched
   * against to decide which ones get crawled next.
   * @example "https://www.builder.io/c/docs/**"
   * @default ""
   */
  match: string | Array<string>;

  /**
   * Selector of the element whose inner text is collected.
   * @example ".docs-builder-container"
   * @default ""
   */
  selector?: string;

  /**
   * Upper bound on the number of pages crawled.
   * @default 50
   */
  maxPagesToCrawl: number;

  /**
   * Name of the file the finished data is written to.
   * @default "output.json"
   */
  outputFileName: string;

  /** Cookie to set, if any (for example to satisfy a cookie-consent prompt). */
  cookie?: {
    name: string;
    value: string;
  };

  /**
   * Hook run for each page found. It is handed the Playwright `page` and a
   * `pushData` function that accepts a value and resolves with no result.
   */
  onVisitPage?: (options: {
    page: Page;
    // NOTE(review): `any` is kept as-is; tightening it could break callers
    // outside this file — confirm before narrowing.
    pushData: (data: any) => Promise<void>;
  }) => Promise<void>;

  /**
   * Timeout applied while waiting for a selector to appear.
   * NOTE(review): unit is not stated here — presumably milliseconds; confirm.
   */
  waitForSelectorTimeout?: number;
};