diff --git a/.changeset/rare-mails-fail.md b/.changeset/rare-mails-fail.md new file mode 100644 index 0000000000..fadbf8b95b --- /dev/null +++ b/.changeset/rare-mails-fail.md @@ -0,0 +1,8 @@ +--- +"@trigger.dev/build": patch +"trigger.dev": patch +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +--- + +Adding Lightpanda extension diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx index 449ef16dcb..78ff19fd9b 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam._index/route.tsx @@ -715,6 +715,7 @@ function HelpfulInfoHasTasks({ onClose }: { onClose: () => void }) { isExternal /> + + +To use Lightpanda in your project, add these build settings to your `trigger.config.ts` file: + +```ts trigger.config.ts +import { defineConfig } from "@trigger.dev/sdk/v3"; +import { lightpanda } from "@trigger.dev/build/extensions/lightpanda"; + +export default defineConfig({ + project: "", + // Your other config settings... + build: { + extensions: [lightpanda()], + }, +}); +``` + +And add the following environment variable in your Trigger.dev dashboard on the Environment Variables page: + +```bash +LIGHTPANDA_BROWSER_PATH: "/usr/bin/lightpanda", +``` + +Follow [this example](/guides/examples/lightpanda) to get set up with Trigger.dev and Lightpanda in your project. 
diff --git a/docs/config/extensions/overview.mdx b/docs/config/extensions/overview.mdx index abba56694e..a3c2542a2f 100644 --- a/docs/config/extensions/overview.mdx +++ b/docs/config/extensions/overview.mdx @@ -52,6 +52,7 @@ Trigger.dev provides a set of built-in extensions that you can use to customize | [pythonExtension](/config/extensions/pythonExtension) | Execute Python scripts in your project | | [playwright](/config/extensions/playwright) | Use Playwright in your Trigger.dev tasks | | [puppeteer](/config/extensions/puppeteer) | Use Puppeteer in your Trigger.dev tasks | +| [lightpanda](/config/extensions/lightpanda) | Use Lightpanda in your Trigger.dev tasks | | [ffmpeg](/config/extensions/ffmpeg) | Use FFmpeg in your Trigger.dev tasks | | [aptGet](/config/extensions/aptGet) | Install system packages in your build image | | [additionalFiles](/config/extensions/additionalFiles) | Copy additional files to your build image | diff --git a/docs/docs.json b/docs/docs.json index 88b0d12a1d..e6fb338de1 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -78,6 +78,7 @@ "config/extensions/pythonExtension", "config/extensions/playwright", "config/extensions/puppeteer", + "config/extensions/lightpanda", "config/extensions/ffmpeg", "config/extensions/aptGet", "config/extensions/additionalFiles", @@ -358,6 +359,7 @@ "guides/examples/fal-ai-realtime", "guides/examples/ffmpeg-video-processing", "guides/examples/firecrawl-url-crawl", + "guides/examples/lightpanda", "guides/examples/libreoffice-pdf-conversion", "guides/examples/open-ai-with-retrying", "guides/examples/pdf-to-image", diff --git a/docs/guides/examples/lightpanda.mdx b/docs/guides/examples/lightpanda.mdx new file mode 100644 index 0000000000..9f70d81df2 --- /dev/null +++ b/docs/guides/examples/lightpanda.mdx @@ -0,0 +1,245 @@ +--- +title: "Get a webpage's content using Lightpanda browser" +sidebarTitle: "Lightpanda" +description: "In these examples, we will show you how to crawl using Lightpanda browser and 
import { logger, task } from '@trigger.dev/sdk/v3'
import puppeteer from 'puppeteer'

/**
 * Connects to a CDP websocket endpoint, opens `url` and returns the `href`
 * attribute of every `<a>` element found on the page.
 *
 * @param browserWSEndpoint - Websocket URL of the browser to connect to.
 * @param url - Page to load.
 * @returns One entry per anchor; `null` for anchors without an `href`.
 */
const collectLinks = async (
  browserWSEndpoint: string,
  url: string,
): Promise<(string | null)[]> => {
  const browser = await puppeteer.connect({ browserWSEndpoint })

  try {
    const context = await browser.createBrowserContext()
    const page = await context.newPage()

    // Dump all the links from the page.
    await page.goto(url)

    const links = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('a')).map(row => {
        return row.getAttribute('href')
      })
    })

    logger.info('Processing done')
    logger.info('Shutting down…')

    await page.close()
    await context.close()

    return links
  } finally {
    // Always drop the connection, even when navigation or evaluation throws,
    // so a failed run does not leave the remote session open.
    await browser.disconnect()
  }
}

/**
 * Task that returns every link of `payload.url`, rendered by Lightpanda cloud
 * and driven through Puppeteer.
 *
 * Requires the `LIGHTPANDA_TOKEN` environment variable.
 *
 * @throws Error when `payload.url` or `LIGHTPANDA_TOKEN` is missing.
 */
export const lightpandaCloudPuppeteer = task({
  id: 'lightpanda-cloud-puppeteer',
  machine: {
    preset: 'micro',
  },
  run: async (payload: { url: string }, { ctx }) => {
    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx })

    if (!payload.url) {
      logger.warn('Please define the payload url')
      throw new Error('payload.url is undefined')
    }

    const token = process.env.LIGHTPANDA_TOKEN
    if (!token) {
      // Do not attach process.env to the log entry: it contains every secret
      // configured for the environment.
      logger.warn('Please define the env variable $LIGHTPANDA_TOKEN')
      throw new Error('$LIGHTPANDA_TOKEN is undefined')
    }

    // Connect to Lightpanda's cloud. The token is URL-encoded because it is
    // placed in a query string.
    const links = await collectLinks(
      `wss://cloud.lightpanda.io/ws?browser=lightpanda&token=${encodeURIComponent(token)}`,
      payload.url,
    )

    logger.info('✅ Completed')

    return {
      links,
    }
  },
})
import { logger, task } from '@trigger.dev/sdk/v3'
import { execFileSync } from 'node:child_process'

/**
 * Task that dumps the HTML of `payload.url` using the local Lightpanda binary.
 *
 * Requires the `LIGHTPANDA_BROWSER_PATH` environment variable to point at the
 * Lightpanda executable.
 *
 * @returns `{ message }` where `message` is the stdout of `lightpanda fetch --dump`.
 * @throws Error when `payload.url` or `LIGHTPANDA_BROWSER_PATH` is missing, or
 *   when the URL is not http(s).
 * @throws TypeError when `payload.url` cannot be parsed as a URL.
 */
export const lightpandaFetch = task({
  id: 'lightpanda-fetch',
  machine: {
    preset: 'micro',
  },
  run: async (payload: { url: string }, { ctx }) => {
    logger.log("Lets get a page's content with Lightpanda!", { payload, ctx })

    if (!payload.url) {
      logger.warn('Please define the payload url')
      throw new Error('payload.url is undefined')
    }

    const browserPath = process.env.LIGHTPANDA_BROWSER_PATH
    if (!browserPath) {
      // Do not attach process.env to the log entry: it contains every secret
      // configured for the environment.
      logger.warn('Please define the env variable $LIGHTPANDA_BROWSER_PATH')
      throw new Error('$LIGHTPANDA_BROWSER_PATH is undefined')
    }

    // The payload is caller-controlled. Parse it and only accept web URLs so
    // it can never be read as a command-line option.
    const target = new URL(payload.url)
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      throw new Error(`Unsupported URL protocol: ${target.protocol}`)
    }

    // execFileSync passes arguments as an argv array and spawns no shell, so
    // characters such as `;`, `|` or `$()` in the URL are not interpreted.
    const output = execFileSync(browserPath, ['fetch', '--dump', target.href])

    logger.info('✅ Completed')

    return {
      message: output.toString(),
    }
  },
})
import { logger, task } from '@trigger.dev/sdk/v3'
import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'
import puppeteer from 'puppeteer'

// Address the local CDP server listens on; shared by the spawn arguments and
// the Puppeteer websocket endpoint so the two cannot drift apart.
const CDP_HOST = '127.0.0.1'
const CDP_PORT = '9222'

/**
 * Starts `lightpanda serve` as a child process.
 *
 * @param log - Logger used to report the child's pid.
 * @returns The child process, resolved 250 ms after the `spawn` event.
 *   NOTE(review): the delay presumably gives the server time to start
 *   listening; it is not a readiness check.
 * @throws Error when `LIGHTPANDA_BROWSER_PATH` is unset; rejects with the
 *   spawn error when the binary cannot be started.
 */
const spawnLightpanda = async (log: typeof logger) =>
  new Promise<ChildProcessWithoutNullStreams>((resolve, reject) => {
    const binary = process.env.LIGHTPANDA_BROWSER_PATH
    if (!binary) {
      reject(new Error('$LIGHTPANDA_BROWSER_PATH is undefined'))
      return
    }

    const child = spawn(binary, [
      'serve',
      '--host',
      CDP_HOST,
      '--port',
      CDP_PORT,
      '--log_level',
      'info',
    ])

    child.on('spawn', () => {
      log.info("Running Lightpanda's CDP server…", {
        pid: child.pid,
      })

      setTimeout(() => resolve(child), 250)
    })
    child.on('error', e => reject(e))
  })

/**
 * Runs `work` while a Lightpanda CDP server is up, then stops the server
 * whether `work` succeeded or threw.
 */
async function withLightpanda<T>(log: typeof logger, work: () => Promise<T>): Promise<T> {
  const lpProcess = await spawnLightpanda(log)

  try {
    return await work()
  } finally {
    // Stop Lightpanda's CDP Server
    lpProcess.stdout.destroy()
    lpProcess.stderr.destroy()
    lpProcess.kill()
  }
}

/**
 * Connects Puppeteer to the local CDP server, opens `url` and returns the
 * `href` attribute of every `<a>` element (`null` when an anchor has none).
 */
const collectLinks = async (url: string): Promise<(string | null)[]> => {
  const browser = await puppeteer.connect({
    browserWSEndpoint: `ws://${CDP_HOST}:${CDP_PORT}`,
  })

  try {
    const context = await browser.createBrowserContext()
    const page = await context.newPage()

    // Dump all the links from the page.
    await page.goto(url)

    const links = await page.evaluate(() => {
      return Array.from(document.querySelectorAll('a')).map(row => {
        return row.getAttribute('href')
      })
    })

    logger.info('Processing done')
    logger.info('Shutting down…')

    return links
  } finally {
    // Close Puppeteer instance
    await browser.close()
  }
}

/**
 * Task that launches a local Lightpanda CDP server and uses Puppeteer to
 * return every link of `payload.url`.
 *
 * Requires the `LIGHTPANDA_BROWSER_PATH` environment variable.
 *
 * @throws Error when `payload.url` or `LIGHTPANDA_BROWSER_PATH` is missing.
 *   Errors from Puppeteer or the child process propagate unchanged, so the
 *   original message and stack are preserved.
 */
export const lightpandaCDP = task({
  id: 'lightpanda-cdp',
  machine: {
    preset: 'micro',
  },
  run: async (payload: { url: string }, { ctx }) => {
    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx })

    if (!payload.url) {
      logger.warn('Please define the payload url')
      throw new Error('payload.url is undefined')
    }

    if (!process.env.LIGHTPANDA_BROWSER_PATH) {
      // Do not attach process.env to the log entry: it contains every secret
      // configured for the environment.
      logger.warn('Please define the env variable $LIGHTPANDA_BROWSER_PATH')
      throw new Error('$LIGHTPANDA_BROWSER_PATH is undefined')
    }

    // Launch Lightpanda's CDP server, scrape, and always tear it down.
    const links = await withLightpanda(logger, () => collectLinks(payload.url))

    logger.info('✅ Completed')

    return {
      links,
    }
  },
})
| diff --git a/docs/images/intro-lightpanda.jpg b/docs/images/intro-lightpanda.jpg new file mode 100644 index 0000000000..8fc2102bb0 Binary files /dev/null and b/docs/images/intro-lightpanda.jpg differ diff --git a/docs/introduction.mdx b/docs/introduction.mdx index c8ed0f3480..77aa189a97 100644 --- a/docs/introduction.mdx +++ b/docs/introduction.mdx @@ -83,6 +83,7 @@ We provide everything you need to build and manage background tasks: a CLI and S + ## Explore by build extension @@ -92,6 +93,7 @@ We provide everything you need to build and manage background tasks: a CLI and S | prismaExtension | Use Prisma with Trigger.dev | [Learn more](/config/extensions/prismaExtension) | | pythonExtension | Execute Python scripts in Trigger.dev | [Learn more](/config/extensions/pythonExtension) | | puppeteer | Use Puppeteer with Trigger.dev | [Learn more](/config/extensions/puppeteer) | +| lightpanda | Use Lightpanda Browser with Trigger.dev | [Learn more](/config/extensions/lightpanda) | | ffmpeg | Use FFmpeg with Trigger.dev | [Learn more](/config/extensions/ffmpeg) | | aptGet | Install system packages with aptGet | [Learn more](/config/extensions/aptGet) | | additionalFiles | Copy additional files to the build directory | [Learn more](/config/extensions/additionalFiles) | diff --git a/packages/build/package.json b/packages/build/package.json index 6df2b1a0f9..71a055ea32 100644 --- a/packages/build/package.json +++ b/packages/build/package.json @@ -30,7 +30,8 @@ "./extensions/audioWaveform": "./src/extensions/audioWaveform.ts", "./extensions/typescript": "./src/extensions/typescript.ts", "./extensions/puppeteer": "./src/extensions/puppeteer.ts", - "./extensions/playwright": "./src/extensions/playwright.ts" + "./extensions/playwright": "./src/extensions/playwright.ts", + "./extensions/lightpanda": "./src/extensions/lightpanda.ts" }, "sourceDialects": [ "@triggerdotdev/source" @@ -61,6 +62,9 @@ ], "extensions/playwright": [ "dist/commonjs/extensions/playwright.d.ts" + ], + 
import type { BuildExtension } from "@trigger.dev/core/v3/build"

const NAME = 'LightpandaExtension'

/** Absolute path the Lightpanda binary is installed to inside the image. */
const BINARY_PATH = '/usr/bin/lightpanda'

/** Options accepted by the {@link lightpanda} build extension. */
type LightpandaOpts = {
  /** Architecture of the release binary to download. Defaults to `x86_64`. */
  arch?: 'aarch64' | 'x86_64'
  /** Release tag used in the download URL. Defaults to `nightly`. */
  version?: 'nightly'
  /** When `true`, `LIGHTPANDA_DISABLE_TELEMETRY=true` is set in the image and for deployed tasks. */
  disableTelemetry?: boolean
}

/**
 * Build extension that installs the Lightpanda browser binary into the
 * deployment image and exposes its location as `LIGHTPANDA_BROWSER_PATH`.
 *
 * Nothing is added when the build target is `dev`.
 *
 * @param opts - See {@link LightpandaOpts}; every field is optional.
 * @returns A build extension whose `onBuildComplete` hook registers one layer
 *   with id `lightpanda`.
 */
export const lightpanda = ({ arch = 'x86_64', version = 'nightly', disableTelemetry = false }: LightpandaOpts = {}): BuildExtension => ({
  name: NAME,
  onBuildComplete: async (context) => {
    context.logger.progress(`Running ${NAME} on ${context.target} env for arch ${arch}`)
    context.logger.progress(`version: ${version}`)

    if (context.target === "dev") {
      return
    }

    const instructions: string[] = []
    const env: Record<string, string> = {
      LIGHTPANDA_BROWSER_PATH: BINARY_PATH,
    }

    if (disableTelemetry) {
      // `RUN export …` only changes the environment of the shell that runs
      // that single build step, so the variable was lost immediately. `ENV`
      // persists for the following instructions of the stage, and the deploy
      // env entry passes it the same way LIGHTPANDA_BROWSER_PATH is passed.
      instructions.push('ENV LIGHTPANDA_DISABLE_TELEMETRY=true')
      env.LIGHTPANDA_DISABLE_TELEMETRY = 'true'
    }

    /* Update / install required packages */
    instructions.push(
      [
        'RUN apt-get update',
        'apt-get install --no-install-recommends -y curl ca-certificates',
        'update-ca-certificates',
        'apt-get clean',
        'rm -rf /var/lib/apt/lists/*',
      ].join(' && '),
    )

    /* Install Lightpanda */
    const downloadUrl = `https://github.com/lightpanda-io/browser/releases/download/${version}/lightpanda-${arch}-linux`

    // `||` and `&&` have equal precedence in sh and are evaluated left to
    // right. Each fallback is grouped with its own command so a failed
    // download reports only the download error and stops the chain.
    instructions.push(
      [
        `RUN (curl -L -f --retry 3 -o lightpanda ${downloadUrl} || (echo "Failed to download Lightpanda binary" && exit 1))`,
        'chmod a+x ./lightpanda',
        `mv ./lightpanda ${BINARY_PATH}`,
        `(${BINARY_PATH} version || (echo "Downloaded binary is not functional" && exit 1))`,
      ].join(' && '),
    )

    context.addLayer({
      id: "lightpanda",
      image: {
        instructions,
      },
      deploy: {
        env,
        override: true,
      },
    })
  },
})
"@trigger.dev/build/extensions/lightpanda"; import { prismaExtension } from "@trigger.dev/build/extensions/prisma"; import { emitDecoratorMetadata } from "@trigger.dev/build/extensions/typescript"; import { defineConfig } from "@trigger.dev/sdk/v3"; @@ -83,6 +84,7 @@ export default defineConfig({ }), puppeteer(), playwright(), + lightpanda(), ], external: ["re2"], },