From 694bd1b491e480621cd6e0e59559457fc4f7d912 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 17:07:48 +0200 Subject: [PATCH 1/3] docs: add Browser Use guide Add a guide and runnable example Actor for the Browser Use LLM browser-automation library, following the existing scraping-library guides. --- docs/01_introduction/quick-start.mdx | 1 + docs/03_guides/10_browser_use.mdx | 119 ++++++++++++++++++ .../code/browser_use_project/Dockerfile | 21 ++++ .../browser_use_project/my_actor/__init__.py | 0 .../browser_use_project/my_actor/__main__.py | 8 ++ .../browser_use_project/my_actor/agent.py | 69 ++++++++++ .../code/browser_use_project/my_actor/main.py | 53 ++++++++ pyproject.toml | 4 + 8 files changed, 275 insertions(+) create mode 100644 docs/03_guides/10_browser_use.mdx create mode 100644 docs/03_guides/code/browser_use_project/Dockerfile create mode 100644 docs/03_guides/code/browser_use_project/my_actor/__init__.py create mode 100644 docs/03_guides/code/browser_use_project/my_actor/__main__.py create mode 100644 docs/03_guides/code/browser_use_project/my_actor/agent.py create mode 100644 docs/03_guides/code/browser_use_project/my_actor/main.py diff --git a/docs/01_introduction/quick-start.mdx b/docs/01_introduction/quick-start.mdx index da166da9..9eed691f 100644 --- a/docs/01_introduction/quick-start.mdx +++ b/docs/01_introduction/quick-start.mdx @@ -105,4 +105,5 @@ To see how you can integrate the Apify SDK with popular web scraping libraries, - [Selenium](../guides/selenium) - [Crawlee](../guides/crawlee) - [Scrapy](../guides/scrapy) +- [Browser Use](../guides/browser-use) - [Running webserver](../guides/running-webserver) diff --git a/docs/03_guides/10_browser_use.mdx b/docs/03_guides/10_browser_use.mdx new file mode 100644 index 00000000..6c5ff5b5 --- /dev/null +++ b/docs/03_guides/10_browser_use.mdx @@ -0,0 +1,119 @@ +--- +id: browser-use +title: Use Browser Use +description: Build an Apify Actor that automates a browser with an LLM agent 
using the Browser Use library. +--- + +import CodeBlock from '@theme/CodeBlock'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +import BrowserUseMain from '!!raw-loader!./code/browser_use_project/my_actor/main.py'; +import BrowserUseAgent from '!!raw-loader!./code/browser_use_project/my_actor/agent.py'; +import BrowserUseEntrypoint from '!!raw-loader!./code/browser_use_project/my_actor/__main__.py'; +import BrowserUseDockerfile from '!!raw-loader!./code/browser_use_project/Dockerfile'; + +In this guide, you'll learn how to use the [Browser Use](https://browser-use.com/) library in your Apify Actors. + +## Introduction + +[Browser Use](https://browser-use.com/) is a Python library that lets an LLM control a real web browser. Instead of writing selectors and navigation steps by hand, you give an agent a natural-language task - such as "find the top post on Hacker News and return its title and URL" - and the agent decides which pages to open, what to click, and what to read until the task is done. + +Some of the features that make Browser Use a good fit for Apify Actors: + +- **Natural-language tasks** - Describe what you want in plain English; the agent figures out the steps. This is well suited to pages whose structure changes often or is hard to target with fixed selectors. +- **Model-agnostic** - Browser Use ships wrappers for many providers (`ChatOpenAI`, `ChatAnthropic`, `ChatGoogle`, and more), so you can pick the model that fits your task and budget. +- **Structured output** - Pass a [Pydantic](https://docs.pydantic.dev/) model as the output schema and the agent returns a validated object instead of free-form text, which maps cleanly onto an Apify dataset. +- **Real browser via CDP** - The agent drives a real Chromium over the Chrome DevTools Protocol, so JavaScript-heavy pages render just like they would for a human. 
+- **First-class async support** - The agent's `run` method is asynchronous, which integrates naturally with the asyncio-based Apify SDK. + +Browser Use needs only the `browser-use` package - install it with: + +```bash +pip install browser-use +``` + +## Configuring the LLM + +Browser Use needs an LLM to drive the agent. You choose a provider wrapper, give it a model name, and supply the provider's API key: + +- **`ChatOpenAI`** - OpenAI models such as `gpt-4.1-mini` or `gpt-5-mini`. Reads the key from `OPENAI_API_KEY`, or accepts it via the `api_key` argument. +- **`ChatAnthropic`** - Anthropic Claude models such as `claude-sonnet-4-5` or `claude-haiku-4-5`. Reads the key from `ANTHROPIC_API_KEY`. +- **`ChatGoogle`** - Google Gemini models such as `gemini-2.5-flash`. Reads the key from `GOOGLE_API_KEY`. + +The example Actor in this guide uses `ChatOpenAI`, but switching providers is a one-line change in `my_actor/agent.py`. More capable models generally complete tasks in fewer steps and more reliably, while smaller models are cheaper per step. + +Keep the API key out of the Actor input and source code. The example reads it from an environment variable, which on the Apify platform you set as a [secret environment variable](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables) (for example `OPENAI_API_KEY`), and locally you export in your shell. + +## Example Actor + +The following Actor runs a Browser Use agent for a single task and stores its structured result in the default dataset. By default it opens [Hacker News](https://news.ycombinator.com) and returns the title and URL of the top five posts, but the task, model, and step limit are all configurable through the Actor input. + +The code is split into three small modules, following the structure of the Apify Python Actor templates: + +- `my_actor/main.py` - The Actor's main coroutine. 
It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy), runs the agent, and stores the result. +- `my_actor/agent.py` - The Browser Use-specific logic. It defines the output schema and a single `run_agent_task` function that builds the LLM, browser, and agent, then returns the agent's structured output. +- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`. + + + + + {BrowserUseMain} + + + + + {BrowserUseAgent} + + + + + {BrowserUseEntrypoint} + + + + +A few things worth pointing out: + +- Keeping the agent setup in `run_agent_task` separates the Browser Use-specific code from the Actor's orchestration logic. `my_actor/main.py` only decides what to read from the input and what to store. +- Passing `output_model_schema=Posts` makes the agent return a validated `Posts` instance via `history.structured_output`, so `my_actor/main.py` can push each item straight to the dataset. Adapt the task and the `Post`/`Posts` models together to fit your own use case. +- `enable_signal_handler=False` leaves signal handling to the Actor, which manages the run's lifecycle. Without it, Browser Use would install its own handlers and interfere with a clean shutdown. +- `headless=Actor.configuration.headless` runs the browser without a visible window, which is what you want on the platform. + +## Using Apify Proxy + +Running on the Apify platform gives your agent access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `run_agent_task`. 
+ +Browser Use expects the proxy as a `ProxySettings` object with separate `server`, `username`, and `password` fields, whereas `ProxyConfiguration.new_url` returns a single URL string (for example `http://user:pass@proxy.apify.com:8000`). The `_proxy_settings` helper in `my_actor/agent.py` splits that URL into the fields Browser Use expects. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. + +## Running on the Apify platform + +Browser Use drives a real Chromium over CDP, so the Actor needs a browser binary available at runtime. The simplest way to provide one is to build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies. Browser Use discovers that browser automatically, so no extra install step is needed in the image. + + + + + {BrowserUseDockerfile} + + + + +When running the Actor locally, install the browser once with the `browser-use install` command, which downloads a Chromium build together with its dependencies: + +```bash +browser-use install +``` + +Remember to provide the LLM API key in both environments - as a secret environment variable on the platform, and exported in your shell when running locally. + +## Conclusion + +In this guide, you learned how to use Browser Use in your Apify Actors. You can now drive a real browser with an LLM agent, return its results as a validated Pydantic model, route the browser through Apify Proxy, and run the whole thing on the Apify platform. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own automation tasks. 
If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy automating! + +## Additional resources + +- [Browser Use: Official documentation](https://docs.browser-use.com/) +- [Browser Use: Supported models](https://docs.browser-use.com/customize/supported-models) +- [Browser Use: Structured output](https://docs.browser-use.com/customize/agent/output-format) +- [Browser Use: GitHub repository](https://github.com/browser-use/browser-use) +- [Apify: Proxy management](https://docs.apify.com/platform/proxy) diff --git a/docs/03_guides/code/browser_use_project/Dockerfile b/docs/03_guides/code/browser_use_project/Dockerfile new file mode 100644 index 00000000..c35bbfc9 --- /dev/null +++ b/docs/03_guides/code/browser_use_project/Dockerfile @@ -0,0 +1,21 @@ +# Use the Apify Playwright base image, which already ships a Chromium browser together +# with all of its system-level dependencies. Browser Use launches that browser via CDP, +# so no extra browser install step is needed. +FROM apify/actor-python-playwright:3.14-1.60.0 + +USER myuser + +# Copy just requirements.txt first to leverage the Docker build cache. +COPY --chown=myuser:myuser requirements.txt ./ +RUN pip install -r requirements.txt + +# Copy the rest of the source code and verify that it compiles. +COPY --chown=myuser:myuser . ./ +RUN python -m compileall -q my_actor/ + +# Disable Browser Use telemetry and cloud sync inside the Actor. +ENV ANONYMIZED_TELEMETRY=false +ENV BROWSER_USE_CLOUD_SYNC=false + +# Specify how to launch the Actor. 
+CMD ["python", "-m", "my_actor"] diff --git a/docs/03_guides/code/browser_use_project/my_actor/__init__.py b/docs/03_guides/code/browser_use_project/my_actor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docs/03_guides/code/browser_use_project/my_actor/__main__.py b/docs/03_guides/code/browser_use_project/my_actor/__main__.py new file mode 100644 index 00000000..6aeaf3d5 --- /dev/null +++ b/docs/03_guides/code/browser_use_project/my_actor/__main__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +import asyncio + +from .main import main + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/docs/03_guides/code/browser_use_project/my_actor/agent.py b/docs/03_guides/code/browser_use_project/my_actor/agent.py new file mode 100644 index 00000000..708265fb --- /dev/null +++ b/docs/03_guides/code/browser_use_project/my_actor/agent.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from urllib.parse import urlsplit + +from browser_use import Agent, Browser, ChatOpenAI +from browser_use.browser import ProxySettings +from pydantic import BaseModel + + +class Post(BaseModel): + """A single item the agent is asked to extract.""" + + title: str + url: str + + +class Posts(BaseModel): + """The structured result returned by the agent.""" + + posts: list[Post] + + +async def run_agent_task( + task: str, + *, + model: str, + llm_api_key: str, + max_steps: int, + headless: bool = True, + proxy_url: str | None = None, +) -> Posts | None: + """Run a Browser Use agent for a single task and return its structured output. + + The agent is driven by an OpenAI model and a real Chromium browser. Passing + `output_model_schema` makes the agent return a validated `Posts` instance instead + of free-form text, and `enable_signal_handler=False` leaves signal handling to the + Actor. + """ + # Configure the LLM that drives the agent. Swap `ChatOpenAI` for `ChatAnthropic`, + # `ChatGoogle`, or another provider to use a different model. 
+ llm = ChatOpenAI(model=model, api_key=llm_api_key) + + # Configure the browser. When a proxy URL is provided, route the browser through it. + browser = Browser( + headless=headless, + proxy=_proxy_settings(proxy_url) if proxy_url else None, + ) + + # Create the agent and run it for at most `max_steps` steps. + agent = Agent( + task=task, + llm=llm, + browser=browser, + output_model_schema=Posts, + enable_signal_handler=False, + ) + + history = await agent.run(max_steps=max_steps) + return history.structured_output + + +def _proxy_settings(proxy_url: str) -> ProxySettings: + """Convert an Apify Proxy URL into Browser Use `ProxySettings`.""" + parts = urlsplit(proxy_url) + return ProxySettings( + server=f'{parts.scheme}://{parts.hostname}:{parts.port}', + username=parts.username, + password=parts.password, + ) diff --git a/docs/03_guides/code/browser_use_project/my_actor/main.py b/docs/03_guides/code/browser_use_project/my_actor/main.py new file mode 100644 index 00000000..d045759e --- /dev/null +++ b/docs/03_guides/code/browser_use_project/my_actor/main.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import os + +from apify import Actor + +from .agent import run_agent_task + +# The default task is aligned with the `Posts` output schema defined in `agent.py`. +DEFAULT_TASK = ( + 'Open https://news.ycombinator.com and return the title and URL ' + 'of the top 5 posts on the front page.' +) + + +async def main() -> None: + # Enter the context of the Actor. + async with Actor: + # Retrieve the Actor input, and use default values if not provided. + actor_input = await Actor.get_input() or {} + task = actor_input.get('task', DEFAULT_TASK) + model = actor_input.get('model', 'gpt-4.1-mini') + max_steps = actor_input.get('max_steps', 25) + + # Read the LLM API key from the environment so it is never stored in the Actor + # input. On the Apify platform, set it as a secret environment variable. 
+ llm_api_key = os.environ.get('OPENAI_API_KEY') + if not llm_api_key: + raise RuntimeError('The OPENAI_API_KEY environment variable is not set.') + + # Create a proxy configuration that routes the browser through Apify Proxy. + proxy_configuration = await Actor.create_proxy_configuration() + proxy_url = await proxy_configuration.new_url() if proxy_configuration else None + + Actor.log.info(f'Running the agent (model={model}) for task: {task}') + + # Run the Browser Use agent and collect its structured output. + result = await run_agent_task( + task, + model=model, + llm_api_key=llm_api_key, + max_steps=max_steps, + headless=Actor.configuration.headless, + proxy_url=proxy_url, + ) + + if result is None: + Actor.log.warning('The agent did not return any structured output.') + return + + # Store every extracted item as a separate row in the default dataset. + for post in result.posts: + await Actor.push_data(post.model_dump()) diff --git a/pyproject.toml b/pyproject.toml index d17bdc01..a84ef15e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,6 +181,10 @@ indent-style = "space" # Local imports in Scrapy project. "TID252", # Prefer absolute imports over relative imports from parent modules ] +"**/docs/**/browser_use_project/**" = [ + # Local imports are mixed up with the Apify SDK. 
+ "I001", # Import block is un-sorted or un-formatted +] [tool.ruff.lint.flake8-quotes] docstring-quotes = "double" From c6c4a859eee31ecdff7bef85a98a3d03af4b5b10 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 20:45:29 +0200 Subject: [PATCH 2/3] docs: renumber Browser Use guide to 09 and switch to a single-file example --- ...{10_browser_use.mdx => 09_browser_use.mdx} | 59 +++------ docs/03_guides/code/09_browser_use.py | 113 ++++++++++++++++++ .../code/browser_use_project/Dockerfile | 21 ---- .../browser_use_project/my_actor/__init__.py | 0 .../browser_use_project/my_actor/__main__.py | 8 -- .../browser_use_project/my_actor/agent.py | 69 ----------- .../code/browser_use_project/my_actor/main.py | 53 -------- 7 files changed, 128 insertions(+), 195 deletions(-) rename docs/03_guides/{10_browser_use.mdx => 09_browser_use.mdx} (67%) create mode 100644 docs/03_guides/code/09_browser_use.py delete mode 100644 docs/03_guides/code/browser_use_project/Dockerfile delete mode 100644 docs/03_guides/code/browser_use_project/my_actor/__init__.py delete mode 100644 docs/03_guides/code/browser_use_project/my_actor/__main__.py delete mode 100644 docs/03_guides/code/browser_use_project/my_actor/agent.py delete mode 100644 docs/03_guides/code/browser_use_project/my_actor/main.py diff --git a/docs/03_guides/10_browser_use.mdx b/docs/03_guides/09_browser_use.mdx similarity index 67% rename from docs/03_guides/10_browser_use.mdx rename to docs/03_guides/09_browser_use.mdx index 6c5ff5b5..77529963 100644 --- a/docs/03_guides/10_browser_use.mdx +++ b/docs/03_guides/09_browser_use.mdx @@ -1,19 +1,14 @@ --- id: browser-use -title: Use Browser Use +title: Browser AI agents with Browser Use description: Build an Apify Actor that automates a browser with an LLM agent using the Browser Use library. 
--- -import CodeBlock from '@theme/CodeBlock'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; -import BrowserUseMain from '!!raw-loader!./code/browser_use_project/my_actor/main.py'; -import BrowserUseAgent from '!!raw-loader!./code/browser_use_project/my_actor/agent.py'; -import BrowserUseEntrypoint from '!!raw-loader!./code/browser_use_project/my_actor/__main__.py'; -import BrowserUseDockerfile from '!!raw-loader!./code/browser_use_project/Dockerfile'; +import BrowserUseExample from '!!raw-loader!roa-loader!./code/09_browser_use.py'; -In this guide, you'll learn how to use the [Browser Use](https://browser-use.com/) library in your Apify Actors. +In this guide, you'll learn how to use the [Browser Use](https://browser-use.com/) library to drive a browser with an LLM agent in your Apify Actors. ## Introduction @@ -41,7 +36,7 @@ Browser Use needs an LLM to drive the agent. You choose a provider wrapper, give - **`ChatAnthropic`** - Anthropic Claude models such as `claude-sonnet-4-5` or `claude-haiku-4-5`. Reads the key from `ANTHROPIC_API_KEY`. - **`ChatGoogle`** - Google Gemini models such as `gemini-2.5-flash`. Reads the key from `GOOGLE_API_KEY`. -The example Actor in this guide uses `ChatOpenAI`, but switching providers is a one-line change in `my_actor/agent.py`. More capable models generally complete tasks in fewer steps and more reliably, while smaller models are cheaper per step. +The example Actor in this guide uses `ChatOpenAI`, but switching providers is a one-line change in `run_agent_task`. More capable models generally complete tasks in fewer steps and more reliably, while smaller models are cheaper per step. Keep the API key out of the Actor input and source code. 
The example reads it from an environment variable, which on the Apify platform you set as a [secret environment variable](https://docs.apify.com/platform/actors/development/programming-interface/environment-variables) (for example `OPENAI_API_KEY`), and locally you export in your shell. @@ -49,54 +44,30 @@ Keep the API key out of the Actor input and source code. The example reads it fr The following Actor runs a Browser Use agent for a single task and stores its structured result in the default dataset. By default it opens [Hacker News](https://news.ycombinator.com) and returns the title and URL of the top five posts, but the task, model, and step limit are all configurable through the Actor input. -The code is split into three small modules, following the structure of the Apify Python Actor templates: - -- `my_actor/main.py` - The Actor's main coroutine. It handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy), runs the agent, and stores the result. -- `my_actor/agent.py` - The Browser Use-specific logic. It defines the output schema and a single `run_agent_task` function that builds the LLM, browser, and agent, then returns the agent's structured output. -- `my_actor/__main__.py` - The entry point that runs the `main` coroutine with `asyncio`. - - - - - {BrowserUseMain} - - - - - {BrowserUseAgent} - - - - - {BrowserUseEntrypoint} - - - +The whole Actor fits in a single file. 
A `run_agent_task` helper holds the Browser Use-specific logic - it defines the output schema and builds the LLM, browser, and agent - while the `main` coroutine handles the [Actor](https://docs.apify.com/platform/actors) lifecycle, reads the input, sets up [Apify Proxy](https://docs.apify.com/platform/proxy), runs the agent, and stores the result: + + + {BrowserUseExample} + A few things worth pointing out: -- Keeping the agent setup in `run_agent_task` separates the Browser Use-specific code from the Actor's orchestration logic. `my_actor/main.py` only decides what to read from the input and what to store. -- Passing `output_model_schema=Posts` makes the agent return a validated `Posts` instance via `history.structured_output`, so `my_actor/main.py` can push each item straight to the dataset. Adapt the task and the `Post`/`Posts` models together to fit your own use case. +- Keeping the agent setup in `run_agent_task` separates the Browser Use-specific code from the Actor's orchestration logic. `main` only decides what to read from the input and what to store. +- Passing `output_model_schema=Posts` makes the agent return a validated `Posts` instance via `history.structured_output`, so `main` can push each item straight to the dataset. Adapt the task and the `Post`/`Posts` models together to fit your own use case. - `enable_signal_handler=False` leaves signal handling to the Actor, which manages the run's lifecycle. Without it, Browser Use would install its own handlers and interfere with a clean shutdown. - `headless=Actor.configuration.headless` runs the browser without a visible window, which is what you want on the platform. ## Using Apify Proxy -Running on the Apify platform gives your agent access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. 
In the example above, `my_actor/main.py` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `run_agent_task`. +Running on the Apify platform gives your agent access to [Apify Proxy](https://docs.apify.com/platform/proxy), which rotates IP addresses to avoid rate limiting and blocking. In the example above, `main` creates a proxy configuration with `Actor.create_proxy_configuration` and passes a fresh proxy URL to `run_agent_task`. -Browser Use expects the proxy as a `ProxySettings` object with separate `server`, `username`, and `password` fields, whereas `ProxyConfiguration.new_url` returns a single URL string (for example `http://user:pass@proxy.apify.com:8000`). The `_proxy_settings` helper in `my_actor/agent.py` splits that URL into the fields Browser Use expects. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. +Browser Use expects the proxy as a `ProxySettings` object with separate `server`, `username`, and `password` fields, whereas `ProxyConfiguration.new_url` returns a single URL string (for example `http://user:pass@proxy.apify.com:8000`). The `to_browser_use_proxy` helper splits that URL into the fields Browser Use expects. To select specific proxy groups or a country, pass the relevant arguments to `Actor.create_proxy_configuration`. For more details, see the [Proxy management](../concepts/proxy-management) guide. ## Running on the Apify platform Browser Use drives a real Chromium over CDP, so the Actor needs a browser binary available at runtime. The simplest way to provide one is to build on top of the [Apify Playwright base image](https://hub.docker.com/r/apify/actor-python-playwright), which already ships a browser together with all of its system-level dependencies.
Browser Use discovers that browser automatically, so no extra install step is needed in the image. - - - - {BrowserUseDockerfile} - - - +Disable Browser Use's telemetry and cloud sync inside the Actor by setting the `ANONYMIZED_TELEMETRY=false` and `BROWSER_USE_CLOUD_SYNC=false` environment variables in your Dockerfile. When running the Actor locally, install the browser once with the `browser-use install` command, which downloads a Chromium build together with its dependencies: diff --git a/docs/03_guides/code/09_browser_use.py b/docs/03_guides/code/09_browser_use.py new file mode 100644 index 00000000..cd16773f --- /dev/null +++ b/docs/03_guides/code/09_browser_use.py @@ -0,0 +1,113 @@ +import asyncio +import os +from urllib.parse import urlsplit + +from browser_use import Agent, Browser, ChatOpenAI +from browser_use.browser import ProxySettings +from pydantic import BaseModel + +from apify import Actor + +# Default task, aligned with the `Posts` schema below. +DEFAULT_TASK = ( + 'Open https://news.ycombinator.com and return the title and URL ' + 'of the top 5 posts on the front page.' +) + + +class Post(BaseModel): + """A single item the agent is asked to extract.""" + + title: str + url: str + + +class Posts(BaseModel): + """The structured result returned by the agent.""" + + posts: list[Post] + + +def to_browser_use_proxy(proxy_url: str) -> ProxySettings: + """Convert an Apify Proxy URL into Browser Use `ProxySettings`.""" + parts = urlsplit(proxy_url) + return ProxySettings( + server=f'{parts.scheme}://{parts.hostname}:{parts.port}', + username=parts.username, + password=parts.password, + ) + + +async def run_agent_task( + task: str, + *, + model: str, + llm_api_key: str, + max_steps: int, + headless: bool = True, + proxy_url: str | None = None, +) -> Posts | None: + """Run a Browser Use agent for one task and return its structured output.""" + # Configure the LLM. Swap `ChatOpenAI` for another provider if needed. 
+ llm = ChatOpenAI(model=model, api_key=llm_api_key) + + # Configure the browser, optionally routed through a proxy. + browser = Browser( + headless=headless, + proxy=to_browser_use_proxy(proxy_url) if proxy_url else None, + ) + + # `output_model_schema` returns a validated `Posts`; signals stay with the Actor. + agent = Agent( + task=task, + llm=llm, + browser=browser, + output_model_schema=Posts, + enable_signal_handler=False, + ) + + history = await agent.run(max_steps=max_steps) + return history.structured_output + + +async def main() -> None: + async with Actor: + # Read the Actor input. + actor_input = await Actor.get_input() or {} + task = actor_input.get('task', DEFAULT_TASK) + model = actor_input.get('model', 'gpt-4.1-mini') + max_steps = actor_input.get('maxSteps', 25) + + # Read the LLM API key from the environment (set it as a secret on Apify). + llm_api_key = os.environ.get('OPENAI_API_KEY') + if not llm_api_key: + raise RuntimeError('The OPENAI_API_KEY environment variable is not set.') + + # Route the browser through Apify Proxy. + proxy_configuration = await Actor.create_proxy_configuration() + proxy_url = await proxy_configuration.new_url() if proxy_configuration else None + + Actor.log.info(f'Running the agent (model={model}) for task: {task}') + + result = await run_agent_task( + task, + model=model, + llm_api_key=llm_api_key, + max_steps=max_steps, + headless=Actor.configuration.headless, + proxy_url=proxy_url, + ) + + if result is None: + Actor.log.warning('The agent did not return any structured output.') + return + + # Store each extracted item as a dataset row. 
+ Actor.log.info(f'The agent returned {len(result.posts)} post(s); storing them.') + for post in result.posts: + Actor.log.info(f'Storing post: {post.title!r} ({post.url})') + await Actor.push_data(post.model_dump()) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/docs/03_guides/code/browser_use_project/Dockerfile b/docs/03_guides/code/browser_use_project/Dockerfile deleted file mode 100644 index c35bbfc9..00000000 --- a/docs/03_guides/code/browser_use_project/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# Use the Apify Playwright base image, which already ships a Chromium browser together -# with all of its system-level dependencies. Browser Use launches that browser via CDP, -# so no extra browser install step is needed. -FROM apify/actor-python-playwright:3.14-1.60.0 - -USER myuser - -# Copy just requirements.txt first to leverage the Docker build cache. -COPY --chown=myuser:myuser requirements.txt ./ -RUN pip install -r requirements.txt - -# Copy the rest of the source code and verify that it compiles. -COPY --chown=myuser:myuser . ./ -RUN python -m compileall -q my_actor/ - -# Disable Browser Use telemetry and cloud sync inside the Actor. -ENV ANONYMIZED_TELEMETRY=false -ENV BROWSER_USE_CLOUD_SYNC=false - -# Specify how to launch the Actor. 
-CMD ["python", "-m", "my_actor"] diff --git a/docs/03_guides/code/browser_use_project/my_actor/__init__.py b/docs/03_guides/code/browser_use_project/my_actor/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/03_guides/code/browser_use_project/my_actor/__main__.py b/docs/03_guides/code/browser_use_project/my_actor/__main__.py deleted file mode 100644 index 6aeaf3d5..00000000 --- a/docs/03_guides/code/browser_use_project/my_actor/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import annotations - -import asyncio - -from .main import main - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/docs/03_guides/code/browser_use_project/my_actor/agent.py b/docs/03_guides/code/browser_use_project/my_actor/agent.py deleted file mode 100644 index 708265fb..00000000 --- a/docs/03_guides/code/browser_use_project/my_actor/agent.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import annotations - -from urllib.parse import urlsplit - -from browser_use import Agent, Browser, ChatOpenAI -from browser_use.browser import ProxySettings -from pydantic import BaseModel - - -class Post(BaseModel): - """A single item the agent is asked to extract.""" - - title: str - url: str - - -class Posts(BaseModel): - """The structured result returned by the agent.""" - - posts: list[Post] - - -async def run_agent_task( - task: str, - *, - model: str, - llm_api_key: str, - max_steps: int, - headless: bool = True, - proxy_url: str | None = None, -) -> Posts | None: - """Run a Browser Use agent for a single task and return its structured output. - - The agent is driven by an OpenAI model and a real Chromium browser. Passing - `output_model_schema` makes the agent return a validated `Posts` instance instead - of free-form text, and `enable_signal_handler=False` leaves signal handling to the - Actor. - """ - # Configure the LLM that drives the agent. 
Swap `ChatOpenAI` for `ChatAnthropic`, - # `ChatGoogle`, or another provider to use a different model. - llm = ChatOpenAI(model=model, api_key=llm_api_key) - - # Configure the browser. When a proxy URL is provided, route the browser through it. - browser = Browser( - headless=headless, - proxy=_proxy_settings(proxy_url) if proxy_url else None, - ) - - # Create the agent and run it for at most `max_steps` steps. - agent = Agent( - task=task, - llm=llm, - browser=browser, - output_model_schema=Posts, - enable_signal_handler=False, - ) - - history = await agent.run(max_steps=max_steps) - return history.structured_output - - -def _proxy_settings(proxy_url: str) -> ProxySettings: - """Convert an Apify Proxy URL into Browser Use `ProxySettings`.""" - parts = urlsplit(proxy_url) - return ProxySettings( - server=f'{parts.scheme}://{parts.hostname}:{parts.port}', - username=parts.username, - password=parts.password, - ) diff --git a/docs/03_guides/code/browser_use_project/my_actor/main.py b/docs/03_guides/code/browser_use_project/my_actor/main.py deleted file mode 100644 index d045759e..00000000 --- a/docs/03_guides/code/browser_use_project/my_actor/main.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import annotations - -import os - -from apify import Actor - -from .agent import run_agent_task - -# The default task is aligned with the `Posts` output schema defined in `agent.py`. -DEFAULT_TASK = ( - 'Open https://news.ycombinator.com and return the title and URL ' - 'of the top 5 posts on the front page.' -) - - -async def main() -> None: - # Enter the context of the Actor. - async with Actor: - # Retrieve the Actor input, and use default values if not provided. - actor_input = await Actor.get_input() or {} - task = actor_input.get('task', DEFAULT_TASK) - model = actor_input.get('model', 'gpt-4.1-mini') - max_steps = actor_input.get('max_steps', 25) - - # Read the LLM API key from the environment so it is never stored in the Actor - # input. 
On the Apify platform, set it as a secret environment variable. - llm_api_key = os.environ.get('OPENAI_API_KEY') - if not llm_api_key: - raise RuntimeError('The OPENAI_API_KEY environment variable is not set.') - - # Create a proxy configuration that routes the browser through Apify Proxy. - proxy_configuration = await Actor.create_proxy_configuration() - proxy_url = await proxy_configuration.new_url() if proxy_configuration else None - - Actor.log.info(f'Running the agent (model={model}) for task: {task}') - - # Run the Browser Use agent and collect its structured output. - result = await run_agent_task( - task, - model=model, - llm_api_key=llm_api_key, - max_steps=max_steps, - headless=Actor.configuration.headless, - proxy_url=proxy_url, - ) - - if result is None: - Actor.log.warning('The agent did not return any structured output.') - return - - # Store every extracted item as a separate row in the default dataset. - for post in result.posts: - await Actor.push_data(post.model_dump()) From c4580b83772645fc0f1eef43ed3b6e5087a14135 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 21:03:22 +0200 Subject: [PATCH 3/3] chore: drop unused ruff ignore for the removed Browser Use project --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a84ef15e..d17bdc01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,10 +181,6 @@ indent-style = "space" # Local imports in Scrapy project. "TID252", # Prefer absolute imports over relative imports from parent modules ] -"**/docs/**/browser_use_project/**" = [ - # Local imports are mixed up with the Apify SDK. - "I001", # Import block is un-sorted or un-formatted -] [tool.ruff.lint.flake8-quotes] docstring-quotes = "double"