onkernel · raiden-staging · Jun 18, 2025 · Jun 19, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/README.md b/README.md
@@ -47,6 +47,7 @@ create-kernel-app [app-name] [options]
   - `stagehand`: Template with Stagehand SDK (Typescript only)
   - `advanced-sample`: Implements sample apps using advanced Kernel configs
   - `computer-use`: Implements a prompt loop using Anthropic Computer Use
+  - `cua`: Implements a Computer Use Agent (OpenAI CUA) sample
 
 ### Examples
 
@@ -121,6 +122,12 @@ kernel invoke python-basic get-page-title --payload '{"url": "https://www.google
 
 # Python + Browser Use
 kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
+
+# Typescript + CUA Sample
+kernel invoke ts-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
+
+# Python + CUA Sample
+kernel invoke python-cua cua-task --payload '{"task": "Get current market price range for an unboxed Dreamcast"}'
 ```
 
 ## Sample apps reference
@@ -134,6 +141,7 @@ These are the sample apps currently available when you run `npx @onkernel/create
 | **stagehand** | Returns the first result of a specified Google search | Stagehand | `{ query }` |
 | **advanced-sample** | Implements sample apps using advanced Kernel configs | n/a |
 | **computer-use** | Implements a prompt loop | Anthropic Computer Use API | `{ query }` |
+| **cua** | Implements the OpenAI Computer-Using Agent (CUA) | OpenAI CUA | `{ task }` |
 
 ## Documentation
 

diff --git a/index.ts b/index.ts
@@ -18,7 +18,8 @@ type TemplateKey =
   | "browser-use"
   | "stagehand"
   | "advanced-sample"
-  | "computer-use";
+  | "computer-use"
+  | "cua";
 type LanguageInfo = { name: string; shorthand: string };
 type TemplateInfo = {
   name: string;
@@ -34,6 +35,7 @@ const TEMPLATE_BROWSER_USE = "browser-use";
 const TEMPLATE_STAGEHAND = "stagehand";
 const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
 const TEMPLATE_COMPUTER_USE = "computer-use";
+const TEMPLATE_CUA = "cua";
 const LANGUAGE_SHORTHAND_TS = "ts";
 const LANGUAGE_SHORTHAND_PY = "py";
 
@@ -73,6 +75,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
     description: "Implements the Anthropic Computer Use SDK",
     languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
   },
+  [TEMPLATE_CUA]: {
+    name: "CUA Sample",
+    description: "Implements a Computer Use Agent (OpenAI CUA) sample",
+    languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
+  },
 };
 
 const INVOKE_SAMPLES: Record<
@@ -88,6 +95,8 @@ const INVOKE_SAMPLES: Record<
       'kernel invoke ts-advanced test-captcha-solver',
     [TEMPLATE_COMPUTER_USE]:
       'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
+    [TEMPLATE_CUA]:
+      'kernel invoke ts-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
   },
   [LANGUAGE_PYTHON]: {
     [TEMPLATE_SAMPLE_APP]:
@@ -98,6 +107,8 @@ const INVOKE_SAMPLES: Record<
       'kernel invoke python-advanced test-captcha-solver',
     [TEMPLATE_COMPUTER_USE]:
       'kernel invoke python-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
+    [TEMPLATE_CUA]:
+      'kernel invoke python-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
   },
 };
 
@@ -114,6 +125,8 @@ const REGISTERED_APP_NAMES: Record<
       'ts-advanced',
     [TEMPLATE_COMPUTER_USE]:
       'ts-cu',
+    [TEMPLATE_CUA]:
+      'ts-cua',
   },
   [LANGUAGE_PYTHON]: {
     [TEMPLATE_SAMPLE_APP]:
@@ -124,6 +137,8 @@ const REGISTERED_APP_NAMES: Record<
       'python-advanced',
     [TEMPLATE_COMPUTER_USE]:
       'python-cu',
+    [TEMPLATE_CUA]:
+      'python-cua',
   },
 };
 
@@ -354,12 +369,16 @@ function printNextSteps(
       ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
       ? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
+      : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
+      ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_PYTHON && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_ADVANCED_SAMPLE)
       ? "kernel deploy main.py"
       : language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE
       ? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_PYTHON && template === TEMPLATE_COMPUTER_USE
       ? "kernel deploy main.py --env ANTHROPIC_API_KEY=XXX"
+      : language === LANGUAGE_PYTHON && template === TEMPLATE_CUA
+      ? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
       : "";
 
   console.log(

diff --git a/templates/python/browser-use/main.py b/templates/python/browser-use/main.py
@@ -1,8 +1,9 @@
 from langchain_openai import ChatOpenAI
-from browser_use import Agent, BrowserSession
+from browser_use import Agent
 import kernel
 from kernel import Kernel
 from typing import TypedDict
+from session import BrowserSessionCustomResize
 
 client = Kernel()
 
@@ -13,7 +14,7 @@ class TaskInput(TypedDict):
 
 # LLM API Keys are set in the environment during `kernel deploy <filename> -e OPENAI_API_KEY=XXX`
 # See https://docs.onkernel.com/launch/deploy#environment-variables
-llm = ChatOpenAI(model="gpt-4o")
+llm = ChatOpenAI(model="gpt-4o-mini")
 
 @app.action("bu-task")
 async def bu_task(ctx: kernel.KernelContext, input_data: TaskInput):
@@ -37,7 +38,7 @@ async def bu_task(ctx: kernel.KernelContext, input_data: TaskInput):
         #task="Compare the price of gpt-4o and DeepSeek-V3",
         task=input_data["task"],
         llm=llm,
-        browser_session=BrowserSession(cdp_url=kernel_browser.cdp_ws_url)
+        browser_session=BrowserSessionCustomResize(cdp_url=kernel_browser.cdp_ws_url)
     )
     result = await agent.run()
     if result.final_result() is not None:

diff --git a/templates/python/browser-use/session.py b/templates/python/browser-use/session.py
@@ -0,0 +1,85 @@
+from browser_use import BrowserSession
+
+# Define a subclass of BrowserSession that overrides _setup_viewports (which mishandles resizing when connecting via CDP)
+class BrowserSessionCustomResize(BrowserSession):
+    async def _setup_viewports(self) -> None:
+        """Pin the profile to a fixed 1024x786 geometry, apply profile-level context settings, and resize existing pages to that viewport."""
+
+        assert self.browser_context, 'BrowserSession.browser_context must already be set up before calling _setup_viewports()'
+
+        self.browser_profile.window_size = {"width": 1024, "height": 786}  # hard-coded geometry overrides whatever the profile had; NOTE(review): 786 may be a typo for 768 — confirm
+        self.browser_profile.viewport = {"width": 1024, "height": 786}
+        self.browser_profile.screen = {"width": 1024, "height": 786}
+        self.browser_profile.device_scale_factor = 1.0
+
+        # capture the viewport that was just forced above (nothing is logged here)
+        viewport = self.browser_profile.viewport
+        # grant any permissions configured on the profile; a failure is printed and otherwise ignored
+        if self.browser_profile.permissions:
+            try:
+                await self.browser_context.grant_permissions(self.browser_profile.permissions)
+            except Exception as e:
+                print(e)
+        try:  # best-effort: timeout setup errors are printed, not raised
+            if self.browser_profile.default_timeout:
+                self.browser_context.set_default_timeout(self.browser_profile.default_timeout)
+            if self.browser_profile.default_navigation_timeout:
+                self.browser_context.set_default_navigation_timeout(self.browser_profile.default_navigation_timeout)
+        except Exception as e:
+            print(e)
+        try:  # best-effort: header setup errors are printed, not raised
+            if self.browser_profile.extra_http_headers:
+                self.browser_context.set_extra_http_headers(self.browser_profile.extra_http_headers)
+        except Exception as e:
+            print(e)
+
+        try:  # best-effort: geolocation setup errors are printed, not raised
+            if self.browser_profile.geolocation:
+                await self.browser_context.set_geolocation(self.browser_profile.geolocation)
+        except Exception as e:
+            print(e)
+
+        await self.load_storage_state()  # not guarded: an exception here propagates to the caller
+
+        page = None  # stays None only when the context has no pages
+
+        for page in self.browser_context.pages:
+            # apply viewport size settings to any existing pages
+            if viewport:
+                await page.set_viewport_size(viewport)
+
+            # show browser-use dvd screensaver-style bouncing loading animation on any about:blank pages
+            if page.url == 'about:blank':
+                await self._show_dvd_screensaver_loading_animation(page)
+
+        page = page or (await self.browser_context.new_page())  # reuse the last existing page, or open one if there were none
+
+        if (not viewport) and (self.browser_profile.window_size is not None) and not self.browser_profile.headless:
+            # attempt to resize the actual browser window; NOTE(review): viewport is set unconditionally above, so this branch looks unreachable — confirm
+
+            # cdp api: https://chromedevtools.github.io/devtools-protocol/tot/Browser/#method-setWindowBounds
+            try:
+                cdp_session = await page.context.new_cdp_session(page)
+                window_id_result = await cdp_session.send('Browser.getWindowForTarget')
+                await cdp_session.send(
+                    'Browser.setWindowBounds',
+                    {
+                        'windowId': window_id_result['windowId'],
+                        'bounds': {
+                            **self.browser_profile.window_size,
+                            'windowState': 'normal',  # Ensure window is not minimized/maximized
+                        },
+                    },
+                )
+                await cdp_session.detach()
+            except Exception as e:
+                _log_size = lambda size: f'{size["width"]}x{size["height"]}px'  # NOTE(review): never used in this method
+                try:
+                    # fallback to javascript resize if cdp setWindowBounds fails; NOTE(review): evaluate() receives width/height as keyword args — confirm Playwright accepts this
+                    await page.evaluate(
+                        """(width, height) => {window.resizeTo(width, height)}""",
+                        **self.browser_profile.window_size,
+                    )
+                    return
+                except Exception as e:
+                    pass  # a failure of the fallback resize is silently ignored
diff --git a/templates/python/cua/README.md b/templates/python/cua/README.md
@@ -0,0 +1,7 @@
+# Kernel Python Sample App - CUA
+
+This is a Kernel application that demonstrates OpenAI's Computer-Using Agent (CUA).
+
+It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
+
+See the [docs](https://docs.onkernel.com/quickstart) for more information.
diff --git a/templates/python/cua/__init__.py b/templates/python/cua/__init__.py
diff --git a/templates/python/cua/_gitignore b/templates/python/cua/_gitignore
@@ -0,0 +1,4 @@
+__pycache__/
+.env
+.venv/
+env/
diff --git a/templates/python/cua/agent/__init__.py b/templates/python/cua/agent/__init__.py
@@ -0,0 +1 @@
+from .agent import Agent