feat: support generate e2e cases

lvqq · Nov 10, 2024 · dcc86b6 · dcc86b6
1 parent 4209b46
commit dcc86b6
Show file tree

Hide file tree

Showing 12 changed files with 265 additions and 56 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,132 @@
-# intelli-browser
-Use natural language to write e2e test cases, powered by AI and playwright
+<h2 align="center">Intelli-Browser</h2>
+
+<p align="center">
+  <a href="https://github.com/lvqq/intelli-browser/tags">
+    <img alt="GitHub tag (latest by date)" src="https://img.shields.io/github/v/release/lvqq/intelli-browser">
+  </a>
+  <a href="https://github.com/lvqq/intelli-browser/blob/HEAD/LICENSE">
+    <img alt="license" src="https://img.shields.io/github/license/lvqq/intelli-browser">
+  </a>
+</p>
+
+## Introduction
+Use natural language to write e2e test cases, powered by LLM and playwright
+
+
+## Features
+- ✨ Use natural language to write and run e2e test cases
+- 🧪 Generate traditional e2e test cases after executing the cases
+- 💭 More features are coming soon! Feel free to open an issue or submit a pull request
+
+## Demo
+![](assets/demo.mp4)
+
+Task:
+> Click search and input "Web API", press "arrow down" once to select the second result. Then press "ENTER" to search it. Find "Keyboard API" nearby title "K" and click it
+
+## Limitations
+- Only *claude-3-5-sonnet* LLM is supported for now
+- Only *playwright* framework is supported for now
+
+## Usage
+
+### Installation
+```bash
+# use npm
+npm install @intelli-browser/core
+
+# use yarn
+yarn add @intelli-browser/core
+
+# use pnpm
+pnpm add @intelli-browser/core
+```
+
+### API reference
+```javascript
+  import { IntelliBrowser } from '@intelli-browser/core';
+
+  const client = new IntelliBrowser({
+    apiKey: '',  // add apiKey or provide ANTHROPIC_API_KEY in .env file
+  })
+
+  await client.run({
+    page,  // playwright Page instance
+    message: 'Click search and input "Web API", press "arrow down" to select the second result. then press "ENTER" to search it',  // user prompt
+  })
+```
+
+### Generate E2E cases
+If you want to generate the traditional E2E test cases after executing, just get the return data from `client.run`
+
+```javascript
+  import { IntelliBrowser } from '@intelli-browser/core';
+
+  const client = new IntelliBrowser({
+    apiKey: '',  // add apiKey or provide ANTHROPIC_API_KEY in .env file
+  })
+
+  // will return the generated e2e cases as string array
+  const e2e = await client.run({
+    page,  // playwright Page instance
+    message: 'Click search and input "Web API", press "arrow down" to select the second result. then press "ENTER" to search it',  // user prompt
+  })
+
+  console.log(e2e)
+  // As the demo shows:
+  // [
+  //   'await page.mouse.move(1241.61, 430.2)',
+  //   'await page.waitForTimeout(2266)',
+  //   'await page.mouse.down()',
+  //   'await page.mouse.up()',
+  //   'await page.waitForTimeout(3210)',
+  //   "await page.mouse.type('Web API')",
+  //   'await page.waitForTimeout(3064)',
+  //   "await this.page.keyboard.press('ArrowDown')",
+  //   'await page.waitForTimeout(2917)',
+  //   "await this.page.keyboard.press('Enter')",
+  //   'await page.waitForTimeout(6471)',
+  //   "await this.page.keyboard.press('PageDown')",
+  //   'await page.waitForTimeout(7021)',
+  //   'await page.mouse.move(687.39, 923.4)',
+  //   'await page.waitForTimeout(4501)',
+  //   'await page.mouse.down()',
+  //   'await page.mouse.up()'
+  // ]
+
+```
+
+
+### Other options
+By default, LLM conversations and actions are logged as the demo shows. If you don't want it, you can try:
+```javascript
+import { IntelliBrowser } from '@intelli-browser/core';
+
+const client = new IntelliBrowser({
+  apiKey: '',  // add apiKey or provide ANTHROPIC_API_KEY in .env file
+  verbose: false,  // don't log out conversations and actions
+})
+```
+
+By default, context will be cleaned after each `client.run` to save token usage. If you want to retain context, you can try:
+```javascript
+import { IntelliBrowser } from '@intelli-browser/core';
+
+const client = new IntelliBrowser({
+  apiKey: '',  // add apiKey or provide ANTHROPIC_API_KEY in .env file
+  autoClean: false,  // don't auto clean context
+})
+```
+
+## How it works
+- Inspired by the claude-3.5-sonnet **computer use** function, it simulates browser use and combines it with e2e cases
+- User prompt and page info are sent to the LLM to analyze page content and interactive elements
+- Intelli-Browser executes the LLM-planned actions and feeds the results back
+- It ends when there are no more actions or the goal of the task cannot be achieved
+
+## Credits
+- [Computer Use](https://docs.anthropic.com/en/docs/build-with-claude/computer-use)
+- [agent.exe](https://github.com/corbt/agent.exe)
+
+## License
+Based on [MIT License](./LICENSE)
diff --git a/assets/demo.mp4 b/assets/demo.mp4
diff --git a/example/index.js b/example/index.js
@@ -12,14 +12,15 @@ async function main() {
   await page.goto('https://developer.mozilla.org/en-US/');
 
   const client = new IntelliBrowser({
-    apiKey: '',  // modify apiKey or provide ANTHROPIC_API_KEY in .env file
+    apiKey: '',  // add apiKey or provide ANTHROPIC_API_KEY in .env file
   })
 
   await page.waitForTimeout(5000);  
-  await client.run({
+  const e2e = await client.run({
     page,
-    message: 'Click search and input "Web API", press "arrow down" to select the second result. then press "ENTER" to search it'
+    message: 'Click search and input "Web API", press "arrow down" once to select the second result. Then press "ENTER" to search it. Find "Keyboard API" nearby title "K" and click it'
   })
+  console.log(e2e)
   await browser.close();
 }
 

diff --git a/example/package.json b/example/package.json
@@ -11,7 +11,7 @@
   "author": "",
   "license": "MIT",
   "devDependencies": {
-    "@intelli-browser/core": "^0.1.0",
+    "@intelli-browser/core": "workspace:^",
     "@playwright/test": "^1.48.2",
     "@types/node": "^22.9.0",
     "playwright": "*"

diff --git a/packages/anthropic/package.json b/packages/anthropic/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@intelli-browser/anthropic",
-  "version": "0.1.0",
+  "version": "0.1.1",
   "description": "",
   "type": "module",
   "main": "./dist/index.cjs",

diff --git a/packages/anthropic/src/anthropic.ts b/packages/anthropic/src/anthropic.ts
@@ -44,10 +44,14 @@ export class AnthropicClient {
     this.messages = [];
   }
 
-  public clear() {
+  public clean() {
     this.messages = [];
   }
 
+  public getMessages() {
+    return this.messages;
+  }
+
   public async prompt({ width, height, message }: PromptOption) {
     const prompt: BetaMessageParam = { role: 'user', content: message };
     const { content, role } = await this.client.beta.messages.create({

diff --git a/packages/core/package.json b/packages/core/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@intelli-browser/core",
-  "version": "0.1.0",
+  "version": "0.1.1",
   "description": "",
   "type": "module",
   "main": "./dist/index.cjs",

diff --git a/packages/core/src/browser.ts b/packages/core/src/browser.ts
@@ -3,7 +3,18 @@ import { PlayWrightAgent } from '@intelli-browser/playwright'
 
 interface IntelliBrowserProps {
   apiKey?: string;
-  verbose?: boolean;  // display LLM text output, default is true
+  /**
+   * Display LLM text output, default is true
+   */
+  verbose?: boolean;
+  /**
+   * Whether auto clean context after each run
+   */
+  autoClean?: boolean;
+  /**
+   * Max steps for LLM actions, default is 50
+   */
+  maxSteps?: number;
 }
 
 interface ExecuteOptions {
@@ -14,32 +25,39 @@ interface ExecuteOptions {
 
 export class IntelliBrowser {
   private anthropicClient: AnthropicClient
-  private playwrightAgent?: PlayWrightAgent
   private verbose: boolean
+  private autoClean: boolean
+  private maxSteps: number
 
   constructor({
-    verbose = true,
     apiKey,
+    verbose = true,
+    autoClean = true,
+    maxSteps = 50,
   }: IntelliBrowserProps) {
-    this.verbose = verbose;
     const anthropocApiKey = apiKey || process.env["ANTHROPIC_API_KEY"]
     this.anthropicClient = new AnthropicClient({
       apiKey: anthropocApiKey,
     })
+    this.verbose = verbose
+    this.autoClean = autoClean
+    this.maxSteps = maxSteps
   }
 
   public async run({
     page,
     message,
   }: ExecuteOptions) {
-    this.playwrightAgent = new PlayWrightAgent({ page })
+    const playwrightAgent = new PlayWrightAgent({ page })
     let isFinish = false;
     let nextPrompt = message;
+    let loopTime = 0;  // for caculate delay time between each action
     if (this.verbose) {
       console.log(`User: ${message}\n`)
     }
-    while(!isFinish) {
-      const { scaledWidth, scaledHeight } = this.playwrightAgent!.getScaledScreenDimensions();
+
+    while(!isFinish || this.anthropicClient.getMessages().length <= this.maxSteps) {
+      const { scaledWidth, scaledHeight } = playwrightAgent.getScaledScreenDimensions();
       const { tool, text } = await this.anthropicClient.prompt({ message: nextPrompt, width: scaledWidth, height: scaledHeight })
       if (this.verbose && text) {
         console.log(`Assistant: ${text?.text || ''}\n`)
@@ -53,10 +71,29 @@ export class IntelliBrowser {
         }
       }
       if (tool) {
-        nextPrompt = await this.playwrightAgent!.runAction(tool, formatToolResult)
+        const delay = loopTime ? Date.now() - loopTime : 0
+        nextPrompt = await playwrightAgent.runAction({
+          tool,
+          formatFn: formatToolResult,
+          delay: delay < 50 ? 0 : delay,
+          showAction: this.verbose,
+        })
+        // if screenshot, skip update time
+        if (tool?.input?.action !== 'screenshot') {
+          loopTime = Date.now();
+        }
       } else {
         isFinish = true;
       }
     }
+    if (this.autoClean) {
+      this.clean();
+    }
+
+    return playwrightAgent.getActions();
+  }
+
+  public clean() {
+    this.anthropicClient.clean();
   }
 }
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -1 +1 @@
-export * from './browser'
+export * from './browser'
diff --git a/packages/playwright/package.json b/packages/playwright/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@intelli-browser/playwright",
-  "version": "0.1.0",
+  "version": "0.1.1",
   "description": "",
   "type": "module",
   "main": "./dist/index.cjs",
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		export * from './browser'
		export * from './browser'