diff --git a/apps/site/docs/zh/index.mdx b/apps/site/docs/zh/index.mdx
index ec8aba38..d83eea92 100644
--- a/apps/site/docs/zh/index.mdx
+++ b/apps/site/docs/zh/index.mdx
@@ -22,7 +22,7 @@ Midscene.js 采用了多模态大语言模型(LLM),能够直观地“理
 
 ```typescript
 // 👀 输入关键字,执行搜索
-// 注:尽管这是一个英文页面,你也可以用中文指令控制它
+// 尽管这是一个英文页面,你也可以用中文指令控制它
 await ai('在搜索框输入 "Headphones" ,敲回车');
 
 // 👀 找到列表里耳机相关的信息
diff --git a/packages/web-integration/src/appium/page.ts b/packages/web-integration/src/appium/page.ts
index 369e0198..bfb2d256 100644
--- a/packages/web-integration/src/appium/page.ts
+++ b/packages/web-integration/src/appium/page.ts
@@ -1,5 +1,5 @@
 import fs from 'node:fs';
-import type { Size } from '@midscene/core/.';
+import type { Point, Size } from '@midscene/core/.';
 import { getTmpFile } from '@midscene/core/utils';
 import { base64Encoded, resizeImg } from '@midscene/shared/img';
 import { DOMParser } from '@xmldom/xmldom';
@@ -104,62 +104,82 @@ export class Page implements AbstractPage {
   }
 
   // Scroll to top element
-  async scrollUntilTop(distance?: number): Promise<void> {
-    const { height } = await this.browser.getWindowSize();
-    const scrollDistance = distance || height * 0.7;
+  async scrollUntilTop(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
 
-    await this.mouseWheel(0, -scrollDistance, 100);
+    await this.mouseWheel(0, -9999999, 100);
   }
 
   // Scroll to bottom element
-  async scrollUntilBottom(distance?: number): Promise<void> {
-    const { height } = await this.browser.getWindowSize();
-    const scrollDistance = distance || height * 0.7;
+  async scrollUntilBottom(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
 
-    await this.mouseWheel(0, scrollDistance, 100);
+    await this.mouseWheel(0, 9999999, 100);
   }
 
-  async scrollUntilLeft(distance?: number): Promise<void> {
-    const { width } = await this.browser.getWindowSize();
-    const scrollDistance = distance || width * 0.7;
+  async scrollUntilLeft(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
 
-    await this.mouseWheel(-scrollDistance, 0, 100);
+    await this.mouseWheel(-9999999, 0, 100);
   }
 
-  async scrollUntilRight(distance?: number): Promise<void> {
-    const { width } = await this.browser.getWindowSize();
-    const scrollDistance = distance || width * 0.7;
+  async scrollUntilRight(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
 
-    await this.mouseWheel(scrollDistance, 0, 100);
+    await this.mouseWheel(9999999, 0, 100);
   }
 
   // Scroll up one screen
-  async scrollUp(distance?: number): Promise<void> {
+  async scrollUp(distance?: number, startingPoint?: Point): Promise<void> {
     const { height } = await this.browser.getWindowSize();
     const scrollDistance = distance || height * 0.7;
 
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+
     await this.mouseWheel(0, -scrollDistance, 1000);
   }
 
   // Scroll down one screen
-  async scrollDown(distance?: number): Promise<void> {
+  async scrollDown(distance?: number, startingPoint?: Point): Promise<void> {
     const { height } = await this.browser.getWindowSize();
     const scrollDistance = distance || height * 0.7;
 
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+
     await this.mouseWheel(0, scrollDistance, 1000);
   }
 
-  async scrollLeft(distance?: number): Promise<void> {
+  async scrollLeft(distance?: number, startingPoint?: Point): Promise<void> {
     const { width } = await this.browser.getWindowSize();
     const scrollDistance = distance || width * 0.7;
 
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+
     await this.mouseWheel(-scrollDistance, 0, 1000);
   }
 
-  async scrollRight(distance?: number): Promise<void> {
+  async scrollRight(distance?: number, startingPoint?: Point): Promise<void> {
     const { width } = await this.browser.getWindowSize();
     const scrollDistance = distance || width * 0.7;
 
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+
     await this.mouseWheel(scrollDistance, 0, 1000);
   }
 
diff --git a/packages/web-integration/src/chrome-extension/page.ts b/packages/web-integration/src/chrome-extension/page.ts
index 438a2194..1b13d48c 100644
--- a/packages/web-integration/src/chrome-extension/page.ts
+++ b/packages/web-integration/src/chrome-extension/page.ts
@@ -9,7 +9,7 @@ import fs from 'node:fs';
 import type { WebKeyInput } from '@/common/page';
 import type { ElementInfo } from '@/extractor';
 import type { AbstractPage } from '@/page';
-import type { Rect, Size } from '@midscene/core/.';
+import type { Point, Rect, Size } from '@midscene/core/.';
 import { ifInBrowser } from '@midscene/shared/utils';
 import type { Protocol as CDPTypes } from 'devtools-protocol';
 
@@ -184,44 +184,96 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
     return url || '';
   }
 
-  async scrollUntilTop() {
-    return this.mouse.wheel(0, -9999999);
+  async scrollUntilTop(startingPoint?: Point) {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+    return this.mouse.wheel(
+      0,
+      -9999999,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollUntilBottom() {
-    return this.mouse.wheel(0, 9999999);
+  async scrollUntilBottom(startingPoint?: Point) {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+    return this.mouse.wheel(
+      0,
+      9999999,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollUntilLeft() {
-    return this.mouse.wheel(-9999999, 0);
+  async scrollUntilLeft(startingPoint?: Point) {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+    return this.mouse.wheel(
+      -9999999,
+      0,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollUntilRight() {
-    return this.mouse.wheel(9999999, 0);
+  async scrollUntilRight(startingPoint?: Point) {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
+    return this.mouse.wheel(
+      9999999,
+      0,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollUp(distance?: number) {
+  async scrollUp(distance?: number, startingPoint?: Point) {
     const { height } = await this.size();
     const scrollDistance = distance || height * 0.7;
-    return this.mouse.wheel(0, -scrollDistance);
+    return this.mouse.wheel(
+      0,
+      -scrollDistance,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollDown(distance?: number) {
+  async scrollDown(distance?: number, startingPoint?: Point) {
     const { height } = await this.size();
     const scrollDistance = distance || height * 0.7;
-    return this.mouse.wheel(0, scrollDistance);
+    return this.mouse.wheel(
+      0,
+      scrollDistance,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollLeft(distance?: number) {
+  async scrollLeft(distance?: number, startingPoint?: Point) {
     const { width } = await this.size();
     const scrollDistance = distance || width * 0.7;
-    return this.mouse.wheel(-scrollDistance, 0);
+    return this.mouse.wheel(
+      -scrollDistance,
+      0,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
-  async scrollRight(distance?: number) {
+  async scrollRight(distance?: number, startingPoint?: Point) {
     const { width } = await this.size();
     const scrollDistance = distance || width * 0.7;
-    return this.mouse.wheel(scrollDistance, 0);
+    return this.mouse.wheel(
+      scrollDistance,
+      0,
+      startingPoint?.left,
+      startingPoint?.top,
+    );
   }
 
   async clearInput(element: ElementInfo) {
@@ -267,11 +319,16 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
         clickCount: 1,
       });
     },
-    wheel: async (deltaX: number, deltaY: number) => {
+    wheel: async (
+      deltaX: number,
+      deltaY: number,
+      startX?: number,
+      startY?: number,
+    ) => {
       await this.sendCommandToDebugger('Input.dispatchMouseEvent', {
         type: 'mouseWheel',
-        x: 10,
-        y: 10,
+        x: startX ?? 10,
+        y: startY ?? 10,
         deltaX,
         deltaY,
       });
diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts
index 225d108d..3192a977 100644
--- a/packages/web-integration/src/common/tasks.ts
+++ b/packages/web-integration/src/common/tasks.ts
@@ -314,35 +314,49 @@ export class PageTaskExecutor {
               thought: plan.thought,
               locate: plan.locate,
               executor: async (taskParam, { element }) => {
-                if (element) {
-                  await this.page.mouse.move(
-                    element.center[0],
-                    element.center[1],
-                  );
-                }
+                const startingPoint = element
+                  ? {
+                      left: element.center[0],
+                      top: element.center[1],
+                    }
+                  : undefined;
                 const scrollToEventName = taskParam?.scrollType;
                 if (scrollToEventName === 'untilTop') {
-                  await this.page.scrollUntilTop();
+                  await this.page.scrollUntilTop(startingPoint);
                 } else if (scrollToEventName === 'untilBottom') {
-                  await this.page.scrollUntilBottom();
+                  await this.page.scrollUntilBottom(startingPoint);
                 } else if (scrollToEventName === 'untilRight') {
-                  await this.page.scrollUntilRight();
+                  await this.page.scrollUntilRight(startingPoint);
                 } else if (scrollToEventName === 'untilLeft') {
-                  await this.page.scrollUntilLeft();
+                  await this.page.scrollUntilLeft(startingPoint);
                 } else if (scrollToEventName === 'once') {
                   if (taskParam.direction === 'down') {
-                    await this.page.scrollDown(taskParam.distance || undefined);
+                    await this.page.scrollDown(
+                      taskParam.distance || undefined,
+                      startingPoint,
+                    );
                   } else if (taskParam.direction === 'up') {
-                    await this.page.scrollUp(taskParam.distance || undefined);
+                    await this.page.scrollUp(
+                      taskParam.distance || undefined,
+                      startingPoint,
+                    );
                   } else if (taskParam.direction === 'left') {
-                    await this.page.scrollLeft(taskParam.distance || undefined);
+                    await this.page.scrollLeft(
+                      taskParam.distance || undefined,
+                      startingPoint,
+                    );
                   } else if (taskParam.direction === 'right') {
-                    await this.page.scrollRight(taskParam.distance || undefined);
+                    await this.page.scrollRight(
+                      taskParam.distance || undefined,
+                      startingPoint,
+                    );
                   } else {
                     throw new Error(
                       `Unknown scroll direction: ${taskParam.direction}`,
                     );
                   }
+                  // until mouse event is done
+                  await sleep(500);
                 } else {
                   throw new Error(
                     `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
diff --git a/packages/web-integration/src/page.ts b/packages/web-integration/src/page.ts
index 45eebb37..17879519 100644
--- a/packages/web-integration/src/page.ts
+++ b/packages/web-integration/src/page.ts
@@ -1,4 +1,4 @@
-import type { Size } from '@midscene/core/.';
+import type { Point, Size } from '@midscene/core/.';
 import type { WebKeyInput } from './common/page';
 import type { WebUIContext } from './common/utils';
 import type { ElementInfo } from './extractor';
@@ -33,13 +33,13 @@ export abstract class AbstractPage {
 
   async clearInput(element: ElementInfo): Promise<void> {}
 
-  abstract scrollUntilTop(): Promise<void>;
-  abstract scrollUntilBottom(): Promise<void>;
-  abstract scrollUntilLeft(): Promise<void>;
-  abstract scrollUntilRight(): Promise<void>;
-  abstract scrollUp(distance?: number): Promise<void>;
-  abstract scrollDown(distance?: number): Promise<void>;
-  abstract scrollLeft(distance?: number): Promise<void>;
-  abstract scrollRight(distance?: number): Promise<void>;
+  abstract scrollUntilTop(startingPoint?: Point): Promise<void>;
+  abstract scrollUntilBottom(startingPoint?: Point): Promise<void>;
+  abstract scrollUntilLeft(startingPoint?: Point): Promise<void>;
+  abstract scrollUntilRight(startingPoint?: Point): Promise<void>;
+  abstract scrollUp(distance?: number, startingPoint?: Point): Promise<void>;
+  abstract scrollDown(distance?: number, startingPoint?: Point): Promise<void>;
+  abstract scrollLeft(distance?: number, startingPoint?: Point): Promise<void>;
+  abstract scrollRight(distance?: number, startingPoint?: Point): Promise<void>;
 
   abstract _forceUsePageContext?(): Promise<WebUIContext>;
diff --git a/packages/web-integration/src/playground/static-page.ts b/packages/web-integration/src/playground/static-page.ts
index 2f67a70c..9a0fa61f 100644
--- a/packages/web-integration/src/playground/static-page.ts
+++ b/packages/web-integration/src/playground/static-page.ts
@@ -3,6 +3,7 @@ import {
   type WebUIContext,
 } from '@/common/utils';
 import type { AbstractPage } from '@/page';
+import type { Point } from '@midscene/core/.';
 
 const ThrowNotImplemented: any = (methodName: string) => {
   throw new Error(
@@ -39,35 +40,35 @@ export default class StaticPage implements AbstractPage {
     return this.uiContext.url;
   }
 
-  async scrollUntilTop() {
+  async scrollUntilTop(startingPoint?: Point) {
     return ThrowNotImplemented('scrollUntilTop');
   }
 
-  async scrollUntilBottom() {
+  async scrollUntilBottom(startingPoint?: Point) {
     return ThrowNotImplemented('scrollUntilBottom');
   }
 
-  async scrollUntilLeft() {
+  async scrollUntilLeft(startingPoint?: Point) {
     return ThrowNotImplemented('scrollUntilLeft');
   }
 
-  async scrollUntilRight() {
+  async scrollUntilRight(startingPoint?: Point) {
     return ThrowNotImplemented('scrollUntilRight');
   }
 
-  async scrollUp(distance?: number) {
+  async scrollUp(distance?: number, startingPoint?: Point) {
     return ThrowNotImplemented('scrollUp');
   }
 
-  async scrollDown(distance?: number) {
+  async scrollDown(distance?: number, startingPoint?: Point) {
     return ThrowNotImplemented('scrollDown');
   }
 
-  async scrollLeft(distance?: number) {
+  async scrollLeft(distance?: number, startingPoint?: Point) {
     return ThrowNotImplemented('scrollLeft');
   }
 
-  async scrollRight(distance?: number) {
+  async scrollRight(distance?: number, startingPoint?: Point) {
     return ThrowNotImplemented('scrollRight');
   }
 
diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts
index 5229d70b..ed37d113 100644
--- a/packages/web-integration/src/puppeteer/base-page.ts
+++ b/packages/web-integration/src/puppeteer/base-page.ts
@@ -1,4 +1,4 @@
-import type { Size } from '@midscene/core/.';
+import type { Point, Size } from '@midscene/core/.';
 import { getTmpFile } from '@midscene/core/utils';
 import { base64Encoded } from '@midscene/shared/img';
 import type { Page as PlaywrightPage } from 'playwright';
@@ -133,40 +133,67 @@ export class Page<
     await this.keyboard.press('Backspace');
   }
 
-  scrollUntilTop(): Promise<void> {
+  async scrollUntilTop(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     return this.mouse.wheel(0, -9999999);
   }
 
-  scrollUntilBottom(): Promise<void> {
+  async scrollUntilBottom(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     return this.mouse.wheel(0, 9999999);
   }
 
-  scrollUntilLeft(): Promise<void> {
+  async scrollUntilLeft(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     return this.mouse.wheel(-9999999, 0);
   }
 
-  scrollUntilRight(): Promise<void> {
+  async scrollUntilRight(startingPoint?: Point): Promise<void> {
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     return this.mouse.wheel(9999999, 0);
   }
 
-  async scrollUp(distance?: number): Promise<void> {
+  async scrollUp(distance?: number, startingPoint?: Point): Promise<void> {
     const innerHeight = await this.evaluate(() => window.innerHeight);
     const scrollDistance = distance || innerHeight * 0.7;
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     await this.mouse.wheel(0, -scrollDistance);
   }
-  async scrollDown(distance?: number): Promise<void> {
+
+  async scrollDown(distance?: number, startingPoint?: Point): Promise<void> {
     const innerHeight = await this.evaluate(() => window.innerHeight);
     const scrollDistance = distance || innerHeight * 0.7;
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     await this.mouse.wheel(0, scrollDistance);
   }
-  async scrollLeft(distance?: number): Promise<void> {
+
+  async scrollLeft(distance?: number, startingPoint?: Point): Promise<void> {
     const innerWidth = await this.evaluate(() => window.innerWidth);
     const scrollDistance = distance || innerWidth * 0.7;
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     await this.mouse.wheel(-scrollDistance, 0);
   }
-  async scrollRight(distance?: number): Promise<void> {
+
+  async scrollRight(distance?: number, startingPoint?: Point): Promise<void> {
     const innerWidth = await this.evaluate(() => window.innerWidth);
     const scrollDistance = distance || innerWidth * 0.7;
+    if (startingPoint) {
+      await this.mouse.move(startingPoint.left, startingPoint.top);
+    }
     await this.mouse.wheel(scrollDistance, 0);
   }
 