Skip to content

Commit 57cd24a

Browse files
authored
refactor(core): remove tree in context (#1376)
* refactor(core): remove tree info in uiContext * chore(core): fix lint * chore(core): remove dom-based locator * fix(core): test cases * chore(core): fix lint * fix(core): test cases
1 parent 5300926 commit 57cd24a

File tree

41 files changed

+286
-836
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+286
-836
lines changed

apps/chrome-extension/src/utils/eventOptimizer.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import Insight from '@midscene/core';
2-
import type { BaseElement, Rect, UIContext } from '@midscene/core';
2+
import type { Rect, UIContext } from '@midscene/core';
33
import type { RecordedEvent } from '@midscene/recorder';
44
import { globalModelConfigManager } from '@midscene/shared/env';
55
import { compositeElementInfoImg } from '@midscene/shared/img';
@@ -105,10 +105,9 @@ export const generateAIDescription = async (
105105

106106
const descriptionPromise = (async () => {
107107
try {
108-
const mockContext: UIContext<BaseElement> = {
108+
const mockContext: UIContext = {
109109
screenshotBase64: event.screenshotBefore as string,
110110
size: { width: event.pageInfo.width, height: event.pageInfo.height },
111-
tree: { node: null, children: [] },
112111
};
113112

114113
const insight = new Insight(mockContext);

packages/core/src/agent/agent.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ import {
6767
parsePrompt,
6868
printReportMsg,
6969
} from './utils';
70-
import { trimContextByViewport } from './utils';
7170

7271
const debug = getDebug('agent');
7372

@@ -348,10 +347,8 @@ export class Agent<
348347
}
349348

350349
appendExecutionDump(execution: ExecutionDump) {
351-
// use trimContextByViewport to process execution
352-
const trimmedExecution = trimContextByViewport(execution);
353350
const currentDump = this.dump;
354-
currentDump.executions.push(trimmedExecution);
351+
currentDump.executions.push(execution);
355352
}
356353

357354
dumpDataString() {

packages/core/src/agent/index.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@ export { cacheFileExt } from './task-cache';
1111

1212
export { TaskExecutor } from './tasks';
1313

14-
export {
15-
getCurrentExecutionFile,
16-
trimContextByViewport,
17-
} from './utils';
14+
export { getCurrentExecutionFile } from './utils';
1815

1916
export type { AgentOpt } from '../types';

packages/core/src/agent/task-builder.ts

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import type {
1919
import { InsightError } from '@/types';
2020
import { sleep } from '@/utils';
2121
import type { IModelConfig } from '@midscene/shared/env';
22+
import { generateElementByPosition } from '@midscene/shared/extractor';
2223
import { getDebug } from '@midscene/shared/logger';
2324
import { assert } from '@midscene/shared/utils';
2425
import type { TaskCache } from './task-cache';
@@ -351,7 +352,7 @@ export class TaskBuilder {
351352
let { uiContext } = taskContext;
352353

353354
assert(
354-
param?.prompt || param?.id || param?.bbox,
355+
param?.prompt || param?.bbox,
355356
`No prompt or id or position or bbox to locate, param=${JSON.stringify(
356357
param,
357358
)}`,
@@ -381,12 +382,18 @@ export class TaskBuilder {
381382
};
382383

383384
// from xpath
384-
let elementFromXpath: Rect | undefined;
385+
let rectFromXpath: Rect | undefined;
385386
if (param.xpath && this.interface.rectMatchesCacheFeature) {
386-
elementFromXpath = await this.interface.rectMatchesCacheFeature({
387+
rectFromXpath = await this.interface.rectMatchesCacheFeature({
387388
xpaths: [param.xpath],
388389
});
389390
}
391+
const elementFromXpath = rectFromXpath
392+
? generateElementByPosition({
393+
x: rectFromXpath.left + rectFromXpath.width / 2,
394+
y: rectFromXpath.top + rectFromXpath.height / 2,
395+
})
396+
: undefined;
390397
const userExpectedPathHitFlag = !!elementFromXpath;
391398

392399
const cachePrompt = param.prompt;
@@ -408,7 +415,7 @@ export class TaskBuilder {
408415

409416
const elementFromPlan =
410417
!userExpectedPathHitFlag && !cacheHitFlag
411-
? matchElementFromPlan(param, uiContext.tree)
418+
? matchElementFromPlan(param)
412419
: undefined;
413420
const planHitFlag = !!elementFromPlan;
414421

@@ -485,7 +492,7 @@ export class TaskBuilder {
485492
if (!element) {
486493
if (locateDump) {
487494
throw new InsightError(
488-
`Element not found: ${param.prompt}`,
495+
`Element not found : ${param.prompt}`,
489496
locateDump,
490497
);
491498
}
@@ -513,8 +520,7 @@ export class TaskBuilder {
513520
hitBy = {
514521
from: 'Planning',
515522
context: {
516-
id: elementFromPlan?.id,
517-
bbox: elementFromPlan?.bbox,
523+
rect: elementFromPlan?.rect,
518524
},
519525
};
520526
}

packages/core/src/agent/tasks.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import { ExecutionSession } from './execution-session';
3030
import { TaskBuilder } from './task-builder';
3131
import type { TaskCache } from './task-cache';
3232
export { locatePlanForLocate } from './task-builder';
33+
import { descriptionOfTree } from '@midscene/shared/extractor';
3334
import { taskTitleStr } from './ui-utils';
3435
import { parsePrompt } from './utils';
3536

@@ -422,11 +423,25 @@ export class TaskExecutor {
422423
}
423424

424425
let extractResult;
426+
427+
let extraPageDescription = '';
428+
if (opt?.domIncluded && this.interface.getElementsNodeTree) {
429+
debug('appending tree info for page');
430+
const tree = await this.interface.getElementsNodeTree();
431+
extraPageDescription = await descriptionOfTree(
432+
tree,
433+
200,
434+
false,
435+
opt?.domIncluded === 'visible-only',
436+
);
437+
}
438+
425439
try {
426440
extractResult = await this.insight.extract<any>(
427441
demandInput,
428442
modelConfig,
429443
opt,
444+
extraPageDescription,
430445
multimodalPrompt,
431446
);
432447
} catch (error) {

packages/core/src/agent/ui-utils.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ export function paramStr(task: ExecutionTask) {
128128
if (task.type === 'Insight') {
129129
value =
130130
locateParamStr((task as ExecutionTaskInsightLocate)?.param) ||
131-
(task as ExecutionTaskInsightLocate)?.param?.id ||
132131
(task as ExecutionTaskInsightQuery)?.param?.dataDemand ||
133132
(task as ExecutionTaskInsightAssertion)?.param?.assertion;
134133
}

packages/core/src/agent/utils.ts

Lines changed: 3 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,17 @@
1-
import { elementByPositionWithElementInfo } from '@/ai-model';
21
import type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';
32
import type { AbstractInterface } from '@/device';
43
import type {
5-
BaseElement,
64
ElementCacheFeature,
7-
ElementTreeNode,
8-
ExecutionDump,
9-
ExecutionTask,
10-
ExecutorContext,
115
LocateResultElement,
126
PlanningLocateParam,
137
UIContext,
148
} from '@/types';
159
import { uploadTestInfoToServer } from '@/utils';
16-
import { NodeType } from '@midscene/shared/constants';
1710
import {
1811
MIDSCENE_REPORT_TAG_NAME,
1912
globalConfigManager,
2013
} from '@midscene/shared/env';
21-
import {
22-
generateElementByPosition,
23-
getNodeFromCacheList,
24-
} from '@midscene/shared/extractor';
14+
import { generateElementByPosition } from '@midscene/shared/extractor';
2515
import { getDebug } from '@midscene/shared/logger';
2616
import { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';
2717
import { assert, logMsg, uuid } from '@midscene/shared/utils';
@@ -55,10 +45,6 @@ export async function commonContextParser(
5545
debugProfile(`size: ${size.width}x${size.height} dpr: ${size.dpr}`);
5646

5747
return {
58-
tree: {
59-
node: null,
60-
children: [],
61-
},
6248
size,
6349
screenshotBase64: screenshotBase64!,
6450
};
@@ -133,26 +119,18 @@ export function generateCacheId(fileName?: string): string {
133119

134120
export function matchElementFromPlan(
135121
planLocateParam: PlanningLocateParam,
136-
tree: ElementTreeNode<BaseElement>,
137-
) {
122+
): LocateResultElement | undefined {
138123
if (!planLocateParam) {
139124
return undefined;
140125
}
141-
if (planLocateParam.id) {
142-
return getNodeFromCacheList(planLocateParam.id);
143-
}
144126

145127
if (planLocateParam.bbox) {
146128
const centerPosition = {
147129
x: Math.floor((planLocateParam.bbox[0] + planLocateParam.bbox[2]) / 2),
148130
y: Math.floor((planLocateParam.bbox[1] + planLocateParam.bbox[3]) / 2),
149131
};
150-
let element = elementByPositionWithElementInfo(tree, centerPosition);
151-
152-
if (!element) {
153-
element = generateElementByPosition(centerPosition) as BaseElement;
154-
}
155132

133+
const element = generateElementByPosition(centerPosition);
156134
return element;
157135
}
158136

@@ -198,10 +176,6 @@ export async function matchElementFromCache(
198176
Math.round(rect.top + rect.height / 2),
199177
],
200178
rect,
201-
xpaths: [],
202-
attributes: {
203-
nodeType: NodeType.POSITION,
204-
},
205179
};
206180

207181
cacheDebug('cache hit, prompt: %s', cachePrompt);
@@ -212,59 +186,6 @@ export async function matchElementFromCache(
212186
}
213187
}
214188

215-
export function trimContextByViewport(execution: ExecutionDump) {
216-
function filterVisibleTree(
217-
node: ElementTreeNode<BaseElement>,
218-
): ElementTreeNode<BaseElement> | null {
219-
if (!node) return null;
220-
221-
// recursively process all children
222-
const filteredChildren = Array.isArray(node.children)
223-
? (node.children
224-
.map(filterVisibleTree)
225-
.filter((child) => child !== null) as ElementTreeNode<BaseElement>[])
226-
: [];
227-
228-
// if the current node is visible, keep it and the filtered children
229-
if (node.node && node.node.isVisible === true) {
230-
return {
231-
...node,
232-
children: filteredChildren,
233-
};
234-
}
235-
236-
// if the current node is invisible, but has visible children, create an empty node to include these children
237-
if (filteredChildren.length > 0) {
238-
return {
239-
node: null,
240-
children: filteredChildren,
241-
};
242-
}
243-
244-
// if the current node is invisible and has no visible children, return null
245-
return null;
246-
}
247-
248-
return {
249-
...execution,
250-
tasks: Array.isArray(execution.tasks)
251-
? execution.tasks.map((task: ExecutionTask) => {
252-
const newTask = { ...task };
253-
if (task.uiContext?.tree) {
254-
newTask.uiContext = {
255-
...task.uiContext,
256-
tree: filterVisibleTree(task.uiContext.tree) || {
257-
node: null,
258-
children: [],
259-
},
260-
};
261-
}
262-
return newTask;
263-
})
264-
: execution.tasks,
265-
};
266-
}
267-
268189
declare const __VERSION__: string | undefined;
269190

270191
export const getMidsceneVersion = (): string => {

packages/core/src/ai-model/common.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import type {
2-
AIUsageInfo,
32
BaseElement,
43
DeviceAction,
54
ElementTreeNode,

packages/core/src/ai-model/index.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@ export {
44
callAI,
55
} from './service-caller/index';
66
export { systemPromptToLocateElement } from './prompt/llm-locator';
7-
export {
8-
describeUserPage,
9-
elementByPositionWithElementInfo,
10-
} from './prompt/util';
7+
export { describeUserPage } from './prompt/util';
118
export {
129
generatePlaywrightTest,
1310
generatePlaywrightTestStream,

0 commit comments

Comments
 (0)