Skip to content

Commit d5e84ae

Browse files
committed
fix: address PR review — combine import, add single-session GT guard
1 parent ebbe38a commit d5e84ae

File tree

3 files changed

22 additions and 1 deletion

3 files changed

22 additions and 1 deletion

src/cli/aws/agentcore.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,10 @@ import { getCredentialProvider } from './account';
33
import {
44
BedrockAgentCoreClient,
55
EvaluateCommand,
6+
type EvaluationReferenceInput,
67
InvokeAgentRuntimeCommand,
78
StopRuntimeSessionCommand,
89
} from '@aws-sdk/client-bedrock-agentcore';
9-
import type { EvaluationReferenceInput } from '@aws-sdk/client-bedrock-agentcore';
1010
import type { HttpRequest } from '@smithy/protocol-http';
1111
import type { DocumentType } from '@smithy/types';
1212

src/cli/operations/eval/__tests__/run-eval.test.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1204,4 +1204,18 @@ describe('handleRunEval', () => {
12041204
expectedTrajectory: ['tool_1', 'tool_2'],
12051205
});
12061206
});
1207+
1208+
it('returns error when ground truth flags are used with multiple sessions', async () => {
1209+
setupDefaultAgent();
1210+
setupCloudWatchToReturn([makeOtelSpanRow('session-1', 'trace-1'), makeOtelSpanRow('session-2', 'trace-2')]);
1211+
1212+
const result = await handleRunEval({
1213+
evaluator: ['Builtin.GoalSuccessRate'],
1214+
days: 7,
1215+
assertions: ['Agent should greet user'],
1216+
});
1217+
1218+
expect(result.success).toBe(false);
1219+
expect(result.error).toContain('require exactly one session');
1220+
});
12071221
});

src/cli/operations/eval/run-eval.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -606,6 +606,13 @@ export async function handleRunEval(options: RunEvalOptions): Promise<RunEvalRes
606606
!!options.expectedResponse;
607607

608608
let evaluationReferenceInputs: EvaluationReferenceInput[] | undefined;
609+
if (hasRefInputs && sessions.length !== 1) {
610+
return {
611+
success: false,
612+
error:
613+
'Ground truth flags (-A, --expected-trajectory, --expected-response) require exactly one session. Use -s/--session-id to target a single session.',
614+
};
615+
}
609616
if (hasRefInputs) {
610617
const refInputs: EvaluationReferenceInput[] = [];
611618
const firstSession = sessions[0]!;

0 commit comments

Comments (0)