Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,9 @@ Gemini CLI.

- **`!<shell_command>`**
- **Description:** Execute the given `<shell_command>` using `bash` on
Linux/macOS or `cmd.exe` on Windows. Any output or errors from the command
are displayed in the terminal.
Linux/macOS or `powershell.exe -NoProfile -Command` on Windows (unless you
override `ComSpec`). Any output or errors from the command are displayed in
the terminal.
- **Examples:**
- `!ls -la` (executes `ls -la` and returns to Gemini CLI)
- `!git status` (executes `git status` and returns to Gemini CLI)
Expand Down
5 changes: 3 additions & 2 deletions docs/tools/shell.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ command, including interactive commands that require user input (e.g., `vim`,
`git rebase -i`) if the `tools.shell.enableInteractiveShell` setting is set to
`true`.

On Windows, commands are executed with `cmd.exe /c`. On other platforms, they
are executed with `bash -c`.
On Windows, commands are executed with `powershell.exe -NoProfile -Command`
(unless you explicitly point `ComSpec` at another shell). On other platforms,
they are executed with `bash -c`.

### Arguments

Expand Down
34 changes: 34 additions & 0 deletions esbuild.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { createRequire } from 'node:module';
import { writeFileSync } from 'node:fs';
import { wasmLoader } from 'esbuild-plugin-wasm';

let esbuild;
try {
Expand All @@ -22,6 +23,37 @@ const __dirname = path.dirname(__filename);
const require = createRequire(import.meta.url);
const pkg = require(path.resolve(__dirname, 'package.json'));

function createWasmPlugins() {
const wasmBinaryPlugin = {
name: 'wasm-binary',
setup(build) {
build.onResolve({ filter: /\.wasm\?binary$/ }, (args) => {
const specifier = args.path.replace(/\?binary$/, '');
const resolveDir = args.resolveDir || '';
const isBareSpecifier =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: do we really need both the isBareSpecified and relative file path cases?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll investigate this in a subsequent PR.

!path.isAbsolute(specifier) &&
!specifier.startsWith('./') &&
!specifier.startsWith('../');

let resolvedPath;
if (isBareSpecifier) {
resolvedPath = require.resolve(specifier, {
paths: resolveDir ? [resolveDir, __dirname] : [__dirname],
});
} else {
resolvedPath = path.isAbsolute(specifier)
? specifier
: path.join(resolveDir, specifier);
}

return { path: resolvedPath, namespace: 'wasm-embedded' };
});
},
};

return [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })];
}

const external = [
'@lydell/node-pty',
'node-pty',
Expand Down Expand Up @@ -51,6 +83,7 @@ const cliConfig = {
define: {
'process.env.CLI_VERSION': JSON.stringify(pkg.version),
},
plugins: createWasmPlugins(),
alias: {
'is-in-ci': path.resolve(__dirname, 'packages/cli/src/patches/is-in-ci.ts'),
},
Expand All @@ -67,6 +100,7 @@ const a2aServerConfig = {
define: {
'process.env.CLI_VERSION': JSON.stringify(pkg.version),
},
plugins: createWasmPlugins(),
};

Promise.allSettled([
Expand Down
3 changes: 2 additions & 1 deletion integration-tests/flicker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import { describe, it, expect } from 'vitest';
import { TestRig } from './test-helper.js';

describe('Flicker Detector', () => {
it('should not detect a flicker under the max height budget', async () => {
// TODO: https://github.com/google-gemini/gemini-cli/issues/11170
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this still needed? I believe this got fixed earlier today.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove this in a subsequent PR.

it.skip('should not detect a flicker under the max height budget', async () => {
const rig = new TestRig();
await rig.setup('flicker-detector-test');

Expand Down
174 changes: 174 additions & 0 deletions integration-tests/run_shell_command.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,46 @@ function getLineCountCommand(): { command: string; tool: string } {
}
}

function getInvalidCommand(): string {
switch (shell) {
case 'powershell':
return `Get-ChildItem | | Select-Object`;
case 'cmd':
return `dir | | findstr foo`;
case 'bash':
default:
return `echo "hello" > > file`;
}
}

function getAllowedListCommand(): string {
switch (shell) {
case 'powershell':
return 'Get-ChildItem';
case 'cmd':
return 'dir';
case 'bash':
default:
return 'ls';
}
}

function getDisallowedFileReadCommand(testFile: string): {
command: string;
tool: string;
} {
const quotedPath = `"${testFile}"`;
switch (shell) {
case 'powershell':
return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
case 'cmd':
return { command: `type ${quotedPath}`, tool: 'type' };
case 'bash':
default:
return { command: `cat ${quotedPath}`, tool: 'cat' };
}
}

describe('run_shell_command', () => {
it('should be able to run a shell command', async () => {
const rig = new TestRig();
Expand Down Expand Up @@ -102,8 +142,17 @@ describe('run_shell_command', () => {
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

if (!foundToolCall) {
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
name: toolRequest.name,
success: toolRequest.success,
args: toolRequest.args,
}));
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Allowed tools flag': `run_shell_command(${tool})`,
Prompt: prompt,
'Tool logs': toolLogs,
Result: result,
});
}

Expand Down Expand Up @@ -210,8 +259,17 @@ describe('run_shell_command', () => {
const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);

if (!foundToolCall) {
const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
name: toolRequest.name,
success: toolRequest.success,
args: toolRequest.args,
}));
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
'Allowed tools flag': `ShellTool(${tool})`,
Prompt: prompt,
'Tool logs': toolLogs,
Result: result,
});
}

Expand Down Expand Up @@ -280,6 +338,73 @@ describe('run_shell_command', () => {
}
});

it('should reject commands not on the allowlist', async () => {
const rig = new TestRig();
await rig.setup('should reject commands not on the allowlist');

const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
const allowedCommand = getAllowedListCommand();
const disallowed = getDisallowedFileReadCommand(testFile);
const prompt =
`I am testing the allowed tools configuration. ` +
`Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
`If the command fails because it is not permitted, respond with the single word FAIL. ` +
`If it succeeds, respond with SUCCESS.`;

const result = await rig.run(
{
stdin: prompt,
yolo: false,
},
`--allowed-tools=run_shell_command(${allowedCommand})`,
);

if (!result.toLowerCase().includes('fail')) {
printDebugInfo(rig, result, {
Result: result,
AllowedCommand: allowedCommand,
DisallowedCommand: disallowed.command,
});
}
expect(result).toContain('FAIL');

const foundToolCall = await rig.waitForToolCall(
'run_shell_command',
15000,
(args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
);

if (!foundToolCall) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
ToolLogs: rig.readToolLogs(),
});
}
expect(foundToolCall).toBe(true);

const toolLogs = rig
.readToolLogs()
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
const failureLog = toolLogs.find((toolLog) =>
toolLog.toolRequest.args
.toLowerCase()
.includes(disallowed.tool.toLowerCase()),
);

if (!failureLog || failureLog.toolRequest.success) {
printDebugInfo(rig, result, {
ToolLogs: toolLogs,
DisallowedTool: disallowed.tool,
});
}

expect(
failureLog,
'Expected failing run_shell_command invocation',
).toBeTruthy();
expect(failureLog!.toolRequest.success).toBe(false);
});

it('should allow all with "ShellTool" and other specific tools', async () => {
const rig = new TestRig();
await rig.setup(
Expand Down Expand Up @@ -386,4 +511,53 @@ describe('run_shell_command', () => {
validateModelOutput(result, fileName, 'Platform-specific listing test');
expect(result).toContain(fileName);
});

it('rejects invalid shell expressions', async () => {
const rig = new TestRig();
await rig.setup('rejects invalid shell expressions');
const invalidCommand = getInvalidCommand();
const result = await rig.run(
`I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
);
expect(result).toContain('FAIL');

const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
const foundToolCall = await rig.waitForToolCall(
'run_shell_command',
15000,
(args) =>
args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
);

if (!foundToolCall) {
printDebugInfo(rig, result, {
'Found tool call': foundToolCall,
EscapedCommand: escapedInvalidCommand,
ToolLogs: rig.readToolLogs(),
});
}
expect(foundToolCall).toBe(true);

const toolLogs = rig
.readToolLogs()
.filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
const failureLog = toolLogs.find((toolLog) =>
toolLog.toolRequest.args
.toLowerCase()
.includes(escapedInvalidCommand.toLowerCase()),
);

if (!failureLog || failureLog.toolRequest.success) {
printDebugInfo(rig, result, {
ToolLogs: toolLogs,
EscapedCommand: escapedInvalidCommand,
});
}

expect(
failureLog,
'Expected failing run_shell_command invocation for invalid syntax',
).toBeTruthy();
expect(failureLog!.toolRequest.success).toBe(false);
});
});
Loading