google-gemini
diff --git a/docs/cli/commands.md b/docs/cli/commands.md
Lines changed: 3 additions & 2 deletions
diff --git a/docs/tools/shell.md b/docs/tools/shell.md
Lines changed: 3 additions & 2 deletions
diff --git a/esbuild.config.js b/esbuild.config.js
Lines changed: 34 additions & 0 deletions
diff --git a/integration-tests/flicker.test.ts b/integration-tests/flicker.test.ts
Lines changed: 2 additions & 1 deletion
diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts
Lines changed: 174 additions & 0 deletions
@@ -292,8 +292,9 @@ Gemini CLI.
 
 - **`!<shell_command>`**
   - **Description:** Execute the given `<shell_command>` using `bash` on
-    Linux/macOS or `cmd.exe` on Windows. Any output or errors from the command
-    are displayed in the terminal.
+    Linux/macOS or `powershell.exe -NoProfile -Command` on Windows (unless you
+    override `ComSpec`). Any output or errors from the command are displayed in
+    the terminal.
   - **Examples:**
     - `!ls -la` (executes `ls -la` and returns to Gemini CLI)
     - `!git status` (executes `git status` and returns to Gemini CLI)
 
@@ -10,8 +10,9 @@ command, including interactive commands that require user input (e.g., `vim`,
 `git rebase -i`) if the `tools.shell.enableInteractiveShell` setting is set to
 `true`.
 
-On Windows, commands are executed with `cmd.exe /c`. On other platforms, they
-are executed with `bash -c`.
+On Windows, commands are executed with `powershell.exe -NoProfile -Command`
+(unless you explicitly point `ComSpec` at another shell). On other platforms,
+they are executed with `bash -c`.
 
 ### Arguments
 
 
@@ -8,6 +8,7 @@ import path from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { createRequire } from 'node:module';
 import { writeFileSync } from 'node:fs';
+import { wasmLoader } from 'esbuild-plugin-wasm';
 
 let esbuild;
 try {
@@ -22,6 +23,37 @@ const __dirname = path.dirname(__filename);
 const require = createRequire(import.meta.url);
 const pkg = require(path.resolve(__dirname, 'package.json'));
 
+function createWasmPlugins() {
+  const wasmBinaryPlugin = {
+    name: 'wasm-binary',
+    setup(build) {
+      build.onResolve({ filter: /\.wasm\?binary$/ }, (args) => {
+        const specifier = args.path.replace(/\?binary$/, '');
+        const resolveDir = args.resolveDir || '';
+        const isBareSpecifier =
+          !path.isAbsolute(specifier) &&
+          !specifier.startsWith('./') &&
+          !specifier.startsWith('../');
+
+        let resolvedPath;
+        if (isBareSpecifier) {
+          resolvedPath = require.resolve(specifier, {
+            paths: resolveDir ? [resolveDir, __dirname] : [__dirname],
+          });
+        } else {
+          resolvedPath = path.isAbsolute(specifier)
+            ? specifier
+            : path.join(resolveDir, specifier);
+        }
+
+        return { path: resolvedPath, namespace: 'wasm-embedded' };
+      });
+    },
+  };
+
+  return [wasmBinaryPlugin, wasmLoader({ mode: 'embedded' })];
+}
+
 const external = [
   '@lydell/node-pty',
   'node-pty',
@@ -51,6 +83,7 @@ const cliConfig = {
   define: {
     'process.env.CLI_VERSION': JSON.stringify(pkg.version),
   },
+  plugins: createWasmPlugins(),
   alias: {
     'is-in-ci': path.resolve(__dirname, 'packages/cli/src/patches/is-in-ci.ts'),
   },
@@ -67,6 +100,7 @@ const a2aServerConfig = {
   define: {
     'process.env.CLI_VERSION': JSON.stringify(pkg.version),
   },
+  plugins: createWasmPlugins(),
 };
 
 Promise.allSettled([
 
@@ -8,7 +8,8 @@ import { describe, it, expect } from 'vitest';
 import { TestRig } from './test-helper.js';
 
 describe('Flicker Detector', () => {
-  it('should not detect a flicker under the max height budget', async () => {
+  // TODO: https://github.com/google-gemini/gemini-cli/issues/11170
+  it.skip('should not detect a flicker under the max height budget', async () => {
     const rig = new TestRig();
     await rig.setup('flicker-detector-test');
 
 
@@ -21,6 +21,46 @@ function getLineCountCommand(): { command: string; tool: string } {
   }
 }
 
+function getInvalidCommand(): string {
+  switch (shell) {
+    case 'powershell':
+      return `Get-ChildItem | | Select-Object`;
+    case 'cmd':
+      return `dir | | findstr foo`;
+    case 'bash':
+    default:
+      return `echo "hello" > > file`;
+  }
+}
+
+function getAllowedListCommand(): string {
+  switch (shell) {
+    case 'powershell':
+      return 'Get-ChildItem';
+    case 'cmd':
+      return 'dir';
+    case 'bash':
+    default:
+      return 'ls';
+  }
+}
+
+function getDisallowedFileReadCommand(testFile: string): {
+  command: string;
+  tool: string;
+} {
+  const quotedPath = `"${testFile}"`;
+  switch (shell) {
+    case 'powershell':
+      return { command: `Get-Content ${quotedPath}`, tool: 'Get-Content' };
+    case 'cmd':
+      return { command: `type ${quotedPath}`, tool: 'type' };
+    case 'bash':
+    default:
+      return { command: `cat ${quotedPath}`, tool: 'cat' };
+  }
+}
+
 describe('run_shell_command', () => {
   it('should be able to run a shell command', async () => {
     const rig = new TestRig();
@@ -102,8 +142,17 @@ describe('run_shell_command', () => {
     const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
 
     if (!foundToolCall) {
+      const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
+        name: toolRequest.name,
+        success: toolRequest.success,
+        args: toolRequest.args,
+      }));
       printDebugInfo(rig, result, {
         'Found tool call': foundToolCall,
+        'Allowed tools flag': `run_shell_command(${tool})`,
+        Prompt: prompt,
+        'Tool logs': toolLogs,
+        Result: result,
       });
     }
 
@@ -210,8 +259,17 @@ describe('run_shell_command', () => {
     const foundToolCall = await rig.waitForToolCall('run_shell_command', 15000);
 
     if (!foundToolCall) {
+      const toolLogs = rig.readToolLogs().map(({ toolRequest }) => ({
+        name: toolRequest.name,
+        success: toolRequest.success,
+        args: toolRequest.args,
+      }));
       printDebugInfo(rig, result, {
         'Found tool call': foundToolCall,
+        'Allowed tools flag': `ShellTool(${tool})`,
+        Prompt: prompt,
+        'Tool logs': toolLogs,
+        Result: result,
       });
     }
 
@@ -280,6 +338,73 @@ describe('run_shell_command', () => {
     }
   });
 
+  it('should reject commands not on the allowlist', async () => {
+    const rig = new TestRig();
+    await rig.setup('should reject commands not on the allowlist');
+
+    const testFile = rig.createFile('test.txt', 'Disallowed command check\n');
+    const allowedCommand = getAllowedListCommand();
+    const disallowed = getDisallowedFileReadCommand(testFile);
+    const prompt =
+      `I am testing the allowed tools configuration. ` +
+      `Attempt to run "${disallowed.command}" to read the contents of ${testFile}. ` +
+      `If the command fails because it is not permitted, respond with the single word FAIL. ` +
+      `If it succeeds, respond with SUCCESS.`;
+
+    const result = await rig.run(
+      {
+        stdin: prompt,
+        yolo: false,
+      },
+      `--allowed-tools=run_shell_command(${allowedCommand})`,
+    );
+
+    if (!result.toLowerCase().includes('fail')) {
+      printDebugInfo(rig, result, {
+        Result: result,
+        AllowedCommand: allowedCommand,
+        DisallowedCommand: disallowed.command,
+      });
+    }
+    expect(result).toContain('FAIL');
+
+    const foundToolCall = await rig.waitForToolCall(
+      'run_shell_command',
+      15000,
+      (args) => args.toLowerCase().includes(disallowed.tool.toLowerCase()),
+    );
+
+    if (!foundToolCall) {
+      printDebugInfo(rig, result, {
+        'Found tool call': foundToolCall,
+        ToolLogs: rig.readToolLogs(),
+      });
+    }
+    expect(foundToolCall).toBe(true);
+
+    const toolLogs = rig
+      .readToolLogs()
+      .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
+    const failureLog = toolLogs.find((toolLog) =>
+      toolLog.toolRequest.args
+        .toLowerCase()
+        .includes(disallowed.tool.toLowerCase()),
+    );
+
+    if (!failureLog || failureLog.toolRequest.success) {
+      printDebugInfo(rig, result, {
+        ToolLogs: toolLogs,
+        DisallowedTool: disallowed.tool,
+      });
+    }
+
+    expect(
+      failureLog,
+      'Expected failing run_shell_command invocation',
+    ).toBeTruthy();
+    expect(failureLog!.toolRequest.success).toBe(false);
+  });
+
   it('should allow all with "ShellTool" and other specific tools', async () => {
     const rig = new TestRig();
     await rig.setup(
@@ -386,4 +511,53 @@ describe('run_shell_command', () => {
     validateModelOutput(result, fileName, 'Platform-specific listing test');
     expect(result).toContain(fileName);
   });
+
+  it('rejects invalid shell expressions', async () => {
+    const rig = new TestRig();
+    await rig.setup('rejects invalid shell expressions');
+    const invalidCommand = getInvalidCommand();
+    const result = await rig.run(
+      `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
+    );
+    expect(result).toContain('FAIL');
+
+    const escapedInvalidCommand = JSON.stringify(invalidCommand).slice(1, -1);
+    const foundToolCall = await rig.waitForToolCall(
+      'run_shell_command',
+      15000,
+      (args) =>
+        args.toLowerCase().includes(escapedInvalidCommand.toLowerCase()),
+    );
+
+    if (!foundToolCall) {
+      printDebugInfo(rig, result, {
+        'Found tool call': foundToolCall,
+        EscapedCommand: escapedInvalidCommand,
+        ToolLogs: rig.readToolLogs(),
+      });
+    }
+    expect(foundToolCall).toBe(true);
+
+    const toolLogs = rig
+      .readToolLogs()
+      .filter((toolLog) => toolLog.toolRequest.name === 'run_shell_command');
+    const failureLog = toolLogs.find((toolLog) =>
+      toolLog.toolRequest.args
+        .toLowerCase()
+        .includes(escapedInvalidCommand.toLowerCase()),
+    );
+
+    if (!failureLog || failureLog.toolRequest.success) {
+      printDebugInfo(rig, result, {
+        ToolLogs: toolLogs,
+        EscapedCommand: escapedInvalidCommand,
+      });
+    }
+
+    expect(
+      failureLog,
+      'Expected failing run_shell_command invocation for invalid syntax',
+    ).toBeTruthy();
+    expect(failureLog!.toolRequest.success).toBe(false);
+  });
 });