From ea83bc7c3b21ee0abd83305771ab08a433dc5f43 Mon Sep 17 00:00:00 2001 From: Ankit Shankar Date: Tue, 18 Feb 2025 20:40:25 -0700 Subject: [PATCH 1/2] Add support for reading paths from stdin Fixes #43 --- README.md | 28 +++++++++++++ files_to_prompt/cli.py | 30 +++++++++++++- tests/test_files_to_prompt.py | 74 +++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index eb336fe..d5e92fc 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,12 @@ This will output the contents of every file, with each file preceded by its rela ... ``` +- `-0/--null`: Use NUL character as separator when reading paths from stdin. Useful when filenames may contain spaces. + + ```bash + find . -name "*.py" -print0 | files-to-prompt --null + ``` + ### Example Suppose you have a directory structure like this: @@ -157,6 +163,28 @@ Contents of file2.txt --- ``` +### Reading from stdin + +The tool can also read paths from standard input. This can be used to pipe in the output of another command: + +```bash +# Find files modified in the last day +find . -type f -mtime -1 | files-to-prompt +``` + +When using the `--null` (or `-0`) option, paths are expected to be NUL-separated (useful when dealing with filenames containing spaces): + +```bash +find . -name "*.txt" -print0 | files-to-prompt --null +``` + +You can mix and match paths from command line arguments and stdin: + +```bash +# Include files modified in the last day, and also include README.md +find . -type f -mtime -1 | files-to-prompt README.md +``` + ### Claude XML Output Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. 
diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py index ad4a0e7..6623219 100644 --- a/files_to_prompt/cli.py +++ b/files_to_prompt/cli.py @@ -1,4 +1,5 @@ import os +import sys from fnmatch import fnmatch import click @@ -30,7 +31,7 @@ def add_line_numbers(content): padding = len(str(len(lines))) - numbered_lines = [f"{i+1:{padding}} {line}" for i, line in enumerate(lines)] + numbered_lines = [f"{i + 1:{padding}} {line}" for i, line in enumerate(lines)] return "\n".join(numbered_lines) @@ -132,6 +133,19 @@ def process_path( click.echo(click.style(warning_message, fg="red"), err=True) +def read_paths_from_stdin(use_null_separator): + if sys.stdin is None or sys.stdin.isatty(): + # No stdin at all (closed/detached) or an interactive terminal: don't block for input + return [] + + stdin_content = sys.stdin.read() + if use_null_separator: + paths = stdin_content.split("\0") + else: + paths = stdin_content.split() # split on whitespace + return [p for p in paths if p] + + @click.command() @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @click.option("extensions", "-e", "--extension", multiple=True) @@ -178,6 +192,12 @@ def process_path( is_flag=True, help="Add line numbers to the output", ) +@click.option( + "--null", + "-0", + is_flag=True, + help="Use NUL character as separator when reading from stdin", +) @click.version_option() def cli( paths, @@ -189,6 +209,7 @@ def cli( output_file, claude_xml, line_numbers, + null, ): """ Takes one or more paths to files or directories and outputs every file, @@ -219,6 +240,13 @@ def cli( # Reset global_index for pytest global global_index global_index = 1 + + # Read paths from stdin if available + stdin_paths = read_paths_from_stdin(use_null_separator=null) + + # Combine paths from arguments and stdin + paths = [*paths, *stdin_paths] + gitignore_rules = [] writer = click.echo fp = None diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index 07dd53e..4574912 100644 --- a/tests/test_files_to_prompt.py +++ 
b/tests/test_files_to_prompt.py @@ -322,3 +322,77 @@ def test_line_numbers(tmpdir): assert "2 Second line" in result.output assert "3 Third line" in result.output assert "4 Fourth line" in result.output + + +def test_reading_paths_from_stdin(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + # Create test files + os.makedirs("test_dir1") + os.makedirs("test_dir2") + with open("test_dir1/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir2/file2.txt", "w") as f: + f.write("Contents of file2") + + # Test space-separated paths from stdin + result = runner.invoke(cli, input="test_dir1/file1.txt test_dir2/file2.txt") + assert result.exit_code == 0 + assert "test_dir1/file1.txt" in result.output + assert "Contents of file1" in result.output + assert "test_dir2/file2.txt" in result.output + assert "Contents of file2" in result.output + + # Test newline-separated paths from stdin + result = runner.invoke(cli, input="test_dir1/file1.txt\ntest_dir2/file2.txt") + assert result.exit_code == 0 + assert "test_dir1/file1.txt" in result.output + assert "Contents of file1" in result.output + assert "test_dir2/file2.txt" in result.output + assert "Contents of file2" in result.output + + +def test_reading_null_separated_paths(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + # Create test files + os.makedirs("test_dir1") + os.makedirs("test_dir2") + with open("test_dir1/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir2/file2.txt", "w") as f: + f.write("Contents of file2") + + # Test NUL-separated paths from stdin + result = runner.invoke( + cli, args=["--null"], input="test_dir1/file1.txt\0test_dir2/file2.txt" + ) + assert result.exit_code == 0 + assert "test_dir1/file1.txt" in result.output + assert "Contents of file1" in result.output + assert "test_dir2/file2.txt" in result.output + assert "Contents of file2" in result.output + + +def test_paths_from_arguments_and_stdin(tmpdir): + runner = CliRunner() + with 
tmpdir.as_cwd(): + # Create test files + os.makedirs("test_dir1") + os.makedirs("test_dir2") + with open("test_dir1/file1.txt", "w") as f: + f.write("Contents of file1") + with open("test_dir2/file2.txt", "w") as f: + f.write("Contents of file2") + + # Test paths from arguments and stdin + result = runner.invoke( + cli, + args=["test_dir1"], + input="test_dir2/file2.txt", + ) + assert result.exit_code == 0 + assert "test_dir1/file1.txt" in result.output + assert "Contents of file1" in result.output + assert "test_dir2/file2.txt" in result.output + assert "Contents of file2" in result.output From 7e3a62420a504d3445958450f4c4a19b224a1873 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 18 Feb 2025 20:50:10 -0800 Subject: [PATCH 2/2] Refactor tests a bit --- tests/test_files_to_prompt.py | 43 +++++++++-------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index 4574912..e133f54 100644 --- a/tests/test_files_to_prompt.py +++ b/tests/test_files_to_prompt.py @@ -324,7 +324,16 @@ def test_line_numbers(tmpdir): assert "4 Fourth line" in result.output -def test_reading_paths_from_stdin(tmpdir): +@pytest.mark.parametrize( + "input,extra_args", + ( + ("test_dir1/file1.txt test_dir2/file2.txt", []), + ("test_dir1/file1.txt\ntest_dir2/file2.txt", []), + ("test_dir1/file1.txt\0test_dir2/file2.txt", ["--null"]), + ("test_dir1/file1.txt\0test_dir2/file2.txt", ["-0"]), + ), +) +def test_reading_paths_from_stdin(tmpdir, input, extra_args): runner = CliRunner() with tmpdir.as_cwd(): # Create test files @@ -336,37 +345,7 @@ def test_reading_paths_from_stdin(tmpdir): f.write("Contents of file2") # Test space-separated paths from stdin - result = runner.invoke(cli, input="test_dir1/file1.txt test_dir2/file2.txt") - assert result.exit_code == 0 - assert "test_dir1/file1.txt" in result.output - assert "Contents of file1" in result.output - assert "test_dir2/file2.txt" in result.output 
- assert "Contents of file2" in result.output - - # Test newline-separated paths from stdin - result = runner.invoke(cli, input="test_dir1/file1.txt\ntest_dir2/file2.txt") - assert result.exit_code == 0 - assert "test_dir1/file1.txt" in result.output - assert "Contents of file1" in result.output - assert "test_dir2/file2.txt" in result.output - assert "Contents of file2" in result.output - - -def test_reading_null_separated_paths(tmpdir): - runner = CliRunner() - with tmpdir.as_cwd(): - # Create test files - os.makedirs("test_dir1") - os.makedirs("test_dir2") - with open("test_dir1/file1.txt", "w") as f: - f.write("Contents of file1") - with open("test_dir2/file2.txt", "w") as f: - f.write("Contents of file2") - - # Test NUL-separated paths from stdin - result = runner.invoke( - cli, args=["--null"], input="test_dir1/file1.txt\0test_dir2/file2.txt" - ) + result = runner.invoke(cli, args=extra_args, input=input) assert result.exit_code == 0 assert "test_dir1/file1.txt" in result.output assert "Contents of file1" in result.output