|
| 1 | +""" |
| 2 | +Uncomment `# !pip ...` lines in Jupyter notebooks so they become `!pip ...`. |
| 3 | +
|
| 4 | +- Operates only on code cells (does not touch outputs/metadata/markdown). |
| 5 | +- Matches lines that start with optional whitespace, then `# !pip` (e.g., " # !pip install ..."). |
| 6 | +- Rewrites to keep the original indentation and replace the leading "# !pip" with "!pip". |
| 7 | +- Processes one or more paths (files or directories) given as CLI args, recursively for directories. |
| 8 | +""" |
| 9 | + |
| 10 | +from __future__ import annotations |
| 11 | + |
| 12 | +import argparse |
| 13 | +import re |
| 14 | +import sys |
| 15 | +from pathlib import Path |
| 16 | + |
| 17 | +import nbformat |
| 18 | + |
# Regex: beginning-of-line, capture leading whitespace, then "#", optional
# whitespace, then "!pip" as a whole word ("!pipx", "!pip3", ... are left alone).
_PATTERN = re.compile(r'^(\s*)#\s*!pip\b')


def fix_cell_source(src: str) -> tuple[str, int]:
    """
    Uncomment every line of *src* that starts with optional whitespace + '# !pip'.

    The leading indentation and everything after '!pip' are kept; only the
    '#' and the whitespace between it and '!pip' are removed, e.g.
    "    # !pip install foo" -> "    !pip install foo".

    Line terminators are preserved exactly as they appear in *src*
    (including a trailing newline and '\\r\\n' endings), so lines that are
    not rewritten come back byte-for-byte identical.

    Returns a tuple of (updated source, number of lines rewritten).
    """
    changed = 0
    new_lines: list[str] = []
    # keepends=True leaves each line's own terminator attached, so joining
    # with '' reproduces the input exactly apart from the rewritten prefixes.
    # Splitting and re-joining with '\n' would drop a trailing newline and
    # rewrite every other kind of line boundary.
    for line in src.splitlines(keepends=True):
        # The pattern is anchored at the start of the line, so there is at
        # most one substitution per line; subn reports whether it happened.
        new_line, hits = _PATTERN.subn(r'\1!pip', line, count=1)
        changed += hits
        new_lines.append(new_line)
    return (''.join(new_lines), changed)
| 42 | + |
| 43 | + |
def process_notebook(path: Path) -> int:
    """
    Rewrite the commented-out '!pip' lines of one .ipynb file in place.

    The notebook is read as nbformat version 4 and only cells whose
    cell_type is 'code' are passed to fix_cell_source. The file is written
    back only when at least one line was rewritten.

    Returns the total number of lines rewritten across all code cells.
    """
    notebook = nbformat.read(path, as_version=4)
    code_cells = (c for c in notebook.cells if c.cell_type == 'code')

    rewritten = 0
    for code_cell in code_cells:
        updated, count = fix_cell_source(code_cell.source or '')
        if not count:
            # Nothing matched in this cell; leave its source object untouched.
            continue
        code_cell.source = updated
        rewritten += count

    if rewritten:
        nbformat.write(notebook, path)
    return rewritten
| 60 | + |
| 61 | + |
def iter_notebooks(paths: list[Path]):
    """
    Yield each .ipynb file reachable from *paths* exactly once.

    Directories are searched recursively and their notebooks are yielded in
    sorted order; a regular file is yielded only if its suffix is '.ipynb'.
    Anything else (missing paths, other file types) is silently skipped.

    A notebook reachable through more than one argument (e.g. a directory
    and a file inside it, or the same path given twice) is yielded only the
    first time it is seen, so callers never process or count it twice.
    Paths are yielded in the form they were discovered, not resolved.
    """
    seen: set[Path] = set()
    for p in paths:
        if p.is_dir():
            # Sort so the processing/reporting order does not depend on the
            # filesystem's directory enumeration order.
            candidates = sorted(q for q in p.rglob('*.ipynb') if q.is_file())
        elif p.is_file() and p.suffix == '.ipynb':
            candidates = [p]
        else:
            continue
        for nb_path in candidates:
            # Compare by resolved path so relative/absolute spellings and
            # symlinked routes to the same file count as one notebook.
            key = nb_path.resolve()
            if key in seen:
                continue
            seen.add(key)
            yield nb_path
| 68 | + |
| 69 | + |
def main(argv: list[str]) -> int:
    """
    Command-line entry point.

    Parses *argv* (one or more notebook files/directories plus an optional
    --dry-run flag), collects the target notebooks and either rewrites them
    or, with --dry-run, only counts the lines that would be rewritten.
    One line is printed per affected notebook, followed by a summary.

    Returns 1 if no .ipynb files were found, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Uncomment '# !pip ...' to '!pip ...' in code cells of .ipynb notebooks.")
    parser.add_argument('paths', nargs='+', help='Notebook files or directories to process')
    parser.add_argument('--dry-run', action='store_true', help='Report changes without writing files')
    options = parser.parse_args(argv)

    notebooks = list(iter_notebooks([Path(raw) for raw in options.paths]))
    if not notebooks:
        print('No .ipynb files found.', file=sys.stderr)
        return 1

    verb = 'WOULD UPDATE' if options.dry_run else 'UPDATED'
    files_touched = 0
    lines_touched = 0
    for nb_path in notebooks:
        if options.dry_run:
            # Dry run: read the notebook and count matches without writing.
            preview = nbformat.read(nb_path, as_version=4)
            count = sum(
                fix_cell_source(cell.source or '')[1]
                for cell in preview.cells
                if cell.cell_type == 'code'
            )
        else:
            count = process_notebook(nb_path)

        if not count:
            continue
        print(f'{verb}: {nb_path} ({count} line(s))')
        files_touched += 1
        lines_touched += count

    if files_touched:
        print(f'Done. Files changed: {files_touched}, lines changed: {lines_touched}')
    else:
        print('No changes needed.')
    return 0
| 105 | + |
| 106 | + |
# Script entry point: run the CLI on the process arguments (program name
# stripped) and use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
0 commit comments