diff --git a/docs/designs/multi_trace_repl.md b/docs/designs/multi_trace_repl.md new file mode 100644 index 0000000..75348c1 --- /dev/null +++ b/docs/designs/multi_trace_repl.md @@ -0,0 +1,213 @@ +# Multi Trace Open Command + +This is the design doc for extending the `torq open` command +to support a list of trace files or a directory path containing +multiple trace files. + +When `torq open` is executed with multiple traces, it should +verify the traces are there and should proceed to launch an interactive +query REPL, where the CLI user can write a PerfettoSQL query and the +output of the query will be printed to the screen. + + +## BatchTraceProcessor + +The tool to execute a query on multiple traces will be Perfetto's +BatchTraceProcessor. + +Batch Trace Processor is part of the perfetto Python library and can be installed by running: + +```bash +pip3 install pandas # prerequisite for Batch Trace Processor +pip3 install perfetto +``` + +**Note:** As this project uses Bazel, ensure dependencies are added to `MODULE.bazel` or the appropriate build files. + +The simplest way to load traces is to pass a list of file paths: + +```python +from perfetto.batch_trace_processor.api import BatchTraceProcessor + +files = [ + 'traces/slow-start.pftrace', + 'traces/oom.pftrace', + 'traces/high-battery-drain.pftrace', +] +with BatchTraceProcessor(files) as btp: + btp.query('...') +``` + +`glob` (requires `import glob`) can be used to load all traces in a directory: + +```python +from perfetto.batch_trace_processor.api import BatchTraceProcessor + +files = glob.glob('traces/*.pftrace') +with BatchTraceProcessor(files) as btp: + btp.query('...') +``` + +Writing queries with batch trace processor works very similarly to the single-trace `TraceProcessor` Python API. 
+ +For example, to get a count of the number of userspace slices: + +``` +>>> btp.query('select count(1) from slice') +[ count(1) +0 2092592, count(1) +0 156071, count(1) +0 121431] +``` +The return value of query is a list of Pandas dataframes, one for each trace loaded. + +A common requirement is for all of the traces to be flattened into a single dataframe instead of getting one dataframe per-trace. To support this, the query_and_flatten function can be used: + +``` +>>> btp.query_and_flatten('select count(1) from slice') + count(1) +0 2092592 +1 156071 +2 121431 +``` + +query_and_flatten also implicitly adds columns indicating the originating trace. The exact columns added depend on the resolver being used: consult your resolver's documentation for more information. + +## Torq open command + +The `torq open` command will be updated to accept one or more file paths or directory paths. + +**Usage:** + +```bash +torq open [file_path [file_path ...]] +``` + +### Supported Arguments + +- `file_path`: (Required) One or more paths to trace files or directories containing trace files. + - **Single Trace Found:** If the collection of traces (from all provided files and directories) contains **exactly one** file, Torq opens it in the Perfetto UI (standard behavior). + - **Multiple Traces Found:** If the collection contains **more than one** file, Torq launches the interactive BatchTraceProcessor REPL. + +### Trace Collection Logic + +Torq will iterate through all provided `file_path` arguments to collect a list of valid trace files (e.g., `.pftrace`, `.perfetto-trace`): +1. **Direct Files:** If an argument is a file, it is added to the collection. +2. **Directories:** If an argument is a directory, Torq scans it non-recursively for valid trace files and adds all found files to the collection. + +The final execution mode is determined by the total number of traces collected: +- **1 trace:** Opens in Perfetto UI. +- **> 1 trace:** Launches interactive BTP REPL. 
+- **0 traces:** Returns a validation error. + +### Interactive REPL Mode + +When multiple traces are detected, Torq transitions into an interactive SQL environment powered by `BatchTraceProcessor`. + +#### Behavior +- **Trace Loading:** Displays a loading message indicating the number of traces being processed. +- **Prompt:** Provides a persistent `> ` prompt for user input. +- **Query Execution:** Executes the provided PerfettoSQL query across all loaded traces. +- **Output:** Prints the results as a unified table. By default, it uses `query_and_flatten` to provide a consolidated view with trace identification columns. +- **Session Control:** Users can exit the REPL using `exit`, `quit`, `Ctrl-D`, or `Ctrl-C`. + +#### Internal Implementation +1. **Initialization:** The REPL will be implemented as a specialized command executor. It will initialize a `BatchTraceProcessor` instance with the collected file list. +2. **Input Loop:** A `while` loop will capture user input. For improved UX, the `readline` module should be used to support command history and basic line editing. Catching `KeyboardInterrupt` (`Ctrl-C`) or `EOFError` (`Ctrl-D`) will gracefully exit the session. +3. **Command Parsing:** Basic checks for "exit" or "quit" commands before treating the input as a SQL query. +4. **Error Handling:** SQL syntax errors or execution failures within `btp.query_and_flatten` will be caught and printed to `stderr` without crashing the REPL session. + +#### Example Session +```bash +$ torq open ./slow_run_traces/ +Loading 3 traces into BatchTraceProcessor... +[torq REPL] - Type 'quit' to exit. +> select count(1) as total_slices from slice + total_slices +0 2092592 +1 156071 +2 121431 +> select name, dur from slice order by dur desc limit 2 + name dur +0 actual_frame_timeline 105672344 +1 actual_frame_timeline 98234122 +> quit +Exiting torq REPL. 
+``` + +### Rejected Commands & Error Handling + +To ensure a robust developer experience, the following cases will be rejected with clear error messages and suggestions: + +**1. Invalid file or directory path:** +```bash +torq open non_existent_path +``` +*Error:* `Command is invalid because 'non_existent_path' is not a valid file or directory path.` +*Suggestion:* `Make sure the path exists.` + +**2. Mixed valid and invalid paths:** +```bash +torq open trace1.pftrace non_existent_path +``` +*Error:* `Command is invalid because 'non_existent_path' is not a valid file or directory path.` +*Suggestion:* `Make sure the path exists.` +*Rationale:* Fails fast to ensure the user is aware of the missing data before starting an analysis session. + +**3. Empty directory or directory with no valid traces:** +```bash +torq open ./empty_dir +``` +*Error:* `Command is invalid because no valid trace files were found in the provided paths.` +*Suggestion:* `Make sure the provided paths contain at least one valid trace file (e.g. .pftrace, .perfetto-trace).` + +**4. Unsupported file types:** +```bash +torq open image.png +``` +*Error:* `Command is invalid because 'image.png' is not a supported trace file format.` +*Suggestion:* `Provide a path to a supported trace file format (e.g. .pftrace, .perfetto-trace).` + +**5. No arguments provided:** +```bash +torq open +``` +*Error:* `torq open: error: the following arguments are required: file_path` (Standard argparse error). + +## Project milestones + +The implementation of the multi-trace open command will be executed in the following order: + +### 1. Dependency Management +- **Objective:** Ensure `pandas` and `perfetto` Python libraries are available in the Torq environment via Bazel. +- **Tasks:** + - Update `MODULE.bazel` (or equivalent build files) to include `pandas` and `perfetto` as dependencies. + - Verify the dependencies are correctly fetched and importable in the Torq source code. + +### 2. 
Argument Parsing & Validation +- **Objective:** Update the `torq open` command to accept multiple arguments and enforce validation rules. +- **Tasks:** + - Modify `add_open_parser` in `src/open.py` to change `file_path` `nargs` to `+` (one or more). + - Update `verify_open_args` to iterate through all provided paths. + - Implement validation logic to reject invalid paths, empty directories, and unsupported file types with the specified error messages. + - Add unit tests for all valid and invalid argument scenarios. + +### 3. Trace Collection Logic +- **Objective:** Implement the logic to scan directories and collect all valid trace files. +- **Tasks:** + - Create a helper function (e.g., `collect_traces`) that takes the list of input paths. + - Implement the logic to distinguish between files and directories. + - Implement non-recursive directory scanning for `.pftrace` and `.perfetto-trace` files. + - Return a consolidated list of unique file paths. + - Unit test the collection logic with various file/directory combinations. + +### 4. Interactive REPL Implementation +- **Objective:** Build the interactive SQL shell powered by `BatchTraceProcessor`. +- **Tasks:** + - Create a new class or function (e.g., `BatchTraceRepl`) to handle the interactive session. + - Initialize `BatchTraceProcessor` with the collected trace files. + - Implement the `while` loop for user input, integrating `readline` for history support. + - Implement the "exit", "quit", "Ctrl-D", and "Ctrl-C" handling. + - Execute queries using `btp.query_and_flatten()` and print the results using Pandas' string formatting. + - Implement error handling for SQL errors to prevent crashing the REPL. + - Integrate this new mode into `execute_open_command` in `src/open.py`, triggering it only when multiple traces are collected. diff --git a/src/open.py b/src/open.py index 5f9ca09..9e96b4d 100644 --- a/src/open.py +++ b/src/open.py @@ -14,9 +14,11 @@ # limitations under the License. 
# +import os +import readline +from perfetto.batch_trace_processor.api import BatchTraceProcessor from .base import ValidationError from .open_ui_utils import open_trace, WEB_UI_ADDRESS -from .utils import path_exists def add_open_parser(subparsers): @@ -25,7 +27,8 @@ def add_open_parser(subparsers): help=('The open subcommand is used ' 'to open trace files in the ' 'perfetto ui.')) - open_parser.add_argument('file_path', help='Path to trace file.') + open_parser.add_argument( + 'file_path', nargs='+', help='Path to trace file or directory.') open_parser.add_argument( '--use_trace_processor', default=False, @@ -35,13 +38,108 @@ def add_open_parser(subparsers): def verify_open_args(args): - if not path_exists(args.file_path): + """Resolves args.file_path into a sorted list of unique trace files. + + Returns: + (args, None) on success, or (None, ValidationError) on the first problem. + """ + valid_extensions = ('.pftrace', '.perfetto-trace') + valid_files = [] + # When running with bazel run, the current working directory is changed to + # the bazel sandbox. To correctly resolve relative paths, we need to use + # the BUILD_WORKING_DIRECTORY environment variable. + working_dir = os.environ.get("BUILD_WORKING_DIRECTORY", os.getcwd()) + + for path in args.file_path: + # Expand '~' before anything else: once joined onto working_dir the path + # no longer starts with '~' and expanduser() would leave it untouched. + full_path = os.path.expanduser(path) + if not os.path.isabs(full_path): + full_path = os.path.join(working_dir, full_path) + + if not os.path.exists(full_path): + return None, ValidationError( + f"Command is invalid because '{path}' is not a valid file or directory path.", + "Make sure the path exists.") + + if os.path.isfile(full_path): + if not full_path.endswith(valid_extensions): + return None, ValidationError( + f"Command is invalid because '{os.path.basename(path)}' is not a supported trace file format.", + "Provide a path to a supported trace file format (e.g. .pftrace, .perfetto-trace)." 
+ ) + valid_files.append(full_path) + elif os.path.isdir(full_path): + for file in os.listdir(full_path): + if file.endswith(valid_extensions): + valid_files.append(os.path.join(full_path, file)) + + if not valid_files: return None, ValidationError( - "Command is invalid because %s is an invalid file path." % - args.file_path, "Make sure your file exists.") + "Command is invalid because no valid trace files were found in the provided paths.", + "Make sure the provided paths contain at least one valid trace file (e.g. .pftrace, .perfetto-trace)." + ) + args.file_path = sorted(set(valid_files)) return args, None +def execute_repl(traces): + """Runs an interactive PerfettoSQL prompt over all the given traces. + + Lines are buffered until an empty line is entered, then run as one query + via query_and_flatten. 'exit'/'quit' on an empty buffer, or Ctrl-D, leaves + the prompt; Ctrl-C only discards the buffered lines. + """ + print(f"Loading {len(traces)} traces into torq...") + try: + with BatchTraceProcessor(traces) as btp: + print("[torq REPL] - Type 'quit' to exit.") + print("[torq REPL] - Separate lines with Enter (or Shift+Enter).") + print("[torq REPL] - Press Enter on an empty line to execute.") + query_lines = [] + while True: + try: + prompt = "> " if not query_lines else ".. " + line = input(prompt) + + if line.strip().lower() in ("exit", "quit"): + if not query_lines: + break + else: + print( + "Discarding current query buffer. 
Type 'quit' again to exit.") + query_lines = [] + continue + + if not line.strip(): + if query_lines: + full_query = "\n".join(query_lines) + df = btp.query_and_flatten(full_query) + print(df.to_string()) + query_lines = [] + continue + + query_lines.append(line) + except KeyboardInterrupt: + print() # Move to next line + query_lines = [] + continue + except EOFError: + break + except Exception as e: + print(f"Error executing query: {e}") + query_lines = [] + except Exception as e: + print(f"Failed to initialize the torq REPL: {e}") + print("Exiting") + + def execute_open_command(args, device): - return open_trace(args.file_path, WEB_UI_ADDRESS, args.use_trace_processor) + """Opens a single trace in the Perfetto UI; starts the REPL for several.""" + if len(args.file_path) == 1: + return open_trace(args.file_path[0], WEB_UI_ADDRESS, + args.use_trace_processor) + else: + execute_repl(args.file_path) + return None diff --git a/tests/open_ui_unit_test.py b/tests/open_ui_unit_test.py index 53e691a..372dde8 100644 --- a/tests/open_ui_unit_test.py +++ b/tests/open_ui_unit_test.py @@ -25,7 +25,7 @@ import webbrowser from unittest import mock from src.open_ui_utils import download_trace_processor, open_trace -from tests.test_utils import generate_mock_completed_process +from tests.test_utils import generate_mock_completed_process, run_cli ANDROID_BUILD_TOP = "/main" TEST_FILE = "file.txtpb" @@ -275,6 +275,189 @@ def test_download_trace_processor_temp_scripts_exist(self, mock_getsize, self.assertEqual(trace_processor_path, TORQ_TEMP_TRACE_PROCESSOR) self.assertEqual(terminal_output.getvalue(), "") + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_quit(self, mock_btp, mock_input, mock_exists, + mock_isfile): + mock_input.return_value = "quit" + mock_btp_instance = mock_btp.return_value.__enter__.return_value + 
mock_exists.return_value = True + mock_isfile.return_value = True + + 
terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + mock_btp.assert_called_with( + [os.path.abspath("trace1.pftrace"), + os.path.abspath("trace2.pftrace")]) + self.assertIn("Loading 2 traces into torq...", terminal_output.getvalue()) + self.assertIn("[torq REPL] - Type 'quit' to exit.", + terminal_output.getvalue()) + self.assertIn("Exiting", terminal_output.getvalue()) + mock_btp_instance.query_and_flatten.assert_not_called() + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_exit(self, mock_btp, mock_input, mock_exists, + mock_isfile): + mock_input.return_value = "exit" + mock_btp_instance = mock_btp.return_value.__enter__.return_value + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + # Needs two distinct valid trace files to enter REPL + run_cli("torq open trace1.pftrace trace2.pftrace") + + self.assertIn("Exiting", terminal_output.getvalue()) + mock_btp_instance.query_and_flatten.assert_not_called() + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_query_and_quit(self, mock_btp, mock_input, mock_exists, + mock_isfile): + mock_input.side_effect = ["select * from slice;", "", "quit"] + mock_btp_instance = mock_btp.return_value.__enter__.return_value + mock_df = mock.Mock() + mock_df.to_string.return_value = "mock_dataframe_output" + mock_btp_instance.query_and_flatten.return_value = mock_df + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace 
trace2.pftrace") + + mock_btp_instance.query_and_flatten.assert_called_with( + "select * from slice;") + self.assertIn("mock_dataframe_output", terminal_output.getvalue()) + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_multi_line_query(self, mock_btp, mock_input, + mock_exists, mock_isfile): + mock_input.side_effect = ["select *", "from slice;", "", "quit"] + mock_btp_instance = mock_btp.return_value.__enter__.return_value + mock_df = mock.Mock() + mock_df.to_string.return_value = "multi_line_output" + mock_btp_instance.query_and_flatten.return_value = mock_df + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + mock_btp_instance.query_and_flatten.assert_called_with( + "select *\nfrom slice;") + self.assertIn("multi_line_output", terminal_output.getvalue()) + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_empty_query_ignored(self, mock_btp, mock_input, + mock_exists, mock_isfile): + mock_input.side_effect = ["", " ", "quit"] + mock_btp_instance = mock_btp.return_value.__enter__.return_value + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + mock_btp_instance.query_and_flatten.assert_not_called() + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + 
@mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_sql_error_handled(self, mock_btp, mock_input, + mock_exists, mock_isfile): + mock_input.side_effect = ["bad query;", "", "quit"] + mock_btp_instance = mock_btp.return_value.__enter__.return_value + mock_btp_instance.query_and_flatten.side_effect = Exception("Syntax error") + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + self.assertIn("Error executing query: Syntax error", + terminal_output.getvalue()) + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_keyboard_interrupt_handled(self, mock_btp, mock_input, + mock_exists, mock_isfile): + mock_input.side_effect = [KeyboardInterrupt(), "quit"] + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + @mock.patch.object(builtins, "input") + @mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_eof_error_handled(self, mock_btp, mock_input, + mock_exists, mock_isfile): + mock_input.side_effect = EOFError() + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + self.assertIn("Exiting", terminal_output.getvalue()) + + @mock.patch.object(os.path, "isfile", autospec=True) + @mock.patch.object(os.path, "exists", autospec=True) + 
@mock.patch("src.open.BatchTraceProcessor") + def test_execute_repl_initialization_failure(self, mock_btp, mock_exists, + mock_isfile): + mock_btp.side_effect = Exception("Failed to load trace") + mock_exists.return_value = True + mock_isfile.return_value = True + + terminal_output = io.StringIO() + sys.stdout = terminal_output + + run_cli("torq open trace1.pftrace trace2.pftrace") + + self.assertIn("Failed to initialize the torq REPL: Failed to load trace", + terminal_output.getvalue()) + self.assertIn("Exiting", terminal_output.getvalue()) + if __name__ == '__main__': unittest.main() diff --git a/tests/torq_unit_test.py b/tests/torq_unit_test.py index 0e6f157..9c5b13f 100644 --- a/tests/torq_unit_test.py +++ b/tests/torq_unit_test.py @@ -1125,14 +1125,17 @@ def test_verify_args_config_invalid_trigger_timeout_ms( (MIN_DURATION_MS, (MIN_DURATION_MS / 1000)))) @mock.patch.object(os.path, "exists", autospec=True) - def test_create_parser_valid_open_subcommand(self, mock_exists): + @mock.patch.object(os.path, "isfile", autospec=True) + def test_create_parser_valid_open_subcommand(self, mock_isfile, mock_exists): mock_exists.return_value = True - args = parse_cli("torq open %s" % TEST_FILE) + mock_isfile.return_value = True + test_trace_file = TEST_FILE + ".pftrace" + args = parse_cli("torq open %s" % test_trace_file) args, error = verify_args(args) self.assertEqual(error, None) - self.assertEqual(args.file_path, TEST_FILE) + self.assertEqual(args.file_path, [os.path.abspath(test_trace_file)]) def test_create_parser_open_subcommand_no_file(self): parser, error = create_parser_from_cli("torq open") @@ -1149,9 +1152,9 @@ def test_create_parser_open_subcommand_invalid_file(self, mock_exists): args, error = verify_args(args) self.assertEqual( - error.message, "Command is invalid because %s is an " - "invalid file path." 
% TEST_FILE) - self.assertEqual(error.suggestion, "Make sure your file exists.") + error.message, f"Command is invalid because '{TEST_FILE}' is not a " + "valid file or directory path.") + self.assertEqual(error.suggestion, "Make sure the path exists.") if __name__ == '__main__':