plasma-umass · Copilot · Aug 23, 2025 · Aug 23, 2025 · Aug 23, 2025 · Aug 23, 2025
diff --git a/scalene/scalene_code_executor.py b/scalene/scalene_code_executor.py
@@ -0,0 +1,259 @@
+"""
+Code execution and tracing functionality for Scalene profiler.
+
+This module extracts code execution and tracing functionality from the main Scalene class
+to improve code organization and reduce complexity.
+"""
+
+import functools
+import os
+import pathlib
@@ -7,7 +7,6 @@
 import functools
 import os
-import pathlib
 import re
 import sys
 import traceback
@@ -7,7 +7,6 @@

 import functools
 import os
-import pathlib
 import re
 import sys
 import traceback
+import re
+import sys
+import traceback
+from typing import Any, Dict, List, Optional, Set
@@ -11,7 +11,7 @@
 import re
 import sys
 import traceback
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Set
 from scalene.scalene_statistics import Filename, LineNumber
 from scalene.scalene_utility import generate_html
@@ -11,7 +11,7 @@
 import re
 import sys
 import traceback
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Set

 from scalene.scalene_statistics import Filename, LineNumber
 from scalene.scalene_utility import generate_html
+
+from scalene.scalene_statistics import Filename, LineNumber
+from scalene.scalene_utility import generate_html
+from scalene import launchbrowser
+
+
+class ScaleneCodeExecutor:
+    """Handles code execution and tracing for Scalene."""
+
+    def __init__(
+        self,
+        args,
+        files_to_profile: Set[Filename],
+        functions_to_profile: Dict[Filename, Set[Any]],
+        program_being_profiled: Filename,
+        program_path: Filename,
+        entrypoint_dir: Filename,
+    ):
+        """Record the configuration that the executor works from.
+
+        Every argument is stored unchanged on the instance; nothing is
+        validated or copied here.
+        """
+        # Prefix printed ahead of any exception raised by the profiled program.
+        self.__error_message = "Error in program being profiled"
+        # Option object consulted by profile_code (memory, off, web, html, ...).
+        self.__args = args
+        # What to profile.
+        self.__files_to_profile = files_to_profile
+        self.__functions_to_profile = functions_to_profile
+        # Which program is being profiled and where it lives.
+        self.__program_being_profiled = program_being_profiled
+        self.__program_path = program_path
+        self.__entrypoint_dir = entrypoint_dir
+
+    def profile_code(
+        self,
+        code: str,
+        the_globals: Dict[str, str],
+        the_locals: Dict[str, str],
+        left: List[str],
+        start_func,
+        stop_func,
+        output_profile_func,
+        stats,
+        last_profiled_tuple_func,
+    ) -> int:
+        """Run ``code`` under the profiler and emit the resulting profile.
+
+        Args:
+            code: Passed to ``exec`` together with ``the_globals`` and
+                ``the_locals``.
+            the_globals: Globals mapping handed to ``exec``.
+            the_locals: Locals mapping handed to ``exec``.
+            left: Passed to ``output_profile_func`` and forwarded to
+                ``generate_html`` as ``program_args``.
+            start_func: Zero-argument callable invoked before execution
+                unless ``args.off`` is set.
+            stop_func: Zero-argument callable always invoked after execution.
+            output_profile_func: Called with ``left``; a falsy result means
+                no profile was produced.
+            stats: Object whose ``memory_stats`` tables are updated for the
+                last profiled line, and which is handed to ``generate_html``.
+            last_profiled_tuple_func: Zero-argument callable returning a
+                3-tuple; the first two items are used as file and line keys.
+
+        Returns:
+            0 on normal completion or keyboard interrupt, the integer
+            ``SystemExit`` code when the program calls ``sys.exit`` with an
+            int, and 1 for a non-integer exit code or any other exception.
+        """
+        if self.__args.memory:
+            from scalene import pywhere  # type: ignore
+
+            pywhere.populate_struct()
+        # If --off is set, tell all children to not profile and stop profiling before we even start.
+        if "off" not in self.__args or not self.__args.off:
+            start_func()
+        # Run the code being profiled.
+        exit_status = 0
+        try:
+            exec(code, the_globals, the_locals)
+        except SystemExit as se:
+            # Intercept sys.exit and propagate the error code.
+            exit_status = se.code if isinstance(se.code, int) else 1
+        except KeyboardInterrupt:
+            # Cleanly handle keyboard interrupts (quits execution and dumps the profile).
+            # exit_status is left at 0 on this path.
+            print("Scalene execution interrupted.", file=sys.stderr)
+        except Exception as e:
+            print(f"{self.__error_message}:\n", e, file=sys.stderr)
+            traceback.print_exc()
+            exit_status = 1
+
+        finally:
+            # Always stop profiling, whichever way execution ended.
+            stop_func()
+            if self.__args.memory:
+                pywhere.disable_settrace()
+                pywhere.depopulate_struct()
+
+        # Leaving here in case of reversion
+        # sys.settrace(None)
+        # Credit one more malloc and the current high-water mark to the last
+        # profiled (file, line).
+        (last_file, last_line, _) = last_profiled_tuple_func()
+        stats.memory_stats.memory_malloc_count[last_file][last_line] += 1
+        stats.memory_stats.memory_aggregate_footprint[last_file][
+            last_line
+        ] += stats.memory_stats.memory_current_highwater_mark[last_file][last_line]
+        # If we've collected any samples, dump them.
+        did_output = output_profile_func(left)
+        if not did_output:
+            print(
+                "Scalene: The specified code did not run for long enough to profile.",
+                file=sys.stderr,
+            )
+            # Print out hints to explain why the above message may have been printed.
+            if not self.__args.profile_all:
+                print(
+                    "To track the time spent in all files, use the `--profile-all` option.",
+                    file=sys.stderr,
+                )
+            elif self.__args.profile_only or self.__args.profile_exclude:
+                # if --profile-only or --profile-exclude were
+                # specified, suggest that the patterns might be
+                # excluding too many files. Collecting the
+                # previously filtered out files could allow
+                # suggested fixes (as in, remove foo because it
+                # matches too many files).
+                print(
+                    "The patterns used in `--profile-only` or `--profile-exclude` may be filtering out too many files.",
+                    file=sys.stderr,
+                )
+            else:
+                # if none of the above cases hold, indicate that
+                # Scalene can only profile code that runs for at
+                # least one second or allocates some threshold
+                # amount of memory.
+                print(
+                    "Scalene can only profile code that runs for at least one second or allocates at least 10MB.",
+                    file=sys.stderr,
+                )
+
+            # NOTE(review): did_output is falsy inside this branch, so the
+            # condition below is always true and the function always returns
+            # here; the remaining sub-conditions are never decisive.
+            if not (
+                did_output
+                and self.__args.web
+                and not self.__args.cli
+                and not self.__args.is_child
+            ):
+                return exit_status
+
+        assert did_output
+        if self.__args.web or self.__args.html:
+            profile_filename = self.__args.profile_filename
+            if self.__args.outfile:
+                # Keep the profile's base name but place it in the directory
+                # of the requested output file.
+                profile_filename = Filename(
+                    os.path.join(
+                        os.path.dirname(self.__args.outfile),
+                        os.path.basename(profile_filename),
+                    )
+                )
+            # Generate HTML file
+            # (will also generate a JSON file to be consumed by the HTML)
+            html_output = generate_html(
+                profile_filename,
+                self.__args,
+                stats,
+                profile_metadata={},
+                program_args=left,
+            )
@@ -141,7 +141,6 @@
                 profile_filename,
                 self.__args,
                 stats,
-                profile_metadata={},
                 program_args=left,
             )
@@ -137,7 +137,7 @@
                 )
             # Generate HTML file
             # (will also generate a JSON file to be consumed by the HTML)
-            html_output = generate_html(
+            generate_html(
                 profile_filename,
                 self.__args,
                 stats,
@@ -146,7 +146,7 @@
             )
             if self.__args.web and not self.__args.cli and not self.__args.is_child:
-                launchbrowser.launch_browser(html_output)
+                launchbrowser.launch_browser(profile_filename)
         return exit_status
@@ -141,7 +141,6 @@
                 profile_filename,
                 self.__args,
                 stats,
-                profile_metadata={},
                 program_args=left,
             )

@@ -137,7 +137,7 @@
                 )
             # Generate HTML file
             # (will also generate a JSON file to be consumed by the HTML)
-            html_output = generate_html(
+            generate_html(
                 profile_filename,
                 self.__args,
                 stats,
@@ -146,7 +146,7 @@
             )

             if self.__args.web and not self.__args.cli and not self.__args.is_child:
-                launchbrowser.launch_browser(html_output)
+                launchbrowser.launch_browser(profile_filename)

         return exit_status

+
+            # Only open a browser for the top-level process in web (non-CLI) mode.
+            if self.__args.web and not self.__args.cli and not self.__args.is_child:
+                launchbrowser.launch_browser(html_output)
+
+        return exit_status
+
+    @staticmethod
+    @functools.cache
+    def should_trace(filename: Filename, func: str) -> bool:
+        """Decide whether ``func`` in ``filename`` should be traced.
+
+        The checks run in a fixed order and the first decisive one wins:
+        Jupyter input cells and explicitly registered functions are always
+        traced; files rejected by the exclusion rules never are; remaining
+        interactive sources are traced; everything else must satisfy both
+        the profile-only rules and the location check. Results are memoized
+        per ``(filename, func)`` pair.
+        """
+        rules = ScaleneCodeExecutor
+
+        # Unconditional accepts: notebook cells and registered functions.
+        if re.match(r"<ipython-input-\d+-.*>", filename) or (
+            rules._should_trace_decorated_function(filename, func)
+        ):
+            return True
+
+        # Hard rejects come next (Scalene itself, libraries, exclude patterns).
+        if not rules._passes_exclusion_rules(filename):
+            return False
+
+        if rules._handle_jupyter_cell(filename):
+            return True
+
+        # Finally the file must match --profile-only (when given) and live
+        # under the profiled program's tree.
+        return rules._passes_profile_only_rules(
+            filename
+        ) and rules._should_trace_by_location(filename)
+
+    @staticmethod
+    def _should_trace_decorated_function(filename: Filename, func: str) -> bool:
+        """Return True when ``func`` is registered for profiling in ``filename``.
+
+        Looks the file up in Scalene's files-to-profile collection and, when
+        present, checks ``func`` against the functions recorded for it.
+        Presumably these collections are filled by the ``@profile``
+        decorator; confirm against scalene_profiler.
+        """
+        # Deferred to call time: a module-level import would be circular.
+        from scalene.scalene_profiler import Scalene
+
+        if filename not in Scalene._Scalene__files_to_profile:
+            return False
+        # The file has explicitly selected functions; is this one of them?
+        return func in Scalene._Scalene__functions_to_profile[filename]
+
+    @staticmethod
+    def _passes_exclusion_rules(filename: Filename) -> bool:
+        """Check if filename passes exclusion rules (libraries, exclude patterns).
+
+        Returns False when the name contains "scalene", when profile_all is
+        off and the name looks like an interpreter or library path, or when
+        any ``--profile-exclude`` regex matches. Returns True otherwise.
+        """
+        # Import here to avoid circular imports
+        from scalene.scalene_profiler import Scalene
+        args = Scalene._Scalene__args
+
+        # Don't profile Scalene itself.
+        # This is a plain substring test on the whole path.
+        if "scalene" in filename:
+            return False
+
+        # Don't profile Python builtins/standard library.
+        try:
+            if not args.profile_all:
+                if (
+                    ("python" in filename)
+                    or ("site-packages" in filename)
+                    or ("<built-in>" in filename)
+                    or ("<frozen" in filename)
+                ):
+                    return False
+        # NOTE(review): BaseException also covers KeyboardInterrupt and
+        # SystemExit, so those are swallowed here and reported as "do not
+        # trace" -- confirm this is intended.
+        except BaseException:
@@ -204,7 +204,7 @@
                     or ("<frozen" in filename)
                 ):
                     return False
-        except BaseException:
+        except Exception:
             return False
         # Handle --profile-exclude patterns
@@ -204,7 +204,7 @@
                     or ("<frozen" in filename)
                 ):
                     return False
-        except BaseException:
+        except Exception:
             return False

         # Handle --profile-exclude patterns
+            return False
+
+        # Handle --profile-exclude patterns
+        # Each pattern is a regex searched anywhere in the filename.
+        if args.profile_exclude:
+            for pattern in args.profile_exclude:
+                if re.search(pattern, filename):
+                    return False
+
+        return True
+
+    @staticmethod
+    def _handle_jupyter_cell(filename: Filename) -> bool:
+        """Return True for interactive sources that are always profiled.
+
+        A source qualifies when its name contains ``<stdin>`` or matches the
+        IPython/Jupyter input-cell pattern ``<ipython-input-N-...>``.
+        """
+        from_stdin = "<stdin>" in filename
+        # The regex is only consulted when the name is not a <stdin> source.
+        return from_stdin or (
+            re.match(r"<ipython-input-\d+-.*>", filename) is not None
+        )
+
+    @staticmethod
+    def _passes_profile_only_rules(filename: Filename) -> bool:
+        """Return True when ``filename`` satisfies ``--profile-only``.
+
+        With no patterns configured every file passes. Otherwise at least
+        one pattern must match somewhere in the filename (regex search).
+        """
+        # Deferred to call time: a module-level import would be circular.
+        from scalene.scalene_profiler import Scalene
+
+        only_patterns = Scalene._Scalene__args.profile_only
+        if not only_patterns:
+            return True
+        return any(re.search(candidate, filename) for candidate in only_patterns)
+
+    @staticmethod
+    def _should_trace_by_location(filename: Filename) -> bool:
+        """Return True when ``filename`` lies inside the profiled program's tree.
+
+        A file qualifies when it is the program path or the entrypoint
+        directory, or is located beneath either of them.
+
+        Containment is decided on whole path components, so a sibling that
+        merely shares a name prefix (``/a/proj`` vs ``/a/project2``) is not
+        treated as inside. Paths are made absolute and case-normalized first,
+        which keeps the comparison correct on case-insensitive filesystems.
+        """
+        # Deferred to call time: a module-level import would be circular.
+        from scalene.scalene_profiler import Scalene
+
+        def _normalized(path) -> str:
+            # Absolute, separator- and case-normalized form for comparison.
+            return os.path.normcase(os.path.abspath(path))
+
+        def _is_within(path: str, root: str) -> bool:
+            # commonpath raises ValueError when the two paths cannot share a
+            # root (e.g. different drives on Windows); that simply means
+            # "not inside".
+            try:
+                return os.path.commonpath([path, root]) == root
+            except ValueError:
+                return False
+
+        target = _normalized(filename)
+        roots = (
+            Scalene._Scalene__program_path,
+            Scalene._Scalene__entrypoint_dir,
+        )
+        return any(_is_within(target, _normalized(root)) for root in roots)
diff --git a/scalene/scalene_cpu_profiler.py b/scalene/scalene_cpu_profiler.py
@@ -0,0 +1,131 @@
+"""
+CPU profiling functionality for Scalene profiler.
+
+This module extracts CPU profiling functionality from the main Scalene class
+to improve code organization and reduce complexity.
+"""
+
+import math
@@ -5,7 +5,6 @@
 to improve code organization and reduce complexity.
 """
-import math
 import signal
 import sys
 import time
@@ -5,7 +5,6 @@
 to improve code organization and reduce complexity.
 """

-import math
 import signal
 import sys
 import time
+import signal
+import sys
+import time
+from typing import Any, Dict, Optional
@@ -9,7 +9,7 @@
 import signal
 import sys
 import time
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 from scalene.scalene_signals import SignumType
 from scalene.time_info import TimeInfo, get_times
@@ -9,7 +9,7 @@
 import signal
 import sys
 import time
-from typing import Any, Dict, Optional
+from typing import Any, Optional

 from scalene.scalene_signals import SignumType
 from scalene.time_info import TimeInfo, get_times
+
+from scalene.scalene_signals import SignumType
+from scalene.time_info import TimeInfo, get_times
+from scalene.scalene_utility import compute_frames_to_record
+
+if sys.version_info >= (3, 11):
+    from types import FrameType
+else:
+    from typing import TYPE_CHECKING
+    if TYPE_CHECKING:
+        from types import FrameType
+    else:
+        FrameType = Any
+
+
+class ScaleneCPUProfiler:
+    """Handles CPU profiling functionality for Scalene.
+
+    Holds the collaborators needed by the CPU timer signal handler and the
+    timestamps of the previously handled signal.
+    """
+
+    def __init__(
+        self,
+        stats,
+        signal_manager,
+        accelerator,
+        client_timer,
+        orig_raise_signal,
+        is_thread_sleeping,
+    ):
+        """Store the collaborators and reset the sampling state.
+
+        Args:
+            stats: Stored as given; not read by the methods in this class.
+            signal_manager: Stored as given; not read by the methods in this
+                class.
+            accelerator: Optional object exposing ``get_stats()``, which must
+                return a ``(gpu_load, gpu_mem_used)`` pair. A falsy value
+                means zeros are reported instead.
+            client_timer: Object exposing ``is_set``,
+                ``yield_next_delay(elapsed)`` and ``reset()``.
+            orig_raise_signal: Callable used to raise ``SIGUSR1`` when the
+                client timer says a signal is due.
+            is_thread_sleeping: Passed through unchanged to the sample
+                processor.
+        """
+        self.__stats = stats
+        self.__signal_manager = signal_manager
+        self.__accelerator = accelerator
+        self.__client_timer = client_timer
+        self.__orig_raise_signal = orig_raise_signal
+        self.__is_thread_sleeping = is_thread_sleeping
+        # Timestamps of the previously handled signal; the handler treats
+        # zero values in this object as "no signal seen yet".
+        self.__last_signal_time = TimeInfo()
+        self.__last_cpu_interval = 0.0
+
+    @staticmethod
+    def generate_exponential_sample(scale: float) -> float:
+        """Generate an exponentially distributed sample.
+
+        Uses inverse-transform sampling: ``-scale * ln(1 - u)`` with ``u``
+        drawn uniformly from ``[0, 1)``, so the logarithm's argument is
+        never zero.
+        """
+        import math
+        import random
+
+        u = random.random()  # Uniformly distributed random number between 0 and 1
+        return -scale * math.log(1 - u)
+
+    def sample_cpu_interval(self, cpu_sampling_rate: float) -> float:
+        """Draw the next CPU sampling interval and remember it.
+
+        ``cpu_sampling_rate`` is used as the scale of the exponential
+        distribution the interval is sampled from.
+        """
+        # Sample an interval from an exponential distribution.
+        self.__last_cpu_interval = self.generate_exponential_sample(cpu_sampling_rate)
+        return self.__last_cpu_interval
+
+    def cpu_signal_handler(
+        self,
+        signum: SignumType,
+        this_frame: Optional[FrameType],
+        should_trace_func,
+        process_cpu_sample_func,
+        sample_cpu_interval_func,
+        restart_timer_func,
+    ) -> None:
+        """Handle one CPU timer signal.
+
+        The first invocation only records the current times. Later
+        invocations hand a sample to ``process_cpu_sample_func`` and then
+        re-arm the timer, taking any client-set timer into account.
+
+        Args:
+            signum: Signal number, forwarded to ``process_cpu_sample_func``.
+            this_frame: Frame supplied by the signal machinery; not used here.
+            should_trace_func: Passed to ``compute_frames_to_record``.
+            process_cpu_sample_func: Receives the signal number, the frames
+                to record, the current and previous time info, GPU load and
+                memory, and the thread-sleeping state.
+            sample_cpu_interval_func: Zero-argument callable returning the
+                next timer interval.
+            restart_timer_func: Callable that re-arms the timer with the
+                given interval.
+        """
+        # Bound up front so the ``finally`` clause can always tell whether an
+        # interval was chosen. Previously the win32 path hit an unbound local
+        # on the first pass and after any early exception.
+        next_interval: Optional[float] = None
+        try:
+            # Get current time stats.
+            now = TimeInfo()
+            now.sys, now.user = get_times()
+            now.virtual = time.process_time()
+            now.wallclock = time.perf_counter()
+            if (
+                self.__last_signal_time.virtual == 0
+                or self.__last_signal_time.wallclock == 0
+            ):
+                # Initialization: store values and update on the next pass.
+                self.__last_signal_time = now
+                if sys.platform != "win32":
+                    next_interval = sample_cpu_interval_func()
+                    restart_timer_func(next_interval)
+                return
+
+            if self.__accelerator:
+                (gpu_load, gpu_mem_used) = self.__accelerator.get_stats()
+            else:
+                (gpu_load, gpu_mem_used) = (0.0, 0.0)
+
+            # Process this CPU sample.
+            process_cpu_sample_func(
+                signum,
+                compute_frames_to_record(should_trace_func),
+                now,
+                gpu_load,
+                gpu_mem_used,
+                self.__last_signal_time,
+                self.__is_thread_sleeping,
+            )
+            elapsed = now.wallclock - self.__last_signal_time.wallclock
+            # Store the latest values as the previously recorded values.
+            self.__last_signal_time = now
+            # Restart the timer while handling any timers set by the client.
+            next_interval = sample_cpu_interval_func()
+            if sys.platform != "win32":
+                if self.__client_timer.is_set:
+                    (
+                        should_raise,
+                        remaining_time,
+                    ) = self.__client_timer.yield_next_delay(elapsed)
+                    if should_raise:
+                        self.__orig_raise_signal(signal.SIGUSR1)
+                    # NOTE-- 0 will only be returned if the 'seconds' have elapsed
+                    # and there is no interval
+                    to_wait: float
+                    if remaining_time > 0:
+                        to_wait = min(remaining_time, next_interval)
+                    else:
+                        to_wait = next_interval
+                        self.__client_timer.reset()
+                    restart_timer_func(to_wait)
+                else:
+                    restart_timer_func(next_interval)
+        finally:
+            if sys.platform == "win32":
+                # On Windows the timer is re-armed here on every exit path.
+                # If no interval was chosen above (first pass, or an error
+                # occurred early), sample one now so the timer keeps running.
+                if next_interval is None:
+                    next_interval = sample_cpu_interval_func()
+                restart_timer_func(next_interval)
+
+    def windows_timer_loop(self, windows_queue, timer_signals) -> None:
+        """Timer loop for Windows CPU profiling.
+
+        While ``timer_signals`` is truthy, sleeps 10 ms and then puts
+        ``None`` on ``windows_queue``. ``timer_signals`` is evaluated as
+        given on every iteration, so it must be an object whose truthiness
+        can change for the loop to end.
+        """
+        while timer_signals:
+            time.sleep(0.01)
+            windows_queue.put(None)