googleapis · chandra-siri · Dec 3, 2025 · Dec 23, 2025 · Dec 27, 2025 · Dec 27, 2025
@@ -62,3 +62,6 @@ system_tests/local_test_setup
 # Make sure a generated file isn't accidentally committed.
 pylintrc
 pylintrc.test
+
+# Benchmarking results and logs
+__benchmark_results__/**
@@ -0,0 +1,13 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,164 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import os
+import statistics
+from typing import Any, List, Optional
+
+
+def publish_benchmark_extra_info(
+    benchmark: Any,
+    params: Any,
+    benchmark_group: str = "read",
+    true_times: Optional[List[float]] = None,
+) -> None:
+    """Record run parameters and derived throughput on a benchmark object.
+
+    Copies the run parameters from ``params`` into ``benchmark.extra_info``,
+    sets ``benchmark.group``, derives throughput figures (MiB/s) from the
+    timing statistics in ``benchmark.stats`` and prints a summary table.
+
+    Args:
+        benchmark: Object exposing ``extra_info`` (a mutable mapping),
+            ``stats`` (a mapping with ``min``, ``max``, ``mean``, ``median``,
+            ``rounds`` and ``iterations``), ``name`` and a writable ``group``.
+        params: Object providing ``num_files``, ``file_size_bytes``,
+            ``chunk_size_bytes``, ``pattern``, ``num_coros``, ``rounds``,
+            ``bucket_name``, ``bucket_type`` and ``num_processes``.
+        benchmark_group: Group label stored on the benchmark. For ``"write"``
+            the pattern is recorded as ``"seq"`` and ``params.pattern`` is
+            not read.
+        true_times: Optional list of measured durations. When non-empty, one
+            throughput sample is derived from each entry and published under
+            the ``true_throughput_MiB_s_*`` keys. ``None`` and an empty list
+            are treated alike (nothing extra is published).
+    """
+
+    benchmark.extra_info["num_files"] = params.num_files
+    benchmark.extra_info["file_size"] = params.file_size_bytes
+    benchmark.extra_info["chunk_size"] = params.chunk_size_bytes
+    if benchmark_group == "write":
+        benchmark.extra_info["pattern"] = "seq"
+    else:
+        benchmark.extra_info["pattern"] = params.pattern
+    benchmark.extra_info["coros"] = params.num_coros
+    benchmark.extra_info["rounds"] = params.rounds
+    benchmark.extra_info["bucket_name"] = params.bucket_name
+    benchmark.extra_info["bucket_type"] = params.bucket_type
+    benchmark.extra_info["processes"] = params.num_processes
+    benchmark.group = benchmark_group
+
+    # Total payload moved per round, in MiB; every throughput figure below is
+    # this amount divided by a duration.
+    total_mib = params.file_size_bytes / (1024 * 1024) * params.num_files
+
+    # The slowest time yields the lowest throughput and vice versa.
+    min_throughput = total_mib / benchmark.stats["max"]
+    max_throughput = total_mib / benchmark.stats["min"]
+    # Dividing by the mean/median *time* only approximates the mean/median
+    # *throughput*; the printed labels below say so.
+    mean_throughput = total_mib / benchmark.stats["mean"]
+    median_throughput = total_mib / benchmark.stats["median"]
+
+    benchmark.extra_info["throughput_MiB_s_min"] = min_throughput
+    benchmark.extra_info["throughput_MiB_s_max"] = max_throughput
+    benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput
+    benchmark.extra_info["throughput_MiB_s_median"] = median_throughput
+
+    print("\nThroughput Statistics (MiB/s):")
+    print(f"  Min:    {min_throughput:.2f} (from max time)")
+    print(f"  Max:    {max_throughput:.2f} (from min time)")
+    print(f"  Mean:   {mean_throughput:.2f} (approx, from mean time)")
+    print(f"  Median: {median_throughput:.2f} (approx, from median time)")
+
+    if true_times:
+        # Statistics taken over per-sample throughputs rather than derived
+        # from aggregated timings.
+        throughputs = [total_mib / t for t in true_times]
+        true_min_throughput = min(throughputs)
+        true_max_throughput = max(throughputs)
+        true_mean_throughput = statistics.mean(throughputs)
+        true_median_throughput = statistics.median(throughputs)
+
+        benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput
+        benchmark.extra_info["true_throughput_MiB_s_max"] = true_max_throughput
+        benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput
+        benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput
+
+        print("\nThroughput Statistics from true_times (MiB/s):")
+        print(f"  Min:    {true_min_throughput:.2f}")
+        print(f"  Max:    {true_max_throughput:.2f}")
+        print(f"  Mean:   {true_mean_throughput:.2f}")
+        print(f"  Median: {true_median_throughput:.2f}")
+
+    # Get benchmark name, rounds, and iterations
+    name = benchmark.name
+    rounds = benchmark.stats["rounds"]
+    iterations = benchmark.stats["iterations"]
+
+    # Header for throughput table
+    header = "\n\n" + "-" * 125 + "\n"
+    header += "Throughput Benchmark (MiB/s)\n"
+    header += "-" * 125 + "\n"
+    header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n"
+    header += "-" * 125
+
+    # Data row for throughput table
+    # The table headers (Min, Max) refer to the throughput values.
+    # StdDev is printed as N/A: no throughput standard deviation is computed.
+    row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}"
+
+    print(header)
+    print(row)
+    print("-" * 125)
+
+class RandomBytesIO(io.RawIOBase):
+    """
+    A read-only, seekable file-like object of fixed length whose content is
+    generated on demand with os.urandom.
+
+    Nothing is stored: every read draws fresh random bytes, so seeking back
+    and reading the same range again returns different data. The stream
+    reports EOF once `size` bytes have been consumed, and construction is
+    refused when `size` exceeds a safety cap.
+    """
+
+    # 10 GiB default safety cap
+    DEFAULT_CAP = 10 * 1024 * 1024 * 1024
+
+    def __init__(self, size, max_size=DEFAULT_CAP):
+        """
+        Args:
+            size (int): The exact size of the virtual file in bytes.
+            max_size (int): The maximum allowed size to prevent safety issues.
+
+        Raises:
+            ValueError: If `size` is None, negative, or larger than `max_size`.
+        """
+        super().__init__()
+
+        if size is None:
+            raise ValueError("Size must be defined (cannot be infinite).")
+
+        if size < 0:
+            raise ValueError(f"Size must be non-negative, got {size}.")
+
+        if size > max_size:
+            # Report the limit actually in force; callers may override the
+            # default cap, so the message must not assume 10 GiB.
+            raise ValueError(
+                f"Requested size {size} exceeds the maximum limit of {max_size} bytes."
+            )
+
+        self._size = size
+        self._pos = 0
+
+    def read(self, n=-1):
+        """Return up to `n` random bytes; all remaining bytes if `n` is None or negative."""
+        # 1. Handle "read all" (n=-1)
+        if n is None or n < 0:
+            n = self._size - self._pos
+
+        # 2. Handle EOF (End of File)
+        if self._pos >= self._size:
+            return b""
+
+        # 3. Clamp read amount to remaining size
+        # This ensures we stop exactly at `size` bytes.
+        n = min(n, self._size - self._pos)
+
+        # 4. Generate data
+        data = os.urandom(n)
+        self._pos += len(data)
+        return data
+
+    def readable(self):
+        """Report that the stream supports reading."""
+        return True
+
+    def seekable(self):
+        """Report that the stream supports seek() and tell()."""
+        return True
+
+    def tell(self):
+        """Return the current position in bytes from the start."""
+        return self._pos
+
+    def seek(self, offset, whence=io.SEEK_SET):
+        """
+        Move the position and return the new absolute position.
+
+        Unlike regular files, out-of-range targets do not raise and do not
+        extend the stream: the result is clamped to [0, size].
+
+        Raises:
+            ValueError: If `whence` is not SEEK_SET, SEEK_CUR or SEEK_END.
+        """
+        if whence == io.SEEK_SET:
+            new_pos = offset
+        elif whence == io.SEEK_CUR:
+            new_pos = self._pos + offset
+        elif whence == io.SEEK_END:
+            new_pos = self._size + offset
+        else:
+            raise ValueError(f"Invalid whence: {whence}")
+
+        # Clamp position to valid range [0, size]
+        self._pos = max(0, min(new_pos, self._size))
+        return self._pos
@@ -0,0 +1,181 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# TODO: rename this module to config_to_params.py
+import itertools
+import os
+from typing import Dict, List
+
+import yaml
+
+try:
+    from tests.perf.microbenchmarks.parameters import ReadParameters, WriteParameters
+except ModuleNotFoundError:
+    from parameters import ReadParameters, WriteParameters
+
+
+
+
+
+def _get_params() -> Dict[str, List[ReadParameters]]:
+    """
+    Expand config.yaml into read-benchmark parameter sets.
+
+    Loads config.yaml from this module's directory. For each entry in its
+    "workload" list, one ReadParameters is built per element of the Cartesian
+    product of the common bucket types, file sizes (MiB), chunk sizes (MiB)
+    and the workload's own "processes" and "coros" lists. Sizes are converted
+    from MiB to bytes. A workload whose name contains "single_file" uses one
+    file; every other workload uses num_processes * num_coros files.
+
+    Returns:
+        A dict mapping each workload name to its list of ReadParameters.
+    """
+    module_dir = os.path.dirname(__file__)
+    with open(os.path.join(module_dir, "config.yaml"), "r") as config_file:
+        config = yaml.safe_load(config_file)
+
+    common = config["common"]
+    all_bucket_types = common["bucket_types"]
+    all_file_sizes_mib = common["file_sizes_mib"]
+    all_chunk_sizes_mib = common["chunk_sizes_mib"]
+    rounds = common["rounds"]
+
+    # Bucket type -> concrete bucket name, taken from the config defaults.
+    defaults = config["defaults"]
+    buckets = {
+        "zonal": defaults["DEFAULT_RAPID_ZONAL_BUCKET"],
+        "regional": defaults["DEFAULT_STANDARD_BUCKET"],
+    }
+
+    result: Dict[str, List[ReadParameters]] = {}
+    for workload in config["workload"]:
+        workload_name = workload["name"]
+        pattern = workload["pattern"]
+
+        # Every combination of the shared axes with this workload's
+        # process and coroutine counts.
+        combos = itertools.product(
+            all_bucket_types,
+            all_file_sizes_mib,
+            all_chunk_sizes_mib,
+            workload["processes"],
+            workload["coros"],
+        )
+
+        result[workload_name] = [
+            ReadParameters(
+                name=f"{pattern}_{bucket_type}_{num_processes}p_{num_coros}c",
+                workload_name=workload_name,
+                pattern=pattern,
+                bucket_name=buckets[bucket_type],
+                bucket_type=bucket_type,
+                num_coros=num_coros,
+                num_processes=num_processes,
+                num_files=(
+                    1
+                    if "single_file" in workload_name
+                    else num_processes * num_coros
+                ),
+                rounds=rounds,
+                chunk_size_bytes=chunk_size_mib * 1024 * 1024,
+                file_size_bytes=file_size_mib * 1024 * 1024,
+            )
+            for (
+                bucket_type,
+                file_size_mib,
+                chunk_size_mib,
+                num_processes,
+                num_coros,
+            ) in combos
+        ]
+    return result
+
+
+def get_write_params() -> Dict[str, List[WriteParameters]]:
+    """
+    Expand config_writes.yaml into write-benchmark parameter sets.
+
+    Loads config_writes.yaml from this module's directory. For each entry in
+    its "workload" list, one WriteParameters is built per element of the
+    Cartesian product of the common bucket types, file sizes (MiB), chunk
+    sizes (MiB) and the workload's own "processes" and "coros" lists. Sizes
+    are converted from MiB to bytes. A workload whose name contains
+    "single_file" uses one file; every other workload uses
+    num_processes * num_coros files.
+
+    Returns:
+        A dict mapping each workload name to its list of WriteParameters.
+    """
+    module_dir = os.path.dirname(__file__)
+    with open(os.path.join(module_dir, "config_writes.yaml"), "r") as config_file:
+        config = yaml.safe_load(config_file)
+
+    common = config["common"]
+    all_bucket_types = common["bucket_types"]
+    all_file_sizes_mib = common["file_sizes_mib"]
+    all_chunk_sizes_mib = common["chunk_sizes_mib"]
+    rounds = common["rounds"]
+
+    # Bucket type -> concrete bucket name, taken from the config defaults.
+    defaults = config["defaults"]
+    buckets = {
+        "zonal": defaults["DEFAULT_RAPID_ZONAL_BUCKET"],
+        "regional": defaults["DEFAULT_STANDARD_BUCKET"],
+    }
+
+    result: Dict[str, List[WriteParameters]] = {}
+    for workload in config["workload"]:
+        workload_name = workload["name"]
+
+        # Every combination of the shared axes with this workload's
+        # process and coroutine counts.
+        combos = itertools.product(
+            all_bucket_types,
+            all_file_sizes_mib,
+            all_chunk_sizes_mib,
+            workload["processes"],
+            workload["coros"],
+        )
+
+        result[workload_name] = [
+            WriteParameters(
+                name=f"{workload_name}_{bucket_type}_{num_processes}p_{num_coros}c",
+                workload_name=workload_name,
+                bucket_name=buckets[bucket_type],
+                bucket_type=bucket_type,
+                num_coros=num_coros,
+                num_processes=num_processes,
+                num_files=(
+                    1
+                    if "single_file" in workload_name
+                    else num_processes * num_coros
+                ),
+                rounds=rounds,
+                chunk_size_bytes=chunk_size_mib * 1024 * 1024,
+                file_size_bytes=file_size_mib * 1024 * 1024,
+            )
+            for (
+                bucket_type,
+                file_size_mib,
+                chunk_size_mib,
+                num_processes,
+                num_coros,
+            ) in combos
+        ]
+    return result