Commit 6738b66

Author: Andrija Kolic (committed)
[GR-71293] Facilitate the addition of C-extension-module GraalPy interpreter micro benchmarks
PullRequest: graal/22746
2 parents: 2a6d51f + 23c427c

File tree

6 files changed: +251, -38 lines

sdk/mx.sdk/mx_sdk_benchmark.py

Lines changed: 3 additions & 0 deletions
@@ -1965,6 +1965,9 @@ def _prepare_for_running(self, args, out, err, cwd, nonZeroIsFatal):
             self.bmSuite.benchmark_output_dir(bm_exec_context().get("benchmark"), args),
             self.bmSuite.get_full_image_name(self.bmSuite.get_base_image_name(), bm_exec_context().get("vm").config_name())
         )
+        if self.language == "Python":
+            # C-extension-module micros would break if they did not have 'graalpython' somewhere in the path
+            output_dir = output_dir / "graalpython"
         self.staged_program_file_path = output_dir / file_name
         self.staged_program_file_path.parent.mkdir(parents=True, exist_ok=True)
         self.staging_args = args + [
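
For illustration, here is the staging-path behavior this change introduces, as a minimal standalone sketch; the output directory and file name are hypothetical:

    from pathlib import Path

    output_dir = Path("/tmp/bench-output/micro-ext")  # hypothetical benchmark output dir
    language = "Python"
    if language == "Python":
        # Mirrors the patch: GraalPy C-extension-module micros need 'graalpython' in the path
        output_dir = output_dir / "graalpython"
    staged_program_file_path = output_dir / "bench.py"  # hypothetical staged file name
    staged_program_file_path.parent.mkdir(parents=True, exist_ok=True)
    print(staged_program_file_path)  # /tmp/bench-output/micro-ext/graalpython/bench.py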

truffle/mx.truffle/mx_polybench/model.py

Lines changed: 195 additions & 32 deletions
@@ -39,12 +39,14 @@
 # SOFTWARE.
 #
 import contextlib
+import dataclasses
 import hashlib
 import json
 import os
 import re
 import shutil
 from argparse import ArgumentParser, Namespace
+from enum import Enum
 from pathlib import Path
 from typing import Callable, Dict, FrozenSet, Iterable, List, NamedTuple, Optional, Set, Tuple, Union, Any, Generator

@@ -260,6 +262,112 @@ def _check_dist(dist_name: str, require_built: bool = True) -> Optional[str]:
     mx.abort(f"Unsupported distribution kind {type(dist)}")


+@dataclasses.dataclass(frozen=True)
+class OutlierExclusionConfig:
+    """Record class that contains the outlier exclusion lower and upper percentiles."""
+
+    lower_percentile: float
+    upper_percentile: float
+
+    @staticmethod
+    def from_string(s: str) -> "OutlierExclusionConfig":
+        """Constructs an `OutlierExclusionConfig` object from a "<lower_percentile>-<upper_percentile>" string."""
+        parts = s.strip().split("-")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid outlier exclusion value: '{s}'")
+        return OutlierExclusionConfig(float(parts[0]), float(parts[1]))
+
+
+class SuiteStableRunConfig:
+    """Interface for a PolyBench Stable-Run Configuration file."""
+
+    def __init__(self, file_path: Path):
+        with open(file_path) as f:
+            self._dict: dict = json.load(f)
+
+    def get_benchmark(self, bench_name: str) -> "BenchmarkStableRunConfig":
+        """Returns an interface for the benchmark entry of the configuration file."""
+        return BenchmarkStableRunConfig(self._dict[bench_name])
+
+    def contains(self, bench_name: str) -> bool:
+        """Returns whether an entry in the configuration file exists for a benchmark."""
+        return bench_name in self._dict
+
+    def benchmarks(self) -> List[str]:
+        """Returns all the benchmarks for which an entry is defined in the configuration file."""
+        return self._dict.keys()
+
+
+class StableRunPolicy(Enum):
+    INDIVIDUAL_BUILDS = "outlier-elimination-individual-builds"
+    ALL_BUILDS = "outlier-elimination-all-builds"
+
+
+class BenchmarkStableRunConfig:
+    """Interface for a benchmark entry of a PolyBench Stable-Run Configuration file."""
+
+    def __init__(self, d: dict):
+        self._dict: dict = d
+
+    @property
+    def policy(self) -> StableRunPolicy:
+        """
+        The policy of the benchmark configuration entry.
+
+        Different policies warrant specific handling in the computation of the stabilized metric.
+        Different policies may also require different entry formats.
+        """
+        # We should move towards deprecating the INDIVIDUAL_BUILDS policy (GR-71845)
+        return StableRunPolicy(self._dict.get("policy", StableRunPolicy.INDIVIDUAL_BUILDS))
+
+    @property
+    def builds(self):
+        """The number of image builds to execute (in the case of Native Image benchmarks)."""
+        if self.policy == StableRunPolicy.INDIVIDUAL_BUILDS:
+            return self._dict["builds"]["count"]
+        return self._parse_builds_x_forks()[0]
+
+    @property
+    def forks(self):
+        """The number of forks to execute (per image build, in the case of Native Image benchmarks)."""
+        if self.policy == StableRunPolicy.INDIVIDUAL_BUILDS:
+            return self._dict["run-forks"]["count"]
+        return self._parse_builds_x_forks()[1]
+
+    @property
+    def outlier_exclusion(self) -> OutlierExclusionConfig:
+        """The outlier exclusion configuration to be used on fork data."""
+        if self.policy != StableRunPolicy.ALL_BUILDS:
+            raise ValueError(f"This property is not available for the {self.policy} policy!")
+        return OutlierExclusionConfig.from_string(self._dict.get("focus"))
+
+    @property
+    def build_outlier_exclusion(self) -> OutlierExclusionConfig:
+        """The outlier exclusion configuration to be used on image build data."""
+        if self.policy == StableRunPolicy.ALL_BUILDS:
+            return self.outlier_exclusion
+        config = self._dict["builds"]
+        return OutlierExclusionConfig(config["lower-percentile"], config["upper-percentile"])
+
+    @property
+    def fork_outlier_exclusion(self) -> OutlierExclusionConfig:
+        """The outlier exclusion configuration to be used on data belonging to forks of one image build."""
+        if self.policy == StableRunPolicy.ALL_BUILDS:
+            return self.outlier_exclusion
+        config = self._dict["run-forks"]
+        return OutlierExclusionConfig(config["lower-percentile"], config["upper-percentile"])
+
+    def _parse_builds_x_forks(self) -> (int, int):
+        """Parses a "<builds>x<forks>" string into a tuple containing the build and fork numbers."""
+        if self.policy != StableRunPolicy.ALL_BUILDS:
+            raise ValueError(f"This method is not available for the {self.policy} policy!")
+        forks = self._dict["forks"]
+        parts = forks.strip().split("x")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid forks value: '{forks}'")
+        return int(parts[0]), int(parts[1])
+
+
 class StabilizingPolybenchBenchmarkDispatcher(mx_benchmark.DefaultBenchmarkDispatcher):
     """
     Custom dispatching class for non-native-image PolybenchBenchmarkSuite stable runs that facilitates scheduling based
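
A sketch of how the new configuration interface might be exercised, assuming the classes above are importable; the benchmark name and file path are hypothetical, and the ALL_BUILDS entry shape is inferred from the parsing code ("forks" as "<builds>x<forks>", "focus" as "<lower_percentile>-<upper_percentile>"):

    import json
    from pathlib import Path

    # Hypothetical stable-run configuration with a single ALL_BUILDS-policy entry
    path = Path("stable-run-config.json")
    path.write_text(json.dumps({
        "my-bench": {
            "policy": "outlier-elimination-all-builds",
            "forks": "3x10",     # 3 image builds x 10 run forks per build
            "focus": "0.1-0.9",  # keep fork data within the 10th-90th percentiles
        }
    }))

    config = SuiteStableRunConfig(path)
    bench = config.get_benchmark("my-bench")
    assert bench.policy == StableRunPolicy.ALL_BUILDS
    assert (bench.builds, bench.forks) == (3, 10)
    assert bench.outlier_exclusion == OutlierExclusionConfig(0.1, 0.9)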
@@ -320,8 +428,7 @@ def __init__(self, state: BenchmarkDispatcherState, stable_run_config: str):
         if not self._stable_run_config_path.exists():
             msg = f"Cannot initialize {self.__class__.__name__} instance with non-existing configuration file '{self._stable_run_config_path}'!"
             raise ValueError(msg)
-        with open(self._stable_run_config_path) as f:
-            self._stable_run_config: dict = json.load(f)
+        self._stable_run_config: SuiteStableRunConfig = SuiteStableRunConfig(self._stable_run_config_path)

     def validated_env_dispatch(self) -> Generator[BenchmarkExecutionConfiguration, Any, None]:
         """
@@ -368,7 +475,7 @@ def dispatch_and_log(
         * Second, it iterates over each benchmark which requires to be run in the current benchmark batch.
         """
         dispatch_counter = 0
-        number_of_batches = max([self._stable_run_config[bench]["run-forks"]["count"] for bench in benchmarks])
+        number_of_batches = max([self._stable_run_config.get_benchmark(bench).forks for bench in benchmarks])
         for batch_index in range(number_of_batches):
             if dry_run:
                 mx.log(f" * Bench batch #{batch_index + 1}")
@@ -383,14 +490,14 @@
                     last_dispatch = dispatch_counter + 1 == total_dispatch_count
                     with ConstantContextValueManager("last_dispatch", last_dispatch):
                         fork_info = ForkInfo(
-                            fork_number_dict[benchmark], self._stable_run_config[benchmark]["run-forks"]["count"]
+                            fork_number_dict[benchmark], self._stable_run_config.get_benchmark(benchmark).forks
                         )
                         yield BenchmarkExecutionConfiguration([benchmark], mx_benchmark_args, bm_suite_args, fork_info)
                         dispatch_counter += 1
                         fork_number_dict[benchmark] += 1

     def _get_benchmarks_for_batch(self, benchmarks: List[str], batch_index: int):
-        return [bench for bench in benchmarks if self._stable_run_config[bench]["run-forks"]["count"] > batch_index]
+        return [bench for bench in benchmarks if self._stable_run_config.get_benchmark(bench).forks > batch_index]

     def _verify_no_conflicting_args_are_set(self):
         mx_benchmark_args_dict = vars(self.state.mx_benchmark_args)
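
The batch scheduling performed by dispatch_and_log and _get_benchmarks_for_batch, reduced to a standalone sketch with hypothetical fork counts:

    forks = {"bench-a": 3, "bench-b": 5}  # hypothetical per-benchmark fork counts
    benchmarks = list(forks)

    number_of_batches = max(forks[bench] for bench in benchmarks)
    for batch_index in range(number_of_batches):
        # A benchmark joins a batch only while it still has forks left to run
        batch = [bench for bench in benchmarks if forks[bench] > batch_index]
        print(f"batch #{batch_index + 1}: {batch}")
    # Batches 1-3 run both benchmarks; batches 4-5 run only bench-b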
@@ -412,16 +519,24 @@ def _parse_benchmark_list(self) -> List[str]:
     def _verify_stable_run_config(self, benchmarks: List[str]):
         levels = self._get_required_config_levels()
         fields = ["count"]
+        v2_fields = ["forks", "focus"]
         for bench in benchmarks:
-            if bench not in self._stable_run_config:
+            if not self._stable_run_config.contains(bench):
                 msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing an entry for the '{bench}' benchmark!"
                 raise ValueError(msg)
-            bench_config = self._stable_run_config[bench]
+            bench_config = self._stable_run_config.get_benchmark(bench)
+            if bench_config.policy == StableRunPolicy.ALL_BUILDS:
+                for field in v2_fields:
+                    if field not in bench_config._dict:
+                        msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing the '{field}' key in the '{bench}' object!"
+                        raise ValueError(msg)
+                continue
+            # To be removed once all INDIVIDUAL_BUILDS policy benchmarks are updated
             for level in levels:
-                if level not in bench_config:
+                if level not in bench_config._dict:
                     msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing the '{level}' key in the '{bench}' object!"
                     raise ValueError(msg)
-                level_config = bench_config[level]
+                level_config = bench_config._dict[level]
                 for field in fields:
                     if field not in level_config:
                         msg = f"PolyBench stable run configuration file at '{self._stable_run_config_path}' is missing the '{field}' key in the '{bench}.{level}' object!"
@@ -563,7 +678,7 @@ def dispatch_and_log(
         * Third, it iterates over each benchmark which requires to be run in the current benchmark batch.
          This loop is implemented in the `dispatch_batch` method.
         """
-        build_count = max([self._stable_run_config[bench]["builds"]["count"] for bench in benchmarks])
+        build_count = max([self._stable_run_config.get_benchmark(bench).builds for bench in benchmarks])
         self._dispatch_counter = 0
         with ConstantContextValueManager(PolybenchBenchmarkSuite.PGO_PROFILES, []):
             for build_index in range(build_count):
@@ -582,10 +697,10 @@ def dispatch_build(
         mx.log(f" * Build #{build_index + 1}")
         build_stages = ["agent", "instrument-image", "instrument-run", "image"] if build_index == 0 else ["image"]
         current_build_benchmarks = [
-            bench for bench in benchmarks if self._stable_run_config[bench]["builds"]["count"] > build_index
+            bench for bench in benchmarks if self._stable_run_config.get_benchmark(bench).builds > build_index
         ]
         number_of_preparation_batches = len(build_stages)
-        bench_batches = [self._stable_run_config[bench]["run-forks"]["count"] for bench in current_build_benchmarks]
+        bench_batches = [self._stable_run_config.get_benchmark(bench).forks for bench in current_build_benchmarks]
         number_of_batches = number_of_preparation_batches + max(bench_batches)
         with ConstantContextValueManager(PolybenchBenchmarkSuite.BUILD_BENCHMARKS, current_build_benchmarks):
             for batch_index in range(number_of_batches):
@@ -639,8 +754,8 @@ def dispatch_batch(
                 last_dispatch = self._dispatch_counter + 1 == total_dispatch_count
                 with ConstantContextValueManager("last_dispatch", last_dispatch):
                     total_fork_count = (
-                        self._stable_run_config[benchmark]["builds"]["count"]
-                        * self._stable_run_config[benchmark]["run-forks"]["count"]
+                        self._stable_run_config.get_benchmark(benchmark).builds
+                        * self._stable_run_config.get_benchmark(benchmark).forks
                     )
                     fork_info = ForkInfo(fork_number_dict[benchmark], total_fork_count)
                     yield BenchmarkExecutionConfiguration([benchmark], mx_bench_args, extended_bm_suite_args, fork_info)
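
The fork accounting in this hunk, spelled out with hypothetical counts:

    builds, forks = 3, 10              # hypothetical stable-run config values
    total_fork_count = builds * forks  # 30 forks across the whole stable run
    # ForkInfo pairs each fork's running number with this total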
@@ -747,14 +862,12 @@ def __init__(
         key_fn: Optional[Callable[[DataPoint], Any]],
         field: str,
         update_fn: Optional[Callable[[DataPoint], DataPoint]],
-        aggregation_level: str,
         final_consumer: bool,
     ):
         # The lower and upper percentiles will be set on a per-group basis in `calculate_aggregate_value` - as they
         # can have different values for different benchmarks.
         super().__init__(selector_fn, key_fn, field, update_fn, 0, 1)
         self._suite = suite
-        self._aggregation_level = aggregation_level
         self._final_consumer = final_consumer

     def select_datapoints(self, datapoints: DataPoints) -> DataPoints:
@@ -772,16 +885,25 @@ def process_datapoints(self, datapoints: DataPoints) -> DataPoints:
         return super().process_datapoints(datapoints)

     def calculate_aggregate_value(self, datapoints: DataPoints) -> Any:
-        config = bm_exec_context().get(PolybenchBenchmarkSuite.STABLE_CONFIG)
-        benchmark = self.get_and_verify_unique_benchmark_dimension(datapoints)
-        self._lower_percentile = self._suite.resolve_config_field_or_default(
-            config, [benchmark, self._aggregation_level, "lower-percentile"], 0
-        )
-        self._upper_percentile = self._suite.resolve_config_field_or_default(
-            config, [benchmark, self._aggregation_level, "upper-percentile"], 1
-        )
+        self.determine_outlier_exclusion_percentiles(datapoints)
         return super().calculate_aggregate_value(datapoints)

+    def determine_outlier_exclusion_percentiles(self, datapoints: DataPoints):
+        config: Optional[SuiteStableRunConfig] = bm_exec_context().get(PolybenchBenchmarkSuite.STABLE_CONFIG)
+        benchmark: str = self.get_and_verify_unique_benchmark_dimension(datapoints)
+        if config is None:
+            # Handle non-stable-run benchmarks
+            self._lower_percentile = 0
+            self._upper_percentile = 1
+            return
+        # Handle stable-run benchmarks
+        bench_config = config.get_benchmark(benchmark)
+        self.determine_stable_run_outlier_exclusion_percentiles(bench_config)
+
+    def determine_stable_run_outlier_exclusion_percentiles(self, bench_config: BenchmarkStableRunConfig):
+        self._lower_percentile = bench_config.outlier_exclusion.lower_percentile
+        self._upper_percentile = bench_config.outlier_exclusion.upper_percentile
+
     def get_and_verify_unique_benchmark_dimension(self, datapoints: DataPoints) -> str:
         benchmark = datapoints[0]["benchmark"]
         for dp in datapoints:
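
What percentile-based outlier exclusion does to a group of datapoints, as a minimal sketch; the actual trimming lives in the post-processor base class (not part of this diff), so the quantile arithmetic below is an illustrative assumption:

    import statistics

    def mean_within_percentiles(samples, lower, upper):
        # Keep only the values whose rank falls inside [lower, upper]
        ordered = sorted(samples)
        n = len(ordered)
        lo = int(n * lower)
        hi = max(int(n * upper), lo + 1)
        return statistics.mean(ordered[lo:hi])

    fork_times = [10.2, 10.3, 10.1, 10.4, 25.0]  # one outlier fork
    print(mean_within_percentiles(fork_times, 0.1, 0.9))  # 10.25 - the outlier is excluded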
@@ -819,7 +941,11 @@ def update_fn(dp):
             self.verify_and_process_id_score_function(dp)
             return dp

-        super().__init__(suite, selector_fn, key_fn, field, update_fn, "run-forks", True)
+        super().__init__(suite, selector_fn, key_fn, field, update_fn, True)
+
+    def determine_stable_run_outlier_exclusion_percentiles(self, bench_config: BenchmarkStableRunConfig):
+        self._lower_percentile = bench_config.fork_outlier_exclusion.lower_percentile
+        self._upper_percentile = bench_config.fork_outlier_exclusion.upper_percentile


 class NativeModeBuildSummaryPostProcessor(FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor):
@@ -843,7 +969,11 @@ def update_fn(dp):
             self.verify_and_process_id_score_function(dp)
             return dp

-        super().__init__(suite, selector_fn, key_fn, field, update_fn, "run-forks", False)
+        super().__init__(suite, selector_fn, key_fn, field, update_fn, False)
+
+    def determine_stable_run_outlier_exclusion_percentiles(self, bench_config: BenchmarkStableRunConfig):
+        self._lower_percentile = bench_config.fork_outlier_exclusion.lower_percentile
+        self._upper_percentile = bench_config.fork_outlier_exclusion.upper_percentile


 class NativeModeBenchmarkSummaryPostProcessor(FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor):
@@ -860,14 +990,48 @@ def __init__(self, suite: "PolybenchBenchmarkSuite"):

         def update_fn(dp):
             dp["metric.name"] = "time"
+            if "metric.fork-number" in dp:
+                del dp["metric.fork-number"]
+            if "native-image.image-fork-number" in dp:
+                del dp["native-image.image-fork-number"]
             if "metric.object" in dp:
                 del dp["metric.object"]
             if "native-image.rebuild-number" in dp:
                 del dp["native-image.rebuild-number"]
             self.verify_and_process_id_score_function(dp)
             return dp

-        super().__init__(suite, selector_fn, key_fn, field, update_fn, "builds", True)
+        config: Optional[SuiteStableRunConfig] = bm_exec_context().get(PolybenchBenchmarkSuite.STABLE_CONFIG)
+        if config is not None:
+            self._stable_run: bool = True
+            self._v1_benchmarks: List[str] = [
+                b for b in config.benchmarks() if config.get_benchmark(b).policy == StableRunPolicy.INDIVIDUAL_BUILDS
+            ]
+        else:
+            self._stable_run: bool = False
+            self._v1_benchmarks: List[str] = []
+
+        super().__init__(suite, selector_fn, key_fn, field, update_fn, True)
+
+    def select_datapoints(self, datapoints: DataPoints) -> DataPoints:
+        if self._stable_run:
+            self._selector_fn = lambda dp: (
+                (
+                    dp["benchmark"] in self._v1_benchmarks
+                    and dp["metric.name"] == "avg-time"
+                    and dp["metric.object"] == "build"
+                )
+                or (
+                    dp["benchmark"] not in self._v1_benchmarks
+                    and dp["metric.name"] == "avg-time"
+                    and dp["metric.object"] == "fork"
+                )
+            )
+        return super().select_datapoints(datapoints)
+
+    def determine_stable_run_outlier_exclusion_percentiles(self, bench_config: BenchmarkStableRunConfig):
+        self._lower_percentile = bench_config.build_outlier_exclusion.lower_percentile
+        self._upper_percentile = bench_config.build_outlier_exclusion.upper_percentile


 class GraalSpecificFieldsRemoverPostProcessor(DataPointsPostProcessor):
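
A standalone sketch of the datapoint selection this post-processor now performs during stable runs: legacy (INDIVIDUAL_BUILDS) benchmarks are summarized from per-build averages, while ALL_BUILDS benchmarks are summarized directly from per-fork averages; the datapoint dicts and benchmark names are hypothetical:

    v1_benchmarks = ["legacy-bench"]  # hypothetical INDIVIDUAL_BUILDS benchmarks

    def selected(dp):
        if dp["metric.name"] != "avg-time":
            return False
        wanted_object = "build" if dp["benchmark"] in v1_benchmarks else "fork"
        return dp["metric.object"] == wanted_object

    datapoints = [
        {"benchmark": "legacy-bench", "metric.name": "avg-time", "metric.object": "build"},
        {"benchmark": "legacy-bench", "metric.name": "avg-time", "metric.object": "fork"},
        {"benchmark": "new-bench", "metric.name": "avg-time", "metric.object": "fork"},
    ]
    print([dp for dp in datapoints if selected(dp)])  # the first and third survive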
@@ -1011,7 +1175,7 @@ def name(self):
         return "polybench"

     def version(self):
-        return "0.3.0"
+        return "0.4.0"

     def _resolve_benchmarks(self) -> Dict[str, ResolvedPolybenchBenchmark]:
         if not hasattr(self, "_benchmarks"):
@@ -1152,12 +1316,11 @@ def _resolve_current_benchmark(self, benchmarks) -> ResolvedPolybenchBenchmark:
             mx.abort(f"Must specify one benchmark at a time (given: {benchmarks})")
         return self._resolve_benchmarks()[benchmarks[0]]

-    def _resolve_stable_run_config(self):
+    def _resolve_stable_run_config(self) -> Optional[SuiteStableRunConfig]:
         config_path = self.polybench_bench_suite_args(bm_exec_context().get("bm_suite_args")).stable_run_config
         if config_path is None:
-            return {}
-        with open(config_path) as f:
-            return json.load(f)
+            return None
+        return SuiteStableRunConfig(config_path)

     @staticmethod
     def _prepare_distributions(
