3939# SOFTWARE.
4040#
4141import contextlib
42+ import dataclasses
4243import hashlib
4344import json
4445import os
4546import re
4647import shutil
4748from argparse import ArgumentParser , Namespace
49+ from enum import Enum
4850from pathlib import Path
4951from typing import Callable , Dict , FrozenSet , Iterable , List , NamedTuple , Optional , Set , Tuple , Union , Any , Generator
5052
@@ -260,6 +262,112 @@ def _check_dist(dist_name: str, require_built: bool = True) -> Optional[str]:
260262 mx .abort (f"Unsupported distribution kind { type (dist )} " )
261263
262264
@dataclasses.dataclass(frozen=True)
class OutlierExclusionConfig:
    """Record class that contains the outlier exclusion lower and upper percentiles."""

    lower_percentile: float
    upper_percentile: float

    @staticmethod
    def from_string(s: str) -> "OutlierExclusionConfig":
        """Constructs an `OutlierExclusionConfig` object from a "<lower_percentile>-<upper_percentile>" string.

        :raises ValueError: if the string does not contain exactly two '-'-separated parts,
                            or if either part is not a valid float literal.
        """
        tokens = s.strip().split("-")
        if len(tokens) != 2:
            raise ValueError(f"Invalid outlier exclusion value: '{s}'")
        lower, upper = tokens
        return OutlierExclusionConfig(float(lower), float(upper))
280+
class SuiteStableRunConfig:
    """Interface for a PolyBench Stable-Run Configuration file."""

    def __init__(self, file_path: Path):
        """Loads the JSON configuration file at ``file_path``.

        :raises OSError: if the file cannot be opened.
        :raises json.JSONDecodeError: if the file does not contain valid JSON.
        """
        with open(file_path) as f:
            self._dict: dict = json.load(f)

    def get_benchmark(self, bench_name: str) -> "BenchmarkStableRunConfig":
        """Returns an interface for the benchmark entry of the configuration file.

        :raises KeyError: if no entry exists for ``bench_name`` (use :meth:`contains` to check first).
        """
        return BenchmarkStableRunConfig(self._dict[bench_name])

    def contains(self, bench_name: str) -> bool:
        """Returns whether an entry in the configuration file exists for a benchmark."""
        return bench_name in self._dict

    def benchmarks(self) -> List[str]:
        """Returns all the benchmarks for which an entry is defined in the configuration file."""
        # Materialize to a real list so the return value matches the declared
        # List[str] type (the previous dict_keys view only behaved list-like).
        return list(self._dict)
299+
300+
class StableRunPolicy(Enum):
    """Policy governing how a stable-run benchmark entry is scheduled and aggregated.

    The enum values are the literal "policy" strings accepted in the
    Stable-Run Configuration file.
    """

    # Legacy format: per-level "builds"/"run-forks" objects with "count" and
    # percentile fields. Slated for deprecation (GR-71845).
    INDIVIDUAL_BUILDS = "outlier-elimination-individual-builds"
    # Newer format: a "<builds>x<forks>" string under "forks" plus a single
    # "<lower>-<upper>" percentile string under "focus".
    ALL_BUILDS = "outlier-elimination-all-builds"
304+
305+
class BenchmarkStableRunConfig:
    """Interface for a benchmark entry of a PolyBench Stable-Run Configuration file."""

    def __init__(self, d: dict):
        # Raw JSON object for a single benchmark entry; its expected keys
        # depend on the entry's policy (see StableRunPolicy).
        self._dict: dict = d

    @property
    def policy(self) -> StableRunPolicy:
        """
        The policy of the benchmark configuration entry.

        Different policies warrant specific handling in the computation of the stabilized metric.
        Different policies may also require different entry formats.
        """
        # We should move towards deprecating the INDIVIDUAL_BUILDS policy (GR-71845)
        # StableRunPolicy(member) returns the member itself, so the enum default is safe here.
        return StableRunPolicy(self._dict.get("policy", StableRunPolicy.INDIVIDUAL_BUILDS))

    @property
    def builds(self) -> int:
        """The number of image builds to execute (in the case of Native Image benchmarks)."""
        if self.policy == StableRunPolicy.INDIVIDUAL_BUILDS:
            return self._dict["builds"]["count"]
        return self._parse_builds_x_forks()[0]

    @property
    def forks(self) -> int:
        """The number of forks to execute (per image build, in the case of Native Image benchmarks)."""
        if self.policy == StableRunPolicy.INDIVIDUAL_BUILDS:
            return self._dict["run-forks"]["count"]
        return self._parse_builds_x_forks()[1]

    @property
    def outlier_exclusion(self) -> OutlierExclusionConfig:
        """The outlier exclusion configuration to be used on fork data.

        :raises ValueError: if the entry does not use the ALL_BUILDS policy.
        """
        if self.policy != StableRunPolicy.ALL_BUILDS:
            raise ValueError(f"This property is not available for the {self.policy} policy!")
        # "focus" is checked to exist for ALL_BUILDS entries by the suite's
        # configuration verification, so .get() should not yield None here.
        return OutlierExclusionConfig.from_string(self._dict.get("focus"))

    @property
    def build_outlier_exclusion(self) -> OutlierExclusionConfig:
        """The outlier exclusion configuration to be used on image build data."""
        if self.policy == StableRunPolicy.ALL_BUILDS:
            # ALL_BUILDS entries carry a single shared percentile range.
            return self.outlier_exclusion
        config = self._dict["builds"]
        return OutlierExclusionConfig(config["lower-percentile"], config["upper-percentile"])

    @property
    def fork_outlier_exclusion(self) -> OutlierExclusionConfig:
        """The outlier exclusion configuration to be used on data belonging to forks of one image build."""
        if self.policy == StableRunPolicy.ALL_BUILDS:
            # ALL_BUILDS entries carry a single shared percentile range.
            return self.outlier_exclusion
        config = self._dict["run-forks"]
        return OutlierExclusionConfig(config["lower-percentile"], config["upper-percentile"])

    def _parse_builds_x_forks(self) -> Tuple[int, int]:
        """Parses a "<builds>x<forks>" string into a tuple containing the build and fork numbers.

        :raises ValueError: if the policy is not ALL_BUILDS, or the "forks" value is not of the
                            form "<builds>x<forks>" with integer components.
        """
        # Fixed: the return annotation was the legacy tuple literal `(int, int)`,
        # which is not a valid type expression; use typing.Tuple instead.
        if self.policy != StableRunPolicy.ALL_BUILDS:
            raise ValueError(f"This method is not available for the {self.policy} policy!")
        forks = self._dict["forks"]
        parts = forks.strip().split("x")
        if len(parts) != 2:
            raise ValueError(f"Invalid forks value: '{forks}'")
        return int(parts[0]), int(parts[1])
369+
370+
263371class StabilizingPolybenchBenchmarkDispatcher (mx_benchmark .DefaultBenchmarkDispatcher ):
264372 """
265373 Custom dispatching class for non-native-image PolybenchBenchmarkSuite stable runs that facilitates scheduling based
@@ -320,8 +428,7 @@ def __init__(self, state: BenchmarkDispatcherState, stable_run_config: str):
320428 if not self ._stable_run_config_path .exists ():
321429 msg = f"Cannot initialize { self .__class__ .__name__ } instance with non-existing configuration file '{ self ._stable_run_config_path } '!"
322430 raise ValueError (msg )
323- with open (self ._stable_run_config_path ) as f :
324- self ._stable_run_config : dict = json .load (f )
431+ self ._stable_run_config : SuiteStableRunConfig = SuiteStableRunConfig (self ._stable_run_config_path )
325432
326433 def validated_env_dispatch (self ) -> Generator [BenchmarkExecutionConfiguration , Any , None ]:
327434 """
@@ -368,7 +475,7 @@ def dispatch_and_log(
368475 * Second, it iterates over each benchmark which requires to be run in the current benchmark batch.
369476 """
370477 dispatch_counter = 0
371- number_of_batches = max ([self ._stable_run_config [ bench ][ "run- forks" ][ "count" ] for bench in benchmarks ])
478+ number_of_batches = max ([self ._stable_run_config . get_benchmark ( bench ). forks for bench in benchmarks ])
372479 for batch_index in range (number_of_batches ):
373480 if dry_run :
374481 mx .log (f" * Bench batch #{ batch_index + 1 } " )
@@ -383,14 +490,14 @@ def dispatch_and_log(
383490 last_dispatch = dispatch_counter + 1 == total_dispatch_count
384491 with ConstantContextValueManager ("last_dispatch" , last_dispatch ):
385492 fork_info = ForkInfo (
386- fork_number_dict [benchmark ], self ._stable_run_config [ benchmark ][ "run- forks" ][ "count" ]
493+ fork_number_dict [benchmark ], self ._stable_run_config . get_benchmark ( benchmark ). forks
387494 )
388495 yield BenchmarkExecutionConfiguration ([benchmark ], mx_benchmark_args , bm_suite_args , fork_info )
389496 dispatch_counter += 1
390497 fork_number_dict [benchmark ] += 1
391498
392499 def _get_benchmarks_for_batch (self , benchmarks : List [str ], batch_index : int ):
393- return [bench for bench in benchmarks if self ._stable_run_config [ bench ][ "run- forks" ][ "count" ] > batch_index ]
500+ return [bench for bench in benchmarks if self ._stable_run_config . get_benchmark ( bench ). forks > batch_index ]
394501
395502 def _verify_no_conflicting_args_are_set (self ):
396503 mx_benchmark_args_dict = vars (self .state .mx_benchmark_args )
@@ -412,16 +519,24 @@ def _parse_benchmark_list(self) -> List[str]:
412519 def _verify_stable_run_config (self , benchmarks : List [str ]):
413520 levels = self ._get_required_config_levels ()
414521 fields = ["count" ]
522+ v2_fields = ["forks" , "focus" ]
415523 for bench in benchmarks :
416- if bench not in self ._stable_run_config :
524+ if not self ._stable_run_config . contains ( bench ) :
417525 msg = f"PolyBench stable run configuration file at '{ self ._stable_run_config_path } ' is missing an entry for the '{ bench } ' benchmark!"
418526 raise ValueError (msg )
419- bench_config = self ._stable_run_config [bench ]
527+ bench_config = self ._stable_run_config .get_benchmark (bench )
528+ if bench_config .policy == StableRunPolicy .ALL_BUILDS :
529+ for field in v2_fields :
530+ if field not in bench_config ._dict :
531+ msg = f"PolyBench stable run configuration file at '{ self ._stable_run_config_path } ' is missing the '{ field } ' key in the '{ bench } ' object!"
532+ raise ValueError (msg )
533+ continue
534+ # To be removed once all INDIVIDUAL_BUILDS policy benchmarks are updated
420535 for level in levels :
421- if level not in bench_config :
536+ if level not in bench_config . _dict :
422537 msg = f"PolyBench stable run configuration file at '{ self ._stable_run_config_path } ' is missing the '{ level } ' key in the '{ bench } ' object!"
423538 raise ValueError (msg )
424- level_config = bench_config [level ]
539+ level_config = bench_config . _dict [level ]
425540 for field in fields :
426541 if field not in level_config :
427542 msg = f"PolyBench stable run configuration file at '{ self ._stable_run_config_path } ' is missing the '{ field } ' key in the '{ bench } .{ level } ' object!"
@@ -563,7 +678,7 @@ def dispatch_and_log(
563678 * Third, it iterates over each benchmark which requires to be run in the current benchmark batch.
564679 This loop is implemented in the `dispatch_batch` method.
565680 """
566- build_count = max ([self ._stable_run_config [ bench ][ " builds" ][ "count" ] for bench in benchmarks ])
681+ build_count = max ([self ._stable_run_config . get_benchmark ( bench ). builds for bench in benchmarks ])
567682 self ._dispatch_counter = 0
568683 with ConstantContextValueManager (PolybenchBenchmarkSuite .PGO_PROFILES , []):
569684 for build_index in range (build_count ):
@@ -582,10 +697,10 @@ def dispatch_build(
582697 mx .log (f" * Build #{ build_index + 1 } " )
583698 build_stages = ["agent" , "instrument-image" , "instrument-run" , "image" ] if build_index == 0 else ["image" ]
584699 current_build_benchmarks = [
585- bench for bench in benchmarks if self ._stable_run_config [ bench ][ " builds" ][ "count" ] > build_index
700+ bench for bench in benchmarks if self ._stable_run_config . get_benchmark ( bench ). builds > build_index
586701 ]
587702 number_of_preparation_batches = len (build_stages )
588- bench_batches = [self ._stable_run_config [ bench ][ "run- forks" ][ "count" ] for bench in current_build_benchmarks ]
703+ bench_batches = [self ._stable_run_config . get_benchmark ( bench ). forks for bench in current_build_benchmarks ]
589704 number_of_batches = number_of_preparation_batches + max (bench_batches )
590705 with ConstantContextValueManager (PolybenchBenchmarkSuite .BUILD_BENCHMARKS , current_build_benchmarks ):
591706 for batch_index in range (number_of_batches ):
@@ -639,8 +754,8 @@ def dispatch_batch(
639754 last_dispatch = self ._dispatch_counter + 1 == total_dispatch_count
640755 with ConstantContextValueManager ("last_dispatch" , last_dispatch ):
641756 total_fork_count = (
642- self ._stable_run_config [ benchmark ][ " builds" ][ "count" ]
643- * self ._stable_run_config [ benchmark ][ "run- forks" ][ "count" ]
757+ self ._stable_run_config . get_benchmark ( benchmark ). builds
758+ * self ._stable_run_config . get_benchmark ( benchmark ). forks
644759 )
645760 fork_info = ForkInfo (fork_number_dict [benchmark ], total_fork_count )
646761 yield BenchmarkExecutionConfiguration ([benchmark ], mx_bench_args , extended_bm_suite_args , fork_info )
@@ -747,14 +862,12 @@ def __init__(
747862 key_fn : Optional [Callable [[DataPoint ], Any ]],
748863 field : str ,
749864 update_fn : Optional [Callable [[DataPoint ], DataPoint ]],
750- aggregation_level : str ,
751865 final_consumer : bool ,
752866 ):
753867 # The lower and upper percentiles will be set on a per-group basis in `calculate_aggregate_value` - as they
754868 # can have different values for different benchmarks.
755869 super ().__init__ (selector_fn , key_fn , field , update_fn , 0 , 1 )
756870 self ._suite = suite
757- self ._aggregation_level = aggregation_level
758871 self ._final_consumer = final_consumer
759872
760873 def select_datapoints (self , datapoints : DataPoints ) -> DataPoints :
@@ -772,16 +885,25 @@ def process_datapoints(self, datapoints: DataPoints) -> DataPoints:
772885 return super ().process_datapoints (datapoints )
773886
774887 def calculate_aggregate_value (self , datapoints : DataPoints ) -> Any :
775- config = bm_exec_context ().get (PolybenchBenchmarkSuite .STABLE_CONFIG )
776- benchmark = self .get_and_verify_unique_benchmark_dimension (datapoints )
777- self ._lower_percentile = self ._suite .resolve_config_field_or_default (
778- config , [benchmark , self ._aggregation_level , "lower-percentile" ], 0
779- )
780- self ._upper_percentile = self ._suite .resolve_config_field_or_default (
781- config , [benchmark , self ._aggregation_level , "upper-percentile" ], 1
782- )
888+ self .determine_outlier_exclusion_percentiles (datapoints )
783889 return super ().calculate_aggregate_value (datapoints )
784890
891+ def determine_outlier_exclusion_percentiles (self , datapoints : DataPoints ):
892+ config : Optional [SuiteStableRunConfig ] = bm_exec_context ().get (PolybenchBenchmarkSuite .STABLE_CONFIG )
893+ benchmark : str = self .get_and_verify_unique_benchmark_dimension (datapoints )
894+ if config is None :
895+ # Handle non-stable-run benchmarks
896+ self ._lower_percentile = 0
897+ self ._upper_percentile = 1
898+ return
899+ # Handle stable-run benchmarks
900+ bench_config = config .get_benchmark (benchmark )
901+ self .determine_stable_run_outlier_exclusion_percentiles (bench_config )
902+
903+ def determine_stable_run_outlier_exclusion_percentiles (self , bench_config : BenchmarkStableRunConfig ):
904+ self ._lower_percentile = bench_config .outlier_exclusion .lower_percentile
905+ self ._upper_percentile = bench_config .outlier_exclusion .upper_percentile
906+
785907 def get_and_verify_unique_benchmark_dimension (self , datapoints : DataPoints ) -> str :
786908 benchmark = datapoints [0 ]["benchmark" ]
787909 for dp in datapoints :
@@ -819,7 +941,11 @@ def update_fn(dp):
819941 self .verify_and_process_id_score_function (dp )
820942 return dp
821943
822- super ().__init__ (suite , selector_fn , key_fn , field , update_fn , "run-forks" , True )
944+ super ().__init__ (suite , selector_fn , key_fn , field , update_fn , True )
945+
946+ def determine_stable_run_outlier_exclusion_percentiles (self , bench_config : BenchmarkStableRunConfig ):
947+ self ._lower_percentile = bench_config .fork_outlier_exclusion .lower_percentile
948+ self ._upper_percentile = bench_config .fork_outlier_exclusion .upper_percentile
823949
824950
825951class NativeModeBuildSummaryPostProcessor (FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor ):
@@ -843,7 +969,11 @@ def update_fn(dp):
843969 self .verify_and_process_id_score_function (dp )
844970 return dp
845971
846- super ().__init__ (suite , selector_fn , key_fn , field , update_fn , "run-forks" , False )
972+ super ().__init__ (suite , selector_fn , key_fn , field , update_fn , False )
973+
974+ def determine_stable_run_outlier_exclusion_percentiles (self , bench_config : BenchmarkStableRunConfig ):
975+ self ._lower_percentile = bench_config .fork_outlier_exclusion .lower_percentile
976+ self ._upper_percentile = bench_config .fork_outlier_exclusion .upper_percentile
847977
848978
849979class NativeModeBenchmarkSummaryPostProcessor (FinalDispatchFinalStageAverageWithOutlierRemovalPostProcessor ):
@@ -860,14 +990,48 @@ def __init__(self, suite: "PolybenchBenchmarkSuite"):
860990
861991 def update_fn (dp ):
862992 dp ["metric.name" ] = "time"
993+ if "metric.fork-number" in dp :
994+ del dp ["metric.fork-number" ]
995+ if "native-image.image-fork-number" in dp :
996+ del dp ["native-image.image-fork-number" ]
863997 if "metric.object" in dp :
864998 del dp ["metric.object" ]
865999 if "native-image.rebuild-number" in dp :
8661000 del dp ["native-image.rebuild-number" ]
8671001 self .verify_and_process_id_score_function (dp )
8681002 return dp
8691003
870- super ().__init__ (suite , selector_fn , key_fn , field , update_fn , "builds" , True )
1004+ config : Optional [SuiteStableRunConfig ] = bm_exec_context ().get (PolybenchBenchmarkSuite .STABLE_CONFIG )
1005+ if config is not None :
1006+ self ._stable_run : bool = True
1007+ self ._v1_benchmarks : List [str ] = [
1008+ b for b in config .benchmarks () if config .get_benchmark (b ).policy == StableRunPolicy .INDIVIDUAL_BUILDS
1009+ ]
1010+ else :
1011+ self ._stable_run : bool = False
1012+ self ._v1_benchmarks : List [str ] = []
1013+
1014+ super ().__init__ (suite , selector_fn , key_fn , field , update_fn , True )
1015+
1016+ def select_datapoints (self , datapoints : DataPoints ) -> DataPoints :
1017+ if self ._stable_run :
1018+ self ._selector_fn = lambda dp : (
1019+ (
1020+ dp ["benchmark" ] in self ._v1_benchmarks
1021+ and dp ["metric.name" ] == "avg-time"
1022+ and dp ["metric.object" ] == "build"
1023+ )
1024+ or (
1025+ dp ["benchmark" ] not in self ._v1_benchmarks
1026+ and dp ["metric.name" ] == "avg-time"
1027+ and dp ["metric.object" ] == "fork"
1028+ )
1029+ )
1030+ return super ().select_datapoints (datapoints )
1031+
1032+ def determine_stable_run_outlier_exclusion_percentiles (self , bench_config : BenchmarkStableRunConfig ):
1033+ self ._lower_percentile = bench_config .build_outlier_exclusion .lower_percentile
1034+ self ._upper_percentile = bench_config .build_outlier_exclusion .upper_percentile
8711035
8721036
8731037class GraalSpecificFieldsRemoverPostProcessor (DataPointsPostProcessor ):
@@ -1011,7 +1175,7 @@ def name(self):
10111175 return "polybench"
10121176
10131177 def version (self ):
1014- return "0.3 .0"
1178+ return "0.4 .0"
10151179
10161180 def _resolve_benchmarks (self ) -> Dict [str , ResolvedPolybenchBenchmark ]:
10171181 if not hasattr (self , "_benchmarks" ):
@@ -1152,12 +1316,11 @@ def _resolve_current_benchmark(self, benchmarks) -> ResolvedPolybenchBenchmark:
11521316 mx .abort (f"Must specify one benchmark at a time (given: { benchmarks } )" )
11531317 return self ._resolve_benchmarks ()[benchmarks [0 ]]
11541318
1155- def _resolve_stable_run_config (self ):
1319+ def _resolve_stable_run_config (self ) -> Optional [ SuiteStableRunConfig ] :
11561320 config_path = self .polybench_bench_suite_args (bm_exec_context ().get ("bm_suite_args" )).stable_run_config
11571321 if config_path is None :
1158- return {}
1159- with open (config_path ) as f :
1160- return json .load (f )
1322+ return None
1323+ return SuiteStableRunConfig (config_path )
11611324
11621325 @staticmethod
11631326 def _prepare_distributions (
0 commit comments