diff --git a/iree-tf/benchmark/benchmark_model.py b/iree-tf/benchmark/benchmark_model.py
index a1f63c6c..83ec5ae6 100644
--- a/iree-tf/benchmark/benchmark_model.py
+++ b/iree-tf/benchmark/benchmark_model.py
@@ -15,7 +15,7 @@
 from typing import Optional

 # Add library dir to the search path.
-sys.path.insert(0, str(pathlib.Path(__file__).parent.parent / "library"))
+sys.path.insert(0, str(pathlib.Path(__file__).parents[1] / "library"))
 from models import resnet50, bert_large, t5_large

 # Add benchmark definitions to the search path.
@@ -46,6 +46,15 @@ def benchmark_lookup(unique_id: str):
   raise ValueError(f"Model definition not supported")


+def benchmark_lookup(benchmark_id: str):
+  benchmark = tf_inference_benchmarks.ID_TO_BENCHMARK_MAP.get(benchmark_id)
+  if benchmark is None:
+    raise ValueError(f"Id {benchmark_id} does not exist in benchmark suite.")
+
+  model_name, model_class = model_lookup(benchmark.model.id)
+  return (model_name, model_class, benchmark)
+
+
 def dump_result(file_path: str, result: dict) -> None:
   with open(file_path, "r") as f:
     dictObj = json.load(f)
@@ -66,7 +75,8 @@ def bytes_to_mb(bytes: Optional[int]) -> Optional[float]:
 def run_framework_benchmark(model_name: str, model_class: type[tf.Module],
                             batch_size: int, warmup_iterations: int,
                             benchmark_iterations: int, tf_device: str,
-                            hlo_dump_dir: str, dump_hlo: bool, shared_dict) -> None:
+                            hlo_dump_dir: str, dump_hlo: bool,
+                            shared_dict) -> None:
   try:
     with tf.device(tf_device):
       if dump_hlo:
@@ -216,17 +226,16 @@ def run_compiler_benchmark(hlo_benchmark_tool_path: str, hlo_dir: str,

   args = argParser.parse_args()

-  model_name, model_class, model_definition = benchmark_lookup(
-      args.benchmark_id)
+  model_name, model_class, benchmark = benchmark_lookup(args.benchmark_id)
   print(
-      f"\n\n--- {model_name} {args.benchmark_id} -------------------------------------"
+      f"\n\n--- {benchmark.name} {args.benchmark_id} -------------------------------------"
   )

   if os.path.exists(_HLO_DUMP_DIR):
     shutil.rmtree(_HLO_DUMP_DIR)
   os.mkdir(_HLO_DUMP_DIR)

-  batch_size = model_definition.input_batch_size
+  batch_size = benchmark.input_batch_size
   benchmark_definition = {
       "benchmark_id": args.benchmark_id,
-      "benchmark_name": model_definition.name,
+      "benchmark_name": benchmark.name,
@@ -248,9 +257,9 @@ def run_compiler_benchmark(hlo_benchmark_tool_path: str, hlo_dir: str,
     shared_dict = manager.dict()

     if args.run_in_process:
-      run_framework_benchmark(model_name, model_class, batch_size, args.warmup_iterations,
-                              args.iterations, tf_device, _HLO_DUMP_DIR, dump_hlo,
-                              shared_dict)
+      run_framework_benchmark(model_name, model_class, batch_size,
+                              args.warmup_iterations, args.iterations,
+                              tf_device, _HLO_DUMP_DIR, dump_hlo, shared_dict)
     else:
       p = multiprocessing.Process(target=run_framework_benchmark,
                                   args=(model_name, model_class, batch_size,
@@ -269,8 +278,10 @@ def run_compiler_benchmark(hlo_benchmark_tool_path: str, hlo_dir: str,
     shared_dict = manager.dict()

     if args.run_in_process:
-      run_compiler_benchmark(args.hlo_benchmark_path, _HLO_DUMP_DIR, args.hlo_iterations,
-                             "cuda" if args.device == "gpu" else "cpu", shared_dict)
+      run_compiler_benchmark(args.hlo_benchmark_path, _HLO_DUMP_DIR,
+                             args.hlo_iterations,
+                             "cuda" if args.device == "gpu" else "cpu",
+                             shared_dict)
     else:
       p = multiprocessing.Process(
           target=run_compiler_benchmark,
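Reviewer context, not part of the patch: the reworked benchmark_lookup above returns the InferenceBenchmark definition alongside the model, so main() now reads input_batch_size, inputs, and outputs from the benchmark rather than from the Model. A minimal usage sketch follows; the id string is hypothetical, since the authoritative ids live in tf_inference_benchmarks.ID_TO_BENCHMARK_MAP, which this patch references but does not include.

    # Usage sketch only -- the benchmark id below is made up; real ids come
    # from tf_inference_benchmarks.ID_TO_BENCHMARK_MAP (not in this patch).
    model_name, model_class, benchmark = benchmark_lookup(
        "RESNET50-fp32-TF-224x224x3xf32-batch1")  # hypothetical id string
    batch_size = benchmark.input_batch_size  # was model_definition.input_batch_size
    inputs = benchmark.inputs    # ModelData, moved from Model to InferenceBenchmark
    outputs = benchmark.outputs  # expected outputs, likewise moved
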
diff --git a/oobi/benchmark-definitions/python/data_types.py b/oobi/benchmark-definitions/python/data_types.py
index 0c95b31b..87dd2b3c 100644
--- a/oobi/benchmark-definitions/python/data_types.py
+++ b/oobi/benchmark-definitions/python/data_types.py
@@ -97,11 +97,32 @@ class Model(object):
   # Tags that describe the model characteristics.
   tags: List[str]
   meta_model: MetaModel
-  input_batch_size: int
-  inputs: ModelData
-  outputs: ModelData

   # A list of artifacts derived from this model.
   artifacts: List[ModelArtifact]

   def __str__(self):
     return self.name
+
+
+@serialization.serializable
+@dataclass(frozen=True)
+class InferenceBenchmark(object):
+  """Inference benchmark definition."""
+
+  id: str
+  # Unique friendly name.
+  name: str
+  # Tags that describe the benchmark characteristics.
+  tags: List[str]
+
+  # The model to run inference on.
+  model: Model
+  # Model inputs.
+  inputs: ModelData
+  # Model input batch size.
+  input_batch_size: int
+  # Expected model outputs.
+  outputs: ModelData
+
+  def __str__(self):
+    return self.name
diff --git a/oobi/benchmark-definitions/python/input_data_definitions.py b/oobi/benchmark-definitions/python/input_data_definitions.py
index 17765657..e41cba88 100644
--- a/oobi/benchmark-definitions/python/input_data_definitions.py
+++ b/oobi/benchmark-definitions/python/input_data_definitions.py
@@ -91,6 +91,15 @@
     ],
 )

+IMAGENET_APPLES_224X224X3XF32_BATCHES = {
+    1: IMAGENET_APPLES_224X224X3XF32_BATCH1,
+    8: IMAGENET_APPLES_224X224X3XF32_BATCH8,
+    64: IMAGENET_APPLES_224X224X3XF32_BATCH64,
+    128: IMAGENET_APPLES_224X224X3XF32_BATCH128,
+    256: IMAGENET_APPLES_224X224X3XF32_BATCH256,
+    2048: IMAGENET_APPLES_224X224X3XF32_BATCH2048,
+}
+
 BERT_LARGE_SEQLEN384_I32_BATCH1 = data_types.ModelData(
     id=unique_ids.INPUT_DATA_BERT_LARGE_SEQLEN384_I32_BATCH1,
     name="BERT_LARGE_SEQLEN384_I32_BATCH1",
@@ -244,6 +253,18 @@
     ],
 )

+BERT_LARGE_SEQLEN384_I32_BATCHES = {
+    1: BERT_LARGE_SEQLEN384_I32_BATCH1,
+    16: BERT_LARGE_SEQLEN384_I32_BATCH16,
+    24: BERT_LARGE_SEQLEN384_I32_BATCH24,
+    32: BERT_LARGE_SEQLEN384_I32_BATCH32,
+    48: BERT_LARGE_SEQLEN384_I32_BATCH48,
+    64: BERT_LARGE_SEQLEN384_I32_BATCH64,
+    512: BERT_LARGE_SEQLEN384_I32_BATCH512,
+    1024: BERT_LARGE_SEQLEN384_I32_BATCH1024,
+    1280: BERT_LARGE_SEQLEN384_I32_BATCH1280,
+}
+
 T5_LARGE_SEQLEN512_I32_BATCH1 = data_types.ModelData(
     id=unique_ids.INPUT_DATA_T5_LARGE_SEQLEN512_I32_BATCH1,
     name="T5_LARGE_SEQLEN512_I32_BATCH1",
@@ -369,3 +390,13 @@
         "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.12.0_1681767794/T5_LARGE/batch_512/input_1.npy",
     ],
 )
+
+T5_LARGE_SEQLEN512_I32_BATCHES = {
+    1: T5_LARGE_SEQLEN512_I32_BATCH1,
+    16: T5_LARGE_SEQLEN512_I32_BATCH16,
+    24: T5_LARGE_SEQLEN512_I32_BATCH24,
+    32: T5_LARGE_SEQLEN512_I32_BATCH32,
+    48: T5_LARGE_SEQLEN512_I32_BATCH48,
+    64: T5_LARGE_SEQLEN512_I32_BATCH64,
+    512: T5_LARGE_SEQLEN512_I32_BATCH512,
+}
diff --git a/oobi/benchmark-definitions/python/tf_model_definitions.py b/oobi/benchmark-definitions/python/tf_model_definitions.py
index f7a5b60e..a51ad94b 100644
--- a/oobi/benchmark-definitions/python/tf_model_definitions.py
+++ b/oobi/benchmark-definitions/python/tf_model_definitions.py
@@ -1,6 +1,4 @@
 import data_types
-import input_data_definitions
-import tf_output_data_definitions
 import unique_ids

 PARENT_GCS_DIR = "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.12.0_1681767794"
@@ -36,7 +34,6 @@
     data_type=data_types.DataType.FP32,
 )

-
 # Resnet50 models.
 # Model implementation from https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet50.
 # Batch sizes from MLPerf A100 Configs: https://github.com/mlcommons/inference_results_v2.1/tree/master/closed/NVIDIA/configs/resnet50
@@ -45,9 +42,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH1",
     tags=["batch-1"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=1,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH1,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH1,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -69,9 +63,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH8",
     tags=["batch-8"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=8,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH8,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH8,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -93,9 +84,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH64",
     tags=["batch-64"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=64,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH64,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH64,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -117,9 +105,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH128",
     tags=["batch-128"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=128,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH128,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH128,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -141,9 +126,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH256",
     tags=["batch-256"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=256,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH256,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH256,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -165,9 +147,6 @@
     name="RESNET50_FP32_TF_224X224X3XF32_BATCH2048",
     tags=["batch-2048"],
     meta_model=RESNET50_FP32_TF,
-    input_batch_size=2048,
-    inputs=input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCH2048,
-    outputs=tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCH2048,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -184,6 +163,14 @@
     ],
 )

+RESNET50_FP32_TF_224X224X3XF32_BATCHES = {
+    1: RESNET50_FP32_TF_224X224X3XF32_BATCH1,
+    8: RESNET50_FP32_TF_224X224X3XF32_BATCH8,
+    64: RESNET50_FP32_TF_224X224X3XF32_BATCH64,
+    128: RESNET50_FP32_TF_224X224X3XF32_BATCH128,
+    256: RESNET50_FP32_TF_224X224X3XF32_BATCH256,
+    2048: RESNET50_FP32_TF_224X224X3XF32_BATCH2048,
+}

 # Bert-Large models.
 # Model implementation from https://huggingface.co/docs/transformers/model_doc/bert#transformers.TFBertModel.
@@ -193,9 +180,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH1",
     tags=["batch-1"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=1,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH1,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH1,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -217,9 +201,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH16",
     tags=["batch-16"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=16,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH16,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH16,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -241,9 +222,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH24",
     tags=["batch-24"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=24,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH24,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH24,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -265,9 +243,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH32",
     tags=["batch-32"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=32,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH32,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH32,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -289,9 +264,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH48",
     tags=["batch-48"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=48,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH48,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH48,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -313,9 +285,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH64",
     tags=["batch-64"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=64,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH64,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH64,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -337,9 +306,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH512",
     tags=["batch-512"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=512,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH512,
-    outputs=tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCH512,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -361,10 +327,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH1024",
     tags=["batch-1024"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=1024,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH1024,
-    outputs=tf_output_data_definitions.
-    BERT_LARGE_FP32_TF_384X1024XF32_BATCH1024,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -386,10 +348,6 @@
     name="BERT_LARGE_FP32_TF_384XI32_BATCH1280",
     tags=["batch-1280"],
     meta_model=BERT_LARGE_FP32_TF,
-    input_batch_size=1280,
-    inputs=input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCH1280,
-    outputs=tf_output_data_definitions.
-    BERT_LARGE_FP32_TF_384X1024XF32_BATCH1280,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -406,6 +364,18 @@
     ],
 )

+BERT_LARGE_FP32_TF_384XI32_BATCHES = {
+    1: BERT_LARGE_FP32_TF_384XI32_BATCH1,
+    16: BERT_LARGE_FP32_TF_384XI32_BATCH16,
+    24: BERT_LARGE_FP32_TF_384XI32_BATCH24,
+    32: BERT_LARGE_FP32_TF_384XI32_BATCH32,
+    48: BERT_LARGE_FP32_TF_384XI32_BATCH48,
+    64: BERT_LARGE_FP32_TF_384XI32_BATCH64,
+    512: BERT_LARGE_FP32_TF_384XI32_BATCH512,
+    1024: BERT_LARGE_FP32_TF_384XI32_BATCH1024,
+    1280: BERT_LARGE_FP32_TF_384XI32_BATCH1280,
+}
+
 # T5-Large models.
 # Model implementation from https://huggingface.co/docs/transformers/model_doc/t5#transformers.TFT5Model
 # Bert-Large batch sizes used for T5-Large models.
@@ -414,9 +384,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH1",
     tags=["batch-1"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=1,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH1,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH1,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -438,9 +405,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH16",
     tags=["batch-16"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=16,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH16,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH16,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -462,9 +426,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH24",
     tags=["batch-24"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=24,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH24,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH24,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -486,9 +447,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH32",
     tags=["batch-32"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=32,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH32,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH32,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -510,9 +468,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH48",
     tags=["batch-48"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=48,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH48,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH48,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -534,9 +489,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH64",
     tags=["batch-64"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=64,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH64,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH64,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -558,9 +510,6 @@
     name="T5_LARGE_FP32_TF_512XI32_BATCH512",
     tags=["batch-512"],
     meta_model=T5_LARGE_FP32_TF,
-    input_batch_size=512,
-    inputs=input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCH512,
-    outputs=tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCH512,
     artifacts=[
         data_types.ModelArtifact(
             artifact_type=data_types.ModelArtifactType.TF_HLO_DUMP,
@@ -577,6 +526,15 @@
     ],
 )

+T5_LARGE_FP32_TF_512XI32_BATCHES = {
+    1: T5_LARGE_FP32_TF_512XI32_BATCH1,
+    16: T5_LARGE_FP32_TF_512XI32_BATCH16,
+    24: T5_LARGE_FP32_TF_512XI32_BATCH24,
+    32: T5_LARGE_FP32_TF_512XI32_BATCH32,
+    48: T5_LARGE_FP32_TF_512XI32_BATCH48,
+    64: T5_LARGE_FP32_TF_512XI32_BATCH64,
+    512: T5_LARGE_FP32_TF_512XI32_BATCH512,
+}

 # Dictionaries.
 TF_MODELS_DICT = {
diff --git a/oobi/benchmark-definitions/python/tf_output_data_definitions.py b/oobi/benchmark-definitions/python/tf_output_data_definitions.py
index c0a284b0..432eb604 100644
--- a/oobi/benchmark-definitions/python/tf_output_data_definitions.py
+++ b/oobi/benchmark-definitions/python/tf_output_data_definitions.py
@@ -86,6 +86,15 @@
     ],
 )

+RESNET50_FP32_TF_1000XF32_BATCHES = {
+    1: RESNET50_FP32_TF_1000XF32_BATCH1,
+    8: RESNET50_FP32_TF_1000XF32_BATCH8,
+    64: RESNET50_FP32_TF_1000XF32_BATCH64,
+    128: RESNET50_FP32_TF_1000XF32_BATCH128,
+    256: RESNET50_FP32_TF_1000XF32_BATCH256,
+    2048: RESNET50_FP32_TF_1000XF32_BATCH2048,
+}
+
 # Bert-Large Outputs.
 BERT_LARGE_FP32_TF_384X1024XF32_BATCH1 = data_types.ModelData(
     id=unique_ids.OUTPUT_DATA_BERT_LARGE_FP32_TF_384X1024XF32_BATCH1,
@@ -213,6 +222,18 @@
     ],
 )

+BERT_LARGE_FP32_TF_384X1024XF32_BATCHES = {
+    1: BERT_LARGE_FP32_TF_384X1024XF32_BATCH1,
+    16: BERT_LARGE_FP32_TF_384X1024XF32_BATCH16,
+    24: BERT_LARGE_FP32_TF_384X1024XF32_BATCH24,
+    32: BERT_LARGE_FP32_TF_384X1024XF32_BATCH32,
+    48: BERT_LARGE_FP32_TF_384X1024XF32_BATCH48,
+    64: BERT_LARGE_FP32_TF_384X1024XF32_BATCH64,
+    512: BERT_LARGE_FP32_TF_384X1024XF32_BATCH512,
+    1024: BERT_LARGE_FP32_TF_384X1024XF32_BATCH1024,
+    1280: BERT_LARGE_FP32_TF_384X1024XF32_BATCH1280,
+}
+
 # T5-Large Outputs.
 T5_LARGE_FP32_TF_512X1024XF32_BATCH1 = data_types.ModelData(
     id=unique_ids.OUTPUT_DATA_T5_LARGE_FP32_TF_512X1024XF32_BATCH1,
@@ -311,3 +332,13 @@
         "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.12.0_1681767794/T5_LARGE/batch_512/output_0.npy"
     ],
 )
+
+T5_LARGE_FP32_TF_512X1024XF32_BATCHES = {
+    1: T5_LARGE_FP32_TF_512X1024XF32_BATCH1,
+    16: T5_LARGE_FP32_TF_512X1024XF32_BATCH16,
+    24: T5_LARGE_FP32_TF_512X1024XF32_BATCH24,
+    32: T5_LARGE_FP32_TF_512X1024XF32_BATCH32,
+    48: T5_LARGE_FP32_TF_512X1024XF32_BATCH48,
+    64: T5_LARGE_FP32_TF_512X1024XF32_BATCH64,
+    512: T5_LARGE_FP32_TF_512X1024XF32_BATCH512,
+}
diff --git a/oobi/benchmark-definitions/python/unique_ids.py b/oobi/benchmark-definitions/python/unique_ids.py
index 2b9b6d73..31976af2 100644
--- a/oobi/benchmark-definitions/python/unique_ids.py
+++ b/oobi/benchmark-definitions/python/unique_ids.py
@@ -22,6 +22,7 @@

 MODEL_RESNET50_FP32_TF = f"{MODEL_RESNET50_FP32}-TF"
 MODEL_RESNET50_FP32_TF_224X224X3XF32 = f"{MODEL_RESNET50_FP32_TF}-224x224x3xf32"
+INFERENCE_BENCH_RESNET50_FP32_TF_224X224X3XF32_BASE = MODEL_RESNET50_FP32_TF_224X224X3XF32
 MODEL_RESNET50_FP32_TF_224X224X3XF32_BATCH1 = f"{MODEL_RESNET50_FP32_TF_224X224X3XF32}-batch1"
 MODEL_RESNET50_FP32_TF_224X224X3XF32_BATCH8 = f"{MODEL_RESNET50_FP32_TF_224X224X3XF32}-batch8"
 MODEL_RESNET50_FP32_TF_224X224X3XF32_BATCH64 = f"{MODEL_RESNET50_FP32_TF_224X224X3XF32}-batch64"
@@ -52,6 +53,7 @@

 MODEL_BERT_LARGE_FP32_TF = f"{MODEL_BERT_LARGE_FP32}-TF"
 MODEL_BERT_LARGE_FP32_TF_384XI32 = f"{MODEL_BERT_LARGE_FP32_TF}-384xi32"
+INFERENCE_BENCH_BERT_LARGE_FP32_TF_384XI32_BASE = MODEL_BERT_LARGE_FP32_TF_384XI32
 MODEL_BERT_LARGE_FP32_TF_384XI32_BATCH1 = f"{MODEL_BERT_LARGE_FP32_TF_384XI32}-batch1"
 MODEL_BERT_LARGE_FP32_TF_384XI32_BATCH16 = f"{MODEL_BERT_LARGE_FP32_TF_384XI32}-batch16"
 MODEL_BERT_LARGE_FP32_TF_384XI32_BATCH24 = f"{MODEL_BERT_LARGE_FP32_TF_384XI32}-batch24"
@@ -85,14 +87,15 @@
 INPUT_DATA_T5_LARGE_SEQLEN512_I32_BATCH512 = f"{INPUT_DATA_T5_LARGE_SEQLEN512_I32}-batch512"

 MODEL_T5_LARGE_FP32_TF = f"{MODEL_T5_LARGE_FP32}-TF"
 MODEL_T5_LARGE_FP32_TF_512XI32 = f"{MODEL_T5_LARGE_FP32_TF}-512xi32"
+INFERENCE_BENCH_T5_LARGE_FP32_TF_512XI32_BASE = MODEL_T5_LARGE_FP32_TF_512XI32
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH1 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch1"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH16 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch16"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH24 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch24"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH32 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch32"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH48 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch48"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH64 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch64"
 MODEL_T5_LARGE_FP32_TF_512XI32_BATCH512 = f"{MODEL_T5_LARGE_FP32_TF_512XI32}-batch512"

 OUTPUT_DATA_T5_LARGE_FP32_TF_512X1024XF32 = "c72052c0-85b0-49f2-b875-ca3e5031b0df"
 OUTPUT_DATA_T5_LARGE_FP32_TF_512X1024XF32_BATCH1 = f"{OUTPUT_DATA_T5_LARGE_FP32_TF_512X1024XF32}-batch1"
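
Reviewer context, not part of the patch: the *_BATCHES dictionaries added across these files are keyed by batch size and line up one-to-one across models, inputs, and outputs, which suggests how the benchmark suite consumed by benchmark_model.py can be generated. tf_inference_benchmarks.py itself is not included in this patch; the following is a minimal sketch, assuming it zips the three dictionaries into InferenceBenchmark entries and exposes the ID_TO_BENCHMARK_MAP that benchmark_lookup consults. The _build_benchmarks helper and the id/name schemes are assumptions based on the INFERENCE_BENCH_*_BASE constants above.

    # tf_inference_benchmarks.py -- illustrative sketch only; this module is
    # referenced by benchmark_model.py but not included in the patch.
    import data_types
    import input_data_definitions
    import tf_model_definitions
    import tf_output_data_definitions
    import unique_ids


    def _build_benchmarks(base_id, models, inputs, outputs):
      """Creates one InferenceBenchmark per batch size shared by all three dicts."""
      benchmarks = []
      for batch_size, model in models.items():
        benchmarks.append(
            data_types.InferenceBenchmark(
                id=f"{base_id}-batch{batch_size}",  # assumed id scheme
                name=model.name,  # reuses the model's friendly name (assumption)
                tags=[f"batch-{batch_size}"],
                model=model,
                inputs=inputs[batch_size],
                input_batch_size=batch_size,
                outputs=outputs[batch_size]))
      return benchmarks


    ALL_BENCHMARKS = (
        _build_benchmarks(
            unique_ids.INFERENCE_BENCH_RESNET50_FP32_TF_224X224X3XF32_BASE,
            tf_model_definitions.RESNET50_FP32_TF_224X224X3XF32_BATCHES,
            input_data_definitions.IMAGENET_APPLES_224X224X3XF32_BATCHES,
            tf_output_data_definitions.RESNET50_FP32_TF_1000XF32_BATCHES) +
        _build_benchmarks(
            unique_ids.INFERENCE_BENCH_BERT_LARGE_FP32_TF_384XI32_BASE,
            tf_model_definitions.BERT_LARGE_FP32_TF_384XI32_BATCHES,
            input_data_definitions.BERT_LARGE_SEQLEN384_I32_BATCHES,
            tf_output_data_definitions.BERT_LARGE_FP32_TF_384X1024XF32_BATCHES) +
        _build_benchmarks(
            unique_ids.INFERENCE_BENCH_T5_LARGE_FP32_TF_512XI32_BASE,
            tf_model_definitions.T5_LARGE_FP32_TF_512XI32_BATCHES,
            input_data_definitions.T5_LARGE_SEQLEN512_I32_BATCHES,
            tf_output_data_definitions.T5_LARGE_FP32_TF_512X1024XF32_BATCHES))

    # The lookup table that benchmark_model.py's benchmark_lookup consults.
    ID_TO_BENCHMARK_MAP = {b.id: b for b in ALL_BENCHMARKS}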