From dac2848f4aed39a97b38dc26dbc21100006199db Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Mon, 29 Jun 2026 22:31:39 -0400 Subject: [PATCH 1/9] Upgrade TFX stack to 1.21.x and remove numpy/pandas bounds --- .../mltransform_generate_vocab_requirements.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt index 4e00b3b4316c..6d7cd31f4579 100644 --- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt +++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt @@ -18,9 +18,7 @@ # MLTransform TFT operations need a consistent TensorFlow Transform stack; # otherwise workers can crash-loop with pandas/numpy ABI mismatches. google-cloud-monitoring>=2.27.0 -tensorflow_transform>=1.14.0,<1.15.0 -tensorflow-metadata>=1.14.0,<1.15.0 -tfx-bsl>=1.14.0,<1.15.0 -numpy<2 -pandas<2 +tensorflow_transform>=1.21.0,<1.22.0 +tensorflow-metadata>=1.21.0,<1.22.0 +tfx-bsl>=1.21.0,<1.22.0 dill From c2a62c12f94cff969d33c96024aecbea0aba71e0 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 09:32:50 -0400 Subject: [PATCH 2/9] Append timestamp to artifact_location and output_vocab --- .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index 6bda0379bc7d..be90a4fc5edb 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -226,7 +226,7 @@ jobs: -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ 
-PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}}' + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}' - name: run MLTransform One-Hot Encoding Batch uses: ./.github/actions/gradle-command-self-hosted-action timeout-minutes: 180 From 11cb59ed70c47059d627bf91726d0e61af2e32f4 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 21:21:45 -0400 Subject: [PATCH 3/9] Upgrade tfx stack. Remove some upper bounds. --- .../mltransform_tests_requirements.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt b/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt index 9f37e070a606..1e64bf29333c 100644 --- a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt +++ b/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt @@ -18,12 +18,12 @@ # Requirements for MLTransform tests on Dataflow workers. # Keep this aligned with CloudML benchmark stack to avoid worker import errors. 
dill==0.4.1 -tfx_bsl==1.16.1 -tensorflow-transform==1.16.0 -tensorflow>=2.16,<2.17 -numpy>=1.22.0,<2.0 -tensorflow-metadata>=1.16.1,<1.17.0 -pyarrow>=10,<11 -tensorflow-serving-api>=2.16.1,<2.20 -tf-keras>=2.16.0,<2.17 +tfx_bsl==1.21.0 +tensorflow-transform==1.21.0 +tensorflow-metadata==1.21.0 +tensorflow +numpy +pyarrow +tensorflow-serving-api +tf-keras google-cloud-monitoring>=2.27.0 From 781ec5cdecfbc26e3d2ba4e858148b8a43c74382 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 21:30:08 -0400 Subject: [PATCH 4/9] Consolidate the test requirements file to examples. --- .../workflows/beam_Inference_Python_Benchmarks_Dataflow.yml | 4 ++-- ...Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt | 2 +- .../mltransform_one_hot_encoding_requirements.txt} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename sdks/python/apache_beam/{ml/transforms/mltransform_tests_requirements.txt => examples/ml_transform/mltransform_one_hot_encoding_requirements.txt} (100%) diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index be90a4fc5edb..3dfece4ecfc6 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -237,5 +237,5 @@ jobs: -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ -PbeamPythonExtra=ml_test \ - -PloadTest.requirementsTxtFile=apache_beam/ml/transforms/mltransform_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} 
--artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}' \ No newline at end of file + -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}' diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt index 27648d0c0fb0..993dd0820822 100644 --- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt +++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt @@ -22,7 +22,7 @@ --staging_location=gs://temp-storage-for-perf-tests/loadtests --temp_location=gs://temp-storage-for-perf-tests/loadtests --sdk_location=container ---requirements_file=apache_beam/ml/transforms/mltransform_tests_requirements.txt +--requirements_file=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt --publish_to_big_query=true --metrics_dataset=beam_run_inference --metrics_table=mltransform_one_hot_encoding_batch diff --git a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt similarity index 100% rename from 
sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt rename to sdks/python/apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt From 1567148726c0f40e169cb145a61e6a06f94b6c34 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 21:30:53 -0400 Subject: [PATCH 5/9] Run one-hot test only --- ...m_Inference_Python_Benchmarks_Dataflow.yml | 256 +++++++++--------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index 3dfece4ecfc6..5489e5f5898b 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -99,134 +99,134 @@ jobs: # The env variables are created and populated in the test-arguments-action as "_test_arguments_" - name: get current time run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV - - name: Build VLLM Development Image - id: build_vllm_image - uses: ./.github/actions/build-push-docker-action - with: - dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile' - image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base' - image_tag: ${{ github.sha }} - - name: Run VLLM Gemma Batch Test - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \ - -Prunner=DataflowRunner \ - -PsdkLocationOverride=false \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ 
steps.build_vllm_image.outputs.image_url }}' - - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ - - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ - - name: run Pytorch Vision Classification with Resnet 101 - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - -Prunner=DataflowRunner \ 
- -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \ - - name: run Pytorch Imagenet Classification with Resnet 152 - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \ - - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \ - - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model - uses: 
./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \ - - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt' - - name: run Table Row Inference Sklearn Batch - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ - '-PloadTest.args=${{ 
env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}' - - name: run Table Row Inference Sklearn Stream - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}' - - name: run MLTransform Generate Vocab Batch - uses: ./.github/actions/gradle-command-self-hosted-action - timeout-minutes: 180 - with: - gradle-command: :sdks:python:apache_beam:testing:load_tests:run - arguments: | - -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \ - -Prunner=DataflowRunner \ - -PpythonVersion=3.10 \ - 
-PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \ - '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}' + # - name: Build VLLM Development Image + # id: build_vllm_image + # uses: ./.github/actions/build-push-docker-action + # with: + # dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile' + # image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base' + # image_tag: ${{ github.sha }} + # - name: Run VLLM Gemma Batch Test + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \ + # -Prunner=DataflowRunner \ + # -PsdkLocationOverride=false \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}' + # - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ 
+ # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ + # - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ + # - name: run Pytorch Vision Classification with Resnet 101 + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \ + # - name: run Pytorch Imagenet 
Classification with Resnet 152 + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \ + # - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \ + # - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # 
-PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \ + # - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt' + # - name: run Table Row Inference Sklearn Batch + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 
--output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}' + # - name: run Table Row Inference Sklearn Stream + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}' + # - name: run MLTransform Generate Vocab Batch + # uses: ./.github/actions/gradle-command-self-hosted-action + # timeout-minutes: 180 + # with: + # gradle-command: :sdks:python:apache_beam:testing:load_tests:run + # arguments: | + # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \ + # -Prunner=DataflowRunner \ + # -PpythonVersion=3.10 \ + # -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \ + # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} 
--artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}' - name: run MLTransform One-Hot Encoding Batch uses: ./.github/actions/gradle-command-self-hosted-action timeout-minutes: 180 From b597f364dbd07dbf07cbb1040a842a7d5b1649ca Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 21:45:02 -0400 Subject: [PATCH 6/9] Remove ml_test extra from one-hot load test --- .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index 5489e5f5898b..8f04171710bc 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -236,6 +236,5 @@ jobs: -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_one_hot_encoding_benchmark \ -Prunner=DataflowRunner \ -PpythonVersion=3.10 \ - -PbeamPythonExtra=ml_test \ -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt \ '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}' From 54783520416a9ad4caf4ddf144fddbb7cd3d432a Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 21:59:02 -0400 Subject: [PATCH 7/9] Revert "Run one-hot test only" This reverts commit
1567148726c0f40e169cb145a61e6a06f94b6c34. --- ...m_Inference_Python_Benchmarks_Dataflow.yml | 256 +++++++++--------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml index 8f04171710bc..195ce693a388 100644 --- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml +++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml @@ -99,134 +99,134 @@ jobs: # The env variables are created and populated in the test-arguments-action as "_test_arguments_" - name: get current time run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV - # - name: Build VLLM Development Image - # id: build_vllm_image - # uses: ./.github/actions/build-push-docker-action - # with: - # dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile' - # image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base' - # image_tag: ${{ github.sha }} - # - name: Run VLLM Gemma Batch Test - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \ - # -Prunner=DataflowRunner \ - # -PsdkLocationOverride=false \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}' - # - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: 
:sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ - # - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ - # - name: run Pytorch Vision Classification with Resnet 101 - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ 
env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \ - # - name: run Pytorch Imagenet Classification with Resnet 152 - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \ - # - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \ - # - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: 
:sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \ - # - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt' - # - name: run Table Row Inference Sklearn Batch - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE 
--metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}' - # - name: run Table Row Inference Sklearn Stream - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ - # '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}' - # - name: run MLTransform Generate Vocab Batch - # uses: ./.github/actions/gradle-command-self-hosted-action - # timeout-minutes: 180 - # with: - # gradle-command: :sdks:python:apache_beam:testing:load_tests:run - # arguments: | - # -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \ - # -Prunner=DataflowRunner \ - # -PpythonVersion=3.10 \ - # -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \ - # 
'-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}' + - name: Build VLLM Development Image + id: build_vllm_image + uses: ./.github/actions/build-push-docker-action + with: + dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile' + image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base' + image_tag: ${{ github.sha }} + - name: Run VLLM Gemma Batch Test + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \ + -Prunner=DataflowRunner \ + -PsdkLocationOverride=false \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}' + - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ 
env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ + - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \ + - name: run Pytorch Vision Classification with Resnet 101 + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \ + - name: run Pytorch Imagenet Classification with Resnet 152 + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: 
:sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \ + - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \ + - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} 
--job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \ + - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt' + - name: run Table Row Inference Sklearn Batch + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}' + - name: run Table Row Inference Sklearn Stream + uses: 
./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}' + - name: run MLTransform Generate Vocab Batch + uses: ./.github/actions/gradle-command-self-hosted-action + timeout-minutes: 180 + with: + gradle-command: :sdks:python:apache_beam:testing:load_tests:run + arguments: | + -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \ + -Prunner=DataflowRunner \ + -PpythonVersion=3.10 \ + -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \ + '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}' - name: run MLTransform One-Hot Encoding Batch uses: ./.github/actions/gradle-command-self-hosted-action 
timeout-minutes: 180 From 1876512bb88cb9885a989629ab695a4b0f180252 Mon Sep 17 00:00:00 2001 From: Shunping Huang Date: Tue, 30 Jun 2026 22:01:47 -0400 Subject: [PATCH 8/9] Minor edit on requirements.txt --- .../mltransform_generate_vocab_requirements.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt index 6d7cd31f4579..13dab50f65ec 100644 --- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt +++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt @@ -18,7 +18,9 @@ # MLTransform TFT operations need a consistent TensorFlow Transform stack; # otherwise workers can crash-loop with pandas/numpy ABI mismatches. google-cloud-monitoring>=2.27.0 -tensorflow_transform>=1.21.0,<1.22.0 -tensorflow-metadata>=1.21.0,<1.22.0 -tfx-bsl>=1.21.0,<1.22.0 +tensorflow_transform=1.21.0 +tensorflow-metadata==1.21.0 +tfx-bsl==1.21.0 dill +numpy +pandas From bc9937ade500914e85649aed21772be189b7407f Mon Sep 17 00:00:00 2001 From: Vitaly Terentyev Date: Wed, 1 Jul 2026 13:20:50 +0400 Subject: [PATCH 9/9] Fix typo --- .../ml_transform/mltransform_generate_vocab_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt index 13dab50f65ec..0eb79d9480ec 100644 --- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt +++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt @@ -18,7 +18,7 @@ # MLTransform TFT operations need a consistent TensorFlow Transform stack; # otherwise workers can crash-loop with pandas/numpy ABI mismatches. 
google-cloud-monitoring>=2.27.0 -tensorflow_transform=1.21.0 +tensorflow_transform==1.21.0 tensorflow-metadata==1.21.0 tfx-bsl==1.21.0 dill