From dac2848f4aed39a97b38dc26dbc21100006199db Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Mon, 29 Jun 2026 22:31:39 -0400
Subject: [PATCH 1/9] Upgrade TFX stack to 1.21.x and remove numpy/pandas
 bounds

---
 .../mltransform_generate_vocab_requirements.txt           | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
index 4e00b3b4316c..6d7cd31f4579 100644
--- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
+++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
@@ -18,9 +18,7 @@
 # MLTransform TFT operations need a consistent TensorFlow Transform stack;
 # otherwise workers can crash-loop with pandas/numpy ABI mismatches.
 google-cloud-monitoring>=2.27.0
-tensorflow_transform>=1.14.0,<1.15.0
-tensorflow-metadata>=1.14.0,<1.15.0
-tfx-bsl>=1.14.0,<1.15.0
-numpy<2
-pandas<2
+tensorflow_transform>=1.21.0,<1.22.0
+tensorflow-metadata>=1.21.0,<1.22.0
+tfx-bsl>=1.21.0,<1.22.0
 dill

From c2a62c12f94cff969d33c96024aecbea0aba71e0 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 09:32:50 -0400
Subject: [PATCH 2/9] Append timestamp to artifact_location and output_vocab

---
 .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
index 6bda0379bc7d..be90a4fc5edb 100644
--- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -226,7 +226,7 @@ jobs:
             -Prunner=DataflowRunner \
             -PpythonVersion=3.10 \
             -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}}'
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}'
       - name: run MLTransform One-Hot Encoding Batch
         uses: ./.github/actions/gradle-command-self-hosted-action
         timeout-minutes: 180

From 11cb59ed70c47059d627bf91726d0e61af2e32f4 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 21:21:45 -0400
Subject: [PATCH 3/9] Upgrade TFX stack to 1.21.0; unpin TF, numpy, pyarrow

---
 .../mltransform_tests_requirements.txt           | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt b/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt
index 9f37e070a606..1e64bf29333c 100644
--- a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt
+++ b/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt
@@ -18,12 +18,12 @@
 # Requirements for MLTransform tests on Dataflow workers.
 # Keep this aligned with CloudML benchmark stack to avoid worker import errors.
 dill==0.4.1
-tfx_bsl==1.16.1
-tensorflow-transform==1.16.0
-tensorflow>=2.16,<2.17
-numpy>=1.22.0,<2.0
-tensorflow-metadata>=1.16.1,<1.17.0
-pyarrow>=10,<11
-tensorflow-serving-api>=2.16.1,<2.20
-tf-keras>=2.16.0,<2.17
+tfx_bsl==1.21.0
+tensorflow-transform==1.21.0
+tensorflow-metadata==1.21.0
+tensorflow
+numpy
+pyarrow
+tensorflow-serving-api
+tf-keras
 google-cloud-monitoring>=2.27.0

From 781ec5cdecfbc26e3d2ba4e858148b8a43c74382 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 21:30:08 -0400
Subject: [PATCH 4/9] Move one-hot test requirements to examples/ml_transform

---
 .../workflows/beam_Inference_Python_Benchmarks_Dataflow.yml   | 4 ++--
 ...Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt | 2 +-
 .../mltransform_one_hot_encoding_requirements.txt}            | 0
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename sdks/python/apache_beam/{ml/transforms/mltransform_tests_requirements.txt => examples/ml_transform/mltransform_one_hot_encoding_requirements.txt} (100%)

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
index be90a4fc5edb..3dfece4ecfc6 100644
--- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -237,5 +237,5 @@ jobs:
             -Prunner=DataflowRunner \
             -PpythonVersion=3.10 \
             -PbeamPythonExtra=ml_test \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/transforms/mltransform_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}'
\ No newline at end of file
+            -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}'
diff --git a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt
index 27648d0c0fb0..993dd0820822 100644
--- a/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt
+++ b/.github/workflows/load-tests-pipeline-options/beam_Inference_Python_Benchmarks_Dataflow_MLTransform_One_Hot_Encoding_Batch.txt
@@ -22,7 +22,7 @@
 --staging_location=gs://temp-storage-for-perf-tests/loadtests
 --temp_location=gs://temp-storage-for-perf-tests/loadtests
 --sdk_location=container
---requirements_file=apache_beam/ml/transforms/mltransform_tests_requirements.txt
+--requirements_file=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt
 --publish_to_big_query=true
 --metrics_dataset=beam_run_inference
 --metrics_table=mltransform_one_hot_encoding_batch
diff --git a/sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt
similarity index 100%
rename from sdks/python/apache_beam/ml/transforms/mltransform_tests_requirements.txt
rename to sdks/python/apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt

From 1567148726c0f40e169cb145a61e6a06f94b6c34 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 21:30:53 -0400
Subject: [PATCH 5/9] Run one-hot test only

---
 ...m_Inference_Python_Benchmarks_Dataflow.yml | 256 +++++++++---------
 1 file changed, 128 insertions(+), 128 deletions(-)

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
index 3dfece4ecfc6..5489e5f5898b 100644
--- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -99,134 +99,134 @@ jobs:
       # The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
       - name: get current time
         run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV
-      - name: Build VLLM Development Image
-        id: build_vllm_image
-        uses: ./.github/actions/build-push-docker-action
-        with:
-          dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile'
-          image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base'
-          image_tag: ${{ github.sha }}
-      - name: Run VLLM Gemma Batch Test
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \
-            -Prunner=DataflowRunner \
-            -PsdkLocationOverride=false \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}'
-      - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
-      - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
-      - name: run Pytorch Vision Classification with Resnet 101
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
-      - name: run Pytorch Imagenet Classification with Resnet 152
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
-      - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
-      - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
-      - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
-      - name: run Table Row Inference Sklearn Batch
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}'
-      - name: run Table Row Inference Sklearn Stream
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}'
-      - name: run MLTransform Generate Vocab Batch
-        uses: ./.github/actions/gradle-command-self-hosted-action
-        timeout-minutes: 180
-        with:
-          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-          arguments: |
-            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \
-            -Prunner=DataflowRunner \
-            -PpythonVersion=3.10 \
-            -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \
-            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}'
+      # - name: Build VLLM Development Image
+      #   id: build_vllm_image
+      #   uses: ./.github/actions/build-push-docker-action
+      #   with:
+      #     dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile'
+      #     image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base'
+      #     image_tag: ${{ github.sha }}
+      # - name: Run VLLM Gemma Batch Test
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PsdkLocationOverride=false \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}'
+      # - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
+      # - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
+      # - name: run Pytorch Vision Classification with Resnet 101
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
+      # - name: run Pytorch Imagenet Classification with Resnet 152
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
+      # - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
+      # - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
+      # - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
+      # - name: run Table Row Inference Sklearn Batch
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}'
+      # - name: run Table Row Inference Sklearn Stream
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}'
+      # - name: run MLTransform Generate Vocab Batch
+      #   uses: ./.github/actions/gradle-command-self-hosted-action
+      #   timeout-minutes: 180
+      #   with:
+      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+      #     arguments: |
+      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \
+      #       -Prunner=DataflowRunner \
+      #       -PpythonVersion=3.10 \
+      #       -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \
+      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}'
       - name: run MLTransform One-Hot Encoding Batch
         uses: ./.github/actions/gradle-command-self-hosted-action
         timeout-minutes: 180

From b597f364dbd07dbf07cbb1040a842a7d5b1649ca Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 21:45:02 -0400
Subject: [PATCH 6/9] Remove ml_test extra from one-hot load test

---
 .github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
index 5489e5f5898b..8f04171710bc 100644
--- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -236,6 +236,5 @@ jobs:
             -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_one_hot_encoding_benchmark \
             -Prunner=DataflowRunner \
             -PpythonVersion=3.10 \
-            -PbeamPythonExtra=ml_test \
             -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_one_hot_encoding_requirements.txt \
             '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_12 }} --autoscaling_algorithm=NONE --metrics_table=mltransform_one_hot_encoding_batch --influx_measurement=mltransform_one_hot_encoding_batch --job_name=benchmark-tests-mltransform-one-hot-encoding-batch-${{env.NOW_UTC}} --output_file=gs://temp-storage-for-end-to-end-tests/mltransform/one_hot_output_${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-end-to-end-tests/mltransform/artifacts_${{env.NOW_UTC}}'

From 54783520416a9ad4caf4ddf144fddbb7cd3d432a Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 21:59:02 -0400
Subject: [PATCH 7/9] Revert "Run one-hot test only"

This reverts commit 1567148726c0f40e169cb145a61e6a06f94b6c34.
---
 ...m_Inference_Python_Benchmarks_Dataflow.yml | 256 +++++++++---------
 1 file changed, 128 insertions(+), 128 deletions(-)

diff --git a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
index 8f04171710bc..195ce693a388 100644
--- a/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
+++ b/.github/workflows/beam_Inference_Python_Benchmarks_Dataflow.yml
@@ -99,134 +99,134 @@ jobs:
       # The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
       - name: get current time
         run: echo "NOW_UTC=$(date '+%m%d%H%M%S' --utc)" >> $GITHUB_ENV
-      # - name: Build VLLM Development Image
-      #   id: build_vllm_image
-      #   uses: ./.github/actions/build-push-docker-action
-      #   with:
-      #     dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile'
-      #     image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base'
-      #     image_tag: ${{ github.sha }}
-      # - name: Run VLLM Gemma Batch Test
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PsdkLocationOverride=false \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}'
-      # - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
-      # - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
-      # - name: run Pytorch Vision Classification with Resnet 101
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
-      # - name: run Pytorch Imagenet Classification with Resnet 152
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
-      # - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
-      # - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
-      # - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
-      # - name: run Table Row Inference Sklearn Batch
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}'
-      # - name: run Table Row Inference Sklearn Stream
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}'
-      # - name: run MLTransform Generate Vocab Batch
-      #   uses: ./.github/actions/gradle-command-self-hosted-action
-      #   timeout-minutes: 180
-      #   with:
-      #     gradle-command: :sdks:python:apache_beam:testing:load_tests:run
-      #     arguments: |
-      #       -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \
-      #       -Prunner=DataflowRunner \
-      #       -PpythonVersion=3.10 \
-      #       -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \
-      #       '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}'
+      - name: Build VLLM Development Image
+        id: build_vllm_image
+        uses: ./.github/actions/build-push-docker-action
+        with:
+          dockerfile_path: 'sdks/python/apache_beam/ml/inference/test_resources/vllm.dockerfile'
+          image_name: 'us-docker.pkg.dev/apache-beam-testing/beam-temp/beam-vllm-gpu-base'
+          image_tag: ${{ github.sha }}
+      - name: Run VLLM Gemma Batch Test
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.vllm_gemma_benchmarks \
+            -Prunner=DataflowRunner \
+            -PsdkLocationOverride=false \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/vllm_tests_requirements.txt '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_8 }} --mode=batch --job_name=benchmark-tests-vllm-with-gemma-2b-it-batch-${{env.NOW_UTC}} --sdk_container_image=${{ steps.build_vllm_image.outputs.image_url }}'
+      - name: run Pytorch Sentiment Streaming using Hugging Face distilbert-base-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_6 }} --mode=streaming --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-streaming-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
+      - name: run Pytorch Sentiment Batch using Hugging Face distilbert-base-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_sentiment_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_7 }} --mode=batch --job_name=benchmark-tests-pytorch-sentiment-distilbert-base-uncased-batch-${{env.NOW_UTC}} --output_table=apache-beam-testing.beam_run_inference.result_sentiment_distilbert_base_uncased' \
+      - name: run Pytorch Vision Classification with Resnet 101
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_1 }} --job_name=benchmark-tests-pytorch-imagenet-python-101-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet101-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Imagenet Classification with Resnet 152
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_2 }} --job_name=benchmark-tests-pytorch-imagenet-python-152-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Language Modeling using Hugging Face bert-base-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_3 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-base-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_base_uncased-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Langauge Modeling using Hugging Face bert-large-uncased model
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_language_modeling_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_4 }} --job_name=benchmark-tests-pytorch-language-modeling-bert-large-uncased-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_bert_large_uncased-${{env.NOW_UTC}}.txt' \
+      - name: run Pytorch Imagenet Classification with Resnet 152 with Tesla T4 GPU
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/torch_tests_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_5 }} --job_name=benchmark-tests-pytorch-imagenet-python-gpu-${{env.NOW_UTC}} --output=gs://temp-storage-for-end-to-end-tests/torch/result_resnet152_gpu-${{env.NOW_UTC}}.txt'
+      - name: run Table Row Inference Sklearn Batch
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_9 }} --autoscaling_algorithm=NONE --metrics_table=result_table_row_inference_batch --influx_measurement=result_table_row_inference_batch --mode=batch --input_file=gs://apache-beam-ml/testing/inputs/table_rows_100k_benchmark.jsonl --input_expand_factor=100 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_batch_outputs --job_name=benchmark-tests-table-row-inference-batch-${{env.NOW_UTC}}'
+      - name: run Table Row Inference Sklearn Stream
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.table_row_inference_benchmark \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/ml/inference/table_row_inference_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_10 }} --autoscaling_algorithm=THROUGHPUT_BASED --max_num_workers=20 --metrics_table=result_table_row_inference_stream --influx_measurement=result_table_row_inference_stream --mode=streaming --input_subscription=projects/apache-beam-testing/subscriptions/table_row_inference_benchmark --window_size_sec=60 --trigger_interval_sec=30 --timeout_ms=900000 --output_table=apache-beam-testing:beam_run_inference.result_table_row_inference_stream_outputs --job_name=benchmark-tests-table-row-inference-stream-${{env.NOW_UTC}}'
+      - name: run MLTransform Generate Vocab Batch
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        timeout-minutes: 180
+        with:
+          gradle-command: :sdks:python:apache_beam:testing:load_tests:run
+          arguments: |
+            -PloadTest.mainClass=apache_beam.testing.benchmarks.inference.mltransform_generate_vocab_benchmark \
+            -Prunner=DataflowRunner \
+            -PpythonVersion=3.10 \
+            -PloadTest.requirementsTxtFile=apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt \
+            '-PloadTest.args=${{ env.beam_Inference_Python_Benchmarks_Dataflow_test_arguments_11 }} --job_name=benchmark-tests-mltransform-generate-vocab-batch-${{env.NOW_UTC}} --artifact_location=gs://temp-storage-for-perf-tests/mltransform/vocab_artifacts_${{env.NOW_UTC}} --output_vocab=gs://temp-storage-for-perf-tests/mltransform/vocab_outputs/mltransform_generate_vocab_batch_${{env.NOW_UTC}}'
       - name: run MLTransform One-Hot Encoding Batch
         uses: ./.github/actions/gradle-command-self-hosted-action
         timeout-minutes: 180

From 1876512bb88cb9885a989629ab695a4b0f180252 Mon Sep 17 00:00:00 2001
From: Shunping Huang <shunping@google.com>
Date: Tue, 30 Jun 2026 22:01:47 -0400
Subject: [PATCH 8/9] Pin TFX stack to 1.21.0 and re-add numpy/pandas

---
 .../mltransform_generate_vocab_requirements.txt           | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
index 6d7cd31f4579..13dab50f65ec 100644
--- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
+++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
@@ -18,7 +18,9 @@
 # MLTransform TFT operations need a consistent TensorFlow Transform stack;
 # otherwise workers can crash-loop with pandas/numpy ABI mismatches.
 google-cloud-monitoring>=2.27.0
-tensorflow_transform>=1.21.0,<1.22.0
-tensorflow-metadata>=1.21.0,<1.22.0
-tfx-bsl>=1.21.0,<1.22.0
+tensorflow_transform=1.21.0
+tensorflow-metadata==1.21.0
+tfx-bsl==1.21.0
 dill
+numpy
+pandas

From bc9937ade500914e85649aed21772be189b7407f Mon Sep 17 00:00:00 2001
From: Vitaly Terentyev <vitaly.terentyev@akvelon.com>
Date: Wed, 1 Jul 2026 13:20:50 +0400
Subject: [PATCH 9/9] Fix '=' typo in tensorflow_transform version pin

---
 .../ml_transform/mltransform_generate_vocab_requirements.txt    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
index 13dab50f65ec..0eb79d9480ec 100644
--- a/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
+++ b/sdks/python/apache_beam/examples/ml_transform/mltransform_generate_vocab_requirements.txt
@@ -18,7 +18,7 @@
 # MLTransform TFT operations need a consistent TensorFlow Transform stack;
 # otherwise workers can crash-loop with pandas/numpy ABI mismatches.
 google-cloud-monitoring>=2.27.0
-tensorflow_transform=1.21.0
+tensorflow_transform==1.21.0
 tensorflow-metadata==1.21.0
 tfx-bsl==1.21.0
 dill