From 6f47f7fe94b225a8b535be35a1c6c1bead2e325e Mon Sep 17 00:00:00 2001 From: David Rabinowitz Date: Wed, 18 Jun 2025 14:19:07 -0700 Subject: [PATCH 1/4] Simplifying integration test configuration --- .../spark/bigquery/acceptance/AcceptanceTestConstants.java | 7 ++++--- .../cloud/spark/bigquery/integration/TestConstants.java | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java index 9d01dbc964..3c6326a1b7 100644 --- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java @@ -15,16 +15,17 @@ */ package com.google.cloud.spark.bigquery.acceptance; +import com.google.cloud.ServiceOptions; import com.google.common.base.Preconditions; +import java.util.Optional; public class AcceptanceTestConstants { public static final String REGION = "us-west1"; public static final String DATAPROC_ENDPOINT = REGION + "-dataproc.googleapis.com:443"; public static final String PROJECT_ID = - Preconditions.checkNotNull( - System.getenv("GOOGLE_CLOUD_PROJECT"), - "Please set the 'GOOGLE_CLOUD_PROJECT' environment variable"); + Optional.ofNullable(System.getenv("GOOGLE_CLOUD_PROJECT")) + .orElse(ServiceOptions.getDefaultProjectId()); public static final String SERVERLESS_NETWORK_URI = Preconditions.checkNotNull( System.getenv("SERVERLESS_NETWORK_URI"), diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java index b22e1e11e8..b6874f0532 100644 --- 
a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java @@ -32,6 +32,7 @@ import static org.apache.spark.sql.types.DataTypes.StringType; import static org.apache.spark.sql.types.DataTypes.TimestampType; +import com.google.cloud.ServiceOptions; import com.google.cloud.spark.bigquery.integration.model.ColumnOrderTestClass; import com.google.cloud.spark.bigquery.integration.model.NumStruct; import com.google.cloud.spark.bigquery.integration.model.StringStruct; @@ -43,6 +44,7 @@ import java.sql.Timestamp; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.TimeZone; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -73,9 +75,8 @@ public class TestConstants { "bigquery-public-data.wikipedia.pageviews_2021"; static final long SHAKESPEARE_TABLE_NUM_ROWS = 164656L; static final String PROJECT_ID = - Preconditions.checkNotNull( - System.getenv("GOOGLE_CLOUD_PROJECT"), - "Please set the GOOGLE_CLOUD_PROJECT env variable"); + Optional.ofNullable(System.getenv("GOOGLE_CLOUD_PROJECT")) + .orElse(ServiceOptions.getDefaultProjectId()); static final String TEMPORARY_GCS_BUCKET_ENV_VARIABLE = "TEMPORARY_GCS_BUCKET"; static final String BIGLAKE_CONNECTION_ID_ENV_VARIABLE = "BIGLAKE_CONNECTION_ID"; static final String TEMPORARY_GCS_BUCKET = From 1002bf6d7d7520898b2ffd9979244584f86c3b9c Mon Sep 17 00:00:00 2001 From: David Rabinowitz Date: Wed, 18 Jun 2025 15:20:25 -0700 Subject: [PATCH 2/4] Adding Code Review by Gemini AI GitHub action --- .github/workflows/ai-code-review.yml | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/ai-code-review.yml diff --git a/.github/workflows/ai-code-review.yml b/.github/workflows/ai-code-review.yml new file mode 100644 index 0000000000..d70c1c644e --- /dev/null +++ 
b/.github/workflows/ai-code-review.yml @@ -0,0 +1,45 @@ +name: "Code Review by Gemini AI" + +on: + pull_request: + +jobs: + review: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - uses: actions/checkout@v3 + - name: "Get diff of the pull request" + id: get_diff + shell: bash + env: + PULL_REQUEST_HEAD_REF: "${{ github.event.pull_request.head.ref }}" + PULL_REQUEST_BASE_REF: "${{ github.event.pull_request.base.ref }}" + run: |- + git fetch origin "${{ env.PULL_REQUEST_HEAD_REF }}" + git fetch origin "${{ env.PULL_REQUEST_BASE_REF }}" + git checkout "${{ env.PULL_REQUEST_HEAD_REF }}" + git diff "origin/${{ env.PULL_REQUEST_BASE_REF }}" > "diff.txt" + { + echo "pull_request_diff<<EOF" + cat "diff.txt" + echo "EOF" + } >> $GITHUB_OUTPUT + - uses: ./ + name: "Code Review by Gemini AI" + id: review + with: + gemini_api_key: ${{ secrets.GEMINI_API_KEY }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_repository: ${{ github.repository }} + github_pull_request_number: ${{ github.event.pull_request.number }} + git_commit_hash: ${{ github.event.pull_request.head.sha }} + model: "gemini-1.5-pro" + pull_request_diff: |- + ${{ steps.get_diff.outputs.pull_request_diff }} + pull_request_chunk_size: "3500" + extra_prompt: |- + Sempre responda em português brasileiro! 
+ log_level: "DEBUG" From 863e0b2c85f511a4a973ff4fb60e924f51be9cdf Mon Sep 17 00:00:00 2001 From: David Rabinowitz Date: Wed, 18 Jun 2025 15:24:17 -0700 Subject: [PATCH 3/4] updates --- .github/workflows/ai-code-review.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ai-code-review.yml b/.github/workflows/ai-code-review.yml index d70c1c644e..e591afe6cd 100644 --- a/.github/workflows/ai-code-review.yml +++ b/.github/workflows/ai-code-review.yml @@ -36,10 +36,10 @@ jobs: github_repository: ${{ github.repository }} github_pull_request_number: ${{ github.event.pull_request.number }} git_commit_hash: ${{ github.event.pull_request.head.sha }} - model: "gemini-1.5-pro" + model: "gemini-2.5-pro" pull_request_diff: |- ${{ steps.get_diff.outputs.pull_request_diff }} pull_request_chunk_size: "3500" extra_prompt: |- - Sempre responda em português brasileiro! + Validate your answers, always respond in English! log_level: "DEBUG" From 636aa744be1edaac76a9d771dd998f0edc97bb3f Mon Sep 17 00:00:00 2001 From: David Rabinowitz Date: Wed, 18 Jun 2025 15:28:07 -0700 Subject: [PATCH 4/4] Restored files --- .../spark/bigquery/acceptance/AcceptanceTestConstants.java | 7 +++---- .../cloud/spark/bigquery/integration/TestConstants.java | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java index 3c6326a1b7..9d01dbc964 100644 --- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java @@ -15,17 +15,16 @@ */ package com.google.cloud.spark.bigquery.acceptance; -import com.google.cloud.ServiceOptions; import 
com.google.common.base.Preconditions; -import java.util.Optional; public class AcceptanceTestConstants { public static final String REGION = "us-west1"; public static final String DATAPROC_ENDPOINT = REGION + "-dataproc.googleapis.com:443"; public static final String PROJECT_ID = - Optional.ofNullable(System.getenv("GOOGLE_CLOUD_PROJECT")) - .orElse(ServiceOptions.getDefaultProjectId()); + Preconditions.checkNotNull( + System.getenv("GOOGLE_CLOUD_PROJECT"), + "Please set the 'GOOGLE_CLOUD_PROJECT' environment variable"); public static final String SERVERLESS_NETWORK_URI = Preconditions.checkNotNull( System.getenv("SERVERLESS_NETWORK_URI"), diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java index b6874f0532..b22e1e11e8 100644 --- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java @@ -32,7 +32,6 @@ import static org.apache.spark.sql.types.DataTypes.StringType; import static org.apache.spark.sql.types.DataTypes.TimestampType; -import com.google.cloud.ServiceOptions; import com.google.cloud.spark.bigquery.integration.model.ColumnOrderTestClass; import com.google.cloud.spark.bigquery.integration.model.NumStruct; import com.google.cloud.spark.bigquery.integration.model.StringStruct; @@ -44,7 +43,6 @@ import java.sql.Timestamp; import java.util.Arrays; import java.util.List; -import java.util.Optional; import java.util.TimeZone; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -75,8 +73,9 @@ public class TestConstants { "bigquery-public-data.wikipedia.pageviews_2021"; static final long SHAKESPEARE_TABLE_NUM_ROWS = 164656L; static final String PROJECT_ID = - 
Optional.ofNullable(System.getenv("GOOGLE_CLOUD_PROJECT")) - .orElse(ServiceOptions.getDefaultProjectId()); + Preconditions.checkNotNull( + System.getenv("GOOGLE_CLOUD_PROJECT"), + "Please set the GOOGLE_CLOUD_PROJECT env variable"); static final String TEMPORARY_GCS_BUCKET_ENV_VARIABLE = "TEMPORARY_GCS_BUCKET"; static final String BIGLAKE_CONNECTION_ID_ENV_VARIABLE = "BIGLAKE_CONNECTION_ID"; static final String TEMPORARY_GCS_BUCKET =