Add more logging for cloud run debugging

BoqianShi · BoqianShi · commit 98dff65c09c3 · 2025-11-06T21:43:06.000-08:00
diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java
@@ -45,7 +45,7 @@ public class CatalogIntegrationTestBase {
 
   protected static SparkSession spark;
   private String testTable;
-  // 2. Initialize the SparkSession ONCE before all tests
+
   @BeforeClass
   public static void setupSparkSession() {
     spark =
@@ -60,8 +60,6 @@ public static void setupSparkSession() {
             .getOrCreate();
   }
 
-  // 4. Stop the SparkSession ONCE after all tests are done
-  // This fixes the local IllegalStateException (race condition)
   @AfterClass
   public static void teardownSparkSession() {
     if (spark != null) {
@@ -253,66 +251,130 @@ public void testDropDatabase() {
 
   @Test
   public void testCatalogInitializationWithProject() {
-    spark
-        .conf()
-        .set("spark.sql.catalog.public_catalog", "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.public_catalog.project", "bigquery-public-data");
-
-    List<Row> rows = spark.sql("SHOW DATABASES IN public_catalog").collectAsList();
-    List<String> databaseNames =
-        rows.stream().map(row -> row.getString(0)).collect(Collectors.toList());
-    assertThat(databaseNames).contains("samples");
-
-    List<Row> data =
-        spark.sql("SELECT * FROM public_catalog.samples.shakespeare LIMIT 10").collectAsList();
-    assertThat(data).hasSize(10);
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.public_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      // Use 'projectId' instead of 'project' - this is the correct property name
+      spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
+
+      // Add a small delay to ensure catalog is fully initialized
+      Thread.sleep(2000);
+
+      // Verify catalog is accessible before querying
+      try {
+        spark.sql("USE public_catalog");
+      } catch (Exception e) {
+        // Catalog might not support USE, that's okay
+      }
+
+      List<Row> rows = spark.sql("SHOW DATABASES IN public_catalog").collectAsList();
+      List<String> databaseNames =
+          rows.stream().map(row -> row.getString(0)).collect(Collectors.toList());
+      assertThat(databaseNames).contains("samples");
+
+      List<Row> data =
+          spark.sql("SELECT * FROM public_catalog.samples.shakespeare LIMIT 10").collectAsList();
+      assertThat(data).hasSize(10);
+    } catch (Exception e) {
+      // Log the full stack trace to help debug cloud build failures
+      e.printStackTrace();
+      throw new RuntimeException("Test failed with detailed error", e);
+    } finally {
+      // Clean up catalog configuration to avoid interference with other tests
+      try {
+        spark.conf().unset("spark.sql.catalog.public_catalog");
+        spark.conf().unset("spark.sql.catalog.public_catalog.projectId");
+      } catch (Exception ignored) {
+      }
+    }
   }
 
   @Test
   public void testCreateCatalogWithLocation() throws Exception {
     String database = String.format("create_db_with_location_%s", System.nanoTime());
     DatasetId datasetId = DatasetId.of(database);
-    spark
-        .conf()
-        .set(
-            "spark.sql.catalog.test_location_catalog",
-            "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.test_location_catalog.bigquery_location", "EU");
-    spark.sql("CREATE DATABASE test_location_catalog." + database);
-    Dataset dataset = bigquery.getDataset(datasetId);
-    assertThat(dataset).isNotNull();
-    assertThat(dataset.getLocation()).isEqualTo("EU");
-    bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.test_location_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      spark.conf().set("spark.sql.catalog.test_location_catalog.bigquery_location", "EU");
+
+      // Add delay for catalog initialization
+      Thread.sleep(2000);
+
+      spark.sql("CREATE DATABASE test_location_catalog." + database);
+      Dataset dataset = bigquery.getDataset(datasetId);
+      assertThat(dataset).isNotNull();
+      assertThat(dataset.getLocation()).isEqualTo("EU");
+    } finally {
+      bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+      // Clean up catalog configuration
+      try {
+        spark.conf().unset("spark.sql.catalog.test_location_catalog");
+        spark.conf().unset("spark.sql.catalog.test_location_catalog.bigquery_location");
+      } catch (Exception ignored) {
+      }
+    }
   }
 
   @Test
   public void testCreateTableAsSelectWithProjectAndLocation() {
     String database = String.format("ctas_db_with_location_%s", System.nanoTime());
     String newTable = "ctas_table_from_public";
     DatasetId datasetId = DatasetId.of(database);
-    spark
-        .conf()
-        .set("spark.sql.catalog.public_catalog", "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
-    spark
-        .conf()
-        .set(
-            "spark.sql.catalog.test_catalog_as_select",
-            "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.test_catalog_as_select.bigquery_location", "EU");
-    spark.sql("CREATE DATABASE test_catalog_as_select." + database);
-    spark.sql(
-        "CREATE TABLE test_catalog_as_select."
-            + database
-            + "."
-            + newTable
-            + " AS SELECT * FROM public_catalog.samples.shakespeare LIMIT 10");
-    Dataset dataset = bigquery.getDataset(datasetId);
-    assertThat(dataset).isNotNull();
-    assertThat(dataset.getLocation()).isEqualTo("EU");
-    Table table = bigquery.getTable(TableId.of(datasetId.getDataset(), newTable));
-    assertThat(table).isNotNull();
-    bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.public_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      // Use 'projectId' instead of 'project'
+      spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.test_catalog_as_select",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      spark.conf().set("spark.sql.catalog.test_catalog_as_select.bigquery_location", "EU");
+
+      // Add delay for catalog initialization
+      Thread.sleep(2000);
+
+      spark.sql("CREATE DATABASE test_catalog_as_select." + database);
+
+      // Add another small delay after database creation
+      Thread.sleep(1000);
+
+      spark.sql(
+          "CREATE TABLE test_catalog_as_select."
+              + database
+              + "."
+              + newTable
+              + " AS SELECT * FROM public_catalog.samples.shakespeare LIMIT 10");
+      Dataset dataset = bigquery.getDataset(datasetId);
+      assertThat(dataset).isNotNull();
+      assertThat(dataset.getLocation()).isEqualTo("EU");
+      Table table = bigquery.getTable(TableId.of(datasetId.getDataset(), newTable));
+      assertThat(table).isNotNull();
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new RuntimeException("Test failed with detailed error", e);
+    } finally {
+      bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+      // Clean up catalog configurations
+      try {
+        spark.conf().unset("spark.sql.catalog.public_catalog");
+        spark.conf().unset("spark.sql.catalog.public_catalog.projectId");
+        spark.conf().unset("spark.sql.catalog.test_catalog_as_select");
+        spark.conf().unset("spark.sql.catalog.test_catalog_as_select.bigquery_location");
+      } catch (Exception ignored) {
+      }
+    }
   }
 
   private static SparkSession createSparkSession() {
diff --git a/spark-bigquery-dsv2/spark-3.5-bigquery-lib/src/main/java/com/google/cloud/spark/bigquery/BigQueryCatalog.java b/spark-bigquery-dsv2/spark-3.5-bigquery-lib/src/main/java/com/google/cloud/spark/bigquery/BigQueryCatalog.java
@@ -15,35 +15,14 @@
  */
 package com.google.cloud.spark.bigquery;
 
-import com.google.api.client.googleapis.json.GoogleJsonResponseException;
-import com.google.cloud.bigquery.BigQueryException;
-import com.google.cloud.bigquery.Dataset;
-import com.google.cloud.bigquery.DatasetId;
-import com.google.cloud.bigquery.DatasetInfo;
-import com.google.cloud.bigquery.Schema;
-import com.google.cloud.bigquery.TableDefinition;
-import com.google.cloud.bigquery.TableId;
-import com.google.cloud.bigquery.TableInfo;
-import com.google.cloud.bigquery.connector.common.BigQueryClient;
-import com.google.cloud.bigquery.connector.common.BigQueryConnectorException;
-import com.google.cloud.spark.bigquery.v2.BigQueryIdentifier;
-import com.google.cloud.spark.bigquery.v2.Spark35BigQueryTable;
-import com.google.cloud.spark.bigquery.v2.Spark3Util;
-import com.google.common.base.Preconditions;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Streams;
-import com.google.inject.Injector;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Optional;
 import java.util.ServiceLoader;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.StreamSupport;
+
 import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException;
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException;
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
@@ -63,6 +42,29 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.cloud.bigquery.BigQueryException;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.DatasetInfo;
+import com.google.cloud.bigquery.Schema;
+import com.google.cloud.bigquery.TableDefinition;
+import com.google.cloud.bigquery.TableId;
+import com.google.cloud.bigquery.TableInfo;
+import com.google.cloud.bigquery.connector.common.BigQueryClient;
+import com.google.cloud.bigquery.connector.common.BigQueryConnectorException;
+import com.google.cloud.spark.bigquery.v2.BigQueryIdentifier;
+import com.google.cloud.spark.bigquery.v2.Spark35BigQueryTable;
+import com.google.cloud.spark.bigquery.v2.Spark3Util;
+import com.google.common.base.Preconditions;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Streams;
+import com.google.inject.Injector;
+
 public class BigQueryCatalog implements TableCatalog, SupportsNamespaces {
 
   private static final Logger logger = LoggerFactory.getLogger(BigQueryCatalog.class);
@@ -81,23 +83,33 @@ public class BigQueryCatalog implements TableCatalog, SupportsNamespaces {
 
   @Override
   public void initialize(String name, CaseInsensitiveStringMap caseInsensitiveStringMap) {
-    logger.info("Initializing BigQuery table catalog [{}])", name);
-    Injector injector =
-        new InjectorBuilder()
-            .withOptions(caseInsensitiveStringMap.asCaseSensitiveMap())
-            .withTableIsMandatory(false)
-            .build();
-    tableProvider =
-        StreamSupport.stream(ServiceLoader.load(DataSourceRegister.class).spliterator(), false)
-            .filter(candidate -> candidate.shortName().equals("bigquery"))
-            .map(candidate -> (TableProvider) candidate)
-            .findFirst()
-            .orElseThrow(
-                () -> new IllegalStateException("Could not find a BigQuery TableProvider"));
-    bigQueryClient = injector.getInstance(BigQueryClient.class);
-    schemaConverters =
-        SchemaConverters.from(
-            SchemaConvertersConfiguration.from(injector.getInstance(SparkBigQueryConfig.class)));
+    logger.info(
+        "Initializing BigQuery table catalog [{}] with options: {}",
+        name,
+        caseInsensitiveStringMap);
+
+        try {
+      Injector injector =
+          new InjectorBuilder()
+              .withOptions(caseInsensitiveStringMap.asCaseSensitiveMap())
+              .withTableIsMandatory(false)
+              .build();
+      tableProvider =
+          StreamSupport.stream(ServiceLoader.load(DataSourceRegister.class).spliterator(), false)
+              .filter(candidate -> candidate.shortName().equals("bigquery"))
+              .map(candidate -> (TableProvider) candidate)
+              .findFirst()
+              .orElseThrow(
+                  () -> new IllegalStateException("Could not find a BigQuery TableProvider"));
+      bigQueryClient = injector.getInstance(BigQueryClient.class);
+      schemaConverters =
+          SchemaConverters.from(
+              SchemaConvertersConfiguration.from(injector.getInstance(SparkBigQueryConfig.class)));
+      logger.info("BigQuery table catalog [{}] initialized successfully", name);
+    } catch (Exception e) {
+      logger.error("Failed to initialize BigQuery catalog [{}]", name, e);
+      throw new BigQueryConnectorException("Failed to initialize BigQuery catalog: " + name, e);
+    }
   }
 
   @Override
@@ -161,7 +173,8 @@ Map<String, String> toLoadProperties(Identifier identifier) {
         result.put("dataset", identifier.namespace()[0]);
         break;
       case 2:
-        result.put("project", identifier.namespace()[0]);
+        // Use 'projectId' instead of 'project' to match the connector's configuration
+        result.put("projectId", identifier.namespace()[0]);
         result.put("dataset", identifier.namespace()[1]);
         break;
       default:
@@ -289,10 +302,18 @@ static TableId toTableId(Identifier identifier) {
 
   @Override
   public String[][] listNamespaces() throws NoSuchNamespaceException {
-    return Streams.stream(bigQueryClient.listDatasets())
-        .map(Dataset::getDatasetId)
-        .map(this::toNamespace)
-        .toArray(String[][]::new);
+    if (bigQueryClient == null) {
+      throw new IllegalStateException("BigQuery catalog not properly initialized");
+    }
+    try {
+      return Streams.stream(bigQueryClient.listDatasets())
+          .map(Dataset::getDatasetId)
+          .map(this::toNamespace)
+          .toArray(String[][]::new);
+    } catch (Exception e) {
+      logger.error("Error listing namespaces", e);
+      throw new BigQueryConnectorException("Failed to list namespaces", e);
+    }
   }
 
   private String[] toNamespace(DatasetId datasetId) {