Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions .github/workflows/code-coverage.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
name: Code Coverage
name: E2E Tests and Code Coverage

permissions:
contents: read
id-token: write

on: [pull_request, workflow_dispatch]
on:
push:
branches:
- main
pull_request:
workflow_dispatch:

jobs:
test-with-coverage:
Expand Down Expand Up @@ -32,25 +37,16 @@ jobs:
with:
python-version: "3.10"
install-args: "--all-extras"
- name: Run parallel tests with coverage
- name: Run all tests with coverage
continue-on-error: false
run: |
poetry run pytest tests/unit tests/e2e \
-m "not serial" \
-n auto \
--dist=loadgroup \
--cov=src \
--cov-report=xml \
--cov-report=term \
-v
- name: Run telemetry tests with coverage (isolated)
continue-on-error: false
run: |
poetry run pytest tests/e2e/test_concurrent_telemetry.py \
--cov=src \
--cov-append \
--cov-report=xml \
--cov-report=term \
-v
- name: Check for coverage override
id: override
env:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/code-quality-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
install-args: "--all-extras"
cache-path: ".venv-pyarrow"
cache-suffix: "pyarrow-${{ matrix.dependency-version }}-"
- name: Install Python tools for custom versions
if: matrix.dependency-version != 'default'
Expand Down
58 changes: 0 additions & 58 deletions .github/workflows/daily-telemetry-e2e.yml

This file was deleted.

71 changes: 0 additions & 71 deletions .github/workflows/integration.yml

This file was deleted.

29 changes: 22 additions & 7 deletions tests/e2e/common/large_queries_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
log = logging.getLogger(__name__)


class LargeQueriesMixin:
"""
This mixin expects to be mixed with a CursorTest-like class
"""
class LargeQueriesFetchMixin:
"""Shared fetch helper for large query test classes."""

def fetch_rows(self, cursor, row_count, fetchmany_size):
"""
Expand Down Expand Up @@ -44,6 +42,10 @@ def fetch_rows(self, cursor, row_count, fetchmany_size):
+ "assuming 10K fetch size."
)


class LargeWideResultSetMixin(LargeQueriesFetchMixin):
"""Test mixin for large wide result set queries."""

@pytest.mark.parametrize(
"extra_params",
[
Expand All @@ -52,7 +54,7 @@ def fetch_rows(self, cursor, row_count, fetchmany_size):
],
)
def test_query_with_large_wide_result_set(self, extra_params):
resultSize = 300 * 1000 * 1000 # 300 MB
resultSize = 100 * 1000 * 1000 # 100 MB
width = 8192 # B
rows = resultSize // width
cols = width // 36
Expand All @@ -77,6 +79,10 @@ def test_query_with_large_wide_result_set(self, extra_params):
assert row[0] == row_id # Verify no rows are dropped in the middle.
assert len(row[1]) == 36


class LargeNarrowResultSetMixin(LargeQueriesFetchMixin):
"""Test mixin for large narrow result set queries."""

@pytest.mark.parametrize(
"extra_params",
[
Expand All @@ -85,7 +91,7 @@ def test_query_with_large_wide_result_set(self, extra_params):
],
)
def test_query_with_large_narrow_result_set(self, extra_params):
resultSize = 300 * 1000 * 1000 # 300 MB
resultSize = 100 * 1000 * 1000 # 100 MB
width = 8 # sizeof(long)
rows = resultSize / width

Expand All @@ -98,6 +104,10 @@ def test_query_with_large_narrow_result_set(self, extra_params):
for row_id, row in enumerate(self.fetch_rows(cursor, rows, fetchmany_size)):
assert row[0] == row_id


class LongRunningQueryMixin:
"""Test mixin for long running queries."""

@pytest.mark.parametrize(
"extra_params",
[
Expand All @@ -114,7 +124,7 @@ def test_long_running_query(self, extra_params):

duration = -1
scale0 = 10000
scale_factor = 1
scale_factor = 50
with self.cursor(extra_params) as cursor:
while duration < min_duration:
assert scale_factor < 4096, "Detected infinite loop"
Expand All @@ -138,3 +148,8 @@ def test_long_running_query(self, extra_params):
print("Took {} s with scale factor={}".format(duration, scale_factor))
# Extrapolate linearly to reach 3 min and add 50% padding to push over the limit
scale_factor = math.ceil(1.5 * scale_factor / current_fraction)


# Backward-compatible aggregate: older code imports a single LargeQueriesMixin,
# so keep a class that pulls in the wide, narrow, and long-running mixins.
class LargeQueriesMixin(LargeWideResultSetMixin, LargeNarrowResultSetMixin, LongRunningQueryMixin):
    """Legacy combined mixin kept so existing imports continue to work."""
28 changes: 22 additions & 6 deletions tests/e2e/test_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@
)
from databricks.sql.thrift_api.TCLIService import ttypes
from tests.e2e.common.core_tests import CoreTestMixin, SmokeTestMixin
from tests.e2e.common.large_queries_mixin import LargeQueriesMixin
from tests.e2e.common.large_queries_mixin import (
LargeWideResultSetMixin,
LargeNarrowResultSetMixin,
LongRunningQueryMixin,
)
from tests.e2e.common.timestamp_tests import TimestampTestsMixin
from tests.e2e.common.decimal_tests import DecimalTestsMixin
from tests.e2e.common.retry_test_mixins import (
Expand Down Expand Up @@ -138,24 +142,36 @@ def assertEqualRowValues(self, actual, expected):
assert act[i] == exp[i]


class TestPySQLLargeQueriesSuite(PySQLPytestTestCase, LargeQueriesMixin):
class _LargeQueryRowHelper:
    """Row-at-a-time fetch strategy shared by the large-query test classes.

    Provides ``get_some_rows`` so mixed-in test mixins can pull results in
    single-row batches instead of using ``fetchmany``.
    """

    def get_some_rows(self, cursor, fetchmany_size):
        # fetchmany_size is part of the shared interface but deliberately
        # unused here: this helper always fetches exactly one row per call.
        next_row = cursor.fetchone()
        return [next_row] if next_row else None


class TestPySQLLargeWideResultSet(PySQLPytestTestCase, _LargeQueryRowHelper, LargeWideResultSetMixin):
    """E2E tests for large *wide* result sets, fetched one row at a time."""


class TestPySQLLargeNarrowResultSet(PySQLPytestTestCase, _LargeQueryRowHelper, LargeNarrowResultSetMixin):
    """E2E tests for large *narrow* result sets, fetched one row at a time."""


class TestPySQLLongRunningQuery(PySQLPytestTestCase, LongRunningQueryMixin):
    """E2E tests exercising long-running query behavior."""


class TestPySQLCloudFetch(PySQLPytestTestCase):
@skipUnless(pysql_supports_arrow(), "needs arrow support")
@pytest.mark.skip("This test requires a previously uploaded data set")
def test_cloud_fetch(self):
# This test can take several minutes to run
limits = [100000, 300000]
threads = [10, 25]
self.arraysize = 100000
# This test requires a large table with many rows to properly initiate cloud fetch.
# e2-dogfood host > hive_metastore catalog > main schema has such a table called store_sales.
# If this table is deleted or this test is run on a different host, a different table may need to be used.
base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 "
for num_limit, num_threads, lz4_compression in itertools.product(
limits, threads, [True, False]
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class ClientTestSuite(unittest.TestCase):
"server_hostname": "foo",
"http_path": "dummy_path",
"access_token": "tok",
"enable_telemetry": False,
}

@patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME)
Expand Down Expand Up @@ -644,6 +645,7 @@ class TransactionTestSuite(unittest.TestCase):
"server_hostname": "foo",
"http_path": "dummy_path",
"access_token": "tok",
"enable_telemetry": False,
}

def _setup_mock_session_with_http_client(self, mock_session):
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class TestSession:
"server_hostname": "foo",
"http_path": "dummy_path",
"access_token": "tok",
"enable_telemetry": False,
}

@patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME)
Expand Down Expand Up @@ -50,13 +51,15 @@ def test_auth_args(self, mock_client_class):
"server_hostname": "foo",
"http_path": None,
"access_token": "tok",
"enable_telemetry": False,
},
{
"server_hostname": "foo",
"http_path": None,
"_tls_client_cert_file": "something",
"_use_cert_as_auth": True,
"access_token": None,
"enable_telemetry": False,
},
]

Expand Down
Loading