@@ -246,8 +246,6 @@ def getDatabase(self, dbName: str) -> Database:
             locationUri=jdb.locationUri(),
         )
 
-    # TODO(SPARK-41725): we don't have to `collect` for every `sql` but
-    # Spark Connect requires it. We should remove them out.
     def databaseExists(self, dbName: str) -> bool:
         """Check if the database with the specified name exists.
 
@@ -275,15 +273,15 @@ def databaseExists(self, dbName: str) -> bool:
 
         >>> spark.catalog.databaseExists("test_new_database")
         False
-        >>> _ = spark.sql("CREATE DATABASE test_new_database").collect()
+        >>> _ = spark.sql("CREATE DATABASE test_new_database")
         >>> spark.catalog.databaseExists("test_new_database")
         True
 
         Using the fully qualified name with the catalog name.
 
         >>> spark.catalog.databaseExists("spark_catalog.test_new_database")
         True
-        >>> _ = spark.sql("DROP DATABASE test_new_database").collect()
+        >>> _ = spark.sql("DROP DATABASE test_new_database")
         """
         return self._jcatalog.databaseExists(dbName)
 
@@ -372,8 +370,8 @@ def getTable(self, tableName: str) -> Table:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
        >>> spark.catalog.getTable("tbl1")
         Table(name='tbl1', catalog='spark_catalog', namespace=['default'], ...
 
@@ -383,7 +381,7 @@ def getTable(self, tableName: str) -> Table:
         Table(name='tbl1', catalog='spark_catalog', namespace=['default'], ...
         >>> spark.catalog.getTable("spark_catalog.default.tbl1")
         Table(name='tbl1', catalog='spark_catalog', namespace=['default'], ...
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
 
         Throw an analysis exception when the table does not exist.
 
@@ -535,7 +533,7 @@ def getFunction(self, functionName: str) -> Function:
         Examples
         --------
         >>> _ = spark.sql(
-        ...     "CREATE FUNCTION my_func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'").collect()
+        ...     "CREATE FUNCTION my_func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'")
         >>> spark.catalog.getFunction("my_func1")
         Function(name='my_func1', catalog='spark_catalog', namespace=['default'], ...
 
@@ -602,11 +600,11 @@ def listColumns(self, tableName: str, dbName: Optional[str] = None) -> List[Column]:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
         >>> spark.catalog.listColumns("tblA")
         [Column(name='name', description=None, dataType='string', nullable=True, ...
-        >>> _ = spark.sql("DROP TABLE tblA").collect()
+        >>> _ = spark.sql("DROP TABLE tblA")
         """
         if dbName is None:
             iter = self._jcatalog.listColumns(tableName).toLocalIterator()
@@ -667,8 +665,8 @@ def tableExists(self, tableName: str, dbName: Optional[str] = None) -> bool:
 
         >>> spark.catalog.tableExists("unexisting_table")
         False
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
         >>> spark.catalog.tableExists("tbl1")
         True
 
@@ -680,13 +678,13 @@ def tableExists(self, tableName: str, dbName: Optional[str] = None) -> bool:
         True
         >>> spark.catalog.tableExists("tbl1", "default")
         True
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
 
         Check if views exist:
 
         >>> spark.catalog.tableExists("view1")
         False
-        >>> _ = spark.sql("CREATE VIEW view1 AS SELECT 1").collect()
+        >>> _ = spark.sql("CREATE VIEW view1 AS SELECT 1")
         >>> spark.catalog.tableExists("view1")
         True
 
@@ -698,14 +696,14 @@ def tableExists(self, tableName: str, dbName: Optional[str] = None) -> bool:
         True
         >>> spark.catalog.tableExists("view1", "default")
         True
-        >>> _ = spark.sql("DROP VIEW view1").collect()
+        >>> _ = spark.sql("DROP VIEW view1")
 
         Check if temporary views exist:
 
-        >>> _ = spark.sql("CREATE TEMPORARY VIEW view1 AS SELECT 1").collect()
+        >>> _ = spark.sql("CREATE TEMPORARY VIEW view1 AS SELECT 1")
         >>> spark.catalog.tableExists("view1")
         True
-        >>> df = spark.sql("DROP VIEW view1").collect()
+        >>> df = spark.sql("DROP VIEW view1")
         >>> spark.catalog.tableExists("view1")
         False
         """
@@ -806,15 +804,15 @@ def createTable(
         Creating a managed table.
 
         >>> _ = spark.catalog.createTable("tbl1", schema=spark.range(1).schema, source='parquet')
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
 
         Creating an external table
 
         >>> import tempfile
         >>> with tempfile.TemporaryDirectory() as d:
         ...     _ = spark.catalog.createTable(
         ...         "tbl2", schema=spark.range(1).schema, path=d, source='parquet')
-        >>> _ = spark.sql("DROP TABLE tbl2").collect()
+        >>> _ = spark.sql("DROP TABLE tbl2")
         """
         if path is not None:
             options["path"] = path
@@ -954,8 +952,8 @@ def isCached(self, tableName: str) -> bool:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
         >>> spark.catalog.cacheTable("tbl1")
         >>> spark.catalog.isCached("tbl1")
         True
@@ -972,7 +970,7 @@ def isCached(self, tableName: str) -> bool:
         >>> spark.catalog.isCached("spark_catalog.default.tbl1")
         True
         >>> spark.catalog.uncacheTable("tbl1")
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         return self._jcatalog.isCached(tableName)
 
@@ -994,8 +992,8 @@ def cacheTable(self, tableName: str) -> None:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
         >>> spark.catalog.cacheTable("tbl1")
 
         Throw an analysis exception when the table does not exist.
@@ -1009,7 +1007,7 @@ def cacheTable(self, tableName: str) -> None:
 
         >>> spark.catalog.cacheTable("spark_catalog.default.tbl1")
         >>> spark.catalog.uncacheTable("tbl1")
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.cacheTable(tableName)
 
@@ -1031,8 +1029,8 @@ def uncacheTable(self, tableName: str) -> None:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
         >>> spark.catalog.cacheTable("tbl1")
         >>> spark.catalog.uncacheTable("tbl1")
         >>> spark.catalog.isCached("tbl1")
@@ -1050,7 +1048,7 @@ def uncacheTable(self, tableName: str) -> None:
         >>> spark.catalog.uncacheTable("spark_catalog.default.tbl1")
         >>> spark.catalog.isCached("tbl1")
         False
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.uncacheTable(tableName)
 
@@ -1064,12 +1062,12 @@ def clearCache(self) -> None:
 
         Examples
         --------
-        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
-        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")
         >>> spark.catalog.clearCache()
         >>> spark.catalog.isCached("tbl1")
         False
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.clearCache()
 
@@ -1095,10 +1093,10 @@ def refreshTable(self, tableName: str) -> None:
 
         >>> import tempfile
         >>> with tempfile.TemporaryDirectory() as d:
-        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
+        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1")
         ...     _ = spark.sql(
-        ...         "CREATE TABLE tbl1 (col STRING) USING TEXT LOCATION '{}'".format(d)).collect()
-        ...     _ = spark.sql("INSERT INTO tbl1 SELECT 'abc'").collect()
+        ...         "CREATE TABLE tbl1 (col STRING) USING TEXT LOCATION '{}'".format(d))
+        ...     _ = spark.sql("INSERT INTO tbl1 SELECT 'abc'")
         ...     spark.catalog.cacheTable("tbl1")
         ...     spark.table("tbl1").show()
         +---+
@@ -1121,7 +1119,7 @@ def refreshTable(self, tableName: str) -> None:
         Using the fully qualified name for the table.
 
         >>> spark.catalog.refreshTable("spark_catalog.default.tbl1")
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.refreshTable(tableName)
 
@@ -1149,12 +1147,12 @@ def recoverPartitions(self, tableName: str) -> None:
 
         >>> import tempfile
         >>> with tempfile.TemporaryDirectory() as d:
-        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
+        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1")
         ...     spark.range(1).selectExpr(
         ...         "id as key", "id as value").write.partitionBy("key").mode("overwrite").save(d)
         ...     _ = spark.sql(
         ...         "CREATE TABLE tbl1 (key LONG, value LONG)"
-        ...         "USING parquet OPTIONS (path '{}') PARTITIONED BY (key)".format(d)).collect()
+        ...         "USING parquet OPTIONS (path '{}') PARTITIONED BY (key)".format(d))
         ...     spark.table("tbl1").show()
         ...     spark.catalog.recoverPartitions("tbl1")
         ...     spark.table("tbl1").show()
@@ -1167,7 +1165,7 @@ def recoverPartitions(self, tableName: str) -> None:
         +-----+---+
         |    0|  0|
         +-----+---+
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.recoverPartitions(tableName)
 
@@ -1191,10 +1189,10 @@ def refreshByPath(self, path: str) -> None:
 
         >>> import tempfile
         >>> with tempfile.TemporaryDirectory() as d:
-        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1").collect()
+        ...     _ = spark.sql("DROP TABLE IF EXISTS tbl1")
         ...     _ = spark.sql(
-        ...         "CREATE TABLE tbl1 (col STRING) USING TEXT LOCATION '{}'".format(d)).collect()
-        ...     _ = spark.sql("INSERT INTO tbl1 SELECT 'abc'").collect()
+        ...         "CREATE TABLE tbl1 (col STRING) USING TEXT LOCATION '{}'".format(d))
+        ...     _ = spark.sql("INSERT INTO tbl1 SELECT 'abc'")
         ...     spark.catalog.cacheTable("tbl1")
         ...     spark.table("tbl1").show()
         +---+
@@ -1214,7 +1212,7 @@ def refreshByPath(self, path: str) -> None:
         >>> spark.table("tbl1").count()
         0
 
-        >>> _ = spark.sql("DROP TABLE tbl1").collect()
+        >>> _ = spark.sql("DROP TABLE tbl1")
         """
         self._jcatalog.refreshByPath(path)
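
Note on the pattern above: every edit in this diff is the same mechanical change. The removed TODO referenced SPARK-41725, after which spark.sql() executes commands (DDL/DML) eagerly on Spark Connect as well as on classic PySpark, so the doctests no longer need a trailing .collect() to force statements like CREATE TABLE to run. A minimal sketch of the before/after behavior, assuming a running SparkSession named `spark`:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Old doctest pattern: a trailing .collect() forced execution, because
# Spark Connect originally built even command plans lazily:
#   _ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet").collect()

# New pattern: the command runs eagerly when spark.sql() is called, and the
# returned DataFrame can simply be discarded.
_ = spark.sql("DROP TABLE IF EXISTS tbl1")
_ = spark.sql("CREATE TABLE tbl1 (name STRING, age INT) USING parquet")

# Queries (as opposed to commands) remain lazy: an action such as .collect()
# or .show() is still needed to fetch results.
print(spark.sql("SELECT count(*) FROM tbl1").collect())

_ = spark.sql("DROP TABLE tbl1")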