Commit 5dcb496

address comments

1 parent: 1d5670e

2 files changed: +42 additions, -33 deletions


sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaData.scala

Lines changed: 10 additions & 7 deletions
@@ -20,6 +20,7 @@ package org.apache.spark.sql.connect.client.jdbc
 import java.sql.{Array => _, _}
 
 import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION}
+import org.apache.spark.sql.catalyst.util.QuotingUtils._
 import org.apache.spark.sql.connect
 import org.apache.spark.sql.connect.client.jdbc.SparkConnectDatabaseMetaData._
 import org.apache.spark.sql.functions._
@@ -99,8 +100,7 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
   override def getTimeDateFunctions: String =
     throw new SQLFeatureNotSupportedException
 
-  override def getSearchStringEscape: String =
-    throw new SQLFeatureNotSupportedException
+  override def getSearchStringEscape: String = "\\"
 
   override def getExtraNameCharacters: String = ""
 
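For context: getSearchStringEscape reports the character a JDBC client must use to escape the LIKE wildcards _ and % in DatabaseMetaData search patterns. A minimal sketch of the client-side usage, assuming a hypothetical escapePattern helper that is not part of this patch:

    import java.sql.Connection

    // Hypothetical helper: escape the LIKE wildcards '_' and '%' in a literal
    // name, using the escape string the driver reports ("\\" for this driver).
    def escapePattern(literal: String, esc: String): String =
      literal.flatMap {
        case c @ ('_' | '%') => esc + c
        case c               => c.toString
      }

    // Usage sketch: match the schema named exactly "t_db_" instead of letting
    // each '_' act as a single-character wildcard.
    def findExactSchema(conn: Connection): Unit = {
      val esc = conn.getMetaData.getSearchStringEscape
      val rs = conn.getMetaData.getSchemas(null, escapePattern("t_db_", esc))
      while (rs.next()) println(rs.getString("TABLE_SCHEM"))
    }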
@@ -316,16 +316,19 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
   private def getSchemasDataFrame(
       catalog: String, schemaPattern: String): connect.DataFrame = {
 
-    val schemaFilterClause =
-      if (isNullOrWildcard(schemaPattern)) "1=1" else s"TABLE_SCHEM LIKE '$schemaPattern'"
+    val schemaFilterClause = if (isNullOrWildcard(schemaPattern)) {
+      "TRUE"
+    } else {
+      s"TABLE_SCHEM LIKE '${escapeSingleQuotedString(schemaPattern)}'"
+    }
 
     def internalGetSchemas(
         catalogOpt: Option[String],
         schemaFilterClause: String): connect.DataFrame = {
       val catalog = catalogOpt.getOrElse(conn.getCatalog)
       // Spark SQL supports LIKE clause in SHOW SCHEMAS command, but we can't use that
       // because the LIKE pattern does not follow SQL standard.
-      conn.spark.sql(s"SHOW SCHEMAS IN `$catalog`")
+      conn.spark.sql(s"SHOW SCHEMAS IN ${quoteIdentifier(catalog)}")
         .select($"namespace".as("TABLE_SCHEM"))
         .filter(schemaFilterClause)
         .withColumn("TABLE_CATALOG", lit(catalog))
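The two QuotingUtils helpers close quoting holes in the interpolated SQL: a schema pattern containing a single quote previously broke (or injected into) the LIKE filter, and a catalog name containing a backtick broke the SHOW SCHEMAS statement. A rough sketch of the assumed behavior, using illustrative stand-ins rather than the real org.apache.spark.sql.catalyst.util.QuotingUtils implementations:

    // Illustrative stand-ins; the behavior shown is assumed, not copied from Spark.
    def escapeSingleQuotedString(s: String): String =
      s.flatMap {
        case '\'' => "\\'"      // keep the quote inside the string literal
        case c    => c.toString
      }

    def quoteIdentifier(name: String): String =
      "`" + name.replace("`", "``") + "`"  // double any embedded backtick

    val quotedCatalog = quoteIdentifier("test`cat")       // `test``cat`
    val escapedPattern = escapeSingleQuotedString("a'b")  // a\'b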
@@ -336,8 +339,8 @@ class SparkConnectDatabaseMetaData(conn: SparkConnectConnection) extends Databas
       val emptyDf = conn.spark.emptyDataFrame
         .withColumn("TABLE_SCHEM", lit(""))
         .withColumn("TABLE_CATALOG", lit(""))
-      conn.spark.catalog.listCatalogs().collect().map(_.name).map { catalog =>
-        internalGetSchemas(Some(catalog), schemaFilterClause)
+      conn.spark.catalog.listCatalogs().collect().map(_.name).map { c =>
+        internalGetSchemas(Some(c), schemaFilterClause)
       }.fold(emptyDf) { (l, r) => l.unionAll(r) }
     } else if (catalog == "") {
       // search only in current catalog

sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala

Lines changed: 32 additions & 26 deletions
@@ -69,6 +69,7 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
     assert(metadata.storesLowerCaseQuotedIdentifiers === false)
     assert(metadata.storesMixedCaseQuotedIdentifiers === false)
     assert(metadata.getIdentifierQuoteString === "`")
+    assert(metadata.getSearchStringEscape === "\\")
     assert(metadata.getExtraNameCharacters === "")
     assert(metadata.supportsAlterTableWithAddColumn === true)
     assert(metadata.supportsAlterTableWithDropColumn === true)
@@ -253,22 +254,21 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
     withConnection { conn =>
       implicit val spark: SparkSession = conn.asInstanceOf[SparkConnectConnection].spark
 
-      registerCatalog("testcat", TEST_IN_MEMORY_CATALOG)
+      registerCatalog("test`cat", TEST_IN_MEMORY_CATALOG)
 
-      spark.sql("USE testcat")
-      spark.sql("CREATE DATABASE IF NOT EXISTS testcat.t_db1")
-      spark.sql("CREATE DATABASE IF NOT EXISTS testcat.t_db2")
-      spark.sql("CREATE DATABASE IF NOT EXISTS testcat.test_db3")
+      spark.sql("CREATE DATABASE IF NOT EXISTS `test``cat`.t_db1")
+      spark.sql("CREATE DATABASE IF NOT EXISTS `test``cat`.t_db2")
+      spark.sql("CREATE DATABASE IF NOT EXISTS `test``cat`.t_db_")
 
-      spark.sql("USE spark_catalog")
       spark.sql("CREATE DATABASE IF NOT EXISTS spark_catalog.db1")
      spark.sql("CREATE DATABASE IF NOT EXISTS spark_catalog.db2")
-      spark.sql("CREATE DATABASE IF NOT EXISTS spark_catalog.db_")
+      spark.sql("CREATE DATABASE IF NOT EXISTS spark_catalog.test_db3")
 
       val metadata = conn.getMetaData
-      withDatabase("testcat.t_db1", "testcat.t_db2", "testcat.test_db3",
-        "spark_catalog.db1", "spark_catalog.db2", "spark_catalog.db_") {
 
+      // no need to clean up the "test`cat" databases: the catalog is memory-based
+      // and session-isolated, hence inaccessible from any other SparkSession
+      withDatabase("spark_catalog.db1", "spark_catalog.db2", "spark_catalog.test_db3") {
         // list schemas in all catalogs
         val getSchemasInAllCatalogs = (() => metadata.getSchemas) ::
           List(null, "%").map { database => () => metadata.getSchemas(null, database) } ::: Nil
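The doubled backtick in the CREATE DATABASE statements above is Spark SQL's escape for a literal backtick inside a quoted identifier, the same spelling quoteIdentifier now produces in the driver. A quick hypothetical illustration:

    // A literal backtick inside a backquoted identifier is written twice:
    spark.sql("SHOW SCHEMAS IN `test``cat`")  // resolves the catalog named test`cat
    // the unquoted form, SHOW SCHEMAS IN test`cat, would not parse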
@@ -280,11 +280,11 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
           catalogDatabases === Seq(
             ("spark_catalog", "db1"),
             ("spark_catalog", "db2"),
-            ("spark_catalog", "db_"),
             ("spark_catalog", "default"),
-            ("testcat", "t_db1"),
-            ("testcat", "t_db2"),
-            ("testcat", "test_db3"))
+            ("spark_catalog", "test_db3"),
+            ("test`cat", "t_db1"),
+            ("test`cat", "t_db2"),
+            ("test`cat", "t_db_"))
         }
       }
     }
@@ -300,20 +300,19 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
           catalogDatabases === Seq(
             ("spark_catalog", "db1"),
             ("spark_catalog", "db2"),
-            ("spark_catalog", "db_"),
-            ("spark_catalog", "default"))
+            ("spark_catalog", "default"),
+            ("spark_catalog", "test_db3"))
         }
       }
     }
 
-      // list schemas with SQL pattern
+      // list schemas with schema pattern
       verifyGetSchemas { () => metadata.getSchemas(null, "db%") } { catalogDatabases =>
         // results are ordered by TABLE_CATALOG, TABLE_SCHEM
         assert {
           catalogDatabases === Seq(
             ("spark_catalog", "db1"),
-            ("spark_catalog", "db2"),
-            ("spark_catalog", "db_"))
+            ("spark_catalog", "db2"))
         }
       }
 
@@ -322,18 +321,25 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark
         assert {
           catalogDatabases === Seq(
            ("spark_catalog", "db1"),
-            ("spark_catalog", "db2"),
-            ("spark_catalog", "db_"))
+            ("spark_catalog", "db2"))
         }
       }
 
-      verifyGetSchemas { () => metadata.getSchemas(null, "db\\_") } { catalogDatabases =>
-        // results are ordered by TABLE_CATALOG, TABLE_SCHEM
-        assert {
-          catalogDatabases === Seq(
-            ("spark_catalog", "db_"))
-        }
+      // escape backtick in the catalog name, and _ in the schema pattern
+      verifyGetSchemas {
+        () => metadata.getSchemas("test`cat", "t\\_db\\_")
+      } { catalogDatabases =>
+        assert(catalogDatabases === Seq(("test`cat", "t_db_")))
       }
+
+      // skip testing escapes for ' and % in the schema pattern, because Spark SQL
+      // does not allow those characters in schema/table names:
+      //
+      //   CREATE DATABASE IF NOT EXISTS `t_db1'`;
+      //
+      // the above SQL fails with error condition:
+      //   [INVALID_SCHEMA_OR_RELATION_NAME] `t_db1'` is not a valid name for tables/schemas.
+      //   Valid names only contain alphabet characters, numbers and _. SQLSTATE: 42602
     }
   }
 }
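As background for the new assertion: in JDBC metadata patterns, _ matches any single character and % any sequence of characters, unless preceded by the escape string reported by getSearchStringEscape. A hypothetical snippet against the catalogs created in this test:

    val md = conn.getMetaData
    // '_' as a wildcard: matches t_db1, t_db2 and t_db_ in test`cat
    val allThree = md.getSchemas("test`cat", "t_db_")
    // '_' escaped with "\\": matches only the schema literally named t_db_
    val exactOne = md.getSchemas("test`cat", "t\\_db\\_")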
