Skip to content

Commit 65a8fd5

Browse files
cloud-fanyhuang-db
authored and committed
[SPARK-52181][SQL][FOLLOWUP] Avoid OOM in variant tests
### What changes were proposed in this pull request? This is a follow-up of apache#50913 . It sets the variant size limit back to 16 MB in test environment, to reduce OOM in tests. ### Why are the changes needed? make tests more reliable. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes apache#50950 from cloud-fan/test. Authored-by: Wenchen Fan <[email protected]> Signed-off-by: yangjie01 <[email protected]>
1 parent cd135c0 commit 65a8fd5

File tree

4 files changed

+9
-7
lines changed

4 files changed

+9
-7
lines changed

common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,9 @@ public class VariantUtil {
138138
public static final int U32_SIZE = 4;
139139

140140
// Both variant value and variant metadata need to be no longer than 128MiB.
141-
public static final int SIZE_LIMIT = 128 * 1024 * 1024;
141+
// Note: to make tests more reliable, we set the max size to 16MiB to avoid OOM in tests.
142+
public static final int SIZE_LIMIT =
143+
System.getenv("SPARK_TESTING") != null ? U24_MAX + 1 : 128 * 1024 * 1024;
142144

143145
public static final int MAX_DECIMAL4_PRECISION = 9;
144146
public static final int MAX_DECIMAL8_PRECISION = 18;

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,9 +133,10 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite {
133133
checkException(json, "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION",
134134
Map("badRecord" -> json, "failFastMode" -> "FAILFAST"))
135135
}
136-
for (json <- Seq((0 to 32 * 1024 * 1024).mkString("[", ",", "]"))) {
136+
for (json <- Seq("\"" + "a" * (16 * 1024 * 1024) + "\"",
137+
(0 to 4 * 1024 * 1024).mkString("[", ",", "]"))) {
137138
checkException(json, "VARIANT_SIZE_LIMIT",
138-
Map("sizeLimit" -> "128.0 MiB", "functionName" -> "`parse_json`"))
139+
Map("sizeLimit" -> "16.0 MiB", "functionName" -> "`parse_json`"))
139140
}
140141
}
141142

sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -824,8 +824,8 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
824824
parameters = Map("schema" -> "\"STRUCT<a: VARIANT, b: VARIANT>\""))
825825

826826
// In singleVariantColumn mode, from_csv normally treats all inputs as valid. The only exception
827-
// case is the input exceeds the variant size limit (128MiB).
828-
val largeInput = "a" * (128 * 1024 * 1024)
827+
// case is the input exceeds the variant size limit (16MiB).
828+
val largeInput = "a" * (16 * 1024 * 1024)
829829
checkAnswer(
830830
Seq(largeInput).toDF("value").select(
831831
from_csv(

sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,8 +135,7 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession {
135135
check("{1:2}", null)
136136
check("{\"a\":1", null)
137137
check("{\"a\":[a,b,c]}", null)
138-
check("\"" + "a" * (16 * 1024 * 1024) + "\"")
139-
check("\"" + "a" * (128 * 1024 * 1024) + "\"", null)
138+
check("\"" + "a" * (16 * 1024 * 1024) + "\"", null)
140139
}
141140

142141
test("to_json with nested variant") {

0 commit comments

Comments (0)