From fd10c0b1faa08d35422f31c05237e25cfdd12c2c Mon Sep 17 00:00:00 2001 From: Christopher Vittal Date: Fri, 2 Aug 2024 17:00:34 -0400 Subject: [PATCH] [query] Remove jackson string length restriction in Spark/Local backends Resolves #14650 --- .../scala/is/hail/backend/local/LocalBackend.scala | 12 ++++++++++++ .../is/hail/backend/service/ServiceBackend.scala | 6 ++++++ .../scala/is/hail/backend/spark/SparkBackend.scala | 12 ++++++++++++ 3 files changed, 30 insertions(+) diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index 9f8c83195ec..31b8d0e26d2 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -24,6 +24,7 @@ import scala.reflect.ClassTag import java.io.PrintWriter +import com.fasterxml.jackson.core.StreamReadConstraints import com.google.common.util.concurrent.MoreExecutors import org.apache.hadoop import org.json4s._ @@ -46,6 +47,17 @@ object LocalBackend { skipLoggingConfiguration: Boolean = false, ): LocalBackend = synchronized { require(theLocalBackend == null) + // From https://github.com/hail-is/hail/issues/14580 : + // IR can get quite big, especially as it can contain an arbitrary + // amount of encoded literals from the user's python session. This + // was a (controversial) restriction imposed by Jackson and should be lifted. + // + // We remove this restriction _here_ (as opposed to anywhere else) because + // this is the first call into the JVM we control as part of initializing + // hail for the local backend + StreamReadConstraints.overrideDefaultStreamReadConstraints( + StreamReadConstraints.builder().maxStringLength(Integer.MAX_VALUE).build() + ) if (!skipLoggingConfiguration) HailContext.configureLogging(logFile, quiet, append) diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index bae554c156d..11156aecfee 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -465,6 +465,12 @@ object ServiceBackendAPI { implicit val formats: Formats = DefaultFormats + // From https://github.com/hail-is/hail/issues/14580 : + // IR can get quite big, especially as it can contain an arbitrary + // amount of encoded literals from the user's python session. This + // was a (controversial) restriction imposed by Jackson and should be lifted. + // + // We remove this restriction. StreamReadConstraints.overrideDefaultStreamReadConstraints( StreamReadConstraints.builder().maxStringLength(Integer.MAX_VALUE).build() ); diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index ce02756bdc9..f38eb88e6fc 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -31,6 +31,7 @@ import scala.util.control.NonFatal import java.io.{Closeable, PrintWriter} +import com.fasterxml.jackson.core.StreamReadConstraints import org.apache.hadoop import org.apache.hadoop.conf.Configuration import org.apache.spark._ @@ -257,6 +258,17 @@ object SparkBackend { gcsRequesterPaysBuckets: String = null, ): SparkBackend = synchronized { require(theSparkBackend == null) + // From https://github.com/hail-is/hail/issues/14580 : + // IR can get quite big, especially as it can contain an arbitrary + // amount of encoded literals from the user's python session. This + // was a (controversial) restriction imposed by Jackson and should be lifted. + // + // We remove this restriction _here_ (as opposed to anywhere else) because + // this is the first call into the JVM we control as part of initializing + // hail for the spark backend + StreamReadConstraints.overrideDefaultStreamReadConstraints( + StreamReadConstraints.builder().maxStringLength(Integer.MAX_VALUE).build() + ) if (!skipLoggingConfiguration) HailContext.configureLogging(logFile, quiet, append)