Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JsonStringInput: accept decimal format for integer types #272

Merged
merged 7 commits into from
Jun 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,84 @@ class JsonStringInput(
if (reader.jsonType != jsonType) expectedError(jsonType)
else reader.currentValue.asInstanceOf[T]

private def matchNumericString[T](toNumber: String => T): T =
if (reader.jsonType == JsonType.number || reader.jsonType == JsonType.string) {
val str = reader.currentValue.asInstanceOf[String]
try toNumber(str) catch {
case e: NumberFormatException =>
throw new ParseException(s"Invalid number format: $str ${reader.posInfo(startIdx)}", e)
}
} else expectedError(JsonType.number)

def readNull(): Boolean = reader.jsonType == JsonType.`null`
def readString(): String = checkedValue[String](JsonType.string)
def readBoolean(): Boolean = checkedValue[Boolean](JsonType.boolean)
def readInt(): Int = matchNumericString(_.toInt)
def readLong(): Long = matchNumericString(_.toLong)
override def readFloat(): Float = matchNumericString(_.toFloat)
def readDouble(): Double = matchNumericString(_.toDouble)
def readBigInt(): BigInt = matchNumericString(BigInt(_))
def readBigDecimal(): BigDecimal = matchNumericString(BigDecimal(_, options.mathContext))

private def isInteger(str: String): Boolean = {
@tailrec def loop(i: Int): Boolean =
i >= str.length || str.charAt(i).isDigit && loop(i + 1)
str.nonEmpty && {
if (str.charAt(0) == '-') str.length > 1 && loop(1)
else loop(0)
}
}

private def parseNumber[T](parse: String => T): T = {
val str = reader.jsonType match {
// accepting JSON strings as numbers (also because they may be Infinity/-Infinity/NaN)
case JsonType.number | JsonType.string => reader.currentValue.asInstanceOf[String]
case _ => expectedError(JsonType.number)
ghik marked this conversation as resolved.
Show resolved Hide resolved
}
try parse(str) catch {
case e: NumberFormatException =>
throw new ParseException(s"Invalid number format: $str ${reader.posInfo(startIdx)}", e)
}
}

override def readByte(): Byte = parseNumber { str =>
if (isInteger(str)) str.toByte
else {
val dbl = str.toDouble
if (dbl.isValidByte) dbl.toByte
else throw new NumberFormatException(str)
}
}

override def readShort(): Short = parseNumber { str =>
if (isInteger(str)) str.toShort
else {
ghik marked this conversation as resolved.
Show resolved Hide resolved
val dbl = str.toDouble
if (dbl.isValidShort) dbl.toShort
else throw new NumberFormatException(str)
}
}

def readInt(): Int = parseNumber { str =>
if (isInteger(str)) str.toInt
else {
val dbl = str.toDouble
if (dbl.isValidInt) dbl.toInt
else throw new NumberFormatException(str)
}
}

def readLong(): Long = parseNumber { str =>
if (isInteger(str)) str.toLong
else {
val bd = BigDecimal(str, options.mathContext)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using BigDecimal for parsing of Long values opens ability for DoS/DoW attacks which exploit O(n^2) complexity of Java's way to parse big numbers from textual representations.

I've raised an issue for BigInt and BigDecimal types but now it goes to more widely used types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@plokhotnyuk do you have a safe BigInt/BigDecimal parsing implementation that we could lend?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the code which has O(n^1.5) complexity and checks limits for the scale and number of digits. I've used the following default values for them.

if (bd.isValidLong) bd.toLong
else throw new NumberFormatException(str)
}
}

override def readFloat(): Float =
parseNumber(_.toFloat)

def readDouble(): Double =
parseNumber(_.toDouble)

def readBigInt(): BigInt = parseNumber { str =>
if (isInteger(str)) BigInt(str)
else {
val bd = BigDecimal(str, options.mathContext)
if (bd.isWhole) bd.toBigInt
else throw new NumberFormatException(str)
ghik marked this conversation as resolved.
Show resolved Hide resolved
}
}

def readBigDecimal(): BigDecimal =
parseNumber(BigDecimal(_, options.mathContext))

override def readTimestamp(): Long = options.dateFormat match {
case JsonDateFormat.EpochMillis => readLong()
Expand Down Expand Up @@ -95,6 +155,8 @@ class JsonStringInput(
reader.json.substring(startIdx, endIdx)
}

def jsonType: JsonType = reader.jsonType
ddworak marked this conversation as resolved.
Show resolved Hide resolved

override def readMetadata[T](metadata: InputMetadata[T]): Opt[T] = metadata match {
case JsonType => Opt(reader.jsonType)
case _ => Opt.Empty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ class JsonStringInputOutputTest extends AnyFunSuite with SerializationTestUtils

roundtrip("dates")(new JDate(0), new JDate(2452323423L))

test("reading decimal numbers as integers") {
assert(read[Byte]("1e2") == 100)
assert(read[Short]("3e4") == 30000)
assert(read[Int]("3e5") == 300000)
assert(read[Long]("3e5") == 300000L)
assert(read[BigInt]("3e5") == BigInt(300000))
}

test("attempting to read number from not a number or string") {
intercept[ReadFailure](read[Byte]("true"))
intercept[ReadFailure](read[Short]("true"))
intercept[ReadFailure](read[Int]("true"))
intercept[ReadFailure](read[Long]("true"))
intercept[ReadFailure](read[BigInt]("true"))
intercept[ReadFailure](read[Float]("true"))
intercept[ReadFailure](read[Double]("true"))
intercept[ReadFailure](read[BigDecimal]("true"))
}

test("byte array binary format") {
val options = JsonOptions(binaryFormat = JsonBinaryFormat.ByteArray)
assert(write[Array[Byte]](Array(-1, 0, 1), options) == "[-1,0,1]")
Expand Down Expand Up @@ -207,27 +226,82 @@ class JsonStringInputOutputTest extends AnyFunSuite with SerializationTestUtils
}
}

test("handle plain numbers in JSON as Int, Long and Double") {
test("handle plain numbers in JSON as Byte, Short, Int, Long, Double, BigInt and BigDecimal") {
val json = "123"
read[Byte](json) shouldBe 123
read[Short](json) shouldBe 123
read[Int](json) shouldBe 123
read[Long](json) shouldBe 123
read[Double](json) shouldBe 123
read[BigInt](json) shouldBe BigInt(123)
read[BigDecimal](json) shouldBe BigDecimal(123)

val maxBytePlusOne: Int = Byte.MaxValue + 1
val jsonShort = maxBytePlusOne.toString
intercept[ReadFailure](read[Byte](jsonShort))
read[Short](jsonShort) shouldBe maxBytePlusOne
read[Int](jsonShort) shouldBe maxBytePlusOne
read[Long](jsonShort) shouldBe maxBytePlusOne
read[Double](jsonShort) shouldBe maxBytePlusOne
read[BigInt](jsonShort) shouldBe BigInt(maxBytePlusOne)
read[BigDecimal](jsonShort) shouldBe BigDecimal(maxBytePlusOne)

val maxShortPlusOne: Int = Short.MaxValue + 1
val jsonInt = maxShortPlusOne.toString
intercept[ReadFailure](read[Byte](jsonInt))
intercept[ReadFailure](read[Short](jsonInt))
read[Int](jsonInt) shouldBe maxShortPlusOne
read[Long](jsonInt) shouldBe maxShortPlusOne
read[Double](jsonInt) shouldBe maxShortPlusOne
read[BigInt](jsonInt) shouldBe BigInt(maxShortPlusOne)
read[BigDecimal](jsonInt) shouldBe BigDecimal(maxShortPlusOne)

val maxIntPlusOne: Long = Int.MaxValue.toLong + 1
val jsonLong = maxIntPlusOne.toString
intercept[ReadFailure](read[Byte](jsonLong))
intercept[ReadFailure](read[Short](jsonLong))
intercept[ReadFailure](read[Int](jsonLong))
read[Long](jsonLong) shouldBe maxIntPlusOne
read[Double](jsonLong) shouldBe maxIntPlusOne
read[BigInt](jsonLong) shouldBe BigInt(maxIntPlusOne)
read[BigDecimal](jsonLong) shouldBe BigDecimal(maxIntPlusOne)

val jsonLongMax = Long.MaxValue.toString
intercept[ReadFailure](read[Int](jsonLong))
intercept[ReadFailure](read[Byte](jsonLongMax))
intercept[ReadFailure](read[Short](jsonLongMax))
intercept[ReadFailure](read[Int](jsonLongMax))
read[Long](jsonLongMax) shouldBe Long.MaxValue
read[Double](jsonLongMax) shouldBe Long.MaxValue
read[BigInt](jsonLongMax) shouldBe BigInt(Long.MaxValue)
read[BigDecimal](jsonLongMax) shouldBe BigDecimal(Long.MaxValue)

val overMaxLong = BigInt(Long.MaxValue) + 1
val overMaxLongStr = overMaxLong.toString
intercept[ReadFailure](read[Byte](overMaxLongStr))
intercept[ReadFailure](read[Short](overMaxLongStr))
intercept[ReadFailure](read[Int](overMaxLongStr))
intercept[ReadFailure](read[Long](overMaxLongStr))
read[Double](overMaxLongStr) shouldBe overMaxLong.toDouble
read[BigInt](overMaxLongStr) shouldBe overMaxLong
read[BigDecimal](overMaxLongStr) shouldBe BigDecimal(overMaxLong)

val jsonDouble = Double.MaxValue.toString
intercept[ReadFailure](read[Byte](jsonDouble))
intercept[ReadFailure](read[Short](jsonDouble))
intercept[ReadFailure](read[Int](jsonDouble))
intercept[ReadFailure](read[Long](jsonDouble))
read[Double](jsonDouble) shouldBe Double.MaxValue
read[BigInt](jsonDouble) shouldBe BigDecimal(Double.MaxValue).toBigInt
read[BigDecimal](jsonDouble) shouldBe BigDecimal(Double.MaxValue)

val jsonSmallDouble = Double.MinPositiveValue.toString
intercept[ReadFailure](read[Byte](jsonSmallDouble))
intercept[ReadFailure](read[Short](jsonSmallDouble))
intercept[ReadFailure](read[Int](jsonSmallDouble))
intercept[ReadFailure](read[Long](jsonSmallDouble))
read[Double](jsonSmallDouble) shouldBe Double.MinPositiveValue
intercept[ReadFailure](read[BigInt](jsonSmallDouble))
read[BigDecimal](jsonSmallDouble) shouldBe BigDecimal(Double.MinPositiveValue)
}

test("work with skipping") {
Expand Down