diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/IDNAException.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/IDNAException.scala index 6bff3fb4..db5059d3 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/IDNAException.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/IDNAException.scala @@ -41,15 +41,4 @@ trait IDNAException extends RuntimeException object IDNAException { implicit val showForIDNAException: Show[IDNAException] = Show.fromToString - - /** - * Exception used both in Bootstring and UTS-46 and which I doubt will ever be thrown. - */ - final private[idna4s] class UnableToResizeBufferException extends IDNAException { - override val getMessage: String = - s"Can not resize buffer as it would exceed largest valid size ${Int.MaxValue}. What are you doing?" - - final override def toString: String = - s"UnableToResizeBufferException(${getMessage})" - } } diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala index c3950f05..1d4ee24d 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala @@ -33,6 +33,64 @@ import scala.util.control.NonFatal object Bootstring { + private val HalfIntMaxValue: Int = Int.MaxValue / 2 + + /** + * Calculate a new size for an `IntBuffer` so that it can accept at ''least'' the given + * new capacity. + * + * If the buffer is already at or exceeding the required size, then the buffer's current size + * is returned. Otherwise attempt to double the buffer's size as long as that won't overflow. + * If we can not double it, add `neededSize - remaining` to the current capacity. In the + * unbelievable case where `buffer.remaining + neededSize > Int.MaxValue`, then yield an + * error. 
+ */ + @inline + private def calculateNewSize(buffer: IntBuffer, neededSize: Int): Int = + if (buffer.remaining >= neededSize) { + // This will be the branch most often hit by a wide margin. + buffer.capacity + } else if (buffer.capacity <= HalfIntMaxValue && buffer.capacity + buffer.remaining >= neededSize) { + // Double it + buffer.capacity * 2 + } else if (neededSize.toLong - buffer.remaining.toLong <= Int.MaxValue.toLong) { + // I do not expect this branch will ever be executed under normal + // circumstances. + neededSize - buffer.remaining + } else { + // I do not expect this branch will ever be executed under normal + // circumstances. + throw BootstringException.UnableToResizeBufferException + } + + /** + * Copy the contents of a given `IntBuffer` into a new `IntBuffer` with double capacity if the + * given `IntBuffer` is at capacity, unless doubling it would overflow, in that case attempt + * to just add the minimum needed allocation, if that is not possible then throw an error. + * + * The error case should only happen if there is a bug or someone is intentionally abusing the + * system. We need to handle it as it could be used to influence the result to potentially + * change a URI. + */ + @inline + private def maybeResize(buffer: IntBuffer, neededSize: Int): IntBuffer = + if (buffer.remaining >= neededSize) { + // This will be the branch most often hit by a wide margin. + buffer + } else { + val pos: Int = buffer.position + val newSize: Int = calculateNewSize(buffer, neededSize) + + // Shadow here is because `(buffer: IntBuffer).position(pos): Buffer` + // but we want `IntBuffer`, e.g. it is getting widened to the super + // type. + IntBuffer.allocate(newSize).put(buffer.array) match { + case buffer => + buffer.position(pos) + buffer + } + } + /** * Bootstring encode given `String`. 
* diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/package.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/package.scala index 7c4b207d..2ea368db 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/package.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/package.scala @@ -53,16 +53,12 @@ package object core { acc :+ value } - private[idna4s] def codePointsAsBuffer(value: String): IntBuffer = { - val result: IntBuffer = foldLeftCodePoints(value)(IntBuffer.allocate(value.length)) { + private[idna4s] def codePointsAsBuffer(value: String): IntBuffer = + foldLeftCodePoints(value)(IntBuffer.allocate(value.length)) { case (acc, value) => acc.put(value) } - result.flip - result - } - private[idna4s] def stringFromCodePoints(value: IntBuffer): Either[String, String] = { val out: CharBuffer = CharBuffer.allocate(value.remaining * 2) @@ -107,62 +103,4 @@ package object core { buffer } - - final private[this] val HalfIntMaxValue = Int.MaxValue / 2 - - /** - * Calculate the a new size for an `IntBuffer` so that it can accept at ''least'' the given - * new capacity. - * - * If the buffer is already at or exceeding the required size, then the buffer's current size - * is returned. Otherwise attempt to double the buffer's size as long as that won't overflow. - * If we can not double it, add `neededSize - remaining` to the current capacity. In the - * unbelievable case where `buffer.remaining + neededSize > Int.MaxValue`, then yield an - * error. - */ - @inline - private def calculateNewSize(buffer: IntBuffer, neededSize: Int): Int = - if (buffer.remaining >= neededSize) { - // This will be the branch most often hit by a wide margin. 
- buffer.capacity - } else if (buffer.capacity <= HalfIntMaxValue && buffer.capacity + buffer.remaining >= neededSize) { - // Double it - buffer.capacity * 2 - } else if (neededSize.toLong - buffer.remaining.toLong <= Int.MaxValue.toLong) { - // I do not expect this branch will ever be executed under normal - // circumstances. - neededSize - buffer.remaining - } else { - // I do not expect this branch will ever be executed under normal - // circumstances. - throw new IDNAException.UnableToResizeBufferException - } - - /** - * Copy the contents of a given `IntBuffer` into a new `IntBuffer` with double capacity if the - * given `IntBuffer` is at capacity, unless doubling it would overflow, in that case attempt - * to just add the minimum needed allocation, if that is not possible then throw an error. - * - * The error case should only happen if there is a bug or someone is intentionally abusing the - * system. We need to handle it as it could be used to influence the result to potentially - * change a URI. - */ - @inline - private[idna4s] def maybeResize(buffer: IntBuffer, neededSize: Int): IntBuffer = - if (buffer.remaining >= neededSize) { - // This will be the branch most often hit by a wide margin. - buffer - } else { - val pos: Int = buffer.position - val newSize: Int = calculateNewSize(buffer, neededSize) - - // Shadow here is because `(buffer: IntBuffer).position(pos): Buffer` - // but we want `IntBuffer`, e.g. it is getting widened to the super - // type. 
- IntBuffer.allocate(newSize).put(buffer.array) match { - case buffer => - buffer.position(pos) - buffer - } - } } diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/CodePointMapper.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/CodePointMapper.scala index 9b329d24..1b94b002 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/CodePointMapper.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/CodePointMapper.scala @@ -25,11 +25,11 @@ import cats._ import cats.collections.BitSet import cats.data._ import cats.syntax.all._ +import java.lang.StringBuilder import org.typelevel.idna4s.core._ import scala.annotation.tailrec import scala.collection.immutable.IntMap import scala.util.control.NoStackTrace -import java.nio.IntBuffer object CodePointMapper extends GeneratedCodePointMapper { @@ -98,51 +98,43 @@ object CodePointMapper extends GeneratedCodePointMapper { input: String): Either[MappingException, String] = { val len: Int = input.length - @inline - def put(acc: IntBuffer, value: Int): IntBuffer = - maybeResize(acc, 1).put(value) - @tailrec def loop( - acc: IntBuffer, + acc: StringBuilder, errors: Chain[CodePointMappingException], index: Int): Either[MappingException, String] = if (index >= len || index < 0 /* Overflow check */ ) { NonEmptyChain .fromChain(errors) .fold( - Right(new String(acc.array, 0, acc.position)): Either[MappingException, String] - )(errors => { - Left(MappingException(errors, new String(acc.array(), 0, acc.position))) - }) + Right(acc.toString): Either[MappingException, String] + )(errors => Left(MappingException(errors, acc.toString))) } else { val value: Int = input.codePointAt(index) val nextIndex: Int = index + (if (value >= 0x10000) 2 else 1) - val outIndex: Int = acc.position // ASCII fast path if (value <= 0x7f) { asciiCodePointMap(value) match { case ASCII_VALID => - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, 
nextIndex) case ASCII_DISALLOWED_STD3_VALID => // DISALLOWED_STD3_VALID if (useStd3ASCIIRules) { loop( - put(acc, value), + acc.appendCodePoint(ReplacementCharacter), CodePointMappingException( index, - outIndex, "Disallowed code point in input.", CodePoint.unsafeFromInt(value)) +: errors, nextIndex ) } else { - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } case otherwise => - loop(put(acc, otherwise), errors, nextIndex) + loop(acc.appendCodePoint(otherwise), errors, nextIndex) } } else { @@ -153,26 +145,25 @@ object CodePointMapper extends GeneratedCodePointMapper { if (validAlways(value) || validNV8(value) || validXV8(value)) { // VALID - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } else if (mapped.contains(value)) { // MAPPED - loop(put(acc, mapped(value)), errors, nextIndex) + loop(acc.appendCodePoint(mapped(value)), errors, nextIndex) } else if (mappedMultiCodePoints.contains(value)) { // MAPPED MULTI loop( mappedMultiCodePoints(value).foldLeft(acc) { case (acc, value) => - put(acc, value) + acc.appendCodePoint(value) }, errors, nextIndex) } else if (disallowed(value)) { // DISALLOWED loop( - put(acc, value), + acc.appendCodePoint(ReplacementCharacter), CodePointMappingException( index, - outIndex, "Disallowed code point in input.", CodePoint.unsafeFromInt(value)) +: errors, nextIndex @@ -183,9 +174,9 @@ object CodePointMapper extends GeneratedCodePointMapper { } else if (deviationMapped.contains(value)) { // DEVIATION if (transitionalProcessing) { - loop(put(acc, deviationMapped(value)), errors, nextIndex) + loop(acc.appendCodePoint(deviationMapped(value)), errors, nextIndex) } else { - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } } else if (deviationMultiMapped.contains(value)) { // DEVIATION_MULTI @@ -193,51 +184,48 @@ object CodePointMapper extends GeneratedCodePointMapper { loop( 
deviationMultiMapped(value).foldLeft(acc) { case (acc, value) => - put(acc, value) + acc.appendCodePoint(value) }, errors, nextIndex) } else { - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } } else if (disallowedSTD3Valid(value)) { // DISALLOWED_STD3_VALID if (useStd3ASCIIRules) { loop( - put(acc, value), + acc.appendCodePoint(ReplacementCharacter), CodePointMappingException( index, - outIndex, "Disallowed code point in input.", CodePoint.unsafeFromInt(value)) +: errors, nextIndex ) } else { - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } } else if (disallowedSTD3Mapped.contains(value)) { // DISALLOWED_STD3_MAPPED if (useStd3ASCIIRules) { loop( - put(acc, value), + acc.appendCodePoint(ReplacementCharacter), CodePointMappingException( index, - outIndex, "Disallowed code point in input.", CodePoint.unsafeFromInt(value)) +: errors, nextIndex ) } else { - loop(put(acc, disallowedSTD3Mapped(value)), errors, nextIndex) + loop(acc.appendCodePoint(disallowedSTD3Mapped(value)), errors, nextIndex) } } else if (disallowedSTD3MultiMapped.contains(value)) { // DISALLOWED_STD3_MAPPED_MULTI if (useStd3ASCIIRules) { loop( - put(acc, value), + acc.appendCodePoint(ReplacementCharacter), CodePointMappingException( index, - outIndex, "Disallowed code point in input.", CodePoint.unsafeFromInt(value)) +: errors, nextIndex @@ -246,7 +234,7 @@ object CodePointMapper extends GeneratedCodePointMapper { loop( disallowedSTD3MultiMapped(value).foldLeft(acc) { case (acc, value) => - put(acc, value) + acc.appendCodePoint(value) }, errors, nextIndex) @@ -256,7 +244,7 @@ object CodePointMapper extends GeneratedCodePointMapper { if (transitionalProcessing) { loop(acc, errors, nextIndex) } else { - loop(put(acc, value), errors, nextIndex) + loop(acc.appendCodePoint(value), errors, nextIndex) } } else { // Should be impossible @@ -267,7 +255,7 @@ object CodePointMapper extends 
GeneratedCodePointMapper { } } - loop(IntBuffer.allocate(len + len / 2), Chain.empty, 0) + loop(new StringBuilder(len), Chain.empty, 0) } /** @@ -363,20 +351,9 @@ object CodePointMapper extends GeneratedCodePointMapper { sealed abstract class CodePointMappingException extends IDNAException with NoStackTrace { /** - * The index of the Unicode code point in the input where the failure occurred in the input - * string. - */ - def inputFailureIndex: Int - - /** - * The index of Unicode code point in the partially mapped output string where the failure - * occurred. - * - * This can deviate from the [[#inputFailureIndex]] because mapping of code points earlier - * in the input might have resulted in what was 1 code point in the input becoming more than - * 1 code point in the output. + * The index of the start of the Unicode code point in the input where the failure occurred. */ - def outputFailureIndex: Int + def failureIndex: Int /** * A description of why the failure occurred. @@ -394,23 +371,21 @@ object CodePointMapper extends GeneratedCodePointMapper { toString final override def toString: String = - s"CodePointMappingException(message = $message, inputFailureIndex = $inputFailureIndex, outputFailureIndex = $outputFailureIndex, codePoint = $codePoint)" + s"CodePointMappingException(message = $message, failureIndex = $failureIndex, codePoint = $codePoint)" } object CodePointMappingException { final private[this] case class CodePointMappingExceptionImpl( - override val inputFailureIndex: Int, - override val outputFailureIndex: Int, + override val failureIndex: Int, override val message: String, override val codePoint: CodePoint) extends CodePointMappingException private[idna4s] def apply( - inputFailureIndex: Int, - outputFailureIndex: Int, + failureIndex: Int, message: String, codePoint: CodePoint): CodePointMappingException = - CodePointMappingExceptionImpl(inputFailureIndex, outputFailureIndex, message, codePoint) + CodePointMappingExceptionImpl(failureIndex, 
message, codePoint) implicit val hashAndOrderForCodePointMappingException : Hash[CodePointMappingException] with Order[CodePointMappingException] = @@ -418,8 +393,15 @@ object CodePointMapper extends GeneratedCodePointMapper { override def hash(x: CodePointMappingException): Int = x.hashCode override def compare(x: CodePointMappingException, y: CodePointMappingException): Int = - (x.inputFailureIndex, x.outputFailureIndex, x.message, x.codePoint) - .compare((y.inputFailureIndex, y.outputFailureIndex, y.message, y.codePoint)) + x.failureIndex.compare(y.failureIndex) match { + case 0 => + x.message.compare(y.message) match { + case 0 => + x.codePoint.compare(y.codePoint) + case otherwise => otherwise + } + case otherwise => otherwise + } } implicit val showForCodePointMappingException: Show[CodePointMappingException] = @@ -440,63 +422,30 @@ object CodePointMapper extends GeneratedCodePointMapper { */ def errors: NonEmptyChain[CodePointMappingException] - /** - * The input string, mapped as much as was possible. Code points which were disallowed in - * the input are left in place ''unchanged'', this makes this value unsafe to render in - * error messages or back to the user. [[#renderablePartiallyMappedInput]] should be used to - * render error messages to the user. - * - * This value is present because UTS-46 mandates that the algorithm continue to validity - * checks, even in the event of failure, and the validity checks must operate on this - * variant of the partially mapped input. - */ - def unsafePartiallyMappedInput: String - /** * The input string, mapped as much as was possible. Code points which failed replaced with * the Unicode replacement character � (0xFFFD). Returning this value on failure is mandated * by UTS-46. 
*/ - def renderablePartiallyMappedInput: String + def partiallyMappedInput: String final override def getMessage: String = toString final override def toString: String = - s"MappingException(errors = ${errors}, renderablePartiallyMappedInput = ${renderablePartiallyMappedInput}, unsafePartiallyMappedInputHash = ${unsafePartiallyMappedInput.hash})" + s"MappingException(errors = ${errors}, partiallyMappedInput = ${partiallyMappedInput})" } object MappingException { final private[this] case class MappingExceptionImpl( override val errors: NonEmptyChain[CodePointMappingException], - override val unsafePartiallyMappedInput: String - ) extends MappingException { - - // We derive this lazily to avoid having to always have 2x memory - // allocated. It's only needed when rendering errors. - final override lazy val renderablePartiallyMappedInput: String = { - val outBuffer: IntBuffer = - codePointsAsBuffer(unsafePartiallyMappedInput) - val len: Int = outBuffer.limit - errors.traverse_(error => - outBuffer.put(error.outputFailureIndex, ReplacementCharacter): Id[Unit]) - - new String(outBuffer.array(), 0, len) - } - - override def equals(that: Any): Boolean = - that match { - case that: MappingException => - (this: MappingException) === that - case _ => - false - } - } + override val partiallyMappedInput: String) + extends MappingException private[idna4s] def apply( errors: NonEmptyChain[CodePointMappingException], - unsafePartiallyMappedInput: String): MappingException = - MappingExceptionImpl(errors, unsafePartiallyMappedInput) + partiallyMappedInput: String): MappingException = + MappingExceptionImpl(errors, partiallyMappedInput) implicit val hashAndOrderForMappingException : Hash[MappingException] with Order[MappingException] = @@ -507,7 +456,7 @@ object CodePointMapper extends GeneratedCodePointMapper { override def compare(x: MappingException, y: MappingException): Int = x.errors.compare(y.errors) match { case 0 => - 
x.unsafePartiallyMappedInput.compare(y.unsafePartiallyMappedInput) + x.partiallyMappedInput.compare(y.partiallyMappedInput) case otherwise => otherwise } } @@ -516,7 +465,7 @@ object CodePointMapper extends GeneratedCodePointMapper { /** * A constant for the Unicode replacement character �. */ - final private[this] val ReplacementCharacter = + final private val ReplacementCharacter = 0xfffd } diff --git a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala index ae0e0cdd..1ec9dd11 100644 --- a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala +++ b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala @@ -96,8 +96,8 @@ private[scalacheck] trait ScalaCheckInstances extends Serializable { ) implicit final def cogenCodePointMappingException: Cogen[CodePointMappingException] = - Cogen[(Int, Int, String, CodePoint)].contramap(value => - (value.inputFailureIndex, value.outputFailureIndex, value.message, value.codePoint)) + Cogen[(Int, String, CodePoint)].contramap(value => + (value.failureIndex, value.message, value.codePoint)) /** * A generator which will generate an input String to `CodePointMapper#mapCodePoints` which @@ -124,7 +124,7 @@ private[scalacheck] trait ScalaCheckInstances extends Serializable { implicit final def cogenMappingException: Cogen[MappingException] = Cogen[(List[CodePointMappingException], String)].contramap(value => - (value.errors.toList, value.unsafePartiallyMappedInput)) + (value.errors.toList, value.partiallyMappedInput)) implicit final def arbIDNA2008Status: Arbitrary[IDNA2008Status] = Arbitrary(Gen.oneOf(IDNA2008Status.NV8, IDNA2008Status.XV8)) diff --git a/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperPlatformTests.scala b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperPlatformTests.scala index 
3b1823bb..88c13496 100644 --- a/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperPlatformTests.scala +++ b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperPlatformTests.scala @@ -48,7 +48,7 @@ trait CodePointMapperPlatformTests extends DisciplineSuite { mapCodePoints(s) val icu4j: String = icu4jUTS46Normalizer2.normalize(s, new StringBuilder(s.size)).toString - idna4s.fold(_.renderablePartiallyMappedInput, identity) ?= icu4j + idna4s.fold(_.partiallyMappedInput, identity) ?= icu4j } } @@ -61,7 +61,7 @@ trait CodePointMapperPlatformTests extends DisciplineSuite { val icu4j: String = icu4jUTS46Normalizer2.normalize(ascii, new StringBuilder(ascii.size)).toString - idna4s.fold(_.renderablePartiallyMappedInput, identity) ?= icu4j + idna4s.fold(_.partiallyMappedInput, identity) ?= icu4j } } @@ -72,7 +72,7 @@ trait CodePointMapperPlatformTests extends DisciplineSuite { val icu4j: String = icu4jUTS46Normalizer2.normalize(s, new StringBuilder(s.size)).toString - assertEquals(idna4s.fold(_.renderablePartiallyMappedInput, identity), icu4j) + assertEquals(idna4s.fold(_.partiallyMappedInput, identity), icu4j) } test("a̸ࣶa should be consistent with icu4j") { @@ -82,7 +82,7 @@ trait CodePointMapperPlatformTests extends DisciplineSuite { val icu4j: String = icu4jUTS46Normalizer2.normalize(s, new StringBuilder(s.size)).toString - assertEquals(idna4s.fold(_.renderablePartiallyMappedInput, identity), icu4j) + assertEquals(idna4s.fold(_.partiallyMappedInput, identity), icu4j) } test("涇焑ꈛ਽৷降ٰࣶᕹ should be consistent with icu4j") { @@ -92,7 +92,7 @@ trait CodePointMapperPlatformTests extends DisciplineSuite { val icu4j: String = icu4jUTS46Normalizer2.normalize(s, new StringBuilder(s.size)).toString - assertEquals(idna4s.fold(_.renderablePartiallyMappedInput, identity), icu4j) + assertEquals(idna4s.fold(_.partiallyMappedInput, identity), icu4j) } } diff --git 
a/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperTests.scala b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperTests.scala index e1f22e82..fe1260f6 100644 --- a/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperTests.scala +++ b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/CodePointMapperTests.scala @@ -44,17 +44,17 @@ final class CodePointMapperTests extends DisciplineSuite with CodePointMapperPla test("Known invalid input strings should fail") { import CodePointMapper._ val input: String = "$invalid" + val unicodeReplacementCharacter: String = "\ufffd" assertEquals( CodePointMapper.mapCodePoints(input), Left( MappingException( NonEmptyChain.of( CodePointMappingException( - 0, 0, "Disallowed code point in input.", CodePoint.unsafeFromInt(input.codePointAt(0)))), - input + s"${unicodeReplacementCharacter}invalid" )) ) }