From 2f091e7bdd3144318f1fdb316635ddcfbbf3806c Mon Sep 17 00:00:00 2001 From: David Strawn Date: Fri, 7 Oct 2022 11:22:19 -0600 Subject: [PATCH 1/6] UTS46 Implementation Still needs, * Cleanup * Docs * Benchmarks * _Way more tests_, including from the Unicode sample set **But**, the current tests compare arbitrary strings with icu4j's results and pass. --- .../org/typelevel/idna4s/core/CodePoint.scala | 14 + .../idna4s/core/bootstring/Bootstring.scala | 23 +- .../typelevel/idna4s/core/uts46/UTS46.scala | 701 +++++++++++++++++- .../idna4s/core/uts46/UTS46Config.scala | 54 ++ .../idna4s/core/uts46/UnicodeDataBase.scala | 14 - .../idna4s/build/UnicodeDataCodeGen.scala | 39 - .../scalacheck/ScalaCheckInstances.scala | 17 + .../tests/uts46/UTS46PlatformTests.scala | 5 + .../tests/uts46/UTS46PlatformTests.scala | 51 ++ .../idna4s/tests/uts46/UTS46Tests.scala | 5 + 10 files changed, 853 insertions(+), 70 deletions(-) create mode 100644 core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala create mode 100644 tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala create mode 100644 tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala create mode 100644 tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala index 7a5da916..e8e3b3b4 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala @@ -276,6 +276,20 @@ object CodePoint extends CodePointPlatform { e.getLocalizedMessage } + /** A method to attempt to get a string description of an int32 value which is + * assumed to be a code point. + * + * The primary goal of this method is to create strings for error + * messages. 
We would like to have a rich description of the code point if + * possible, but in the event the int32 isn't a code point we don't want to + * fail. + */ + def descriptionFromInt(value: Int): String = + CodePoint.fromInt(value).fold( + _ => s"Value is outside the domain of valid code points: ${value}", + _.toString + ) + implicit val hashAndOrderForCodePoint: Hash[CodePoint] with Order[CodePoint] = new Hash[CodePoint] with Order[CodePoint] { override def hash(x: CodePoint): Int = x.hashCode diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala index 1d4ee24d..00cd2891 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala @@ -402,6 +402,12 @@ object Bootstring { } } + def encodePunycodeRaw(value: String): Either[BootstringException, String] = + encodeRaw(BootstringParams.PunycodeParams)(value) + + def decodePunycodeRaw(value: String): Either[BootstringException, String] = + decodeRaw(BootstringParams.PunycodeParams)(value) + /** * An error which occurred during the application of the Bootstring algorithm. */ @@ -412,15 +418,6 @@ object Bootstring { object BootstringException { - private def codePointDescription(value: Int): String = - CodePoint - .fromInt(value) - .fold( - // The first case should be impossible. - _ => s"Value is outside the domain of valid code points: ${value}", - _.toString - ) - private[Bootstring] case object UnableToResizeBufferException extends BootstringException { override val getMessage: String = s"Can not resize buffer as it would exceed largest valid size ${Int.MaxValue}. What are you doing?" @@ -435,11 +432,11 @@ object Bootstring { extends BootstringException { final override def getMessage: String = - s"Input contains a non-basic code point < the initial N value. 
Code Point: ${codePointDescription( + s"Input contains a non-basic code point < the initial N value. Code Point: ${CodePoint.descriptionFromInt( invalidCodePoint)}, Initial N: ${initialN}." final override def toString: String = - s"InvalidNonBasicCodePointException(invalidCodePoint = ${codePointDescription( + s"InvalidNonBasicCodePointException(invalidCodePoint = ${CodePoint.descriptionFromInt( invalidCodePoint)}, initialN = ${initialN}, getMessage = ${getMessage})" } @@ -459,13 +456,13 @@ object Bootstring { final private[Bootstring] case class BasicCodePointInNonBasicSection(codePoint: Int) extends BootstringException { final override def getMessage: String = - s"Decoded a basic code point in the non-basic section of the input. All basic code points must occur in the basic section. Code point: ${codePointDescription(codePoint)}" + s"Decoded a basic code point in the non-basic section of the input. All basic code points must occur in the basic section. Code point: ${CodePoint.descriptionFromInt(codePoint)}" } final private[Bootstring] case class NonBasicCodePointInBasicSection(codePoint: Int) extends BootstringException { final override def getMessage: String = - s"Decoded a non-basic code point in the basic section of the input. All non-basic code points must occurr in the non-basic section. Code point: ${codePointDescription(codePoint)}" + s"Decoded a non-basic code point in the basic section of the input. All non-basic code points must occurr in the non-basic section. 
Code point: ${CodePoint.descriptionFromInt(codePoint)}" } final private[Bootstring] case class WrappedException( diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 244c9e49..39a0e6fb 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -21,7 +21,700 @@ package org.typelevel.idna4s.core.uts46 -object UTS46 - extends GeneratedUnicodeData - with GeneratedJoiningType - with GeneratedBidirectionalClass {} +import cats.data._ +import cats.syntax.all._ +import java.text.Normalizer +import org.typelevel.idna4s.core.CodePoint +import org.typelevel.idna4s.core.IDNAException +import org.typelevel.idna4s.core.bootstring._ +import scala.annotation.tailrec +import scala.util.control.NoStackTrace + +object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with GeneratedBidirectionalClass { + + def toASCIIRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = + toASCIIRaw(checkHyphens = config.checkHyphens, checkBidi = config.checkBidi, checkJoiners = config.checkJoiners, useStd3ASCIIRules = config.useStd3ASCIIRules, transitionalProcessing = config.transitionalProcessing, verifyDnsLength = config.verifyDnsLength)(value) + + def toUnicodeRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = + toUnicodeRaw(checkHyphens = config.checkHyphens, checkBidi = config.checkBidi, checkJoiners = config.checkJoiners, useStd3ASCIIRules = config.useStd3ASCIIRules, transitionalProcessing = config.transitionalProcessing)(value) + + def toASCIIRaw(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean, verifyDnsLength: Boolean)(value: String): Either[UTS46FailureException, String] = + process(checkHyphens = checkHyphens, checkBidi = checkBidi, 
checkJoiners = checkJoiners, useStd3ASCIIRules = useStd3ASCIIRules, transitionalProcessing = transitionalProcessing, value = value).flatMap(labels => + labels.nonEmptyTraverse(label => + encodeToPunycodeIfNeeded(label).fold( + e => Ior.both(NonEmptyChain(e), label), + label => Ior.right(label) + ) + ) + ).flatMap(labels => + if (verifyDnsLength) { + NonEmptyChain.fromChain(checkDnsLength(labels)).fold( + Ior.right(labels): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] + )(errors => + Ior.both(errors, labels) + ) + } else { + Ior.right(labels) + } + ).fold( + errors => Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + labels => Right(labels.mkString_(FULL_STOP.toString)), + { + case (errors, labels) => Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + } + ) + + def toUnicodeRaw(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean)(value: String): Either[UTS46FailureException, String] = + process(checkHyphens = checkHyphens, checkBidi = checkBidi, checkJoiners = checkJoiners, useStd3ASCIIRules = useStd3ASCIIRules, transitionalProcessing = transitionalProcessing, value = value).fold( + errors => Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + labels => Right(labels.mkString_(FULL_STOP.toString)), + { + case (errors, labels) => Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + } + ) + + // A Bidi domain name is a domain name containing at least one character + // with Bidi_Class R, AL, or AN. See [IDNA2008] RFC 5893, Section 1.4. 
+ + private def isBidiDomainName(value: String): Boolean = { + val len: Int = value.size + @tailrec + def loop(i: Int): Boolean = + if (i >= len) { + false + } else { + val cp: Int = value.codePointAt(i) + val bidiCategory: String = + Either.catchNonFatal( + bidiTypeForCodePointInt(cp) + ).fold( + e => throw new RuntimeException(s"""Error getting ${String.format("%04X", cp)}""", e), + identity + ) + + if (bidiCategory === "R" || bidiCategory === "AL" || bidiCategory === "AN") { + true + } else { + val nextI: Int = if (cp >= 0x10000) i + 2 else i + 1 + loop(nextI) + } + } + + loop(0) + } + + private def encodeToPunycodeIfNeeded(label: String): Either[Bootstring.BootstringException, String] = { + val len: Int = label.size + + @tailrec + def hasNonASCIIChar(charIndex: Int): Boolean = + if (charIndex >= len) { + false + } else { + val c: Char = label.charAt(charIndex) + if (c.toInt > 127) { + true + } else { + hasNonASCIIChar(charIndex + 1) + } + } + + if (hasNonASCIIChar(0)) { + Bootstring.encodePunycodeRaw(label).map(label => + s"${PUNYCODE_PREFIX}${label}" + ) + } else { + Right(label) + } + } + + private def checkDnsLength(value: NonEmptyChain[String]): Chain[UTS46Exception] = { + // TODO: Can be optimized to one traversal + val emptyLabel: Boolean = value.last.isEmpty + val dots: Long = value.length - 1L + val totalSize: Long = value.reduceLeftTo(_.size){case (acc, value) => acc + value.size} + dots + val totalSizeWithoutEmptyLabel: Long = + if (emptyLabel && value.size > 1L) { + // Subtract 1 for the empty label's dot. 
+ totalSize - 1L + } else { + totalSize + } + val withoutEmptyLabel: Chain[String] = + if (emptyLabel) { + value.init + } else { + value.toChain + } + + val emptyLabelError: Chain[UTS46Exception] = + if (emptyLabel) { + Chain.one(UTS46Exception.EmptyRootLabelException) + } else { + Chain.empty + } + + val domainLengthError: Chain[UTS46Exception] = + if (totalSizeWithoutEmptyLabel > 253L) { + Chain.one(UTS46Exception.DomainNameExceedsMaxLengthException(totalSizeWithoutEmptyLabel)) + } else { + Chain.empty + } + + withoutEmptyLabel.foldLeft(emptyLabelError ++ domainLengthError){ + case (errors, label) => + val len: Long = label.size.toLong + if (len < 1L) { + UTS46Exception.NonRootEmptyLabelException +: errors + } else if (len > 63L) { + UTS46Exception.LabelExceedsMaxLengthException(len) +: errors + } else { + errors + } + } + } + + private def validInternal(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, value: String): Chain[UTS46Exception] = { + val len: Int = value.length() + + // We can skip the normalization check, this is always performed by either + // toASCII or toUnicode. Thus we only need to do this for the public + // validity check method. We also don't need to check for FULL_STOP code + // points, as toASCII and toUnicode will split on them. We also do not + // need to check step 6, that is taken care of by the mapping step in + // toASCII and toUnicode. 
+
+    def checkHyphen34(errors: Chain[UTS46Exception], previousCodePoint: Option[Int], cp: Int): Chain[UTS46Exception] = // invoked by positionalChecks at code point index 3 (the 4th position)
+      if (checkHyphens && cp === HYPHEN_MINUS_INT && previousCodePoint === Some(HYPHEN_MINUS_INT)) { // hyphen rules apply only when checkHyphens is on, as in checkFirstHyphen
+        errors :+ UTS46Exception.HyphenMinusInThirdAndFourthPositionException
+      } else {
+        errors
+      }
+
+    def checkNonJoinerPrevCodePoint(startCharIndex: Int): Boolean = // true when the code point just before startCharIndex is in the Virama set
+      if (startCharIndex <= 0) {
+        false
+      } else {
+        val cp: Int = value.codePointBefore(startCharIndex)
+        viramaCanonicalCombiningClassCodePoints(cp)
+      }
+
+    def checkNonJoinerBefore(startCharIndex: Int): Boolean = { // scans backwards over joining type T until an L or D is found
+
+      @tailrec
+      def loop(charIndex: Int): Boolean =
+        if (charIndex <= 0) {
+          false
+        } else {
+          val cp: Int = value.codePointBefore(charIndex)
+          val nextIndex: Int = charIndex - (if (cp >= 0x10000) 2 else 1) // supplementary code points occupy two chars
+
+          if (isJoiningTypeL(cp) || isJoiningTypeD(cp)) {
+            true
+          } else {
+            if (isJoiningTypeT(cp)) {
+              loop(nextIndex)
+            } else {
+              false
+            }
+          }
+        }
+
+      loop(startCharIndex)
+    }
+
+    def checkNonJoinerAfter(startCharIndex: Int): Boolean = { // scans forwards over joining type T until an R or D is found
+
+      @tailrec
+      def loop(charIndex: Int): Boolean =
+        if (charIndex >= len) {
+          false
+        } else {
+          val cp: Int = value.codePointAt(charIndex)
+          val nextIndex: Int = charIndex + (if (cp >= 0x10000) 2 else 1) // supplementary code points occupy two chars
+
+          if (charIndex === startCharIndex && cp === ZERO_WIDTH_NON_JOINER_INT) {
+            // skip the zero width non-joiner we started on
+            loop(nextIndex)
+          } else {
+            if (isJoiningTypeR(cp) || isJoiningTypeD(cp)) {
+              true
+            } else {
+              if (isJoiningTypeT(cp)) {
+                loop(nextIndex)
+              } else {
+                false
+              }
+            }
+          }
+        }
+
+      loop(startCharIndex)
+    }
+
+    def checkFirstHyphen(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] =
+      if (checkHyphens && codePoint === HYPHEN_MINUS_INT) {
+        errors :+ UTS46Exception.LabelBeginsWithHyphenMinusException
+      } else {
+        errors
+      }
+
+    def checkCombiningMark(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] =
+      if (combiningMarkCodePoints.apply(codePoint)) {
+        errors :+ UTS46Exception.LabelStartsWithGeneralMarkException(codePoint)
+      } else {
+
errors + } + + def checkFirstBidi(errors: Chain[UTS46Exception], codePoint: Int): (Chain[UTS46Exception], Option[BidiType]) = + if (checkBidi) { + bidiTypeForCodePointInt(codePoint) match { + case "L" => + (errors, Some(BidiType.LTR)) case "R" | "AL" => + (errors, Some(BidiType.RTL(None))) + case otherwise => + (errors :+ UTS46Exception.InvalidBidiTypeForFirstCodePointException(codePoint, otherwise), None) + } + } else { + (errors, None) + } + + def generalBidiCheck(errors: Chain[UTS46Exception], bidiType: BidiType, bidiNumberTypeError: BidiNumberTypeError, bidiEndLabelValid: BidiEndLabelValid, codePoint: Int): (BidiType, BidiNumberTypeError, BidiEndLabelValid, Chain[UTS46Exception]) = + bidiType match { + case BidiType.RTL(numberType) => + bidiTypeForCodePointInt(codePoint) match { + case "AN" => + numberType match { + case Some(BidiRTLNumberType.ArabicNumber) => + (BidiType.RTL(numberType), bidiNumberTypeError, BidiEndLabelValid(true), errors) + case Some(BidiRTLNumberType.EuropeanNumber) => + // Set error + (BidiType.RTL(numberType), BidiNumberTypeError(true), BidiEndLabelValid(true), errors) + case None => + // Set number type + (BidiType.RTL(Some(BidiRTLNumberType.ArabicNumber)), bidiNumberTypeError, BidiEndLabelValid(true), errors) + } + case "EN" => + numberType match { + case Some(BidiRTLNumberType.ArabicNumber) => + // Set Error + (BidiType.RTL(numberType), BidiNumberTypeError(true), BidiEndLabelValid(true), errors) + case Some(BidiRTLNumberType.EuropeanNumber) => + (BidiType.RTL(numberType), bidiNumberTypeError, BidiEndLabelValid(true), errors) + case None => + // Set number Type + (BidiType.RTL(Some(BidiRTLNumberType.EuropeanNumber)), bidiNumberTypeError, BidiEndLabelValid(true), errors) + } + case "R" | "AL" => + (bidiType, bidiNumberTypeError, BidiEndLabelValid(true), errors) + case "ES" | "CS" | "ET" | "ON" | "BN" => + (bidiType, bidiNumberTypeError, BidiEndLabelValid(false), errors) + case "NSM" => + (bidiType, bidiNumberTypeError, bidiEndLabelValid, 
errors) + case otherwise => + (bidiType, bidiNumberTypeError, BidiEndLabelValid(false), UTS46Exception.InvalidBidiTypeForRTLLabelException(codePoint, otherwise) +: errors) + } + case BidiType.LTR => + bidiTypeForCodePointInt(codePoint) match { + case "L" | "EN" => + (bidiType, BidiNumberTypeError(false), BidiEndLabelValid(true), errors) + case "ES" | "CS" | "ET" | "ON" | "BN" => + (bidiType, BidiNumberTypeError(false), BidiEndLabelValid(false), errors) + case "NSM" => + (bidiType, BidiNumberTypeError(false), bidiEndLabelValid, errors) + case otherwise => + (bidiType, BidiNumberTypeError(false), bidiEndLabelValid, UTS46Exception.InvalidBidiTypeForLTRLabelException(codePoint, otherwise) +: errors) + } + } + + def checkFirstCodePoint(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] = + checkCombiningMark(checkFirstHyphen(errors, codePoint), codePoint) + + def positionalChecks(errors: Chain[UTS46Exception], codePointIndex: Int, previousCodePoint: Option[Int], codePoint: Int): Chain[UTS46Exception] = + codePointIndex match { + case 0 => + checkFirstCodePoint(errors, codePoint) + case 3 => + checkHyphen34(errors, previousCodePoint, codePoint) + case _ => + errors + } + + def checkForJoiners(errors: Chain[UTS46Exception], previousCodePoint: Option[Int], charIndex: Int, codePoint: Int): Chain[UTS46Exception] = + if (checkJoiners) { + if (codePoint === ZERO_WIDTH_NON_JOINER_INT || codePoint === ZERO_WIDTH_JOINER_INT) { + if (previousCodePoint.fold(false)(viramaCanonicalCombiningClassCodePoints.apply)) { + errors + } else if (codePoint === ZERO_WIDTH_NON_JOINER_INT) { + if (checkNonJoinerPrevCodePoint(charIndex) || (checkNonJoinerBefore(charIndex) && checkNonJoinerAfter(charIndex))) { + errors + } else { + errors :+ UTS46Exception.ContextJViolationForNonJoinerException + } + } else { + errors :+ UTS46Exception.ContextJViolationForJoinerException + } + } else { + errors + } + } else { + errors + } + + def checkFinalHyphen(errors: Chain[UTS46Exception], 
previousCodePoint: Option[Int]): Chain[UTS46Exception] =
+      if (checkHyphens) {
+        previousCodePoint.fold(
+          errors
+        ){
+          case HYPHEN_MINUS_INT =>
+            UTS46Exception.LabelEndsWithHyphenMinusException +: errors
+          case _ =>
+            errors
+        }
+      } else {
+        errors
+      }
+
+    def checkFinalBidi(errors: Chain[UTS46Exception], bidiType: BidiType, bidiNumberTypeError: BidiNumberTypeError, bidiEndLabelValid: BidiEndLabelValid): Chain[UTS46Exception] = // end-of-label bidi verdicts: mixed AN/EN, then invalid final character
+      (if (bidiNumberTypeError.value) {
+        UTS46Exception.MutuallyExclusiveBidiNumberTypesException +: errors
+      } else {
+        errors
+      }) match {
+        case errors =>
+          if (bidiEndLabelValid.value) {
+            errors
+          } else {
+            bidiType match {
+              case _: BidiType.LTR.type =>
+                UTS46Exception.LTRLabelDidNotEndWithCorrectBidiTypeException +: errors
+              case _: BidiType.RTL =>
+                UTS46Exception.RTLLabelDidNotEndWithCorrectBidiTypeException +: errors
+            }
+          }
+      }
+
+    // TODO: Optimization, consider making multiple variants of this loop for
+    // each permutation of conditions.
+    @tailrec
+    def loop(
+      errors: Chain[UTS46Exception],
+      previousCodePoint: Option[Int],
+      bidiType: Option[BidiType],
+      bidiNumberTypeError: BidiNumberTypeError,
+      bidiEndLabelValid: BidiEndLabelValid,
+      codePointIndex: Int,
+      charIndex: Int): Chain[UTS46Exception] =
+      if (charIndex >= len) {
+        // step 3 end check; the trailing hyphen check runs whether or not bidi state exists
+        bidiType.fold(
+          checkFinalHyphen(errors, previousCodePoint)
+        )(bidiType =>
+          checkFinalBidi(checkFinalHyphen(errors, previousCodePoint), bidiType, bidiNumberTypeError, bidiEndLabelValid)
+        )
+      } else {
+        val cp: Int = value.codePointAt(charIndex) // codePointAt takes a UTF-16 char index, not a code point ordinal
+        val nextCPIndex: Int = codePointIndex + 1
+        val nextCharIndex: Int = charIndex + (if (cp >= 0x10000) 2 else 1) // supplementary code points occupy two chars
+
+        (if (codePointIndex === 0 && checkBidi) {
+          checkFirstBidi(errors, cp)
+        } else {
+          (errors, bidiType)
+        }) match {
+          // Intentional shadow
+          case (errors, bidiType) =>
+            checkForJoiners(
+              positionalChecks(errors, codePointIndex, previousCodePoint, cp),
+              previousCodePoint,
+              charIndex,
+              cp
+            ) match {
+              case errors =>
+                bidiType match {
+                  case None
=> + loop(errors, Some(cp), bidiType, bidiNumberTypeError, bidiEndLabelValid, nextCPIndex, nextCharIndex) + case Some(bidiType) => + generalBidiCheck(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid, cp) match { + case (bidiType, bidiNumberTypeError, bidiEndLabelValid, errors) => + loop(errors, Some(cp), Some(bidiType), bidiNumberTypeError, bidiEndLabelValid, nextCPIndex, nextCharIndex) + } + } + } + } + } + + loop(Chain.empty, None, None, BidiNumberTypeError(false), BidiEndLabelValid(false), 0, 0) + } + + private def process(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean, value: String): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = { + + val shouldCheckBidi: Boolean = + checkBidi && isBidiDomainName(value) + + def processLabel(label: String): Ior[NonEmptyChain[IDNAException], String] = { + def validateLablel(label: String): Ior[NonEmptyChain[IDNAException], String] = + NonEmptyChain.fromChain(validInternal(checkHyphens = checkHyphens, checkBidi = shouldCheckBidi, checkJoiners = checkJoiners, label)) match { + case Some(nec) => + Ior.both(nec, label) + case _ => + Ior.right(label) + } + + if (label.startsWith(PUNYCODE_PREFIX)) { + Bootstring.decodePunycodeRaw(label.drop(4)) match { + case Left(e) => + Ior.both(NonEmptyChain.one(e), label) + case Right(label) => + // When it is a Punycode label, we would always use + // non-transitional processing for validation according to UTS-46, + // however the validity check which requires non-transitional + // processing (validity check 6) is not necessarily if the input + // has already gone through the UTS-46 mapping step. It is only + // applicable if we are applying the validity check to an + // arbitrary string, which we never do. 
+ validateLablel(label) + } + } else { + validateLablel(label) + } + } + + @tailrec + def processLabels(labels: NonEmptyChain[String], acc: Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]]): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = + labels.uncons match { + case (label, labels) => + acc.combine(processLabel(label).map(NonEmptyChain.one)) match { + // Intentional Shadow + case acc => + NonEmptyChain.fromChain(labels) match { + case Some(labels) => + processLabels(labels, acc) + case _ => + acc + } + } + } + + @tailrec + def toLabels(value: String, acc: Chain[String]): NonEmptyChain[String] = + if (value.isEmpty) { + // It is important that we don't ignore the empty label. + NonEmptyChain.fromChainAppend(acc, value) + } else { + value.span(_ != FULL_STOP) match { + case (label, rest) if rest.isEmpty => + NonEmptyChain.fromChainAppend(acc, label) + case (label, rest) => + // First character in rest must be '.' + toLabels(rest.tail, acc :+ label) + } + } + + Ior.fromEither( + CodePointMapper.mapCodePoints(useStd3ASCIIRules, transitionalProcessing)(value).map(nfc) + ).leftMap(NonEmptyChain.one[IDNAException]).flatMap(value => + (toLabels(value, Chain.empty).uncons match { + case (label, labels) => + processLabel(label).map(NonEmptyChain.one) match { + case acc => + NonEmptyChain.fromChain(labels) match { + case Some(labels) => + processLabels(labels, acc) + case _ => + acc + } + } + }) + ) + } + + private def nfc(value: String): String = + Normalizer.normalize(value, Normalizer.Form.NFC) + + private final val FULL_STOP = '\u002e' + + private final val PUNYCODE_PREFIX = "xn--" + + private final val HYPHEN_MINUS = '\u002d' + + private final val HYPHEN_MINUS_INT = '\u002d'.toInt + + private final val ZERO_WIDTH_NON_JOINER = '\u200c' + + private final val ZERO_WIDTH_NON_JOINER_INT = '\u200c'.toInt + + private final val ZERO_WIDTH_JOINER = '\u200d' + + private final val ZERO_WIDTH_JOINER_INT = '\u200d'.toInt + + private final case class 
BidiNumberTypeError(value: Boolean) extends AnyVal + + private final case class BidiEndLabelValid(value: Boolean) extends AnyVal + + private sealed abstract class BidiRTLNumberType extends Serializable + + private object BidiRTLNumberType { + case object EuropeanNumber extends BidiRTLNumberType + case object ArabicNumber extends BidiRTLNumberType + } + + private sealed abstract class BidiType extends Serializable + + private object BidiType { + case object LTR extends BidiType + final case class RTL(numberType: Option[BidiRTLNumberType]) extends BidiType + } + + sealed abstract class UTS46FailureException extends IDNAException with NoStackTrace { + def errors: NonEmptyChain[IDNAException] + + def partiallyProcessedValue: Option[String] + + override final def getMessage: String = + s"""Errors encountered during UTS-46 processing: ${errors.map(_.getLocalizedMessage).mkString_(", ")}""" + + override final def toString: String = + s"UTS46FailureException(errors = ${errors})" + } + + object UTS46FailureException { + private[this] final case class UTS46FailureExceptionImpl(override val errors: NonEmptyChain[IDNAException], override val partiallyProcessedValue: Option[String]) extends UTS46FailureException + + private[UTS46] def apply(errors: NonEmptyChain[IDNAException], partiallyProcessedValue: Option[String]): UTS46FailureException = + UTS46FailureExceptionImpl(errors, partiallyProcessedValue) + } + + sealed abstract class UTS46Exception extends IDNAException with NoStackTrace + + object UTS46Exception { + private[UTS46] case object HyphenMinusInThirdAndFourthPositionException extends UTS46Exception { + override val getMessage: String = + "Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this." 
+ + override def toString: String = + s"HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LabelBeginsWithHyphenMinusException extends UTS46Exception { + override val getMessage: String = + "Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this." + + override def toString: String = + s"LabelBeginsWithHyphenMinusException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LabelEndsWithHyphenMinusException extends UTS46Exception { + override val getMessage: String = + "Label ends with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this." + + override def toString: String = + s"LabelEndsWithHyphenMinusException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] final case class LabelStartsWithGeneralMarkException(cp: Int) extends UTS46Exception { + private def description: String = + CodePoint.descriptionFromInt(cp) + + override def getMessage: String = + s"The label starts with a code point which indicates a combining mark (General_Category=Mark in Unicode). This is forbidden by UTS-46: ${description}" + + override def toString: String = + s"LabelStartsWithGeneralMarkException(cp = ${description}, getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object ContextJViolationForNonJoinerException extends UTS46Exception { + override val getMessage: String = + "ContextJ violation found for zero width non-joiner code point 0x200c. If present in a label, it must follow a code point which has a canonical combining class of Virama or it must follow a code point with a joining type of L (Left joining) or D (Dual joining) followed by zero or more code points with a joining type of T (transparent), then 0x200c, then be have zero or more code points after with a T joining type then a code point with a joining type of R (Right joining) or D." 
+
+      override def toString: String =
+        s"ContextJViolationForNonJoinerException(getLocalizedMessage = ${getLocalizedMessage})"
+    }
+
+    private[UTS46] case object ContextJViolationForJoinerException extends UTS46Exception { // raised by checkForJoiners for U+200D not preceded by a Virama
+      override val getMessage: String =
+        "ContextJ violation found for zero width joiner code point 0x200d. If present in a label, it must follow a code point which has a canonical combining class of Virama."
+
+      override def toString: String = // must print this object's own name
+        s"ContextJViolationForJoinerException(getLocalizedMessage = ${getLocalizedMessage})"
+    }
+
+    private[UTS46] final case class InvalidBidiTypeForFirstCodePointException(codePoint: Int, bidiType: String) extends UTS46Exception {
+      override def getMessage: String =
+        s"Invalid bidirectional type for first code point in label. Expected L, R, or AL, got ${bidiType}. Code point: ${CodePoint.descriptionFromInt(codePoint)}"
+
+      override def toString: String =
+        s"InvalidBidiTypeForFirstCodePointException(codePoint = ${codePoint}, bidiType = ${bidiType})"
+    }
+
+    private[UTS46] final case class InvalidBidiTypeForRTLLabelException(codePoint: Int, bidiType: String) extends UTS46Exception {
+      override def getMessage: String =
+        s"In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}"
+
+      override def toString: String =
+        s"InvalidBidiTypeForRTLLabelException(codePoint = ${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})"
+    }
+
+    private[UTS46] final case class InvalidBidiTypeForLTRLabelException(codePoint: Int, bidiType: String) extends UTS46Exception {
+      override def getMessage: String =
+        s"In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}"
+
+      override def toString: String =
+        s"InvalidBidiTypeForLTRLabelException(codePoint = 
${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})" + } + + private[UTS46] case object MutuallyExclusiveBidiNumberTypesException extends UTS46Exception { + override val getMessage: String = + "In an RTL label, if an EN is present, no AN may be present, and vice versa, however this label has both." + + override def toString: String = + s"MutuallyExclusiveBidiNumberTypesException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object RTLLabelDidNotEndWithCorrectBidiTypeException extends UTS46Exception { + override val getMessage: String = + "In an RTL label, the end of the label must be a character with Bidi property R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM, but this was not the case." + + override def toString: String = + s"RTLLabelDidNotEndWithCorrectBidiTypeException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LTRLabelDidNotEndWithCorrectBidiTypeException extends UTS46Exception { + override val getMessage: String = + "In an LTR label, the end of the label must be a character with Bidi property L or EN, followed by zero or more characters with Bidi property NSM, but this was not the case." + + override def toString: String = + s"LTRLabelDidNotEndWithCorrectBidiTypeException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object NonRootEmptyLabelException extends UTS46Exception { + override val getMessage: String = "An empty label was present but it was not the root label. This is forbidden." + + override def toString: String = + s"NonRootEmptyLabelException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case class LabelExceedsMaxLengthException(size: Long) extends UTS46Exception { + // TODO: Include offending label? Need to check Unicode security recommendations. 
+ override def getMessage: String = s"A domain label is required to be between 1 and 63 characters when represented as ASCII, but got ${size}." + + override def toString: String = + s"LabelExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case class DomainNameExceedsMaxLengthException(size: Long) extends UTS46Exception { + // TODO: Include offending domain? Need to check Unicode security recommendations. + override def getMessage: String = s"A domain name must be between 1 and 253 characters when represented as ASCII, but got ${size}." + + override def toString: String = + s"DomainNameExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object EmptyRootLabelException extends UTS46Exception { + override val getMessage: String = "The domain ends with the empty root label. While this is a a valid domain, UTS-46 forbids this notation." + + override def toString: String = + s"EmptyRootLabelException(getLocalizedMessage = ${getLocalizedMessage})" + } + } +} diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala new file mode 100644 index 00000000..33b1a4e3 --- /dev/null +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala @@ -0,0 +1,54 @@ +package org.typelevel.idna4s.core.uts46 + +sealed abstract class UTS46Config extends Serializable { + def checkHyphens: Boolean + def checkBidi: Boolean + def checkJoiners: Boolean + def useStd3ASCIIRules: Boolean + def transitionalProcessing: Boolean + def verifyDnsLength: Boolean + + def withCheckHyphens(value: Boolean): UTS46Config + def withCheckBidi(value: Boolean): UTS46Config + def withCheckJoiners(value: Boolean): UTS46Config + def withUseStd3ASCIIRules(value: Boolean): UTS46Config + def withTransitionalProcessing(value: Boolean): UTS46Config + def 
withVerifyDNSLength(value: Boolean): UTS46Config + + override final def toString: String = + s"UTS46Config(checkHyphens = ${checkHyphens}, checkBidi = ${checkBidi}, checkJoiners = ${checkJoiners}, useStd3ASCIIRules = ${useStd3ASCIIRules}, transitionalProcessing = ${transitionalProcessing}, verifyDnsLength = ${verifyDnsLength})" +} + +object UTS46Config { + private[this] final case class UTS46ConfigImpl(override val checkHyphens: Boolean, override val checkBidi: Boolean, override val checkJoiners: Boolean, override val useStd3ASCIIRules: Boolean, override val transitionalProcessing: Boolean, override val verifyDnsLength: Boolean) extends UTS46Config { + override def withCheckHyphens(value: Boolean): UTS46Config = + copy(checkHyphens = value) + override def withCheckBidi(value: Boolean): UTS46Config = + copy(checkBidi = value) + override def withCheckJoiners(value: Boolean): UTS46Config = + copy(checkJoiners = value) + override def withUseStd3ASCIIRules(value: Boolean): UTS46Config = + copy(useStd3ASCIIRules = value) + override def withTransitionalProcessing(value: Boolean): UTS46Config = + copy(transitionalProcessing = value) + override def withVerifyDNSLength(value: Boolean): UTS46Config = + copy(verifyDnsLength = value) + } + + def apply( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + verifyDnsLength: Boolean + ): UTS46Config = + UTS46ConfigImpl( + checkHyphens, + checkBidi, + checkJoiners, + useStd3ASCIIRules, + transitionalProcessing, + verifyDnsLength + ) +} diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UnicodeDataBase.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UnicodeDataBase.scala index fd80a383..8a01af7c 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UnicodeDataBase.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UnicodeDataBase.scala @@ -21,7 +21,6 @@ package 
org.typelevel.idna4s.core.uts46 -import scala.collection.immutable.IntMap import cats.collections.BitSet /** @@ -67,17 +66,4 @@ private[uts46] trait UnicodeDataBase { * [[https://www.iana.org/assignments/idna-tables-12.0.0/idna-tables-12.0.0.xhtml]] */ protected def viramaCanonicalCombiningClassCodePoints: BitSet - - /** - * The bidirectional category for all Unicode code points. - * - * These are used to check the bidi (bidirectional) rules for the UTS-46 validity criteria. - * The actual rules are defined in RFC-5893 section 2. - * - * @see - * [[https://www.unicode.org/reports/tr46/#Validity_Criteria Validity_Criteria]] - * @see - * [[https://www.rfc-editor.org/rfc/rfc5893.txt]] - */ - protected def bidirectionalCategoryMap: IntMap[String] } diff --git a/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala b/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala index 8a40aae4..591a7e94 100644 --- a/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala +++ b/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala @@ -75,7 +75,6 @@ object UnicodeDataCodeGen { package org.typelevel.idna4s.core.uts46 import cats.collections.BitSet -import scala.collection.immutable.IntMap private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( Type.Name(BaseTypeName), @@ -83,7 +82,6 @@ private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( Nil)} { override final protected lazy val combiningMarkCodePoints: BitSet = $combiningMarkCodePointsRHS - ..${bidirectionalCategoryDefs(unicodeData)} ..${viramaCanonicalCombiningClassCodePointsDefs(unicodeData)} }""" } @@ -951,43 +949,6 @@ private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( } } - /** - * Create the defs needed for the bidirectional information about Unicode code points. 
- */ - private def bidirectionalCategoryDefs( - unicodeData: UnicodeData[UnicodeCodePointInfomation]): List[Defn] = { - val categoryData: UnicodeData[BidirectionalCategory] = - unicodeData.mapValues( - _.bidirectionalCategory - ) - val (singles, ranges): ( - SortedMap[CodePointRange.Single, BidirectionalCategory], - SortedMap[CodePointRange, BidirectionalCategory]) = categoryData.partitioned - val rangeTerms: List[Term] = ranges.toList.map { - case (k, v) => - q"(Range.inclusive(${Lit.Int(k.lower.value)}, ${Lit.Int(k.upper.value)}), ${Lit.String(v.value)})" - } - val singleTerms: List[Term] = - singles.toList.map { - case (k, v) => - q"(${Lit.Int(k.lower.value)}, ${Lit.String(v.value)})" - } - val baseMap: Term = - q"IntMap(..$singleTerms)" - - List( - q"""private final def bidirectionalCategoryBaseMap: IntMap[String] = $baseMap""", - q"""override final protected lazy val bidirectionalCategoryMap: IntMap[String] = - List[(Range, String)](..$rangeTerms).foldLeft(bidirectionalCategoryBaseMap){ - case (k, (range, result)) => - range.foldLeft(k){ - case (k, cp) => - k.updated(cp, result) - } - }""" - ) - } - /** * Extract out the Unicode code points which have a canonical combining class of Virama. This * is the only class we need to know about for UTS-46. 
diff --git a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala index 1ec9dd11..87b9044d 100644 --- a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala +++ b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala @@ -194,4 +194,21 @@ private[scalacheck] trait ScalaCheckInstances extends Serializable { .shrink(value.codePoint) .filterNot(_.isSurrogate) .map(Delimiter.unsafeFromCodePoint)) + + implicit final def arbUTS46Config: Arbitrary[UTS46Config] = + Arbitrary( + for { + checkHyphens <- Arbitrary.arbitrary[Boolean] + checkBidi <- Arbitrary.arbitrary[Boolean] + checkJoiners <- Arbitrary.arbitrary[Boolean] + useStd3ASCIIRules <- Arbitrary.arbitrary[Boolean] + transitionalProcessing <- Arbitrary.arbitrary[Boolean] + verifyDnsLength <- Arbitrary.arbitrary[Boolean] + } yield UTS46Config(checkHyphens, checkBidi, checkJoiners, useStd3ASCIIRules, transitionalProcessing, verifyDnsLength) + ) + + implicit final def cogenUTS46Config: Cogen[UTS46Config] = + Cogen[(Boolean, Boolean, Boolean, Boolean, Boolean, Boolean)].contramap(value => + (value.checkHyphens, value.checkBidi, value.checkJoiners, value.useStd3ASCIIRules, value.transitionalProcessing, value.verifyDnsLength) + ) } diff --git a/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala b/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala new file mode 100644 index 00000000..e89e8608 --- /dev/null +++ b/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala @@ -0,0 +1,5 @@ +package org.typelevel.idna4s.tests.uts46 + +import munit._ + +trait UTS46PlatformTests extends DisciplineSuite diff --git a/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala 
b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala new file mode 100644 index 00000000..a2f17057 --- /dev/null +++ b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala @@ -0,0 +1,51 @@ +package org.typelevel.idna4s.tests.uts46 + +import cats.syntax.all._ +import com.ibm.icu.text.IDNA +import java.lang.StringBuilder +import munit._ +import org.scalacheck.Prop._ +import org.scalacheck._ +import org.typelevel.idna4s.core.uts46._ +import org.typelevel.idna4s.scalacheck.all._ +import scala.jdk.CollectionConverters._ + +trait UTS46PlatformTests extends DisciplineSuite { + + private def configToIcu4jConfig(config: UTS46Config): Int = { + val useStd3ASCIIRules: Int = if (config.useStd3ASCIIRules) IDNA.USE_STD3_RULES else IDNA.DEFAULT + val checkBidi: Int = if (config.checkBidi) IDNA.CHECK_BIDI else IDNA.DEFAULT + val checkJoiners: Int = if (config.checkJoiners) IDNA.CHECK_CONTEXTJ else IDNA.DEFAULT + val transitionalProcessing: Int = if (config.transitionalProcessing) IDNA.DEFAULT else IDNA.NONTRANSITIONAL_TO_ASCII | IDNA.NONTRANSITIONAL_TO_UNICODE + + useStd3ASCIIRules | checkBidi | checkJoiners | transitionalProcessing + } + + private def icu4jToASCII(config: UTS46Config, value: String): (IDNA.Info, String) = { + val info: IDNA.Info = new IDNA.Info() + + (info, IDNA.getUTS46Instance(configToIcu4jConfig(config)).nameToASCII(value, new StringBuilder(value.size), info).toString) + } + + private val genIcu4jCompatibleConfig: Gen[UTS46Config] = + Arbitrary.arbitrary[UTS46Config].map(config => + config.withCheckHyphens(true).withVerifyDNSLength(true) + ) + + property("idna4's uts46 implementation should agree with icu4j's uts46 implementation for arbitrary Strings") { + forAll(genIcu4jCompatibleConfig, Arbitrary.arbitrary[String]){(config: UTS46Config, name: String) => + val idna4stoASCIIResult: Either[UTS46.UTS46FailureException, String] = + UTS46.toASCIIRaw(config)(name) + val (icu4jInfo, icu4jToASCIIResult): 
(IDNA.Info, String) = + icu4jToASCII(config, name) + + idna4stoASCIIResult match { + case Left(errors) => + (icu4jInfo.hasErrors() ?= true) :| s"When idna4s UTS46 fails, so does icu4j: ${errors}." + case Right(asciiName) => + ((icu4jInfo.hasErrors() ?= false) :| s"When idna4s UTS46 passes, so does icu4j: ${icu4jInfo.getErrors()}.") && + ((asciiName ?= icu4jToASCIIResult) :| "idna4s and icu4j produce the same result.") + } + } + } +} diff --git a/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala new file mode 100644 index 00000000..c989efdc --- /dev/null +++ b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala @@ -0,0 +1,5 @@ +package org.typelevel.idna4s.tests.uts46 + +import munit._ + +final class UTS46Tests extends DisciplineSuite with UTS46PlatformTests From 443d725f8a9aa4c9aeabe1942328c39782b2d07f Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 8 Jan 2023 19:54:55 -0700 Subject: [PATCH 2/6] Docs And Fix Check Hyphens * Add some docs * Fix a couple cases where the checkHyphens rules were not being applied. 
--- .../typelevel/idna4s/core/uts46/UTS46.scala | 17 +++++--- .../idna4s/core/uts46/UTS46Config.scala | 43 +++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 39a0e6fb..15b05f67 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -334,7 +334,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat codePointIndex match { case 0 => checkFirstCodePoint(errors, codePoint) - case 3 => + case 3 if checkHyphens => checkHyphen34(errors, previousCodePoint, codePoint) case _ => errors @@ -407,11 +407,14 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat charIndex: Int): Chain[UTS46Exception] = if (charIndex >= len) { // step 3 end check - bidiType.fold( - errors - )(bidiType => - checkFinalBidi(checkFinalHyphen(errors, previousCodePoint), bidiType, bidiNumberTypeError, bidiEndLabelValid) - ) + checkFinalHyphen(errors, previousCodePoint) match { + case errors => + bidiType.fold( + errors + )(bidiType => + checkFinalBidi(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid) + ) + } } else { val cp: Int = value.codePointAt(codePointIndex) val nextCPIndex: Int = codePointIndex + 1 @@ -502,7 +505,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat // It is important that we don't ignore the empty label. 
NonEmptyChain.fromChainAppend(acc, value) } else { - value.span(_ != FULL_STOP) match { + value.span(_ =!= FULL_STOP) match { case (label, rest) if rest.isEmpty => NonEmptyChain.fromChainAppend(acc, label) case (label, rest) => diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala index 33b1a4e3..2f6b5e43 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala @@ -1,6 +1,38 @@ package org.typelevel.idna4s.core.uts46 +/** Configuration object for UTS46 processing. + * + * See the member definitions for descriptions of how they affect UTS46. + * + * @see [[https://www.unicode.org/reports/tr46/#Processing]] + * @see [[https://www.unicode.org/reports/tr46/#Validity_Criteria]] + */ sealed abstract class UTS46Config extends Serializable { + + /** From UTS46, section 4.1, validity criteria 2 and 3. + * + * {{{ + * If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions. + * If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character. + * }}} + * + * For example, + * + * {{{ + * scala> val inputs: List[String] = List("-a", "a-", "ab--cd") + * val inputs: List[String] = List(-a, a-, ab--cd) + * + * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(true))).foreach(println) + * Left(UTS46FailureException(errors = Chain(LabelBeginsWithHyphenMinusException(getLocalizedMessage = Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.)))) + * Left(UTS46FailureException(errors = Chain(LabelEndsWithHyphenMinusException(getLocalizedMessage = Label ends with hyphen-minus (0x002d) and checkHyphens is on.
UTS-46 forbids this.)))) + * Left(UTS46FailureException(errors = Chain(HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this.)))) + * + * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(false))).foreach(println) + * Right(-a) + * Right(a-) + * Right(ab--cd) + * }}} + */ def checkHyphens: Boolean def checkBidi: Boolean def checkJoiners: Boolean @@ -20,6 +52,17 @@ sealed abstract class UTS46Config extends Serializable { } object UTS46Config { + + val Strict: UTS46Config = + UTS46Config( + checkHyphens = true, + checkBidi = true, + checkJoiners = true, + useStd3ASCIIRules = true, + transitionalProcessing = false, + verifyDnsLength = true + ) + private[this] final case class UTS46ConfigImpl(override val checkHyphens: Boolean, override val checkBidi: Boolean, override val checkJoiners: Boolean, override val useStd3ASCIIRules: Boolean, override val transitionalProcessing: Boolean, override val verifyDnsLength: Boolean) extends UTS46Config { override def withCheckHyphens(value: Boolean): UTS46Config = copy(checkHyphens = value) From 5c56a3ff3970417fbd61e21b0b0667484ac64ce6 Mon Sep 17 00:00:00 2001 From: David Strawn Date: Sun, 8 Jan 2023 20:14:45 -0700 Subject: [PATCH 3/6] Doc Cleanup --- .../typelevel/idna4s/core/uts46/UTS46.scala | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 15b05f67..16fcc5a5 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -184,6 +184,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat // need to check step 6, that is taken care of by the mapping step in // toASCII and toUnicode. 
+ /* Check for validity criteria 2, HYPHEN_MINUS can not occur in both positions 3 + * and 4. This should be called at most once when processing position 4 of + * the input. + */ def checkHyphen34(errors: Chain[UTS46Exception], previousCodePoint: Option[Int], cp: Int): Chain[UTS46Exception] = if (cp === HYPHEN_MINUS_INT && previousCodePoint === Some(HYPHEN_MINUS_INT)) { errors :+ UTS46Exception.HyphenMinusInThirdAndFourthPositionException @@ -452,7 +456,9 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat private def process(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean, value: String): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = { - val shouldCheckBidi: Boolean = + // The bidirectional rules apply if checkBidi is true _and_ the input is + // a bidi domain name. + def shouldCheckBidi: Boolean = checkBidi && isBidiDomainName(value) def processLabel(label: String): Ior[NonEmptyChain[IDNAException], String] = { @@ -541,15 +547,15 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat private final val HYPHEN_MINUS = '\u002d' - private final val HYPHEN_MINUS_INT = '\u002d'.toInt + private final val HYPHEN_MINUS_INT = HYPHEN_MINUS.toInt private final val ZERO_WIDTH_NON_JOINER = '\u200c' - private final val ZERO_WIDTH_NON_JOINER_INT = '\u200c'.toInt + private final val ZERO_WIDTH_NON_JOINER_INT = ZERO_WIDTH_NON_JOINER.toInt private final val ZERO_WIDTH_JOINER = '\u200d' - private final val ZERO_WIDTH_JOINER_INT = '\u200d'.toInt + private final val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_NON_JOINER_INT.toInt private final case class BidiNumberTypeError(value: Boolean) extends AnyVal @@ -636,7 +642,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat private[UTS46] case object ContextJViolationForJoinerException extends UTS46Exception { override val getMessage: String = - "ContextJ violation
found for zero width joiner code point 0x200d. If present in a label, it must follow a code point which has a canonical combining class of Virama." + "ContextJ violation found for zero width joiner code point 0x200d. If present in a label, it must follow a code point which has a canonical combining class of Virama, but did not." override def toString: String = s"ContextJViolationForNonJoinerException(getLocalizedMessage = ${getLocalizedMessage})" @@ -714,7 +720,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } private[UTS46] case object EmptyRootLabelException extends UTS46Exception { - override val getMessage: String = "The domain ends with the empty root label. While this is a a valid domain, UTS-46 forbids this notation." + override val getMessage: String = "The domain ends with the empty root label. While this is a valid domain, UTS-46 forbids this notation." override def toString: String = s"EmptyRootLabelException(getLocalizedMessage = ${getLocalizedMessage})" From 26559787f4477a15fe7cc75d6e1abfa895b61f93 Mon Sep 17 00:00:00 2001 From: Michael Pilquist Date: Mon, 12 Jan 2026 20:46:25 -0500 Subject: [PATCH 4/6] Fix ZERO_WIDTH_JOINER_INT --- .../src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 16fcc5a5..92aa9bd0 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -555,7 +555,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat private final val ZERO_WIDTH_JOINER = '\u200d' - private final val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_NON_JOINER_INT.toInt + private final val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_JOINER.toInt private final case class 
BidiNumberTypeError(value: Boolean) extends AnyVal From 8a6b1e7f89777163a52a757a477aa8907bbf9283 Mon Sep 17 00:00:00 2001 From: Michael Pilquist Date: Mon, 12 Jan 2026 21:03:06 -0500 Subject: [PATCH 5/6] Scalafmt --- .../org/typelevel/idna4s/core/CodePoint.scala | 26 +- .../typelevel/idna4s/core/uts46/UTS46.scala | 455 ++++++++++++------ .../idna4s/core/uts46/UTS46Config.scala | 110 +++-- .../scalacheck/ScalaCheckInstances.scala | 17 +- .../tests/uts46/UTS46PlatformTests.scala | 21 + .../tests/uts46/UTS46PlatformTests.scala | 77 ++- .../idna4s/tests/uts46/UTS46Tests.scala | 21 + 7 files changed, 516 insertions(+), 211 deletions(-) diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala index e8e3b3b4..21931f21 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala @@ -276,19 +276,21 @@ object CodePoint extends CodePointPlatform { e.getLocalizedMessage } - /** A method to attempt to get a string description of an int32 value which is - * assumed to be a code point. - * - * The primary goal of this method is to create strings for error - * messages. We would like to have a rich description of the code point if - * possible, but in the event the int32 isn't a code point we don't want to - * fail. - */ + /** + * A method to attempt to get a string description of an int32 value which is assumed to be a + * code point. + * + * The primary goal of this method is to create strings for error messages. We would like to + * have a rich description of the code point if possible, but in the event the int32 isn't a + * code point we don't want to fail. 
+ */ def descriptionFromInt(value: Int): String = - CodePoint.fromInt(value).fold( - _ => s"Value is outside the domain of valid code points: ${value}", - _.toString - ) + CodePoint + .fromInt(value) + .fold( + _ => s"Value is outside the domain of valid code points: ${value}", + _.toString + ) implicit val hashAndOrderForCodePoint: Hash[CodePoint] with Order[CodePoint] = new Hash[CodePoint] with Order[CodePoint] { diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 92aa9bd0..694b12f3 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -30,46 +30,90 @@ import org.typelevel.idna4s.core.bootstring._ import scala.annotation.tailrec import scala.util.control.NoStackTrace -object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with GeneratedBidirectionalClass { +object UTS46 + extends GeneratedUnicodeData + with GeneratedJoiningType + with GeneratedBidirectionalClass { def toASCIIRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = - toASCIIRaw(checkHyphens = config.checkHyphens, checkBidi = config.checkBidi, checkJoiners = config.checkJoiners, useStd3ASCIIRules = config.useStd3ASCIIRules, transitionalProcessing = config.transitionalProcessing, verifyDnsLength = config.verifyDnsLength)(value) + toASCIIRaw( + checkHyphens = config.checkHyphens, + checkBidi = config.checkBidi, + checkJoiners = config.checkJoiners, + useStd3ASCIIRules = config.useStd3ASCIIRules, + transitionalProcessing = config.transitionalProcessing, + verifyDnsLength = config.verifyDnsLength + )(value) def toUnicodeRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = - toUnicodeRaw(checkHyphens = config.checkHyphens, checkBidi = config.checkBidi, checkJoiners = config.checkJoiners, useStd3ASCIIRules = 
config.useStd3ASCIIRules, transitionalProcessing = config.transitionalProcessing)(value) - - def toASCIIRaw(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean, verifyDnsLength: Boolean)(value: String): Either[UTS46FailureException, String] = - process(checkHyphens = checkHyphens, checkBidi = checkBidi, checkJoiners = checkJoiners, useStd3ASCIIRules = useStd3ASCIIRules, transitionalProcessing = transitionalProcessing, value = value).flatMap(labels => + toUnicodeRaw( + checkHyphens = config.checkHyphens, + checkBidi = config.checkBidi, + checkJoiners = config.checkJoiners, + useStd3ASCIIRules = config.useStd3ASCIIRules, + transitionalProcessing = config.transitionalProcessing + )(value) + + def toASCIIRaw( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + verifyDnsLength: Boolean)(value: String): Either[UTS46FailureException, String] = + process( + checkHyphens = checkHyphens, + checkBidi = checkBidi, + checkJoiners = checkJoiners, + useStd3ASCIIRules = useStd3ASCIIRules, + transitionalProcessing = transitionalProcessing, + value = value + ).flatMap(labels => labels.nonEmptyTraverse(label => encodeToPunycodeIfNeeded(label).fold( e => Ior.both(NonEmptyChain(e), label), label => Ior.right(label) - ) + ))) + .flatMap(labels => + if (verifyDnsLength) { + NonEmptyChain + .fromChain(checkDnsLength(labels)) + .fold( + Ior.right(labels): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] + )(errors => Ior.both(errors, labels)) + } else { + Ior.right(labels) + }) + .fold( + errors => + Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + labels => Right(labels.mkString_(FULL_STOP.toString)), + { + case (errors, labels) => + Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + } ) - ).flatMap(labels => - if (verifyDnsLength) { - 
NonEmptyChain.fromChain(checkDnsLength(labels)).fold( - Ior.right(labels): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] - )(errors => - Ior.both(errors, labels) - ) - } else { - Ior.right(labels) - } - ).fold( - errors => Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], - labels => Right(labels.mkString_(FULL_STOP.toString)), - { - case (errors, labels) => Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) - } - ) - def toUnicodeRaw(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean)(value: String): Either[UTS46FailureException, String] = - process(checkHyphens = checkHyphens, checkBidi = checkBidi, checkJoiners = checkJoiners, useStd3ASCIIRules = useStd3ASCIIRules, transitionalProcessing = transitionalProcessing, value = value).fold( - errors => Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + def toUnicodeRaw( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean)(value: String): Either[UTS46FailureException, String] = + process( + checkHyphens = checkHyphens, + checkBidi = checkBidi, + checkJoiners = checkJoiners, + useStd3ASCIIRules = useStd3ASCIIRules, + transitionalProcessing = transitionalProcessing, + value = value + ).fold( + errors => + Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], labels => Right(labels.mkString_(FULL_STOP.toString)), { - case (errors, labels) => Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + case (errors, labels) => + Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) } ) @@ -85,12 +129,17 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } else { val cp: Int = value.codePointAt(i) val bidiCategory: String = - Either.catchNonFatal( - 
bidiTypeForCodePointInt(cp) - ).fold( - e => throw new RuntimeException(s"""Error getting ${String.format("%04X", cp)}""", e), - identity - ) + Either + .catchNonFatal( + bidiTypeForCodePointInt(cp) + ) + .fold( + e => + throw new RuntimeException( + s"""Error getting ${String.format("%04X", Array(cp))}""", + e), + identity + ) if (bidiCategory === "R" || bidiCategory === "AL" || bidiCategory === "AN") { true @@ -103,7 +152,8 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat loop(0) } - private def encodeToPunycodeIfNeeded(label: String): Either[Bootstring.BootstringException, String] = { + private def encodeToPunycodeIfNeeded( + label: String): Either[Bootstring.BootstringException, String] = { val len: Int = label.size @tailrec @@ -120,9 +170,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } if (hasNonASCIIChar(0)) { - Bootstring.encodePunycodeRaw(label).map(label => - s"${PUNYCODE_PREFIX}${label}" - ) + Bootstring.encodePunycodeRaw(label).map(label => s"${PUNYCODE_PREFIX}${label}") } else { Right(label) } @@ -132,7 +180,9 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat // TODO: Can be optimized to one traversal val emptyLabel: Boolean = value.last.isEmpty val dots: Long = value.length - 1L - val totalSize: Long = value.reduceLeftTo(_.size){case (acc, value) => acc + value.size} + dots + val totalSize: Long = value.reduceLeftTo(_.size) { + case (acc, value) => acc + value.size + } + dots val totalSizeWithoutEmptyLabel: Long = if (emptyLabel && value.size > 1L) { // Subtract 1 for the empty label's dot. 
@@ -156,12 +206,13 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat val domainLengthError: Chain[UTS46Exception] = if (totalSizeWithoutEmptyLabel > 253L) { - Chain.one(UTS46Exception.DomainNameExceedsMaxLengthException(totalSizeWithoutEmptyLabel)) + Chain.one( + UTS46Exception.DomainNameExceedsMaxLengthException(totalSizeWithoutEmptyLabel)) } else { Chain.empty } - withoutEmptyLabel.foldLeft(emptyLabelError ++ domainLengthError){ + withoutEmptyLabel.foldLeft(emptyLabelError ++ domainLengthError) { case (errors, label) => val len: Long = label.size.toLong if (len < 1L) { @@ -174,7 +225,11 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } } - private def validInternal(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, value: String): Chain[UTS46Exception] = { + private def validInternal( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + value: String): Chain[UTS46Exception] = { val len: Int = value.length() // We can skip the normalization check, this is always performed by either @@ -188,7 +243,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat * and 4. This should be called at most once when processing position 4 of * the input. 
*/ - def checkHyphen34(errors: Chain[UTS46Exception], previousCodePoint: Option[Int], cp: Int): Chain[UTS46Exception] = + def checkHyphen34( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + cp: Int): Chain[UTS46Exception] = if (cp === HYPHEN_MINUS_INT && previousCodePoint === Some(HYPHEN_MINUS_INT)) { errors :+ UTS46Exception.HyphenMinusInThirdAndFourthPositionException } else { @@ -263,51 +321,90 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat errors } - def checkCombiningMark(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] = + def checkCombiningMark( + errors: Chain[UTS46Exception], + codePoint: Int): Chain[UTS46Exception] = if (combiningMarkCodePoints.apply(codePoint)) { errors :+ UTS46Exception.LabelStartsWithGeneralMarkException(codePoint) } else { errors } - def checkFirstBidi(errors: Chain[UTS46Exception], codePoint: Int): (Chain[UTS46Exception], Option[BidiType]) = + def checkFirstBidi( + errors: Chain[UTS46Exception], + codePoint: Int): (Chain[UTS46Exception], Option[BidiType]) = if (checkBidi) { bidiTypeForCodePointInt(codePoint) match { case "L" => - (errors, Some(BidiType.LTR)) case "R" | "AL" => + (errors, Some(BidiType.LTR)) + case "R" | "AL" => (errors, Some(BidiType.RTL(None))) case otherwise => - (errors :+ UTS46Exception.InvalidBidiTypeForFirstCodePointException(codePoint, otherwise), None) + ( + errors :+ UTS46Exception.InvalidBidiTypeForFirstCodePointException( + codePoint, + otherwise), + None) } } else { (errors, None) } - def generalBidiCheck(errors: Chain[UTS46Exception], bidiType: BidiType, bidiNumberTypeError: BidiNumberTypeError, bidiEndLabelValid: BidiEndLabelValid, codePoint: Int): (BidiType, BidiNumberTypeError, BidiEndLabelValid, Chain[UTS46Exception]) = + def generalBidiCheck( + errors: Chain[UTS46Exception], + bidiType: BidiType, + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid, + codePoint: Int) + : (BidiType, 
BidiNumberTypeError, BidiEndLabelValid, Chain[UTS46Exception]) = bidiType match { case BidiType.RTL(numberType) => bidiTypeForCodePointInt(codePoint) match { case "AN" => numberType match { case Some(BidiRTLNumberType.ArabicNumber) => - (BidiType.RTL(numberType), bidiNumberTypeError, BidiEndLabelValid(true), errors) + ( + BidiType.RTL(numberType), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) case Some(BidiRTLNumberType.EuropeanNumber) => // Set error - (BidiType.RTL(numberType), BidiNumberTypeError(true), BidiEndLabelValid(true), errors) + ( + BidiType.RTL(numberType), + BidiNumberTypeError(true), + BidiEndLabelValid(true), + errors) case None => // Set number type - (BidiType.RTL(Some(BidiRTLNumberType.ArabicNumber)), bidiNumberTypeError, BidiEndLabelValid(true), errors) + ( + BidiType.RTL(Some(BidiRTLNumberType.ArabicNumber)), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) } case "EN" => numberType match { case Some(BidiRTLNumberType.ArabicNumber) => // Set Error - (BidiType.RTL(numberType), BidiNumberTypeError(true), BidiEndLabelValid(true), errors) + ( + BidiType.RTL(numberType), + BidiNumberTypeError(true), + BidiEndLabelValid(true), + errors) case Some(BidiRTLNumberType.EuropeanNumber) => - (BidiType.RTL(numberType), bidiNumberTypeError, BidiEndLabelValid(true), errors) + ( + BidiType.RTL(numberType), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) case None => // Set number Type - (BidiType.RTL(Some(BidiRTLNumberType.EuropeanNumber)), bidiNumberTypeError, BidiEndLabelValid(true), errors) + ( + BidiType.RTL(Some(BidiRTLNumberType.EuropeanNumber)), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) } case "R" | "AL" => (bidiType, bidiNumberTypeError, BidiEndLabelValid(true), errors) @@ -316,7 +413,13 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat case "NSM" => (bidiType, bidiNumberTypeError, bidiEndLabelValid, errors) case otherwise => - (bidiType, bidiNumberTypeError, 
BidiEndLabelValid(false), UTS46Exception.InvalidBidiTypeForRTLLabelException(codePoint, otherwise) +: errors) + ( + bidiType, + bidiNumberTypeError, + BidiEndLabelValid(false), + UTS46Exception.InvalidBidiTypeForRTLLabelException( + codePoint, + otherwise) +: errors) } case BidiType.LTR => bidiTypeForCodePointInt(codePoint) match { @@ -327,14 +430,26 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat case "NSM" => (bidiType, BidiNumberTypeError(false), bidiEndLabelValid, errors) case otherwise => - (bidiType, BidiNumberTypeError(false), bidiEndLabelValid, UTS46Exception.InvalidBidiTypeForLTRLabelException(codePoint, otherwise) +: errors) + ( + bidiType, + BidiNumberTypeError(false), + bidiEndLabelValid, + UTS46Exception.InvalidBidiTypeForLTRLabelException( + codePoint, + otherwise) +: errors) } } - def checkFirstCodePoint(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] = + def checkFirstCodePoint( + errors: Chain[UTS46Exception], + codePoint: Int): Chain[UTS46Exception] = checkCombiningMark(checkFirstHyphen(errors, codePoint), codePoint) - def positionalChecks(errors: Chain[UTS46Exception], codePointIndex: Int, previousCodePoint: Option[Int], codePoint: Int): Chain[UTS46Exception] = + def positionalChecks( + errors: Chain[UTS46Exception], + codePointIndex: Int, + previousCodePoint: Option[Int], + codePoint: Int): Chain[UTS46Exception] = codePointIndex match { case 0 => checkFirstCodePoint(errors, codePoint) @@ -344,13 +459,18 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat errors } - def checkForJoiners(errors: Chain[UTS46Exception], previousCodePoint: Option[Int], charIndex: Int, codePoint: Int): Chain[UTS46Exception] = + def checkForJoiners( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + charIndex: Int, + codePoint: Int): Chain[UTS46Exception] = if (checkJoiners) { if (codePoint === ZERO_WIDTH_NON_JOINER_INT || codePoint === ZERO_WIDTH_JOINER_INT) { if 
(previousCodePoint.fold(false)(viramaCanonicalCombiningClassCodePoints.apply)) { errors } else if (codePoint === ZERO_WIDTH_NON_JOINER_INT) { - if (checkNonJoinerPrevCodePoint(charIndex) || (checkNonJoinerBefore(charIndex) && checkNonJoinerAfter(charIndex))) { + if (checkNonJoinerPrevCodePoint(charIndex) || (checkNonJoinerBefore( + charIndex) && checkNonJoinerAfter(charIndex))) { errors } else { errors :+ UTS46Exception.ContextJViolationForNonJoinerException @@ -365,11 +485,13 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat errors } - def checkFinalHyphen(errors: Chain[UTS46Exception], previousCodePoint: Option[Int]): Chain[UTS46Exception] = + def checkFinalHyphen( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int]): Chain[UTS46Exception] = if (checkHyphens) { previousCodePoint.fold( errors - ){ + ) { case HYPHEN_MINUS_INT => UTS46Exception.LabelEndsWithHyphenMinusException +: errors case _ => @@ -379,12 +501,16 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat errors } - def checkFinalBidi(errors: Chain[UTS46Exception], bidiType: BidiType, bidiNumberTypeError: BidiNumberTypeError, bidiEndLabelValid: BidiEndLabelValid): Chain[UTS46Exception] = + def checkFinalBidi( + errors: Chain[UTS46Exception], + bidiType: BidiType, + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid): Chain[UTS46Exception] = (if (bidiNumberTypeError.value) { - UTS46Exception.MutuallyExclusiveBidiNumberTypesException +: errors - } else { - errors - }) match { + UTS46Exception.MutuallyExclusiveBidiNumberTypesException +: errors + } else { + errors + }) match { case errors => if (bidiEndLabelValid.value) { errors @@ -402,13 +528,13 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat // each permutation of condiditon. 
@tailrec def loop( - errors: Chain[UTS46Exception], - previousCodePoint: Option[Int], - bidiType: Option[BidiType], - bidiNumberTypeError: BidiNumberTypeError, - bidiEndLabelValid: BidiEndLabelValid, - codePointIndex: Int, - charIndex: Int): Chain[UTS46Exception] = + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + bidiType: Option[BidiType], + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid, + codePointIndex: Int, + charIndex: Int): Chain[UTS46Exception] = if (charIndex >= len) { // step 3 end check checkFinalHyphen(errors, previousCodePoint) match { @@ -416,8 +542,7 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat bidiType.fold( errors )(bidiType => - checkFinalBidi(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid) - ) + checkFinalBidi(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid)) } } else { val cp: Int = value.codePointAt(codePointIndex) @@ -425,10 +550,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat val nextCharIndex: Int = charIndex + (if (cp >= 0x10000) 2 else 1) (if (codePointIndex === 0 && checkBidi) { - checkFirstBidi(errors, cp) - } else { - (errors, bidiType) - }) match { + checkFirstBidi(errors, cp) + } else { + (errors, bidiType) + }) match { // Intentional shadow case (errors, bidiType) => checkForJoiners( @@ -440,11 +565,30 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat case errors => bidiType match { case None => - loop(errors, Some(cp), bidiType, bidiNumberTypeError, bidiEndLabelValid, nextCPIndex, nextCharIndex) + loop( + errors, + Some(cp), + bidiType, + bidiNumberTypeError, + bidiEndLabelValid, + nextCPIndex, + nextCharIndex) case Some(bidiType) => - generalBidiCheck(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid, cp) match { + generalBidiCheck( + errors, + bidiType, + bidiNumberTypeError, + bidiEndLabelValid, + cp) match { case (bidiType, 
bidiNumberTypeError, bidiEndLabelValid, errors) => - loop(errors, Some(cp), Some(bidiType), bidiNumberTypeError, bidiEndLabelValid, nextCPIndex, nextCharIndex) + loop( + errors, + Some(cp), + Some(bidiType), + bidiNumberTypeError, + bidiEndLabelValid, + nextCPIndex, + nextCharIndex) } } } @@ -454,7 +598,13 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat loop(Chain.empty, None, None, BidiNumberTypeError(false), BidiEndLabelValid(false), 0, 0) } - private def process(checkHyphens: Boolean, checkBidi: Boolean, checkJoiners: Boolean, useStd3ASCIIRules: Boolean, transitionalProcessing: Boolean, value: String): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = { + private def process( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + value: String): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = { // The bidirectional rules apply if checkBidi is true _and_ the intput is // a bidi domain name. 
@@ -463,7 +613,12 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat def processLabel(label: String): Ior[NonEmptyChain[IDNAException], String] = { def validateLablel(label: String): Ior[NonEmptyChain[IDNAException], String] = - NonEmptyChain.fromChain(validInternal(checkHyphens = checkHyphens, checkBidi = shouldCheckBidi, checkJoiners = checkJoiners, label)) match { + NonEmptyChain.fromChain( + validInternal( + checkHyphens = checkHyphens, + checkBidi = shouldCheckBidi, + checkJoiners = checkJoiners, + label)) match { case Some(nec) => Ior.both(nec, label) case _ => @@ -490,7 +645,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } @tailrec - def processLabels(labels: NonEmptyChain[String], acc: Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]]): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = + def processLabels( + labels: NonEmptyChain[String], + acc: Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]]) + : Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = labels.uncons match { case (label, labels) => acc.combine(processLabel(label).map(NonEmptyChain.one)) match { @@ -520,55 +678,57 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } } - Ior.fromEither( - CodePointMapper.mapCodePoints(useStd3ASCIIRules, transitionalProcessing)(value).map(nfc) - ).leftMap(NonEmptyChain.one[IDNAException]).flatMap(value => - (toLabels(value, Chain.empty).uncons match { - case (label, labels) => - processLabel(label).map(NonEmptyChain.one) match { - case acc => - NonEmptyChain.fromChain(labels) match { - case Some(labels) => - processLabels(labels, acc) - case _ => - acc - } - } - }) - ) + Ior + .fromEither( + CodePointMapper.mapCodePoints(useStd3ASCIIRules, transitionalProcessing)(value).map(nfc) + ) + .leftMap(NonEmptyChain.one[IDNAException]) + .flatMap(value => + toLabels(value, Chain.empty).uncons match { + case (label, labels) => + 
processLabel(label).map(NonEmptyChain.one) match { + case acc => + NonEmptyChain.fromChain(labels) match { + case Some(labels) => + processLabels(labels, acc) + case _ => + acc + } + } + }) } private def nfc(value: String): String = Normalizer.normalize(value, Normalizer.Form.NFC) - private final val FULL_STOP = '\u002e' + final private val FULL_STOP = '\u002e' - private final val PUNYCODE_PREFIX = "xn--" + final private val PUNYCODE_PREFIX = "xn--" - private final val HYPHEN_MINUS = '\u002d' + final private val HYPHEN_MINUS = '\u002d' - private final val HYPHEN_MINUS_INT = HYPHEN_MINUS.toInt + final private val HYPHEN_MINUS_INT = HYPHEN_MINUS.toInt - private final val ZERO_WIDTH_NON_JOINER = '\u200c' + final private val ZERO_WIDTH_NON_JOINER = '\u200c' - private final val ZERO_WIDTH_NON_JOINER_INT = ZERO_WIDTH_NON_JOINER.toInt + final private val ZERO_WIDTH_NON_JOINER_INT = ZERO_WIDTH_NON_JOINER.toInt - private final val ZERO_WIDTH_JOINER = '\u200d' + final private val ZERO_WIDTH_JOINER = '\u200d' - private final val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_JOINER.toInt + final private val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_JOINER.toInt - private final case class BidiNumberTypeError(value: Boolean) extends AnyVal + final private case class BidiNumberTypeError(value: Boolean) extends AnyVal - private final case class BidiEndLabelValid(value: Boolean) extends AnyVal + final private case class BidiEndLabelValid(value: Boolean) extends AnyVal - private sealed abstract class BidiRTLNumberType extends Serializable + sealed abstract private class BidiRTLNumberType extends Serializable private object BidiRTLNumberType { case object EuropeanNumber extends BidiRTLNumberType case object ArabicNumber extends BidiRTLNumberType } - private sealed abstract class BidiType extends Serializable + sealed abstract private class BidiType extends Serializable private object BidiType { case object LTR extends BidiType @@ -580,24 +740,32 @@ object UTS46 extends GeneratedUnicodeData with 
GeneratedJoiningType with Generat def partiallyProcessedValue: Option[String] - override final def getMessage: String = - s"""Errors encountered during UTS-46 processing: ${errors.map(_.getLocalizedMessage).mkString_(", ")}""" + final override def getMessage: String = + s"""Errors encountered during UTS-46 processing: ${errors + .map(_.getLocalizedMessage) + .mkString_(", ")}""" - override final def toString: String = + final override def toString: String = s"UTS46FailureException(errors = ${errors})" } object UTS46FailureException { - private[this] final case class UTS46FailureExceptionImpl(override val errors: NonEmptyChain[IDNAException], override val partiallyProcessedValue: Option[String]) extends UTS46FailureException - - private[UTS46] def apply(errors: NonEmptyChain[IDNAException], partiallyProcessedValue: Option[String]): UTS46FailureException = + final private[this] case class UTS46FailureExceptionImpl( + override val errors: NonEmptyChain[IDNAException], + override val partiallyProcessedValue: Option[String]) + extends UTS46FailureException + + private[UTS46] def apply( + errors: NonEmptyChain[IDNAException], + partiallyProcessedValue: Option[String]): UTS46FailureException = UTS46FailureExceptionImpl(errors, partiallyProcessedValue) } sealed abstract class UTS46Exception extends IDNAException with NoStackTrace object UTS46Exception { - private[UTS46] case object HyphenMinusInThirdAndFourthPositionException extends UTS46Exception { + private[UTS46] case object HyphenMinusInThirdAndFourthPositionException + extends UTS46Exception { override val getMessage: String = "Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this." 
@@ -621,7 +789,8 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"LabelEndsWithHyphenMinusException(getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] final case class LabelStartsWithGeneralMarkException(cp: Int) extends UTS46Exception { + final private[UTS46] case class LabelStartsWithGeneralMarkException(cp: Int) + extends UTS46Exception { private def description: String = CodePoint.descriptionFromInt(cp) @@ -648,7 +817,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"ContextJViolationForNonJoinerException(getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] final case class InvalidBidiTypeForFirstCodePointException(codePoint: Int, bidiType: String) extends UTS46Exception { + final private[UTS46] case class InvalidBidiTypeForFirstCodePointException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { override def getMessage: String = s"Invalid bidirectional type for first code point in label. Expected L, R, or AL, got ${bidiType}. 
Code point: ${CodePoint.descriptionFromInt(codePoint)}" @@ -656,7 +828,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"InvalidBidiTypeForFirstCodePointException(codePoint = ${codePoint}, bidiType = ${bidiType})" } - private[UTS46] final case class InvalidBidiTypeForRTLLabelException(codePoint: Int, bidiType: String) extends UTS46Exception { + final private[UTS46] case class InvalidBidiTypeForRTLLabelException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { override def getMessage: String = s"In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}" @@ -664,7 +839,10 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"InvalidBidiTypeForRTLLabelException(codePoint = ${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})" } - private[UTS46] final case class InvalidBidiTypeForLTRLabelException(codePoint: Int, bidiType: String) extends UTS46Exception { + final private[UTS46] case class InvalidBidiTypeForLTRLabelException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { override def getMessage: String = s"In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}" @@ -672,7 +850,8 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"InvalidBidiTypeForLTRLabelException(codePoint = ${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})" } - private[UTS46] case object MutuallyExclusiveBidiNumberTypesException extends UTS46Exception { + private[UTS46] case object MutuallyExclusiveBidiNumberTypesException + extends UTS46Exception { override val getMessage: String = "In an RTL label, if an EN is present, no AN may be present, and vice versa, however this 
label has both." @@ -680,7 +859,8 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"MutuallyExclusiveBidiNumberTypesException(getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] case object RTLLabelDidNotEndWithCorrectBidiTypeException extends UTS46Exception { + private[UTS46] case object RTLLabelDidNotEndWithCorrectBidiTypeException + extends UTS46Exception { override val getMessage: String = "In an RTL label, the end of the label must be a character with Bidi property R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM, but this was not the case." @@ -688,7 +868,8 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat s"RTLLabelDidNotEndWithCorrectBidiTypeException(getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] case object LTRLabelDidNotEndWithCorrectBidiTypeException extends UTS46Exception { + private[UTS46] case object LTRLabelDidNotEndWithCorrectBidiTypeException + extends UTS46Exception { override val getMessage: String = "In an LTR label, the end of the label must be a character with Bidi property L or EN, followed by zero or more characters with Bidi property NSM, but this was not the case." @@ -697,30 +878,36 @@ object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType with Generat } private[UTS46] case object NonRootEmptyLabelException extends UTS46Exception { - override val getMessage: String = "An empty label was present but it was not the root label. This is forbidden." + override val getMessage: String = + "An empty label was present but it was not the root label. This is forbidden." override def toString: String = s"NonRootEmptyLabelException(getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] case class LabelExceedsMaxLengthException(size: Long) extends UTS46Exception { + private[UTS46] case class LabelExceedsMaxLengthException(size: Long) + extends UTS46Exception { // TODO: Include offending label? 
Need to check Unicode security recommendations. - override def getMessage: String = s"A domain label is required to be between 1 and 63 characters when represented as ASCII, but got ${size}." + override def getMessage: String = + s"A domain label is required to be between 1 and 63 characters when represented as ASCII, but got ${size}." override def toString: String = s"LabelExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" } - private[UTS46] case class DomainNameExceedsMaxLengthException(size: Long) extends UTS46Exception { + private[UTS46] case class DomainNameExceedsMaxLengthException(size: Long) + extends UTS46Exception { // TODO: Include offending domain? Need to check Unicode security recommendations. - override def getMessage: String = s"A domain name must be between 1 and 253 characters when represented as ASCII, but got ${size}." + override def getMessage: String = + s"A domain name must be between 1 and 253 characters when represented as ASCII, but got ${size}." override def toString: String = s"DomainNameExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" } private[UTS46] case object EmptyRootLabelException extends UTS46Exception { - override val getMessage: String = "The domain ends with the empty root label. While this is a valid domain, UTS-46 forbids this notation." + override val getMessage: String = + "The domain ends with the empty root label. While this is a valid domain, UTS-46 forbids this notation." 
override def toString: String = s"EmptyRootLabelException(getLocalizedMessage = ${getLocalizedMessage})" diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala index 2f6b5e43..0dd0c846 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala @@ -1,38 +1,63 @@ +/* + * Copyright (c) 2022 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.typelevel.idna4s.core.uts46 -/** Configuration object for UTS46 processing. - * - * See the member definitions for descriptions of how the affect UTS46. - * - * @see [[https://www.unicode.org/reports/tr46/#Processing]] - * @see [[https://www.unicode.org/reports/tr46/#Validity_Criteria]] - */ +/** + * Configuration object for UTS46 processing. 
+ * + * See the member definitions for descriptions of how the affect UTS46. + * + * @see + * [[https://www.unicode.org/reports/tr46/#Processing]] + * @see + * [[https://www.unicode.org/reports/tr46/#Validity_Criteria]] + */ sealed abstract class UTS46Config extends Serializable { - /** From UTS46, section 4.1, validity criteria 2 and 3. - * - * {{{ - * If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions. - * If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character. - * }}} - * - * For example, - * - * {{{ - * scala> val inputs: List[String] = List("-a", "a-", "ab--cd") - * val inputs: List[String] = List(-a, a-, ab--cd) - * - * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(true))).foreach(println) - * Left(UTS46FailureException(errors = Chain(LabelBeginsWithHyphenMinusException(getLocalizedMessage = Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.)))) - * Left(UTS46FailureException(errors = Chain(LabelEndsWithHyphenMinusException(getLocalizedMessage = Label ends with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.)))) - * Left(UTS46FailureException(errors = Chain(HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this.)))) - * - * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(false))).foreach(println) - * Right(-a) - * Right(a-) - * Right(ab--cd) - * }}} - */ + /** + * From UTS46, section 4.1, validity criteria 2 and 3. + * + * {{{ + * If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions. + * If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character. 
+ * }}} + * + * For example, + * + * {{{ + * scala> val inputs: List[String] = List("-a", "a-", "ab--cd") + * val inputs: List[String] = List(-a, a-, ab--cd) + * + * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(true))).foreach(println) + * Left(UTS46FailureException(errors = Chain(LabelBeginsWithHyphenMinusException(getLocalizedMessage = Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.)))) + * Left(UTS46FailureException(errors = Chain(LabelEndsWithHyphenMinusException(getLocalizedMessage = Label ends with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.)))) + * Left(UTS46FailureException(errors = Chain(HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this.)))) + * + * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(false))).foreach(println) + * Right(-a) + * Right(a-) + * Right(ab--cd) + * }}} + */ def checkHyphens: Boolean def checkBidi: Boolean def checkJoiners: Boolean @@ -47,7 +72,7 @@ sealed abstract class UTS46Config extends Serializable { def withTransitionalProcessing(value: Boolean): UTS46Config def withVerifyDNSLength(value: Boolean): UTS46Config - override final def toString: String = + final override def toString: String = s"UTS46Config(checkHyphens = ${checkHyphens}, checkBidi = ${checkBidi}, checkJoiners = ${checkJoiners}, useStd3ASCIIRules = ${useStd3ASCIIRules}, transitionalProcessing = ${transitionalProcessing}, verifyDnsLength = ${verifyDnsLength})" } @@ -63,7 +88,14 @@ object UTS46Config { verifyDnsLength = true ) - private[this] final case class UTS46ConfigImpl(override val checkHyphens: Boolean, override val checkBidi: Boolean, override val checkJoiners: Boolean, override val useStd3ASCIIRules: Boolean, override val transitionalProcessing: Boolean, override val verifyDnsLength: Boolean) extends UTS46Config { + final private[this] case class 
UTS46ConfigImpl( + override val checkHyphens: Boolean, + override val checkBidi: Boolean, + override val checkJoiners: Boolean, + override val useStd3ASCIIRules: Boolean, + override val transitionalProcessing: Boolean, + override val verifyDnsLength: Boolean) + extends UTS46Config { override def withCheckHyphens(value: Boolean): UTS46Config = copy(checkHyphens = value) override def withCheckBidi(value: Boolean): UTS46Config = @@ -79,12 +111,12 @@ object UTS46Config { } def apply( - checkHyphens: Boolean, - checkBidi: Boolean, - checkJoiners: Boolean, - useStd3ASCIIRules: Boolean, - transitionalProcessing: Boolean, - verifyDnsLength: Boolean + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + verifyDnsLength: Boolean ): UTS46Config = UTS46ConfigImpl( checkHyphens, diff --git a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala index c1e7d6c6..d2e4cdcd 100644 --- a/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala +++ b/scalacheck/src/main/scala/org/typelevel/idna4s/scalacheck/ScalaCheckInstances.scala @@ -204,11 +204,22 @@ private[scalacheck] trait ScalaCheckInstances extends Serializable { useStd3ASCIIRules <- Arbitrary.arbitrary[Boolean] transitionalProcessing <- Arbitrary.arbitrary[Boolean] verifyDnsLength <- Arbitrary.arbitrary[Boolean] - } yield UTS46Config(checkHyphens, checkBidi, checkJoiners, useStd3ASCIIRules, transitionalProcessing, verifyDnsLength) + } yield UTS46Config( + checkHyphens, + checkBidi, + checkJoiners, + useStd3ASCIIRules, + transitionalProcessing, + verifyDnsLength) ) implicit final def cogenUTS46Config: Cogen[UTS46Config] = Cogen[(Boolean, Boolean, Boolean, Boolean, Boolean, Boolean)].contramap(value => - (value.checkHyphens, value.checkBidi, value.checkJoiners, value.useStd3ASCIIRules, 
value.transitionalProcessing, value.verifyDnsLength) - ) + ( + value.checkHyphens, + value.checkBidi, + value.checkJoiners, + value.useStd3ASCIIRules, + value.transitionalProcessing, + value.verifyDnsLength)) } diff --git a/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala b/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala index e89e8608..a0cee60e 100644 --- a/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala +++ b/tests/js-native/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2022 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.typelevel.idna4s.tests.uts46 import munit._ diff --git a/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala index a2f17057..a33c09c2 100644 --- a/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala +++ b/tests/jvm/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46PlatformTests.scala @@ -1,6 +1,26 @@ +/* + * Copyright (c) 2022 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.typelevel.idna4s.tests.uts46 -import cats.syntax.all._ import com.ibm.icu.text.IDNA import java.lang.StringBuilder import munit._ @@ -8,15 +28,17 @@ import org.scalacheck.Prop._ import org.scalacheck._ import org.typelevel.idna4s.core.uts46._ import org.typelevel.idna4s.scalacheck.all._ -import scala.jdk.CollectionConverters._ trait UTS46PlatformTests extends DisciplineSuite { private def configToIcu4jConfig(config: UTS46Config): Int = { - val useStd3ASCIIRules: Int = if (config.useStd3ASCIIRules) IDNA.USE_STD3_RULES else IDNA.DEFAULT + val useStd3ASCIIRules: Int = + if (config.useStd3ASCIIRules) IDNA.USE_STD3_RULES else IDNA.DEFAULT val checkBidi: Int = if (config.checkBidi) IDNA.CHECK_BIDI else IDNA.DEFAULT val checkJoiners: Int = if (config.checkJoiners) IDNA.CHECK_CONTEXTJ else IDNA.DEFAULT - val transitionalProcessing: Int = if (config.transitionalProcessing) IDNA.DEFAULT else IDNA.NONTRANSITIONAL_TO_ASCII | IDNA.NONTRANSITIONAL_TO_UNICODE + val transitionalProcessing: Int = + if (config.transitionalProcessing) IDNA.DEFAULT + else IDNA.NONTRANSITIONAL_TO_ASCII | IDNA.NONTRANSITIONAL_TO_UNICODE useStd3ASCIIRules | checkBidi | checkJoiners | transitionalProcessing } @@ -24,28 +46,37 @@ trait UTS46PlatformTests extends DisciplineSuite { private def icu4jToASCII(config: UTS46Config, value: String): (IDNA.Info, String) = { val info: IDNA.Info = new IDNA.Info() - (info, IDNA.getUTS46Instance(configToIcu4jConfig(config)).nameToASCII(value, new StringBuilder(value.size), info).toString) + ( + info, + IDNA + .getUTS46Instance(configToIcu4jConfig(config)) + .nameToASCII(value, new StringBuilder(value.size), info) + .toString) } private val genIcu4jCompatibleConfig: Gen[UTS46Config] = - Arbitrary.arbitrary[UTS46Config].map(config => - config.withCheckHyphens(true).withVerifyDNSLength(true) - ) - - property("idna4's uts46 implementation should agree with icu4j's uts46 implementation for arbitrary Strings") { - forAll(genIcu4jCompatibleConfig, 
Arbitrary.arbitrary[String]){(config: UTS46Config, name: String) => - val idna4stoASCIIResult: Either[UTS46.UTS46FailureException, String] = - UTS46.toASCIIRaw(config)(name) - val (icu4jInfo, icu4jToASCIIResult): (IDNA.Info, String) = - icu4jToASCII(config, name) - - idna4stoASCIIResult match { - case Left(errors) => - (icu4jInfo.hasErrors() ?= true) :| s"When idna4s UTS46 fails, so does icu4j: ${errors}." - case Right(asciiName) => - ((icu4jInfo.hasErrors() ?= false) :| s"When idna4s UTS46 passes, so does icu4j: ${icu4jInfo.getErrors()}.") && - ((asciiName ?= icu4jToASCIIResult) :| "idna4s and icu4j produce the same result.") - } + Arbitrary + .arbitrary[UTS46Config] + .map(config => config.withCheckHyphens(true).withVerifyDNSLength(true)) + + property( + "idna4's uts46 implementation should agree with icu4j's uts46 implementation for arbitrary Strings") { + forAll(genIcu4jCompatibleConfig, Arbitrary.arbitrary[String]) { + (config: UTS46Config, name: String) => + val idna4stoASCIIResult: Either[UTS46.UTS46FailureException, String] = + UTS46.toASCIIRaw(config)(name) + val (icu4jInfo, icu4jToASCIIResult): (IDNA.Info, String) = + icu4jToASCII(config, name) + + idna4stoASCIIResult match { + case Left(errors) => + (icu4jInfo + .hasErrors() ?= true) :| s"When idna4s UTS46 fails, so does icu4j: ${errors}." 
+ case Right(asciiName) => + ((icu4jInfo + .hasErrors() ?= false) :| s"When idna4s UTS46 passes, so does icu4j: ${icu4jInfo.getErrors()}.") && + ((asciiName ?= icu4jToASCIIResult) :| "idna4s and icu4j produce the same result.") + } } } } diff --git a/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala index c989efdc..ac50214e 100644 --- a/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala +++ b/tests/shared/src/test/scala/org/typelevel/idna4s/tests/uts46/UTS46Tests.scala @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2022 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.typelevel.idna4s.tests.uts46 import munit._ From 85f102b78427bf7615c4f61e6e9a1b0c16f8d07a Mon Sep 17 00:00:00 2001 From: Michael Pilquist Date: Mon, 12 Jan 2026 21:16:44 -0500 Subject: [PATCH 6/6] Bump base version to 0.2 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 8fe4534c..bf76cf27 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ import org.typelevel.idna4s.build._ -ThisBuild / tlBaseVersion := "0.1" +ThisBuild / tlBaseVersion := "0.2" val UnicodeVersion: String = "15.0.0"