diff --git a/build.sbt b/build.sbt index 8fe4534c..bf76cf27 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ import org.typelevel.idna4s.build._ -ThisBuild / tlBaseVersion := "0.1" +ThisBuild / tlBaseVersion := "0.2" val UnicodeVersion: String = "15.0.0" diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala index 7a5da916..21931f21 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/CodePoint.scala @@ -276,6 +276,22 @@ object CodePoint extends CodePointPlatform { e.getLocalizedMessage } + /** + * A method to attempt to get a string description of an int32 value which is assumed to be a + * code point. + * + * The primary goal of this method is to create strings for error messages. We would like to + * have a rich description of the code point if possible, but in the event the int32 isn't a + * code point we don't want to fail. 
+ */ + def descriptionFromInt(value: Int): String = + CodePoint + .fromInt(value) + .fold( + _ => s"Value is outside the domain of valid code points: ${value}", + _.toString + ) + implicit val hashAndOrderForCodePoint: Hash[CodePoint] with Order[CodePoint] = new Hash[CodePoint] with Order[CodePoint] { override def hash(x: CodePoint): Int = x.hashCode diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala index 42328129..e1051500 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/bootstring/Bootstring.scala @@ -341,6 +341,12 @@ object Bootstring { } } + /** Encodes `value` by calling `encodeRaw` with `BootstringParams.PunycodeParams`. */ def encodePunycodeRaw(value: String): Either[BootstringException, String] = + encodeRaw(BootstringParams.PunycodeParams)(value) + + /** Decodes `value` by calling `decodeRaw` with `BootstringParams.PunycodeParams`. */ def decodePunycodeRaw(value: String): Either[BootstringException, String] = + decodeRaw(BootstringParams.PunycodeParams)(value) + /** * An error which occurred during the application of the Bootstring algorithm. */ @@ -351,15 +357,6 @@ object Bootstring { object BootstringException { - private def codePointDescription(value: Int): String = - CodePoint - .fromInt(value) - .fold( - // The first case should be impossible. - _ => s"Value is outside the domain of valid code points: ${value}", - _.toString - ) - private[Bootstring] case object UnableToResizeBufferException extends BootstringException { override val getMessage: String = s"Can not resize buffer as it would exceed largest valid size ${Int.MaxValue}. What are you doing?" @@ -374,11 +371,11 @@ object Bootstring { extends BootstringException { final override def getMessage: String = - s"Input contains a non-basic code point < the initial N value. Code Point: ${codePointDescription( + s"Input contains a non-basic code point < the initial N value. 
Code Point: ${CodePoint.descriptionFromInt( invalidCodePoint)}, Initial N: ${initialN}." final override def toString: String = - s"InvalidNonBasicCodePointException(invalidCodePoint = ${codePointDescription( + s"InvalidNonBasicCodePointException(invalidCodePoint = ${CodePoint.descriptionFromInt( invalidCodePoint)}, initialN = ${initialN}, getMessage = ${getMessage})" } @@ -398,13 +395,13 @@ object Bootstring { final private[Bootstring] case class BasicCodePointInNonBasicSection(codePoint: Int) extends BootstringException { final override def getMessage: String = - s"Decoded a basic code point in the non-basic section of the input. All basic code points must occur in the basic section. Code point: ${codePointDescription(codePoint)}" + s"Decoded a basic code point in the non-basic section of the input. All basic code points must occur in the basic section. Code point: ${CodePoint.descriptionFromInt(codePoint)}" } final private[Bootstring] case class NonBasicCodePointInBasicSection(codePoint: Int) extends BootstringException { final override def getMessage: String = - s"Decoded a non-basic code point in the basic section of the input. All non-basic code points must occurr in the non-basic section. Code point: ${codePointDescription(codePoint)}" + s"Decoded a non-basic code point in the basic section of the input. All non-basic code points must occurr in the non-basic section. 
Code point: ${CodePoint.descriptionFromInt(codePoint)}" } final private[Bootstring] case class WrappedException( diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala index 244c9e49..694b12f3 100644 --- a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46.scala @@ -21,7 +21,896 @@ package org.typelevel.idna4s.core.uts46 +import cats.data._ +import cats.syntax.all._ +import java.text.Normalizer +import org.typelevel.idna4s.core.CodePoint +import org.typelevel.idna4s.core.IDNAException +import org.typelevel.idna4s.core.bootstring._ +import scala.annotation.tailrec +import scala.util.control.NoStackTrace + object UTS46 extends GeneratedUnicodeData with GeneratedJoiningType - with GeneratedBidirectionalClass {} + with GeneratedBidirectionalClass { + + /** Calls the flag-based `toASCIIRaw` with all six flags read from `config`. */ def toASCIIRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = + toASCIIRaw( + checkHyphens = config.checkHyphens, + checkBidi = config.checkBidi, + checkJoiners = config.checkJoiners, + useStd3ASCIIRules = config.useStd3ASCIIRules, + transitionalProcessing = config.transitionalProcessing, + verifyDnsLength = config.verifyDnsLength + )(value) + + /** Calls the flag-based `toUnicodeRaw` with flags read from `config`; `config.verifyDnsLength` is not read here. */ def toUnicodeRaw(config: UTS46Config)(value: String): Either[UTS46FailureException, String] = + toUnicodeRaw( + checkHyphens = config.checkHyphens, + checkBidi = config.checkBidi, + checkJoiners = config.checkJoiners, + useStd3ASCIIRules = config.useStd3ASCIIRules, + transitionalProcessing = config.transitionalProcessing + )(value) + + /** Runs `process` on `value`, Punycode-encodes (with the `xn--` prefix) each resulting label that contains a char above 127, runs `checkDnsLength` when `verifyDnsLength` is set, and joins the labels with '.'. Returns a Left whenever any error was accumulated; its `partiallyProcessedValue` is defined when labels were still produced. */ def toASCIIRaw( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + verifyDnsLength: Boolean)(value: String): Either[UTS46FailureException, String] = + process( + checkHyphens = checkHyphens, + checkBidi = checkBidi, + checkJoiners = 
checkJoiners, + useStd3ASCIIRules = useStd3ASCIIRules, + transitionalProcessing = transitionalProcessing, + value = value + ).flatMap(labels => + labels.nonEmptyTraverse(label => + encodeToPunycodeIfNeeded(label).fold( + e => Ior.both(NonEmptyChain(e), label), + label => Ior.right(label) + ))) + .flatMap(labels => + if (verifyDnsLength) { + NonEmptyChain + .fromChain(checkDnsLength(labels)) + .fold( + Ior.right(labels): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] + )(errors => Ior.both(errors, labels)) + } else { + Ior.right(labels) + }) + .fold( + errors => + Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + labels => Right(labels.mkString_(FULL_STOP.toString)), + { + case (errors, labels) => + Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + } + ) + + /** Runs `process` on `value` and joins the resulting labels with '.'. Returns a Left whenever any error was accumulated; its `partiallyProcessedValue` is defined when labels were still produced. */ def toUnicodeRaw( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean)(value: String): Either[UTS46FailureException, String] = + process( + checkHyphens = checkHyphens, + checkBidi = checkBidi, + checkJoiners = checkJoiners, + useStd3ASCIIRules = useStd3ASCIIRules, + transitionalProcessing = transitionalProcessing, + value = value + ).fold( + errors => + Left(UTS46FailureException(errors, None)): Either[UTS46FailureException, String], + labels => Right(labels.mkString_(FULL_STOP.toString)), + { + case (errors, labels) => + Left(UTS46FailureException(errors, Some(labels.mkString_(FULL_STOP.toString)))) + } + ) + + // A Bidi domain name is a domain name containing at least one character + // with Bidi_Class R, AL, or AN. See [IDNA2008] RFC 5893, Section 1.4. 
+ + /** Returns true when at least one code point of `value` has bidi class "R", "AL" or "AN". */ private def isBidiDomainName(value: String): Boolean = { + val len: Int = value.size + @tailrec + def loop(i: Int): Boolean = + if (i >= len) { + false + } else { + val cp: Int = value.codePointAt(i) + val bidiCategory: String = + Either + .catchNonFatal( + bidiTypeForCodePointInt(cp) + ) + .fold( + e => + /* %04X needs a boxed Integer argument; an Array[Int] argument makes String.format itself throw and hides e */ throw new RuntimeException( + s"""Error getting ${String.format("%04X", Integer.valueOf(cp))}""", + e), + identity + ) + + if (bidiCategory === "R" || bidiCategory === "AL" || bidiCategory === "AN") { + true + } else { + val nextI: Int = if (cp >= 0x10000) i + 2 else i + 1 + loop(nextI) + } + } + + loop(0) + } + + /** Punycode-encodes `label` and prepends `PUNYCODE_PREFIX` when it contains a char above 127; otherwise returns it unchanged. */ private def encodeToPunycodeIfNeeded( + label: String): Either[Bootstring.BootstringException, String] = { + val len: Int = label.size + + @tailrec + def hasNonASCIIChar(charIndex: Int): Boolean = + if (charIndex >= len) { + false + } else { + val c: Char = label.charAt(charIndex) + if (c.toInt > 127) { + true + } else { + hasNonASCIIChar(charIndex + 1) + } + } + + if (hasNonASCIIChar(0)) { + Bootstring.encodePunycodeRaw(label).map(label => s"${PUNYCODE_PREFIX}${label}") + } else { + Right(label) + } + } + + /** Collects the length errors for the labels of a domain: an empty last label, a total length above 253, and labels shorter than 1 or longer than 63. */ private def checkDnsLength(value: NonEmptyChain[String]): Chain[UTS46Exception] = { + // TODO: Can be optimized to one traversal + val emptyLabel: Boolean = value.last.isEmpty + val dots: Long = value.length - 1L + val totalSize: Long = value.reduceLeftTo(_.size) { + case (acc, value) => acc + value.size + } + dots + val totalSizeWithoutEmptyLabel: Long = + if (emptyLabel && value.size > 1L) { + // Subtract 1 for the empty label's dot. 
+ totalSize - 1L + } else { + totalSize + } + val withoutEmptyLabel: Chain[String] = + if (emptyLabel) { + value.init + } else { + value.toChain + } + + val emptyLabelError: Chain[UTS46Exception] = + if (emptyLabel) { + Chain.one(UTS46Exception.EmptyRootLabelException) + } else { + Chain.empty + } + + val domainLengthError: Chain[UTS46Exception] = + if (totalSizeWithoutEmptyLabel > 253L) { + Chain.one( + UTS46Exception.DomainNameExceedsMaxLengthException(totalSizeWithoutEmptyLabel)) + } else { + Chain.empty + } + + withoutEmptyLabel.foldLeft(emptyLabelError ++ domainLengthError) { + case (errors, label) => + val len: Long = label.size.toLong + if (len < 1L) { + UTS46Exception.NonRootEmptyLabelException +: errors + } else if (len > 63L) { + UTS46Exception.LabelExceedsMaxLengthException(len) +: errors + } else { + errors + } + } + } + + /** Runs the per-label validity checks enabled by the flags over `value` and returns every violation found; the result is empty when the label passes. */ private def validInternal( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + value: String): Chain[UTS46Exception] = { + val len: Int = value.length() + + // We can skip the normalization check, this is always performed by either + // toASCII or toUnicode. Thus we only need to do this for the public + // validity check method. We also don't need to check for FULL_STOP code + // points, as toASCII and toUnicode will split on them. We also do not + // need to check step 6, that is taken care of by the mapping step in + // toASCII and toUnicode. + + /* Check for validity criteria 2, HYPHEN_MINUS can not occur in both positions 3 + * and 4. This should be called at most once when processing position 4 of + * the input. 
+ */ + def checkHyphen34( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + cp: Int): Chain[UTS46Exception] = + if (cp === HYPHEN_MINUS_INT && previousCodePoint === Some(HYPHEN_MINUS_INT)) { + errors :+ UTS46Exception.HyphenMinusInThirdAndFourthPositionException + } else { + errors + } + + /* True when the code point immediately before startCharIndex is in viramaCanonicalCombiningClassCodePoints. */ def checkNonJoinerPrevCodePoint(startCharIndex: Int): Boolean = + if (startCharIndex <= 0) { + false + } else { + val cp: Int = value.codePointBefore(startCharIndex) + viramaCanonicalCombiningClassCodePoints(cp) + } + + /* Scans backwards from startCharIndex, skipping joining type T code points; true when the first other code point has joining type L or D. */ def checkNonJoinerBefore(startCharIndex: Int): Boolean = { + + @tailrec + def loop(charIndex: Int): Boolean = + if (charIndex <= 0) { + false + } else { + val cp: Int = value.codePointBefore(charIndex) + val nextIndex: Int = charIndex - (if (cp >= 0x10000) 2 else 1) + + if (isJoiningTypeL(cp) || isJoiningTypeD(cp)) { + true + } else { + if (isJoiningTypeT(cp)) { + loop(nextIndex) + } else { + false + } + } + } + + loop(startCharIndex) + } + + /* Scans forwards from startCharIndex, skipping a zero width non-joiner at the start index and joining type T code points; true when the first other code point has joining type R or D. */ def checkNonJoinerAfter(startCharIndex: Int): Boolean = { + + @tailrec + def loop(charIndex: Int): Boolean = + if (charIndex >= len) { + false + } else { + val cp: Int = value.codePointAt(charIndex) + val nextIndex: Int = charIndex + (if (cp >= 0x10000) 2 else 1) + + if (charIndex === startCharIndex && cp === ZERO_WIDTH_NON_JOINER_INT) { + // skip + loop(nextIndex) + } else { + if (isJoiningTypeR(cp) || isJoiningTypeD(cp)) { + true + } else { + if (isJoiningTypeT(cp)) { + loop(nextIndex) + } else { + false + } + } + } + } + + loop(startCharIndex) + } + + /* Adds an error when checkHyphens is on and codePoint is a hyphen-minus. */ def checkFirstHyphen(errors: Chain[UTS46Exception], codePoint: Int): Chain[UTS46Exception] = + if (checkHyphens && codePoint === HYPHEN_MINUS_INT) { + errors :+ UTS46Exception.LabelBeginsWithHyphenMinusException + } else { + errors + } + + /* Adds an error when codePoint is in combiningMarkCodePoints. */ def checkCombiningMark( + errors: Chain[UTS46Exception], + codePoint: Int): Chain[UTS46Exception] = + if (combiningMarkCodePoints.apply(codePoint)) { + errors :+ 
UTS46Exception.LabelStartsWithGeneralMarkException(codePoint) + } else { + errors + } + + /* Derives the label direction from the first code point: "L" gives LTR, "R" or "AL" gives RTL, anything else records an error and yields no direction. Yields no direction when checkBidi is off. */ def checkFirstBidi( + errors: Chain[UTS46Exception], + codePoint: Int): (Chain[UTS46Exception], Option[BidiType]) = + if (checkBidi) { + bidiTypeForCodePointInt(codePoint) match { + case "L" => + (errors, Some(BidiType.LTR)) + case "R" | "AL" => + (errors, Some(BidiType.RTL(None))) + case otherwise => + ( + errors :+ UTS46Exception.InvalidBidiTypeForFirstCodePointException( + codePoint, + otherwise), + None) + } + } else { + (errors, None) + } + + /* Applies one code point's bidi class to the running state (direction, AN/EN conflict flag, end-of-label flag) and returns the updated state together with the errors. */ def generalBidiCheck( + errors: Chain[UTS46Exception], + bidiType: BidiType, + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid, + codePoint: Int) + : (BidiType, BidiNumberTypeError, BidiEndLabelValid, Chain[UTS46Exception]) = + bidiType match { + case BidiType.RTL(numberType) => + bidiTypeForCodePointInt(codePoint) match { + case "AN" => + numberType match { + case Some(BidiRTLNumberType.ArabicNumber) => + ( + BidiType.RTL(numberType), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) + case Some(BidiRTLNumberType.EuropeanNumber) => + // Set error + ( + BidiType.RTL(numberType), + BidiNumberTypeError(true), + BidiEndLabelValid(true), + errors) + case None => + // Set number type + ( + BidiType.RTL(Some(BidiRTLNumberType.ArabicNumber)), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) + } + case "EN" => + numberType match { + case Some(BidiRTLNumberType.ArabicNumber) => + // Set Error + ( + BidiType.RTL(numberType), + BidiNumberTypeError(true), + BidiEndLabelValid(true), + errors) + case Some(BidiRTLNumberType.EuropeanNumber) => + ( + BidiType.RTL(numberType), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) + case None => + // Set number Type + ( + BidiType.RTL(Some(BidiRTLNumberType.EuropeanNumber)), + bidiNumberTypeError, + BidiEndLabelValid(true), + errors) + } + case "R" | "AL" => + (bidiType, bidiNumberTypeError, BidiEndLabelValid(true), errors) + case "ES" | "CS" 
| "ET" | "ON" | "BN" => + (bidiType, bidiNumberTypeError, BidiEndLabelValid(false), errors) + case "NSM" => + (bidiType, bidiNumberTypeError, bidiEndLabelValid, errors) + case otherwise => + ( + bidiType, + bidiNumberTypeError, + BidiEndLabelValid(false), + UTS46Exception.InvalidBidiTypeForRTLLabelException( + codePoint, + otherwise) +: errors) + } + case BidiType.LTR => + bidiTypeForCodePointInt(codePoint) match { + case "L" | "EN" => + (bidiType, BidiNumberTypeError(false), BidiEndLabelValid(true), errors) + case "ES" | "CS" | "ET" | "ON" | "BN" => + (bidiType, BidiNumberTypeError(false), BidiEndLabelValid(false), errors) + case "NSM" => + (bidiType, BidiNumberTypeError(false), bidiEndLabelValid, errors) + case otherwise => + ( + bidiType, + BidiNumberTypeError(false), + bidiEndLabelValid, + UTS46Exception.InvalidBidiTypeForLTRLabelException( + codePoint, + otherwise) +: errors) + } + } + + /* Checks that apply only to the first code point of the label: leading hyphen-minus, then combining mark. */ def checkFirstCodePoint( + errors: Chain[UTS46Exception], + codePoint: Int): Chain[UTS46Exception] = + checkCombiningMark(checkFirstHyphen(errors, codePoint), codePoint) + + /* Dispatches on the code point index: index 0 gets the first-code-point checks, index 3 gets the hyphen check when checkHyphens is on, every other index is left alone. */ def positionalChecks( + errors: Chain[UTS46Exception], + codePointIndex: Int, + previousCodePoint: Option[Int], + codePoint: Int): Chain[UTS46Exception] = + codePointIndex match { + case 0 => + checkFirstCodePoint(errors, codePoint) + case 3 if checkHyphens => + checkHyphen34(errors, previousCodePoint, codePoint) + case _ => + errors + } + + /* ContextJ checks for the zero width non-joiner and zero width joiner; returns errors unchanged when checkJoiners is off or codePoint is neither of the two. */ def checkForJoiners( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + charIndex: Int, + codePoint: Int): Chain[UTS46Exception] = + if (checkJoiners) { + if (codePoint === ZERO_WIDTH_NON_JOINER_INT || codePoint === ZERO_WIDTH_JOINER_INT) { + if (previousCodePoint.fold(false)(viramaCanonicalCombiningClassCodePoints.apply)) { + errors + } else if (codePoint === ZERO_WIDTH_NON_JOINER_INT) { + if (checkNonJoinerPrevCodePoint(charIndex) || (checkNonJoinerBefore( + charIndex) && checkNonJoinerAfter(charIndex))) { + errors + } else { + errors :+ 
UTS46Exception.ContextJViolationForNonJoinerException + } + } else { + errors :+ UTS46Exception.ContextJViolationForJoinerException + } + } else { + errors + } + } else { + errors + } + + /* Adds an error when checkHyphens is on and the last code point seen was a hyphen-minus. */ def checkFinalHyphen( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int]): Chain[UTS46Exception] = + if (checkHyphens) { + previousCodePoint.fold( + errors + ) { + case HYPHEN_MINUS_INT => + UTS46Exception.LabelEndsWithHyphenMinusException +: errors + case _ => + errors + } + } else { + errors + } + + /* End-of-label bidi checks: reports a recorded AN/EN conflict, and reports a label whose end-of-label flag is false, using the error matching the label's direction. */ def checkFinalBidi( + errors: Chain[UTS46Exception], + bidiType: BidiType, + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid): Chain[UTS46Exception] = + (if (bidiNumberTypeError.value) { + UTS46Exception.MutuallyExclusiveBidiNumberTypesException +: errors + } else { + errors + }) match { + case errors => + if (bidiEndLabelValid.value) { + errors + } else { + bidiType match { + case _: BidiType.LTR.type => + UTS46Exception.LTRLabelDidNotEndWithCorrectBidiTypeException +: errors + case _: BidiType.RTL => + UTS46Exception.RTLLabelDidNotEndWithCorrectBidiTypeException +: errors + } + } + } + + // TODO: Optimization, consider making multiple variants of this loop for + // each permutation of condition. 
+ /* Walks the label one code point at a time, threading the accumulated errors and bidi state. codePointIndex counts code points and drives the positional checks; charIndex is the UTF-16 offset into value and drives every lookup. */ @tailrec + def loop( + errors: Chain[UTS46Exception], + previousCodePoint: Option[Int], + bidiType: Option[BidiType], + bidiNumberTypeError: BidiNumberTypeError, + bidiEndLabelValid: BidiEndLabelValid, + codePointIndex: Int, + charIndex: Int): Chain[UTS46Exception] = + if (charIndex >= len) { + // step 3 end check + checkFinalHyphen(errors, previousCodePoint) match { + case errors => + bidiType.fold( + errors + )(bidiType => + checkFinalBidi(errors, bidiType, bidiNumberTypeError, bidiEndLabelValid)) + } + } else { + /* codePointAt takes a UTF-16 char index; codePointIndex falls behind charIndex after any supplementary code point */ val cp: Int = value.codePointAt(charIndex) + val nextCPIndex: Int = codePointIndex + 1 + val nextCharIndex: Int = charIndex + (if (cp >= 0x10000) 2 else 1) + + (if (codePointIndex === 0 && checkBidi) { + checkFirstBidi(errors, cp) + } else { + (errors, bidiType) + }) match { + // Intentional shadow + case (errors, bidiType) => + checkForJoiners( + positionalChecks(errors, codePointIndex, previousCodePoint, cp), + previousCodePoint, + charIndex, + cp + ) match { + case errors => + bidiType match { + case None => + loop( + errors, + Some(cp), + bidiType, + bidiNumberTypeError, + bidiEndLabelValid, + nextCPIndex, + nextCharIndex) + case Some(bidiType) => + generalBidiCheck( + errors, + bidiType, + bidiNumberTypeError, + bidiEndLabelValid, + cp) match { + case (bidiType, bidiNumberTypeError, bidiEndLabelValid, errors) => + loop( + errors, + Some(cp), + Some(bidiType), + bidiNumberTypeError, + bidiEndLabelValid, + nextCPIndex, + nextCharIndex) + } + } + } + } + } + + loop(Chain.empty, None, None, BidiNumberTypeError(false), BidiEndLabelValid(false), 0, 0) + } + + /** Maps the code points of `value` and NFC-normalizes the result, splits it into labels on '.', Punycode-decodes labels starting with `xn--`, and validates every label. Errors are accumulated alongside the processed labels; a mapping failure yields only the error. */ private def process( + checkHyphens: Boolean, + checkBidi: Boolean, + checkJoiners: Boolean, + useStd3ASCIIRules: Boolean, + transitionalProcessing: Boolean, + value: String): Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = { + + // The bidirectional rules apply if checkBidi is true _and_ the input is + // a bidi domain name. 
+ /* NOTE(review): this is a def, so isBidiDomainName(value) is re-evaluated for every label that gets validated. */ def shouldCheckBidi: Boolean = + checkBidi && isBidiDomainName(value) + + /* Decodes a label starting with PUNYCODE_PREFIX and validates the decoded text; when decoding fails the error is kept together with the original label. Other labels are validated as they are. */ def processLabel(label: String): Ior[NonEmptyChain[IDNAException], String] = { + def validateLablel(label: String): Ior[NonEmptyChain[IDNAException], String] = + NonEmptyChain.fromChain( + validInternal( + checkHyphens = checkHyphens, + checkBidi = shouldCheckBidi, + checkJoiners = checkJoiners, + label)) match { + case Some(nec) => + Ior.both(nec, label) + case _ => + Ior.right(label) + } + + if (label.startsWith(PUNYCODE_PREFIX)) { + Bootstring.decodePunycodeRaw(label.drop(4)) match { + case Left(e) => + Ior.both(NonEmptyChain.one(e), label) + case Right(label) => + // When it is a Punycode label, we would always use + // non-transitional processing for validation according to UTS-46, + // however the validity check which requires non-transitional + // processing (validity check 6) is not necessary if the input + // has already gone through the UTS-46 mapping step. It is only + // applicable if we are applying the validity check to an + // arbitrary string, which we never do. + validateLablel(label) + } + } else { + validateLablel(label) + } + } + + /* Processes the given labels from first to last, combining each label's result into acc. */ @tailrec + def processLabels( + labels: NonEmptyChain[String], + acc: Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]]) + : Ior[NonEmptyChain[IDNAException], NonEmptyChain[String]] = + labels.uncons match { + case (label, labels) => + acc.combine(processLabel(label).map(NonEmptyChain.one)) match { + // Intentional Shadow + case acc => + NonEmptyChain.fromChain(labels) match { + case Some(labels) => + processLabels(labels, acc) + case _ => + acc + } + } + } + + /* Splits value into labels on FULL_STOP, keeping empty labels (including a trailing one). */ @tailrec + def toLabels(value: String, acc: Chain[String]): NonEmptyChain[String] = + if (value.isEmpty) { + // It is important that we don't ignore the empty label. 
+ NonEmptyChain.fromChainAppend(acc, value) + } else { + value.span(_ =!= FULL_STOP) match { + case (label, rest) if rest.isEmpty => + NonEmptyChain.fromChainAppend(acc, label) + case (label, rest) => + // First character in rest must be '.' + toLabels(rest.tail, acc :+ label) + } + } + + Ior + .fromEither( + CodePointMapper.mapCodePoints(useStd3ASCIIRules, transitionalProcessing)(value).map(nfc) + ) + .leftMap(NonEmptyChain.one[IDNAException]) + .flatMap(value => + toLabels(value, Chain.empty).uncons match { + case (label, labels) => + processLabel(label).map(NonEmptyChain.one) match { + case acc => + NonEmptyChain.fromChain(labels) match { + case Some(labels) => + processLabels(labels, acc) + case _ => + acc + } + } + }) + } + + /** Normalizes `value` to Unicode Normalization Form C using `java.text.Normalizer`. */ private def nfc(value: String): String = + Normalizer.normalize(value, Normalizer.Form.NFC) + + final private val FULL_STOP = '\u002e' + + final private val PUNYCODE_PREFIX = "xn--" + + final private val HYPHEN_MINUS = '\u002d' + + final private val HYPHEN_MINUS_INT = HYPHEN_MINUS.toInt + + final private val ZERO_WIDTH_NON_JOINER = '\u200c' + + final private val ZERO_WIDTH_NON_JOINER_INT = ZERO_WIDTH_NON_JOINER.toInt + + final private val ZERO_WIDTH_JOINER = '\u200d' + + final private val ZERO_WIDTH_JOINER_INT = ZERO_WIDTH_JOINER.toInt + + /** Bidi state flag: true once both an AN and an EN code point were seen in an RTL label. */ final private case class BidiNumberTypeError(value: Boolean) extends AnyVal + + /** Bidi state flag read at the end of a label to decide whether it ends on an allowed bidi class; NSM code points leave it unchanged. */ final private case class BidiEndLabelValid(value: Boolean) extends AnyVal + + /** Which kind of number (EN or AN) was seen first in an RTL label. */ sealed abstract private class BidiRTLNumberType extends Serializable + + private object BidiRTLNumberType { + case object EuropeanNumber extends BidiRTLNumberType + case object ArabicNumber extends BidiRTLNumberType + } + + /** Direction of a label as derived from its first code point. */ sealed abstract private class BidiType extends Serializable + + private object BidiType { + case object LTR extends BidiType + final case class RTL(numberType: Option[BidiRTLNumberType]) extends BidiType + } + + /** Failure returned by `toASCIIRaw` and `toUnicodeRaw`, carrying every error accumulated during processing. */ sealed abstract class UTS46FailureException extends IDNAException with NoStackTrace { + def errors: 
NonEmptyChain[IDNAException] + + /** The labels joined with '.' when processing still produced labels despite the errors, otherwise None. */ def partiallyProcessedValue: Option[String] + + final override def getMessage: String = + s"""Errors encountered during UTS-46 processing: ${errors + .map(_.getLocalizedMessage) + .mkString_(", ")}""" + + final override def toString: String = + s"UTS46FailureException(errors = ${errors})" + } + + object UTS46FailureException { + final private[this] case class UTS46FailureExceptionImpl( + override val errors: NonEmptyChain[IDNAException], + override val partiallyProcessedValue: Option[String]) + extends UTS46FailureException + + private[UTS46] def apply( + errors: NonEmptyChain[IDNAException], + partiallyProcessedValue: Option[String]): UTS46FailureException = + UTS46FailureExceptionImpl(errors, partiallyProcessedValue) + } + + /** A single validation error raised by the UTS-46 checks in this object. */ sealed abstract class UTS46Exception extends IDNAException with NoStackTrace + + object UTS46Exception { + private[UTS46] case object HyphenMinusInThirdAndFourthPositionException + extends UTS46Exception { + override val getMessage: String = + "Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this." + + override def toString: String = + s"HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LabelBeginsWithHyphenMinusException extends UTS46Exception { + override val getMessage: String = + "Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this." + + override def toString: String = + s"LabelBeginsWithHyphenMinusException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LabelEndsWithHyphenMinusException extends UTS46Exception { + override val getMessage: String = + "Label ends with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this." 
+ + override def toString: String = + s"LabelEndsWithHyphenMinusException(getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised when the first code point of a label is in `combiningMarkCodePoints`; `cp` is that code point. */ final private[UTS46] case class LabelStartsWithGeneralMarkException(cp: Int) + extends UTS46Exception { + private def description: String = + CodePoint.descriptionFromInt(cp) + + override def getMessage: String = + s"The label starts with a code point which indicates a combining mark (General_Category=Mark in Unicode). This is forbidden by UTS-46: ${description}" + + override def toString: String = + s"LabelStartsWithGeneralMarkException(cp = ${description}, getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised for a zero width non-joiner (0x200c) that fails the ContextJ check. */ private[UTS46] case object ContextJViolationForNonJoinerException extends UTS46Exception { + override val getMessage: String = + "ContextJ violation found for zero width non-joiner code point 0x200c. If present in a label, it must follow a code point which has a canonical combining class of Virama or it must follow a code point with a joining type of L (Left joining) or D (Dual joining) followed by zero or more code points with a joining type of T (transparent), then 0x200c, then be have zero or more code points after with a T joining type then a code point with a joining type of R (Right joining) or D." + + override def toString: String = + s"ContextJViolationForNonJoinerException(getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised for a zero width joiner (0x200d) that does not follow a Virama code point. */ private[UTS46] case object ContextJViolationForJoinerException extends UTS46Exception { + override val getMessage: String = + "ContextJ violation found for zero width joiner code point 0x200d. If present in a label, it must follow a code point which has a canonical combining class of Virama, but did not." 
+ + /* Must print this object's own name; it previously printed the name of its NonJoiner sibling. */ override def toString: String = + s"ContextJViolationForJoinerException(getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised when the first code point of a label has a bidi class other than L, R or AL. */ final private[UTS46] case class InvalidBidiTypeForFirstCodePointException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { + override def getMessage: String = + s"Invalid bidirectional type for first code point in label. Expected L, R, or AL, got ${bidiType}. Code point: ${CodePoint.descriptionFromInt(codePoint)}" + + override def toString: String = + s"InvalidBidiTypeForFirstCodePointException(codePoint = ${codePoint}, bidiType = ${bidiType})" + } + + /** Raised when a code point in an RTL label has a bidi class that RTL labels do not allow. */ final private[UTS46] case class InvalidBidiTypeForRTLLabelException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { + override def getMessage: String = + s"In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}" + + override def toString: String = + s"InvalidBidiTypeForRTLLabelException(codePoint = ${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})" + } + + /** Raised when a code point in an LTR label has a bidi class that LTR labels do not allow. */ final private[UTS46] case class InvalidBidiTypeForLTRLabelException( + codePoint: Int, + bidiType: String) + extends UTS46Exception { + override def getMessage: String = + s"In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or NSM are allowed, but got ${bidiType} for code point: ${CodePoint.descriptionFromInt(codePoint)}" + + override def toString: String = + s"InvalidBidiTypeForLTRLabelException(codePoint = ${CodePoint.descriptionFromInt(codePoint)}, bidiType = ${bidiType})" + } + + /** Raised at the end of an RTL label in which both AN and EN code points were seen. */ private[UTS46] case object MutuallyExclusiveBidiNumberTypesException + extends UTS46Exception { + override val getMessage: String = + "In an RTL label, if an EN is present, no AN may be present, and vice versa, however this label has both." 
+ + override def toString: String = + s"MutuallyExclusiveBidiNumberTypesException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object RTLLabelDidNotEndWithCorrectBidiTypeException + extends UTS46Exception { + override val getMessage: String = + "In an RTL label, the end of the label must be a character with Bidi property R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM, but this was not the case." + + override def toString: String = + s"RTLLabelDidNotEndWithCorrectBidiTypeException(getLocalizedMessage = ${getLocalizedMessage})" + } + + private[UTS46] case object LTRLabelDidNotEndWithCorrectBidiTypeException + extends UTS46Exception { + override val getMessage: String = + "In an LTR label, the end of the label must be a character with Bidi property L or EN, followed by zero or more characters with Bidi property NSM, but this was not the case." + + override def toString: String = + s"LTRLabelDidNotEndWithCorrectBidiTypeException(getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised by `checkDnsLength` for an empty label that is not the last label. */ private[UTS46] case object NonRootEmptyLabelException extends UTS46Exception { + override val getMessage: String = + "An empty label was present but it was not the root label. This is forbidden." + + override def toString: String = + s"NonRootEmptyLabelException(getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised by `checkDnsLength` for a label longer than 63; `size` is the label's `String` length. Marked final like the other case classes in this object. */ final private[UTS46] case class LabelExceedsMaxLengthException(size: Long) + extends UTS46Exception { + // TODO: Include offending label? Need to check Unicode security recommendations. + override def getMessage: String = + s"A domain label is required to be between 1 and 63 characters when represented as ASCII, but got ${size}." + + override def toString: String = + s"LabelExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised by `checkDnsLength` when the domain is longer than 253; `size` is the sum of the label lengths plus the separating dots, not counting the dot of a trailing empty label. Marked final like the other case classes in this object. */ final private[UTS46] case class DomainNameExceedsMaxLengthException(size: Long) + extends UTS46Exception { + // TODO: Include offending domain? 
Need to check Unicode security recommendations. + override def getMessage: String = + s"A domain name must be between 1 and 253 characters when represented as ASCII, but got ${size}." + + override def toString: String = + s"DomainNameExceedsMaxLengthException(size = ${size}, getLocalizedMessage = ${getLocalizedMessage})" + } + + /** Raised by `checkDnsLength` when the last label of the domain is empty. */ private[UTS46] case object EmptyRootLabelException extends UTS46Exception { + override val getMessage: String = + "The domain ends with the empty root label. While this is a valid domain, UTS-46 forbids this notation." + + override def toString: String = + s"EmptyRootLabelException(getLocalizedMessage = ${getLocalizedMessage})" + } + } +} diff --git a/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala new file mode 100644 index 00000000..0dd0c846 --- /dev/null +++ b/core/shared/src/main/scala/org/typelevel/idna4s/core/uts46/UTS46Config.scala @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2022 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/*
 * Copyright (c) 2022 Typelevel
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.typelevel.idna4s.core.uts46

/**
 * Configuration object for UTS46 processing.
 *
 * See the member definitions for descriptions of how they affect UTS46.
 *
 * Instances are immutable. Use the `with*` methods to derive a modified copy, starting from
 * [[UTS46Config.Strict]] or from a value built with [[UTS46Config.apply]].
 *
 * @see
 *   [[https://www.unicode.org/reports/tr46/#Processing]]
 * @see
 *   [[https://www.unicode.org/reports/tr46/#Validity_Criteria]]
 */
sealed abstract class UTS46Config extends Serializable {

  /**
   * From UTS46, section 4.1, validity criteria 2 and 3.
   *
   * {{{
   * If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the third and fourth positions.
   * If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character.
   * }}}
   *
   * For example, where `config` is any [[UTS46Config]],
   *
   * {{{
   * scala> val inputs: List[String] = List("-a", "a-", "ab--cd")
   * val inputs: List[String] = List(-a, a-, ab--cd)
   *
   * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(true))).foreach(println)
   * Left(UTS46FailureException(errors = Chain(LabelBeginsWithHyphenMinusException(getLocalizedMessage = Label begins with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.))))
   * Left(UTS46FailureException(errors = Chain(LabelEndsWithHyphenMinusException(getLocalizedMessage = Label ends with hyphen-minus (0x002d) and checkHyphens is on. UTS-46 forbids this.))))
   * Left(UTS46FailureException(errors = Chain(HyphenMinusInThirdAndFourthPositionException(getLocalizedMessage = Hyphen-minus (0x002d) code point found in positions 3 and 4 of label and checkHyphens is on. UTS-46 forbids this.))))
   *
   * scala> inputs.map(UTS46.toASCIIRaw(config.withCheckHyphens(false))).foreach(println)
   * Right(-a)
   * Right(a-)
   * Right(ab--cd)
   * }}}
   */
  def checkHyphens: Boolean

  /**
   * The `CheckBidi` flag of UTS46. The bidi rules it refers to are defined in RFC-5893,
   * section 2.
   *
   * @see
   *   [[https://www.rfc-editor.org/rfc/rfc5893.txt]]
   */
  def checkBidi: Boolean

  /**
   * The `CheckJoiners` flag of UTS46.
   */
  def checkJoiners: Boolean

  /**
   * The `UseSTD3ASCIIRules` flag of UTS46.
   */
  def useStd3ASCIIRules: Boolean

  /**
   * The `Transitional_Processing` flag of UTS46.
   */
  def transitionalProcessing: Boolean

  /**
   * The `VerifyDnsLength` flag of UTS46.
   */
  def verifyDnsLength: Boolean

  /** A copy of this configuration with [[checkHyphens]] set to `value`. */
  def withCheckHyphens(value: Boolean): UTS46Config

  /** A copy of this configuration with [[checkBidi]] set to `value`. */
  def withCheckBidi(value: Boolean): UTS46Config

  /** A copy of this configuration with [[checkJoiners]] set to `value`. */
  def withCheckJoiners(value: Boolean): UTS46Config

  /** A copy of this configuration with [[useStd3ASCIIRules]] set to `value`. */
  def withUseStd3ASCIIRules(value: Boolean): UTS46Config

  /** A copy of this configuration with [[transitionalProcessing]] set to `value`. */
  def withTransitionalProcessing(value: Boolean): UTS46Config

  /** A copy of this configuration with [[verifyDnsLength]] set to `value`. */
  def withVerifyDNSLength(value: Boolean): UTS46Config

  /**
   * Alias of [[withVerifyDNSLength]] spelled the same way as the [[verifyDnsLength]] member it
   * updates.
   */
  final def withVerifyDnsLength(value: Boolean): UTS46Config =
    withVerifyDNSLength(value)

  final override def toString: String =
    s"UTS46Config(checkHyphens = ${checkHyphens}, checkBidi = ${checkBidi}, checkJoiners = ${checkJoiners}, useStd3ASCIIRules = ${useStd3ASCIIRules}, transitionalProcessing = ${transitionalProcessing}, verifyDnsLength = ${verifyDnsLength})"
}

object UTS46Config {

  /**
   * A configuration with every check enabled and transitional processing disabled.
   */
  val Strict: UTS46Config =
    UTS46Config(
      checkHyphens = true,
      checkBidi = true,
      checkJoiners = true,
      useStd3ASCIIRules = true,
      transitionalProcessing = false,
      verifyDnsLength = true
    )

  /**
   * The only implementation of [[UTS46Config]]. It is hidden so that the case class members
   * (`copy`, `unapply`, ...) do not become part of the public API.
   */
  final private[this] case class UTS46ConfigImpl(
      override val checkHyphens: Boolean,
      override val checkBidi: Boolean,
      override val checkJoiners: Boolean,
      override val useStd3ASCIIRules: Boolean,
      override val transitionalProcessing: Boolean,
      override val verifyDnsLength: Boolean)
      extends UTS46Config {
    override def withCheckHyphens(value: Boolean): UTS46Config =
      copy(checkHyphens = value)
    override def withCheckBidi(value: Boolean): UTS46Config =
      copy(checkBidi = value)
    override def withCheckJoiners(value: Boolean): UTS46Config =
      copy(checkJoiners = value)
    override def withUseStd3ASCIIRules(value: Boolean): UTS46Config =
      copy(useStd3ASCIIRules = value)
    override def withTransitionalProcessing(value: Boolean): UTS46Config =
      copy(transitionalProcessing = value)
    override def withVerifyDNSLength(value: Boolean): UTS46Config =
      copy(verifyDnsLength = value)
  }

  /**
   * Create a [[UTS46Config]] from explicit values for each flag. See the members of
   * [[UTS46Config]] for the meaning of each parameter.
   */
  def apply(
      checkHyphens: Boolean,
      checkBidi: Boolean,
      checkJoiners: Boolean,
      useStd3ASCIIRules: Boolean,
      transitionalProcessing: Boolean,
      verifyDnsLength: Boolean
  ): UTS46Config =
    // Named arguments: six adjacent Booleans are too easy to transpose silently.
    UTS46ConfigImpl(
      checkHyphens = checkHyphens,
      checkBidi = checkBidi,
      checkJoiners = checkJoiners,
      useStd3ASCIIRules = useStd3ASCIIRules,
      transitionalProcessing = transitionalProcessing,
      verifyDnsLength = verifyDnsLength
    )
}
- * - * @see - * [[https://www.unicode.org/reports/tr46/#Validity_Criteria Validity_Criteria]] - * @see - * [[https://www.rfc-editor.org/rfc/rfc5893.txt]] - */ - protected def bidirectionalCategoryMap: IntMap[String] } diff --git a/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala b/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala index 8dc46bce..87c59800 100644 --- a/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala +++ b/project/src/main/scala/org/typelevel/idna4s/build/UnicodeDataCodeGen.scala @@ -71,7 +71,6 @@ object UnicodeDataCodeGen { package org.typelevel.idna4s.core.uts46 import cats.collections.BitSet -import scala.collection.immutable.IntMap private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( Type.Name(BaseTypeName), @@ -79,7 +78,6 @@ private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( Seq.empty)} { override final protected lazy val combiningMarkCodePoints: BitSet = $combiningMarkCodePointsRHS - ..${bidirectionalCategoryDefs(unicodeData)} ..${viramaCanonicalCombiningClassCodePointsDefs(unicodeData)} }""" } @@ -948,43 +946,6 @@ private[uts46] trait ${Type.Name(GeneratedTypeName)} extends ${Init( } } - /** - * Create the defs needed for the bidirectional information about Unicode code points. 
- */ - private def bidirectionalCategoryDefs( - unicodeData: UnicodeData[UnicodeCodePointInfomation]): List[Defn] = { - val categoryData: UnicodeData[BidirectionalCategory] = - unicodeData.mapValues( - _.bidirectionalCategory - ) - val (singles, ranges): ( - SortedMap[CodePointRange.Single, BidirectionalCategory], - SortedMap[CodePointRange, BidirectionalCategory]) = categoryData.partitioned - val rangeTerms: List[Term] = ranges.toList.map { - case (k, v) => - q"(Range.inclusive(${Lit.Int(k.lower.value)}, ${Lit.Int(k.upper.value)}), ${Lit.String(v.value)})" - } - val singleTerms: List[Term] = - singles.toList.map { - case (k, v) => - q"(${Lit.Int(k.lower.value)}, ${Lit.String(v.value)})" - } - val baseMap: Term = - q"IntMap(..$singleTerms)" - - List( - q"""private final def bidirectionalCategoryBaseMap: IntMap[String] = $baseMap""", - q"""override final protected lazy val bidirectionalCategoryMap: IntMap[String] = - List[(Range, String)](..$rangeTerms).foldLeft(bidirectionalCategoryBaseMap){ - case (k, (range, result)) => - range.foldLeft(k){ - case (k, cp) => - k.updated(cp, result) - } - }""" - ) - } - /** * Extract out the Unicode code points which have a canonical combining class of Virama. This * is the only class we need to know about for UTS-46. 
/**
 * Generator for [[UTS46Config]] values in which each of the six flags is drawn independently
 * from the `Boolean` generator.
 */
implicit final def arbUTS46Config: Arbitrary[UTS46Config] = {
  val flag = Arbitrary.arbitrary[Boolean]

  Arbitrary(
    for {
      hyphens <- flag
      bidi <- flag
      joiners <- flag
      std3 <- flag
      transitional <- flag
      dnsLength <- flag
    } yield UTS46Config(
      checkHyphens = hyphens,
      checkBidi = bidi,
      checkJoiners = joiners,
      useStd3ASCIIRules = std3,
      transitionalProcessing = transitional,
      verifyDnsLength = dnsLength
    )
  )
}

/**
 * `Cogen` for [[UTS46Config]], derived from the `Cogen` of the tuple of its six flags.
 */
implicit final def cogenUTS46Config: Cogen[UTS46Config] =
  Cogen[(Boolean, Boolean, Boolean, Boolean, Boolean, Boolean)].contramap {
    (config: UTS46Config) =>
      (
        config.checkHyphens,
        config.checkBidi,
        config.checkJoiners,
        config.useStd3ASCIIRules,
        config.transitionalProcessing,
        config.verifyDnsLength
      )
  }
/*
 * Copyright (c) 2022 Typelevel
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.typelevel.idna4s.tests.uts46

import munit._

/**
 * Platform-specific part of the UTS-46 test suite for the JS and Native builds.
 *
 * This variant declares no tests of its own. It exists so that the shared `UTS46Tests` suite
 * can mix in a `UTS46PlatformTests` on every platform.
 */
trait UTS46PlatformTests extends DisciplineSuite
/*
 * Copyright (c) 2022 Typelevel
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.typelevel.idna4s.tests.uts46

import com.ibm.icu.text.IDNA
import java.lang.StringBuilder
import munit._
import org.scalacheck.Prop._
import org.scalacheck._
import org.typelevel.idna4s.core.uts46._
import org.typelevel.idna4s.scalacheck.all._

/**
 * Platform-specific part of the UTS-46 test suite for the JVM.
 *
 * It compares the results of `UTS46.toASCIIRaw` with those of icu4j's UTS-46 implementation
 * for arbitrary inputs.
 */
trait UTS46PlatformTests extends DisciplineSuite {

  /**
   * Translate a [[UTS46Config]] into the option bit set accepted by `IDNA.getUTS46Instance`.
   *
   * Only `useStd3ASCIIRules`, `checkBidi`, `checkJoiners` and `transitionalProcessing` are
   * translated. `checkHyphens` and `verifyDnsLength` are not represented in the result, see
   * [[genIcu4jCompatibleConfig]].
   */
  private def configToIcu4jConfig(config: UTS46Config): Int = {
    val useStd3ASCIIRules: Int =
      if (config.useStd3ASCIIRules) IDNA.USE_STD3_RULES else IDNA.DEFAULT
    val checkBidi: Int = if (config.checkBidi) IDNA.CHECK_BIDI else IDNA.DEFAULT
    val checkJoiners: Int = if (config.checkJoiners) IDNA.CHECK_CONTEXTJ else IDNA.DEFAULT
    val transitionalProcessing: Int =
      if (config.transitionalProcessing) IDNA.DEFAULT
      else IDNA.NONTRANSITIONAL_TO_ASCII | IDNA.NONTRANSITIONAL_TO_UNICODE

    useStd3ASCIIRules | checkBidi | checkJoiners | transitionalProcessing
  }

  /**
   * Run icu4j's `nameToASCII` on `value` using the options derived from `config`.
   *
   * @return
   *   the `IDNA.Info` populated by icu4j (which carries any errors) paired with the output
   *   icu4j produced.
   */
  private def icu4jToASCII(config: UTS46Config, value: String): (IDNA.Info, String) = {
    val info: IDNA.Info = new IDNA.Info()

    (
      info,
      IDNA
        .getUTS46Instance(configToIcu4jConfig(config))
        .nameToASCII(value, new StringBuilder(value.length), info)
        .toString)
  }

  /**
   * Arbitrary configurations with `checkHyphens` and `verifyDnsLength` forced on.
   *
   * NOTE(review): presumably these two are pinned because [[configToIcu4jConfig]] has no
   * icu4j option to switch the corresponding checks off; confirm against the icu4j docs.
   */
  private val genIcu4jCompatibleConfig: Gen[UTS46Config] =
    Arbitrary
      .arbitrary[UTS46Config]
      .map(config => config.withCheckHyphens(true).withVerifyDNSLength(true))

  property(
    "idna4s's uts46 implementation should agree with icu4j's uts46 implementation for arbitrary Strings") {
    forAll(genIcu4jCompatibleConfig, Arbitrary.arbitrary[String]) {
      (config: UTS46Config, name: String) =>
        val idna4stoASCIIResult: Either[UTS46.UTS46FailureException, String] =
          UTS46.toASCIIRaw(config)(name)
        val (icu4jInfo, icu4jToASCIIResult): (IDNA.Info, String) =
          icu4jToASCII(config, name)

        idna4stoASCIIResult match {
          case Left(errors) =>
            // On failure only the presence of icu4j errors is compared, not the output.
            (icu4jInfo
              .hasErrors() ?= true) :| s"When idna4s UTS46 fails, so does icu4j: ${errors}."
          case Right(asciiName) =>
            ((icu4jInfo
              .hasErrors() ?= false) :| s"When idna4s UTS46 passes, so does icu4j: ${icu4jInfo.getErrors()}.") &&
            ((asciiName ?= icu4jToASCIIResult) :| "idna4s and icu4j produce the same result.")
        }
    }
  }
}
/*
 * Copyright (c) 2022 Typelevel
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.typelevel.idna4s.tests.uts46

import munit._

/**
 * Entry point of the UTS-46 test suite.
 *
 * The class declares no tests itself; it is a `DisciplineSuite` which mixes in the
 * platform-specific `UTS46PlatformTests` trait.
 */
final class UTS46Tests extends DisciplineSuite with UTS46PlatformTests