Skip to content

Commit 5527062

Browse files
authored
chore: Rewrite escape1 to unicodeEscape (#502)
We should check the surrogate. Refs: #501. A follow-up of #500.
1 parent 4a89bb6 commit 5527062

File tree

1 file changed

+80
-4
lines changed

1 file changed

+80
-4
lines changed

sjsonnet/src/sjsonnet/Parser.scala

Lines changed: 80 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ class Parser(
117117
}
118118
})
119119

120-
def escape[$: P]: P[String] = P(escape0 | escape1)
120+
def escape[$: P]: P[String] = P(escape0 | unicodeEscape)
121121
def escape0[$: P]: P[String] = P("\\" ~~ !"u" ~~ AnyChar.!).flatMapX {
122122
case "\"" => Pass("\"")
123123
case "'" => Pass("\'")
@@ -130,9 +130,85 @@ class Parser(
130130
case "t" => Pass("\t")
131131
case s => Fail.opaque(f"Unknown escape sequence in string literal: $s")
132132
}
133-
def escape1[$: P]: P[String] = P("\\u" ~~ CharIn("0-9a-fA-F").repX(min = 4, max = 4).!).map { s =>
134-
Integer.parseInt(s, 16).toChar.toString
135-
}
133+
134+
/**
 * Tells whether `c` is a hexadecimal digit.
 *
 * {{{
 * HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 * }}}
 *
 * Letters are accepted in either case (`A`-`F` and `a`-`f`), alongside the ASCII digits
 * `0`-`9`. Every other character yields `false`.
 */
private def isHexDig(c: Char): Boolean = {
  val decimalDigit = c >= '0' && c <= '9'
  val upperHexLetter = c >= 'A' && c <= 'F'
  val lowerHexLetter = c >= 'a' && c <= 'f'
  decimalDigit || upperHexLetter || lowerHexLetter
}
145+
146+
/**
 * Parses a `\u` escape whose four hex digits do not fall in the UTF-16 surrogate range.
 *
 * {{{
 * non-surrogate = ((DIGIT / "A"/"B"/"C" / "E"/"F") 3HEXDIG) /
 *                  ("D" %x30-37 2HEXDIG )
 * }}}
 *
 * The parser cuts right after the `\u` prefix. The first hex digit then decides what may
 * follow: any hex digit other than `D`/`d` is followed by three arbitrary hex digits, while
 * `D`/`d` must be followed by `0`-`7` and two more hex digits. Any other first character
 * fails with the opaque message "invalid non-surrogate".
 *
 * @return
 *   the four captured hex digits (without the `\u` prefix)
 */
private def `non-surrogate`[$: P]: P[String] =
  P(
    "\\u" ~~/ SingleChar.flatMapX { lead =>
      if (lead == 'D' || lead == 'd')
        // A leading D only stays outside the surrogate range when the next digit is 0-7.
        Pass ~~ CharIn("0-7") ~~ CharPred(isHexDig).repX(min = 2, max = 2)
      else if (isHexDig(lead))
        Pass ~~ CharPred(isHexDig).repX(min = 3, max = 3)
      else
        Fail.opaque("invalid non-surrogate")
    }.!
  )
163+
164+
/**
 * Parses two consecutive `\u` escapes forming a UTF-16 surrogate pair: a high surrogate
 * immediately followed by a low surrogate. Each `\u` prefix is followed by a cut.
 *
 * @return
 *   the captured hex digits of the high and the low surrogate, in that order (both without
 *   their `\u` prefix)
 */
private def `surrogate-pair`[$: P]: P[(String, String)] = P(
  ("\\u" ~~/ `high-surrogate`.!) ~~
    ("\\u" ~~/ `low-surrogate`.!)
)
166+
167+
/**
 * Matches the four hex digits of a UTF-16 high surrogate, in either letter case.
 *
 * {{{
 * high-surrogate = "D" ("8"/"9"/"A"/"B") 2HEXDIG
 * }}}
 *
 * Only the digits are consumed; the `\u` prefix is not part of this rule.
 */
private def `high-surrogate`[$: P]: P[Unit] = P(
  CharIn("Dd") ~~
    CharIn("89ABab") ~~
    CharPred(isHexDig).repX(exactly = 2)
)
177+
178+
/**
 * Matches the four hex digits of a UTF-16 low surrogate, in either letter case.
 *
 * {{{
 * low-surrogate = "D" ("C"/"D"/"E"/"F") 2HEXDIG
 * }}}
 *
 * Only the digits are consumed; the `\u` prefix is not part of this rule.
 */
private def `low-surrogate`[$: P]: P[Unit] = P(
  CharIn("Dd") ~~
    CharIn("CDEFcdef") ~~
    CharPred(isHexDig).repX(exactly = 2)
)
188+
189+
/**
 * Parses a `\uXXXX` escape, or a `\uXXXX\uXXXX` surrogate pair, and decodes it.
 *
 * {{{
 * hexchar = non-surrogate /
 *           (high-surrogate "\" %x75 low-surrogate)
 * non-surrogate = ((DIGIT / "A"/"B"/"C" / "E"/"F") 3HEXDIG) /
 *                  ("D" %x30-37 2HEXDIG )
 * high-surrogate = "D" ("8"/"9"/"A"/"B") 2HEXDIG
 * low-surrogate = "D" ("C"/"D"/"E"/"F") 2HEXDIG
 *
 * HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 * }}}
 *
 * Both alternatives are wrapped in `NoCut`, so the cuts inside them do not prevent
 * backtracking from the first alternative to the second. An escape in the surrogate range
 * that is not part of a high/low pair matches neither alternative and fails.
 *
 * Each alternative is converted to a `String` before the alternation, so the combined parser
 * is statically a `P[String]` and needs no runtime type dispatch.
 *
 * @return
 *   a one-`Char` string for a non-surrogate escape, or the string holding the single code
 *   point encoded by a surrogate pair
 */
private def unicodeEscape[$: P]: P[String] =
  P(
    NoCut(`non-surrogate`).map { hex =>
      Integer.parseInt(hex, 16).toChar.toString
    } |
      NoCut(`surrogate-pair`).map { case (high, low) =>
        // NOTE: jsonnet supports control characters
        val highSurrogate = Integer.parseInt(high, 16).toChar
        val lowSurrogate = Integer.parseInt(low, 16).toChar
        new String(Array[Int](Character.toCodePoint(highSurrogate, lowSurrogate)), 0, 1)
      }
  )
211+
136212
/**
 * Parses the remainder of a double-quoted string literal up to and including the closing
 * `"`. The content is returned as a sequence of pieces: each piece is either a captured run
 * of characters containing neither `"` nor `\`, or the result of one `escape`.
 */
def doubleString[$: P]: P[Seq[String]] =
  P(
    (CharsWhile(c => !(c == '"' || c == '\\')).! | escape).repX ~~ "\""
  )
138214
def singleString[$: P]: P[Seq[String]] =

0 commit comments

Comments
 (0)