@@ -117,7 +117,7 @@ class Parser(
117117 }
118118 })
119119
120- def escape [$ : P ]: P [String ] = P (escape0 | escape1 )
/**
 * Parses one backslash escape inside a string literal and yields the text it stands for.
 *
 * `escape0` is tried first; if it does not match, `unicodeEscape` is tried.
 */
def escape[$: P]: P[String] =
  P(escape0 | unicodeEscape)
121121 def escape0 [$ : P ]: P [String ] = P (" \\ " ~~ ! " u" ~~ AnyChar .! ).flatMapX {
122122 case " \" " => Pass (" \" " )
123123 case " '" => Pass (" \' " )
@@ -130,9 +130,85 @@ class Parser(
130130 case " t" => Pass (" \t " )
131131 case s => Fail .opaque(f " Unknown escape sequence in string literal: $s" )
132132 }
133- def escape1 [$ : P ]: P [String ] = P (" \\ u" ~~ CharIn (" 0-9a-fA-F" ).repX(min = 4 , max = 4 ).! ).map { s =>
134- Integer .parseInt(s, 16 ).toChar.toString
135- }
133+
/**
 * Tests whether `c` is an ASCII hexadecimal digit, accepting both upper- and lower-case
 * letters.
 *
 * {{{
 * HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 * }}}
 *
 * @param c the character to classify
 * @return `true` for `0`-`9`, `A`-`F` and `a`-`f`; `false` for every other character
 */
private def isHexDig(c: Char): Boolean =
  ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')
145+
/**
 * Parses a backslash-u escape whose four hex digits do not fall in the surrogate range
 * `D800`-`DFFF`, and yields those four digits as captured text (without the prefix).
 *
 * {{{
 * non-surrogate = ((DIGIT / "A"/"B"/"C" / "E"/"F") 3HEXDIG) /
 *                 ("D" %x30-37 2HEXDIG )
 * }}}
 *
 * A cut is placed right after the backslash-u prefix.
 */
private def `non-surrogate`[$: P]: P[String] =
  P(
    "\\u" ~~/ SingleChar.flatMapX {
      // A leading "D" is only allowed when the next digit is 0-7 (D000-D7FF).
      case 'D' | 'd' =>
        Pass ~~ CharIn("0-7") ~~ CharPred(isHexDig).repX(min = 2, max = 2)
      // Any other leading hex digit may be followed by three arbitrary hex digits.
      case lead if isHexDig(lead) =>
        Pass ~~ CharPred(isHexDig).repX(min = 3, max = 3)
      case _ =>
        Fail.opaque("invalid non-surrogate")
    }.!
  )
163+
/**
 * Parses two consecutive backslash-u escapes, a high surrogate immediately followed by a
 * low surrogate, and yields the captured hex digits of each half as `(high, low)`.
 *
 * A cut is placed after each backslash-u prefix.
 */
private def `surrogate-pair`[$: P]: P[(String, String)] =
  P(
    ("\\u" ~~/ `high-surrogate`.!) ~~
      ("\\u" ~~/ `low-surrogate`.!)
  )
166+
/**
 * Matches the four hex digits of a UTF-16 high surrogate (`D800`-`DBFF`), in either letter
 * case. The backslash-u prefix is not matched here.
 *
 * {{{
 * high-surrogate = "D" ("8"/"9"/"A"/"B") 2HEXDIG
 * }}}
 */
private def `high-surrogate`[$: P]: P[Unit] =
  P(CharIn("Dd") ~~ CharIn("89ABab") ~~ CharPred(isHexDig).repX(min = 2, max = 2))
177+
/**
 * Matches the four hex digits of a UTF-16 low surrogate (`DC00`-`DFFF`), in either letter
 * case. The backslash-u prefix is not matched here.
 *
 * {{{
 * low-surrogate = "D" ("C"/"D"/"E"/"F") 2HEXDIG
 * }}}
 */
private def `low-surrogate`[$: P]: P[Unit] =
  P(CharIn("Dd") ~~ CharIn("CDEFcdef") ~~ CharPred(isHexDig).repX(min = 2, max = 2))
188+
/**
 * Parses a backslash-u escape and decodes it into the text it denotes.
 *
 * {{{
 * hexchar        = non-surrogate /
 *                  (high-surrogate "\" %x75 low-surrogate)
 * non-surrogate  = ((DIGIT / "A"/"B"/"C" / "E"/"F") 3HEXDIG) /
 *                  ("D" %x30-37 2HEXDIG )
 * high-surrogate = "D" ("8"/"9"/"A"/"B") 2HEXDIG
 * low-surrogate  = "D" ("C"/"D"/"E"/"F") 2HEXDIG
 *
 * HEXDIG         = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
 * }}}
 *
 * Each alternative is decoded in its own `map`, so the result is statically a `P[String]`
 * and no runtime type test or fallback branch is needed.
 *
 * @return a one-char string for a non-surrogate escape, or a two-char string (one
 *         supplementary code point) for a surrogate pair
 */
private def unicodeEscape[$: P]: P[String] =
  P(
    // Both sub-parsers cut after the backslash-u prefix; NoCut keeps backtracking possible
    // so that a failed non-surrogate can still be retried as a surrogate pair.
    // NOTE: jsonnet supports control characters, so no decoded value is rejected here.
    NoCut(`non-surrogate`).map(hex => Integer.parseInt(hex, 16).toChar.toString) |
      NoCut(`surrogate-pair`).map { case (high, low) =>
        val highSurrogate = Integer.parseInt(high, 16).toChar
        val lowSurrogate = Integer.parseInt(low, 16).toChar
        // The grammar guarantees a valid high/low pair, and such a pair already is the
        // UTF-16 encoding of the code point, so the two chars are used directly.
        new String(Array(highSurrogate, lowSurrogate))
      }
  )
211+
/**
 * Parses the body of a double-quoted string literal up to and including the closing quote;
 * the opening quote is not matched here.
 *
 * @return the pieces of the string in order: each is either a captured run of characters
 *         containing no double quote or backslash, or the text produced by `escape`
 */
def doubleString[$: P]: P[Seq[String]] =
  P((CharsWhile(c => !(c == '"' || c == '\\')).! | escape).repX ~~ "\"")
138214 def singleString [$ : P ]: P [Seq [String ]] =
0 commit comments