Skip to content

Commit e344d47

Browse files
committed
Remove codePointOffsetsToStringIndices and inline its logic
Because of the checks already performed by its callers (each one guarantees a non-empty code point range), the empty-range branch (safeStart == safeEnd) in codePointOffsetsToStringIndices was unreachable and untested. It's clearer (and likely more performant) to eliminate this method and inline specialized versions of its logic at its former call sites.
1 parent 8d5a95f commit e344d47

File tree

3 files changed

+9
-26
lines changed

3 files changed

+9
-26
lines changed

sjsonnet/src/sjsonnet/Evaluator.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,8 @@ class Evaluator(
379379
val unicodeLength = str.codePointCount(0, str.length)
380380
if (int >= unicodeLength)
381381
Error.fail(s"string bounds error: $int not within [0, $unicodeLength)", pos)
382-
val (startUtf16, endUtf16) = Util.codePointOffsetsToStringIndices(str, int, int + 1)
382+
val startUtf16 = if (int == 0) 0 else str.offsetByCodePoints(0, int)
383+
val endUtf16 = str.offsetByCodePoints(startUtf16, 1)
383384
Val.Str(pos, str.substring(startUtf16, endUtf16))
384385
case (v: Val.Obj, i: Val.Str) =>
385386
v.value(i.value, pos)

sjsonnet/src/sjsonnet/Std.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,8 @@ class Std(
543543
if (safeLength <= 0) {
544544
Val.Str(pos, "")
545545
} else {
546-
val (startUtf16, endUtf16) =
547-
Util.codePointOffsetsToStringIndices(str, safeOffset, safeOffset + safeLength)
546+
val startUtf16 = if (safeOffset == 0) 0 else str.offsetByCodePoints(0, safeOffset)
547+
val endUtf16 = str.offsetByCodePoints(startUtf16, safeLength)
548548
Val.Str(pos, str.substring(startUtf16, endUtf16))
549549
}
550550
}

sjsonnet/src/sjsonnet/Util.scala

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,28 +72,6 @@ object Util {
7272
res: Val
7373
}
7474

75-
/**
76-
* Converts Unicode codepoint positions to Java String indices. For example, the string "🌍!" has
77-
* a length of 3 UTF-16 code units, but only 2 Unicode codepoints, so this function would map the
78-
* range (0, 2) to (0, 3).
79-
*/
80-
def codePointOffsetsToStringIndices(
81-
s: String,
82-
startCodePointOffset: Int,
83-
endCodePointOffset: Int): (Int, Int) = {
84-
val unicodeLength = s.codePointCount(0, s.length)
85-
val safeStart = math.max(0, math.min(startCodePointOffset, unicodeLength))
86-
val safeEnd = math.max(safeStart, math.min(endCodePointOffset, unicodeLength))
87-
88-
if (safeStart == safeEnd) {
89-
val utf16Pos = if (safeStart == 0) 0 else s.offsetByCodePoints(0, safeStart)
90-
(utf16Pos, utf16Pos)
91-
} else {
92-
val startUtf16 = if (safeStart == 0) 0 else s.offsetByCodePoints(0, safeStart)
93-
val endUtf16 = s.offsetByCodePoints(startUtf16, safeEnd - safeStart)
94-
(startUtf16, endUtf16)
95-
}
96-
}
9775

9876
private def sliceStr(s: String, start: Int, end: Int, step: Int): String = {
9977
val unicodeLength = s.codePointCount(0, s.length)
@@ -102,7 +80,11 @@ object Util {
10280
} else {
10381
step match {
10482
case 1 =>
105-
val (startUtf16, endUtf16) = codePointOffsetsToStringIndices(s, start, end)
83+
// Preconditions: start >= 0, start < end, start < unicodeLength
84+
val safeEnd = math.min(end, unicodeLength)
85+
val sliceLength = safeEnd - start
86+
val startUtf16 = if (start == 0) 0 else s.offsetByCodePoints(0, start)
87+
val endUtf16 = s.offsetByCodePoints(startUtf16, sliceLength)
10688
s.substring(startUtf16, endUtf16)
10789
case _ =>
10890
val result =

0 commit comments

Comments
 (0)