Skip to content

Commit 4895a35

Browse files
committed
Fixed a bug parsing UTF-8 encoded strings. Additionally, did some refactoring.
1 parent 22d2c76 commit 4895a35

File tree

1 file changed

+49
-27
lines changed

1 file changed

+49
-27
lines changed

java/src/json/ext/VectorizedEscapeScanner.java

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,70 +8,92 @@
88
import jdk.incubator.vector.VectorSpecies;
99

1010
class VectorizedEscapeScanner implements EscapeScanner {
11-
public static EscapeScanner.ScalarEscapeScanner FALLBACK = new EscapeScanner.ScalarEscapeScanner(StringEncoder.ESCAPE_TABLE);
11+
private static final VectorSpecies<Byte> SP = ByteVector.SPECIES_PREFERRED;
12+
private static final ByteVector ZERO = ByteVector.zero(SP);
13+
private static final ByteVector TWO = ByteVector.broadcast(SP, 2);
14+
private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33);
15+
private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\');
1216

1317
@Override
14-
public boolean scan(State _state) throws IOException {
15-
VectorSpecies<Byte> species = ByteVector.SPECIES_PREFERRED;
16-
17-
VectorizedState state = (VectorizedState) _state;
18+
public boolean scan(State _st) throws IOException {
19+
VectorizedState state = (VectorizedState) _st;
1820

1921
if (state.hasMatches) {
2022
if (state.mask > 0) {
21-
return nextMatch(state);
23+
// nextMatch inlined
24+
int index = Long.numberOfTrailingZeros(state.mask);
25+
state.mask &= (state.mask - 1);
26+
state.pos = state.chunkStart + index;
27+
state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]);
28+
return true;
2229
} else {
2330
state.hasMatches = false;
24-
state.pos = state.chunkStart + species.length();
31+
state.pos = state.chunkStart + state.chunkLength;
2532
}
2633
}
2734

28-
while ((state.ptr + state.pos) + species.length() < state.len) {
29-
ByteVector chunk = ByteVector.fromArray(species, state.ptrBytes, state.ptr + state.pos);
35+
while (((state.ptr + state.pos) + SP.length() < state.len)) {
36+
ByteVector chunk = ByteVector.fromArray(SP, state.ptrBytes, state.ptr + state.pos);
37+
state.chunkLength = SP.length();
3038

3139
// bytes are signed in java, so we need to check for negative values
3240
// to determine if we have a byte whose unsigned value is >= 128.
33-
VectorMask<Byte> nonNegative = ByteVector.zero(species).lt(chunk);
34-
35-
VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, ByteVector.broadcast(species, 2))
36-
.lt(ByteVector.broadcast(species, 33));
37-
38-
VectorMask<Byte> needsEscape = chunk.eq(ByteVector.broadcast(species, '\\')).or(tooLowOrDblQuote).and(nonNegative);
41+
VectorMask<Byte> negative = chunk.lt(ZERO);
42+
VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative);
43+
VectorMask<Byte> needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote);
3944
if (needsEscape.anyTrue()) {
4045
state.hasMatches = true;
4146
state.chunkStart = state.ptr + state.pos;
4247
state.mask = needsEscape.toLong();
4348

44-
return nextMatch(state);
49+
// nextMatch - inlined
50+
int index = Long.numberOfTrailingZeros(state.mask);
51+
state.mask &= (state.mask - 1);
52+
state.pos = state.chunkStart + index;
53+
state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]);
54+
55+
return true;
4556
}
4657

47-
state.pos += species.length();
58+
state.pos += SP.length();
4859
}
4960

50-
return FALLBACK.scan(state);
51-
}
61+
int remaining = state.len - (state.ptr + state.pos);
62+
for (int i=0; i<remaining; i++) {
63+
state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]);
64+
int ch_len = StringEncoder.ESCAPE_TABLE[state.ch];
65+
if (ch_len > 0) {
66+
return true;
67+
}
68+
state.pos++;
69+
}
5270

53-
private boolean nextMatch(VectorizedState state) {
54-
int index = Long.numberOfTrailingZeros(state.mask);
55-
state.mask &= (state.mask - 1);
56-
state.pos = state.chunkStart + index;
57-
return true;
71+
return false;
5872
}
5973

74+
// private boolean nextMatch(VectorizedState state) {
75+
// int index = Long.numberOfTrailingZeros(state.mask);
76+
// state.mask &= (state.mask - 1);
77+
// state.pos = state.chunkStart + index;
78+
// state.ch = Byte.toUnsignedInt(state.ptrBytes[state.ptr + state.pos]);
79+
// return true;
80+
// }
81+
6082
@Override
61-
public EscapeScanner.State createState(byte[] ptrBytes, int ptr, int len, int beg) {
83+
public State createState(byte[] ptrBytes, int ptr, int len, int beg) {
6284
VectorizedState state = new VectorizedState();
6385
state.ptrBytes = ptrBytes;
6486
state.ptr = ptr;
6587
state.len = len;
6688
state.beg = beg;
67-
state.pos = 0;
89+
state.pos = 0; // Start scanning from the beginning of the segment
6890
return state;
6991
}
7092

7193
private static class VectorizedState extends State {
7294
private long mask;
7395
private int chunkStart = 0;
74-
// private int lastMatchingIndex;
7596
private boolean hasMatches;
97+
private int chunkLength;
7698
}
7799
}

0 commit comments

Comments
 (0)