-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix comments, rename SimdjsonParser2 to SimdjsonParserWithFixPath
- Loading branch information
jimeng
authored and
Wang Xuan
committed
Oct 19, 2024
1 parent
d0c4330
commit e2f0def
Showing
6 changed files
with
324 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
src/jmh/java/org/simdjson/ParseAndSelectFixPathBenchMark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package org.simdjson; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import org.openjdk.jmh.annotations.*; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
|
||
@State(Scope.Benchmark) | ||
@BenchmarkMode(Mode.Throughput) | ||
@OutputTimeUnit(TimeUnit.SECONDS) | ||
public class ParseAndSelectFixPathBenchMark { | ||
@Param({"/twitter.json"}) | ||
String fileName; | ||
private byte[] buffer; | ||
private final SimdJsonParser simdJsonParser = new SimdJsonParser(); | ||
private final ObjectMapper jacksonObjectMapper = new ObjectMapper(); | ||
private final SimdJsonParserWithFixPath simdJsonParserWithFixPath = new SimdJsonParserWithFixPath( | ||
"statuses.0.user.default_profile", "statuses.0.user.screen_name", | ||
"statuses.0.user.name", "statuses.0.user.id", "statuses.0.user.description", | ||
"statuses.1.user.default_profile", "statuses.1.user.screen_name", | ||
"statuses.1.user.name", "statuses.1.user.id", "statuses.1.user.description"); | ||
|
||
@Setup(Level.Trial) | ||
public void setup() throws IOException { | ||
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { | ||
buffer = is.readAllBytes(); | ||
} | ||
System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES); | ||
} | ||
|
||
@Benchmark | ||
public void parseMultiValuesForFixPaths_SimdJson() { | ||
simdJsonParser.parse(buffer, buffer.length); | ||
} | ||
|
||
@Benchmark | ||
public void parseMultiValuesForFixPaths_SimdJsonParserWithFixPath() { | ||
simdJsonParserWithFixPath.parse(buffer, buffer.length); | ||
} | ||
|
||
@Benchmark | ||
public void parseMultiValuesForFixPaths_Jackson() throws IOException { | ||
jacksonObjectMapper.readTree(buffer); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
220 changes: 220 additions & 0 deletions
220
src/main/java/org/simdjson/SimdJsonParserWithFixPath.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
package org.simdjson; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import lombok.Data; | ||
import lombok.RequiredArgsConstructor; | ||
|
||
public class SimdJsonParserWithFixPath { | ||
|
||
@Data | ||
@RequiredArgsConstructor | ||
static class JsonNode { | ||
private long version = 0; | ||
private boolean isLeaf = false; | ||
private final String name; | ||
private String value = null; | ||
private JsonNode parent = null; | ||
private Map<String, JsonNode> children = new HashMap<>(); | ||
private int start = -1; | ||
private int end = -1; | ||
} | ||
|
||
private final SimdJsonParser parser; | ||
private BitIndexes bitIndexes; | ||
private final JsonNode root = new JsonNode(null); | ||
private final JsonNode[] row; | ||
private final String[] result; | ||
private final String[] emptyResult; | ||
private JsonNode ptr; | ||
private byte[] buffer; | ||
private final int expectParseCols; | ||
// every time json string is processed, currentVersion will be incremented by 1 | ||
private long currentVersion = 0; | ||
// pruning, when alreadyProcessedCols == NUM | ||
|
||
public SimdJsonParserWithFixPath(String... args) { | ||
parser = new SimdJsonParser(); | ||
expectParseCols = args.length; | ||
row = new JsonNode[expectParseCols]; | ||
result = new String[expectParseCols]; | ||
emptyResult = new String[expectParseCols]; | ||
for (int i = 0; i < args.length; i++) { | ||
emptyResult[i] = null; | ||
} | ||
for (int i = 0; i < expectParseCols; i++) { | ||
JsonNode cur = root; | ||
String[] paths = args[i].split("\\."); | ||
for (int j = 0; j < paths.length; j++) { | ||
if (!cur.getChildren().containsKey(paths[j])) { | ||
JsonNode child = new JsonNode(paths[j]); | ||
cur.getChildren().put(paths[j], child); | ||
child.setParent(cur); | ||
} | ||
cur = cur.getChildren().get(paths[j]); | ||
} | ||
cur.setLeaf(true); | ||
row[i] = cur; | ||
} | ||
|
||
} | ||
|
||
public String[] parse(byte[] buffer, int len) { | ||
this.bitIndexes = parser.buildBitIndex(buffer, len); | ||
if (buffer == null || buffer.length == 0) { | ||
return emptyResult; | ||
} | ||
this.currentVersion++; | ||
this.ptr = root; | ||
this.buffer = buffer; | ||
|
||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
parseMap(); | ||
} | ||
case '[' -> { | ||
parseList(); | ||
} | ||
default -> { | ||
throw new RuntimeException("invalid json format"); | ||
} | ||
} | ||
return getResult(); | ||
} | ||
|
||
private String parseValue() { | ||
int start = bitIndexes.advance(); | ||
int next = bitIndexes.peek(); | ||
String field = new String(buffer, start, next - start).trim(); | ||
if ("null".equalsIgnoreCase(field)) { | ||
return null; | ||
} | ||
// field type is string or type is decimal | ||
if (field.startsWith("\"")) { | ||
field = field.substring(1, field.length() - 1); | ||
} | ||
return field; | ||
} | ||
|
||
private void parseElement(String expectFieldName) { | ||
// if expectFieldName is null, parent is map, else is list | ||
if (expectFieldName == null) { | ||
expectFieldName = parseValue(); | ||
bitIndexes.advance(); // skip : | ||
} | ||
if (!ptr.getChildren().containsKey(expectFieldName)) { | ||
skip(false); | ||
return; | ||
} | ||
ptr = ptr.getChildren().get(expectFieldName); | ||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
parseMap(); | ||
} | ||
case '[' -> { | ||
parseList(); | ||
} | ||
default -> { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
} | ||
ptr = ptr.getParent(); | ||
} | ||
|
||
private void parseMap() { | ||
if (ptr.getChildren() == null) { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
return; | ||
} | ||
ptr.setStart(bitIndexes.peek()); | ||
bitIndexes.advance(); | ||
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != '}') { | ||
parseElement(null); | ||
if (buffer[bitIndexes.peek()] == ',') { | ||
bitIndexes.advance(); | ||
} | ||
} | ||
ptr.setEnd(bitIndexes.peek()); | ||
if (ptr.isLeaf()) { | ||
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
bitIndexes.advance(); | ||
} | ||
|
||
private void parseList() { | ||
if (ptr.getChildren() == null) { | ||
ptr.setValue(skip(true)); | ||
ptr.setVersion(currentVersion); | ||
return; | ||
} | ||
ptr.setStart(bitIndexes.peek()); | ||
bitIndexes.advance(); | ||
int i = 0; | ||
while (bitIndexes.hasNext() && buffer[bitIndexes.peek()] != ']') { | ||
parseElement("" + i); | ||
if (buffer[bitIndexes.peek()] == ',') { | ||
bitIndexes.advance(); | ||
} | ||
i++; | ||
} | ||
ptr.setEnd(bitIndexes.peek()); | ||
if (ptr.isLeaf()) { | ||
ptr.setValue(new String(buffer, ptr.getStart(), ptr.getEnd() - ptr.getStart() + 1)); | ||
ptr.setVersion(currentVersion); | ||
} | ||
bitIndexes.advance(); | ||
} | ||
|
||
private String skip(boolean retainValue) { | ||
int i = 0; | ||
int start = retainValue ? bitIndexes.peek() : 0; | ||
switch (buffer[bitIndexes.peek()]) { | ||
case '{' -> { | ||
i++; | ||
while (i > 0) { | ||
bitIndexes.advance(); | ||
if (buffer[bitIndexes.peek()] == '{') { | ||
i++; | ||
} else if (buffer[bitIndexes.peek()] == '}') { | ||
i--; | ||
} | ||
} | ||
int end = bitIndexes.peek(); | ||
bitIndexes.advance(); | ||
return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
} | ||
case '[' -> { | ||
i++; | ||
while (i > 0) { | ||
bitIndexes.advance(); | ||
if (buffer[bitIndexes.peek()] == '[') { | ||
i++; | ||
} else if (buffer[bitIndexes.peek()] == ']') { | ||
i--; | ||
} | ||
} | ||
int end = bitIndexes.peek(); | ||
bitIndexes.advance(); | ||
return retainValue ? new String(buffer, start, end - start + 1) : null; | ||
} | ||
default -> { | ||
return parseValue(); | ||
} | ||
} | ||
} | ||
|
||
private String[] getResult() { | ||
for (int i = 0; i < expectParseCols; i++) { | ||
if (row[i].getVersion() < currentVersion) { | ||
result[i] = null; | ||
continue; | ||
} | ||
result[i] = row[i].getValue(); | ||
} | ||
return result; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.simdjson; | ||
|
||
import static org.simdjson.testutils.SimdJsonAssertions.assertThat; | ||
import static org.simdjson.testutils.TestUtils.toUtf8; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
public class JsonMultiValueParsingTest { | ||
@Test | ||
public void testParseMultiValue() { | ||
byte[] json = toUtf8("{\"field1\":{\"field2\":\"value2\",\"field3\":3},\"field4\":[\"value4\",\"value5\"],\"field5\":null}"); | ||
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("field1.field2", "field1.field3", "field4", "field4.0", "field5"); | ||
String[] result = parser.parse(json, json.length); | ||
assertThat(result[0]).isEqualTo("value2"); | ||
assertThat(result[1]).isEqualTo("3"); | ||
assertThat(result[2]).isEqualTo("[\"value4\",\"value5\"]"); | ||
assertThat(result[3]).isEqualTo("value4"); | ||
assertThat(result[4]).isEqualTo(null); | ||
} | ||
|
||
@Test | ||
public void testNonAsciiCharacters() { | ||
byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4}"); | ||
SimdJsonParserWithFixPath parser = new SimdJsonParserWithFixPath("ąćśńźż", "\\u20A9\\u0E3F", "αβγ", "😀abc😀"); | ||
// when | ||
String[] result = parser.parse(json, json.length); | ||
// then | ||
assertThat(result[0]).isEqualTo("1"); | ||
assertThat(result[1]).isEqualTo("2"); | ||
assertThat(result[2]).isEqualTo("3"); | ||
assertThat(result[3]).isEqualTo("4"); | ||
} | ||
} |