Skip to content

Commit

Permalink
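Fix issue with decimals losing precision: BigDecimal values are now written as the bytes of their unscaled value instead of being converted to double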
Browse files — browse the repository at this point in the history
  • Loading branch information
Selfeer committed Oct 25, 2024
1 parent 5087a57 commit 4629a01
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions parquetify/src/main/java/GenerateParquet.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.util.HadoopOutputFile;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
Expand Down Expand Up @@ -375,6 +376,7 @@ private static void insertDataIntoGroup(Group group, JSONArray schemaArray, int
}
}


private static void appendValueToGroup(Group group, String name, Object value) {
try {
if (value instanceof Integer) {
Expand All @@ -385,7 +387,7 @@ private static void appendValueToGroup(Group group, String name, Object value) {
// Handle UUID string by converting to 16-byte array if the field is UUID
if (isUUID(name)) {
byte[] uuidBytes = hexStringToByteArray((String) value);
group.add(name, org.apache.parquet.io.api.Binary.fromConstantByteArray(uuidBytes));
group.add(name, Binary.fromConstantByteArray(uuidBytes));
} else {
group.add(name, (String) value);
}
Expand All @@ -396,9 +398,11 @@ private static void appendValueToGroup(Group group, String name, Object value) {
} else if (value instanceof Float) {
group.add(name, (Float) value);
} else if (value instanceof BigDecimal) {
group.add(name, ((BigDecimal) value).doubleValue());
BigDecimal decimalValue = (BigDecimal) value;
byte[] bytes = decimalValue.unscaledValue().toByteArray();
group.add(name, Binary.fromConstantByteArray(bytes));
} else if (value instanceof byte[]) {
group.add(name, org.apache.parquet.io.api.Binary.fromConstantByteArray((byte[]) value));
group.add(name, Binary.fromConstantByteArray((byte[]) value));
} else if (value instanceof JSONObject) {
Group nestedGroup = group.addGroup(name);
JSONObject jsonObject = (JSONObject) value;
Expand All @@ -414,6 +418,7 @@ private static void appendValueToGroup(Group group, String name, Object value) {
}
}


private static boolean isUUID(String fieldName) {
// We add logic to identify if a particular field is a UUID
// For simplicity, let's assume field names ending with '_uuid' indicate UUID
Expand Down

0 comments on commit 4629a01

Please sign in to comment.