From 7b5b6c79cee89d53e40505f4b9b4412ecd510eaf Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 15:30:23 -0800 Subject: [PATCH 001/112] Move version to 0.15.0-SNAPSHOT --- docker/hoodie/hadoop/base/pom.xml | 2 +- docker/hoodie/hadoop/base_java11/pom.xml | 2 +- docker/hoodie/hadoop/datanode/pom.xml | 2 +- docker/hoodie/hadoop/historyserver/pom.xml | 2 +- docker/hoodie/hadoop/hive_base/pom.xml | 2 +- docker/hoodie/hadoop/namenode/pom.xml | 2 +- docker/hoodie/hadoop/pom.xml | 2 +- docker/hoodie/hadoop/prestobase/pom.xml | 2 +- docker/hoodie/hadoop/spark_base/pom.xml | 2 +- docker/hoodie/hadoop/sparkadhoc/pom.xml | 2 +- docker/hoodie/hadoop/sparkmaster/pom.xml | 2 +- docker/hoodie/hadoop/sparkworker/pom.xml | 2 +- docker/hoodie/hadoop/trinobase/pom.xml | 2 +- docker/hoodie/hadoop/trinocoordinator/pom.xml | 2 +- docker/hoodie/hadoop/trinoworker/pom.xml | 2 +- hudi-aws/pom.xml | 4 ++-- hudi-cli/pom.xml | 2 +- hudi-client/hudi-client-common/pom.xml | 4 ++-- hudi-client/hudi-flink-client/pom.xml | 4 ++-- hudi-client/hudi-java-client/pom.xml | 4 ++-- hudi-client/hudi-spark-client/pom.xml | 4 ++-- hudi-client/pom.xml | 2 +- hudi-common/pom.xml | 2 +- hudi-examples/hudi-examples-common/pom.xml | 2 +- hudi-examples/hudi-examples-flink/pom.xml | 2 +- hudi-examples/hudi-examples-java/pom.xml | 2 +- hudi-examples/hudi-examples-spark/pom.xml | 2 +- hudi-examples/pom.xml | 2 +- hudi-flink-datasource/hudi-flink/pom.xml | 4 ++-- hudi-flink-datasource/hudi-flink1.13.x/pom.xml | 4 ++-- hudi-flink-datasource/hudi-flink1.14.x/pom.xml | 4 ++-- hudi-flink-datasource/hudi-flink1.15.x/pom.xml | 4 ++-- hudi-flink-datasource/hudi-flink1.16.x/pom.xml | 4 ++-- hudi-flink-datasource/hudi-flink1.17.x/pom.xml | 4 ++-- hudi-flink-datasource/pom.xml | 4 ++-- hudi-gcp/pom.xml | 2 +- hudi-hadoop-mr/pom.xml | 2 +- hudi-integ-test/pom.xml | 2 +- hudi-kafka-connect/pom.xml | 4 ++-- .../hudi-metaserver/hudi-metaserver-client/pom.xml | 2 +- .../hudi-metaserver/hudi-metaserver-server/pom.xml | 2 +- hudi-platform-service/hudi-metaserver/pom.xml | 4 ++-- hudi-platform-service/pom.xml | 2 +- hudi-spark-datasource/hudi-spark-common/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark2-common/pom.xml | 2 +- hudi-spark-datasource/hudi-spark2/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark3-common/pom.xml | 2 +- hudi-spark-datasource/hudi-spark3.0.x/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark3.1.x/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark3.2.x/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml | 2 +- hudi-spark-datasource/hudi-spark3.3.x/pom.xml | 4 ++-- hudi-spark-datasource/hudi-spark3.4.x/pom.xml | 4 ++-- hudi-spark-datasource/pom.xml | 2 +- hudi-sync/hudi-adb-sync/pom.xml | 2 +- hudi-sync/hudi-datahub-sync/pom.xml | 2 +- hudi-sync/hudi-hive-sync/pom.xml | 2 +- hudi-sync/hudi-sync-common/pom.xml | 2 +- hudi-sync/pom.xml | 2 +- hudi-tests-common/pom.xml | 2 +- hudi-timeline-service/pom.xml | 2 +- hudi-utilities/pom.xml | 2 +- packaging/hudi-aws-bundle/pom.xml | 2 +- packaging/hudi-cli-bundle/pom.xml | 2 +- packaging/hudi-datahub-sync-bundle/pom.xml | 2 +- packaging/hudi-flink-bundle/pom.xml | 2 +- packaging/hudi-gcp-bundle/pom.xml | 2 +- packaging/hudi-hadoop-mr-bundle/pom.xml | 2 +- packaging/hudi-hive-sync-bundle/pom.xml | 2 +- packaging/hudi-integ-test-bundle/pom.xml | 2 +- packaging/hudi-kafka-connect-bundle/pom.xml | 2 +- packaging/hudi-metaserver-server-bundle/pom.xml | 2 +- packaging/hudi-presto-bundle/pom.xml | 2 +- packaging/hudi-spark-bundle/pom.xml | 2 +- packaging/hudi-timeline-server-bundle/pom.xml | 2 +- packaging/hudi-trino-bundle/pom.xml | 2 +- packaging/hudi-utilities-bundle/pom.xml | 2 +- packaging/hudi-utilities-slim-bundle/pom.xml | 2 +- pom.xml | 2 +- 80 files changed, 102 insertions(+), 102 deletions(-) diff --git a/docker/hoodie/hadoop/base/pom.xml b/docker/hoodie/hadoop/base/pom.xml index dbaa7b0ebdf1..29693c5c696c 100644 --- a/docker/hoodie/hadoop/base/pom.xml +++ b/docker/hoodie/hadoop/base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml index 74da8b664c6b..a1332b6efcd7 100644 --- a/docker/hoodie/hadoop/base_java11/pom.xml +++ b/docker/hoodie/hadoop/base_java11/pom.xml @@ -20,7 +20,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/datanode/pom.xml b/docker/hoodie/hadoop/datanode/pom.xml index 3eb79ad2f685..ede16a4cc3f1 100644 --- a/docker/hoodie/hadoop/datanode/pom.xml +++ b/docker/hoodie/hadoop/datanode/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/historyserver/pom.xml b/docker/hoodie/hadoop/historyserver/pom.xml index 7759cd17dc6c..6acbdcf0d7ee 100644 --- a/docker/hoodie/hadoop/historyserver/pom.xml +++ b/docker/hoodie/hadoop/historyserver/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/hive_base/pom.xml b/docker/hoodie/hadoop/hive_base/pom.xml index 34b2af004663..aea9a9fdc57c 100644 --- a/docker/hoodie/hadoop/hive_base/pom.xml +++ b/docker/hoodie/hadoop/hive_base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/namenode/pom.xml b/docker/hoodie/hadoop/namenode/pom.xml index c15d0a7bf6f9..3970b7b7f4b5 100644 --- a/docker/hoodie/hadoop/namenode/pom.xml +++ b/docker/hoodie/hadoop/namenode/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml index 7f4d5b0a09da..e87caac03c3e 100644 --- a/docker/hoodie/hadoop/pom.xml +++ b/docker/hoodie/hadoop/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../../pom.xml 4.0.0 diff --git a/docker/hoodie/hadoop/prestobase/pom.xml b/docker/hoodie/hadoop/prestobase/pom.xml index d73d9b1c90d2..e3aa7b5dcc98 100644 --- a/docker/hoodie/hadoop/prestobase/pom.xml +++ b/docker/hoodie/hadoop/prestobase/pom.xml @@ -20,7 +20,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/spark_base/pom.xml b/docker/hoodie/hadoop/spark_base/pom.xml index 860691a4e7c1..92c5b4aabef6 100644 --- a/docker/hoodie/hadoop/spark_base/pom.xml +++ b/docker/hoodie/hadoop/spark_base/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml index eb47925e7ffa..458ca361fcdb 100644 --- a/docker/hoodie/hadoop/sparkadhoc/pom.xml +++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkmaster/pom.xml b/docker/hoodie/hadoop/sparkmaster/pom.xml index a1a2850fce77..29de94f82d1c 100644 --- a/docker/hoodie/hadoop/sparkmaster/pom.xml +++ b/docker/hoodie/hadoop/sparkmaster/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/sparkworker/pom.xml b/docker/hoodie/hadoop/sparkworker/pom.xml index fda09bd14ce3..43ab9635626f 100644 --- a/docker/hoodie/hadoop/sparkworker/pom.xml +++ b/docker/hoodie/hadoop/sparkworker/pom.xml @@ -19,7 +19,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml index 01a3bbe9c04a..49c234b52393 100644 --- a/docker/hoodie/hadoop/trinobase/pom.xml +++ b/docker/hoodie/hadoop/trinobase/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml index 73469f4ed394..43656ba1df11 100644 --- a/docker/hoodie/hadoop/trinocoordinator/pom.xml +++ b/docker/hoodie/hadoop/trinocoordinator/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/docker/hoodie/hadoop/trinoworker/pom.xml b/docker/hoodie/hadoop/trinoworker/pom.xml index 57a757b6d98c..4bcf0a18cb56 100644 --- a/docker/hoodie/hadoop/trinoworker/pom.xml +++ b/docker/hoodie/hadoop/trinoworker/pom.xml @@ -22,7 +22,7 @@ hudi-hadoop-docker org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 pom diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index d32450791da6..9768a4f56235 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -19,12 +19,12 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-aws - 0.14.1 + 0.15.0-SNAPSHOT hudi-aws jar diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml index 205e523315bc..8a6875a9df46 100644 --- a/hudi-cli/pom.xml +++ b/hudi-cli/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index 8c5d6cde7191..c21553158a83 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -20,12 +20,12 @@ hudi-client org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-client-common - 0.14.1 + 0.15.0-SNAPSHOT hudi-client-common jar diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml index 1c60b37194bc..96b2477236d2 100644 --- a/hudi-client/hudi-flink-client/pom.xml +++ b/hudi-client/hudi-flink-client/pom.xml @@ -20,12 +20,12 @@ hudi-client org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink-client - 0.14.1 + 0.15.0-SNAPSHOT hudi-flink-client jar diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 5bd82367367b..594b4227f9af 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -19,12 +19,12 @@ hudi-client org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-java-client - 0.14.1 + 0.15.0-SNAPSHOT hudi-java-client jar diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index 79eaf2a78639..7cdef39ca278 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -19,12 +19,12 @@ hudi-client org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark-client - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark-client jar diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml index 46706df54545..5191fa15aebb 100644 --- a/hudi-client/pom.xml +++ b/hudi-client/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 9085999c2ca4..5f59a9fac298 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-examples/hudi-examples-common/pom.xml b/hudi-examples/hudi-examples-common/pom.xml index d9dd2e3c307a..ff627329fe33 100644 --- a/hudi-examples/hudi-examples-common/pom.xml +++ b/hudi-examples/hudi-examples-common/pom.xml @@ -21,7 +21,7 @@ hudi-examples org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-examples/hudi-examples-flink/pom.xml b/hudi-examples/hudi-examples-flink/pom.xml index 8e0f49b42204..7faa27e55908 100644 --- a/hudi-examples/hudi-examples-flink/pom.xml +++ b/hudi-examples/hudi-examples-flink/pom.xml @@ -21,7 +21,7 @@ hudi-examples org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-examples/hudi-examples-java/pom.xml b/hudi-examples/hudi-examples-java/pom.xml index 1788acb904f6..a385fb0e62f2 100644 --- a/hudi-examples/hudi-examples-java/pom.xml +++ b/hudi-examples/hudi-examples-java/pom.xml @@ -21,7 +21,7 @@ hudi-examples org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-examples/hudi-examples-spark/pom.xml b/hudi-examples/hudi-examples-spark/pom.xml index 116bb3e07081..1dddacb83fa2 100644 --- a/hudi-examples/hudi-examples-spark/pom.xml +++ b/hudi-examples/hudi-examples-spark/pom.xml @@ -21,7 +21,7 @@ hudi-examples org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml index a2724c09c057..f4671239d9f8 100644 --- a/hudi-examples/pom.xml +++ b/hudi-examples/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-flink-datasource/hudi-flink/pom.xml b/hudi-flink-datasource/hudi-flink/pom.xml index c390f448c029..5ba86552cd2e 100644 --- a/hudi-flink-datasource/hudi-flink/pom.xml +++ b/hudi-flink-datasource/hudi-flink/pom.xml @@ -22,12 +22,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml index d1ba72c6439f..3dd876dd20af 100644 --- a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink1.13.x - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.14.x/pom.xml b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml index 291dbbafd755..aaa536b2041c 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.14.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink1.14.x - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.15.x/pom.xml b/hudi-flink-datasource/hudi-flink1.15.x/pom.xml index 84b8a6124ca8..33b1075f1348 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.15.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink1.15.x - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.16.x/pom.xml b/hudi-flink-datasource/hudi-flink1.16.x/pom.xml index 5f66265a09ab..097071aaeb26 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.16.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink1.16.x - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/hudi-flink1.17.x/pom.xml b/hudi-flink-datasource/hudi-flink1.17.x/pom.xml index e966fc400c44..ecfd84e0d070 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/pom.xml +++ b/hudi-flink-datasource/hudi-flink1.17.x/pom.xml @@ -20,12 +20,12 @@ hudi-flink-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink1.17.x - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-flink-datasource/pom.xml b/hudi-flink-datasource/pom.xml index a81a0680af90..e3f8c55b2868 100644 --- a/hudi-flink-datasource/pom.xml +++ b/hudi-flink-datasource/pom.xml @@ -20,12 +20,12 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-flink-datasource - 0.14.1 + 0.15.0-SNAPSHOT pom diff --git a/hudi-gcp/pom.xml b/hudi-gcp/pom.xml index a70e58b8cb7a..5f67569b8d23 100644 --- a/hudi-gcp/pom.xml +++ b/hudi-gcp/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../pom.xml diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index 8757aa2bc750..2b0ffd90fef9 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 79bdab9c28ad..64ed135fba07 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../pom.xml hudi-integ-test diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index 130aa66345e3..9d412cd91ad4 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -19,13 +19,13 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-kafka-connect Kafka Connect Sink Connector for Hudi - 0.14.1 + 0.15.0-SNAPSHOT jar diff --git a/hudi-platform-service/hudi-metaserver/hudi-metaserver-client/pom.xml b/hudi-platform-service/hudi-metaserver/hudi-metaserver-client/pom.xml index 33bf3d6b1bce..539496a8909b 100644 --- a/hudi-platform-service/hudi-metaserver/hudi-metaserver-client/pom.xml +++ b/hudi-platform-service/hudi-metaserver/hudi-metaserver-client/pom.xml @@ -21,7 +21,7 @@ hudi-metaserver org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml index d593eae75eaa..10ac5be853a0 100644 --- a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml +++ b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml @@ -21,7 +21,7 @@ hudi-metaserver org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-platform-service/hudi-metaserver/pom.xml b/hudi-platform-service/hudi-metaserver/pom.xml index a153101debb2..a84dcd9e8ffc 100644 --- a/hudi-platform-service/hudi-metaserver/pom.xml +++ b/hudi-platform-service/hudi-metaserver/pom.xml @@ -20,12 +20,12 @@ hudi-platform-service org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-metaserver - 0.14.1 + 0.15.0-SNAPSHOT hudi-metaserver pom diff --git a/hudi-platform-service/pom.xml b/hudi-platform-service/pom.xml index 9081fc0e5d08..30722fec0565 100644 --- a/hudi-platform-service/pom.xml +++ b/hudi-platform-service/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml index 8e3c1b5259ba..7a0930e13407 100644 --- a/hudi-spark-datasource/hudi-spark-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark-common/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark-common_${scala.binary.version} - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark-common_${scala.binary.version} jar diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index b7ff77f2697e..87311926be12 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -19,12 +19,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark_${scala.binary.version} - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark_${scala.binary.version} jar diff --git a/hudi-spark-datasource/hudi-spark2-common/pom.xml b/hudi-spark-datasource/hudi-spark2-common/pom.xml index b39f5feeb670..5eb0e52bc186 100644 --- a/hudi-spark-datasource/hudi-spark2-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark2-common/pom.xml @@ -21,7 +21,7 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index 1e497d79c462..636713ef269f 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark2_${scala.binary.version} - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark2_${scala.binary.version} jar diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml index 666ba86ff1b1..83619b3f19a2 100644 --- a/hudi-spark-datasource/hudi-spark3-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml @@ -21,7 +21,7 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml index 0507a938beab..2035653a141a 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark3.0.x_2.12 - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark3.0.x_2.12 jar diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml index 32d487baea82..42c7ff0dcaf1 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark3.1.x_2.12 - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark3.1.x_2.12 jar diff --git a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml index cd906ab3a5e5..70dbc0d47757 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml @@ -18,12 +18,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark3.2.x_2.12 - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark3.2.x_2.12 jar diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml b/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml index afafbd608409..e9e90c57a2f7 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2plus-common/pom.xml @@ -21,7 +21,7 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml index 8ed998cf3dae..ae3477f2e49b 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark3.3.x_2.12 - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark3.3.x_2.12 jar diff --git a/hudi-spark-datasource/hudi-spark3.4.x/pom.xml b/hudi-spark-datasource/hudi-spark3.4.x/pom.xml index c0f94e8bacad..92f63cacb96f 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.4.x/pom.xml @@ -17,12 +17,12 @@ hudi-spark-datasource org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 hudi-spark3.4.x_2.12 - 0.14.1 + 0.15.0-SNAPSHOT hudi-spark3.4.x_2.12 jar diff --git a/hudi-spark-datasource/pom.xml b/hudi-spark-datasource/pom.xml index dbf68b5c92f1..daa6ca8e199d 100644 --- a/hudi-spark-datasource/pom.xml +++ b/hudi-spark-datasource/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-sync/hudi-adb-sync/pom.xml b/hudi-sync/hudi-adb-sync/pom.xml index 356425987daf..df881c2e5e9f 100644 --- a/hudi-sync/hudi-adb-sync/pom.xml +++ b/hudi-sync/hudi-adb-sync/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml diff --git a/hudi-sync/hudi-datahub-sync/pom.xml b/hudi-sync/hudi-datahub-sync/pom.xml index bbb81b5f0148..558b0b957501 100644 --- a/hudi-sync/hudi-datahub-sync/pom.xml +++ b/hudi-sync/hudi-datahub-sync/pom.xml @@ -24,7 +24,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml index ee60b9b53638..69aa590bf2d2 100644 --- a/hudi-sync/hudi-hive-sync/pom.xml +++ b/hudi-sync/hudi-hive-sync/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml index eb6b585c6d65..82d4152ed234 100644 --- a/hudi-sync/hudi-sync-common/pom.xml +++ b/hudi-sync/hudi-sync-common/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/hudi-sync/pom.xml b/hudi-sync/pom.xml index 9c2b3a96378c..2db9a64648fa 100644 --- a/hudi-sync/pom.xml +++ b/hudi-sync/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-tests-common/pom.xml b/hudi-tests-common/pom.xml index 99758195c878..7b8ffad225d1 100644 --- a/hudi-tests-common/pom.xml +++ b/hudi-tests-common/pom.xml @@ -18,7 +18,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml index 7ee4945182ff..c6dd0b72f615 100644 --- a/hudi-timeline-service/pom.xml +++ b/hudi-timeline-service/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml index a8c0c6f24fe8..de444a8cceee 100644 --- a/hudi-utilities/pom.xml +++ b/hudi-utilities/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT 4.0.0 diff --git a/packaging/hudi-aws-bundle/pom.xml b/packaging/hudi-aws-bundle/pom.xml index 9fbad5aff828..74c12c2bb945 100644 --- a/packaging/hudi-aws-bundle/pom.xml +++ b/packaging/hudi-aws-bundle/pom.xml @@ -24,7 +24,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-cli-bundle/pom.xml b/packaging/hudi-cli-bundle/pom.xml index 45d8f8fd54f4..4fc98d0f74a4 100644 --- a/packaging/hudi-cli-bundle/pom.xml +++ b/packaging/hudi-cli-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-datahub-sync-bundle/pom.xml b/packaging/hudi-datahub-sync-bundle/pom.xml index d9e1b11a1b56..34b931b316ec 100644 --- a/packaging/hudi-datahub-sync-bundle/pom.xml +++ b/packaging/hudi-datahub-sync-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index e8a8dbbb8c99..1d15f1b1d99b 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml index 69473b27babb..112f6f4c96d2 100644 --- a/packaging/hudi-gcp-bundle/pom.xml +++ b/packaging/hudi-gcp-bundle/pom.xml @@ -22,7 +22,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 9b1f42781cda..8c9dc5f9a157 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index 875054317a32..0567e3d7a3f6 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index 3a69519b8f25..c0abd00e7ab3 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -17,7 +17,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml index 34d1845de12c..da9ecb0f2c41 100644 --- a/packaging/hudi-kafka-connect-bundle/pom.xml +++ b/packaging/hudi-kafka-connect-bundle/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-metaserver-server-bundle/pom.xml b/packaging/hudi-metaserver-server-bundle/pom.xml index 656a03dd62f9..d3f205233016 100644 --- a/packaging/hudi-metaserver-server-bundle/pom.xml +++ b/packaging/hudi-metaserver-server-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index 267dab041e45..2324cf32a058 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 93e52ace8650..5752703c7a97 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml index cadb1e328ae5..4ef131174071 100644 --- a/packaging/hudi-timeline-server-bundle/pom.xml +++ b/packaging/hudi-timeline-server-bundle/pom.xml @@ -21,7 +21,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index 1eeecfe0c1cf..30e17b6deff7 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -20,7 +20,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 382822877ab8..c4d8f798ad6e 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml index 141e4b23e78c..e70e94cbaf51 100644 --- a/packaging/hudi-utilities-slim-bundle/pom.xml +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -19,7 +19,7 @@ hudi org.apache.hudi - 0.14.1 + 0.15.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/pom.xml b/pom.xml index a04e4c1d0eab..fd59bd06959f 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ org.apache.hudi hudi pom - 0.14.1 + 0.15.0-SNAPSHOT Apache Hudi brings stream style processing on big data https://github.com/apache/hudi Hudi From 6f25f414abdf167cb4c02dae391382f6e45106db Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Thu, 22 Feb 2024 18:55:00 -0800 Subject: [PATCH 002/112] [HUDI-6825] Use UTF_8 to encode String to byte array in all places (#9634) Unify the encoding of Java `String` to byte array in Hudi, especially for writing bytes to the storage, by using `UTF_8` encoding only. --------- Co-authored-by: Sagar Sumit --- .../hudi/cli/commands/TableCommand.java | 3 +- .../integ/ITTestHDFSParquetImportCommand.java | 10 +++--- .../HoodieTestCommitMetadataGenerator.java | 4 +-- .../client/BaseHoodieTableServiceClient.java | 4 +-- .../hudi/client/BaseHoodieWriteClient.java | 6 ++-- .../bucket/ConsistentBucketIndexUtils.java | 3 +- .../commit/BaseCommitActionExecutor.java | 4 +-- .../table/action/compact/CompactHelpers.java | 7 +++-- .../hudi/HoodieTestCommitGenerator.java | 4 +-- .../storage/TestHoodieHFileReaderWriter.java | 25 ++++++++------- .../client/HoodieFlinkTableServiceClient.java | 5 +-- .../row/HoodieRowDataParquetWriteSupport.java | 4 +-- .../commit/BaseFlinkCommitActionExecutor.java | 5 +-- .../commit/BaseJavaCommitActionExecutor.java | 5 +-- .../HoodieJavaClientTestHarness.java | 3 +- .../utils/SparkInternalSchemaConverter.java | 14 +++++---- .../SparkBootstrapCommitActionExecutor.java | 4 +-- .../commit/BaseSparkCommitActionExecutor.java | 6 ++-- .../hudi/io/TestHoodieTimelineArchiver.java | 6 ++-- .../action/commit/TestUpsertPartitioner.java | 14 ++++----- .../hudi/testutils/HoodieCleanerTestBase.java | 4 +-- .../hudi/testutils/HoodieClientTestUtils.java | 3 +- .../hudi/avro/GenericAvroSerializer.java | 6 ++-- .../org/apache/hudi/avro/HoodieAvroUtils.java | 4 +-- .../hudi/avro/HoodieAvroWriteSupport.java | 4 +-- .../hudi/avro/MercifulJsonConverter.java | 4 ++- .../apache/hudi/common/HoodieJsonPayload.java | 4 ++- .../HoodieDynamicBoundedBloomFilter.java | 7 +++-- .../hudi/common/bloom/SimpleBloomFilter.java | 9 +++--- .../bootstrap/index/HFileBootstrapIndex.java | 4 ++- .../HoodieConsistentHashingMetadata.java | 6 ++-- .../common/model/HoodiePartitionMetadata.java | 4 ++- .../hudi/common/table/HoodieTableConfig.java | 4 +-- .../table/log/block/HoodieAvroDataBlock.java | 3 +- .../table/log/block/HoodieHFileDataBlock.java | 5 +-- .../table/log/block/HoodieLogBlock.java | 3 +- .../table/timeline/HoodieDefaultTimeline.java | 3 +- .../apache/hudi/common/util/AvroOrcUtils.java | 8 ++--- .../hudi/common/util/Base64CodecUtil.java | 4 ++- .../apache/hudi/common/util/BinaryUtil.java | 5 +-- .../apache/hudi/common/util/NumericUtils.java | 5 +-- .../common/util/collection/RocksDBDAO.java | 23 ++++++++------ .../apache/hudi/common/util/hash/HashID.java | 7 +++-- ...FileBasedInternalSchemaStorageManager.java | 3 +- .../io/storage/HoodieAvroHFileReader.java | 21 +++++++------ .../io/storage/HoodieAvroHFileWriter.java | 26 +++++++++------- .../hudi/io/storage/HoodieAvroOrcWriter.java | 26 +++++++++------- .../metadata/HoodieTableMetadataUtil.java | 5 +-- .../apache/hudi/avro/TestHoodieAvroUtils.java | 3 +- .../fs/TestHoodieWrapperFileSystem.java | 6 ++-- .../TestInLineFileSystemHFileInLining.java | 3 +- .../functional/TestHoodieLogFormat.java | 15 ++++----- .../TestPostgresDebeziumAvroPayload.java | 9 +++--- .../table/TestHoodieTableMetaClient.java | 9 +++--- .../hudi/common/table/TestTimelineUtils.java | 6 ++-- .../timeline/TestHoodieActiveTimeline.java | 4 +-- .../TestHoodieTableFSViewWithClustering.java | 4 +-- .../view/TestHoodieTableFileSystemView.java | 18 +++++------ .../table/view/TestIncrementalFSViewSync.java | 10 +++--- .../common/testutils/FileCreateUtils.java | 12 +++---- .../testutils/HoodieTestDataGenerator.java | 8 ++--- .../common/testutils/RawTripTestPayload.java | 3 +- .../minicluster/ZookeeperTestService.java | 6 ++-- .../hudi/common/util/TestBase64CodecUtil.java | 4 +-- .../hudi/common/util/TestFileIOUtils.java | 10 +++--- .../common/util/TestOrcReaderIterator.java | 6 ++-- .../hudi/common/util/TestStringUtils.java | 7 +++-- .../hudi/common/util/hash/TestHashID.java | 4 +-- .../hudi/schema/SchemaRegistryProvider.java | 4 +-- .../util/JsonDeserializationFunction.java | 4 +-- .../hudi/util/StringToRowDataConverter.java | 5 +-- .../source/TestIncrementalInputSplits.java | 6 ++-- .../apache/hudi/util/TestExpressionUtils.java | 4 +-- .../java/org/apache/hudi/utils/TestUtils.java | 5 ++- .../format/cow/ParquetSplitReaderUtil.java | 4 +-- .../format/cow/ParquetSplitReaderUtil.java | 4 +-- .../format/cow/ParquetSplitReaderUtil.java | 4 +-- .../format/cow/ParquetSplitReaderUtil.java | 4 +-- .../format/cow/ParquetSplitReaderUtil.java | 4 +-- .../apache/hudi/hadoop/InputSplitUtils.java | 4 ++- .../hadoop/TestHoodieHFileInputFormat.java | 4 +-- .../hadoop/TestHoodieParquetInputFormat.java | 31 +++++++++---------- .../realtime/TestHoodieRealtimeFileSplit.java | 18 +++++------ .../TestHoodieRealtimeRecordReader.java | 6 ++-- .../hudi/connect/utils/KafkaConnectUtils.java | 5 +-- .../apache/hudi/helper/MockKafkaConnect.java | 6 ++-- .../writers/TestAbstractConnectWriter.java | 3 +- .../AlterHoodieTableAddColumnsCommand.scala | 4 +-- .../hudi/benchmark/HoodieBenchmarkBase.scala | 7 +++-- .../TestHdfsParquetImportProcedure.scala | 5 +-- .../sql/hudi/command/AlterTableCommand.scala | 4 +-- .../hudi/hive/testutils/HiveTestCluster.java | 4 +-- .../hudi/hive/testutils/HiveTestUtil.java | 14 ++++----- .../HoodieMetadataTableValidator.java | 5 +-- .../utilities/perf/TimelineServerPerf.java | 6 ++-- .../schema/SchemaRegistryProvider.java | 4 +-- .../sources/helpers/ProtoConversionUtil.java | 4 ++- .../HoodieDeltaStreamerTestBase.java | 4 +-- .../functional/TestHDFSParquetImporter.java | 5 +-- .../schema/TestSchemaRegistryProvider.java | 4 +-- .../sources/TestGcsEventsSource.java | 16 ++++++---- .../sources/TestProtoKafkaSource.java | 5 +-- .../helpers/TestProtoConversionUtil.java | 9 +++--- 103 files changed, 396 insertions(+), 322 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java index 22bac81dff51..f0b653ec1e9c 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java @@ -52,6 +52,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * CLI command to display hudi table options. @@ -261,7 +262,7 @@ private static void writeToFile(String filePath, String data) throws IOException OutputStream os = null; try { os = new FileOutputStream(outFile); - os.write(data.getBytes(), 0, data.length()); + os.write(getUTF8Bytes(data), 0, data.length()); } finally { os.close(); } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java index a71697657a0d..930f6b0064c4 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java @@ -18,9 +18,6 @@ package org.apache.hudi.cli.integ; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.commands.TableCommand; import org.apache.hudi.cli.testutils.HoodieCLIIntegrationTestBase; @@ -33,6 +30,10 @@ import org.apache.hudi.utilities.HDFSParquetImporter; import org.apache.hudi.utilities.functional.TestHDFSParquetImporter; import org.apache.hudi.utilities.functional.TestHDFSParquetImporter.HoodieTripModel; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.jupiter.api.BeforeEach; @@ -49,6 +50,7 @@ import java.util.List; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -81,7 +83,7 @@ public void init() throws IOException, ParseException { // create schema file try (FSDataOutputStream schemaFileOS = fs.create(new Path(schemaFile))) { - schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes()); + schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); } importer = new TestHDFSParquetImporter(); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index 67592be1adcf..a26c8d008393 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.Path; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -42,6 +41,7 @@ import static org.apache.hudi.common.testutils.FileCreateUtils.baseFileName; import static org.apache.hudi.common.util.CollectionUtils.createImmutableList; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Class to be used in tests to keep generating test inserts and updates against a corpus. @@ -114,7 +114,7 @@ public static void createCommitFileWithMetadata(String basePath, String commitTi static void createFileWithMetadata(String basePath, Configuration configuration, String name, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + name); try (FSDataOutputStream os = FSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { - os.writeBytes(new String(content.getBytes(StandardCharsets.UTF_8))); + os.writeBytes(new String(getUTF8Bytes(content))); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java index 2da144162115..e4e6f79c5eb0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java @@ -72,7 +72,6 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; @@ -85,6 +84,7 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.metadata.HoodieTableMetadata.isMetadataTable; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.isIndexingCommit; @@ -500,7 +500,7 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, table.getActiveTimeline().transitionReplaceInflightToComplete( clusteringInstant, - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); } catch (Exception e) { throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e); } finally { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java index a62f1d042447..37f3fe6d04a3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java @@ -93,7 +93,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -106,6 +105,7 @@ import static org.apache.hudi.avro.AvroSchemaUtils.getAvroRecordQualifiedName; import static org.apache.hudi.common.model.HoodieCommitMetadata.SCHEMA_KEY; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; /** @@ -285,7 +285,7 @@ protected void commit(HoodieTable table, String commitActionType, String instant // update Metadata table writeTableMetadata(table, instantTime, metadata, writeStatuses); activeTimeline.saveAsComplete(new HoodieInstant(true, commitActionType, instantTime), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); } // Save internal schema @@ -1542,7 +1542,7 @@ private void commitTableChange(InternalSchema newSchema, HoodieTableMetaClient m HoodieCommitMetadata metadata = new HoodieCommitMetadata(); metadata.setOperationType(WriteOperationType.ALTER_SCHEMA); try { - timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + timeLine.transitionRequestedToInflight(requested, Option.of(getUTF8Bytes(metadata.toJsonString()))); } catch (IOException io) { throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java index f8befee9bf9e..6ff4d1b6d099 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java @@ -53,6 +53,7 @@ import static org.apache.hudi.common.model.HoodieConsistentHashingMetadata.HASHING_METADATA_COMMIT_FILE_SUFFIX; import static org.apache.hudi.common.model.HoodieConsistentHashingMetadata.HASHING_METADATA_FILE_SUFFIX; import static org.apache.hudi.common.model.HoodieConsistentHashingMetadata.getTimestampFromFile; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Utilities class for consistent bucket index metadata management. @@ -208,7 +209,7 @@ private static void createCommitMarker(HoodieTable table, Path fileStatus, Path if (fs.exists(fullPath)) { return; } - FileIOUtils.createFileInPath(fs, fullPath, Option.of(StringUtils.EMPTY_STRING.getBytes())); + FileIOUtils.createFileInPath(fs, fullPath, Option.of(getUTF8Bytes(StringUtils.EMPTY_STRING))); } /*** diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java index 55d8e4e47af5..4f4cc7d9bc7e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java @@ -60,7 +60,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -71,6 +70,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE; public abstract class BaseCommitActionExecutor @@ -154,7 +154,7 @@ void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, String insta String commitActionType = getCommitActionType(); HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime); activeTimeline.transitionRequestedToInflight(requested, - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)), + Option.of(getUTF8Bytes(metadata.toJsonString())), config.shouldAllowMultiWriteOnSameInstant()); } catch (IOException io) { throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", io); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java index c6fa1f4f2b2e..a49f31ead6e5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/CompactHelpers.java @@ -36,10 +36,11 @@ import org.apache.hudi.table.HoodieTable; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Set; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Base class helps to perform compact. * @@ -83,7 +84,7 @@ public void completeInflightCompaction(HoodieTable table, String compactionCommi try { activeTimeline.transitionCompactionInflightToComplete( HoodieTimeline.getCompactionInflightInstant(compactionCommitTime), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); } catch (IOException e) { throw new HoodieCompactionException( "Failed to commit " + table.getMetaClient().getBasePath() + " at time " + compactionCommitTime, e); @@ -95,7 +96,7 @@ public void completeInflightLogCompaction(HoodieTable table, String logCompactio try { activeTimeline.transitionLogCompactionInflightToComplete( HoodieTimeline.getLogCompactionInflightInstant(logCompactionCommitTime), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); } catch (IOException e) { throw new HoodieCompactionException( "Failed to commit " + table.getMetaClient().getBasePath() + " at time " + logCompactionCommitTime, e); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java index ae8bb416c9f2..b41649f5207d 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java @@ -37,7 +37,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -46,6 +45,7 @@ import java.util.UUID; import static org.apache.hudi.common.table.log.HoodieLogFormat.DEFAULT_WRITE_TOKEN; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; public class HoodieTestCommitGenerator { public static final String BASE_FILE_WRITE_TOKEN = "1-0-1"; @@ -163,7 +163,7 @@ public static void createCommitFileWithMetadata( String filename, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + filename); try (FSDataOutputStream os = FSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { - os.writeBytes(new String(content.getBytes(StandardCharsets.UTF_8))); + os.writeBytes(new String(getUTF8Bytes(content))); } } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index 0d2eefa08637..af4de5b771ed 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -18,16 +18,6 @@ package org.apache.hudi.io.storage; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.CellComparatorImpl; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; @@ -40,6 +30,16 @@ import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparatorImpl; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -70,8 +70,9 @@ import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; import static org.apache.hudi.common.util.CollectionUtils.toStream; -import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; +import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -130,7 +131,7 @@ protected void verifySchema(Configuration conf, String schemaPath) throws IOExce FileSystem fs = getFilePath().getFileSystem(conf); HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf); assertEquals(getSchemaFromResource(TestHoodieHFileReaderWriter.class, schemaPath), - new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(SCHEMA_KEY.getBytes())))); + new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY))))); } private static Stream populateMetaFieldsAndTestAvroWithMeta() { diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java index 68c32acca24e..05e00cf1f181 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java @@ -51,11 +51,12 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.List; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + public class HoodieFlinkTableServiceClient extends BaseHoodieTableServiceClient>, List, List> { private static final Logger LOG = LoggerFactory.getLogger(HoodieFlinkTableServiceClient.class); @@ -137,7 +138,7 @@ protected void completeClustering( LOG.info("Committing Clustering {} finished with result {}.", clusteringCommitTime, metadata); table.getActiveTimeline().transitionReplaceInflightToComplete( HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); } catch (IOException e) { throw new HoodieClusteringException( "Failed to commit " + table.getMetaClient().getBasePath() + " at time " + clusteringCommitTime, e); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriteSupport.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriteSupport.java index 4a3109db60a3..a153ec15052d 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriteSupport.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriteSupport.java @@ -21,13 +21,13 @@ import org.apache.hudi.avro.HoodieBloomFilterWriteSupport; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.hadoop.api.WriteSupport; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.Map; @@ -71,7 +71,7 @@ public HoodieBloomFilterRowDataWriteSupport(BloomFilter bloomFilter) { @Override protected byte[] getUTF8Bytes(String key) { - return key.getBytes(StandardCharsets.UTF_8); + return StringUtils.getUTF8Bytes(key); } } } diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java index 5f9b71d4c9fc..3dca687e9e85 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java @@ -46,7 +46,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.Collections; import java.util.Iterator; @@ -55,6 +54,8 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * With {@code org.apache.hudi.operator.partitioner.BucketAssigner}, each hoodie record * is tagged with a bucket ID (partition path + fileID) in streaming way. All the records consumed by this @@ -156,7 +157,7 @@ protected void commit(Option> extraMetadata, HoodieData extends BaseCommitActionExecutor>, List, List, HoodieWriteMetadata> { @@ -215,7 +216,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta writeTableMetadata(metadata, HoodieListData.eager(result.getWriteStatuses()), actionType); activeTimeline.saveAsComplete(new HoodieInstant(true, getCommitActionType(), instantTime), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); LOG.info("Committed " + instantTime); result.setCommitMetadata(Option.of(metadata)); } catch (IOException e) { diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java index 27de85fc002c..38bbe528891b 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java @@ -108,6 +108,7 @@ import java.util.stream.Stream; import static org.apache.hudi.common.testutils.HoodieTestUtils.RAW_TRIPS_TEST_NAME; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -1003,7 +1004,7 @@ public Stream readHFile(String[] paths) { HFile.Reader reader = HoodieHFileUtils.createHFileReader(fs, new Path(path), cacheConfig, fs.getConf()); if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(SCHEMA_KEY.getBytes()))); + schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY)))); } HFileScanner scanner = reader.getScanner(false, false); if (!scanner.seekTo()) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java index 2b14bb3a0665..294e29a65fb1 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkInternalSchemaConverter.java @@ -24,6 +24,7 @@ import org.apache.hudi.internal.schema.Types; import org.apache.hudi.internal.schema.action.InternalSchemaMerger; import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; + import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.spark.sql.types.ArrayType; import org.apache.spark.sql.types.ArrayType$; @@ -61,7 +62,6 @@ import org.apache.spark.sql.types.UserDefinedType; import org.apache.spark.sql.types.VarcharType; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.util.ArrayList; import java.util.Deque; @@ -71,6 +71,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + public class SparkInternalSchemaConverter { private SparkInternalSchemaConverter() { @@ -307,7 +309,7 @@ private static boolean convertIntLongType(WritableColumnVector oldV, WritableCol } else if (newType instanceof DoubleType) { newV.putDouble(i, isInt ? oldV.getInt(i) : oldV.getLong(i)); } else if (newType instanceof StringType) { - newV.putByteArray(i, ((isInt ? oldV.getInt(i) : oldV.getLong(i)) + "").getBytes(StandardCharsets.UTF_8)); + newV.putByteArray(i, getUTF8Bytes((isInt ? oldV.getInt(i) : oldV.getLong(i)) + "")); } else if (newType instanceof DecimalType) { Decimal oldDecimal = Decimal.apply(isInt ? oldV.getInt(i) : oldV.getLong(i)); oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); @@ -335,7 +337,7 @@ private static boolean convertFloatType(WritableColumnVector oldV, WritableColum if (newType instanceof DoubleType) { newV.putDouble(i, Double.valueOf(oldV.getFloat(i) + "")); } else if (newType instanceof StringType) { - newV.putByteArray(i, (oldV.getFloat(i) + "").getBytes(StandardCharsets.UTF_8)); + newV.putByteArray(i, getUTF8Bytes(oldV.getFloat(i) + "")); } else if (newType instanceof DecimalType) { Decimal oldDecimal = Decimal.apply(oldV.getFloat(i)); oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); @@ -365,7 +367,7 @@ private static boolean convertDoubleType(WritableColumnVector oldV, WritableColu oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); } else if (newType instanceof StringType) { - newV.putByteArray(i, (oldV.getDouble(i) + "").getBytes(StandardCharsets.UTF_8)); + newV.putByteArray(i, getUTF8Bytes(oldV.getDouble(i) + "")); } } return true; @@ -391,7 +393,7 @@ private static boolean convertDecimalType(WritableColumnVector oldV, WritableCol oldDecimal.changePrecision(((DecimalType) newType).precision(), ((DecimalType) newType).scale()); newV.putDecimal(i, oldDecimal, ((DecimalType) newType).precision()); } else if (newType instanceof StringType) { - newV.putByteArray(i, oldDecimal.toString().getBytes(StandardCharsets.UTF_8)); + newV.putByteArray(i, getUTF8Bytes(oldDecimal.toString())); } } return true; @@ -413,7 +415,7 @@ private static boolean convertDateType(WritableColumnVector oldV, WritableColumn } // to do support rebaseDate String res = org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaDate(oldV.getInt(i)).toString(); - newV.putByteArray(i, res.getBytes(StandardCharsets.UTF_8)); + newV.putByteArray(i, getUTF8Bytes(res)); } return true; } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java index d93401c2247b..db7fceecb077 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java @@ -68,7 +68,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Collection; @@ -79,6 +78,7 @@ import static org.apache.hudi.client.bootstrap.BootstrapMode.FULL_RECORD; import static org.apache.hudi.client.bootstrap.BootstrapMode.METADATA_ONLY; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE; import static org.apache.hudi.table.action.bootstrap.MetadataBootstrapHandlerFactory.getMetadataHandler; @@ -249,7 +249,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta try { activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); LOG.info("Committed " + instantTime); } catch (IOException e) { throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java index 040cc7987475..0ca910fd7214 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java @@ -19,8 +19,8 @@ package org.apache.hudi.table.action.commit; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.client.utils.SparkPartitionUtils; import org.apache.hudi.client.clustering.update.strategy.SparkAllowUpdateStrategy; +import org.apache.hudi.client.utils.SparkPartitionUtils; import org.apache.hudi.client.utils.SparkValidatorUtils; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieData.HoodieDataCacheKey; @@ -66,7 +66,6 @@ import java.io.IOException; import java.io.Serializable; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Collections; @@ -81,6 +80,7 @@ import scala.Tuple2; import static org.apache.hudi.common.util.ClusteringUtils.getAllFileGroupsInPendingClusteringPlans; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.config.HoodieWriteConfig.WRITE_STATUS_STORAGE_LEVEL_VALUE; public abstract class BaseSparkCommitActionExecutor extends @@ -309,7 +309,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta HoodieCommitMetadata metadata = result.getCommitMetadata().get(); writeTableMetadata(metadata, result.getWriteStatuses(), actionType); activeTimeline.saveAsComplete(new HoodieInstant(true, getCommitActionType(), instantTime), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); LOG.info("Committed " + instantTime); result.setCommitMetadata(Option.of(metadata)); } catch (IOException e) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index 880c9f74f479..bed16dcbefa5 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -76,7 +76,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; @@ -103,6 +102,7 @@ import static org.apache.hudi.HoodieTestCommitGenerator.getBaseFilename; import static org.apache.hudi.common.testutils.HoodieTestUtils.createCompactionCommitInMetadataTable; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.config.HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT; import static org.apache.hudi.metadata.HoodieTableMetadata.SOLO_COMMIT_TIMESTAMP; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -428,7 +428,7 @@ private HoodieInstant commitWithMdt(String instantTime, Map metadataWriter.updateFromWriteStatuses(commitMeta, context.emptyHoodieData(), instantTime); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, instantTime), - Option.of(commitMeta.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(commitMeta.toJsonString()))); } else { commitMeta = generateCommitMetadata(instantTime, new HashMap<>()); } @@ -552,7 +552,7 @@ public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableA // if there are damaged archive files and damaged plan, hoodie need throw ioe while loading archived timeline. Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1"); - FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes())); + FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(getUTF8Bytes(s))); assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload()); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java index f7dc276e92e4..2c7f35d4d908 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java @@ -53,7 +53,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -67,6 +66,7 @@ import static org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS; import static org.apache.hudi.common.testutils.HoodieTestUtils.generateFakeHoodieWriteStat; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.table.action.commit.UpsertPartitioner.averageBytesPerRecord; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -150,17 +150,17 @@ private static HoodieCommitMetadata generateCommitMetadataWith(int totalRecordsW private static LinkedList> generateCommitMetadataList() throws IOException { LinkedList> commits = new LinkedList<>(); // First commit with non zero records and bytes - commits.push(Option.of(generateCommitMetadataWith(2000, 10000).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(2000, 10000).toJsonString()))); // Second commit with non zero records and bytes - commits.push(Option.of(generateCommitMetadataWith(1500, 7500).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(1500, 7500).toJsonString()))); // Third commit with a small file - commits.push(Option.of(generateCommitMetadataWith(100, 500).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(100, 500).toJsonString()))); // Fourth commit with both zero records and zero bytes - commits.push(Option.of(generateCommitMetadataWith(0, 0).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(0, 0).toJsonString()))); // Fifth commit with zero records - commits.push(Option.of(generateCommitMetadataWith(0, 1500).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(0, 1500).toJsonString()))); // Sixth commit with zero bytes - commits.push(Option.of(generateCommitMetadataWith(2500, 0).toJsonString().getBytes(StandardCharsets.UTF_8))); + commits.push(Option.of(getUTF8Bytes(generateCommitMetadataWith(2500, 0).toJsonString()))); return commits; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java index ea4f9eb536c6..158b9808e068 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java @@ -43,7 +43,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -52,6 +51,7 @@ import static org.apache.hudi.common.bootstrap.TestBootstrapIndex.generateBootstrapIndex; import static org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -210,7 +210,7 @@ public void commitWithMdt(String instantTime, Map> partToFi metadataWriter.updateFromWriteStatuses(commitMeta, context.emptyHoodieData(), instantTime); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, instantTime), - Option.of(commitMeta.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(commitMeta.toJsonString()))); metaClient = HoodieTableMetaClient.reload(metaClient); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index a7808ea93824..991c615c35dd 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -69,6 +69,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; /** @@ -268,7 +269,7 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat HFile.Reader reader = HoodieHFileUtils.createHFileReader(fs, new Path(path), cacheConfig, fs.getConf()); if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(SCHEMA_KEY.getBytes()))); + schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY)))); } HFileScanner scanner = reader.getScanner(false, false); if (!scanner.seekTo()) { diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java b/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java index faa36e5694db..ec747d662d88 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java @@ -22,7 +22,6 @@ import com.esotericsoftware.kryo.Serializer; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; -import java.nio.ByteBuffer; import org.apache.avro.Schema; import org.apache.avro.generic.GenericContainer; import org.apache.avro.generic.GenericDatumReader; @@ -35,9 +34,12 @@ import org.apache.avro.io.EncoderFactory; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.HashMap; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Custom serializer used for generic Avro containers. @@ -68,7 +70,7 @@ private byte[] getSchemaBytes(Schema schema) { if (encodeCache.containsKey(schema)) { return encodeCache.get(schema); } else { - byte[] schemaBytes = schema.toString().getBytes(StandardCharsets.UTF_8); + byte[] schemaBytes = getUTF8Bytes(schema.toString()); encodeCache.put(schema, schemaBytes); return schemaBytes; } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index d04e986487b5..18f5b3631a07 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -76,7 +76,6 @@ import java.math.BigInteger; import java.math.RoundingMode; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; import java.time.Instant; @@ -108,6 +107,7 @@ import static org.apache.hudi.avro.AvroSchemaUtils.resolveUnionSchema; import static org.apache.hudi.common.util.DateTimeUtils.instantToMicros; import static org.apache.hudi.common.util.DateTimeUtils.microsToInstant; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkState; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.tryUpcastDecimal; @@ -1040,7 +1040,7 @@ private static Object rewritePrimaryTypeWithDiffSchemaType(Object oldValue, Sche break; case BYTES: if (oldSchema.getType() == Schema.Type.STRING) { - return ByteBuffer.wrap((oldValue.toString()).getBytes(StandardCharsets.UTF_8)); + return ByteBuffer.wrap(getUTF8Bytes(oldValue.toString())); } break; case STRING: diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java index 38d0564b1172..01ae15da1eba 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java @@ -21,13 +21,13 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.avro.Schema; import org.apache.parquet.avro.AvroWriteSupport; import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.schema.MessageType; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -79,7 +79,7 @@ public HoodieBloomFilterAvroWriteSupport(BloomFilter bloomFilter) { @Override protected byte[] getUTF8Bytes(String key) { - return key.getBytes(StandardCharsets.UTF_8); + return StringUtils.getUTF8Bytes(key); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java b/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java index cdf0f15d80de..31be8d7bdca1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java @@ -38,6 +38,8 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Converts Json record to Avro Generic Record. */ @@ -290,7 +292,7 @@ private static JsonToAvroFieldProcessor generateBytesTypeHandler() { @Override public Pair convert(Object value, String name, Schema schema, boolean shouldSanitize, String invalidCharMask) { // Should return ByteBuffer (see GenericData.isBytes()) - return Pair.of(true, ByteBuffer.wrap(value.toString().getBytes())); + return Pair.of(true, ByteBuffer.wrap(getUTF8Bytes(value.toString()))); } }; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/HoodieJsonPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/HoodieJsonPayload.java index 86f5c9a13489..f2158a1c9e8a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/HoodieJsonPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/HoodieJsonPayload.java @@ -36,6 +36,8 @@ import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Hoodie json payload. */ @@ -74,7 +76,7 @@ private byte[] compressData(String jsonData) throws IOException { Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION); DeflaterOutputStream dos = new DeflaterOutputStream(baos, deflater, true); try { - dos.write(jsonData.getBytes()); + dos.write(getUTF8Bytes(jsonData)); } finally { dos.flush(); dos.close(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java index 421ea46f1672..22e2c6889357 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java @@ -28,7 +28,8 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Hoodie's dynamic bloom bounded bloom filter. This is based largely on Hadoop's DynamicBloomFilter, but with a bound @@ -77,7 +78,7 @@ public HoodieDynamicBoundedBloomFilter(String serString) { @Override public void add(String key) { - add(key.getBytes(StandardCharsets.UTF_8)); + add(getUTF8Bytes(key)); } @Override @@ -87,7 +88,7 @@ public void add(byte[] keyBytes) { @Override public boolean mightContain(String key) { - return internalDynamicBloomFilter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); + return internalDynamicBloomFilter.membershipTest(new Key(getUTF8Bytes(key))); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java index 43b19a19536b..adf0f058a26c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java @@ -32,7 +32,8 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; -import java.nio.charset.StandardCharsets; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * A Simple Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}. @@ -77,7 +78,7 @@ public SimpleBloomFilter(String serString) { @Override public void add(String key) { - add(key.getBytes(StandardCharsets.UTF_8)); + add(getUTF8Bytes(key)); } @Override @@ -93,7 +94,7 @@ public boolean mightContain(String key) { if (key == null) { throw new NullPointerException("Key cannot be null"); } - return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); + return filter.membershipTest(new Key(getUTF8Bytes(key))); } /** @@ -125,7 +126,7 @@ private void readObject(ObjectInputStream is) throws IOException { // @Override public void write(DataOutput out) throws IOException { - out.write(filter.toString().getBytes()); + out.write(getUTF8Bytes(filter.toString())); } //@Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 32017d192557..27314f150dc0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -64,6 +64,8 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Maintains mapping from skeleton file id to external bootstrap file. * It maintains 2 physical indices. @@ -467,7 +469,7 @@ private void writeNextSourceFileMapping(BootstrapFileMapping mapping) { srcFilePartitionInfo.setPartitionPath(mapping.getPartitionPath()); srcFilePartitionInfo.setBootstrapPartitionPath(mapping.getBootstrapPartitionPath()); srcFilePartitionInfo.setBootstrapFileStatus(mapping.getBootstrapFileStatus()); - KeyValue kv = new KeyValue(getFileGroupKey(mapping.getFileGroupId()).getBytes(), new byte[0], new byte[0], + KeyValue kv = new KeyValue(getUTF8Bytes(getFileGroupKey(mapping.getFileGroupId())), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, TimelineMetadataUtils.serializeAvroMetadata(srcFilePartitionInfo, HoodieBootstrapFilePartitionInfo.class).get()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java index 4535983389d0..f7964de5f514 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java @@ -36,6 +36,8 @@ import java.util.List; import java.util.UUID; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * All the metadata that is used for consistent hashing bucket index */ @@ -104,7 +106,7 @@ private static String generateUUID(String partitionPath, long bucketStart, long byteBuffer.putLong(bucketStart); byteBuffer.putLong(bucketEnd); byte[] longBytes = byteBuffer.array(); - byte[] partitionPathBytes = partitionPath.getBytes(StandardCharsets.UTF_8); + byte[] partitionPathBytes = getUTF8Bytes(partitionPath); byte[] combinedBytes = new byte[longBytes.length + partitionPathBytes.length]; System.arraycopy(longBytes, 0, combinedBytes, 0, longBytes.length); System.arraycopy(partitionPathBytes, 0, combinedBytes, longBytes.length, partitionPathBytes.length); @@ -152,7 +154,7 @@ public String getFilename() { } public byte[] toBytes() throws IOException { - return toJsonString().getBytes(StandardCharsets.UTF_8); + return getUTF8Bytes(toJsonString()); } public static HoodieConsistentHashingMetadata fromBytes(byte[] bytes) throws IOException { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index fe02573bc35c..ad5912ba8b9c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -50,6 +50,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * The metadata that goes into the meta file in each partition. */ @@ -171,7 +173,7 @@ private void writeMetafile(Path filePath) throws IOException { .setSchema(AvroOrcUtils.createOrcSchema(schema)); try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) { for (String key : props.stringPropertyNames()) { - writer.addUserMetadata(key, ByteBuffer.wrap(props.getProperty(key).getBytes())); + writer.addUserMetadata(key, ByteBuffer.wrap(getUTF8Bytes(props.getProperty(key)))); } } break; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index 4d7324204734..d94206d4c5cf 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -61,7 +61,6 @@ import java.util.function.BiConsumer; import java.util.stream.Collectors; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.DATE_TIME_PARSER; import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.INPUT_TIME_UNIT; import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.TIMESTAMP_INPUT_DATE_FORMAT; @@ -70,6 +69,7 @@ import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.TIMESTAMP_OUTPUT_DATE_FORMAT; import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT; import static org.apache.hudi.common.config.TimestampKeyGeneratorConfig.TIMESTAMP_TIMEZONE_FORMAT; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are loaded from hoodie.properties, these properties are usually set during @@ -503,7 +503,7 @@ public static long generateChecksum(Properties props) { } String table = props.getProperty(NAME.key()); String database = props.getProperty(DATABASE_NAME.key(), ""); - return BinaryUtil.generateChecksum(String.format(TABLE_CHECKSUM_FORMAT, database, table).getBytes(UTF_8)); + return BinaryUtil.generateChecksum(getUTF8Bytes(String.format(TABLE_CHECKSUM_FORMAT, database, table))); } public static boolean validateChecksum(Properties props) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java index bdcd0ac690fd..852deecbfa97 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java @@ -63,6 +63,7 @@ import java.util.zip.InflaterInputStream; import static org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -278,7 +279,7 @@ private static byte[] compress(String text) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { OutputStream out = new DeflaterOutputStream(baos); - out.write(text.getBytes(StandardCharsets.UTF_8)); + out.write(getUTF8Bytes(text)); out.close(); } catch (IOException e) { throw new HoodieIOException("IOException while compressing text " + text, e); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 703266e63366..42c47c696d86 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -58,6 +58,7 @@ import java.util.TreeMap; import java.util.function.Supplier; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.TypeUtils.unsafeCast; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -153,14 +154,14 @@ protected byte[] serializeRecords(List records) throws IOException // Write the records sortedRecordsMap.forEach((recordKey, recordBytes) -> { try { - KeyValue kv = new KeyValue(recordKey.getBytes(), null, null, recordBytes); + KeyValue kv = new KeyValue(getUTF8Bytes(recordKey), null, null, recordBytes); writer.append(kv); } catch (IOException e) { throw new HoodieIOException("IOException serializing records", e); } }); - writer.appendFileInfo(HoodieAvroHFileReader.SCHEMA_KEY.getBytes(), getSchema().toString().getBytes()); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.SCHEMA_KEY), getUTF8Bytes(getSchema().toString())); writer.close(); ostream.flush(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java index 237dfe643cf0..0cf37c851057 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java @@ -42,6 +42,7 @@ import java.util.Map; import java.util.function.Supplier; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkState; /** @@ -237,7 +238,7 @@ public static byte[] getLogMetadataBytes(Map metadat output.writeInt(metadata.size()); for (Map.Entry entry : metadata.entrySet()) { output.writeInt(entry.getKey().ordinal()); - byte[] bytes = entry.getValue().getBytes(); + byte[] bytes = getUTF8Bytes(entry.getValue()); output.writeInt(bytes.length); output.write(bytes); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index b170eb818657..6c8d6b664a08 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -40,6 +40,7 @@ import java.util.stream.Stream; import static org.apache.hudi.common.table.timeline.HoodieTimeline.compareTimestamps; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to inspect a @@ -72,7 +73,7 @@ public void setInstants(List instants) { try { md = MessageDigest.getInstance(HASHING_ALGORITHM); this.instants.forEach(i -> md - .update(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name()).getBytes())); + .update(getUTF8Bytes(StringUtils.joinUsingDelim("_", i.getTimestamp(), i.getAction(), i.getState().name())))); } catch (NoSuchAlgorithmException nse) { throw new HoodieException(nse); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java index e5e4791fe569..295e5163ed52 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java @@ -45,7 +45,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Base64; @@ -57,6 +56,7 @@ import static org.apache.avro.JsonProperties.NULL_VALUE; import static org.apache.hudi.common.util.BinaryUtil.toBytes; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Methods including addToVector, addUnionValue, createOrcSchema are originally from @@ -142,12 +142,12 @@ public static void addToVector(TypeDescription type, ColumnVector colVector, Sch byte[] bytes = null; if (value instanceof String) { - bytes = ((String) value).getBytes(StandardCharsets.UTF_8); + bytes = getUTF8Bytes((String) value); } else if (value instanceof Utf8) { final Utf8 utf8 = (Utf8) value; bytes = utf8.getBytes(); } else if (value instanceof GenericData.EnumSymbol) { - bytes = ((GenericData.EnumSymbol) value).toString().getBytes(StandardCharsets.UTF_8); + bytes = getUTF8Bytes(((GenericData.EnumSymbol) value).toString()); } else { throw new IllegalStateException(String.format( "Unrecognized type for Avro %s field value, which has type %s, value %s", @@ -400,7 +400,7 @@ public static boolean addUnionValue( case CHAR: if (value instanceof String) { matches = true; - matchValue = ((String) value).getBytes(StandardCharsets.UTF_8); + matchValue = getUTF8Bytes((String) value); } else if (value instanceof Utf8) { matches = true; matchValue = ((Utf8) value).getBytes(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java index d40659de6ff5..08ba298d2302 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java @@ -21,6 +21,8 @@ import java.nio.charset.StandardCharsets; import java.util.Base64; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Utils for Base64 encoding and decoding. */ @@ -33,7 +35,7 @@ public final class Base64CodecUtil { * @return A newly-allocated byte array containing the decoded bytes. */ public static byte[] decode(String encodedString) { - return Base64.getDecoder().decode(encodedString.getBytes(StandardCharsets.UTF_8)); + return Base64.getDecoder().decode(getUTF8Bytes(encodedString)); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java index 502ce85f4e82..c7bd01968ceb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java @@ -19,9 +19,10 @@ package org.apache.hudi.common.util; import java.nio.ByteBuffer; -import java.nio.charset.Charset; import java.util.zip.CRC32; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Utils for Java byte array. */ @@ -185,7 +186,7 @@ public static byte[] doubleTo8Byte(double a) { } public static byte[] utf8To8Byte(String a) { - return paddingTo8Byte(a.getBytes(Charset.forName("utf-8"))); + return paddingTo8Byte(getUTF8Bytes(a)); } public static Long convertStringToLong(String a) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/NumericUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/NumericUtils.java index 775c1f82cf1d..1d5eaf25aa2b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/NumericUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/NumericUtils.java @@ -20,11 +20,12 @@ import org.apache.hudi.exception.HoodieException; -import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Objects; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * A utility class for numeric. */ @@ -46,7 +47,7 @@ public static long getMessageDigestHash(final String algorithmName, final String } catch (NoSuchAlgorithmException e) { throw new HoodieException(e); } - return asLong(Objects.requireNonNull(md).digest(string.getBytes(StandardCharsets.UTF_8))); + return asLong(Objects.requireNonNull(md).digest(getUTF8Bytes(string))); } public static long asLong(byte[] bytes) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/RocksDBDAO.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/RocksDBDAO.java index c9fdf0c31780..951fe4540c1e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/RocksDBDAO.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/RocksDBDAO.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.SerializationUtils; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -54,6 +55,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Data access objects for storing and retrieving objects in Rocks DB. */ @@ -191,7 +194,7 @@ public void writeBatch(BatchHandler handler) { public void putInBatch(WriteBatch batch, String columnFamilyName, String key, T value) { try { byte[] payload = serializePayload(value); - batch.put(managedHandlesMap.get(columnFamilyName), key.getBytes(), payload); + batch.put(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(key), payload); } catch (Exception e) { throw new HoodieException(e); } @@ -228,7 +231,7 @@ public void putInBatch(WriteBat public void put(String columnFamilyName, String key, T value) { try { byte[] payload = serializePayload(value); - getRocksDB().put(managedHandlesMap.get(columnFamilyName), key.getBytes(), payload); + getRocksDB().put(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(key), payload); } catch (Exception e) { throw new HoodieException(e); } @@ -260,7 +263,7 @@ public void put(String columnFa */ public void deleteInBatch(WriteBatch batch, String columnFamilyName, String key) { try { - batch.delete(managedHandlesMap.get(columnFamilyName), key.getBytes()); + batch.delete(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(key)); } catch (RocksDBException e) { throw new HoodieException(e); } @@ -289,7 +292,7 @@ public void deleteInBatch(WriteBatch batch, String colu */ public void delete(String columnFamilyName, String key) { try { - getRocksDB().delete(managedHandlesMap.get(columnFamilyName), key.getBytes()); + getRocksDB().delete(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(key)); } catch (RocksDBException e) { throw new HoodieException(e); } @@ -319,7 +322,7 @@ public void delete(String columnFamilyName, K key) { public T get(String columnFamilyName, String key) { ValidationUtils.checkArgument(!closed); try { - byte[] val = getRocksDB().get(managedHandlesMap.get(columnFamilyName), key.getBytes()); + byte[] val = getRocksDB().get(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(key)); return val == null ? null : SerializationUtils.deserialize(val); } catch (RocksDBException e) { throw new HoodieException(e); @@ -356,7 +359,7 @@ public Stream> prefixSearch(String colu long timeTakenMicro = 0; List> results = new LinkedList<>(); try (final RocksIterator it = getRocksDB().newIterator(managedHandlesMap.get(columnFamilyName))) { - it.seek(prefix.getBytes()); + it.seek(getUTF8Bytes(prefix)); while (it.isValid() && new String(it.key()).startsWith(prefix)) { long beginTs = System.nanoTime(); T val = SerializationUtils.deserialize(it.value()); @@ -392,7 +395,7 @@ public void prefixDelete(String columnFamilyName, Strin ValidationUtils.checkArgument(!closed); LOG.info("Prefix DELETE (query=" + prefix + ") on " + columnFamilyName); final RocksIterator it = getRocksDB().newIterator(managedHandlesMap.get(columnFamilyName)); - it.seek(prefix.getBytes()); + it.seek(getUTF8Bytes(prefix)); // Find first and last keys to be deleted String firstEntry = null; String lastEntry = null; @@ -409,9 +412,9 @@ public void prefixDelete(String columnFamilyName, Strin if (null != firstEntry) { try { // This will not delete the last entry - getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), firstEntry.getBytes(), lastEntry.getBytes()); + getRocksDB().deleteRange(managedHandlesMap.get(columnFamilyName), getUTF8Bytes(firstEntry), getUTF8Bytes(lastEntry)); // Delete the last entry - getRocksDB().delete(lastEntry.getBytes()); + getRocksDB().delete(getUTF8Bytes(lastEntry)); } catch (RocksDBException e) { LOG.error("Got exception performing range delete"); throw new HoodieException(e); @@ -429,7 +432,7 @@ public void addColumnFamily(String columnFamilyName) { managedDescriptorMap.computeIfAbsent(columnFamilyName, colFamilyName -> { try { - ColumnFamilyDescriptor descriptor = getColumnFamilyDescriptor(colFamilyName.getBytes()); + ColumnFamilyDescriptor descriptor = getColumnFamilyDescriptor(StringUtils.getUTF8Bytes(colFamilyName)); ColumnFamilyHandle handle = getRocksDB().createColumnFamily(descriptor); managedHandlesMap.put(colFamilyName, handle); return descriptor; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java index eeaeb4df5bfe..2a87396005cf 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java @@ -27,10 +27,11 @@ import org.apache.hadoop.hbase.util.Bytes; import java.io.Serializable; -import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * A stateless Hash class which generates ID for the desired bit count. */ @@ -85,7 +86,7 @@ public String toString() { * @return Hash value for the message as byte array */ public static byte[] hash(final String message, final Size bits) { - return hash(message.getBytes(StandardCharsets.UTF_8), bits); + return hash(getUTF8Bytes(message), bits); } /** @@ -108,7 +109,7 @@ public static byte[] hash(final byte[] messageBytes, final Size bits) { } public static int getXXHash32(final String message, int hashSeed) { - return getXXHash32(message.getBytes(StandardCharsets.UTF_8), hashSeed); + return getXXHash32(getUTF8Bytes(message), hashSeed); } public static int getXXHash32(final byte[] message, int hashSeed) { diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java index db636720ec4d..74368dc2a815 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java @@ -46,6 +46,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.table.timeline.HoodieTimeline.SCHEMA_COMMIT_ACTION; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * {@link AbstractInternalSchemaStorageManager} implementation based on the schema files. @@ -85,7 +86,7 @@ public void persistHistorySchemaStr(String instantTime, String historySchemaStr) HoodieActiveTimeline timeline = getMetaClient().getActiveTimeline(); HoodieInstant hoodieInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, SCHEMA_COMMIT_ACTION, instantTime); timeline.createNewInstant(hoodieInstant); - byte[] writeContent = historySchemaStr.getBytes(StandardCharsets.UTF_8); + byte[] writeContent = getUTF8Bytes(historySchemaStr); timeline.transitionRequestedToInflight(hoodieInstant, Option.empty()); timeline.saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, hoodieInstant.getAction(), hoodieInstant.getTimestamp()), Option.of(writeContent)); LOG.info(String.format("persist history schema success on commit time: %s", instantTime)); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java index b4cc801ed96f..fead46d06948 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java @@ -24,10 +24,10 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.util.collection.ClosableIterator; -import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -62,6 +62,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.TypeUtils.unsafeCast; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -154,8 +155,8 @@ public ClosableIterator> getRecordsByKeyPrefixIterat public String[] readMinMaxRecordKeys() { // NOTE: This access to reader is thread-safe HFileInfo fileInfo = getSharedHFileReader().getHFileInfo(); - return new String[]{new String(fileInfo.get(KEY_MIN_RECORD.getBytes())), - new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))}; + return new String[] {new String(fileInfo.get(getUTF8Bytes(KEY_MIN_RECORD))), + new String(fileInfo.get(getUTF8Bytes(KEY_MAX_RECORD)))}; } @Override @@ -169,7 +170,7 @@ public BloomFilter readBloomFilter() { byte[] bytes = new byte[buf.remaining()]; buf.get(bytes); return BloomFilterFactory.fromString(new String(bytes), - new String(fileInfo.get(KEY_BLOOM_FILTER_TYPE_CODE.getBytes()))); + new String(fileInfo.get(getUTF8Bytes(KEY_BLOOM_FILTER_TYPE_CODE)))); } catch (IOException e) { throw new HoodieException("Could not read bloom filter from " + path, e); } @@ -291,7 +292,7 @@ private HFile.Reader getHFileReader() { } private boolean isKeyAvailable(String key, HFileScanner keyScanner) throws IOException { - final KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + final KeyValue kv = new KeyValue(getUTF8Bytes(key), null, null, null); return keyScanner.seekTo(kv) == 0; } @@ -299,7 +300,7 @@ private static Iterator getRecordByKeyPrefixIteratorInternal(HFil String keyPrefix, Schema writerSchema, Schema readerSchema) throws IOException { - KeyValue kv = new KeyValue(keyPrefix.getBytes(), null, null, null); + KeyValue kv = new KeyValue(getUTF8Bytes(keyPrefix), null, null, null); // NOTE: HFile persists both keys/values as bytes, therefore lexicographical sorted is // essentially employed @@ -377,7 +378,7 @@ public IndexedRecord next() { } private static Option fetchRecordByKeyInternal(HFileScanner scanner, String key, Schema writerSchema, Schema readerSchema) throws IOException { - KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + KeyValue kv = new KeyValue(getUTF8Bytes(key), null, null, null); // NOTE: HFile persists both keys/values as bytes, therefore lexicographical sorted is // essentially employed // @@ -400,7 +401,7 @@ private static Option fetchRecordByKeyInternal(HFileScanner scann // key is found and the cursor is left where the key is found Cell c = scanner.getCell(); byte[] valueBytes = copyValueFromCell(c); - GenericRecord record = deserialize(key.getBytes(), valueBytes, writerSchema, readerSchema); + GenericRecord record = deserialize(getUTF8Bytes(key), valueBytes, writerSchema, readerSchema); return Option.of(record); } @@ -440,7 +441,7 @@ private static GenericRecord deserialize(final byte[] keyBytes, private static Schema fetchSchema(HFile.Reader reader) { HFileInfo fileInfo = reader.getHFileInfo(); - return new Schema.Parser().parse(new String(fileInfo.get(SCHEMA_KEY.getBytes()))); + return new Schema.Parser().parse(new String(fileInfo.get(getUTF8Bytes(SCHEMA_KEY)))); } private static byte[] copyKeyFromCell(Cell cell) { diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java index 5b66c04045b3..6c440e7c5596 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieDuplicateKeyException; import org.apache.avro.Schema; @@ -48,13 +47,16 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; +import static org.apache.hudi.common.util.StringUtils.EMPTY_STRING; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * HoodieHFileWriter writes IndexedRecords into an HFile. The record's key is used as the key and the * AVRO encoded record bytes are saved as the value. - * + *

* Limitations (compared to columnar formats like Parquet or ORC): - * 1. Records should be added in order of keys - * 2. There are no column stats + * 1. Records should be added in order of keys + * 2. There are no column stats */ public class HoodieAvroHFileWriter implements HoodieAvroFileWriter { @@ -110,7 +112,7 @@ public HoodieAvroHFileWriter(String instantTime, Path file, HoodieHFileConfig hf .withFileContext(context) .create(); - writer.appendFileInfo(HoodieAvroHFileReader.SCHEMA_KEY.getBytes(), schema.toString().getBytes()); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.SCHEMA_KEY), getUTF8Bytes(schema.toString())); this.prevRecordKey = ""; } @@ -144,7 +146,7 @@ public void writeAvro(String recordKey, IndexedRecord record) throws IOException boolean isKeyAvailable = (record.get(keyFieldPos) != null && !(record.get(keyFieldPos).toString().isEmpty())); if (isKeyAvailable) { Object originalKey = keyExcludedRecord.get(keyFieldPos); - keyExcludedRecord.put(keyFieldPos, StringUtils.EMPTY_STRING); + keyExcludedRecord.put(keyFieldPos, EMPTY_STRING); value = HoodieAvroUtils.avroToBytes(keyExcludedRecord); keyExcludedRecord.put(keyFieldPos, originalKey); isRecordSerialized = true; @@ -154,7 +156,7 @@ public void writeAvro(String recordKey, IndexedRecord record) throws IOException value = HoodieAvroUtils.avroToBytes((GenericRecord) record); } - KeyValue kv = new KeyValue(recordKey.getBytes(), null, null, value); + KeyValue kv = new KeyValue(getUTF8Bytes(recordKey), null, null, value); writer.append(kv); if (hfileConfig.useBloomFilter()) { @@ -177,14 +179,14 @@ public void close() throws IOException { if (maxRecordKey == null) { maxRecordKey = ""; } - writer.appendFileInfo(HoodieAvroHFileReader.KEY_MIN_RECORD.getBytes(), minRecordKey.getBytes()); - writer.appendFileInfo(HoodieAvroHFileReader.KEY_MAX_RECORD.getBytes(), maxRecordKey.getBytes()); - writer.appendFileInfo(HoodieAvroHFileReader.KEY_BLOOM_FILTER_TYPE_CODE.getBytes(), - bloomFilter.getBloomFilterTypeCode().toString().getBytes()); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_MIN_RECORD), getUTF8Bytes(minRecordKey)); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_MAX_RECORD), getUTF8Bytes(maxRecordKey)); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_BLOOM_FILTER_TYPE_CODE), + getUTF8Bytes(bloomFilter.getBloomFilterTypeCode().toString())); writer.appendMetaBlock(HoodieAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() { @Override public void write(DataOutput out) throws IOException { - out.write(bloomFilter.serializeToString().getBytes()); + out.write(getUTF8Bytes(bloomFilter.serializeToString())); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java index f0c796ff6c6b..77f2a5cc72d6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java @@ -18,11 +18,6 @@ package org.apache.hudi.io.storage; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieBloomFilterWriteSupport; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; @@ -31,11 +26,17 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.AvroOrcUtils; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.OrcFile; import org.apache.orc.TypeDescription; import org.apache.orc.Writer; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import java.io.Closeable; import java.io.IOException; @@ -44,6 +45,7 @@ import java.util.concurrent.atomic.AtomicLong; import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; public class HoodieAvroOrcWriter implements HoodieAvroFileWriter, Closeable { private static final AtomicLong RECORD_INDEX = new AtomicLong(1); @@ -149,16 +151,16 @@ public void close() throws IOException { if (orcConfig.useBloomFilter()) { final BloomFilter bloomFilter = orcConfig.getBloomFilter(); - writer.addUserMetadata(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, ByteBuffer.wrap(bloomFilter.serializeToString().getBytes())); + writer.addUserMetadata(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, ByteBuffer.wrap(getUTF8Bytes(bloomFilter.serializeToString()))); if (minRecordKey != null && maxRecordKey != null) { - writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, ByteBuffer.wrap(minRecordKey.getBytes())); - writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER, ByteBuffer.wrap(maxRecordKey.getBytes())); + writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, ByteBuffer.wrap(getUTF8Bytes(minRecordKey))); + writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER, ByteBuffer.wrap(getUTF8Bytes(maxRecordKey))); } if (bloomFilter.getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) { - writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE, ByteBuffer.wrap(bloomFilter.getBloomFilterTypeCode().name().getBytes())); + writer.addUserMetadata(HoodieBloomFilterWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE, ByteBuffer.wrap(getUTF8Bytes(bloomFilter.getBloomFilterTypeCode().name()))); } } - writer.addUserMetadata(HoodieOrcConfig.AVRO_SCHEMA_METADATA_KEY, ByteBuffer.wrap(avroSchema.toString().getBytes())); + writer.addUserMetadata(HoodieOrcConfig.AVRO_SCHEMA_METADATA_KEY, ByteBuffer.wrap(getUTF8Bytes(avroSchema.toString()))); writer.close(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index 62b023258329..acb9dc46446c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -119,6 +119,7 @@ import static org.apache.hudi.common.config.HoodieCommonConfig.MAX_MEMORY_FOR_COMPACTION; import static org.apache.hudi.common.config.HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE; import static org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator.MILLIS_INSTANT_ID_LENGTH; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; import static org.apache.hudi.common.util.ValidationUtils.checkState; import static org.apache.hudi.metadata.HoodieMetadataPayload.RECORD_INDEX_MISSING_FILEINDEX_FALLBACK; @@ -468,7 +469,7 @@ public static HoodieData convertMetadataToBloomFilterRecords( LOG.error("Failed to read bloom filter for " + writeFilePath); return Collections.emptyListIterator(); } - ByteBuffer bloomByteBuffer = ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes()); + ByteBuffer bloomByteBuffer = ByteBuffer.wrap(getUTF8Bytes(fileBloomFilter.serializeToString())); HoodieRecord record = HoodieMetadataPayload.createBloomFilterMetadataRecord( partition, fileName, instantTime, recordsGenerationParams.getBloomFilterType(), bloomByteBuffer, false); return Collections.singletonList(record).iterator(); @@ -896,7 +897,7 @@ private static ByteBuffer readBloomFilter(Configuration conf, Path filePath) thr if (fileBloomFilter == null) { return null; } - return ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes()); + return ByteBuffer.wrap(getUTF8Bytes(fileBloomFilter.serializeToString())); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index af977bde76f1..517590a81e03 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -73,6 +73,7 @@ import static org.apache.hudi.avro.HoodieAvroUtils.sanitizeName; import static org.apache.hudi.avro.HoodieAvroUtils.unwrapAvroValueWrapper; import static org.apache.hudi.avro.HoodieAvroUtils.wrapValueIntoAvro; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -528,7 +529,7 @@ public void testWrapAndUnwrapAvroValues() throws IOException { expectedWrapperClass.put("bytesField", BytesWrapper.class); record.put("stringField", "abcdefghijk"); expectedWrapperClass.put("stringField", StringWrapper.class); - record.put("decimalField", ByteBuffer.wrap("9213032.4966".getBytes())); + record.put("decimalField", ByteBuffer.wrap(getUTF8Bytes("9213032.4966"))); expectedWrapperClass.put("decimalField", BytesWrapper.class); record.put("timeMillisField", 57996136); expectedWrapperClass.put("timeMillisField", IntWrapper.class); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java index 6d298c2edc44..75c09024f682 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java @@ -26,7 +26,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -35,6 +34,7 @@ import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; class TestHoodieWrapperFileSystem { @@ -70,8 +70,8 @@ public void testCreateImmutableFileInPath() throws IOException { Path testFile = new Path(basePath + Path.SEPARATOR + "clean.00000001"); // create same commit twice - fs.createImmutableFileInPath(testFile, Option.of(testContent.getBytes())); - fs.createImmutableFileInPath(testFile, Option.of(testContent.getBytes())); + fs.createImmutableFileInPath(testFile, Option.of(getUTF8Bytes(testContent))); + fs.createImmutableFileInPath(testFile, Option.of(getUTF8Bytes(testContent))); assertEquals(1, fs.listStatus(new Path(basePath)).length, "create same file twice should only have one file exists, files: " + fs.listStatus(new Path(basePath))); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java index 190ad398e1b6..cd3bdd1cddbb 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java @@ -50,6 +50,7 @@ import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getRandomOuterInMemPath; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -160,7 +161,7 @@ private Set getRandomValidRowIds(int count) { } private byte[] getSomeKey(int rowId) { - KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, rowId).getBytes(), + KeyValue kv = new KeyValue(getUTF8Bytes(String.format(LOCAL_FORMATTER, rowId)), Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); return kv.getKey(); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 601f83101c9b..2f94f6cb8636 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -112,6 +112,7 @@ import static org.apache.hudi.common.testutils.HoodieTestUtils.shouldUseExternalHdfs; import static org.apache.hudi.common.testutils.HoodieTestUtils.useExternalHdfs; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSimpleSchema; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -968,7 +969,7 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep // Write out a length that does not confirm with the content outputStream.writeLong(400); // Write out incomplete content - outputStream.write("something-random".getBytes()); + outputStream.write(getUTF8Bytes("something-random")); outputStream.flush(); outputStream.close(); @@ -999,7 +1000,7 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep // Write out a length that does not confirm with the content outputStream.writeLong(500); // Write out some bytes - outputStream.write("something-else-random".getBytes()); + outputStream.write(getUTF8Bytes("something-else-random")); outputStream.flush(); outputStream.close(); @@ -1118,7 +1119,7 @@ public void testValidateCorruptBlockEndPosition() throws IOException, URISyntaxE // Write out a length that does not confirm with the content outputStream.writeLong(400); // Write out incomplete content - outputStream.write("something-random".getBytes()); + outputStream.write(getUTF8Bytes("something-random")); // get corrupt block end position long corruptBlockEndPos = outputStream.getPos(); outputStream.flush(); @@ -1297,8 +1298,8 @@ public void testAvroLogRecordReaderWithFailedPartialBlock(ExternalSpillableMap.D // Write out some header outputStream.write(HoodieLogBlock.getLogMetadataBytes(header)); - outputStream.writeLong("something-random".getBytes().length); - outputStream.write("something-random".getBytes()); + outputStream.writeLong(getUTF8Bytes("something-random").length); + outputStream.write(getUTF8Bytes("something-random")); outputStream.flush(); outputStream.close(); @@ -2594,7 +2595,7 @@ public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) // Write out some metadata // TODO : test for failure to write metadata - NA ? outputStream.write(HoodieLogBlock.getLogMetadataBytes(header)); - outputStream.write("something-random".getBytes()); + outputStream.write(getUTF8Bytes("something-random")); outputStream.flush(); outputStream.close(); @@ -2952,7 +2953,7 @@ private HoodieLogFormat.Reader createCorruptedFile(String fileId) throws Excepti // Write out a length that does not confirm with the content outputStream.writeLong(400); // Write out incomplete content - outputStream.write("something-random".getBytes()); + outputStream.write(getUTF8Bytes("something-random")); outputStream.flush(); outputStream.close(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java index 54eca3c6d05d..945a0d764066 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java @@ -43,6 +43,7 @@ import java.util.Objects; import java.util.Properties; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; @@ -177,20 +178,20 @@ public void testMergeWithToastedValues() throws IOException { GenericRecord oldVal = new GenericData.Record(avroSchema); oldVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 100L); oldVal.put("string_col", "valid string value"); - oldVal.put("byte_col", ByteBuffer.wrap("valid byte value".getBytes())); + oldVal.put("byte_col", ByteBuffer.wrap(getUTF8Bytes("valid byte value"))); oldVal.put("string_null_col_1", "valid string value"); - oldVal.put("byte_null_col_1", ByteBuffer.wrap("valid byte value".getBytes())); + oldVal.put("byte_null_col_1", ByteBuffer.wrap(getUTF8Bytes("valid byte value"))); oldVal.put("string_null_col_2", null); oldVal.put("byte_null_col_2", null); GenericRecord newVal = new GenericData.Record(avroSchema); newVal.put(DebeziumConstants.FLATTENED_LSN_COL_NAME, 105L); newVal.put("string_col", PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE); - newVal.put("byte_col", ByteBuffer.wrap(PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE.getBytes())); + newVal.put("byte_col", ByteBuffer.wrap(getUTF8Bytes(PostgresDebeziumAvroPayload.DEBEZIUM_TOASTED_VALUE))); newVal.put("string_null_col_1", null); newVal.put("byte_null_col_1", null); newVal.put("string_null_col_2", "valid string value"); - newVal.put("byte_null_col_2", ByteBuffer.wrap("valid byte value".getBytes())); + newVal.put("byte_null_col_2", ByteBuffer.wrap(getUTF8Bytes("valid byte value"))); PostgresDebeziumAvroPayload payload = new PostgresDebeziumAvroPayload(Option.of(newVal)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java index 9f780727f11d..decdb2d7d246 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java @@ -31,6 +31,7 @@ import java.io.IOException; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -73,12 +74,12 @@ public void checkSerDe() { HoodieActiveTimeline commitTimeline = deserializedMetaClient.getActiveTimeline(); HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); commitTimeline.createNewInstant(instant); - commitTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes())); + commitTimeline.saveAsComplete(instant, Option.of(getUTF8Bytes("test-detail"))); commitTimeline = commitTimeline.reload(); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); assertEquals(completedInstant, commitTimeline.getInstantsAsStream().findFirst().get(), "Commit should be 1 and completed"); - assertArrayEquals("test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get(), + assertArrayEquals(getUTF8Bytes("test-detail"), commitTimeline.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\""); } @@ -90,7 +91,7 @@ public void checkCommitTimeline() { HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); activeTimeline.createNewInstant(instant); - activeTimeline.saveAsComplete(instant, Option.of("test-detail".getBytes())); + activeTimeline.saveAsComplete(instant, Option.of(getUTF8Bytes("test-detail"))); // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached activeTimeline = metaClient.getActiveTimeline(); @@ -103,7 +104,7 @@ public void checkCommitTimeline() { assertFalse(activeCommitTimeline.empty(), "Should be the 1 commit we made"); assertEquals(completedInstant, activeCommitTimeline.getInstantsAsStream().findFirst().get(), "Commit should be 1"); - assertArrayEquals("test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get(), + assertArrayEquals(getUTF8Bytes("test-detail"), activeCommitTimeline.getInstantDetails(completedInstant).get(), "Commit value should be \"test-detail\""); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java index 21251afec3ce..842366940dac 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java @@ -51,7 +51,6 @@ import org.junit.jupiter.params.provider.EnumSource; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; @@ -75,6 +74,7 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.SAVEPOINT_ACTION; import static org.apache.hudi.common.table.timeline.TimelineUtils.handleHollowCommitIfNeeded; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -523,7 +523,7 @@ private byte[] getCommitMetadata(String basePath, String partition, String commi for (Map.Entry extraEntries : extraMetadata.entrySet()) { commit.addMetadata(extraEntries.getKey(), extraEntries.getValue()); } - return commit.toJsonString().getBytes(StandardCharsets.UTF_8); + return getUTF8Bytes(commit.toJsonString()); } private byte[] getReplaceCommitMetadata(String basePath, String commitTs, String replacePartition, int replaceCount, @@ -550,7 +550,7 @@ private byte[] getReplaceCommitMetadata(String basePath, String commitTs, String for (Map.Entry extraEntries : extraMetadata.entrySet()) { commit.addMetadata(extraEntries.getKey(), extraEntries.getValue()); } - return commit.toJsonString().getBytes(StandardCharsets.UTF_8); + return getUTF8Bytes(commit.toJsonString()); } private Option getCleanMetadata(String partition, String time) throws IOException { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java index 06afc6fd5d30..86b05912a624 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java @@ -35,7 +35,6 @@ import org.junit.jupiter.api.Test; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; import java.util.Collections; @@ -56,6 +55,7 @@ import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0; import static org.apache.hudi.common.testutils.Assertions.assertStreamEquals; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -215,7 +215,7 @@ public void testAllowTempCommit() { HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); timeline.createNewInstant(instant1); - byte[] data = "commit".getBytes(StandardCharsets.UTF_8); + byte[] data = getUTF8Bytes("commit"); timeline.saveAsComplete(new HoodieInstant(true, instant1.getAction(), instant1.getTimestamp()), Option.of(data)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java index 8edcadc383cc..de5c71ea17af 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFSViewWithClustering.java @@ -37,7 +37,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -48,6 +47,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -146,7 +146,7 @@ public void testReplaceFileIdIsExcludedInView() throws IOException { HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime1); - saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant1, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); refreshFsView(); assertEquals(0, roView.getLatestBaseFiles(partitionPath1) .filter(dfile -> dfile.getFileId().equals(fileId1)).count()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java index d908c1b0949d..695f4fc03b3a 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java @@ -73,7 +73,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; @@ -88,6 +87,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -180,7 +180,7 @@ public void testCloseHoodieTableFileSystemView() throws Exception { saveAsComplete(commitTimeline, instant1, Option.empty()); saveAsComplete(commitTimeline, instant2, Option.empty()); saveAsComplete(commitTimeline, clusteringInstant3, Option.empty()); - saveAsComplete(commitTimeline, clusteringInstant4, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, clusteringInstant4, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); refreshFsView(); @@ -1432,7 +1432,7 @@ public void testReplaceWithTimeTravel() throws IOException { CommitUtils.buildMetadata(Collections.emptyList(), partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION); commitTimeline = metaClient.getActiveTimeline(); HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2); - saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant2, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); //make sure view doesn't include fileId1 refreshFsView(); @@ -1519,7 +1519,7 @@ public void testReplaceFileIdIsExcludedInView() throws IOException { HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime1); - saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant1, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); refreshFsView(); assertEquals(0, roView.getLatestBaseFiles(partitionPath1) .filter(dfile -> dfile.getFileId().equals(fileId1)).count()); @@ -1688,7 +1688,7 @@ public void testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept HoodieCommitMetadata commitMetadata1 = CommitUtils.buildMetadata(writeStats1, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION); - saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata1.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant1, Option.of(getUTF8Bytes(commitMetadata1.toJsonString()))); commitTimeline.reload(); // replace commit @@ -1711,7 +1711,7 @@ public void testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept HoodieCommitMetadata commitMetadata2 = CommitUtils.buildMetadata(writeStats2, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION); - saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata2.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant2, Option.of(getUTF8Bytes(commitMetadata2.toJsonString()))); // another insert commit String commitTime3 = "3"; @@ -1727,7 +1727,7 @@ public void testHoodieTableFileSystemViewWithPendingClustering() throws IOExcept List writeStats3 = buildWriteStats(partitionToFile3, commitTime3); HoodieCommitMetadata commitMetadata3 = CommitUtils.buildMetadata(writeStats3, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION); - saveAsComplete(commitTimeline, instant3, Option.of(commitMetadata3.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant3, Option.of(getUTF8Bytes(commitMetadata3.toJsonString()))); metaClient.reloadActiveTimeline(); refreshFsView(); @@ -1853,7 +1853,7 @@ public void testPendingMajorAndMinorCompactionOperations() throws Exception { commitMetadata.addWriteStat(partitionPath, getHoodieWriteStat(partitionPath, fileId1, logFileName1)); commitMetadata.addWriteStat(partitionPath, getHoodieWriteStat(partitionPath, fileId2, logFileName2)); HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, commitTime1); - saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant1, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); SyncableFileSystemView fileSystemView = getFileSystemView(metaClient.reloadActiveTimeline(), true); @@ -1872,7 +1872,7 @@ public void testPendingMajorAndMinorCompactionOperations() throws Exception { commitMetadata.addWriteStat(partitionPath, getHoodieWriteStat(partitionPath, fileId1, logFileName3)); HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, commitTime2); - saveAsComplete(commitTimeline, instant2, Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + saveAsComplete(commitTimeline, instant2, Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); // Verify file system view after 2nd commit. verifyFileSystemView(partitionPath, expectedState, fileSystemView); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java index 9b56851f3e3e..162846da534d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java @@ -60,7 +60,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -77,6 +76,7 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION; import static org.apache.hudi.common.table.timeline.HoodieTimeline.LOG_COMPACTION_ACTION; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -247,7 +247,7 @@ public void testIngestion() throws IOException { new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs)); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); view.sync(); assertTrue(view.getLastInstant().isPresent()); @@ -290,7 +290,7 @@ public void testReplaceCommits() throws IOException { new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs)); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); view.sync(); assertTrue(view.getLastInstant().isPresent()); @@ -983,7 +983,7 @@ private List addInstant(HoodieTableMetaClient metaClient, String instant deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, instant); metaClient.getActiveTimeline().createNewInstant(inflightInstant); metaClient.getActiveTimeline().saveAsComplete(inflightInstant, - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); /* // Delete pending compaction if present metaClient.getFs().delete(new Path(metaClient.getMetaPath(), @@ -1010,7 +1010,7 @@ private List addReplaceInstant(HoodieTableMetaClient metaClient, String writeStats.forEach(e -> replaceCommitMetadata.addWriteStat(e.getKey(), e.getValue())); replaceCommitMetadata.setPartitionToReplaceFileIds(partitionToReplaceFileIds); metaClient.getActiveTimeline().saveAsComplete(inflightInstant, - Option.of(replaceCommitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(replaceCommitMetadata.toJsonString()))); return writeStats.stream().map(e -> e.getValue().getPath()).collect(Collectors.toList()); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java index 4ace66779ec6..c3008fd171a8 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileCreateUtils.java @@ -48,7 +48,6 @@ import java.io.IOException; import java.io.RandomAccessFile; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -67,6 +66,7 @@ import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeRestoreMetadata; import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeRollbackMetadata; import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeRollbackPlan; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Utils for creating dummy Hudi files in testing. @@ -121,7 +121,7 @@ private static void createMetaFile(String basePath, String instantTime, String s } private static void createMetaFile(String basePath, String instantTime, String suffix) throws IOException { - createMetaFile(basePath, instantTime, suffix, "".getBytes()); + createMetaFile(basePath, instantTime, suffix, getUTF8Bytes("")); } private static void createMetaFile(String basePath, String instantTime, String suffix, byte[] content) throws IOException { @@ -160,7 +160,7 @@ public static void createCommit(String basePath, String instantTime) throws IOEx public static void createCommit(String basePath, String instantTime, Option metadata) throws IOException { if (metadata.isPresent()) { createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION, - metadata.get().toJsonString().getBytes(StandardCharsets.UTF_8)); + getUTF8Bytes(metadata.get().toJsonString())); } else { createMetaFile(basePath, instantTime, HoodieTimeline.COMMIT_EXTENSION); } @@ -183,7 +183,7 @@ public static void createInflightCommit(String basePath, String instantTime) thr } public static void createDeltaCommit(String basePath, String instantTime, HoodieCommitMetadata metadata) throws IOException { - createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, metadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + createMetaFile(basePath, instantTime, HoodieTimeline.DELTA_COMMIT_EXTENSION, getUTF8Bytes(metadata.toJsonString())); } public static void createDeltaCommit(String basePath, String instantTime) throws IOException { @@ -207,7 +207,7 @@ public static void createInflightReplaceCommit(String basePath, String instantTi } public static void createReplaceCommit(String basePath, String instantTime, HoodieReplaceCommitMetadata metadata) throws IOException { - createMetaFile(basePath, instantTime, HoodieTimeline.REPLACE_COMMIT_EXTENSION, metadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + createMetaFile(basePath, instantTime, HoodieTimeline.REPLACE_COMMIT_EXTENSION, getUTF8Bytes(metadata.toJsonString())); } public static void createRequestedReplaceCommit(String basePath, String instantTime, Option requestedReplaceMetadata) throws IOException { @@ -220,7 +220,7 @@ public static void createRequestedReplaceCommit(String basePath, String instantT public static void createInflightReplaceCommit(String basePath, String instantTime, Option inflightReplaceMetadata) throws IOException { if (inflightReplaceMetadata.isPresent()) { - createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_REPLACE_COMMIT_EXTENSION, inflightReplaceMetadata.get().toJsonString().getBytes(StandardCharsets.UTF_8)); + createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_REPLACE_COMMIT_EXTENSION, getUTF8Bytes(inflightReplaceMetadata.get().toJsonString())); } else { createMetaFile(basePath, instantTime, HoodieTimeline.INFLIGHT_REPLACE_COMMIT_EXTENSION); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index cd3755d26c81..26a85a6f806d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -60,7 +60,6 @@ import java.lang.reflect.InvocationTargetException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -81,6 +80,7 @@ import java.util.stream.IntStream; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkState; /** @@ -387,7 +387,7 @@ private void generateExtraSchemaValues(GenericRecord rec) { rec.put("distance_in_meters", rand.nextInt()); rec.put("seconds_since_epoch", rand.nextLong()); rec.put("weight", rand.nextFloat()); - byte[] bytes = "Canada".getBytes(); + byte[] bytes = getUTF8Bytes("Canada"); rec.put("nation", ByteBuffer.wrap(bytes)); long randomMillis = genRandomTimeMillis(rand); Instant instant = Instant.ofEpochMilli(randomMillis); @@ -525,7 +525,7 @@ private static void createCommitFile(String basePath, String instantTime, Config private static void createMetadataFile(String f, String basePath, Configuration configuration, HoodieCommitMetadata commitMetadata) { try { - createMetadataFile(f, basePath, configuration, commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + createMetadataFile(f, basePath, configuration, getUTF8Bytes(commitMetadata.toJsonString())); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -618,7 +618,7 @@ public static void createSavepointFile(String basePath, String instantTime, Conf try (FSDataOutputStream os = fs.create(commitFile, true)) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); // Write empty commit metadata - os.writeBytes(new String(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + os.writeBytes(new String(getUTF8Bytes(commitMetadata.toJsonString()))); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java index f9a67a137106..de262ce0d648 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java @@ -52,6 +52,7 @@ import static org.apache.hudi.avro.HoodieAvroUtils.createHoodieRecordFromAvro; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Example row change event based on some example data used by testcases. The data avro schema is @@ -245,7 +246,7 @@ private byte[] compressData(String jsonData) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DeflaterOutputStream dos = new DeflaterOutputStream(baos, new Deflater(Deflater.BEST_COMPRESSION), true); try { - dos.write(jsonData.getBytes()); + dos.write(getUTF8Bytes(jsonData)); } finally { dos.flush(); dos.close(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/ZookeeperTestService.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/ZookeeperTestService.java index bed846393ccf..b7e090174d2f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/ZookeeperTestService.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/minicluster/ZookeeperTestService.java @@ -37,6 +37,8 @@ import java.nio.file.Files; import java.util.Objects; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * A Zookeeper minicluster service implementation. *

@@ -174,7 +176,7 @@ private static boolean waitForServerDown(int port, long timeout) { try { try (Socket sock = new Socket("localhost", port)) { OutputStream outstream = sock.getOutputStream(); - outstream.write("stat".getBytes()); + outstream.write(getUTF8Bytes("stat")); outstream.flush(); } } catch (IOException e) { @@ -202,7 +204,7 @@ private static boolean waitForServerUp(String hostname, int port, long timeout) BufferedReader reader = null; try { OutputStream outstream = sock.getOutputStream(); - outstream.write("stat".getBytes()); + outstream.write(getUTF8Bytes("stat")); outstream.flush(); Reader isr = new InputStreamReader(sock.getInputStream()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java index 8cee7a24541b..6648a0292dff 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java @@ -20,9 +20,9 @@ import org.junit.jupiter.api.Test; -import java.nio.charset.StandardCharsets; import java.util.UUID; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertArrayEquals; /** @@ -38,7 +38,7 @@ public void testCodec() { for (int i = 0; i < times; i++) { - byte[] originalData = uuid.toString().getBytes(StandardCharsets.UTF_8); + byte[] originalData = getUTF8Bytes(uuid.toString()); String encodeData = Base64CodecUtil.encode(originalData); byte[] decodeData = Base64CodecUtil.decode(encodeData); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java index 91fe5bf30dc9..720f2610e139 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestFileIOUtils.java @@ -26,12 +26,12 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Field; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -63,17 +63,17 @@ public void testMkdirAndDelete() throws IOException { @Test public void testInputStreamReads() throws IOException { String msg = "hudi rocks!"; - ByteArrayInputStream inputStream = new ByteArrayInputStream(msg.getBytes(StandardCharsets.UTF_8)); + ByteArrayInputStream inputStream = new ByteArrayInputStream(getUTF8Bytes(msg)); assertEquals(msg, FileIOUtils.readAsUTFString(inputStream)); - inputStream = new ByteArrayInputStream(msg.getBytes(StandardCharsets.UTF_8)); + inputStream = new ByteArrayInputStream(getUTF8Bytes(msg)); assertEquals(msg.length(), FileIOUtils.readAsByteArray(inputStream).length); } @Test public void testReadAsUTFStringLines() { String content = "a\nb\nc"; - List expectedLines = Arrays.stream(new String[]{"a", "b", "c"}).collect(Collectors.toList()); - ByteArrayInputStream inputStream = new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)); + List expectedLines = Arrays.stream(new String[] {"a", "b", "c"}).collect(Collectors.toList()); + ByteArrayInputStream inputStream = new ByteArrayInputStream(getUTF8Bytes(content)); assertEquals(expectedLines, FileIOUtils.readAsUTFStringLines(inputStream)); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java index 5801f7074f33..b439d8167247 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestOrcReaderIterator.java @@ -36,10 +36,10 @@ import org.junit.jupiter.api.Test; import java.io.File; -import java.nio.charset.StandardCharsets; import java.util.Iterator; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -70,9 +70,9 @@ public void testOrcIteratorReadData() throws Exception { BytesColumnVector colorColumns = (BytesColumnVector) batch.cols[2]; for (int r = 0; r < 5; ++r) { int row = batch.size++; - byte[] name = ("name" + r).getBytes(StandardCharsets.UTF_8); + byte[] name = getUTF8Bytes("name" + r); nameColumns.setVal(row, name); - byte[] color = ("color" + r).getBytes(StandardCharsets.UTF_8); + byte[] color = getUTF8Bytes("color" + r); colorColumns.setVal(row, color); numberColumns.vector[row] = r; } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java index 1548fd4a0197..54985056bf08 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestStringUtils.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.Collections; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -70,8 +71,8 @@ public void testStringObjToString() { assertEquals("Test String", StringUtils.objToString("Test String")); // assert byte buffer - ByteBuffer byteBuffer1 = ByteBuffer.wrap("1234".getBytes()); - ByteBuffer byteBuffer2 = ByteBuffer.wrap("5678".getBytes()); + ByteBuffer byteBuffer1 = ByteBuffer.wrap(getUTF8Bytes("1234")); + ByteBuffer byteBuffer2 = ByteBuffer.wrap(getUTF8Bytes("5678")); // assert equal because ByteBuffer has overwritten the toString to return a summary string assertEquals(byteBuffer1.toString(), byteBuffer2.toString()); // assert not equal @@ -103,7 +104,7 @@ public void testSplit() { @Test public void testHexString() { String str = "abcd"; - assertEquals(StringUtils.toHexString(str.getBytes()), toHexString(str.getBytes())); + assertEquals(StringUtils.toHexString(getUTF8Bytes(str)), toHexString(getUTF8Bytes(str))); } private static String toHexString(byte[] bytes) { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestHashID.java b/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestHashID.java index 3bf316cc4c18..1ab9d82b2b92 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestHashID.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/hash/TestHashID.java @@ -25,12 +25,12 @@ import javax.xml.bind.DatatypeConverter; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Random; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -53,7 +53,7 @@ public void testHashForByteInput(HashID.Size size) { .limit((32 + (i * 4))) .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) .toString(); - final byte[] originalData = message.getBytes(StandardCharsets.UTF_8); + final byte[] originalData = getUTF8Bytes(message); final byte[] hashBytes = HashID.hash(originalData, size); assertEquals(hashBytes.length, size.byteSize()); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java index 75d3b30abd1e..d8e67fb7217a 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaRegistryProvider.java @@ -30,7 +30,6 @@ import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Collections; import java.util.regex.Matcher; @@ -40,6 +39,7 @@ import static org.apache.hudi.common.util.ConfigUtils.SCHEMAPROVIDER_CONFIG_PREFIX; import static org.apache.hudi.common.util.ConfigUtils.checkRequiredConfigProperties; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Obtains latest schema from the Confluent/Kafka schema-registry. @@ -97,7 +97,7 @@ public String fetchSchemaFromRegistry(String registryUrl) throws IOException { } protected void setAuthorizationHeader(String creds, HttpURLConnection connection) { - String encodedAuth = Base64.getEncoder().encodeToString(creds.getBytes(StandardCharsets.UTF_8)); + String encodedAuth = Base64.getEncoder().encodeToString(getUTF8Bytes(creds)); connection.setRequestProperty("Authorization", "Basic " + encodedAuth); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/JsonDeserializationFunction.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/JsonDeserializationFunction.java index ae5a45d7c214..5be0c3ce84b6 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/JsonDeserializationFunction.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/JsonDeserializationFunction.java @@ -27,7 +27,7 @@ import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; import org.apache.flink.table.types.logical.RowType; -import java.nio.charset.StandardCharsets; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Wrapper function that manages the lifecycle of the JSON deserialization schema. @@ -68,6 +68,6 @@ public void open(Configuration parameters) throws Exception { @Override public RowData map(String record) throws Exception { - return deserializationSchema.deserialize(record.getBytes(StandardCharsets.UTF_8)); + return deserializationSchema.deserialize(getUTF8Bytes(record)); } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java index 216fa3f0f336..6c4aae3cd139 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StringToRowDataConverter.java @@ -29,12 +29,13 @@ import org.apache.flink.table.types.logical.TimestampType; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDate; import java.time.temporal.ChronoUnit; import java.util.Arrays; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * A converter that converts a string array into internal row data fields. * The converter is designed to be stateful(not pure stateless tool) @@ -101,7 +102,7 @@ private static Converter getConverter(LogicalType logicalType) { return StringData::fromString; case BINARY: case VARBINARY: - return field -> field.getBytes(StandardCharsets.UTF_8); + return field -> getUTF8Bytes(field); case DECIMAL: DecimalType decimalType = (DecimalType) logicalType; return field -> diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java index db77af5fc7d0..1e57ea8de83d 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java @@ -44,7 +44,6 @@ import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -53,6 +52,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertIterableEquals; @@ -138,8 +138,8 @@ void testFilterInstantsByCondition() throws IOException { "", HoodieTimeline.REPLACE_COMMIT_ACTION); timeline.transitionReplaceInflightToComplete( - HoodieTimeline.getReplaceCommitInflightInstant(commit3.getTimestamp()), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + HoodieTimeline.getReplaceCommitInflightInstant(commit3.getTimestamp()), + Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); timeline = timeline.reload(); conf.set(FlinkOptions.READ_END_COMMIT, "3"); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java index 50816a298de0..c9eb5ac54959 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java @@ -30,7 +30,6 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; @@ -40,6 +39,7 @@ import java.util.Arrays; import java.util.List; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -117,7 +117,7 @@ void getValueFromLiteralForNonNull() { dataList.add(new Double(6.0)); // f_double dataList.add(new Boolean(true)); // f_boolean dataList.add(new BigDecimal(3.0)); // f_decimal - dataList.add("hudi".getBytes(StandardCharsets.UTF_8)); // f_bytes + dataList.add(getUTF8Bytes("hudi")); // f_bytes dataList.add("hudi ok"); // f_string dataList.add(LocalTime.of(1, 11, 11)); // f_time dataList.add(LocalDate.of(2023, 1, 2)); // f_date diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java index 1e951dc3cb00..5fa78e3647f7 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java @@ -37,8 +37,7 @@ import javax.annotation.Nullable; -import java.nio.charset.StandardCharsets; - +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -133,6 +132,6 @@ public static HoodieCommitMetadata deleteInstantFile(HoodieTableMetaClient metaC public static void saveInstantAsComplete(HoodieTableMetaClient metaClient, HoodieInstant instant, HoodieCommitMetadata metadata) throws Exception { metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, instant.getAction(), instant.getTimestamp()), - Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(metadata.toJsonString()))); } } diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 622f499b64bb..ac9ca59d574d 100644 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -82,7 +82,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.time.LocalDate; import java.time.LocalDateTime; @@ -93,6 +92,7 @@ import java.util.stream.Collectors; import static org.apache.flink.table.runtime.functions.SqlDateTimeUtils.dateToInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.parquet.Preconditions.checkArgument; /** @@ -190,7 +190,7 @@ private static ColumnVector createVectorFromConstant( } else { bsv.fill(value instanceof byte[] ? (byte[]) value - : value.toString().getBytes(StandardCharsets.UTF_8)); + : getUTF8Bytes(value.toString())); } return bsv; case BOOLEAN: diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index c56109426554..76aa827a84a6 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -81,7 +81,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.time.LocalDate; import java.time.LocalDateTime; @@ -92,6 +91,7 @@ import java.util.stream.Collectors; import static org.apache.flink.table.runtime.functions.SqlDateTimeUtils.dateToInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.parquet.Preconditions.checkArgument; /** @@ -189,7 +189,7 @@ private static ColumnVector createVectorFromConstant( } else { bsv.fill(value instanceof byte[] ? (byte[]) value - : value.toString().getBytes(StandardCharsets.UTF_8)); + : getUTF8Bytes(value.toString())); } return bsv; case BOOLEAN: diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 6211416631bf..1b636c63b2f6 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -81,7 +81,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.time.LocalDate; import java.time.LocalDateTime; @@ -92,6 +91,7 @@ import java.util.stream.Collectors; import static org.apache.flink.table.utils.DateTimeUtils.toInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.parquet.Preconditions.checkArgument; /** @@ -189,7 +189,7 @@ private static ColumnVector createVectorFromConstant( } else { bsv.fill(value instanceof byte[] ? (byte[]) value - : value.toString().getBytes(StandardCharsets.UTF_8)); + : getUTF8Bytes(value.toString())); } return bsv; case BOOLEAN: diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 6211416631bf..1b636c63b2f6 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -81,7 +81,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.time.LocalDate; import java.time.LocalDateTime; @@ -92,6 +91,7 @@ import java.util.stream.Collectors; import static org.apache.flink.table.utils.DateTimeUtils.toInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.parquet.Preconditions.checkArgument; /** @@ -189,7 +189,7 @@ private static ColumnVector createVectorFromConstant( } else { bsv.fill(value instanceof byte[] ? (byte[]) value - : value.toString().getBytes(StandardCharsets.UTF_8)); + : getUTF8Bytes(value.toString())); } return bsv; case BOOLEAN: diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 6211416631bf..1b636c63b2f6 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -81,7 +81,6 @@ import java.io.IOException; import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; import java.sql.Date; import java.time.LocalDate; import java.time.LocalDateTime; @@ -92,6 +91,7 @@ import java.util.stream.Collectors; import static org.apache.flink.table.utils.DateTimeUtils.toInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.parquet.Preconditions.checkArgument; /** @@ -189,7 +189,7 @@ private static ColumnVector createVectorFromConstant( } else { bsv.fill(value instanceof byte[] ? (byte[]) value - : value.toString().getBytes(StandardCharsets.UTF_8)); + : getUTF8Bytes(value.toString())); } return bsv; case BOOLEAN: diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java index 5dcd66cd826d..9739135ae409 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java @@ -23,10 +23,12 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + public class InputSplitUtils { public static void writeString(String str, DataOutput out) throws IOException { - byte[] bytes = str.getBytes(StandardCharsets.UTF_8); + byte[] bytes = getUTF8Bytes(str); out.writeInt(bytes.length); out.write(bytes); } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java index 92bf6f3ca718..55d03c156089 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java @@ -49,11 +49,11 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -342,7 +342,7 @@ private void createCommitFile(java.nio.file.Path basePath, String commitNumber, File file = basePath.resolve(".hoodie").resolve(commitNumber + ".commit").toFile(); file.createNewFile(); FileOutputStream fileOutputStream = new FileOutputStream(file); - fileOutputStream.write(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + fileOutputStream.write(getUTF8Bytes(commitMetadata.toJsonString())); fileOutputStream.flush(); fileOutputStream.close(); } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index 286be418b04d..1540aea1023b 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -18,20 +18,6 @@ package org.apache.hudi.hadoop; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.IOConstants; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapreduce.Job; - import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.common.fs.FSUtils; @@ -55,6 +41,19 @@ import org.apache.hudi.hadoop.utils.HoodieHiveUtils; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapreduce.Job; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.parquet.avro.AvroParquetWriter; import org.junit.jupiter.api.BeforeEach; @@ -65,7 +64,6 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.sql.Timestamp; import java.time.Instant; @@ -77,6 +75,7 @@ import java.util.List; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.hadoop.HoodieColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -495,7 +494,7 @@ private void createCommitFile(java.nio.file.Path basePath, String commitNumber, File file = basePath.resolve(".hoodie").resolve(commitNumber + ".commit").toFile(); file.createNewFile(); FileOutputStream fileOutputStream = new FileOutputStream(file); - fileOutputStream.write(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + fileOutputStream.write(getUTF8Bytes(commitMetadata.toJsonString())); fileOutputStream.flush(); fileOutputStream.close(); } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java index 4b0f379aedb8..b7b21a288110 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java @@ -40,10 +40,10 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.List; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.AdditionalMatchers.aryEq; import static org.mockito.ArgumentMatchers.any; @@ -101,12 +101,12 @@ public void testWrite() throws IOException { inorder.verify(out, times(1)).writeByte(eq(fileSplitName.length())); inorder.verify(out, times(1)).write(aryEq(Text.encode(fileSplitName).array()), eq(0), eq(fileSplitName.length())); inorder.verify(out, times(1)).writeInt(eq(basePath.length())); - inorder.verify(out, times(1)).write(aryEq(basePath.getBytes(StandardCharsets.UTF_8))); + inorder.verify(out, times(1)).write(aryEq(getUTF8Bytes(basePath))); inorder.verify(out, times(1)).writeInt(eq(maxCommitTime.length())); - inorder.verify(out, times(1)).write(aryEq(maxCommitTime.getBytes(StandardCharsets.UTF_8))); + inorder.verify(out, times(1)).write(aryEq(getUTF8Bytes(maxCommitTime))); inorder.verify(out, times(1)).writeInt(eq(deltaLogPaths.size())); inorder.verify(out, times(1)).writeInt(eq(deltaLogPaths.get(0).length())); - inorder.verify(out, times(1)).write(aryEq(deltaLogPaths.get(0).getBytes(StandardCharsets.UTF_8))); + inorder.verify(out, times(1)).write(aryEq(getUTF8Bytes(deltaLogPaths.get(0)))); inorder.verify(out, times(1)).writeBoolean(false); // verify there are no more interactions happened on the mocked object inorder.verifyNoMoreInteractions(); @@ -134,11 +134,11 @@ public Integer answer(InvocationOnMock invocationOnMock) throws Throwable { }); Answer readFullyAnswer = new Answer() { private int count = 0; - private byte[][] answers = new byte[][]{ - fileSplitName.getBytes(StandardCharsets.UTF_8), - basePath.getBytes(StandardCharsets.UTF_8), - maxCommitTime.getBytes(StandardCharsets.UTF_8), - deltaLogPaths.get(0).getBytes(StandardCharsets.UTF_8), + private byte[][] answers = new byte[][] { + getUTF8Bytes(fileSplitName), + getUTF8Bytes(basePath), + getUTF8Bytes(maxCommitTime), + getUTF8Bytes(deltaLogPaths.get(0)), }; @Override diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index dc3f04955af2..6753a0aa33c1 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -83,7 +83,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -95,6 +94,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader.REALTIME_SKIP_MERGE_PROP; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -778,7 +778,7 @@ private void createReplaceCommitFile( File file = basePath.resolve(".hoodie").resolve(commitNumber + ".replacecommit").toFile(); file.createNewFile(); FileOutputStream fileOutputStream = new FileOutputStream(file); - fileOutputStream.write(replaceMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + fileOutputStream.write(getUTF8Bytes(replaceMetadata.toJsonString())); fileOutputStream.flush(); fileOutputStream.close(); } @@ -820,7 +820,7 @@ private void createDeltaCommitFile( File file = basePath.resolve(".hoodie").resolve(commitNumber + ".deltacommit").toFile(); file.createNewFile(); FileOutputStream fileOutputStream = new FileOutputStream(file); - fileOutputStream.write(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + fileOutputStream.write(getUTF8Bytes(commitMetadata.toJsonString())); fileOutputStream.flush(); fileOutputStream.close(); } diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java index 0f2b86e67e43..1e27b29ae2d5 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java @@ -47,7 +47,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.nio.file.Path; @@ -62,6 +61,8 @@ import java.util.Properties; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Helper methods for Kafka. */ @@ -232,7 +233,7 @@ public static String hashDigest(String stringToHash) { LOG.error("Fatal error selecting hash algorithm", e); throw new HoodieException(e); } - byte[] digest = Objects.requireNonNull(md).digest(stringToHash.getBytes(StandardCharsets.UTF_8)); + byte[] digest = Objects.requireNonNull(md).digest(getUTF8Bytes(stringToHash)); return StringUtils.toHexString(digest).toUpperCase(); } diff --git a/hudi-kafka-connect/src/test/java/org/apache/hudi/helper/MockKafkaConnect.java b/hudi-kafka-connect/src/test/java/org/apache/hudi/helper/MockKafkaConnect.java index 113b93ef8712..66ee2b597cf7 100644 --- a/hudi-kafka-connect/src/test/java/org/apache/hudi/helper/MockKafkaConnect.java +++ b/hudi-kafka-connect/src/test/java/org/apache/hudi/helper/MockKafkaConnect.java @@ -29,6 +29,8 @@ import java.util.Map; import java.util.Set; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * Helper class that emulates the Kafka Connect f/w and additionally * implements {@link SinkTaskContext} for testing purposes. @@ -137,9 +139,9 @@ private SinkRecord getNextKafkaRecord() { return new SinkRecord(testPartition.topic(), testPartition.partition(), Schema.OPTIONAL_BYTES_SCHEMA, - ("key-" + currentKafkaOffset).getBytes(), + getUTF8Bytes("key-" + currentKafkaOffset), Schema.OPTIONAL_BYTES_SCHEMA, - "value".getBytes(), currentKafkaOffset++); + getUTF8Bytes("value"), currentKafkaOffset++); } private void resetOffset(long newOffset) { diff --git a/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java b/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java index 7a286e565ea3..5b266e1d4fca 100644 --- a/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java +++ b/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java @@ -51,6 +51,7 @@ import java.util.List; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; public class TestAbstractConnectWriter { @@ -139,7 +140,7 @@ private static void validateRecords(List actualRecords, List Any): Unit = { val separator = "=" * 96 - val testHeader = (separator + '\n' + benchmarkName + '\n' + separator + '\n' + '\n').getBytes + val testHeader = getUTF8Bytes(separator + '\n' + benchmarkName + '\n' + separator + '\n' + '\n') output.foreach(_.write(testHeader)) func output.foreach(_.write('\n')) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala index ea83c828c553..abe3858b03c5 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala @@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.timeline.HoodieActiveTimeline import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} +import org.apache.hudi.common.util.StringUtils.getUTF8Bytes import org.apache.hudi.testutils.HoodieClientTestUtils import org.apache.parquet.avro.AvroParquetWriter import org.apache.parquet.hadoop.ParquetWriter @@ -49,7 +50,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { // create schema file val schemaFileOS = fs.create(new Path(schemaFile)) - try schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes) + try schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)) finally if (schemaFileOS != null) schemaFileOS.close() val insertData: util.List[GenericRecord] = createInsertRecords(sourcePath) @@ -82,7 +83,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { // create schema file val schemaFileOS = fs.create(new Path(schemaFile)) - try schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes) + try schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)) finally if (schemaFileOS != null) schemaFileOS.close() val insertData: util.List[GenericRecord] = createUpsertRecords(sourcePath) diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala index 4920437a1ec7..562128a6b4d7 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterTableCommand.scala @@ -25,6 +25,7 @@ import org.apache.hudi.common.model.{HoodieCommitMetadata, WriteOperationType} import org.apache.hudi.common.table.timeline.HoodieInstant.State import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstant} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.StringUtils.getUTF8Bytes import org.apache.hudi.common.util.{CommitUtils, Option} import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID @@ -44,7 +45,6 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{Row, SparkSession} import java.net.URI -import java.nio.charset.StandardCharsets import java.util import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ @@ -270,7 +270,7 @@ object AlterTableCommand extends Logging { val requested = new HoodieInstant(State.REQUESTED, commitActionType, instantTime) val metadata = new HoodieCommitMetadata metadata.setOperationType(WriteOperationType.ALTER_SCHEMA) - timeLine.transitionRequestedToInflight(requested, Option.of(metadata.toJsonString.getBytes(StandardCharsets.UTF_8))) + timeLine.transitionRequestedToInflight(requested, Option.of(getUTF8Bytes(metadata.toJsonString))) val extraMeta = new util.HashMap[String, String]() extraMeta.put(SerDeHelper.LATEST_SCHEMA, SerDeHelper.toJson(internalSchema.setSchemaId(instantTime.toLong))) val schemaManager = new FileBasedInternalSchemaStorageManager(metaClient) diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java index ad9bbc368bfb..239816c3179e 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java @@ -64,7 +64,6 @@ import java.io.IOException; import java.io.OutputStream; import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; @@ -75,6 +74,7 @@ import java.util.Properties; import java.util.UUID; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.fail; public class HiveTestCluster implements BeforeAllCallback, AfterAllCallback, BeforeEachCallback, AfterEachCallback { @@ -171,7 +171,7 @@ public void createCOWTable(String commitTime, int numberOfPartitions, String dbN } private void createCommitFile(HoodieCommitMetadata commitMetadata, String commitTime, String basePath) throws IOException { - byte[] bytes = commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8); + byte[] bytes = getUTF8Bytes(commitMetadata.toJsonString()); Path fullPath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime)); FSDataOutputStream fsout = dfsCluster.getFileSystem().create(fullPath, true); diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java index cc7f6e7980b6..78d3185e6ae8 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java @@ -75,7 +75,6 @@ import java.io.File; import java.io.IOException; import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.time.Instant; import java.time.ZonedDateTime; @@ -95,6 +94,7 @@ import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; import static org.apache.hudi.common.table.timeline.TimelineMetadataUtils.serializeRollbackMetadata; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_BATCH_SYNC_PARTITION_NUM; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; @@ -281,11 +281,11 @@ public static void addRollbackInstantToTable(String instantTime, String commitTo createMetaFile( basePath, HoodieTimeline.makeRequestedRollbackFileName(instantTime), - "".getBytes()); + getUTF8Bytes("")); createMetaFile( basePath, HoodieTimeline.makeInflightRollbackFileName(instantTime), - "".getBytes()); + getUTF8Bytes("")); createMetaFile( basePath, HoodieTimeline.makeRollbackFileName(instantTime), @@ -553,14 +553,14 @@ public static void createCommitFile(HoodieCommitMetadata commitMetadata, String createMetaFile( basePath, HoodieTimeline.makeCommitFileName(instantTime), - commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + getUTF8Bytes(commitMetadata.toJsonString())); } public static void createReplaceCommitFile(HoodieReplaceCommitMetadata commitMetadata, String instantTime) throws IOException { createMetaFile( basePath, HoodieTimeline.makeReplaceFileName(instantTime), - commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + getUTF8Bytes(commitMetadata.toJsonString())); } public static void createCommitFileWithSchema(HoodieCommitMetadata commitMetadata, String instantTime, boolean isSimpleSchema) throws IOException { @@ -573,7 +573,7 @@ private static void createCompactionCommitFile(HoodieCommitMetadata commitMetada createMetaFile( basePath, HoodieTimeline.makeCommitFileName(instantTime), - commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + getUTF8Bytes(commitMetadata.toJsonString())); } private static void createDeltaCommitFile(HoodieCommitMetadata deltaCommitMetadata, String deltaCommitTime) @@ -581,7 +581,7 @@ private static void createDeltaCommitFile(HoodieCommitMetadata deltaCommitMetada createMetaFile( basePath, HoodieTimeline.makeDeltaFileName(deltaCommitTime), - deltaCommitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)); + getUTF8Bytes(deltaCommitMetadata.toJsonString())); } private static void createMetaFile(String basePath, String fileName, byte[] bytes) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index 856b5266c97c..bb97e17a6d70 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -100,6 +100,7 @@ import static org.apache.hudi.common.model.HoodieRecord.RECORD_KEY_METADATA_FIELD; import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME; import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.hadoop.CachingPath.getPathWithoutSchemeAndAuthority; import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; @@ -1350,7 +1351,7 @@ public List getSortedBloomFilterList( .map(entry -> BloomFilterData.builder() .setPartitionPath(entry.getKey().getKey()) .setFilename(entry.getKey().getValue()) - .setBloomFilter(ByteBuffer.wrap(entry.getValue().serializeToString().getBytes())) + .setBloomFilter(ByteBuffer.wrap(getUTF8Bytes(entry.getValue().serializeToString()))) .build()) .sorted() .collect(Collectors.toList()); @@ -1390,7 +1391,7 @@ private Option readBloomFilterFromFile(String partitionPath, St return Option.of(BloomFilterData.builder() .setPartitionPath(partitionPath) .setFilename(filename) - .setBloomFilter(ByteBuffer.wrap(bloomFilter.serializeToString().getBytes())) + .setBloomFilter(ByteBuffer.wrap(getUTF8Bytes(bloomFilter.serializeToString()))) .build()); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index 8e2e01c73aa9..3490c0689656 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -62,6 +62,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + public class TimelineServerPerf implements Serializable { private static final long serialVersionUID = 1L; @@ -200,7 +202,7 @@ public void init() throws IOException { private void addHeader() throws IOException { String header = "Partition,Thread,Min,Max,Mean,Median,75th,95th\n"; - outputStream.write(header.getBytes()); + outputStream.write(getUTF8Bytes(header)); outputStream.flush(); } @@ -210,7 +212,7 @@ public void dump(List stats) { x.medianTime, x.p75, x.p95); System.out.println(row); try { - outputStream.write(row.getBytes()); + outputStream.write(getUTF8Bytes(row)); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java index c3541e6aab07..0f65dd338d03 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java @@ -41,7 +41,6 @@ import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; -import java.nio.charset.StandardCharsets; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; @@ -54,6 +53,7 @@ import static org.apache.hudi.common.util.ConfigUtils.checkRequiredConfigProperties; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Obtains latest schema from the Confluent/Kafka schema-registry. @@ -149,7 +149,7 @@ protected HttpURLConnection getConnection(String url) throws IOException { } protected void setAuthorizationHeader(String creds, HttpURLConnection connection) { - String encodedAuth = Base64.getEncoder().encodeToString(creds.getBytes(StandardCharsets.UTF_8)); + String encodedAuth = Base64.getEncoder().encodeToString(getUTF8Bytes(creds)); connection.setRequestProperty("Authorization", "Basic " + encodedAuth); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java index 0e92bc7b1595..cf8532d65c85 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java @@ -56,6 +56,8 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; + /** * A utility class to help translate from Proto to Avro. */ @@ -129,7 +131,7 @@ private static class AvroSupport { private static final String OVERFLOW_BYTES_FIELD_NAME = "proto_bytes"; private static final Schema RECURSION_OVERFLOW_SCHEMA = Schema.createRecord("recursion_overflow", null, "org.apache.hudi.proto", false, Arrays.asList(new Schema.Field(OVERFLOW_DESCRIPTOR_FIELD_NAME, STRING_SCHEMA, null, ""), - new Schema.Field(OVERFLOW_BYTES_FIELD_NAME, Schema.create(Schema.Type.BYTES), null, "".getBytes()))); + new Schema.Field(OVERFLOW_BYTES_FIELD_NAME, Schema.create(Schema.Type.BYTES), null, getUTF8Bytes("")))); // A cache of the proto class name paired with whether wrapped primitives should be flattened as the key and the generated avro schema as the value private static final Map SCHEMA_CACHE = new ConcurrentHashMap<>(); // A cache with a key as the pair target avro schema and the proto descriptor for the source and the value as an array of proto field descriptors where the order matches the avro ordering. diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java index 87f875642be3..80b6479f3189 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java @@ -57,7 +57,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -70,6 +69,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.nonEmpty; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; import static org.apache.hudi.hive.testutils.HiveTestService.HS2_JDBC_URL; @@ -474,7 +474,7 @@ static void addCommitToTimeline(HoodieTableMetaClient metaClient, WriteOperation metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime)); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime), - Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + Option.of(getUTF8Bytes(commitMetadata.toJsonString()))); } void assertRecordCount(long expected, String tablePath, SQLContext sqlContext) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java index 9a62c14e5caa..dca7d8a7ce13 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java @@ -55,6 +55,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -272,7 +273,7 @@ public List createUpsertRecords(Path srcFolder) throws ParseExcep private void createSchemaFile(String schemaFile) throws IOException { FSDataOutputStream schemaFileOS = dfs().create(new Path(schemaFile)); - schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes()); + schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); schemaFileOS.close(); } @@ -291,7 +292,7 @@ public void testSchemaFile() throws Exception { // Should fail - return : -1. assertEquals(-1, dataImporter.dataImport(jsc(), 0)); - dfs().create(schemaFile).write("Random invalid schema data".getBytes()); + dfs().create(schemaFile).write(getUTF8Bytes("Random invalid schema data")); // Should fail - return : -1. assertEquals(-1, dataImporter.dataImport(jsc(), 0)); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java index 59e04d77602b..abbe983cbce6 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java @@ -31,8 +31,8 @@ import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; -import java.nio.charset.StandardCharsets; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.ArgumentMatchers.eq; @@ -73,7 +73,7 @@ private static TypedProperties getProps() { } private static SchemaRegistryProvider getUnderTest(TypedProperties props) throws IOException { - InputStream is = new ByteArrayInputStream(REGISTRY_RESPONSE.getBytes(StandardCharsets.UTF_8)); + InputStream is = new ByteArrayInputStream(getUTF8Bytes(REGISTRY_RESPONSE)); SchemaRegistryProvider spyUnderTest = Mockito.spy(new SchemaRegistryProvider(props, null)); Mockito.doReturn(is).when(spyUnderTest).getStream(Mockito.any()); return spyUnderTest; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java index 83108ee0c7e8..936a6e45a1bc 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java @@ -18,15 +18,16 @@ package org.apache.hudi.utilities.sources; -import com.google.protobuf.ByteString; -import com.google.pubsub.v1.PubsubMessage; -import com.google.pubsub.v1.ReceivedMessage; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.sources.helpers.gcs.PubsubMessagesFetcher; import org.apache.hudi.utilities.testutils.UtilitiesTestBase; + +import com.google.protobuf.ByteString; +import com.google.pubsub.v1.PubsubMessage; +import com.google.pubsub.v1.ReceivedMessage; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.jupiter.api.BeforeAll; @@ -34,14 +35,17 @@ import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.MockitoAnnotations; -import static org.junit.jupiter.api.Assertions.assertEquals; + import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.utilities.config.GCSEventsSourceConfig.GOOGLE_PROJECT_ID; import static org.apache.hudi.utilities.config.GCSEventsSourceConfig.PUBSUB_SUBSCRIPTION_ID; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -266,8 +270,8 @@ private Map createBasicAttrs(String objectId, String eventType) private PubsubMessage.Builder messageWithAttrs(Map attrs, String dataMessage) { return PubsubMessage.newBuilder() - .putAllAttributes(new HashMap<>(attrs)) - .setData(ByteString.copyFrom(dataMessage.getBytes())); + .putAllAttributes(new HashMap<>(attrs)) + .setData(ByteString.copyFrom(getUTF8Bytes(dataMessage))); } private void assertBucket(Row row, String expectedBucketName) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java index 305eaa920bc9..52376f897419 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestProtoKafkaSource.java @@ -63,6 +63,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -158,7 +159,7 @@ private static List createSampleMessages(int count) { .setPrimitiveFixedSignedLong(RANDOM.nextLong()) .setPrimitiveBoolean(RANDOM.nextBoolean()) .setPrimitiveString(UUID.randomUUID().toString()) - .setPrimitiveBytes(ByteString.copyFrom(UUID.randomUUID().toString().getBytes())); + .setPrimitiveBytes(ByteString.copyFrom(getUTF8Bytes(UUID.randomUUID().toString()))); // randomly set nested messages, lists, and maps to test edge cases if (RANDOM.nextBoolean()) { @@ -179,7 +180,7 @@ private static List createSampleMessages(int count) { .setWrappedDouble(DoubleValue.of(RANDOM.nextDouble())) .setWrappedFloat(FloatValue.of(RANDOM.nextFloat())) .setWrappedBoolean(BoolValue.of(RANDOM.nextBoolean())) - .setWrappedBytes(BytesValue.of(ByteString.copyFrom(UUID.randomUUID().toString().getBytes()))) + .setWrappedBytes(BytesValue.of(ByteString.copyFrom(getUTF8Bytes(UUID.randomUUID().toString())))) .setEnum(SampleEnum.SECOND) .setTimestamp(Timestamps.fromMillis(System.currentTimeMillis())); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java index 1690132deaac..6fe7d9aeafb9 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java @@ -37,6 +37,7 @@ import com.google.protobuf.Timestamp; import com.google.protobuf.UInt32Value; import com.google.protobuf.UInt64Value; +import com.google.protobuf.util.Timestamps; import org.apache.avro.Conversions; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; @@ -44,7 +45,6 @@ import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.GenericRecord; -import com.google.protobuf.util.Timestamps; import org.apache.avro.io.BinaryDecoder; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; @@ -67,6 +67,7 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.utilities.sources.helpers.ProtoConversionUtil.toUnsignedBigInteger; public class TestProtoConversionUtil { @@ -205,7 +206,7 @@ private Pair createInputOutputSampleWithRandomValues(Sche long primitiveFixedSignedLong = RANDOM.nextLong(); boolean primitiveBoolean = RANDOM.nextBoolean(); String primitiveString = randomString(10); - byte[] primitiveBytes = randomString(10).getBytes(); + byte[] primitiveBytes = getUTF8Bytes(randomString(10)); double wrappedDouble = RANDOM.nextDouble(); float wrappedFloat = RANDOM.nextFloat(); @@ -215,7 +216,7 @@ private Pair createInputOutputSampleWithRandomValues(Sche long wrappedUnsignedLong = primitiveUnsignedLongInUnsignedRange ? RANDOM.nextLong() : Long.parseUnsignedLong(MAX_UNSIGNED_LONG) - RANDOM.nextInt(1000); boolean wrappedBoolean = RANDOM.nextBoolean(); String wrappedString = randomString(10); - byte[] wrappedBytes = randomString(10).getBytes(); + byte[] wrappedBytes = getUTF8Bytes(randomString(10)); SampleEnum enumValue = SampleEnum.forNumber(RANDOM.nextInt(1)); List primitiveList = Arrays.asList(RANDOM.nextInt(), RANDOM.nextInt(), RANDOM.nextInt()); @@ -358,7 +359,7 @@ private GenericRecord createDefaultOutput(Schema schema) { expectedRecord.put("primitive_fixed_signed_long", 0L); expectedRecord.put("primitive_boolean", false); expectedRecord.put("primitive_string", ""); - expectedRecord.put("primitive_bytes", ByteBuffer.wrap("".getBytes())); + expectedRecord.put("primitive_bytes", ByteBuffer.wrap(getUTF8Bytes(""))); expectedRecord.put("repeated_primitive", Collections.emptyList()); expectedRecord.put("map_primitive", Collections.emptyList()); expectedRecord.put("nested_message", null); From 232255ed47383920a6830c0cf599129cba6c65c0 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 12 Sep 2023 04:57:01 -0700 Subject: [PATCH 003/112] [HUDI-6826] Port BloomFilter related classes from Hadoop library to remove dependency (#9649) --- .../hudi/common/bloom/BloomFilterFactory.java | 2 +- .../hudi/common/bloom/HashFunction.java | 99 ++++++ .../HoodieDynamicBoundedBloomFilter.java | 4 +- .../common/bloom/InternalBloomFilter.java | 245 +++++++++++++++ .../bloom/InternalDynamicBloomFilter.java | 33 +- .../hudi/common/bloom/InternalFilter.java | 30 +- .../org/apache/hudi/common/bloom/Key.java | 174 +++++++++++ .../hudi/common/bloom/SimpleBloomFilter.java | 16 +- .../apache/hudi/common/util/hash/Hash.java | 110 +++++++ .../hudi/common/util/hash/JenkinsHash.java | 285 ++++++++++++++++++ .../hudi/common/util/hash/MurmurHash.java | 90 ++++++ .../bloom/TestInternalDynamicBloomFilter.java | 3 +- 12 files changed, 1047 insertions(+), 44 deletions(-) create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java index a1ace65f2ff1..68f1a6911bbd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java @@ -18,7 +18,7 @@ package org.apache.hudi.common.bloom; -import org.apache.hadoop.util.hash.Hash; +import org.apache.hudi.common.util.hash.Hash; /** * A Factory class to generate different versions of {@link BloomFilter}. diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java new file mode 100644 index 000000000000..e2637b10d6df --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.bloom; + +import org.apache.hudi.common.util.hash.Hash; + +/** + * Implements a hash object that returns a certain number of hashed values. + * + * @see Key The general behavior of a key being stored in a bloom filter + * @see InternalBloomFilter The general behavior of a bloom filter + */ +public class HashFunction { + /** + * The number of hashed values. + */ + private int nbHash; + + /** + * The maximum highest returned value. + */ + private int maxValue; + + /** + * Hashing algorithm to use. + */ + private Hash hashFunction; + + /** + * Constructor. + *

+ * Builds a hash function that must obey to a given maximum number of returned values and a highest value. + * + * @param maxValue The maximum highest returned value. + * @param nbHash The number of resulting hashed values. + * @param hashType type of the hashing function (see {@link Hash}). + */ + public HashFunction(int maxValue, int nbHash, int hashType) { + if (maxValue <= 0) { + throw new IllegalArgumentException("maxValue must be > 0"); + } + + if (nbHash <= 0) { + throw new IllegalArgumentException("nbHash must be > 0"); + } + + this.maxValue = maxValue; + this.nbHash = nbHash; + this.hashFunction = Hash.getInstance(hashType); + if (this.hashFunction == null) { + throw new IllegalArgumentException("hashType must be known"); + } + } + + /** + * Clears this hash function. A NOOP + */ + public void clear() { + } + + /** + * Hashes a specified key into several integers. + * + * @param k The specified key. + * @return The array of hashed values. + */ + public int[] hash(Key k) { + byte[] b = k.getBytes(); + if (b == null) { + throw new NullPointerException("buffer reference is null"); + } + if (b.length == 0) { + throw new IllegalArgumentException("key length must be > 0"); + } + int[] result = new int[nbHash]; + for (int i = 0, initval = 0; i < nbHash; i++) { + initval = hashFunction.hash(b, initval); + result[i] = Math.abs(initval % maxValue); + } + return result; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java index 22e2c6889357..3825b6634bea 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java @@ -21,8 +21,6 @@ import org.apache.hudi.common.util.Base64CodecUtil; import org.apache.hudi.exception.HoodieIndexException; -import org.apache.hadoop.util.bloom.Key; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -46,7 +44,7 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter { * * @param numEntries The total number of entries. * @param errorRate maximum allowable error rate. - * @param hashType type of the hashing function (see {@link org.apache.hadoop.util.hash.Hash}). + * @param hashType type of the hashing function (see {@link org.apache.hudi.common.util.hash.Hash}). * @return the {@link HoodieDynamicBoundedBloomFilter} thus created */ HoodieDynamicBoundedBloomFilter(int numEntries, double errorRate, int hashType, int maxNoOfEntries) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java new file mode 100644 index 000000000000..4e2c56d163f1 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + *

+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.hudi.common.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.BitSet; + +/** + * Implements a Bloom filter, as defined by Bloom in 1970. + *

+ * The code in class is adapted from {@link org.apache.hadoop.util.bloom.BloomFilter} in Apache Hadoop. + *

+ * Hudi serializes bloom filter(s) and write them to Parquet file footers and metadata table's + * bloom filter partition containing bloom filters for all data files. We want to maintain the + * serde of a bloom filter and thus the code in Hudi repo to avoid breaking changes in storage + * format and bytes. + *

+ * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + * the networking research community in the past decade thanks to the bandwidth efficiencies that it + * offers for the transmission of set membership information between networked hosts. A sender encodes + * the information into a bit vector, the Bloom filter, that is more compact than a conventional + * representation. Computation and space costs for construction are linear in the number of elements. + * The receiver uses the filter to test whether various elements are members of the set. Though the + * filter will occasionally return a false positive, it will never return a false negative. When creating + * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + * + *

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Space/Time Trade-Offs in Hash Coding with Allowable Errors + */ +public class InternalBloomFilter extends InternalFilter { + private static final byte[] BIT_VALUES = new byte[] { + (byte) 0x01, + (byte) 0x02, + (byte) 0x04, + (byte) 0x08, + (byte) 0x10, + (byte) 0x20, + (byte) 0x40, + (byte) 0x80 + }; + + /** + * The bit vector. + */ + BitSet bits; + + /** + * Default constructor - use with readFields + */ + public InternalBloomFilter() { + super(); + } + + /** + * Constructor + * + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hudi.common.util.hash.Hash}). + */ + public InternalBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + bits = new BitSet(this.vectorSize); + } + + /** + * Adds a key to this filter. + * + * @param key The key to add. + */ + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + bits.set(h[i]); + } + } + + @Override + public void and(InternalFilter filter) { + if (filter == null + || !(filter instanceof InternalBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + this.bits.and(((InternalBloomFilter) filter).bits); + } + + @Override + public boolean membershipTest(Key key) { + if (key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + for (int i = 0; i < nbHash; i++) { + if (!bits.get(h[i])) { + return false; + } + } + return true; + } + + @Override + public void not() { + bits.flip(0, vectorSize); + } + + @Override + public void or(InternalFilter filter) { + if (filter == null + || !(filter instanceof InternalBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + bits.or(((InternalBloomFilter) filter).bits); + } + + @Override + public void xor(InternalFilter filter) { + if (filter == null + || !(filter instanceof InternalBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + bits.xor(((InternalBloomFilter) filter).bits); + } + + @Override + public String toString() { + return bits.toString(); + } + + /** + * @return size of the the bloomfilter + */ + public int getVectorSize() { + return this.vectorSize; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + byte[] bytes = new byte[getNBytes()]; + for (int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if (bitIndex == 0) { + bytes[byteIndex] = 0; + } + if (bits.get(i)) { + bytes[byteIndex] |= BIT_VALUES[bitIndex]; + } + } + out.write(bytes); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + bits = new BitSet(this.vectorSize); + byte[] bytes = new byte[getNBytes()]; + in.readFully(bytes); + for (int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if ((bytes[byteIndex] & BIT_VALUES[bitIndex]) != 0) { + bits.set(i); + } + } + } + + /* @return number of bytes needed to hold bit vector */ + private int getNBytes() { + return (int) (((long) vectorSize + 7) / 8); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalDynamicBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalDynamicBloomFilter.java index c464967a2a2d..3e068294a0bd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalDynamicBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalDynamicBloomFilter.java @@ -18,9 +18,6 @@ package org.apache.hudi.common.bloom; -import org.apache.hadoop.util.bloom.BloomFilter; -import org.apache.hadoop.util.bloom.Key; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -48,7 +45,7 @@ class InternalDynamicBloomFilter extends InternalFilter { /** * The matrix of Bloom filter. */ - private org.apache.hadoop.util.bloom.BloomFilter[] matrix; + private InternalBloomFilter[] matrix; /** * Zero-args constructor for the serialization. @@ -63,7 +60,7 @@ public InternalDynamicBloomFilter() { * * @param vectorSize The number of bits in the vector. * @param nbHash The number of hash function to consider. - * @param hashType type of the hashing function (see {@link org.apache.hadoop.util.hash.Hash}). + * @param hashType type of the hashing function (see {@link org.apache.hudi.common.util.hash.Hash}). * @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row. */ public InternalDynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr, int maxNr) { @@ -73,8 +70,8 @@ public InternalDynamicBloomFilter(int vectorSize, int nbHash, int hashType, int this.currentNbRecord = 0; this.maxNr = maxNr; - matrix = new org.apache.hadoop.util.bloom.BloomFilter[1]; - matrix[0] = new org.apache.hadoop.util.bloom.BloomFilter(this.vectorSize, this.nbHash, this.hashType); + matrix = new InternalBloomFilter[1]; + matrix[0] = new InternalBloomFilter(this.vectorSize, this.nbHash, this.hashType); } @Override @@ -83,7 +80,7 @@ public void add(Key key) { throw new NullPointerException("Key can not be null"); } - org.apache.hadoop.util.bloom.BloomFilter bf = getActiveStandardBF(); + InternalBloomFilter bf = getActiveStandardBF(); if (bf == null) { addRow(); @@ -121,7 +118,7 @@ public boolean membershipTest(Key key) { return true; } - for (BloomFilter bloomFilter : matrix) { + for (InternalBloomFilter bloomFilter : matrix) { if (bloomFilter.membershipTest(key)) { return true; } @@ -132,7 +129,7 @@ public boolean membershipTest(Key key) { @Override public void not() { - for (BloomFilter bloomFilter : matrix) { + for (InternalBloomFilter bloomFilter : matrix) { bloomFilter.not(); } } @@ -177,7 +174,7 @@ public void xor(InternalFilter filter) { public String toString() { StringBuilder res = new StringBuilder(); - for (BloomFilter bloomFilter : matrix) { + for (InternalBloomFilter bloomFilter : matrix) { res.append(bloomFilter); res.append(Character.LINE_SEPARATOR); } @@ -192,7 +189,7 @@ public void write(DataOutput out) throws IOException { out.writeInt(nr); out.writeInt(currentNbRecord); out.writeInt(matrix.length); - for (BloomFilter bloomFilter : matrix) { + for (InternalBloomFilter bloomFilter : matrix) { bloomFilter.write(out); } } @@ -203,9 +200,9 @@ public void readFields(DataInput in) throws IOException { nr = in.readInt(); currentNbRecord = in.readInt(); int len = in.readInt(); - matrix = new org.apache.hadoop.util.bloom.BloomFilter[len]; + matrix = new InternalBloomFilter[len]; for (int i = 0; i < matrix.length; i++) { - matrix[i] = new org.apache.hadoop.util.bloom.BloomFilter(); + matrix[i] = new InternalBloomFilter(); matrix[i].readFields(in); } } @@ -214,19 +211,19 @@ public void readFields(DataInput in) throws IOException { * Adds a new row to this dynamic Bloom filter. */ private void addRow() { - BloomFilter[] tmp = new BloomFilter[matrix.length + 1]; + InternalBloomFilter[] tmp = new InternalBloomFilter[matrix.length + 1]; System.arraycopy(matrix, 0, tmp, 0, matrix.length); - tmp[tmp.length - 1] = new BloomFilter(vectorSize, nbHash, hashType); + tmp[tmp.length - 1] = new InternalBloomFilter(vectorSize, nbHash, hashType); matrix = tmp; } /** * Returns the active standard Bloom filter in this dynamic Bloom filter. * - * @return BloomFilter The active standard Bloom filter. + * @return SingleBloomFilter The active standard Bloom filter. * Null otherwise. */ - private BloomFilter getActiveStandardBF() { + private InternalBloomFilter getActiveStandardBF() { if (reachedMax) { return matrix[curMatrixIndex++ % matrix.length]; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java index 0737622f5a92..87854edd313c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java @@ -18,10 +18,7 @@ package org.apache.hudi.common.bloom; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.util.bloom.HashFunction; -import org.apache.hadoop.util.bloom.Key; -import org.apache.hadoop.util.hash.Hash; +import org.apache.hudi.common.util.hash.Hash; import java.io.DataInput; import java.io.DataOutput; @@ -30,15 +27,28 @@ import java.util.List; /** - * Copied from {@link org.apache.hadoop.util.bloom.Filter}. {@link InternalDynamicBloomFilter} needs access to some of - * protected members of {@link org.apache.hadoop.util.bloom.Filter} and hence had to copy it locally. + * Ported from {@link org.apache.hadoop.util.bloom.Filter}. */ -abstract class InternalFilter implements Writable { - +abstract class InternalFilter { private static final int VERSION = -1; // negative to accommodate for old format + /** + * The vector size of this filter. + */ protected int vectorSize; + + /** + * The hash function used to map a key to several positions in the vector. + */ protected HashFunction hash; + + /** + * The number of hash function to consider. + */ protected int nbHash; + + /** + * Type of hashing function to use. + */ protected int hashType; protected InternalFilter() { @@ -150,9 +160,6 @@ public void add(Key[] keys) { } } //end add() - // Writable interface - - @Override public void write(DataOutput out) throws IOException { out.writeInt(VERSION); out.writeInt(this.nbHash); @@ -160,7 +167,6 @@ public void write(DataOutput out) throws IOException { out.writeInt(this.vectorSize); } - @Override public void readFields(DataInput in) throws IOException { int ver = in.readInt(); if (ver > 0) { // old non-versioned format diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java new file mode 100644 index 000000000000..b762f14d0637 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * The general behavior of a key that must be stored in a bloom filter. + * + * @see InternalBloomFilter The general behavior of a bloom filter and how the key is used. + */ +public final class Key implements Comparable { + /** + * Byte value of key + */ + byte[] bytes; + + /** + * The weight associated to this key. + *

+ * Invariant: if it is not specified, each instance of + * Key will have a default weight of 1.0 + */ + double weight; + + /** + * default constructor - use with readFields + */ + public Key() { + } + + /** + * Constructor. + *

+ * Builds a key with a default weight. + * + * @param value The byte value of this key. + */ + public Key(byte[] value) { + this(value, 1.0); + } + + /** + * Constructor. + *

+ * Builds a key with a specified weight. + * + * @param value The value of this key. + * @param weight The weight associated to this key. + */ + public Key(byte[] value, double weight) { + set(value, weight); + } + + /** + * @param value + * @param weight + */ + public void set(byte[] value, double weight) { + if (value == null) { + throw new IllegalArgumentException("value can not be null"); + } + this.bytes = value; + this.weight = weight; + } + + /** + * @return byte[] The value of this key. + */ + public byte[] getBytes() { + return this.bytes; + } + + /** + * @return Returns the weight associated to this key. + */ + public double getWeight() { + return weight; + } + + /** + * Increments the weight of this key with a specified value. + * + * @param weight The increment. + */ + public void incrementWeight(double weight) { + this.weight += weight; + } + + /** + * Increments the weight of this key by one. + */ + public void incrementWeight() { + this.weight++; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + return this.compareTo((Key) o) == 0; + } + + @Override + public int hashCode() { + int result = 0; + for (int i = 0; i < bytes.length; i++) { + result ^= Byte.valueOf(bytes[i]).hashCode(); + } + result ^= Double.valueOf(weight).hashCode(); + return result; + } + + /** + * Serialize the fields of this object to out. + * + * @param out DataOuput to serialize this object into. + * @throws IOException + */ + public void write(DataOutput out) throws IOException { + out.writeInt(bytes.length); + out.write(bytes); + out.writeDouble(weight); + } + + /** + * Deserialize the fields of this object from in. + * + *

For efficiency, implementations should attempt to re-use storage in the + * existing object where possible.

+ * + * @param in DataInput to deseriablize this object from. + * @throws IOException + */ + public void readFields(DataInput in) throws IOException { + this.bytes = new byte[in.readInt()]; + in.readFully(this.bytes); + weight = in.readDouble(); + } + + // Comparable + @Override + public int compareTo(Key other) { + int result = this.bytes.length - other.getBytes().length; + for (int i = 0; result == 0 && i < bytes.length; i++) { + result = this.bytes[i] - other.bytes[i]; + } + + if (result == 0) { + result = (int) (this.weight - other.weight); + } + return result; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java index adf0f058a26c..0183aedaf065 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java @@ -21,8 +21,6 @@ import org.apache.hudi.common.util.Base64CodecUtil; import org.apache.hudi.exception.HoodieIndexException; -import org.apache.hadoop.util.bloom.Key; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInput; @@ -36,19 +34,19 @@ import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** - * A Simple Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}. + * A Simple Bloom filter implementation built on top of {@link InternalBloomFilter}. */ public class SimpleBloomFilter implements BloomFilter { - private org.apache.hadoop.util.bloom.BloomFilter filter; + private InternalBloomFilter filter; /** * Create a new Bloom filter with the given configurations. * * @param numEntries The total number of entries. * @param errorRate maximum allowable error rate. - * @param hashType type of the hashing function (see {@link org.apache.hadoop.util.hash.Hash}). + * @param hashType type of the hashing function (see {@link org.apache.hudi.common.util.hash.Hash}). */ public SimpleBloomFilter(int numEntries, double errorRate, int hashType) { // Bit size @@ -56,7 +54,7 @@ public SimpleBloomFilter(int numEntries, double errorRate, int hashType) { // Number of the hash functions int numHashs = BloomFilterUtils.getNumHashes(bitSize, numEntries); // The filter - this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType); + this.filter = new InternalBloomFilter(bitSize, numHashs, hashType); } /** @@ -65,7 +63,7 @@ public SimpleBloomFilter(int numEntries, double errorRate, int hashType) { * @param serString serialized string which represents the {@link SimpleBloomFilter} */ public SimpleBloomFilter(String serString) { - this.filter = new org.apache.hadoop.util.bloom.BloomFilter(); + this.filter = new InternalBloomFilter(); byte[] bytes = Base64CodecUtil.decode(serString); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); try { @@ -120,7 +118,7 @@ private void writeObject(ObjectOutputStream os) } private void readObject(ObjectInputStream is) throws IOException { - filter = new org.apache.hadoop.util.bloom.BloomFilter(); + filter = new InternalBloomFilter(); filter.readFields(is); } @@ -131,7 +129,7 @@ public void write(DataOutput out) throws IOException { //@Override public void readFields(DataInput in) throws IOException { - filter = new org.apache.hadoop.util.bloom.BloomFilter(); + filter = new InternalBloomFilter(); filter.readFields(in); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java new file mode 100644 index 000000000000..22218191674d --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util.hash; + +import org.apache.hudi.common.bloom.InternalBloomFilter; + +/** + * This class represents a common API for hashing functions used by + * {@link InternalBloomFilter}. + */ +public abstract class Hash { + /** + * Constant to denote invalid hash type. + */ + public static final int INVALID_HASH = -1; + /** + * Constant to denote {@link JenkinsHash}. + */ + public static final int JENKINS_HASH = 0; + /** + * Constant to denote {@link MurmurHash}. + */ + public static final int MURMUR_HASH = 1; + + /** + * This utility method converts String representation of hash function name + * to a symbolic constant. Currently two function types are supported, + * "jenkins" and "murmur". + * + * @param name hash function name + * @return one of the predefined constants + */ + public static int parseHashType(String name) { + if ("jenkins".equalsIgnoreCase(name)) { + return JENKINS_HASH; + } else if ("murmur".equalsIgnoreCase(name)) { + return MURMUR_HASH; + } else { + return INVALID_HASH; + } + } + + /** + * Get a singleton instance of hash function of a given type. + * + * @param type predefined hash type + * @return hash function instance, or null if type is invalid + */ + public static Hash getInstance(int type) { + switch (type) { + case JENKINS_HASH: + return JenkinsHash.getInstance(); + case MURMUR_HASH: + return MurmurHash.getInstance(); + default: + return null; + } + } + + /** + * Calculate a hash using all bytes from the input argument, and + * a seed of -1. + * + * @param bytes input bytes + * @return hash value + */ + public int hash(byte[] bytes) { + return hash(bytes, bytes.length, -1); + } + + /** + * Calculate a hash using all bytes from the input argument, + * and a provided seed value. + * + * @param bytes input bytes + * @param initval seed value + * @return hash value + */ + public int hash(byte[] bytes, int initval) { + return hash(bytes, bytes.length, initval); + } + + /** + * Calculate a hash using bytes from 0 to length, and + * the provided seed value + * + * @param bytes input bytes + * @param length length of the valid bytes to consider + * @param initval seed value + * @return hash value + */ + public abstract int hash(byte[] bytes, int length, int initval); +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java new file mode 100644 index 000000000000..6b7a0e01d08d --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util.hash; + +import java.io.FileInputStream; +import java.io.IOException; + +/** + * Produces 32-bit hash for hash table lookup. + * + *
lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * You can use this free for any purpose.  It's in the public domain.
+ * It has no warranty.
+ * 
+ * + * @see lookup3.c + * @see Hash Functions (and how this + * function compares to others such as CRC, MD?, etc + * @see Has update on the + * Dr. Dobbs Article + */ +public class JenkinsHash extends Hash { + private static long INT_MASK = 0x00000000ffffffffL; + private static long BYTE_MASK = 0x00000000000000ffL; + + private static JenkinsHash _instance = new JenkinsHash(); + + public static Hash getInstance() { + return _instance; + } + + private static long rot(long val, int pos) { + return ((Integer.rotateLeft( + (int) (val & INT_MASK), pos)) & INT_MASK); + } + + /** + * taken from hashlittle() -- hash a variable-length key into a 32-bit value + * + * @param key the key (the unaligned variable-length array of bytes) + * @param nbytes number of bytes to include in hash + * @param initval can be any integer value + * @return a 32-bit value. Every bit of the key affects every bit of the + * return value. Two keys differing by one or two bits will have totally + * different hash values. + * + *

The best hash table sizes are powers of 2. There is no need to do mod + * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + * For example, if you need only 10 bits, do + * h = (h & hashmask(10)); + * In which case, the hash table should have hashsize(10) elements. + * + *

If you are hashing n strings byte[][] k, do it like this: + * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + * + *

By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + * code any way you wish, private, educational, or commercial. It's free. + * + *

Use for hash table lookup, or anything where one collision in 2^^32 is + * acceptable. Do NOT use for cryptographic purposes. + */ + @Override + @SuppressWarnings("fallthrough") + public int hash(byte[] key, int nbytes, int initval) { + int length = nbytes; + long a; + long b; + long c; // We use longs because we don't have unsigned ints + a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; + int offset = 0; + for (; length > 12; offset += 12, length -= 12) { + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + + /* + * mix -- mix 3 32-bit values reversibly. + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * + * This was tested for: + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + * satisfy this are + * 4 6 8 16 19 4 + * 9 15 3 18 27 15 + * 14 9 3 7 17 3 + * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for + * "differ" defined as + with a one-bit base and a two-bit delta. I + * used http://burtleburtle.net/bob/hash/avalanche.html to choose + * the operations, constants, and arrangements of the variables. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. + * The most thoroughly mixed value is c, but it doesn't really even + * achieve avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the + * opposite direction as the goal of parallelism. I did what I could. + * Rotates seem to cost as much as shifts on every machine I could lay + * my hands on, and rotates are much kinder to the top and bottom bits, + * so I used rotates. + * + * #define mix(a,b,c) \ + * { \ + * a -= c; a ^= rot(c, 4); c += b; \ + * b -= a; b ^= rot(a, 6); a += c; \ + * c -= b; c ^= rot(b, 8); b += a; \ + * a -= c; a ^= rot(c,16); c += b; \ + * b -= a; b ^= rot(a,19); a += c; \ + * c -= b; c ^= rot(b, 4); b += a; \ + * } + * + * mix(a,b,c); + */ + a = (a - c) & INT_MASK; + a ^= rot(c, 4); + c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; + b ^= rot(a, 6); + a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; + c ^= rot(b, 8); + b = (b + a) & INT_MASK; + a = (a - c) & INT_MASK; + a ^= rot(c, 16); + c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; + b ^= rot(a, 19); + a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; + c ^= rot(b, 4); + b = (b + a) & INT_MASK; + } + + //-------------------------------- last block: affect all 32 bits of (c) + // all the case statements fall through + switch (length) { + case 12: + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 11: + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 10: + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 9: + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + case 8: + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 7: + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 6: + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 5: + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + case 4: + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 3: + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 2: + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 1: + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + break; + case 0: + return (int) (c & INT_MASK); + default: + } + /* + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * These constants passed: + * 14 11 25 16 4 14 24 + * 12 14 25 16 4 14 24 + * and these came close: + * 4 8 15 26 3 22 24 + * 10 8 15 26 3 22 24 + * 11 8 15 26 3 22 24 + * + * #define final(a,b,c) \ + * { + * c ^= b; c -= rot(b,14); \ + * a ^= c; a -= rot(c,11); \ + * b ^= a; b -= rot(a,25); \ + * c ^= b; c -= rot(b,16); \ + * a ^= c; a -= rot(c,4); \ + * b ^= a; b -= rot(a,14); \ + * c ^= b; c -= rot(b,24); \ + * } + * + */ + c ^= b; + c = (c - rot(b, 14)) & INT_MASK; + a ^= c; + a = (a - rot(c, 11)) & INT_MASK; + b ^= a; + b = (b - rot(a, 25)) & INT_MASK; + c ^= b; + c = (c - rot(b, 16)) & INT_MASK; + a ^= c; + a = (a - rot(c, 4)) & INT_MASK; + b ^= a; + b = (b - rot(a, 14)) & INT_MASK; + c ^= b; + c = (c - rot(b, 24)) & INT_MASK; + + return (int) (c & INT_MASK); + } + + /** + * Compute the hash of the specified file + * + * @param args name of file to compute hash of. + * @throws IOException + */ + public static void main(String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Usage: JenkinsHash filename"); + System.exit(-1); + } + try (FileInputStream in = new FileInputStream(args[0])) { + byte[] bytes = new byte[512]; + int value = 0; + JenkinsHash hash = new JenkinsHash(); + for (int length = in.read(bytes); length > 0; length = in.read(bytes)) { + value = hash.hash(bytes, length, value); + } + System.out.println(Math.abs(value)); + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java new file mode 100644 index 000000000000..dd66da6dcdde --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util.hash; + +/** + * This is a very fast, non-cryptographic hash suitable for general hash-based + * lookup. See http://murmurhash.googlepages.com/ for more details. + * + *

The C version of MurmurHash 2.0 found at that site was ported + * to Java by Andrzej Bialecki (ab at getopt org).

+ */ +public class MurmurHash extends Hash { + private static MurmurHash _instance = new MurmurHash(); + + public static Hash getInstance() { + return _instance; + } + + @Override + public int hash(byte[] data, int length, int seed) { + return hash(data, 0, length, seed); + } + + public int hash(byte[] data, int offset, int length, int seed) { + int m = 0x5bd1e995; + int r = 24; + + int h = seed ^ length; + + int len4 = length >> 2; + + for (int i = 0; i < len4; i++) { + int i4 = offset + (i << 2); + int k = data[i4 + 3]; + k = k << 8; + k = k | (data[i4 + 2] & 0xff); + k = k << 8; + k = k | (data[i4 + 1] & 0xff); + k = k << 8; + k = k | (data[i4 + 0] & 0xff); + k *= m; + k ^= k >>> r; + k *= m; + h *= m; + h ^= k; + } + + // avoid calculating modulo + int lenM = len4 << 2; + int left = length - lenM; + + if (left != 0) { + length += offset; + if (left >= 3) { + h ^= (int) data[length - 3] << 16; + } + if (left >= 2) { + h ^= (int) data[length - 2] << 8; + } + if (left >= 1) { + h ^= (int) data[length - 1]; + } + + h *= m; + } + + h ^= h >>> 13; + h *= m; + h ^= h >>> 15; + + return h; + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestInternalDynamicBloomFilter.java b/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestInternalDynamicBloomFilter.java index 5940da15dd45..6a75a5643af5 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestInternalDynamicBloomFilter.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestInternalDynamicBloomFilter.java @@ -18,7 +18,8 @@ package org.apache.hudi.common.bloom; -import org.apache.hadoop.util.hash.Hash; +import org.apache.hudi.common.util.hash.Hash; + import org.junit.jupiter.api.Test; import java.util.UUID; From d0e98e163bd3db21d1afbcc325b10e6b9bff6088 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Wed, 13 Sep 2023 10:22:57 -0700 Subject: [PATCH 004/112] [HUDI-6850] Add tests and docs for ported Bloom Filter classes (#9700) --- LICENSE | 15 +++- .../hudi/common/bloom/HashFunction.java | 35 +++++++++- .../common/bloom/InternalBloomFilter.java | 3 + .../hudi/common/bloom/InternalFilter.java | 30 +++++++- .../org/apache/hudi/common/bloom/Key.java | 4 +- .../apache/hudi/common/util/hash/Hash.java | 2 + .../hudi/common/util/hash/JenkinsHash.java | 4 +- .../hudi/common/util/hash/MurmurHash.java | 4 +- .../hudi/common/bloom/TestBloomFilter.java | 70 +++++++++++++++++++ .../common/table/log/TestLogReaderUtils.java | 11 +-- .../common/testutils/FileSystemTestUtils.java | 10 +++ .../common/testutils/HoodieTestUtils.java | 4 +- .../bloom-filter/hadoop/all_10000.keys.data | 19 +++++ .../dynamic_1000_000001_jenkins_5000.bf.data | 19 +++++ .../dynamic_1000_000001_murmur_5000.bf.data | 19 +++++ .../dynamic_200_000001_murmur_1000.bf.data | 19 +++++ .../hadoop/simple_10000_000001_murmur.bf.data | 19 +++++ .../hadoop/simple_1000_000001_murmur.bf.data | 19 +++++ .../hadoop/simple_200_000001_murmur.bf.data | 19 +++++ .../hadoop/simple_5000_000001_jenkins.bf.data | 19 +++++ .../hadoop/simple_5000_000001_murmur.bf.data | 19 +++++ 21 files changed, 345 insertions(+), 18 deletions(-) create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/all_10000.keys.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_jenkins_5000.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_murmur_5000.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_200_000001_murmur_1000.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_10000_000001_murmur.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_1000_000001_murmur.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_200_000001_murmur.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_jenkins.bf.data create mode 100644 hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_murmur.bf.data diff --git a/LICENSE b/LICENSE index 28222a717e69..301ea869628b 100644 --- a/LICENSE +++ b/LICENSE @@ -291,7 +291,20 @@ This product includes code from Apache Hadoop * org.apache.hudi.common.bloom.InternalDynamicBloomFilter.java adapted from org.apache.hadoop.util.bloom.DynamicBloomFilter.java -* org.apache.hudi.common.bloom.InternalFilter copied from classes in org.apache.hadoop.util.bloom package +* org.apache.hudi.common.bloom.InternalFilter.java adapted from org.apache.hadoop.util.bloom.Filter.java + and org.apache.hadoop.io.Writable.java + +* org.apache.hudi.common.bloom.InternalBloomFilter adapted from org.apache.hadoop.util.bloom.BloomFilter.java + +* org.apache.hudi.common.bloom.Key.java adapted from org.apache.hadoop.util.bloom.Key.java + +* org.apache.hudi.common.bloom.HashFunction.java ported from org.apache.hadoop.util.bloom.HashFunction.java + +* org.apache.hudi.common.util.hash.Hash.java ported from org.apache.hadoop.util.hash.Hash.java + +* org.apache.hudi.common.util.hash.JenkinsHash.java ported from org.apache.hadoop.util.hash.JenkinsHash.java + +* org.apache.hudi.common.util.hash.MurmurHash.java ported from org.apache.hadoop.util.hash.MurmurHash.java with the following license diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java index e2637b10d6df..c6e6deb87273 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HashFunction.java @@ -16,6 +16,37 @@ * specific language governing permissions and limitations * under the License. */ +/** + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + *

+ * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + *

+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ package org.apache.hudi.common.bloom; @@ -23,11 +54,13 @@ /** * Implements a hash object that returns a certain number of hashed values. + *

+ * The code in class is ported from {@link org.apache.hadoop.util.bloom.HashFunction} in Apache Hadoop. * * @see Key The general behavior of a key being stored in a bloom filter * @see InternalBloomFilter The general behavior of a bloom filter */ -public class HashFunction { +public final class HashFunction { /** * The number of hashed values. */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java index 4e2c56d163f1..ac93de2d58fb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalBloomFilter.java @@ -57,6 +57,9 @@ * Implements a Bloom filter, as defined by Bloom in 1970. *

* The code in class is adapted from {@link org.apache.hadoop.util.bloom.BloomFilter} in Apache Hadoop. + * The serialization and deserialization are completely the same as and compatible with Hadoop's + * {@link org.apache.hadoop.util.bloom.BloomFilter}, so that this class correctly reads bloom + * filters serialized by older Hudi versions using Hadoop's BloomFilter. *

* Hudi serializes bloom filter(s) and write them to Parquet file footers and metadata table's * bloom filter partition containing bloom filters for all data files. We want to maintain the diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java index 87854edd313c..6b2e46ee0777 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java @@ -27,7 +27,20 @@ import java.util.List; /** - * Ported from {@link org.apache.hadoop.util.bloom.Filter}. + * Defines the general behavior of a filter. + *

+ * The code in class is adapted from {@link org.apache.hadoop.util.bloom.Filter} in Apache Hadoop. + *

+ * A filter is a data structure which aims at offering a lossy summary of a set A. The + * key idea is to map entries of A (also called keys) into several positions + * in a vector through the use of several hash functions. + *

+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). + *

+ * It must be extended in order to define the real behavior. + * + * @see Key The general behavior of a key + * @see HashFunction A hash function */ abstract class InternalFilter { private static final int VERSION = -1; // negative to accommodate for old format @@ -160,6 +173,12 @@ public void add(Key[] keys) { } } //end add() + /** + * Serialize the fields of this object to out. + * + * @param out DataOuput to serialize this object into. + * @throws IOException + */ public void write(DataOutput out) throws IOException { out.writeInt(VERSION); out.writeInt(this.nbHash); @@ -167,6 +186,15 @@ public void write(DataOutput out) throws IOException { out.writeInt(this.vectorSize); } + /** + * Deserialize the fields of this object from in. + * + *

For efficiency, implementations should attempt to re-use storage in the + * existing object where possible.

+ * + * @param in DataInput to deseriablize this object from. + * @throws IOException + */ public void readFields(DataInput in) throws IOException { int ver = in.readInt(); if (ver > 0) { // old non-versioned format diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java index b762f14d0637..37ae6e68f73a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/Key.java @@ -25,10 +25,12 @@ /** * The general behavior of a key that must be stored in a bloom filter. + *

+ * The code in class is adapted from {@link org.apache.hadoop.util.bloom.Key} in Apache Hadoop. * * @see InternalBloomFilter The general behavior of a bloom filter and how the key is used. */ -public final class Key implements Comparable { +public class Key implements Comparable { /** * Byte value of key */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java index 22218191674d..a5e5d4a2f9a7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/Hash.java @@ -24,6 +24,8 @@ /** * This class represents a common API for hashing functions used by * {@link InternalBloomFilter}. + *

+ * The code in class is ported from {@link org.apache.hadoop.util.hash.Hash} in Apache Hadoop. */ public abstract class Hash { /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java index 6b7a0e01d08d..a254a78970f3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/JenkinsHash.java @@ -24,7 +24,9 @@ /** * Produces 32-bit hash for hash table lookup. - * + *

+ * The code in class is ported from {@link org.apache.hadoop.util.hash.JenkinsHash} in Apache Hadoop. + *

*

lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  *
  * You can use this free for any purpose.  It's in the public domain.
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java
index dd66da6dcdde..dcd074b881d1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/MurmurHash.java
@@ -22,7 +22,9 @@
 /**
  * This is a very fast, non-cryptographic hash suitable for general hash-based
  * lookup.  See http://murmurhash.googlepages.com/ for more details.
- *
+ * 

+ * The code in class is ported from {@link org.apache.hadoop.util.hash.MurmurHash} in Apache Hadoop. + *

*

The C version of MurmurHash 2.0 found at that site was ported * to Java by Andrzej Bialecki (ab at getopt org).

*/ diff --git a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java b/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java index 552098e71bb1..2e72b3737a0d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/bloom/TestBloomFilter.java @@ -18,15 +18,21 @@ package org.apache.hudi.common.bloom; +import org.apache.hudi.common.util.hash.Hash; + import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.UUID; +import java.util.stream.Collectors; +import static org.apache.hudi.common.testutils.FileSystemTestUtils.readLastLineFromResourceFile; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -92,6 +98,51 @@ public void testSerialize(String typeCode) { } } + public static List bloomFilterParams() { + return Arrays.asList( + Arguments.of("hadoop", BloomFilterTypeCode.SIMPLE.name(), 200, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hadoop", BloomFilterTypeCode.SIMPLE.name(), 1000, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hadoop", BloomFilterTypeCode.SIMPLE.name(), 5000, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hadoop", BloomFilterTypeCode.SIMPLE.name(), 10000, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hadoop", BloomFilterTypeCode.SIMPLE.name(), 5000, 0.000001, Hash.JENKINS_HASH, -1), + Arguments.of("hadoop", BloomFilterTypeCode.DYNAMIC_V0.name(), 200, 0.000001, Hash.MURMUR_HASH, 1000), + Arguments.of("hadoop", BloomFilterTypeCode.DYNAMIC_V0.name(), 1000, 0.000001, Hash.MURMUR_HASH, 5000), + Arguments.of("hadoop", BloomFilterTypeCode.DYNAMIC_V0.name(), 1000, 0.000001, Hash.JENKINS_HASH, 5000), + Arguments.of("hudi", BloomFilterTypeCode.SIMPLE.name(), 1000, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hudi", BloomFilterTypeCode.SIMPLE.name(), 5000, 0.000001, Hash.MURMUR_HASH, -1), + Arguments.of("hudi", BloomFilterTypeCode.DYNAMIC_V0.name(), 1000, 0.000001, Hash.MURMUR_HASH, 5000) + ); + } + + @ParameterizedTest + @MethodSource("bloomFilterParams") + public void testDeserialize(String lib, String typeCode, int numEntries, + double errorRate, int hashType, int maxEntries) throws IOException { + // When the "lib" = "hadoop", this tests the backwards compatibility so that Hudi's + // {@link InternalBloomFilter} correctly reads the bloom filters serialized by Hadoop + List keyList = Arrays.stream( + readLastLineFromResourceFile("/format/bloom-filter/hadoop/all_10000.keys.data").split(",")) + .collect(Collectors.toList()); + String serializedFilter; + if ("hadoop".equals(lib)) { + String fileName = (BloomFilterTypeCode.DYNAMIC_V0.name().equals(typeCode) ? "dynamic" : "simple") + + "_" + numEntries + + "_000001_" + + (hashType == Hash.MURMUR_HASH ? "murmur" : "jenkins") + + (BloomFilterTypeCode.DYNAMIC_V0.name().equals(typeCode) ? "_" + maxEntries : "") + + ".bf.data"; + serializedFilter = readLastLineFromResourceFile("/format/bloom-filter/hadoop/" + fileName); + } else { + BloomFilter inputFilter = getBloomFilter(typeCode, numEntries, errorRate, maxEntries); + for (String key : keyList) { + inputFilter.add(key); + } + serializedFilter = inputFilter.serializeToString(); + } + validateBloomFilter( + serializedFilter, keyList, lib, typeCode, numEntries, errorRate, hashType, maxEntries); + } + BloomFilter getBloomFilter(String typeCode, int numEntries, double errorRate, int maxEntries) { if (typeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) { return BloomFilterFactory.createBloomFilter(numEntries, errorRate, -1, typeCode); @@ -99,4 +150,23 @@ BloomFilter getBloomFilter(String typeCode, int numEntries, double errorRate, in return BloomFilterFactory.createBloomFilter(numEntries, errorRate, maxEntries, typeCode); } } + + private void validateBloomFilter(String serializedFilter, List keyList, String lib, + String typeCode, int numEntries, double errorRate, + int hashType, int maxEntries) { + BloomFilter bloomFilter = BloomFilterFactory + .fromString(serializedFilter, typeCode); + for (String key : keyList) { + assertTrue(bloomFilter.mightContain(key), "Filter should have returned true for " + key); + } + if ("hadoop".equals(lib) && hashType == Hash.MURMUR_HASH) { + BloomFilter hudiBloomFilter = getBloomFilter(typeCode, numEntries, errorRate, maxEntries); + for (String key : keyList) { + hudiBloomFilter.add(key); + } + // Hadoop library-serialized bloom filter should be exactly the same as Hudi one, + // unless we made our customization in the future + assertEquals(hudiBloomFilter.serializeToString(), serializedFilter); + } + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java index 69b1bddc5cfe..fd8e3a5cd286 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/log/TestLogReaderUtils.java @@ -19,13 +19,10 @@ package org.apache.hudi.common.table.log; -import org.apache.hudi.common.util.FileIOUtils; - import org.junit.jupiter.api.Test; import org.roaringbitmap.longlong.Roaring64NavigableMap; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -35,6 +32,7 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.apache.hudi.common.testutils.FileSystemTestUtils.readLastLineFromResourceFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -92,11 +90,4 @@ public static void assertPositionEquals(List expectedPositions, assertFalse(expectedIterator.hasNext()); assertFalse(iterator.hasNext()); } - - private String readLastLineFromResourceFile(String resourceName) throws IOException { - try (InputStream inputStream = TestLogReaderUtils.class.getResourceAsStream(resourceName)) { - List lines = FileIOUtils.readAsUTFStringLines(inputStream); - return lines.get(lines.size() - 1); - } - } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java index 82de0f3317fa..e73f2bb04407 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FileSystemTestUtils.java @@ -21,6 +21,8 @@ import org.apache.hudi.common.fs.inline.InLineFSUtils; import org.apache.hudi.common.fs.inline.InLineFileSystem; import org.apache.hudi.common.fs.inline.InMemoryFileSystem; +import org.apache.hudi.common.table.log.TestLogReaderUtils; +import org.apache.hudi.common.util.FileIOUtils; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -30,6 +32,7 @@ import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -86,4 +89,11 @@ public static List listFiles(FileSystem fs, Path path, boolean recur } return statuses; } + + public static String readLastLineFromResourceFile(String resourceName) throws IOException { + try (InputStream inputStream = TestLogReaderUtils.class.getResourceAsStream(resourceName)) { + List lines = FileIOUtils.readAsUTFStringLines(inputStream); + return lines.get(lines.size() - 1); + } + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index 7100ab9af341..a8e5ffda7078 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.testutils; -import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieFileFormat; @@ -34,6 +33,8 @@ import com.esotericsoftware.kryo.io.Output; import com.esotericsoftware.kryo.serializers.JavaSerializer; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.junit.jupiter.api.Assumptions; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -44,7 +45,6 @@ import java.util.Objects; import java.util.Properties; import java.util.UUID; -import org.junit.jupiter.api.Assumptions; /** * A utility class for testing. diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/all_10000.keys.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/all_10000.keys.data new file mode 100644 index 000000000000..5d11b297de68 --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/all_10000.keys.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +90d77c70-a0ef-4cc4-a376-1904e9cf2b52,38db2a3b-7e9d-4774-998f-43d3389dd828,9171563b-e57e-438a-ba10-47197df85c77,07561753-19c0-4d02-8f59-4efebe2692a8,ccc09818-13bf-4024-af7e-c39b160539d3,de0088d1-33a4-4df1-86ef-b2fd8db2484b,7a1b5242-1c29-4c62-a2e9-452c22944a2f,bce526bf-471b-462b-b98c-138ec44a8f2d,7aa186da-4f51-49f8-bde0-e4b375968b1f,7ed111bd-5b3e-4381-8842-df54a4b7ef4b,7f05efb8-53c9-459e-b9b2-fd29a37b311f,d2250bf1-8a18-4f90-8a78-9f4b954054f7,b19d1011-dda9-4019-8073-b432a52b2d2a,3404e922-6145-449e-804c-0eac59c5524e,f74febc1-f1fc-4bf5-b4e3-efd198e9e8e1,d23f60f9-a607-410a-a4d1-bf09dfac1a06,9b84fc6a-d268-446c-976f-aab0e8f6e593,0fb9ff12-4907-4826-b0c6-1aa136d769ea,9648cbcf-5e13-463d-bcbe-2b6d38479fb9,6f1d427a-24b5-485e-8bf4-7bef1793bb54,ef3e7bca-c5bc-4c20-8358-d7982000d58e,eb6a3070-db2c-4484-999e-9f249b6c68c8,b29080ea-79b9-4300-9ff2-280c60c71f7a,5c457a5e-3168-470b-afbd-6e862500feda,a14defc8-0d13-4219-ab13-9f26ffe77dfb,8ebcbc70-02c6-4f3a-a937-7d561ebcde1b,28009ab4-09a6-4fc7-a2ca-33e46e582a06,8e0b31d4-3476-4df3-a3fc-2861ecdb7818,26fb9a17-f6aa-4754-b5cc-103bb3d9e824,067b6f3d-e51b-4fce-b02b-82c86510c228,7f6a8175-dd92-421d-a11a-18fc0ceaa5ee,1c1c151a-7272-4c25-9826-d2ed6a8a6763,33ef75eb-fd32-4d84-8dd6-f0ec52ed1d81,85c5a51a-1c6b-4f4d-a5bc-8432cf2f4727,91175ee1-869f-4ac7-872c-9415b5c53bad,2b6e8dcb-3dbf-4685-b2e6-1e27ed73bdb7,df4b7294-b118-488b-b5e2-84e99529131c,0e0bda68-774b-4ef8-8549-d9a2677a99b0,20f53130-9ae5-4c2a-ba1a-4ea51164a740,7354a566-6d26-48a4-8676-8bfe659f89de,5f0cb151-8578-4841-b91a-d62ac2d3e2a7,f7487560-c46a-47f2-853d-e1d62c79c76a,511d94c5-e85c-480d-963c-969e8111f166,8a717e7e-bf8e-4d52-ac4f-0ce168d1320e,c1b3ca9d-2e57-4eff-a024-0b0c703411d8,f19c390a-6a49-4dc5-aa8e-2b0872a86c6f,ee19b25d-a9f1-43ac-bdd1-f991e7a99b08,f5332e0a-65dd-4d2d-b26d-da618d294f4b,fde7c51b-65a2-4b41-9d4a-0ce8a1c3c341,b68b8e93-05a2-44ef-96fb-70e835ca9e46,6e75c2fa-74cd-462a-81df-84fc19132322,e0923cff-f713-42e7-b0df-1e83d3e56d95,55755e9d-3d72-4aa8-8a8b-10594bebd1dd,808b38d5-6d5b-4be2-91cb-58f984d34105,08ca458b-f753-4cc4-a657-72ade484439a,598e0adc-14ec-4bf8-8fa2-d2ec63d266c5,4597c281-ce6c-4dc4-9f9d-b152c9f42908,b63931f7-fe84-4499-9406-3904fd18f6ce,24ef68e7-4cc5-47cb-8d4c-d623f97e9ce3,96f3a4c6-b560-4759-821b-f9ac4a2efef3,4857a7dd-904d-4d25-801c-a123dda0b49d,177a05a3-c4e9-4030-a963-ea137f7677ce,a06dda4c-8d68-44c4-94c7-4cc6cc42756c,f1ce29ef-1f1e-4078-9a56-26970d4d340c,10a02bbd-9a0f-4dc5-a8a3-ac903147e4aa,3f317222-858b-45f9-acdf-120048041725,741158b0-8458-4f01-8f32-243b64623371,c3985f14-c5bb-4893-bd1d-8aae66e28748,510eefbf-8c67-47c8-b7d4-e9d692250e26,13d3bd4a-19d2-494f-b91b-cf492df06053,8e7681fc-e79e-4aa4-adbc-f0778cc8bb2f,7b645906-eb10-4615-9a2e-914b418718ba,dbc1086f-7789-4b51-93aa-cfa0a804d762,14064776-e5a3-4acc-ba0c-e5a44e3c5ba8,03679580-aa12-4735-87a9-2a8d7ff8e90d,0b116632-335d-4b14-b39d-147906b2fa84,7816a741-8a33-43cc-bc65-958ac005c108,5ae11bbd-5ef0-4478-aa62-0b913f848fe8,1bfc373f-4d43-448e-8aa6-c3f48cbe9d91,4188158e-b880-444e-ae66-f1d6f7446a6b,4a654954-86ff-4063-bde9-425980955ae2,358e0cf1-457d-493d-990f-90a7e4089ed9,4fc9ed74-5f8f-4180-a35e-91d1e1acfcf1,74606584-8843-4c4a-a7e5-1965428d6349,17fbd260-0d43-4114-80f8-839748c66587,556af9cb-240c-4977-86e0-7d2835475bdb,1c1a6432-c6a8-4d41-958b-8b09f0b98737,9893236e-5c65-4aca-badb-9487642eff3c,9d59ae67-8a71-49e7-bf8f-052b447ba6e6,ba699e25-e7d7-4851-ac3f-db3274f8d64d,5a3a1d25-690e-46b2-9825-bf1f47e3cafd,386b02ba-e995-47a7-82a2-2b600deadaef,69c4c1e8-061f-4bf3-bd91-d63e2f5792a3,77778015-0a90-4094-960d-d09249432837,f11cd1fb-25d4-4d63-b68d-eb6dcde06443,3179880d-e05f-41f6-afc0-ec277838785c,03a84d26-7c16-42e4-8529-c42ae48045df,465f8ab6-6a50-4723-8a1d-65f8e0da6b19,4b5d11d4-c424-4d02-92b7-f39292b38e02,af588b17-ac03-4ab6-9298-2c202760ee41,e6bbef87-3c72-4d75-bcf7-44b70338e224,e045531f-4098-4ee2-b2db-dfa77d2ccd5d,f3c23cca-8fdd-4828-a522-f3173d228c2f,6130cf97-9dc1-479a-8d10-916f42517837,2f8ef191-cb93-46ab-b4b8-57d64d4d7bd0,9aaa9c00-0eeb-451a-9ade-f534a0a4aa35,7549c48f-217a-4209-8472-79db8da31f98,0cc6718d-9bcf-4dc6-aa1f-b2c99eb3cd5c,2ba1bf7b-cf7c-491b-ab4d-a177272e247a,64303e72-f875-4dba-9ca5-1b42c835a2c1,98965855-4dc5-40b1-89f2-112aed093c74,6257d54e-fc46-44cb-9f27-19f88592241f,6e803197-394a-495c-b398-4dbbd870c087,f8904fab-58d4-46c8-98d4-1e31c0d029b1,5a677ca0-cae1-4a6e-8533-19ba7aca09fe,725e474e-6dd0-4959-a682-ef350846dd8b,2dfaece1-3b9e-46e7-9def-0e3f3ccb99b3,2387b3fd-d94e-4f3a-8744-e2fb60cc925a,69f77182-b72c-40bb-8375-86d46012c68c,1c1730ca-e7a2-4471-8e07-2eea386af075,d1e2db26-d3b7-436f-9ca5-fe93596590ee,645eb717-62df-428a-b92f-65e64a2e0672,933a294c-2d82-48ce-bb80-cfff2027d0f2,16edb69d-0f39-4cf8-8cbd-5a4998241971,a07d55af-274c-48c9-8d06-04ffb3995bec,3b4ab7b8-4143-4655-aa83-b68129f946c7,a13ca68c-748f-4261-b838-18f8c0c1f834,5bfe4f1f-8b55-4345-a3a6-cb5faf4525e0,1683549d-0f15-4282-8366-9828a989ccb1,74a8ec7f-3542-42e9-869d-36b1ca2e939c,abddf3bb-0b19-40f9-b813-9d5c47435bca,7c7a01ec-9b67-4dca-84ac-08c07a5f4ae9,8a4f2d2c-2a90-4933-a271-6386998eefcd,33087f0f-aa5e-4f0a-a394-e042dc0be7b7,d7d923f2-00ad-4844-a1a2-8d5cc8be45e4,3effd33c-483a-4e76-8622-666310498510,3d061755-078d-4e58-8e89-f27f8ed5e3a5,ca64c289-e154-4d17-ae13-15b863c75b37,80710adc-3330-4424-bb31-1cca93e4f507,8fd1f832-b606-4bc8-910a-dbef63eb8941,7b372bb5-f9d8-44de-9eb3-d7ab8aa90670,a624b0c9-8d9e-4b6e-aca9-34ea39998238,f334139a-d910-4c3c-8852-348bcb0495c6,63912fc3-69a6-4f04-a2f8-6a2841004d49,528e153d-7bf4-47c9-b0fe-4b802313176c,8642cb78-17a7-4bc5-b3d1-bd021bd51d51,fd9d512f-4fe5-401a-8bd3-ead3633653f3,96ea3bc1-11aa-4d5b-be5c-999c9aafa351,f6e7be17-7780-4a35-853e-3c62e127ff88,10c6caba-4e22-4457-92d6-4d90404a0323,e868bb96-bd5b-49bf-b435-1d7553151f2d,ac294c9f-b6f1-4ca4-9531-43221bd7c899,3613b314-0c2f-4b0b-81f7-a2293b9be4b1,3ad2a820-3616-4c3c-accc-d31181fcd876,f3dbffc0-0c72-4ed1-9ae2-00a958e510ee,4b9ba070-a440-4589-8fe8-5e01848621e3,1eef101d-c281-4ff0-9e6d-50633caadc1b,e24d6623-4915-4483-a33f-525135d36c07,dbaafa02-ba76-46ca-9e1b-f62d6ff61ad9,e3eb9b08-9e47-4177-92b9-6c0877a9505f,75739a87-30f3-43be-a71f-473cd6af0344,0ecf56f6-704e-4344-a4f4-392ff520bdee,40e72ea3-7a93-41ec-a972-38b032337152,9e361c22-c752-4fb0-b7ca-e91d9a393b4c,1f529e21-d229-4043-bab1-f15d553f9efa,e4d2e76a-a3d1-492a-bd3e-50383db5186d,e34b82e6-7560-4bd2-9f50-413eda57d088,859c0184-6164-4d5f-947b-33fb6f1ad096,86574c52-fedf-4096-ad71-d7ee70759774,4eefccd3-b81c-41a5-85d2-3cbb102c0078,9c71c830-2aa2-4a63-8543-6d7f80993aa2,09f3a899-a4bf-46bd-87b5-aafd1b03b650,9153281b-2eb7-4bf2-8e21-49f01ba8ea03,c4c333a3-c01e-4485-ac71-ceaf0b08bbc2,08563ec9-fcd5-4430-8d34-b01694cfeba5,9d405ef5-bfdc-4aa6-98e8-05d440eee1ed,90df3225-47b1-4db3-b2c9-ee0a257d9515,013dbb85-e437-42fd-be0c-764c38362a4f,91e536e3-548b-4e65-86bf-4c3573b26b7b,373b5688-95b1-40b1-be68-fb644330ebb6,b63db5bb-7920-47a1-b343-1ab8726bb699,e97f21b4-13d5-40a4-b93b-84a443a71d03,c25018e0-4e56-4259-baf0-6c629a41822f,d1e86142-1108-4a55-b934-7488f1ff6f37,7213ed7f-4029-49f4-82a6-c7b42c9c9c0b,aca556e5-2b6d-42e6-91b2-fc1a5cee1652,6697eb80-d248-44c7-a67a-3352e61ca1a4,67533e96-f6ad-414a-b56a-6f211bcfd55d,38b2b0d8-8773-459a-95a0-5f77dfc5be31,7d53461b-c2f7-4b7e-bb5c-ffd7252cf56a,920e218f-3663-4e16-9d7e-5bc1dde37eea,43bb21d2-92b6-4969-b9eb-20d66b95e999,d08900c7-e17c-48aa-9ad2-c68a019d8699,280b1df2-6b90-4893-8f4f-136d379885ec,59332162-ec86-4cb7-9ab4-0f5182efe84e,df3ee7c0-d39c-4798-92b3-c79fa3443c37,71cbf5a2-0620-497b-ab79-e349ff48ed56,2bab164b-3b88-4d34-b99e-932c43ad1bcd,aefbaa03-e52b-4dbb-86c0-7e1c6f0aeb1c,945481a8-959f-4134-b247-87d81dee8847,ec5e2333-c881-441c-9786-b843189ef5f8,a38dde52-7efe-40fe-bce4-8d76423c20e4,0206c129-a258-4d96-81cf-54b38c252ed5,432792c4-fbe6-4d0c-9fd1-03c5df56fe37,d7a8f571-72b9-4e69-b3c3-e0c1a9dbba27,28269df2-59c4-442a-86d7-8b6ececf93ab,a58c4bfc-4682-4fd0-8fe1-d628bfd1174b,7fd189ed-35cf-4409-b7db-37a214ef6756,5a293312-f266-4048-831f-48c5aaed22e7,418462ec-a273-4aba-abf5-5755647f948e,08765e0c-acd7-42bf-8f9f-13e98ec5ecca,e66ea9c6-6c65-42f2-a44a-226c3a0b245f,0f626445-5b66-4700-9145-1998025177b1,bf2e55c6-2d56-45d3-88b0-7800f193e301,87279f56-cba9-444d-a5d5-826852c6aa28,5dd6e5e4-d869-49c6-99e5-8f72608ea2bb,76aaedd2-7909-41db-aa74-e6aa19e99ad8,1f7d5602-2bc7-4e11-bf83-c742105323a8,71d5d33f-0c31-4bf2-ac56-67a7a9768a31,74af5f98-471a-4fa7-823f-eb52de8f580b,b20f9ae0-3541-435f-ad46-82454d39a018,5f536727-8b3e-4d27-b9d1-a75fe92f308e,ac6e7fc1-68ce-4154-8841-bf7e5651d301,ba200a27-5a40-4314-9cab-2362739ec72a,9d3d86cf-bbe6-4284-bb73-322a4bd52533,d23aae3c-3f48-426f-9308-66a5a21c79fd,5bd0e77f-c4cc-4be1-b70b-f178924cb577,0f441d72-0e81-47ee-8000-1eec5f5407c2,e241c4d5-b48d-474b-a9ba-25fd8cd06b5c,9765c142-a2f6-4cf9-a3de-5602fd9c7517,70adf6ae-d764-4da9-98ef-2e19fe60e2b8,37dd6a40-f823-45c5-89e5-7271558ba466,7172ad9b-4108-4d36-b418-cbd1e5b2de4f,4583a3ca-a457-404d-a70f-8c594e95902a,647658af-a303-43e8-937b-5cdc24f1d3db,93617d9e-30e7-44c7-87f5-ba63c2692ead,4c55d4b7-2312-4a23-a573-9c827134d53d,94147e50-5ab1-4f56-8e97-c5b502c72531,1093df2e-2441-4fba-8462-f2ac7b7cf221,b70f00ad-48d3-4318-9abd-50d721078669,c9a0c97c-b2a8-4db8-ac53-2c76e7a91c71,d03cf30f-5b5e-43d4-8f74-8de791c44511,5af9db77-e4ef-4254-b414-53037d1788a6,8da96c07-e82d-412a-b10c-98d5d3ad9a7c,a2cec94f-7d84-45ab-aff4-79ed71a54576,7bf381d3-3159-426d-957c-6d27ebf5cf1a,28be5f6d-5b7a-4aa6-a907-1b1133daa646,93a47e4f-fdb5-4839-8924-3fda7d49c285,4955aa35-3e0c-487a-b254-67920618598b,61c6449a-feef-49fc-9ee2-01047b53a880,d73dca48-8633-42f6-a773-86727457581a,c90e91cf-0d5f-494d-a5b6-510743a8103a,26d275de-9983-481c-b3ef-43c1b01ce573,c571e5fd-0199-4039-b332-bb0d247a0f2a,a5ed66a7-daf2-409a-9ac3-981324616d49,c09a65b8-1af3-4b1d-bcf2-6eb0d4e10d5a,31dccaba-f802-47f7-8850-7b5b9abc4c9c,efe6481d-d1d3-4a40-ac65-73d117f1fdd9,3ec61152-626b-48fa-8ebd-8490e4f724b3,6c06d3c1-fd2d-4343-b9b8-774bf6a76008,be73291a-20ef-43bc-ba57-62559454c0da,44aecbd8-8b77-4494-9b28-17e24acce77a,c7ea1ec1-5816-4365-aa38-c32bf0834294,5e2accd8-d16c-4d82-84f2-d8deb7d55aad,1f7dd2dc-4a9d-4ffd-bdb5-0baabd4c784b,e1eb3849-edd8-4247-93be-63d3dfa4ba51,01e9f631-7dae-4f34-880d-1272665ce4c1,f7c3270e-885d-403c-af1f-ce0eba065e5a,c6ee4441-48c4-4d43-925a-485ebddf8cae,46d5df31-372c-421b-970c-34d3acab57d6,0a60d5ca-539b-4463-87f4-c737378a34dd,27bd81b2-c1ab-4f29-85c9-08c33b149c46,336c9bc7-40d5-4480-a4bc-2c5a47c4c038,34fc29f5-5913-4cb1-b685-3cff1a8ab5be,fb2d354d-066f-4c97-8ade-d07e9a8024f9,320cf920-e3bc-48e4-a851-a1005f14f891,29524469-2411-4065-8fc9-35aa39754ee0,c5daa818-55df-4195-8003-10e22f28ff26,c231761a-3d87-4277-b4aa-daa328b82070,d47e572f-0147-4f88-b825-1126d903f520,120ef068-4818-446b-ac13-46281da02098,784c5174-d823-40ef-b3a7-c0955cb65db1,a30ab6c1-88e7-4cdc-9950-39e02efebdae,658ab2e6-6db6-4b3d-9be2-fecfb3b813aa,9e9c7f83-8521-4975-9db1-77fc6d0e187c,072ca328-20c4-4018-910a-bf63b2bf30b2,5bba7759-e92f-43cb-bcf6-2787e8538573,82625848-89fb-4a19-99b7-219af9cc58e0,0c8eac2f-3aaa-4207-b16c-18efbeb25253,a555cfa8-11ae-4964-88e8-fea6bdc8c048,09a71de2-12d6-4810-af52-86552f22fe4f,0607c066-7575-4acd-9929-3afc442e9aaa,5d9459a5-364e-465a-ad48-02e8524bb886,43af81d6-bf9b-40fd-a7c6-f82b07f561fa,50815978-85b3-477f-afe8-f5686e2ed97d,588d5eba-6c3b-49fe-a8c5-77850942545e,2fae0146-5ed2-4146-9a57-da8cba1e939c,ceb3a303-b225-408d-968e-596caec7bbff,d266650c-50ab-473f-974f-32c5c157dd8c,754edf98-ce7e-4071-bba0-138864f47cc9,c93ee8ae-e7a3-4b29-8c39-9d32e019053c,259eb266-c28f-47b9-a017-7edd05407d83,075d44b5-6861-4896-b478-98f83ee8a6b8,59b511af-51fc-4584-886b-1dd107a1e570,5ba0a278-0ff6-4340-be1b-09e8fb083a18,b37b93e9-f06d-4065-be6a-7b73bd0bdba7,2b885a13-b8c0-4ac6-be7a-d6c197e5f107,c6fe8cef-b086-4025-a485-4529ea793ec6,3704d6e9-047d-4047-887e-997d224e2763,b94fb5eb-5728-4ce7-a0a7-43841553037f,78d64d0c-def7-4498-8973-efb24ccc460b,1ad23cd5-760f-41ea-82d4-78bc518c8f34,db7413c6-c940-4809-8e59-aefa8cd68b88,b09f25f4-9056-4a74-8885-a038d66dace4,495e1842-fe4b-4ccd-9ca4-11238e0c8ebe,6674ec10-2918-4141-98ea-5ea7cead4c58,ec30d37d-27e9-46bf-a879-9f51e6580d8a,5e69adf8-5152-4113-af04-227377aa5170,c1550484-5b81-4f64-8e2f-c49b88d4092c,76fdfcc1-b4f0-49d2-b307-521b299366d9,77008b3b-2ecb-4f5d-8133-ad2aad892043,5644ac6a-738d-4dee-84f3-455063338cef,ff576574-258c-441f-85d4-5a64e93fe393,169c600d-a8e9-4c42-b449-34ef17b64f30,506aca7f-cebf-4451-ac5f-3a52df4265dd,1369305a-220c-46fe-9831-6c787dbb7bb1,f3853617-afeb-46eb-931e-cc099f1550af,9e9acb71-ebfb-476c-bf6e-869c6bb3831e,60b52478-83be-4c69-98e7-78eb3d0b4cbd,7d18c775-dd0f-4079-9562-6ab92f531776,8be8bfa8-1052-46ef-8c58-def76d5e2196,14b74e7c-9a7b-4532-8da2-4039bd14e285,22dbbd79-0103-4f89-81a6-c1fdb25645c5,d446c494-2895-4cf1-9198-ca9ef1b27494,b142073e-fe67-47e8-b964-b067678f1e1d,c4688329-a8cb-4ad8-9d9f-4367d782b063,b652de56-66da-4d17-bcdf-e5fa2db187eb,6f06c6e7-c2ff-4eb7-bd66-af6d9d759447,fbde5994-fb61-49e9-9bff-ed9f5d5dad5e,6709f715-44b5-4a8f-90ba-0079e5493672,bfc83fec-0815-466c-8731-4244088695bf,a174b14e-8bf6-4e15-830f-bdc3f6c8c7e8,fa0920a3-8d8a-4ca1-bc3a-83ca3f27682f,4b3a5446-04d3-408a-b3bd-3d8c48218b23,bc00be43-f872-46c3-8173-83de165e8205,2fab73e9-2da0-4ac3-a2dd-a3eb9dce366c,bde3a7ce-e19f-4aff-ab0a-01446b81eb68,bce00a42-eabb-4ef9-84f2-dfdaa6deecd9,2186a960-95a8-4a49-8d0e-47cd07935987,8d04d544-cd3b-4b72-97e7-e04fdd499926,d2b7f54c-f8c4-4c16-8336-59fa129f861d,d5daa461-519c-46dd-a3a5-4b026830c5d2,37aa81f8-3165-458d-bfe9-a8226b2db2e9,8dca35ae-88f6-4999-8069-83d24efb2871,edadc2c3-b085-4a0d-885a-7fdae011d633,79a55732-00a8-499b-b59c-16f4fc14be7d,d4cbc7ee-91e5-4561-8fd9-5af5d20609f9,953d3691-caac-4b7f-a154-0d50f5f66959,32c8078f-ae6d-4211-a6b4-a31705bc3b89,1040d2b3-fc61-413c-a6f1-b71eb019fad8,56870573-9750-4617-ac46-d2ae37934d31,894758a9-c84a-40ca-956d-2c93eb3593af,5b43116c-2b61-465f-941f-d596c3511de6,67326cd1-5299-47c3-9087-fa07581c1d43,79b21986-779f-43c1-902d-b17f3259813d,75ad050e-0469-4685-9f78-603b8967b7e7,ddf10243-f5bc-4b15-abf1-a75a939d901f,fd9794e0-b4ee-49cd-997a-4a04f7eaa285,83428011-2d4b-4636-8c72-3af67da1e0a7,14a3779f-7ed7-4247-bc0a-44460ef02107,51a67dc0-f4e2-47fd-951d-12571157f4cb,67d6525a-f1f4-45cc-ab55-2f0f837b9003,ec231093-e61e-4d19-863d-64195ac78701,c1098034-3866-4cca-9d62-588b893d9f3d,56ed7147-1dd7-40f6-807d-16724535bb74,9f81a3ec-0a8f-480e-bf68-c3528ed09d72,1c86725f-b76e-4720-a674-1432b1108cbf,84d2cd00-67c2-421e-9e76-61e934a8409d,231fc1ed-2bdd-4006-bbdd-5b642ff637c7,33648930-8830-4585-89fe-a13221886c9c,a045ba12-0af4-426c-a32d-f83d7fbc8c0d,afb9e2cb-b318-43d4-bbd3-8ff7b2d82f7c,c2e630f9-a554-4fb1-b0f4-94233e9c712f,5d1a6338-ed9b-4817-b55b-1f79320ce7ad,7f208e8a-993e-4aec-a0b8-039c287f580c,9e654fd8-49e2-4cea-a297-702c93d6ccac,76c2f035-a5c5-4549-ab53-3a153c138359,88f35a88-68a9-41d1-9989-cb974af4a92d,4f1c2cc6-d166-437b-940b-1a5384ebc7e6,4e4e64a1-6ebd-4435-a328-994c30624eab,3f6e1f49-9395-4707-aa52-1f5258b9ff61,39c65c67-6df2-4f62-a429-36221f5b4bb4,e407bcb1-f867-4c26-b0f0-308b43080337,c05654e5-02b9-471e-8882-b28365170343,de2124f2-126f-44c2-b08e-89610dae0dc4,a503d988-93bb-418a-ba81-27fcd143fd2a,ceaa5dc2-8623-419e-897d-b62aed224071,3db0910a-6a3d-4a74-b2a2-1605f3b0cdeb,f9ee13f8-72ca-4b00-8ba5-736eeb104569,b18fdf79-da66-4a69-a303-8486ba6f9ce8,17268cc8-eaac-44e3-aa83-ab5e5662008f,fbeeb647-2d31-4a9a-90a9-9fb4d0b38d85,6fe85a1c-fa85-4e99-a061-d66d6a9fd8c2,6bccaee7-4f9b-40ba-9d15-438ac53f89c5,109ee100-ba57-431c-ac8c-8cf8de263961,aefa6e37-2e9f-4d2d-8260-876b438cc894,5ace8e55-8a58-4967-aee5-9e8ca9367820,1604995a-f758-4103-b611-922a439bfb9b,e480a5df-3ac1-4f37-ba81-956ab95e71ed,e83d79bb-9cfc-4ee2-978b-f8626ef314f7,107c2cd1-1fb9-41a9-8f5a-31859bd1ee2e,75abac01-3c9c-4453-989c-7018b7c8e575,82093976-18d7-4e63-a66e-a16e2c0b2218,5dac7dd0-60cb-457d-9cc6-b6ebf4e98b65,8b85fc85-94ca-41f5-8c17-47bf67ecc231,f72ff344-98bf-40b2-81f4-e240bb1aef9a,5cc7aa81-551e-4928-b0f7-98ed2c519430,8b33f811-733a-4223-af10-062dab5ffcbe,0ef6a547-393c-478d-b96c-5fbf120c3364,d343812f-9769-408a-a608-84220b100c22,db81759c-dcdf-4eb7-a248-ccc658110602,c956069f-e233-4822-aa30-83ab0cae44f1,8354cf0d-880f-4b18-b18d-18ebb7b31ed5,783c9f33-7384-4046-8f9a-a4d4b0035464,a35d5b4f-932b-4db5-98d4-4734dd4b17fd,d2029474-8176-4cd2-997a-d78167d62cbf,41911130-8fb2-4f2c-bccf-a1aeaf93a442,5a749cdb-c303-4de6-acc7-3b77b022acf0,fb76b99a-48bf-490f-bbbd-99461521db1e,a3f225be-95e8-480a-afbf-e4af29e69d85,20c2a6be-5ed9-4ab6-a153-67deaaea79c6,21c02c1a-386c-42bb-b3c9-bee854d5b360,852e80d1-0e83-433b-a243-631d3d0776c5,df6a7f8d-f14c-46bb-8534-65e058207516,dd33a2ac-9904-4210-b765-c993df915baf,44d9c736-e2d3-4873-942e-3407b7f002a6,c1430fe5-82b2-4694-a99e-db667f847b33,0178b6bc-7c14-49bf-bc5f-7e6b40dbfdee,cd65b8c4-0079-4f94-876f-08351ac8bbed,9fcc731c-f8a7-4bcd-a5fb-30fa707df9b3,7ee53ef0-5f52-47e4-8ed4-7926fb0b0bff,b950f313-2e0e-4590-a1e9-82f615ab5ec7,b3c100cc-baf6-455c-aedc-4c372ba9a3bd,0baab10c-252d-4fe0-aa1c-aafc9780f859,6eb8b902-1472-47df-8426-2e7f809f4c96,c4e94f41-8184-4158-b326-e7fbaee1d27a,35941439-f8fc-42c6-8273-975acf1c446b,d04e553b-b2fc-48e3-b2ff-a5fe2724f348,3ea49f4a-4e38-4494-baf5-18d67b1bc831,58736f94-7501-4d03-98f3-e84eac222375,4c6cc35e-44e2-411f-a76d-b4b366ddb907,bbfafdb8-03f6-4eb6-af99-2cff1b574724,941604aa-6458-47d0-a60a-6d0c2984f72b,be6c0b10-1180-4b42-8094-79a6e6ab429d,03a11576-e039-43d8-b10a-3f18c537bc69,bc34a386-5db2-453f-8163-23245991f886,964ee029-60e0-4a63-b02f-d432240fcd27,230a1697-9398-4ae4-b329-297d5f631ade,1053d4b4-19d3-4527-83ac-d6c84bd07692,3d0f040f-c7ee-4571-8a49-670ab25d96fd,b77b526c-58f1-4e51-a3f4-fce2501bf92f,00bbdf55-3cee-4210-a47a-099d30d1372a,ddd2b4b1-d5fa-439e-8860-c2a15fab68cf,34320611-a1be-49b1-9250-e3495ba7435b,194a5081-8393-4763-b757-68e8e1dda820,a483f379-61b8-4ea0-9f3d-41256e8e2e89,2d2e94f8-bb2a-4d2d-b1d1-99ae876f8945,f124674e-8580-4cda-a97d-c0f4928ba8bd,9a1e3a3d-117a-4868-84b3-ae3bee65e80d,45012306-3476-4c8c-a4c1-455fabb83e5f,48f179e8-120c-4020-8f22-8b33ae4cce0f,69218538-a69d-4bb9-9eb3-4850ca789e8e,dc77640c-072f-4913-9fbe-831d446a3bca,81077c46-f1e2-4990-8768-417a3f8e9f47,ac27bd18-d130-42f6-922b-616cac4dcd2b,16337c99-e5d7-41cb-b369-e37ea173751b,8920522e-f6ed-4a50-a8e9-fb9f63bb0094,ebf7d1bf-3f65-4d03-a64d-567c76ea3f7c,46a5f7cd-8614-4f25-9975-ac817e546fd0,e1cfbc42-ad9e-4d73-92c9-5c1b78251068,48b9f169-7c01-485e-83ab-64575801b87a,56a3e5ec-2ec3-4e06-b155-1559496c7fd9,5cf7579d-26e9-45b1-9448-096af64e351d,8afa3dbd-8eac-4b16-8090-2dcd8dff1f42,5659d5bc-0804-487a-a38d-c4439902dc96,58554721-7f2d-404c-8337-ff0d3175e113,79e44fed-027f-4307-b08a-9e24d4e477de,b851ad58-43f1-41ed-b942-278c67aeafa1,8ace3999-97c5-4d4d-af54-0f2eddb33d05,0ea9520f-b821-40e1-aeaa-6fd70333779d,310bb179-0826-4770-93f8-5f77d0ea4e4c,c7022e74-f532-4fdd-8b16-ab75b7853b24,e0dbb7c9-01c1-43f8-aa14-a6b8e9312fa4,3f71a53b-2e36-4f5f-b1de-e48bd101e843,f64e0566-1931-4f72-bb07-429e9ef72b86,cbc1d95f-4391-4d2d-891d-8e42265390da,0eda6dc0-07a3-446b-abdb-b36ece881b81,771de572-6f25-4270-9f75-f23bcfc10f43,eef04d9c-f1c6-4dc2-a728-a190a5d9315a,482af706-aee0-4698-87d5-914b429976c1,efad105f-4d0f-4a78-8b8e-baf68ddd763b,7cc9a8e1-6a4e-41e6-b1af-3aadf71c9f8e,eafe539f-dde1-48a9-b7b2-dce348b92692,313e2a07-70c9-4a24-92cc-51a3c7876477,839b243b-37b9-43e6-8f23-7c20dbc2f016,c1319d6a-41d3-4764-82a3-4a912d510f92,5cd0c32b-ead2-4b8b-a32c-4166e6a088ee,aa0af403-5298-4345-aeba-36de20ea7d4d,1ba7340d-6d95-4d42-85cc-a8e02e36fb9e,6713100e-5a01-4848-9026-2df3ca0a82e6,7aac27f6-5927-43d5-a28a-c658abeb74e6,35393e9e-4c81-40af-bab8-3ac94d29b573,1ab8681c-25ca-44da-8489-7a2c254bd86a,843397c7-e1a1-4fab-ad8a-aac3e5141d20,cd4503e5-c5c5-442c-b574-3f626ee0d3a1,221929e8-d7af-4d38-ad0c-bad24dd5a47c,44161f89-7718-4b29-9aaf-94b6bef2a4f1,b6575e1d-12f3-40af-b5ca-cfbb7875c977,bafaeb29-f294-4ade-add8-5e88a5e273ab,fa5d2a05-18f0-4d85-9f9e-1fd7700ba14f,8b3598de-2ca7-407f-8b04-8dd83241cd0a,70ec4b13-3daa-4123-84ba-bcdd797a1797,5e3c2341-3326-4340-a763-208bc5bce248,3e72f6b1-dc8b-4db4-8ecf-cbe436441f7a,4bee55bd-274a-477b-960a-b55fc8782daf,d3198b78-d5fb-469d-920b-ba635706e6b9,a26fdb27-a68d-4de2-9a76-e80b0639e6bb,26ecd8af-ee2a-4cc4-b72b-626101d987a7,cdb2ba53-e7ed-47ef-9798-e4eb6b34f3f9,20caf787-e39a-49e9-ab1c-fd02704a236c,8c0a50e6-7099-4992-bac2-aa4c99e429af,fc004b75-350b-45ac-9680-7a8816d13986,1d1339d3-fcdc-4f1e-b60c-d3c32bcb8f1b,609d0fcf-225c-4d39-9ef4-c52765c52202,d426f525-a077-473a-8230-d8ca44fd0ccb,32a7435c-94de-4ef8-8991-d671f575684c,18e604f8-f50d-4b5d-a77c-1df2b4013692,98529ab6-e271-41e4-8c31-79646ec77a17,d7050b24-754e-41a1-9808-9a474ea9e0c0,dbf33991-9cda-4e0a-b8d0-33653732129b,64bfa47d-d4e6-4839-9bdd-53e5d01820d1,b635d9cf-b27a-4772-b941-1472e8308cfa,5aec0ae4-83f4-4bc4-99e5-e240f6947743,222b04ea-7f60-4fd3-8c83-35e6766c2760,ed48206b-b517-453c-bcd9-69f2c4afff55,ab0d2168-98fe-4c8e-adff-d1482a95453e,542d6ba7-4491-4f85-a8b3-aeee55b3016b,9932f9e4-4ff1-438c-b513-2f8258d500bb,484c564c-a895-40df-9735-8a57fd79c40c,c27dd871-2cf2-446d-ad58-6235408dcb6f,70d94a99-a259-4ac3-94b6-5a5042937f7f,88cd3b51-9dce-4d20-ac7c-22f72ad40081,d1c77e54-32a9-4c25-a1d5-d1ff5538f1cb,a28d3a05-39c8-47ba-b180-3d9ea5e7e7b9,7bb4d2a5-0f4a-4e83-891b-7da45ce4d577,99cfe872-407a-4ba0-a562-20683f4545d2,77074583-6552-4449-b3e0-cc94b0f101b1,a4fa7588-97b4-4618-a4d4-18a8ab6438f5,0adb1382-bf94-4b42-b26e-a6304380821f,a9fc21e4-099b-4026-82a8-cc0f739026be,69e67caa-711b-4d16-896c-dbb1cc8f91b4,063559f9-e231-463d-9fc6-b1eb9a02fbd3,dacb98d4-05a6-4923-ba41-d564013430cf,23ad45f4-8ccc-4b3d-9584-302cf206efd2,ed07e0ce-05da-44c3-b098-bdc733ea5980,51334fd5-0220-4dc8-8ff4-c6e70bb16849,81c0094a-aa4f-4175-9e2c-bec1e16c0f47,969aaa40-3ffc-4fde-bcb1-0afa9bda81f3,eddc53bc-50a6-4107-8ca1-7cb430c3d9fe,9c7f0ee4-c626-4a2e-bd6c-40cad5fc6e51,b76794a1-2519-4b0e-ab86-d21448b42ca8,eaf043a6-2c81-430f-8d22-802e95216c41,3e8e8327-f7c5-47f9-80c7-a7195ffec3e4,a23e3025-dbd7-472a-b5ec-9c1a3edf08da,eccf5258-7475-43f6-af80-9f8a2fb43b68,699a85e2-eb57-4ccd-99f4-bda229311a35,7f8b6fed-47c6-4d9e-8ee3-d42be0a8a8da,e73dc8c4-a472-4cb4-b963-d78bbe3a12f3,5048a772-2f1b-4f90-91c1-45c549148595,a40acda0-388d-4ffa-965b-003ee6ca58fd,f1e3d5b8-ec12-46d6-8461-78c17aaf0ecb,a88a738f-c78e-457d-8b5c-29d77d251039,405fb818-59fc-4a39-9f28-dddcb3218c51,d1bd178a-a977-4871-ba3f-c60e09d1d878,0727c3fc-9615-48e8-883d-f1f7ee2f0fec,c3616546-dde1-4bcf-a2e0-3a05e077cf4b,67dd58b4-10c8-4475-b685-ff7ccae9b823,c6162c59-90ac-4bc1-887c-bdf8cc1e8982,4d2dc5da-7e70-413a-bf38-5166c310be6c,32dce2ec-333f-43f4-9b71-532f850663ea,84c4c20b-94e2-41e0-9178-8bb351b22d3d,547d90f7-16ea-4a31-ab8d-070a1285c66e,d9d38f1c-f36c-49da-8fe4-91229348225b,81c79502-fed5-4f36-8c20-6fceea7051f3,9c28035d-eb68-4361-aac5-9e74c64af322,09cb1005-793f-4282-8f66-970827e16a3d,86991ac0-d8fb-4d85-bea3-47753b77e891,aabb6de5-880a-42c8-8597-a11f8881efbd,6ec69efa-ddc6-418e-a366-d153fd6de92f,63369a70-b84b-4cba-81b4-2e9a8bad4ea9,ea226a7b-718c-4890-824e-006f4b1852bd,aefe6e84-0580-4c3a-852f-2fb24293eb9d,0f212f97-1019-4c36-98f9-d80b26da449a,8d980781-210c-4a23-9da0-6d815e8bc1b6,87613c0c-46d7-416e-86ef-235dc802e39a,1158ae9c-14e8-41a4-b6b9-6f3560a498fd,06861483-5b28-4a71-905c-35d0d7db6714,0a8761c4-d412-42bc-818e-9d34aa4dcf17,0369eacc-c122-4c3a-988a-9a79dae46dce,2df6fdc3-1505-4780-9e85-3748bbb71716,8a38d9cc-dfc6-49c1-b283-54ec85561f6b,a19a2956-d39c-45e7-a0b9-1161fd8b8f15,61200e35-bf14-4b9a-b7d1-204160a0a04a,359a69fc-9832-427b-bc62-d4ae24ee0ebf,f0990462-09e0-4e2a-85d3-ade48d070fce,a16ef75d-a421-4bf4-b11d-fcb7947ccf5d,846d3c38-29cc-4789-befa-76d6405875fc,d2339fac-f709-4a6e-9437-59f1cc282399,18f60487-e27f-49c1-b090-26b482d1caea,446c6941-a62e-4256-86a2-392901d4f109,4dfe351b-3e8c-45de-a376-d052748fab02,6d96d6d1-946f-4fa4-a145-fece7310981c,ed1c2f83-d2b1-4f4c-a5c1-394681584fe9,211b4e84-6a76-44b0-b131-7d8f54bbd076,9af9b970-b6e3-497d-82ba-a50ef36e54d3,62f2cabc-da12-430d-8470-5cd5cad7f6a8,1870c01f-c982-4af2-ac78-d5a901cc88d0,406dffaa-ae5c-4827-893a-cd50ae533c45,bd79b8e1-6434-4646-b0cb-ddb5242b92bb,56a2a055-b9f6-47bf-a7ab-983e85f4cd71,dd2487df-f1f7-4500-bcd2-3d1af7b5aafd,b5590fff-81c0-40ae-94c8-f7a63b862793,1ad6847d-b1c0-4639-b7f3-a137e724d679,68b03cd2-c125-4f44-9a5e-5866b4cb5252,2f21201a-8fdf-44c3-bfd0-72c9da4f5e47,b35fd92b-174e-4fe4-9f10-48a5d89f783f,40144ff6-8e07-4496-b845-80440b558980,9c1f01a9-9777-454d-a2fd-983fb066a293,d035ae11-65eb-4837-a167-3803cfc3c152,3c6a3c6a-d023-4e1e-b783-30cec1d4b1fb,2ea0ff97-5364-4088-acce-c9af3ecd9712,92d72c68-4219-4653-b530-39265d20614c,a7a7f813-b639-4420-b5fc-9c38240d7f49,5a189245-2544-4641-94a2-2c4e0f6bf11b,54921687-4d34-4530-8c3a-19e1eab6d7ef,cae1b6d3-e7ca-400c-b840-67e5b50ce9a1,804f7120-e634-4c9c-9d93-63258a7969de,96c260c7-2e3b-4d76-a0a1-f0d3a6ef4468,75780b05-8be3-48ed-b5f3-c29673edfa95,7a8a6f73-ba91-4835-8a43-0cc9ec711eb6,c4c3cb74-cfdc-4609-b943-760ee44aa045,7b5b5455-511d-492d-8928-42313653d803,323a903d-2c23-434b-bd40-b29616bd6fda,e37fe33b-a94f-4567-8f4b-e97a879e4438,fa81155d-ce51-49ac-a742-824ed29f5a44,c3ff44d9-9e57-4d83-adc9-020d25880811,a55b9c49-7334-4d4e-9c77-658c51fae986,41e1b970-98fb-46ef-b7cb-dfd80e8ebde8,81f8860e-d3d8-4e5a-b5c2-c9e0c4b040aa,244cf6e6-b5cc-4fb6-83e9-9e03b6139b48,79d96337-880b-4c9c-ab17-31e2a001cfcf,d071d5c2-bbb3-4fd3-8994-ae902b6ce8fe,c539f67c-094c-4d87-93e3-c2b0d6672638,02a96664-9848-46b7-a856-73ffff5c5478,45066a16-58ce-4cc1-9e59-b44c56240924,5d2d9094-8b32-4ecf-9ea0-89a9f6b4c348,0c706d16-6286-45df-bc19-9d9d00d78aa4,4eb03ea4-5779-460f-b20c-d45a133852e6,eef45af9-965f-4449-b1b4-ac8597a0008f,0feae613-06f4-4d6c-b9c2-cbc6e453a46c,77605270-1130-4c31-8d37-78dd21fcb1d8,948e2136-3d8b-4533-8054-f5cdbef110c2,df19397f-daa3-437c-b809-d7de993c6da3,c893a844-c0ab-45f7-b182-aab67f4a62aa,28e76bf8-fbe9-495c-a490-f22506c0deb0,264afa51-5ba9-4623-81c3-34f465a74633,0a62fa9b-d106-4cb1-a2ee-fccaec52f092,f7ea290f-103b-4590-abd5-6e55d8520c3a,cca1c7a9-ce72-4dd3-961c-8be9f4e4a7b5,6817c98f-3a4e-4204-a7bb-023fa7f04ca6,533ba671-e451-4510-bc78-4d0608a4a3b0,9df3b640-dfb4-4181-b868-aab913f50ea5,a0160621-639c-4f10-bf36-c771bcc37431,150dd6cf-11cd-4e98-bdd0-66dbc38618b4,a4718fb8-706c-496c-adc7-eeabe690cc83,f59bf63f-fa88-4f8a-9962-e98493d7bf1c,296ce09d-4c39-4884-9120-a755e5d965e1,f435899b-961e-4064-8f75-2137e6b5a09e,c2993a61-ba9f-43b7-82bb-fc391efb6a58,5096df81-1027-498f-b200-73140ee3bad3,bf0c6d93-568e-42ce-aab6-261df4c8840e,553b97bd-9c2e-4ead-b468-a45035f7a1a0,316bc2f8-62c4-4962-94de-2bc339730adb,097a8827-cdaf-4a1f-95e5-20e69028f377,186cba62-a7fe-477d-8968-49e336e6cfbb,942f612c-7bd7-4766-898e-2cccfe5c09ff,fa694c43-576d-4a74-b2c6-aa48143ced29,c04fc1fe-6154-4c5a-a989-1c4ccf029a0f,ec8c1b9d-720b-4da3-9892-f5e678af964d,dbbedff5-2f89-4f60-b8fa-b28090b08c5e,1e888d53-f35b-4808-a303-c8029e042427,89ec90c8-a4d7-4732-bf2f-5d0c847b51bc,d0308f6f-7e34-4ba9-b0af-fe8dce78fb9f,c2088c2b-286c-4c16-8224-de132aab66fc,b4b760d3-bdd4-4abf-bb5a-0e1062d872f0,b5a6ac25-baee-4f48-be34-ad6453d8f7b4,220e11d4-c859-4cf8-9db4-6b48b9a7fe98,e6b38d35-5f23-4226-8cfc-5bd41a269170,69efc21c-fa2b-4162-b93c-35cf32f37a1e,6d65d02d-23d4-4df2-9e1b-cb662cf30e10,045d1874-404e-4377-ba7c-eaf9d1bda583,a69023d9-f496-4272-a24a-69e742210f90,d7aba321-bce3-450a-b0d8-f6105301081f,52007d5e-a6c1-4c49-9df6-263fa646668f,6c2c7182-8098-4d5f-bec5-2f6c245f1041,58d0e415-3981-43a2-b44f-807024fe9d2b,12b43507-dbe8-4fab-b5f0-77e0cb25754e,4e7b05ae-2a43-4cea-9a9e-3d603eafb124,c509961b-d8a9-4357-b96c-df6c714fba93,46b3ddb6-34db-4f9a-a8df-93ae4d18ae47,97505e7b-1039-46ce-8203-f6b573472f46,5d92dcec-c469-4f7c-ad49-1c31e704e7a4,d430eb65-ba91-4f28-acc2-c07dd38dc1fa,bc95d105-f7a4-4368-8af6-295f2d5ecaa5,f77a1a4d-70ad-4bd1-a871-ca3f5b3f7a53,4e3e51da-badc-45d9-9bac-352e19ea83ad,ca080fac-80a8-4144-9d4c-d3665c6e954f,f665ca70-bd77-4c4d-9b97-8ba030dee6f0,53e04c5e-174f-42d0-9548-4404557b3e9f,db2066a8-83f1-49f7-94d6-9f9d1c4b5d31,3e814e49-d06e-4639-b3e3-80f5e4b5e97c,ebb076cb-0b51-45d0-ae8a-27010d1011b3,3ac94834-faa4-4671-b5bf-3bce3c9577e0,9f54f61c-855f-4bb2-b57a-1a9fac9595ab,53ae579e-70ae-421a-8385-e5a3dd7ef88d,17d53c57-915f-41fc-acc4-73a6caef44f6,6fc92d46-fd17-45bc-8d6d-de4ed60a2c0f,94302cda-f5d1-47ae-a2b9-a81361186880,cbf97a3e-acf4-42a8-b1db-b6d0f5c0a653,cc2c002f-d525-405b-9653-d4296e635453,ca35022f-b03f-4f76-88a6-e17e2b73d849,697bdc8f-09b2-407b-a582-33f239ad645a,03526725-0cb3-4127-a7b2-1b2db50dca0c,9b9f3233-f858-4f19-a5a1-d9a678cf4a91,27258200-c1ba-4416-bd60-6dec152b069b,a723dcaa-7806-4490-98aa-f9bfb6b29d9e,c0651a1a-a8c7-4d99-9e1d-c7cfb5aa1996,44e8289f-2e71-4d22-bc68-f1a66662c045,e716bf27-d04e-4f00-99ee-e939e85395e9,834e7f50-5758-4c8d-9ebc-5df0ce498562,acd9a1c0-ce31-4c6b-9e24-7936939ad3d6,a23e1019-0663-4758-a64b-66161e2edb52,20df1e8b-b3f5-4591-9881-4a5b8013c97b,d93aa7c3-a942-4250-b223-95bbb62ac54e,edd5fde6-fc29-4173-a974-242004003e93,8995f8af-278d-494f-b663-9230ff9c8b7a,4fbda894-6b96-42f5-98cf-504ead209e34,48a45ac2-698c-4b5d-a96a-e46aa3761078,a0b71fa6-a0fb-4264-b965-de5f85a7276b,21cd12b6-bd13-46a4-a89a-a05b6b990f58,27642012-4c43-4e31-b758-a718e365c560,85e69697-3829-4a7c-8d82-5e610ff789aa,1aeb057b-a986-4ad3-9a0a-e27accf79453,5d5e6aa8-c02c-455e-8c0b-460c97a5d506,f66e3371-1392-472e-8d27-c3468538fa02,19c7d012-8c87-4a0f-828a-61bb9381803f,83aaefc6-f8a5-4128-9dd1-2b94cbcfa9f1,1b0a5f60-7034-4682-a984-2347ef704636,594e3103-0151-4748-af39-19bcc844d4a0,0c0d1c0b-e621-4405-94ab-1a92e2a08dac,d1bc1043-dcc9-49ee-b499-f6d5342e7c21,c9cefb76-8907-46fa-9675-363a75efbcb0,17f70c67-3a7e-40f4-b32c-0f728a7896c7,49f6d28f-2352-4a77-90de-36afc8d7d919,326b858e-43dc-4234-a78a-2082ab89914e,7119d172-a1e8-41ba-9d7f-2309e00721ab,6fbd7019-1707-480e-a7b8-3b42c5c31ef6,a8afed99-df97-407f-91bb-644d8d55040b,fb85a186-3f02-41e8-a428-0c9733efa234,f675ec3c-7426-4e47-943e-fbca4b9e35cd,bd1ce0b4-aa75-407c-ab98-8bfe9cd11f3b,16822c65-85be-4ae2-8d3d-7771ba062933,bcfe06b2-30c9-4a09-8f80-15b77d716465,d2288aad-6048-4953-99df-287f3b2edf19,7dd8b0bf-f658-4b8c-a204-83781e0a92ff,255017d3-6e7c-48c2-91b7-c2e79f92a4db,3d3c7035-171c-469e-a244-32c9e2e34a0c,e663ff83-9468-4e4d-ac96-7ba5aeb91535,2afa0d7c-0798-4b9c-8fc6-c9ec1a460fc8,8a67905e-183f-4a6b-9a68-22a1b3397047,cf655940-8b56-4dd2-8ef3-dd07da3360f7,b8ba4cd1-b232-48e3-80a2-65e29d7ccdfc,eccde371-05a3-42ec-9166-67be6e2aaa50,5c048c56-8300-4cb7-bda4-8c9de13f2a79,5a085ac5-abe9-4790-a2d7-c9f2bf19d94b,9537e47c-fc90-4a59-b656-ba25dbedc7a0,848b644e-1931-4a82-9aab-9a175fdf16cd,18626f21-b649-41c1-a25f-6507afa4e4ff,80ab4823-ec9f-43eb-94c3-84a0f396377f,090d0705-782f-4596-a1d3-0793482cbe88,b7e94cdc-3336-43cf-8049-d40a62911ac7,cb36b4f1-1230-4bd8-aca2-5c58f7652a4a,11eb082e-6e60-4a91-860d-f692b262ba5f,f1e65048-197e-468c-97a1-f1802a493b36,a754a25e-8630-41a7-bd4f-765cf907097b,45cd20ac-b10b-457a-ad81-125d195b7869,1c9129e8-3dbf-4baa-a4bd-ba4178eb97da,9e99ef3f-a199-4e39-a3e9-565607fa5eec,a6131a7c-573b-4f56-b9eb-502551365552,f7dcb09f-fca0-4d07-aef5-2d889e5d70d1,444565a1-44e6-4a18-af0c-198d9d28fd82,79ed7ec5-9966-4eac-98ac-ec35ffc516e1,5d3ec163-a0d3-4c75-a8fb-44be30e716ad,930d7bda-55ff-4afe-bc37-96b76d88efbf,48b4a867-39ec-4af2-96f6-ed6f2677e13b,51b2ae03-d722-4f88-ae92-34f14da51541,5bb91c82-972f-4301-8597-fb81df56346e,6180eba8-28b1-4299-898e-180328a43bfc,8237e866-a923-4444-9cad-753db78dd0fb,60472f62-254e-47f3-940b-6b8c804eaafb,c0a5c2a8-e50c-4956-a019-b1b40fbd0f04,d408df6e-8487-4d59-a853-2148b6c856fa,92239f96-5b9a-4af2-9182-e5ef261bded1,1ece474f-9e17-4ec6-8082-a8e43205652a,912bb101-6850-42eb-8c58-61fe4048f6d2,76fce95c-63f5-4b64-bf3d-b86f6a0277f7,0acf89e5-f765-44c7-91c6-a8af9d987140,5ee50a79-e580-4712-b297-8349dc590d36,25dca652-07dd-4d9f-b094-f6242f0a3ed9,7350a0eb-caa2-4f74-853e-09851123e8c6,6cb04dfa-0e42-4164-9b54-c64b0e7f1100,ff07f5eb-f29b-48f4-92c3-d0bbf61c5a0d,e3c91e2d-5b5f-47a0-8235-68e19cdad107,50c8fc5c-b44b-4fdd-aa5f-58fb9f9c536e,528e9405-8ae2-48ea-9f08-cd67168a2f6c,efe28358-a07b-4ec5-a907-e1eee318e80f,0dc0ed15-6c13-46cc-a31a-9a807a7cfcd6,c19b5084-533e-4a4a-8730-7bc1851edf29,d5a1a240-b898-46c9-9575-7d6894237671,236ee9d0-e7d7-4b54-8573-6e799b0b1c1e,dec17415-a51b-41b5-a963-a75ab2535bf5,2978225c-b481-469d-9175-a10498c8ef42,65189759-b244-4622-a1af-e504be35f18d,60d37a6e-73ca-491f-8c63-95c9bb7d3ec2,65e6cfc6-881d-41d6-bc27-ebb5cf8767f0,082414b0-902b-4557-8725-4de4552c0ac3,7e89a86b-f855-4c3f-ad4a-589e863d0366,4eaa2436-e3a0-483d-90e5-54c145064729,edb7b7d1-ec22-463a-8cf3-792f02c430b2,2c079e64-eb19-429a-a4a0-ee733617353a,c535ec62-1b6d-4ca3-8e00-fd0ad9e0de12,f86517db-7f52-410c-acb1-8f4494a51cd7,3c3ad42a-43d3-4063-a33b-82cb92bc88cf,d845faa9-e9a0-4816-aa4c-0d8dc91acf2c,d675ecb6-96ae-4ee7-8f43-37bad5e65cbb,f830cebd-84fa-41f0-b8a1-b1ddf43c2b38,2d99c919-322a-42af-96b8-eeb4c72e35a4,67f79624-e983-452b-a4e2-e95155de5ff6,e5698256-c1eb-48e6-8d0c-ddbe00846819,7bb23180-aeb4-4d9c-aad5-7d04de8dc1b4,88ed886e-bfdd-433e-8194-b8cf0b90df4d,beae7a7d-7982-48f6-b91b-2dc4ad1394b2,662c1a39-0d6b-467d-be59-9499781c2585,c783fb36-b9c7-40a8-b0c9-9c985ac45cb5,5db3b7f3-98fa-44d2-a046-d9a42d7cae49,d363976c-d0a3-4495-b1cb-5aa1cc98ece2,052e84b5-96a3-4359-b798-72edc9ac5f2f,3058a69d-b6af-4c8d-b789-559b43a42446,a5eecec6-b66b-4b4f-bdbd-5eaa2ebf091e,e5d9fbb5-ea59-408b-8d08-eca5fa1db9c7,a51cc051-350e-45a9-b61f-e9e882e415b4,ad8c510e-9882-4fab-bec6-a76534a76929,bb943eaf-d1d4-4e87-afe5-2495cb37b7a5,90523e09-6f7a-4ad8-a741-be2f2be202dc,95c2cf5a-6c9e-4e3e-ae39-6a87463f9b3a,c5e86fa4-2fd0-4614-ac1b-ed72456eeeb2,936bb6dd-7cfa-4d6b-823e-ec814ef64da4,43662407-6aaa-4148-bcb3-934821b91d90,07b54970-432f-4676-95cc-0889fa19340a,0de29744-4bb1-48af-97ae-7b690f7ba437,55200b17-38a6-459d-94cb-82860e9ae9df,b65fa4f6-2ed7-4043-868d-bc42f29d349f,fe847d5d-e2ea-420f-95ce-c38851d55ed4,b7485c82-76e1-43b1-8a86-11b5a2b0f660,eb370a2d-c17d-4bc1-aa01-34c315ce8dc8,33e38db9-7c26-4ef8-a249-5929c4bbeb95,75000684-3276-43db-b580-438b1ad98215,935fc767-efe8-4762-8a4f-0c4d1e5b9299,545be97c-8837-4c13-80c1-9957a7298dbd,479aedb7-d930-4c31-88ba-1f9d327c83de,360f0753-c316-4433-a6e2-df9cff2ecff3,9daa4b33-6940-4685-8112-67d51f35a45e,73ceb8ca-bd97-4dde-bd01-f67c799108d2,4d5a4813-225e-4ad2-beb0-a85c71c3e20a,b6a24cf2-8eaa-4ddc-aad2-ba3657c03d4d,60415835-10d2-413f-8fe9-75422b31d9df,95e86d5b-654b-494a-8fc7-300b35bf3909,d5c16ba6-afee-40be-ad74-264c3a196b8a,b9d29c73-4397-440e-a08f-4f8a8e06a405,77fa527a-d24a-4fe9-97d0-f32dd5ce954b,156cdb63-a31b-4664-a9b7-8e99a86d1340,aadaabdf-e3b1-4718-94b4-02e35bf902ff,c44406a1-2012-40de-985b-3e1f4e15a938,20b121a2-a255-4337-afb0-26827b9912bc,103fed46-a0a2-4b5d-93b0-dd77884c06f9,7d77a3c6-ac69-457f-b823-32413f471cd3,1ae85c48-624b-44ec-8e90-757184b848a6,054c8e84-1d24-4116-8823-11f2d9a94bcf,e9c39953-444f-445b-92e1-a811c4a10779,14f07270-a150-4f94-9a16-989ceded223f,fbfb91a7-293b-45aa-9f26-e90f2b86bf20,b656c9b5-9a1e-4bf1-b714-5df9c6ab36cf,89dabe0a-96c0-4034-baa8-f315e17b613c,88ef7e29-3e50-473e-adce-ef7fe36c322a,10329341-8bbc-4108-a31f-ea09d81d7a8f,af25df18-a202-4d52-b5af-f421bb1b83b4,bbaa4a00-9e10-44a9-a67c-dc7c0e96d327,a0f6bb03-d6e3-4c4d-b500-75ba9b71275b,1551276d-0823-4a03-a692-27ccdde44800,2c8ccbcb-56a6-46e8-9c79-64ad14558a8d,f8689ece-213e-4ea1-a756-06cf43b9a978,567b843c-a238-468b-bd01-2563041a6d15,8e82e658-16eb-407e-b806-a24094941f84,f93dc8f2-b85d-46d7-a77f-192f1d3b480e,9c60df69-9acd-4778-93ab-679f438a7815,e34aece6-2949-4f2b-a9d0-7b753891059d,1c625a74-22ad-464e-8316-6debbcba8c0c,6ca11a35-f91e-4989-885b-888335778891,7905cfdf-00df-42fe-a7d4-8983512bba08,9c9e46bd-3183-404a-8cd8-2e20ac2408c3,3a1be9fc-0b78-458b-bf25-88c97881c728,827383f2-426f-42ed-a1c2-7176eab75c97,dc36adf1-2840-493a-9a67-77ccc287ddb6,afc05f76-407d-43c5-a9f6-5fc561c09d5f,aae262b5-c261-43a3-b8c1-e62f1aff5656,eec65fcb-2932-47be-9f1e-766d232a37d9,ee25a0ea-ec9b-4abc-93a0-f845043531b8,7df5f6e3-0df9-4dea-8c45-3bdaa70e4b7e,17751e44-69d9-4db0-8085-bc1bf5062f35,1a803969-4097-475a-99a6-2ea31573e5da,c9728192-d2b5-423c-a9b2-b9f77e47ffc3,7ef730e6-9ee8-4e43-ab1e-f77fc02170fd,5d1442fc-744f-41e7-9849-23bd2d1793d8,ec726560-0ead-4cd5-a489-f6f5c8583cb3,b298d87d-6940-45d4-948b-5dcf897e5c48,4df48c4d-37a5-41f7-95d7-daee29deddc0,19ddd4da-1752-4ff6-8f10-2fee222c4a50,959f26c3-85ee-42ec-afd0-ca2e723334fc,ef234e26-8738-42f9-8529-ad93eaf1e870,28244cce-2ef7-4da9-89fe-fbf62695aced,ba5a5f7e-d7a9-4eb6-81b8-20637e23cc87,872aa056-4589-4192-8bf2-e35bcf6e27b8,464e2317-b2b3-44b4-a05d-ab9c847f7203,919e7600-16d5-47ee-aee6-2196b15c0418,8d842b66-b458-4b37-a68b-00fca3c9d160,6e45df03-1bff-4811-b0c6-e7b31e662759,fb2ab1c8-451b-441f-b03b-6e2d9e66fffc,bb088f22-22bb-432a-b1e4-b7e92152fc21,64f1411a-c93e-43ea-9851-9c21dc635636,c615a38d-d9ff-4313-b57b-79eca1bc8bea,5dea5ed5-ce2b-476b-9da3-9063c614dff7,6a8c6599-e7b6-4f40-b97a-2ba700918c69,73c2d31b-b536-4d88-8a04-a04f53e32adf,f630fbbe-c8ef-460f-b5f0-bdb49e61ba21,d2c83fbd-b1ca-4f12-9caa-893bf7d8b7db,e7b58f04-870d-4ddd-bd94-4d829ef563b5,a54549ff-aec0-4f6c-9ae7-6a8ee7bd6962,cd129370-c59a-42cb-b8ce-447b8297e635,3be1d81b-570a-44aa-9e73-3be7479a6583,67104a49-9b70-4f5e-ba8c-1d7b25e999a1,3d2b1e3d-d717-4e14-9781-feff5db75e28,72905afb-2e68-4661-93c3-e9c966d7e02b,75e490df-4c97-43a0-b3a0-76f6001b9d70,d744cf00-2df9-4516-84d2-f2bbb3d8cc9e,ef4cc79d-e55a-407d-898d-262d61f872b4,960bcb9a-b1c6-45c3-8f36-3f7a297327c5,01470dcb-0a29-4e6f-819d-87f34a26d5e6,3fc64e1e-3ed2-40f7-bab8-7dd93b4412be,c29af2b2-8f22-4666-be5f-4318d6b4007d,fe68e1f1-768e-4dcd-bfa2-1c8208235553,802b2d8e-2869-4496-bdea-a66034826cab,4361337f-c552-456b-a7d0-c192dfc580e9,05e60501-37db-49ff-a5a8-f54f9fe58e62,2dcab892-2657-4288-ae31-bb4dae25e148,ab17c52c-6bc2-4a37-882e-b0f294e08e4d,63bb911f-7104-42c0-aa05-fa25cffb0b9b,c0f136c8-f1cf-4293-99d2-ae723b46fa21,0c79c0fb-bd6f-449f-82f3-42de08529ac2,456c770e-804f-4ed5-94f4-434fd4d8d6b2,7891bc94-35c5-472b-86de-bd6b539d9452,27d88132-a86a-4e5b-af7d-23c1462586df,34a95eaa-bf26-49b4-bac9-885a06c06212,68fa29f0-fcae-4b33-bca3-846529b62f0a,d68a2591-c299-4db6-9398-be414f002f96,f450b72f-e1f8-4939-80a5-1a086a74e3e5,4bfd7fc0-8784-4d37-992f-8370d15486fd,5881b128-e026-41af-903d-8cdbc7ce45d8,72d8f708-4a9f-448c-926a-b2e40162f9c6,8532887f-2ba0-4dfe-a54e-ab789a3f4bcb,0b1176f1-a77c-46e3-ba63-e39e860682da,1840f0c3-2fbd-4469-a9be-c86e26304e62,d06b1d21-7816-440c-990b-cc2847bff7f6,c18223a1-7309-49bf-8814-11a442fe36b8,4e681f92-fc13-4bbe-abe6-9e235ed788af,f3422367-81d7-4fdc-aec4-0cd9ce0fd92a,97a4d744-436b-47a0-8e09-8793cff4677d,10bbc113-7d47-42b5-ba78-e502afe86c60,844010ea-5ddb-4a69-8dd1-151318a03303,6158cdf8-4088-4662-b5f4-840415362a84,6279e191-b994-4467-a367-c3c880add59d,e0865bf4-6390-4311-a74e-9a576112e70a,b90272f0-0d9a-4738-9872-dc34f66acfa6,98e0b73d-83ac-4f86-b5e8-dbbd82f04dce,bc129bb7-c0d3-4bbe-bb61-196f5051d433,64e8cfb4-0099-41bd-b1a0-d8ee0966d0b1,852f3816-df15-48b9-84a8-31802e252d96,80c5cad8-af90-4920-9bd2-f21dea40059a,24d633ef-4150-44c0-b0a3-b9a5bb40c414,97bc66e9-f8ee-42fd-a125-85fc149008f6,3f8b0669-fde0-4048-aa08-2231c6a35966,6d95626b-bb17-497c-b8cd-c0cd9eb07cb3,212a5ad3-41ef-4435-8c63-e6a5d5b4ec20,eb7cdb98-eace-4e9b-8902-91fdb0b183ae,c6983497-a6fb-4988-be30-39386463f729,334590c7-dc81-48ee-b44d-45fd51b55e23,cd51a6fc-cb66-40c7-aec4-35d80585616b,842b4b82-de9a-4fb9-9976-f98314b3ca74,36b8208d-efee-4483-8f92-0ae60fe5de1e,2a9b673b-780f-4c30-9651-36d7d72c75d9,865a8893-9997-41ec-be01-bbebb664687e,201167ac-8e46-463d-b2e7-f0f458c14f61,00c988c2-ade9-4bf2-9980-9c797de42715,6c0e2951-7dce-46a8-96bf-73e798685e31,8af2a226-7677-4b41-9306-5e44267ca56a,8923777e-8b1f-49e5-b57f-7ca10e47c3e8,9b97abab-9d1b-45d3-ae3a-8d425ad8dfc1,91a1c796-582a-44d5-b0d9-4c42dcf8feb6,a1d2d910-33cb-41fa-98c9-75172fed3729,62061c75-5f89-49bb-b760-c19d29089634,c2e08f8c-8c69-4ed9-9342-6cf835aabe94,ffab41ac-b83f-4c0a-90ac-25c223413925,c879d0e8-0d19-41e2-8fe9-3781d9702585,0c9f10da-3b12-4d32-971f-dc0778f6aa5b,2467555b-d807-403d-9486-8c6f51e70fc1,879880a1-f06a-4921-acc8-1e50332c1a4a,c23dafa5-4a30-45b6-89e0-d27e593c4694,15782472-205b-4105-ae8b-081d3efd5a6a,9700b07c-65dc-4bba-a49c-abc836c000e1,101796f2-eddf-4e37-87db-77e98533d122,763cc33c-4e25-495f-936c-5399b96166a1,900eb63b-4f1b-4164-af29-c1b17f99da04,d7f1be0d-286b-46d9-a586-84432f94fe39,88ed4861-947c-478c-b031-3dcc64081707,e3c3425a-c3a8-4a20-b107-b2e4fff5cd4a,a7444a16-7e0e-4aa3-b8c6-b904ae31343f,0c9982f9-5f61-455c-9c81-6954bcd6ae67,c03c2c23-2764-4b5d-82f7-931c84aac3a0,95ddb51b-6a8d-4e4d-8cae-48a83c5a00e6,53744a1c-aa42-4073-bfeb-88ece973a446,aca86b98-4eee-4869-8586-fb2ef905e393,477ad22e-6122-4f8a-a4e9-195b86f661bd,2cecb515-7c70-449e-8e9e-cd7e8f4e9464,d9197118-caec-4f8c-975c-1d96dff5f9e4,f3d68f19-83b7-4218-9c80-d5fe0d6445fe,d829b930-9adc-4ac0-b4e9-31956ec6ab9c,d2e8f70c-630d-46a3-b06a-9fec6605d7b4,346815e7-ce44-46ca-9044-6912a7a65103,1b14d8f0-e69e-4b63-ab0f-de7f9e9ec3f1,472285ea-fab4-41be-87f4-c0ab5fb46280,421a651b-fef8-4dac-b279-8d287730492e,abb3c49a-2045-43fc-a2e9-8cdc5be00fa9,58bef864-41c1-45b6-b674-404c8caf1fed,9f21d477-623c-4dfc-ba0c-f83368436dac,3a83da89-9753-413e-b70c-32f5b68f8189,86424ea2-5d29-4c8e-831c-c63d7132d1f9,204b2f52-52fc-498a-824b-a88178154212,217ff673-541c-4e06-baa1-fa1d4fd927ac,4bb69d44-a961-42d7-bf87-0b15bf27968b,e7143f21-25c5-4a85-bb37-8c45ecb0479c,316143bb-039e-4acc-9a1d-a6b66e920aa5,954f3c09-d829-4670-8793-985e1f31b7fc,f6fbc7a2-a663-45fa-9174-613d13c9680c,869c292e-31f2-4268-93b2-34566367baf4,9db46dd5-8ce5-4240-9b43-aa302ed1e0d4,58dff3ca-d950-4672-aab0-267824f0bdce,9f3cca3d-9c5b-4a5c-9c1e-018a719de489,f25364d1-7fcb-4cb1-a97a-cc2bd42fe7bb,2d994c78-91ee-48be-9546-9125fa3bdbbe,51ea1af5-8b28-436e-af39-b013909d099e,8aef6ec4-5154-4675-832b-ea0b37362cd4,48470478-4268-499a-98b9-d515c98aab4e,b21a0c48-b4be-499d-8d85-402cf30fbf5e,1a68ebcb-a239-4858-8799-994f92f85527,470ee2e4-f667-47d4-aee2-37f747cd8e44,f5c1c1da-ec59-4daf-8249-4b53259d2f5d,7cf4366e-7b1c-4bfc-828d-c9fb443b0e87,3fb65fbd-fdd0-4f00-9af0-5f7cd6f936c7,35545dcb-54dd-4fd9-9cb2-c187d7b32dfa,b4664d0d-bc80-4774-859e-31842dc658f3,6446a229-a2b6-423c-b431-6469208742da,e8133993-4748-4ce5-ae2a-ad168d3670d5,90671c4a-a769-4f1f-a22b-25db9043dff0,91a2e910-fd16-4ef2-8781-e9a5ccc2e4fa,84e8d2ad-7a20-4098-a89d-836c3bfe3f8f,0548a2dd-9ad8-4f7a-8566-77a10dcd6c54,ae6db626-f948-440b-b7d1-2e2c74e2909b,421841a5-2554-4fc4-ba31-4d6fd2d33971,8473925b-9150-45e7-b6e1-70c86077d5b3,3e3f817d-bb7a-4ce3-b39d-d603c564c795,0f8a7f3f-e21e-4ca8-b6e8-65822aba790f,4739cffb-8322-457a-be27-4192bbd945fc,6ddd67ee-846b-43fb-85ee-9bb9619954ab,6c031115-1a32-4ecd-b79a-baac3eacfd73,b41c1c13-d394-4ca9-95ac-da44a1df912b,b09057b1-b1c0-435b-a2bd-f6be0fefce58,5cdd8588-4b3f-496e-aeb4-7bfcd590c95e,fe42619e-f1d3-437e-aec8-159bbf93549a,550052f8-287f-4942-8c0f-8022e134ab78,a7a5d951-81ae-4487-8f6f-f07040d68474,a8c1f81f-91ae-401c-975e-233a89113bec,edeb57a9-b95d-436e-8543-2065f738f372,bce5c6b3-a62d-430d-aa1e-7e0f8901ae5d,0378fcfd-b677-4bb9-9837-b16c15d060da,81ddec65-39e3-488a-a569-1fa9a066ccc6,d2936ecd-f172-47f2-82dc-c9c528ac3db5,44a74fb8-4374-42d4-8905-7dad50e6e9a2,0caae38c-5bac-488e-bbf9-1de836cd2c29,3753245a-1871-4c30-ab36-ada886733ee3,2690c3f1-42b5-442e-a982-8ce4b5018ff2,2b490fe4-577b-4289-8454-e807fbdc3132,dd080eb8-21bb-4213-a933-18d3064af4e5,5a5d6b5a-6be9-4985-bb0c-c597999008dd,763276d0-7495-4757-b438-12ded703ca5d,5ebba9f6-4b1c-453a-aa47-641f6be6b6ab,6a53dc0e-b01d-4aec-81bf-ef056ca68621,eeab4b0b-401a-480a-887c-a905caa8e62d,a52c344f-dd55-4fdb-8d80-c6a33a10ae4a,e1d6b7be-4ce1-48fb-af8e-96d8d01fd07a,91b2bed9-473b-4048-8347-f91b4214c156,90736484-c29a-4a78-ba23-0015194eb23e,5fd58fe7-dd24-4fe0-89dc-771f84b993e0,c509b38b-5219-4ae1-bf5c-6e4aaffce542,d682ce1f-ba1e-483a-8540-e6fbbd083d93,2835104a-37e8-4d24-97d5-5a31c2f71607,b4a8ff2f-659e-4541-b443-2b152dba3f61,74e042a6-c84c-4d2e-9579-93747e1e636a,bcc1dd58-e10c-48a9-80ef-a2351781a37d,65772a75-b1f2-4b02-ae65-515a24691c11,b1fa74ca-7b0a-43c8-9016-c9d1ca017490,41c37374-83a6-47f0-b97f-87a4a1988a3d,dad3629f-b360-4571-b4a8-5f3ce2913fed,0f45c551-5b6e-4bf8-b8f0-cb2721b2fbff,834564db-53dd-4589-bc6f-284af1700d07,3ee62d28-e4aa-4632-9623-175403a808f8,a41aa08a-bdc1-42d3-b121-bef67c5d4962,72b4413c-080c-4832-8851-e668384c17a8,ed376cc8-5054-4877-8355-4534039e6838,54e613f0-76f2-4051-ac72-5eb346124851,965fb24c-cead-465e-9bbc-0b4a3377e485,e3c732bc-a656-48da-ab88-b5832804eba8,b04e0d33-e53d-4531-957a-a06d043fccc9,feb205c7-de2f-4fa1-ace7-e5815d8ca830,caee6f48-e661-4607-92f9-2accda56656b,c44b8df6-12b9-4650-8083-03aef45240f2,a24a4f8c-3868-4948-a307-a4ffdc74179f,15fc0356-c46c-44e5-8fca-afad6877826d,e9cb1142-fc8a-47df-8fe2-1a98e6e703c2,03d22a93-7035-493d-bb23-54edf3941f16,5dd52f97-194c-4fb9-81dd-eb1d79199286,86ba25d4-b35d-4cb4-89e6-c0ec58b4ea41,c399c75d-81f7-466f-a247-92928559d4c7,9d39b02f-91a5-4a76-8a02-d4ae1192f31f,2e6f84f4-3662-442f-a35d-794c5526db1e,cd5e79ae-9244-451a-84e5-e98632837eb4,c69d25e9-4f15-4924-bcd0-ee3749e76e40,ba8aef23-e5fa-475e-a9ea-f021c6b3553d,4bbe7bb7-ebfc-4c64-8158-c912ff4f2f65,d204d5f7-b269-4814-b215-d286df75a10b,9cf4d171-34f3-4923-9f20-9e2575efc553,5c9abb66-f74f-4b98-930f-07d486b3abd6,0306ef20-15ec-4457-a63f-11d7a0ab3a6b,47ae4171-7d6a-49e1-9684-a705173c1ec0,3e137eed-e85a-44c4-968f-7c0c8bb66514,9ed1b665-0448-452e-8f17-c4889e52ccc8,045f583a-72c7-4986-99c2-0eb16a7fa0d3,c3f02455-0fe9-4633-b0ef-72a05d7a61e6,ccc2398c-39f6-41fa-80ca-b28d9d1a3b33,f6c2ca49-006e-446c-93f8-86d92978b6ba,f0e78fa9-c9bf-40ea-b9aa-a6eaba049fb7,95634484-2ab9-4918-ad6b-d163ba817f82,9da52e08-5293-4360-9db8-bb013909f656,c7e27c1b-07a8-4b28-8283-13e26ee5f663,0ce4c407-20db-4c03-a581-87a4c89fe5a2,c4f0b6aa-8e74-488f-9e22-894d518310dc,3c27341d-76f8-4fb5-b5d9-60e8eb5dcb53,9ce135ad-e91a-4477-80cc-3cf47d8f8f23,184dc860-312e-425c-b657-ee9ddb4934fd,d83d9f67-8809-4597-b5e9-7f6e17a81e2c,9fc80491-d425-4ddc-91a6-db88856c7fe8,2eac7afc-1246-444b-b155-42baf2cd0b1b,75e0af05-b121-4791-a00a-8440599e8110,2b2f4393-91d0-4313-af57-7f247acb79e2,9e1d9fd6-d0cc-4d54-9b66-ba043ce2e489,a2bbaed6-cad1-404d-bfd3-5e52ca3f2d71,f2a50af8-afc7-4bf5-9ed9-3d8e1b263c10,bb0bf435-274e-4e57-9e71-6569aab853e5,5b51e165-2d75-417a-bc05-4fd593d85fa1,c3856e5a-c246-4cbe-a2e0-b96659d1c2c5,2b1ea46b-ab35-404c-9944-84d1a5e70426,c72b1666-9caf-488e-bf69-9714c44c0d82,b30bb48d-d386-4fa3-a25f-1d40e09cbae4,78ff982a-6239-4f65-a0e4-2080b55886dd,b203e394-592b-4363-be7f-95d3090f0c87,2dd5691f-e8d0-4087-89e8-e3e21ead8bdd,8dc86132-b253-4a9e-ba35-4e9c72b8732b,d505f6ae-22b7-404a-aa71-635ed872c296,75355391-2dda-43ff-ada1-b88e0893d88a,9ef1a84b-3387-4baf-89ee-4ff1bae35e87,855f8047-80ed-4e3c-af5f-a561e517a4af,d5de06b1-4b01-48bd-aa2d-46e2a08b012f,e626e56f-74c7-4451-8653-359208667200,d7f27be9-00c3-4fde-ad51-09c389784445,d1b99558-e759-4276-8bba-a98b7127b191,ab1b8b47-e933-460b-a672-c5044c534200,c626867a-4663-464f-b598-0cdfe56e0b5f,a4e1060b-17fb-412f-995b-9ed20336b7cc,80f8bf9e-04d5-4b92-974f-b1626629672a,18b16c4f-950f-42ae-a29d-a546610a1430,2ecf00af-36b7-4027-bb5a-b1d0e91524d0,1a967172-16e2-4bd5-bb03-8bed9488f6c1,c54250cb-bb89-4c80-b52e-85009e708c28,3f657d86-740a-41b3-b9e9-3c5f02e47ffd,30f40ffe-0e22-4b07-99bc-b6c209e27317,9499fad7-ef9d-4abc-8d79-12a35c11d913,ef54bd21-abdd-4e86-b41a-e3513e753e48,35d39222-5ef8-43e7-bc9d-e02706d8b047,b135512f-1e7c-4693-9531-66482fa29ed2,5fb9481a-4862-4b08-a424-af1a88a89179,20a8056b-3afb-475e-9148-cbfa1b30fc91,031a5030-38f6-448d-805e-eac429cdcdd6,05908f34-736b-45c6-9620-ead9ceb3a5c2,0ffb2184-5810-4bee-ae56-8dffa92ebedc,af3aee28-777b-437a-a7b1-67a9dd5bcaf6,21fecb63-c60d-4b7b-826a-ee695571d69f,0285b9d3-a6f0-4ee7-b67d-376f1859b512,9ed30c73-2278-4f66-89c9-5063d08e0008,4db3a20a-7024-457f-bb53-cdf382e773f7,345c500b-9b7d-4826-bd8a-d8733ca75314,986f98b7-a589-4df1-96d9-9e3460a77a74,db2b9f87-03f0-41d2-ae91-1b811aeeaa7f,78316274-e850-480d-886b-dbc4ac1a7dc0,b9a0e740-20f2-4935-a86e-d0d659fc1773,ce3124c2-3463-4053-a796-3621708db002,a84cfc95-6cde-46fd-a55c-464845d6c12f,7412f1dd-e3ac-4c7b-9906-942faa7774de,42024bdd-383e-4062-8184-d9593ffebf49,5c0e806f-f74d-49ae-9206-cde0227f03f1,24094708-a0b3-4255-b40e-6302a2b25330,0d76e21a-17b3-4bd5-aca5-5b3e51addbb4,5aefa554-83a3-4115-832d-373b3a373a3a,c2f7d82d-4665-4fcc-8759-6b6ccaa5c093,5f26cbda-cb56-4355-a032-3f03d36ee509,6742a8b4-44d0-47e1-8a66-079d87497cf9,c825fb4f-5798-4873-92ad-46ba08776194,a48b9559-d2dc-4a88-9558-7c1e08ca5e23,b2006606-8e57-4338-9b7b-937036787c4c,f295b9eb-3a05-4ec5-842b-f9bfd2bd2182,e971c945-e1a0-4337-a907-4bb6a20a3801,e2390750-38fb-4d45-8fda-5b745d09c93e,5caaf7a1-6306-4b9c-8a13-3926a1ce3f62,1dc53f1b-5f9d-437b-9dde-7ff69b3ef183,bc9c1e4d-ee99-4a3a-b455-a901b99e541b,59848ca0-2f73-4b55-b088-aad84f7937b0,b5275bd7-0efa-4cec-a69c-9986563662cd,971b0863-d4d7-4101-aa95-f6f502b50c3f,6b81ab1b-e83f-43f1-876b-f548fec72331,361b3212-8ed6-450d-9a21-b8d61802b563,1447acb7-8c7e-403c-81a1-734e2f2f23cd,521f1a0e-318d-48fe-98d3-4a7d5060c973,16f0fb0c-4b13-46ae-8107-5df3a05a19cb,16ce6c50-d80f-49a5-920f-b9d9052bf559,3f7bc75d-00b5-4619-b1f8-dade81f5d2cf,450dd66b-77fe-4fa4-a4a3-f30b2bab7731,3c852617-9e35-4ea5-9286-60f267e45fde,0d0f7cbf-3315-474a-bec6-e8afcd75a483,c099a66e-1b9a-40ff-8f16-5a46b8fff110,75a112c0-1c58-4e4d-b3a6-5b61b349850a,8716e372-5139-4f7a-9e5f-d98b8c0f3d78,97909d51-b4ae-43f0-8af0-ab5cbb72b261,a991e06e-6863-4c48-8a90-f8f8979d21a0,ae3ee000-5b66-46f1-8ead-abdf559ddb60,399d3b4e-4db9-4ee2-9ee1-513428125be3,2515e747-58a3-4db7-a132-d6825315edad,9d076e05-b5e9-410c-9b3a-21fbe1c5c3a4,94bb4656-8b3d-45f7-b5ce-518b39b45bd2,810235c0-8901-439b-b6e4-e4a9622edb33,50ba8ecf-30d6-4640-a944-65e69f03d1e7,50cbac31-7e1c-45a8-8b1b-4815d748be03,bf886e99-f47b-43a0-be63-0edf629890a0,9fa5d941-4c13-4191-a9a1-9967e7283ef1,50b85288-d506-4a98-872f-b510e22152b0,de878fee-1b46-4f16-b133-517634fd9f68,d6653876-12a4-498f-af9d-ae950efa0cb3,6d6348bb-ff15-48bb-9acd-1f39aff08a77,5fd12252-830e-4968-b4f2-2f43fd26a6f4,3ea29c4e-7765-4955-9932-c6451cddfae2,d416d69e-bd4b-47b5-8a1d-c2cc4e89e137,27c9e311-9ee6-4c73-8ebb-b408983796e5,70e465ce-4b5f-4a82-b83b-4a65c6d90949,c2319e50-41e4-44ed-be4b-51a8286c7a4d,0388fbed-3c48-4a07-94e6-dba1bfa4de38,b15ba8a9-fabe-4a55-ab1b-8c9d27683b73,2f08eb59-3011-4e45-8813-00cd7c867779,450797e4-9384-42b5-90b1-51c4c47e14ec,04950ba3-394b-41fd-b078-64f3a6f4acfd,2323d4ee-eb29-4b26-83c5-f2ece3ed0285,ca82265e-4a25-4dc0-88a8-8af4c2f25508,3a545124-91ed-41c2-b89a-b8cd3c4e37d5,3560dd05-391f-48fc-9242-267c130bd471,15be5b8c-ce27-47af-bcd7-5871dee4aa0f,02a004c3-ffe9-4044-96b3-8617e9e2388b,0cfb7afe-1b1a-4670-bd3e-04ebe9d1534e,f951fc8f-c49e-4238-a305-aac66c995438,80266845-4c84-459d-81ec-140e96db8f72,e10fed18-4093-492c-90a3-633ea93ea2ff,d3e02a5a-5162-47e7-a557-b657e7bf2b47,345daf28-f4dc-48a3-95a7-1504f5732d4b,9ecda89f-18b5-43e7-a2d8-e6df2f5cc0f1,6e0bc657-f615-4a9c-b3a7-69c3339a26c1,c5e3e26c-0812-40e9-89e8-05e4ca96c039,3f71a6a6-7504-4e5c-af1e-8dde6ad7a928,b625f0fd-e7c9-4469-afdc-ff786ed77209,226e4fdf-5a3c-42dd-8018-42055736314c,16875233-07d7-4584-a129-590c16986d40,41f03895-035d-4b33-8181-49e7e8a2f01e,913998a5-f993-446f-95da-e91f1c5ba4a9,0872c219-69e5-49fe-ad28-09e946b7e31c,35964358-ff5a-425f-b18e-8ed724c2b603,e71cb5bf-2afb-4738-8554-bf73c52868ba,bf4a4791-3f34-40b7-82a8-27e043ef7c0d,bd68cbc7-5441-4c39-abab-0b7977de30a5,1a470324-a316-4446-8c6f-57e394e3220b,28fe8ad1-4477-4e0e-9e90-6c3d06a0c8cc,b5f43d47-61de-491e-add3-4358e880c140,02bab5c2-5e74-4ca8-aab9-cef16c8a1d15,80b17eb4-91fa-404c-b1c9-3d3d6cbc4318,e838fd91-e97d-4fbf-b717-796183725536,94e079c6-6372-4d1d-9d6c-33e25d12b823,1b67b9be-10aa-4ab9-b080-24194d388732,707ad48d-9125-457d-817c-6c7b257e1205,84332641-e67b-4fe6-ae0e-f9cfd0ea454f,e0196613-4845-40c7-9292-4392e9126a96,1a91b00b-5bbe-418b-9577-d3fc1b6e0dd9,704a3704-15ac-45d9-87ec-7c860ffaf5be,b727f8c8-991d-45b1-b18e-d5c1ebb3dc20,fec79bff-8f0b-4ec5-ae2e-a634c3268e68,b1068049-f040-4907-8496-49df3409fd3d,60d95c22-6d41-404c-ab25-b84daa2c9fce,2757d003-810c-4b95-b3a4-7cff27e4a6ae,3ecb9771-80bd-4b09-954c-b7a1bc49bfaf,e072d9fc-69b5-432c-b2e2-5fea44d22811,fe95d5f7-94a8-4070-ae6b-f31aa8d403bf,a128e9cf-ae6e-47e8-8648-40c5490bbdec,118f7a3e-fdf6-4630-b158-eb552ffa6c3f,eedd1bf5-cef4-43e5-8659-9eb2abe8bf44,790e8c08-67f5-45c2-9e85-6d3fc1c06f2a,b58616d1-0b02-4738-96ab-08d37ba6328a,b3cc6efb-7c9d-45d5-a4c9-062f8d97bd58,da0e0ce6-0273-4e1a-8b34-3a8a4fbd2ebb,16ba1619-f81b-4770-96dd-f3ce04e20aa3,d9d20cde-4aed-4a92-bb51-cca878ca580c,fb21e19d-fa71-4c96-b023-a27b92360292,f5b3de7d-28e9-4c75-9b02-2fed3cdac4cf,cc06e31f-c571-482a-ad48-81be7d47804e,bab41d26-79a6-4cbc-8199-b22faac6f0e9,a1ecd05c-1bdf-46d3-8583-5213320f1043,af938bd2-e482-433e-89ae-cbf42e443b16,eddaebae-bf71-4270-b739-47fd4e286b6c,99c88cf7-315a-4cba-a5b3-3a84a8c5c9c0,ea89b12d-bc3c-48dc-8a8a-ae411b97fc17,c7517265-70a9-4ae2-8c1b-702563bf99fa,594588ed-c60f-4dae-8028-53b1db3c5a9b,75ee075a-7596-45af-85f2-bdf6d2ddb1f8,c81ed369-78f2-4a4d-a0fa-8aefa5b50426,eb518f30-1d14-4e5e-9c1c-bd466bade792,fd9ffecc-73a8-4753-b99b-6e1df6d176f8,3a29e4ed-4008-4499-9447-327176c577e2,b784ba92-27d8-41b2-804b-d19ca7850af7,b383a67e-c0b5-4674-9a23-8a2c0859d360,21f505da-1c44-4ce9-b395-89cab6bf2981,f7105481-cd5a-4bb7-aa30-afa19d70406c,018ec1e6-4be5-4b01-982f-02079a346bb3,417a5aa2-bd6e-4cbf-bc02-1b6dc4b2112a,009acd62-5ea3-4269-9589-ac3cd24f6707,9b215bac-405a-4742-aa7a-c5716739c666,7f654fda-9c20-450a-9ee1-d0164de9fb48,33350a23-c4be-41ee-8037-f7cee6ef8776,14972f8f-53e3-4fbd-95bd-761cf57e3872,afea9dfc-8b80-4da7-b316-06781c0c6db8,1c6577e7-c27a-4de6-a921-9f3c0710b8fa,d8b36842-a1b0-4335-951a-91e2eefadc86,ffd5535f-71b9-488c-8780-775d6dda6948,bfd32b59-26eb-477a-bf5b-9e7a0795de52,f9c4d959-ba21-4263-b5e5-07c970c968c2,00faaffc-4f7f-4efa-99a4-b85b92525f87,68b4c8d8-8ff0-48f1-b19e-e2f61c9cc635,57d7dbe7-d0cc-42ad-a7e5-5e6f3c8cdd2f,56b9bc11-a4bf-492c-b31e-daa7e8b109be,e25e1c05-613b-4d62-99d1-2b0e8e1de516,29c0b3b5-fe65-4285-ae03-3e6bc4f53a2c,c41bd776-2c65-48ad-ac78-5772c006284f,4d6033e4-1bc7-4cc4-8f1e-c288f8463c49,35d15fa9-f41a-4ec7-b5f6-91b0cb745fec,5a2cb846-746e-4bfb-b8dc-8bf118774b3e,c2a95a83-e868-4a4c-9846-1eddd5c78245,a3b8d7e2-2dff-4183-ae28-29ba313f796f,3d85583a-6e43-4514-8b51-0c7e719e5b33,020cf96f-d232-4023-a2d8-b57e813227ef,31afd725-ecb5-4fe4-a686-bc99415f3996,23cbcdca-755d-4633-a908-6859cbac28a1,690bf243-ebe3-4ae4-b04e-c8a9855bbd86,630d2019-d966-4e6c-8d93-ae7a254d305f,526a5979-8623-403b-a713-c8c142cc8477,8ff9ef7f-148d-4221-a8a3-c12adb866e46,11b51ba2-dfdc-4f65-82a7-092c84ac970d,dd910e8f-8782-45ba-8506-c6cc7707cf4c,10950c30-85ac-46a8-8a20-8d8f9db91f18,2ab2e9e8-b50c-43da-9c08-2bbb73300c5d,3a9b2b42-389c-4761-ae93-b2e5a7cf618b,cc3469a1-f87f-4c65-95dd-39a4f9e6f333,f828bafa-b5a4-4fbb-b99e-36ef03b93297,e0b969f5-3e04-4fb3-9933-084118d4e9b7,318dfe79-85cc-4d84-811f-6d20a7f670b3,81a9dff6-17c3-459f-be57-3655c180d486,e448df03-199a-43f0-ab08-a757b0ff5b52,48c69d3a-e708-4324-8760-a21c250e5095,b7ffc36d-2030-41b1-be54-72b21fe0007f,b5f72ce5-274b-49e9-9a5b-08936ad1bb79,902f9dfa-6779-47f4-8a07-73ac5ce01ed2,40b38236-8638-4d57-b2ed-62393368c81c,996fc15a-8434-48e6-922a-6841926dd793,1afe47f7-d64e-4a3c-88a7-fe3a251cad4f,cf78f9aa-4fee-4b65-b1ed-7e169c3831cb,c1598b77-6dc0-4f05-8dd6-eb1bc63371c0,c24dd0c7-7933-4249-b28f-c8726a651383,4a8d9424-f5f6-4a6d-afd0-ecb50d46b5b1,e4c360cd-132d-4c96-91f8-5f81ba446318,132f84f4-ddf4-442e-acd0-6db7c498d419,522e0f9b-0017-4df4-8888-bb190a649ed3,00260bab-f1fc-49bc-86af-abb856095551,e330e882-328d-4075-ba22-6a2fa2f21045,9280f5a6-2ddd-4d01-808b-b55f7149a796,27c6d706-88f7-45d0-a5a0-f72b3894995b,f1150e34-8e01-49e1-bcdd-cdead0c4cea9,415c1d06-4663-4c2c-9c82-5656846b78e7,b10e2096-adfc-4a48-8b35-8800f35456d2,a041a324-5143-4bfd-bea5-f2b6478fcc58,38b70594-095a-488d-9929-2a825e948e1e,da708320-5bde-4227-94b3-038700a035b0,521df482-e794-4e7a-8edd-7d8a187da039,af7d911f-dd74-4d82-a2bc-a3980c42300b,1e578142-8811-40a0-bfc6-132e2f53b148,765970c1-379a-4c5f-9cde-96120640b98d,14819e7a-6ee5-4e0c-95ec-9b7dbb55d8f8,719e72ef-dc93-4235-bb38-62645f17b0bd,4d3a9b18-728a-4292-b958-8c44fee1d52d,7a36e522-eecf-462e-9362-7eb850f8e00a,948009e0-d692-44ac-96f8-83bc6afcd2f7,94ed26b5-e9df-430e-8e8d-5b0d6307bb19,915faa49-4acd-471c-b24c-448b5b8e3460,d18dc3e9-34eb-4ad8-8d5c-93f907a22045,4e831cb2-1136-4337-801c-a8a1baeeeb5f,fc0a0140-77ed-4334-8a46-6ad387e8dbac,6143dc05-d95c-4bf3-9c9e-b87b9167a868,38e4ced3-4c2a-473f-bed2-f8149e58f0c4,3fe5061d-3b75-4cd1-aa71-8c67f636354d,70cf876b-e4d2-4c23-b638-854aaff67864,d8913ae7-6915-46d6-9c7e-234013cac442,3edd9133-990f-416e-9e57-30c519aa6e2b,8cfc7a51-e81a-48f4-b5ae-83b51ce906ad,528cdac9-fc14-4108-b351-74e7c5ebbc9e,0152a068-038a-455a-ae89-2afd114676fd,b9b37a62-f9ce-4c73-975e-137d4cfab96b,839f2852-f4ce-479f-a833-55fa625d8189,8b47107f-7d04-4240-b023-01472f1e3d7c,010e8db2-3f1d-45ea-95b0-b198d2e34499,d76abd37-2db0-49e8-acfd-f80263cb3cf1,24524eb8-e34e-4c18-898a-8fa5b3bf85b0,789f7690-f861-480c-8407-086f3d145b5f,abd09e9c-9832-4ec6-bef9-312b1b8e0668,d0296093-e0c4-4237-b464-b01f4260b0c9,db47d990-4f1c-4a6f-b61a-5356014acf66,eb4038fa-d022-459b-8433-bfff41e1320d,7d0d86f7-d929-4b50-becc-05fdbfa0401a,513eae93-ec8f-4d00-82e6-fbbc995bb349,db1c1907-4350-4721-b6c6-1ef49d59c213,e5398308-8399-41b2-a4f8-3eca62a3c9e0,12e23bb4-e5ab-4686-a710-9d4990075609,35c4b5df-bc5d-4486-97f0-3b48f82adda9,194d0d01-944d-4d94-9ed3-b2944ba16499,e67b560f-3058-4853-97ce-9882d76fcad3,f4d3a085-4a4a-44e1-b03f-556159cbde64,5b753879-78ba-4b53-ae65-d32082a11008,ea43d075-3b70-4ae2-af59-6a03d8ba0b25,0346b9f5-125b-4f83-a47a-03ef4f70ee92,c3ef153a-f1e2-491e-a8fb-910673fb20f7,d2d7407d-23c9-414a-8460-a5f499f32770,bc01fb50-8ae0-4068-a961-c3a9ffe220e4,ed056990-7cb7-4bc9-89b0-7c342ebe8607,7c5cac85-6a5c-4b3c-aa5e-9646223f75eb,d7406677-84e4-4b27-a95e-aa1638be0ebf,383e5818-ec5b-42b6-96e0-c053df545e65,8f5e62c7-e074-4f60-870c-2b838deaf212,7ecc3015-c7e7-42c4-ac50-67fcef3b92cd,17e3cc88-5e4b-4030-bafe-0a0895178207,50417e7d-8ba7-40d8-9a1e-8ce239097d5a,9f7ade49-c30c-476a-b009-31586343e0a6,e829ec18-7e85-48e5-b05d-59263c321b66,27056de5-90c0-4e84-b3c9-4cb50b6144c1,7fb2aea6-27d9-46e3-9a97-18af961b6d73,a3a6e133-c253-4846-9fae-473df9995fcc,5881deba-c5b4-41bf-9c82-1e951540cf31,19ab1b70-c516-48b8-a300-422f39c1103b,7961ee4e-e4c6-4035-97d7-522da9d5d91f,6020dd00-9a24-4a86-81d9-d6699c872479,4bd146ce-590b-4d09-82bc-bd259613f1c7,7a2d5c0f-62cf-41f4-b64f-3100322b8efa,9a39ed47-a616-482f-93e3-a1277d572cf6,3013fd2f-28a5-42bd-9f1b-adf600535c44,9c04c156-38a5-429b-a5e7-1308c690c72a,c60c32a9-5309-4c50-9017-3d7475e4fae1,081fc07b-d5e4-48c6-8124-52791d42b456,99ab723c-7390-48e6-8ba4-2e330c7cb4e4,77b0cfb3-b2eb-45d6-9291-0e925aa9f247,a0d5b0a4-8294-4a2e-b074-770d7b7ab2df,5869b9c6-90d0-4751-83dc-30253bd19b79,a019bf43-254e-4e43-bfed-6cba54510729,d2698127-43b8-41a1-9900-6b4f4ae6d009,117da1c0-ae17-4962-ba90-003c98512c13,a615cae0-cdee-4715-ba76-b2645c508cf9,9d531cb6-ab3b-4602-a62f-a308b0c6eead,30a6c938-1928-46c2-ac69-5fc49ff5d47c,b5668837-4b11-44df-8013-51d954593dfd,175cd644-e9e2-478d-841d-579f92e6d66b,32071bf7-5aaf-481e-92f5-6453997f99f6,22e0f212-d53f-4b6a-a612-efef6d5e9775,6bcea205-55bf-4ef3-a40b-7826f7686cad,d3261c51-b6e5-4b1c-b3e1-dbd56b27f41e,82c80f9e-94f5-40dd-b44c-ba98dd1f6ff8,89f2759d-759f-4a80-b91f-8a91a4911fa6,30cd3432-7b6f-4ed3-ad0f-a4e896188674,211e3f1d-8b64-4af8-adc6-8b2c29e359ec,c87ab012-7143-4493-ab2c-1cdfbde45926,0a641207-31e4-4f53-97d3-53db40e10f60,4c51ea00-f0bb-4ad2-aa0d-fa12023ddaca,5e382418-4178-4864-b1e3-6ac909d20435,da0405fb-5348-4384-9566-4486aab9262d,21c24413-9497-4ab1-8a6d-f2a6719077d8,32f705f4-afa2-42f9-a212-e465716833b3,d3dba69d-88ee-4d16-a3db-3114f07fe011,b5666adc-31c2-4269-8da5-450ade3c2e7b,96f5e12e-e52a-4603-be30-d6de36c09bea,3dd5439a-0917-44f5-bf05-b23d0b327bf0,352355af-7f8f-48b0-829a-38a0dcfffb3e,cfad1172-2c69-4e10-8af3-2c677b22dfcf,9a3110e4-7b47-46f6-8ab1-db16f77a1293,238bd012-2fee-4563-96f5-0c88328120cb,564c9f2e-9e54-4c92-b7fb-f9a0d43a0608,e9ff337a-e224-4a7f-9d2a-d6954d39b557,9522fadd-fb69-4403-9bc4-48201401ec92,8d6452dd-7054-411a-8c1f-e0318c734bda,9b0152ca-92f0-4e26-82d1-7836f47b8681,56464690-d89b-4092-9ad7-c1bd2e2e10c4,d5751f55-63ea-4ef4-b340-f6b0e15e47e4,c6f553cd-bf42-4fbb-9d47-a2e7b2620314,3f0125eb-691d-437b-a668-bba4fa8a72ef,1138bc6e-5a08-48bb-bbf7-a3f54e941f0e,a83d4e61-ff0c-4832-921f-dc9b78167ba1,4afce32f-c997-458c-99f6-e2f644f2a880,a90c4dd1-ff11-49d8-9ce6-400daf522995,0256bb26-0faa-437c-a692-bd475d7e0769,e8fe4201-8978-43e9-a527-6f0e217358cd,f7b547a8-4703-4a25-9bc3-08a6bb476974,dfafdb27-0393-41f9-86fc-04c5fb81e405,44d8fcaf-21f8-4514-8cd0-50b380fbfc28,2a266cde-dec8-491e-b01c-676b39472b65,b494ca59-b74e-47dd-9804-c1f173312537,31e65815-3922-4c75-9271-41b6973c4898,fc74d613-13c7-4265-aad1-956a95d3f9d0,62915bf4-d319-47f7-b728-b0b7758d334b,da82b3ac-a4a2-4ed9-ac57-ce075b697660,5ee611e1-e1dd-4259-9b36-6e4fee7b783d,3e8bbec9-65c9-400c-8b40-94b452cb55e5,1e13ed82-884b-4a95-9832-01cc2217acc2,c025097b-7aac-4a64-9f0d-27718eddff28,28bf4a27-8697-457e-9469-9d927fa23af4,55739465-ea73-437c-b537-78f5555c916f,7cc4fbd2-de8c-4bf8-b78c-c9eaf762176e,c6fe3e29-0490-47f7-95c9-7430a13ff682,2131563d-96ae-4e28-81e5-73decb447250,6a334802-9fdb-4be7-b74f-7f2382781295,100d577e-b515-4653-aa54-cadc60efd7ac,288385b9-5980-4326-a221-3ea5595a3b57,a4c520c4-a803-41b1-9250-ff68b56f22d4,3ef6017e-08cf-48f9-9ccf-3639cc6b0067,71f4dd09-ce20-4fb2-8a64-cf4d835b960c,22f97fc9-e908-4ce5-88bd-57282c3df39b,003dfecf-e3f9-4dae-861d-2bc324c96bf1,c4ef4ec8-f6fe-4e29-b318-4871312bdb76,1fe7201b-c07b-41c1-a5cd-bfee45f51484,8c00cc33-4586-4009-ad8e-3671e96e8281,f1dc16d8-f991-4f8f-bf5f-23ac4eb1f110,c35a315b-f1d4-46a4-8abb-9d7c7023e422,591ad8f3-fd75-45ad-98cd-bc4e383a301b,077aa024-9927-4d88-a8fb-5b1f63b375ad,e50ddbfc-3fee-4394-9c13-48af7d0ea625,387c6897-d6d9-4738-8290-2da1ff90fa0c,d53618f7-fd54-4bb8-ab15-90bff4ce0d91,6b77dd91-7f14-4c02-a9d4-60969c107b09,96969fbd-95d2-4053-abcf-a95584233a47,0b747855-171a-42f8-8179-2b89d1533bb2,c6300a19-2af9-4954-a632-b03133a1490d,e67f6862-5a8f-417a-b937-efb824a042bc,190dc190-e5bc-4d46-8802-da8c3371f9be,322be9df-c460-4474-a854-a045f47ccb19,4b0bf5cb-5ca4-4ab5-95d1-824cf8661361,30f46da4-0032-4278-987f-cbfc5df39919,773a2f9a-eb4c-4bbd-8010-7ff8f7610962,50fbafe2-45e8-4921-82bf-de4a3915b312,02dadab8-bd16-4155-ada3-4966d0f145fb,085179f6-5fd7-44d4-97f1-e2e787cccd25,d05efd04-0960-497e-905e-fbcc6c72015a,8a93e3ee-3418-44b7-8bf9-561e34df863d,5286fdf6-3cb5-4cb0-86cb-06e4c0d78499,0615ff3d-4ab7-4e2a-8ea4-d90491d7c9a1,1ddb2d2b-34bd-4cfd-8b31-9afb2be74b5c,00072b47-853b-4f2b-ab15-37300f22ba29,63391758-6b0f-40ca-bbe2-ab3a2bf2fe12,0802a322-6d8f-4b9f-ad85-f3929d4d8897,c6e92f57-32ff-4e30-b871-d2958ac5a23b,aa6f8efc-e9f6-4e23-9795-7fe962ec91d3,05301636-5be7-4f74-95e9-0f3704c8064c,0e7d639f-7ca3-4557-9d39-dba0cd2ee285,a4bcd52b-4f36-46c9-a5d9-cde0b80f97e5,4d343c44-1a07-42d7-b552-9a41c03e2ecb,1b2257f9-1ec6-40e3-a209-3765b4fd81a3,cc7d3b3c-2f82-4309-9849-70a05d6c5603,4b6f1964-3912-439a-aba9-0e2ee98ad90f,c7eb171a-7290-40c8-9d98-63fc114e2935,38cbb858-9d97-490c-8674-59e50dcb5957,a1748678-e9f9-49fe-b0b7-aa28630c002f,9f564055-06fa-47ec-8156-e67b2ac1fe88,9810663f-4a10-40dc-b7b7-51d53dd29302,6cd59b24-15fa-4f69-b860-17e03853bd99,b1872e34-4afd-4e41-8243-33fd88fc1ab5,26c5f3de-b7aa-4778-a678-e9d45f259112,021519b0-455a-4484-9c80-ae72054a135a,d1c62d88-d392-492f-83ea-e9a9c86bf415,ad1a96d1-f318-4341-a726-3ff708ef3644,75a76a00-baf9-412c-9322-ce0b5f6cce49,eb7e88a7-886a-41a1-85b5-415877cd540c,a4f931ce-665b-488e-87c6-6a9a322017af,e242bb54-0959-4ba4-bb33-f9c7f5c27305,7754adc8-c99b-4048-ab14-5222d36d50f6,da7fe24d-eefc-4b13-8277-84fd27d3451c,24b13336-9ea8-4f9a-8523-909fbd71ae9e,541b5203-aa41-4307-97df-9b203cac72de,a08131f2-7006-4bc2-936d-2c1b9f1e1f0b,9daf77be-3d25-4a28-b5f0-2115382ff726,2fe3895c-f32f-4e02-88db-38fc8c9eae68,2d4edbe3-465c-400b-8b91-912630a5fc79,ffe7756e-3bcd-475c-98bb-796e2c5b15e6,58c59e09-833f-4164-8239-293ae2d3e841,c3bf9f2d-3fa5-4da4-ae7a-15920815cc69,88b36b28-7551-4f33-938e-b1c7c8426ab8,6f216728-1a2b-49d3-89d2-903b41554790,957d2b9b-5360-4bc1-bcba-dfb83146cc83,f76973fe-8f04-44c7-b3ce-7d236adbd568,5de649d6-9263-4c95-b2d3-830dbf5fc40e,fc945085-07d2-4872-9b4d-93041035ab96,1c08a5d0-e4bf-4d34-9525-ea4245b2b43a,3f5edfee-67ef-48a2-b3d3-416637abd80a,53cddbdd-75bb-49fd-921c-95dc78890a04,f2a5d9ea-3efc-4821-98f9-a96f5350fd9c,a5e2bd95-698a-41c4-91f5-59ef4634528e,3fc5d623-9743-457d-ae6c-b8710cf02669,8a7ab97c-455a-4f8c-9c56-e528f4f1f4c4,66ae6bb0-9505-456f-a73f-c50f2daaffc0,b297cb7b-fea5-4e46-b246-0bdcd2990b3f,e993ccbd-af8d-4ae2-a039-46cf30467b06,c2d8df9c-70e9-42a5-b5d8-26225657f4a2,b95915a2-4eb7-47dd-a10e-65e8c886e272,c5a46d60-eb3a-4225-a503-cead7054f7f9,b8a9a400-2632-4486-91c9-8f6e0730c703,c92d1282-6b38-4649-8f66-8c8975083cb6,eaf37472-5b8d-4ad2-8b77-9a658f8d6cdc,c5312f3f-899c-44e0-b31a-70a2ec982c71,00d27244-4539-4ec5-8fe4-6542778196fd,48f9978e-73c0-4585-a1d5-7b2f0a79a42d,c9660b06-1631-4ed9-ba4b-ec506d7140ba,278c93a8-449c-48cc-b8b5-b6104aa397d0,43f31b21-2574-48bb-b981-ad9ed448d40e,6305d2ef-219f-49ed-ab78-dff012938769,a3255d24-1a36-4224-b4f1-0ff995ea04b3,1a40b928-49fb-41ec-b444-0c0e71e62925,41ffebcb-2ccd-446e-b54b-0337e9e630b8,d39234b1-215f-4362-9572-391bbae6308b,a7764c0d-d16b-4fcb-9ad9-033e030aeb23,0a98e180-796b-4978-971f-5981bfaa3b36,768bf070-1026-4a8a-bdd4-ec265193a1d2,e79fde1a-d742-4d7e-bcaf-106f822ee1b5,12cbe6f5-47a7-41bc-b859-6bf56d252e3b,f72aeb4c-f63e-4f00-8cf7-ed75e938eff3,95b6970d-99ae-417b-98f1-71de0510c905,5898d6a8-adac-48a6-9969-d1d48ad69798,a7c86ff2-3955-4159-9949-7dc2d2535d5b,4563b9ce-7334-4517-89fa-a346313d99dc,2f4f8ec8-c964-431e-a93b-817f73b76c4b,1cb8766a-2b39-4f7e-af93-988237e9de12,ca1c61c5-9e1b-4e31-91d8-e0c46015a5b8,f2939da4-a310-45fb-a08f-e18c0c6484f3,4787cd04-d068-4171-ae1d-b611d67db3db,7d091731-0fac-4556-9d00-e10eb8a2b016,02a151a1-f7e1-4fa7-b02d-51653096a0b1,4c5a586b-c47a-4577-8fe8-a531999c70d2,1e9f730c-6cb7-4487-84cc-1cebc48a2381,407e0f10-f13f-48ca-8437-4b69c7e28eff,22db74bc-5127-4125-a0fb-665b60e5472b,4ec8d441-cd29-4619-9109-1ef8ac38106d,3c39cf21-ae3d-4697-806d-fba2031e990f,c355ab90-69fe-4875-b03d-b589b70bc17c,8ad0fe4a-36d4-43df-9de9-a7174d7ecb57,82677ec8-2cf0-4814-8030-9f29c53a80c9,5967a9f4-1156-46c6-aa57-949456ca0872,29db3208-8ba0-4756-8d54-bde115e5f840,2ef3bd7c-39f0-4c6b-bf8e-f75e0c1feba9,19b48f5a-7649-494b-b3a3-b6097150baf4,f4c57f54-ee54-45df-9411-d8ac331feceb,de0d27d4-8049-4a88-a4c1-c5d6f6723e6f,594d6c37-0bb4-40e5-8608-0240fa089bea,4d7880d0-b268-4197-8ded-458d5928ec3c,8467fa48-4c9a-4ad4-9d78-2ef36cffce5a,3fd61628-29db-4db4-84ca-a3b218e44418,976ea862-ec80-4d21-bffb-ac38c0245810,a3df9483-d5cf-44eb-90ce-3f3072233432,87772eaa-fe85-4574-99ca-0c21c7867634,403b3720-ba1c-4f32-8507-9b2fc7f6a117,347b6ce3-59e0-4ff1-95e0-23aaf6c1cf9f,1e8fb5d1-4757-4329-b49f-1082502b6e72,0cb60288-0389-4057-8a8f-256bc0165aed,58c2043c-ad39-44e7-9ba0-b249bff64dd4,55dbfa1d-62fc-457c-9ba3-f5ac7af6260a,01078074-81ad-4027-b704-3d3ee43a7859,20704b68-ba83-43a0-a317-a8e17e7bbb2b,c2af5616-c2ff-45d4-9c05-fba4b6c41724,6a97c5f7-5238-4caf-9f36-3bcfe142fe39,2b4a1918-a245-4efc-b46e-dcef5ae25068,28c199e0-f044-4103-b88d-5f62cdb87641,264b7dab-586e-4dba-b4db-0e7e725683a9,30acd60e-160e-4009-be63-408761434251,18c0375f-6031-477e-bc62-5cf4f9374e9a,e3e45a5b-eaf7-4293-8356-1d6d95a6c95b,4dfa2cfa-0dc3-4d7b-8ab0-a3c5ccbaf684,6671dcd7-3ad3-4cb0-bc74-d0a9af418a86,8e57fd2a-3e32-4430-85ad-cd3f558bafcb,4ef99ed4-37ec-4852-a3e0-2bfbb0758ed8,eb548555-2260-4393-8c54-50cda30a0783,0c2823b3-e83d-4f92-b121-7cb3abccb9c0,eee5ff36-5c2d-479c-8f33-7aa412b583f2,3977ca08-43a1-4654-8af4-3043536afbe1,32d7e80d-ac58-4336-9ba7-6322fbdf408a,f6a2e8c4-0c7f-4552-8d9e-83808dcf1d76,9ee45019-a459-4f26-abdc-379f62dd7365,c50dc413-b87e-44af-9f4c-0c22f6c04fcc,a71311f7-5169-42f0-a08d-4ffdc8da9aaf,fba88807-5ce3-4cb2-9d37-e0ba53261cee,2146942a-0feb-46d7-bf13-b3907a4123d9,8f54c24e-613b-4ad5-b761-2480a5a0cfc6,b395c921-6971-4d0f-b4cc-a59adc8f2c1f,f53c4199-a663-4488-984d-e7ae62d2e949,2234e002-9f7f-458b-8820-c17e53c61b43,07a7c3aa-891d-4af3-a531-7c83042ec111,667f4ab8-9238-4ce4-9017-c486abe012c4,80312344-3d5d-482f-9a28-0cccddc41b3b,ed9d580e-fb4f-4f05-ad46-8f5cac51e85c,daeb72fc-676c-4b4c-a0e7-540ce918c3a4,3b06994f-b482-4216-a561-7aa4e3b48477,29059301-b4ed-4f40-bc3d-9b0e980f9053,88f1c414-bea1-4628-a47c-2a253925bf91,451eb0f4-826d-46d2-8aa3-ebbec5c33f0a,39a742bd-2fe9-4009-8f9c-0fe2d65ffb94,9c37fa85-ad70-4850-9390-8453ba191190,adee6b23-5a10-4e76-b1b2-292e2da862e1,b636ac69-0542-4662-8774-165034326f69,8a13ab93-4516-4d5a-92fb-725a6b850fe8,dabab425-ea28-459d-854c-e5a1a370cc38,a9dd9f4c-06d5-45d3-a3a2-ad1f22825108,e4d60768-a5e0-4ac4-b865-f6b36172f70a,2ddf571d-503e-4774-ae21-047c0f5f3361,8e941f90-c004-4cdf-aba5-ea6a39dd3799,477698b0-69e9-439a-9761-430c58dff1c3,a081c2c0-ddd5-45e9-b72f-2a7b47d79be3,d25b3138-b942-45e9-a6f6-9748bcab52ef,379c5a1c-1882-4072-8fa1-0ef79b5abf5b,cba1f50c-9d64-4d3c-876f-aea227795a2c,45827e05-27f8-4bdd-a5a2-e6bf83ffadda,338783cc-1242-4b3d-91f6-1ecf16c36867,a8d659a2-ad77-420b-bf48-08ff053f242c,eb8c4c88-df15-4f3a-a474-8e53a7ff0ff3,1c532d4a-75e7-4f49-9f69-8713f07291af,a258e897-5c24-4b00-b0ad-ed16f44a026b,e0c036d4-6745-432d-bee1-f060231cf71a,2259ceb7-a7e8-4ab6-b8d1-06e6463525dd,efeee0ed-3063-482f-a4c7-616810e651dd,01989f81-b7da-4b7f-953e-d0557cc5ff49,556c9f75-4081-4db8-bf0d-37a613c7c351,dff6c859-277b-4b23-9e27-ef63da1e88e1,c0d8d7ae-8f05-4cde-a73f-819823c42566,c3bf666c-232c-4b1e-a96d-6ab8a6966fe9,fddae801-6f59-47e8-9ab1-2089489464b8,c3b8f364-329a-46eb-a66e-c39fe99e2141,3e83545a-31e3-439f-a057-339f2f7da192,c505c56e-ea7b-40f4-b3c0-e8ab3f38ec6e,a5244143-7c49-4bfd-b038-bc5ec88daf9a,51279a64-e3de-477e-8886-d21f4d66be94,240dab3d-6215-489c-9cec-183e21941f46,f67dbedc-6780-4b1e-b3b7-5e775750e95e,a34ba237-5188-494b-b40f-613a22c664e1,8f27b9e6-0243-4024-acff-3f3098241a48,76b31b4f-3d34-4622-a38e-18ad520c5835,06222b14-3b2d-4392-a707-b76a7df3c60b,5ff9bcde-857d-4b1e-9e1a-eca618213964,3ac292fc-ca64-475a-8dff-853e0dd37381,d7c8ac94-cbc5-489a-b042-7b361545c274,8d7142e1-304c-47ba-8fef-11a3e152ff99,650d5799-2e7d-4648-b552-5e186cb24c57,8326fad5-dd22-489c-a4b6-7cae7129be78,196d5b51-279f-4d9d-87c0-9811c282ae12,d285cc81-0f9c-4f31-a8a3-fd102cf35863,d21dd9ce-c47e-46ae-a7c6-7ad615624d9a,701ef5db-9a5a-4444-8083-acbf94168443,9192d063-613f-4287-8f0f-537e98c0923d,4730355a-d6db-4259-951b-b6a1ad1e3a2e,c043a85f-4135-49a2-ae74-544dc4e0e6c5,6f89e90a-e875-47ef-a044-ce545dfc57c9,9ee194b0-b78d-4cee-a55b-2dbf4e57c1de,bb72f856-7325-4a2d-be05-062aa0735c61,c52aa66d-e244-4885-8d8d-62bda45cd907,bd587691-b77a-4995-85f8-19561f3397da,94dac936-d617-4bed-bdaf-eb373a8b7ce7,ad2664d7-51e1-48fa-af36-eb19f6c5f63a,42f5f7c8-61fe-4dc9-b88c-47b74aa4206a,121f25dd-479c-49e4-9099-755412fdb775,e6b942b6-d8a8-4c15-897e-2ecfd1a59f0a,7d9d621f-1b88-43bc-8764-1e4d20090dee,ba8c907a-b2c6-4559-a87c-879acb950f1d,cd0de095-654f-4113-bb17-05d82d789f27,87a3a93d-b39a-42a7-8bb2-a3fa3de77df2,53f4b0f3-c081-4335-af34-a899b0cd450a,e42542c7-5f5c-4c54-affc-c0e6bcde3bba,b5edb0fa-f794-4e13-a704-cf0f77eec3c7,6a174df2-f54f-4e3d-a7df-00419b07a3e9,2894239b-5789-420d-b98d-1bd473b1c362,85e47ef3-fa82-44cc-805f-5c4e3d548023,08592958-3c8a-432b-821f-650f565874a6,8852f7e6-f657-4392-910b-69fdbff86dfe,46953226-1d93-4d68-ab5b-9f00592b1885,79d71965-0161-4a4e-8955-230a42ad66cb,4524c0fa-42d1-48ae-99a1-c25f0d76c141,aab992f3-0a56-42d4-9f2b-93ef1b55ebf4,320cbb2d-7919-48b6-8f67-bdb261508fba,0d675cd9-7731-427e-9c7f-77f612009e84,ef804ef8-b325-442d-b48f-1de03cbf9221,09e26edc-8e89-406f-8a3d-808a23437c86,8905e3b1-8e08-49c6-b82d-8f8b14387ed1,9aa0852f-ce5c-4e1d-b5a4-4f6abec4e1be,def04cd4-5439-494c-8b18-4cbd1889943a,dd54d8da-d5ca-4e45-b612-fd1693993974,615ad82b-25e9-4e4a-acbf-3556bb087ab5,671f3b42-96cc-4f67-8fe0-67a406a206ec,40ec5e95-1aa3-4941-beb0-6c22fef5d810,9a7ec889-d271-4773-9806-72d28cf4f7d6,6cb96fe6-7d18-4e3c-8596-7042f5a1f7a3,f514fb0c-c5a5-4b13-9d99-4521a0a0dc65,61b1c0d6-d5d9-40a4-a96a-ffffffee4045,95c8bd2b-6d8f-4576-bb6c-16ba8b775303,3aa82d39-8279-4af6-abcd-f24498301ba6,5f126e6f-00e4-4e3b-8c35-e5d87b5bb424,ef2e021f-f77a-4b4d-afef-385df72d3220,8df036e9-ad69-4dd4-9b77-1ce73c7a5a2c,05d00dbe-d3e0-4164-8536-093258e18a44,b83fd09e-80fb-4542-8f93-71f4530d88f8,273640da-6058-435f-ac3b-e25cc10123b2,001c8a37-09ab-4584-bd04-6bf4ed93d56a,69ae84cc-78c7-4a4d-8414-8dfe0060fc98,68907c88-53d3-43f2-b677-54a68c33b378,dfb8dbb2-545d-4863-bcc2-14cb3e874fc8,0d7a182b-b8a8-485f-9668-740a6645eed8,7f09460b-5aae-4449-959d-6bc1f9a15442,aab572f5-e989-4f05-ae7d-88ebf41a0b06,bce651bd-526b-4548-a158-12af7c226bb4,9db4b0b3-1f7c-40bb-8413-f03865f95ca3,a46cb42f-0b16-4e5b-8130-24e3b284e641,f0107869-3b19-471f-8fa8-830e593de77e,a7938da9-9630-437d-9cd6-61b063ddbdcd,73f40e96-d6d9-4291-82ce-3a01f81bd8e6,5775bb70-eef3-4d2e-adb3-565493a0524c,50d6f22d-f521-4965-9ac6-7439aaf627ac,cfb76b2b-1f47-44c2-a37e-2a7182adf4e1,fe1e75d6-9e6f-46e9-9a86-db1b2baf7717,461e9c6d-df50-4bc4-b39b-7ebc2dbc97bd,c4c00ef0-f1fc-4ef4-907c-47d92338c641,620b5224-0ac3-462a-8acf-a1c3e77c80c5,3b933017-a313-4323-9a93-4e8ac48eaaff,662c7f8b-2306-4f98-bedc-1db7207ef451,42bdbfa0-fb9e-4908-a905-eba45fc15bdd,046fdcd9-7c2a-459b-8e96-de2e1520eea2,e870ac81-f37a-4b0b-a9f7-7a531d1d1dd9,4f6ac3b0-d7bb-400e-9a22-7ee9853bdaeb,c0a69855-0031-4a1d-ae77-a008fc54d3c6,2f47ef8f-7ace-4d4f-a231-dcbbc0a76366,3290f11e-932e-4521-8a0b-493fbf1a04fb,e04a9160-4249-4d29-a8f9-1120711149a7,6be1c0df-aca5-4a34-a3a3-a6f55c1810ae,1db01b08-534c-40ca-abd5-2c77972d1b78,203e1192-4022-4fdc-b887-748a0d695857,597a87d1-f7bd-4cc6-a387-f41903021e9b,da3afea1-14b9-4e1b-9fe3-7290d379d1b4,b1c9f663-13b5-47ef-9e24-b6bc5a49e65b,1135bdfd-37f8-42f6-8faa-578728b75ae9,b4facf3d-5213-45ca-b155-ea65bf2b0833,467bbdf8-dc6a-4237-beb2-bf7afcba55eb,74a70fe0-f5e5-41cb-a8af-2e870291b771,aafebd21-7e01-4f6d-920e-9e449417cb52,33efb301-142e-494c-92ee-0df050ebaede,5ec330c6-3b7b-4210-8293-28266769865f,b5a67d20-d6b5-4764-aada-2ae9215ad868,a510b153-e664-4fba-a80f-f18cfcb35573,55c05940-c524-4f31-a644-b2c769416af5,d6cc5157-24dd-48a9-ad5d-d6df729c5387,ad99c70a-7761-4627-92e6-86137aa8e119,f7980fff-3352-47ef-a447-d5405cae7592,4fc1c698-5000-4a31-978c-6df4e74b8b66,fdb477ba-0b1c-479f-8a4d-382dafd3ae8d,0493a855-d8df-4085-a0fd-800ebbd88152,ef5844ff-13a7-454a-9a68-519cb468e283,f897fad5-f1ce-4b58-b201-4d9f68069454,febd72a1-4460-480e-8ed2-031cb863d8f8,fdd82ef4-88b4-473d-ab67-7c9a62751ee4,194442e2-3a49-4678-a80a-131b5ffb3dd5,004d2c69-1062-4650-8a75-b03abccac8ff,c60f3e7b-7e9f-4b28-8b57-9e3d0db3fcbf,da5aa63d-7149-488b-a5b7-33c343a6c3f6,49e7394e-9ad4-424c-93e8-23756c487dee,1ebf3f5d-f605-400e-a8c6-502bf5e631ac,4a5af010-3700-4ae6-8aa7-326507ff56dc,4c70dc19-9d4b-4747-b7a9-d0c6776f2af7,57ee69f7-ab1d-492b-bac1-b4209b4bf8de,c3b375f4-dfd9-423f-94bf-2675e8232ab6,30392d95-cfd0-46f3-bf71-85d89001c18c,64589f0d-3254-4465-95a2-38fecc13d979,b71f4dc9-ceb5-4d93-aae6-888f513d9059,66e52129-0a12-49f6-9f82-7137ff506e15,a00484d0-e851-4c99-a775-0254435daa18,103dd1ed-748d-48ee-9b8b-6b5ebcf1ce39,e18cad45-4234-42cf-9b4c-f11898755d15,b34efe39-c58d-4189-96d1-e0bfe1b74fff,de29d522-6509-4d00-bf28-9a77e97a4d0b,177ccbfc-1bd1-4d7c-90bb-2a0760a75949,ff01a6ef-c7ee-42fb-b119-77c604026cf9,92cf31e8-e431-4e38-998a-fb80b69a4dfc,424c42b1-0968-4bd2-8238-d78434ad8e80,1d4e44ed-2130-4e77-a850-51fd10f2494d,0c08edb5-2986-462d-a88c-31046827d006,6cd9187d-cb73-4cbe-88d9-b4c236c89f5b,8ee7410d-6534-4c52-b34a-1389ff68de83,829d5de0-6a5d-4b5e-88ba-4fd6dafbebb8,88a43157-988b-47db-83cb-0ec12c94d1b4,52bacb65-c0fa-4a5c-8502-72a942273196,017bacc7-2772-4c7d-b2d8-e8104052972e,f3389076-c793-459e-a3e6-176be46d508f,652e6245-7d95-4659-afbb-2f86562e3a73,e1ff79a6-9285-4e64-a14e-dc44e59ad7c3,64c4cd81-766f-47da-be9d-8650e369108e,48357f6c-e56c-4652-98b2-01c33b4bdd9b,0225ce71-b429-46c9-b64b-f1f0cc6ca0c6,a7dd0bf6-2a3d-4b41-8ce1-91a66237d8a2,06d1d511-db50-4a07-b371-954deaebcafa,04d61b6a-68a0-44f5-9529-b505b9758976,2aa4719c-e187-442f-8f0f-cb10800bb221,5caaebe4-a133-492e-aa4e-ffddbbb2ae4d,c4a499e4-9d3f-4493-86ec-7819f4b0734c,01500ff8-1977-4603-9273-3e3750d6b295,75114df4-dcbb-41db-a160-df6f0ec6baff,4fa79f16-5e85-4cf0-8bbf-d9cd17662a24,8927dd3d-172d-4ec4-a8d2-ba88fa84109b,aab54a6b-c010-46ee-85f8-920345c4e184,377d3d06-c41c-45b8-bfa8-2665d428974d,1d24f6d3-1570-4b6e-bc88-e97eb620b25a,d1433979-75fb-4c3d-a028-a721e49732d0,fd13504d-1838-4469-bbcd-2a9f0094d82d,9df1d392-bbf4-43c8-b093-f740e6317e18,ed85f2c0-5b94-473e-8662-704c662e323a,36a7c1f4-a142-4561-ba86-15450fdf330c,b61550ec-d9aa-4258-a35b-e8aa6b6740c0,dcd7db9e-0cb0-42c6-a948-57acf1e911b3,fb8665b2-eb8c-4e19-b5b1-37918dc41542,bbb4578c-8de7-47a8-ba6f-71b11eb8b154,75ef2eeb-cd37-4161-a9e9-170d967741b3,dc04aeef-36ee-48cc-9ac0-4112904500da,3269444a-ea76-45bb-800b-7487376f0a2d,f94d75f5-34c7-4b32-a343-234dbceb389d,357dea50-b0fd-40b2-8e00-53d04c013429,0cd5318f-0ba7-473e-acb8-e1d5d1e164d0,58e71ef3-e639-4b25-8d23-d83a6b064e4c,52b5f9d9-8fa8-4ffa-b6bd-d8eb61930392,6d2fd1dd-b474-45b3-b581-b8502e4aa5da,ee2901ff-f619-4207-b435-84727cc11d89,4717098e-9e57-41de-af0e-2cd44504252f,a846347e-4510-4653-b38a-c15e5fa33652,d91b54c6-9a19-4fcb-bd20-39c524349be7,206fc359-3d7a-491a-a168-149cf9a4e9be,93f385ad-5394-43a7-bb1e-925d4f15fb4e,4682fd0c-28d5-4e29-ab42-9abcd269deb4,77d8292a-f3b7-4c6a-b937-a645d6844332,ce379754-caed-465c-b2ca-9bf62ebc473c,a1bc4745-fd01-4f05-9725-0a29b920511a,7d81204b-79d8-47db-bbd6-a6f88c0b4ff6,a0f58845-55d6-401d-8d5e-00c646069f93,efe5a420-91e1-49d1-a92b-d3c2f8f43f85,dc21cbe6-7a1a-4f33-bdac-77f329ff27c5,548c7db4-0329-4ce0-9904-0b90c6ccc1a9,1d6bcb8d-989f-4db3-ae65-63c62671aa39,addd3cbd-4bb5-43f8-9dd0-3b40a3e4e51c,00b5a5fd-13db-4a6b-8675-5c4cbab91718,cf25bb8e-43c7-4c03-8ccd-5fb2fcd02385,1c6aae49-1646-491e-b437-25e36aa683ae,07bbc4db-c4ec-4fae-b941-72351febe682,91d8153a-29cd-49f3-a149-669cf64bdf46,54016972-50e1-4687-832a-075f6cb08e4e,2ae73ee9-a324-44a5-9060-9332c270e902,63d031ab-113e-4919-ab3f-de146d3a1513,165ddf62-1f8b-4306-b6ce-8f1777cd8db4,7639ce4b-f5b3-4f91-95b7-952d6fda4680,ff1a1f25-8c12-43fa-ae00-1e7b337c1fa0,4d081d54-49bc-4bcf-bd35-beba6d766e3e,6ab6fd41-662e-4d07-912a-9925b34d2bda,50cc9c86-0225-43ba-9f50-510f61cbda0d,4f821f43-ccce-4c6f-a3ce-48b0e25da681,79fc6ba1-17bf-4589-a939-122b4ba1652a,729ff945-352c-463b-aec9-812b2dc762c1,8d30e8ce-b4ec-4c11-aed2-e2cd13044bd9,b7d2494f-475b-4a3f-9a67-d4a333d82ebd,10d500ef-ea88-44b6-a96a-601aaa30a48f,69ce3212-724d-4b1e-aebc-876356c23521,9d468ed7-a7fc-45bc-bafd-a49be8e6eb37,422ddaa6-d157-46d8-bf02-340befbec8c1,aa6c8d33-adff-402a-bf92-90447fbd5768,f9cdd4d3-9b83-412e-8b25-4173e0f3d080,3dbb43d9-bc59-4086-b9c6-5194fde177c3,94d93f29-f861-4c4a-a420-c9937586a5d5,47f838da-ad01-46f1-bdb4-8de56ff0ce8a,6f1172d1-8d55-46d6-a85c-05d91f3ce638,b112fec1-d3d8-4f13-ae21-63ea3dfb5d05,268c1b50-8416-4dd9-9b04-de9b3437f8e9,13f54c5e-3c22-4631-963e-9e6b13fb9f3a,21a08c55-814e-4065-86dc-1c47a664fa38,1cc1577a-daaa-497a-bb3e-280d68e54bc2,83658d84-cefb-4cf7-9ab1-feb0d80024b4,f98f00b8-28e0-41db-86b5-e0965f8aef0f,3737ba05-30f5-4f02-8712-f1f1208ca56f,1f816536-dc98-4efe-bcc4-edf30e9879ee,e62e1191-ca29-400e-a11a-97751b7b6066,02db940e-b21b-4442-9336-fb170e57d8df,801f90f2-39a3-4617-aa1d-4e9d810a0eed,03af3c52-b7b9-4604-a469-ab836dc3d8f8,b705e77d-b6f5-4321-aaf9-ca4d35e1311a,f925c4a9-39c0-44f7-9ca3-3fbb79fc2e23,68126506-5034-4ba9-a618-7f328f8c8f4a,16f4d2e5-6876-4ba5-95fd-d2c3007f2c96,9d990434-10a2-42b8-b94a-46d6eb00641d,e8f97500-f99e-4133-adeb-1e24ab0ee646,dc362260-cd45-4dd8-877c-ebbe8558d861,5cab1da4-1e04-413e-a9b0-504056a94761,a24ad9ce-2a62-4791-a6aa-038cca9272e4,d131d8e1-7078-473b-8c65-6cca6dd46e74,f01a58ae-112e-40c0-a88c-fbb4a9fdcbbe,f4d243fd-6a19-41ff-b417-59c1817472d7,35e742c6-e785-4b5b-9661-0eba202e2549,c7300e1a-fc13-4776-bfa2-19376384f4fb,7de26ca3-9d61-4038-a3f8-83b02dd5efe6,61d8ae94-adb6-4734-898e-6223e55d9dba,7cbcc93d-25f9-4e36-b389-70d28e9a2c06,f31c6239-f4c4-45e4-8a58-cd35426a2ebb,f47e0455-7fc6-48a7-b27b-e861eb787e6b,9b9c8686-e216-4ff6-b615-3cc86c1cb3e6,8cd6039b-73dd-4e1b-8671-a0753bc8e6bf,a6d9a77a-5b33-4d92-a368-bb5137a35430,01fa438f-600d-4b5a-9ff8-b0e007d8cf8d,477025ab-db52-4ea6-a971-9ccee0f43367,2a28914d-0dc4-484f-9484-9d82990204ca,c2aa0836-4038-4279-9987-168b738aa25f,6f3ce819-bdd4-4608-b52f-e8f0a4e12a2f,b36c12ec-3150-47f8-8ebf-fb7cc22a9ac0,9dada31e-c071-4362-a07e-4695e5a7f293,52bc1856-7e1e-4d13-ae38-33e0197bbb87,51f61287-5606-41c0-a688-69a667202dfa,1a870773-1d00-49a2-ae1c-193a04db8943,dff4c40f-2960-41f9-9392-e49b0565f5f7,e0df806a-432f-490e-afff-44b81f08a536,f0158979-5f8f-4015-9e3b-c05c00f82945,526a8ba4-ddb4-47fb-9025-cd298f301128,5a2e6503-61d7-432c-8e38-1f44ab32bbdf,8cfe6ff9-7d2d-4b85-be3a-6389736e87fb,78d84092-59ab-4653-95a7-f716385b76a6,9eaae106-c70b-4941-bbd8-737152bbbf4b,9be69f15-4ec6-4a8f-9a26-0fafbabae355,4718cf7f-79a0-4790-864e-814a5557717e,c5fb5e0c-bff9-4790-aa72-c34faac13e55,97fa431c-31f0-4bcb-951a-cfba386b9dc1,fa0affcb-ba08-4ce0-b694-a1ebe4352b63,68514e11-acad-4046-9862-a24203801bba,34badd72-479d-4ecf-b538-be1f778178ce,763f09a6-f8db-4305-8f4a-5859f99dc3f4,c83e3a1a-689a-44bd-91be-55a8bf78c998,24ce5902-b256-4762-b67d-3d6d5d5a27fd,18287a04-a4db-40b1-b63b-4725d6d332f2,5ea193a4-12c2-4da6-aa8a-4c6171eb0c69,7f3d1e50-41e2-48ea-876d-84ef26ca09c5,354611c5-ffb1-407e-8843-6179c203e075,6f15f328-7254-48ff-bf15-3b2b308e73a0,809fb112-0945-4c7d-bb5e-621b19d03467,769364bb-8a47-4dcc-97c4-d87074a88fc3,a23b2b72-0ee8-4d57-a188-7e2a2fc354a9,2c502e04-3174-421e-a63f-ccb38024de9c,11d5cde6-23d4-46ba-857d-fa4b6618aa3a,00925b55-eeb6-46a3-a7d2-25859f2a0736,1978a172-626a-46ae-a944-c6eb01f5fb01,505f4fa0-191d-40fc-a52a-8230ea988360,42f8e54c-9110-4d4d-968e-059ce36f1406,3e149b60-e150-4b39-8198-b8afa6edee3f,0bf36478-324d-4c25-92b7-dc526c3b035c,302bf338-9749-4ceb-bef8-7b7b8334e57a,7b63b1af-d163-45f6-b997-87b0d55fca96,979ecbd0-4859-47fa-b5a6-ab6b0052965f,a3317bf9-8212-4922-8bda-15c82b638fd9,a8a371ea-456c-453e-beb5-a607f1a4b5e5,0202782f-c721-4df8-8f64-8190316a3803,eec00cf7-a981-4fb4-8c89-100696401dbb,9c77cab7-0539-4d44-8205-8a052e593f35,b8dcbb83-bafe-4484-a267-fc85942bc751,55113135-8f30-4b34-aeb6-0d73dee1ef37,46617708-1375-4c86-ac33-f5ccd5e44a2e,306ad215-205f-44fb-b1a8-87cc4d9e1b6d,6ed7b21b-56d2-4b34-ac3c-af44394709fd,8b80db1b-2c13-45e1-8a0d-b84fc51f17db,58d4b89c-3cae-4da3-bbab-4db0ebc50893,77ba1620-fd03-426b-a772-d89106fcdb74,310aee6d-9712-4066-a841-a488c431738a,dd4a8210-d09a-488f-8330-3f2d84801055,4a5b5799-7fae-4477-9ad8-1c45f4ab747a,0c7696b6-06ac-41d6-9454-14402bbc55b3,4db5662a-49a6-4637-9460-7b8f2661a176,c2baaf31-ca90-4db8-bb4e-1acdcc8416d6,7a4ca625-6f63-410f-9202-99647f376da7,1d2679a8-afc5-4d57-9c62-dd7da55bdfa5,c1549a14-2457-4222-b01d-a8ec1754fcac,4e4e939e-c333-4604-8097-097b28ddc8ca,332ea68a-5226-4aa8-b769-a814a92e6bcb,8ea98673-deb4-459c-adad-b1b4e29a8ebe,414361b6-1206-412d-9f75-58fd2763151c,28085e91-de7e-4ae7-931b-5ac868d43bb0,f27f51ac-7ced-4098-a41a-b4ef14ad48a2,12b0c67f-08cf-4495-a1b9-bc1eaeec9d14,54196ff9-33c0-45fd-88a8-37d0ae2a89fb,9858f24f-9fad-4699-807d-9bf37eaf91f6,03b6670a-f37f-4f8a-a70e-d17721587233,ecb49e87-dc62-44c1-a335-cc50f5c61682,56e65ad4-aa29-4cf5-815e-39f2d26024ee,d5a2af67-8677-4c31-9cc5-1c8ed1ab4799,ce9638e9-8f01-4d48-96ab-ce42230c6d92,aa74302b-6c71-4a87-8fca-818cc0e8d19c,d958524a-7545-4ced-9fda-51adae8b1d00,3ef66dd9-a10e-4983-8ebe-c5c68daaddc8,4a432a2a-8318-4e19-aea1-9041ed93bca3,8ea63087-d713-491e-a9b0-8d723b7b6374,2fb5e9b6-4be6-4438-b1c0-03a2d6cd79d4,8999a5cd-9b3a-4819-b4fe-27a28a62deda,67adc53a-ddbe-4e54-9c41-589f81e26428,6c30f466-2745-47d9-a22b-cb8fc6267b6a,17b291c7-3a17-40bf-a488-5395af895dd3,3190407b-65b4-4745-895f-e7c2dc1ab718,f4c51107-49e3-4978-bffa-1def60089464,dbcc12ed-0060-4dca-95c7-76741f69a1cb,16677408-fdd7-4109-a35d-74810773bc84,b2529b91-7cfc-42c3-92b5-77ccd744e99d,9c956575-fede-498f-acb5-9546e3ed3688,dc66a7b8-843d-459b-91f5-419251495ec1,a05b9c60-5cfa-4dee-b9a6-007fce380506,66d6408f-2e89-4efa-851c-3a297a77fec6,b280d304-6f98-48dd-b6b4-6e5f2a868c9c,e664d4e2-cefc-4419-92d4-06ede50ce6a6,adc0cf84-f3cf-4fec-beaa-23bd3d070ddf,b391ff75-6857-445b-9259-091e28e2e42f,bd50969a-0ebb-4dbe-91b9-4df0f7e43cdf,dcc3f5cd-c7a3-49a5-8e51-4fecb654b6f0,482df6e0-5554-4243-a503-084ad1d0a3c6,24e6f79f-70cf-47a3-b155-696884b17efd,06861dda-96af-48d6-b316-3c175bf38135,2bc7b257-595d-4e5a-8728-c8527060f3bd,ea38a0a6-f85a-406c-a115-f0b33938f76b,e214e50c-255a-4da3-b778-d8ac714c99d2,a2b597a2-3d20-4321-be2c-e240fb815f53,1e57a7dd-7350-4702-9bcb-30c541f0d93b,14acfe3d-0cc4-4cc5-b75a-cab7976c4e2d,1eed906a-ef6f-44fc-a6d4-de2d5b7b1eb1,dfde9acf-3a5a-44a7-8d46-ca0116e26954,5d2e6a31-363c-46c1-bc39-1550e948a7a0,e3c2f09b-eb2a-4c7f-83ef-45d1a79be63e,cde1fc54-2652-4faf-89dd-9513226ad114,e093f2c4-380c-4b53-8508-d227e2b3d28d,c9b7c13a-88e4-4ac1-b253-9618f4ae9f19,f7efb785-efc9-44df-9947-a58822d5a029,e303f2a6-cdcc-484d-b3e4-e1b4d8645fd4,e05b3494-88ca-432d-a652-ff0f15397c16,cc780429-e2be-42b2-8afd-334c8a4e250f,88bbca39-416d-453a-a304-9d3e93e8195e,f2314519-a7bc-4fe3-b16a-815d1d3db252,42f31097-d8ed-43cf-8057-a82399839b41,d9dff318-945f-4410-84f9-e5fd3678337d,6c309a30-258c-46e8-9562-655fbe0140fb,1fbf2511-c82c-4e19-b13a-8db02b883846,c26c7a76-a087-4ed3-bdf1-6913b9ae97ba,d45cedc3-4d08-48f0-a9e3-b90d22e6425d,aa1ebeb3-b875-4cb2-a030-0c82a015c031,a64ee77c-2194-4550-9b01-111e3362c567,f7d1310e-a06c-4e56-bcf6-59c1bb59a733,17f79fdd-e06d-4d4e-aac7-5ba686177652,291c2765-f137-46cd-8fab-502ff8aea774,cc440f19-d396-47dc-a7c0-befa90813e4a,151f065b-e21c-4ce6-83ac-c873c431b79b,d364975f-8f45-4e43-ae15-6ca07e8f3f68,71b9a06e-2409-445c-bbdb-50af757e218d,1b07518b-9ea6-4d06-8ec2-8afc9c0fc62b,336aa7a9-a4f2-49f0-9c2d-b062aeb48ba3,2ba5c9cb-8ebb-4af8-852f-a3659c521e2f,b19a28bf-db17-4f38-b11c-1d0455b723c0,af1a555f-0808-4e4e-b98b-16a1a00b3f5f,a61c1eae-32f6-44cd-a9dd-f240a3ee5e7d,7e6fdbcf-d9e2-4b82-a80e-2acc0ee6ab46,6bf86a3e-f9ba-43a6-a8a1-8580da02b3de,83a0d580-e182-449c-ba24-d29b88eb3218,07e47a1d-c17e-4097-be7d-a64770ed2a38,d2a64ffe-81b4-4966-9a78-4f14025695f3,e6b923d3-96fa-4e8a-a6ef-c0a647a5e323,051f4abe-138f-4e73-ae19-7ce57c477874,27bb9c88-e3c0-4678-b418-bbb8147e44f3,0573636a-c5c9-43e9-b6c3-55087f372976,bf3ad498-68e9-438a-b18e-2e7cd1fc773c,629162be-d3a6-4568-9783-e9575513aa87,dd98bf81-5e64-4bdc-aba6-9a70ed225c11,e8aca238-74a3-45fb-82f6-de6db161932d,e57b1a57-3ad6-4e9a-bf95-483043910f51,5a74c1c7-624c-48e7-aaba-9b3fb752ab1d,858a87f9-d23d-453b-8c68-365c70ec24f3,439a2e8e-c469-44b1-b878-547d936c6598,d6846c0b-5609-4b71-8964-d120757316a3,bd8e878f-6ac0-46a6-873e-779f27dc21fd,c8f0498e-45c3-465d-b115-b53be4039e9b,cd24f927-f93e-4cb8-b137-45dc242b23cd,51edc9a8-ef40-4ed6-8e6b-99ec070cc00e,c670970a-1987-4ea6-8941-28a7a4329e51,2c3078ad-9806-4d7f-bb6f-2477e23b8dad,36c906bf-f06d-4a7f-ad2c-37a938776c32,7e559bb6-e9d1-47c9-a2c8-898f1d91bd5b,0bafbc2a-f614-4477-9440-0a8b68318f14,a75226fb-7093-4ff3-9fd8-431e97a6dcde,72afad16-53b5-48f8-96f3-e207ff17f3c8,89eb1c47-3b0e-4dc4-b851-2204622a3fc2,68a5301f-b07e-481d-a486-9742209624df,aeaba820-86a1-44fa-b2fc-fb480c81dd8c,48dea47a-e9e0-43f6-9a4a-67cb731d58d9,a25a25d7-336f-4c91-ab15-19436a8eeed7,e5f191d3-bd0b-4e66-bdca-7afa98d16654,7c48ab06-6f8d-45da-99cf-586103e324f5,ea318788-66ca-430c-b8a2-ef512df080f4,c55bca8e-bf78-4caa-bbba-3f8c5293a284,33596827-a740-45d2-9f91-2bff3c986b37,4879b0ba-7731-4e07-a72d-3ac976f2c0e4,da5ffa02-a865-45bb-b879-9ed53df20e8d,e0ede3a5-0ceb-497b-820d-1652fef03984,aa7a4613-c84c-4069-8b38-cf994bf456ff,e64479e7-8ec4-41e8-8dc8-8f74be3d9243,c7e61cec-8c4d-4196-922a-6ed42260b244,7516f84c-001f-42b9-9e90-2c901f72c6ef,51176cc3-a46c-4d6c-81e5-5f35592832aa,b242b1df-12a6-48d3-b4c0-c9c6ea416767,66c432b5-8e41-4a08-ba71-078488709749,1065234c-4758-435a-bcb2-0940e2f47f14,46b6158f-94a8-4205-9a7a-18df2f4e08ae,b8d4de76-a3f8-4812-8ede-5627ef3ae4b5,6c1bb59a-7922-48b8-85a9-54cc398c8cad,b0f2fe0a-6925-4f39-9327-f08bf475c358,4b1c7688-6b48-4b5f-b31f-f522e152f4b7,2f4fe9d1-09d1-46e9-821b-b2d370483cea,23b0ce8c-2556-41ca-8f98-46c7ff74e589,de840178-fe5b-4747-812c-fdcc5dbefed1,7c3bd52c-9837-45aa-bf0e-7aadd8000200,e756a9c7-e0b1-4cc8-b96d-63369d436784,e3c014cd-a648-4329-8b5e-4b94ebff8177,6978b06f-adee-420c-a599-61dd53f1b39d,b9bbb05f-b1da-4b3a-8981-d539e5e041ce,66743db4-a6af-4174-a135-c0e45c20e482,1e41a20a-a1ef-4438-8f7c-4dde959ed4a9,e1c5374d-c77f-48d9-aa2f-b0df1a2e8b8a,67f438d6-cafc-4fac-b76b-d2d115e4b106,38f00030-51fd-4495-b781-addeedbb76ed,929f52a5-38de-4a64-8ab9-4673f9829a13,7ba094e8-5951-4124-a7c1-59fa8b2efe6a,2a478f25-1212-4b4e-a968-d041428b67a8,4129a249-90a3-430b-b02f-309e34c05c97,7adebd2a-772f-413e-9b93-e5c987a568b7,3297073c-61c5-4fa6-887c-b5bba498699b,bdac0a24-b6ed-4090-a902-256568d7c243,9b4c46e9-289c-48a3-9356-fc7f6c96d08c,620a1fea-3ef7-4f82-83bf-c3ae2fab1612,361dffae-d36b-43ad-b88f-40429f0d1195,6f83deb9-66c8-4dca-9aee-ae9df92b0972,d5e8ba55-f5fb-42b4-ba19-b8a3400b26a1,8881153b-d1b4-451d-8f34-7f3d6618b978,1a1a0b09-97fd-4a9c-bd12-9411709b121d,84b48465-fdd6-4d0b-8061-310d87c2313a,e5bdb5ac-e345-4cbc-b21c-5e5c9ce8a295,98f06c79-6c73-4de7-ba0b-d1e5498f9d7f,2b337963-aa00-4975-8d21-d8680064ab8e,d60cf5bf-548e-489b-bad2-29bd1aaad232,485ebdde-e635-4a86-9e23-7aa67f7ca209,c29fc295-546d-4929-8789-c55975ea9f59,d2211dca-a2cd-4b8c-aa67-0db28a78510d,f6c27e83-cb92-4238-a485-585b095d0bcd,db495fd5-1fa5-4c35-a8ea-4e8ab5005c06,f5817cdb-2fd9-43ea-b50b-d230f5b9f213,005525c8-e62c-48cc-b94f-db8b9b784a4f,7a33a8cf-9332-4a2c-bbb0-b5f4f5aab610,fc2f68e2-a5cf-4b23-9964-a66cd563820e,ebf4458d-3f74-4123-8bd6-b4d897b69d6c,c3f7efb7-371e-4d23-8fe8-f6f0d3d82a82,3d092be8-5af6-4ecc-a94e-087408d6b4ca,898e4479-3218-4eae-a832-b9fccfae0865,715a6797-d5b7-4d76-b9b3-0211fdab8a3e,74c1fba7-c0ee-4406-8892-7c7e6cf1b6ac,564e0d94-2108-4b2d-b3a7-943ce62c92a0,386b9532-7efe-4c04-8f06-543b06dd27df,c91dfcca-4d74-475b-b83b-c62ec6230efa,7c8b53b7-b2ee-4910-88fa-d71f53d7f983,620d8e4e-971c-4391-8c77-7ff49a142692,ceda3857-57f0-44be-b2c7-b138834499af,88d9aa93-21ed-4158-8c92-f3638237b496,87d1c33e-d855-4664-b0a9-da3022e9c260,7f0156d5-785e-4a29-bd4d-cbbf65a73280,906e5e52-d5af-4d6c-85d6-8b6d0fae9ba0,4b72aee0-888a-4b70-ab78-642d0ebc4f70,f6051d37-f24c-43e9-9b8f-fe5ca97482ab,1831b5e1-c9f1-496d-9c24-9b64aa0f61b3,0c165384-151b-4db5-a70d-aac3b149c230,1ed04d7c-8497-40c4-95ee-9582157a2a61,57d8bdc1-774f-4bd5-abe0-8424664ae53c,89986977-c29f-470b-8f6a-e24cf69afd6c,d0b7ffb1-6b17-46d0-a40e-2ae864d53408,cf6aa36c-2edc-41e1-b006-1be41237770e,a4362f0c-58cd-4f21-86a3-82cf101a5696,55157cc6-a50e-4b0b-afe7-4cbb0a1d30a9,af242079-5e0d-4507-a65f-bdacd4f0e55f,b9a4c7ca-b078-4cea-9703-0521df732379,e8c16f32-4ebf-4016-85a0-3779c9fd153a,dcd1ed5c-c3db-458a-be10-f2ddcb223fbc,ebb71400-a19f-4040-8e72-061d2a8218a0,882fa42a-a42c-41fe-b447-01abea1d2115,0a56b223-7ca3-4e14-83ea-a4b27d59b451,f7125003-60b8-44ad-a7b7-2863d28642b5,a9e2b0bd-0fa5-486d-b417-4f32635ee956,37c1730a-351a-4f04-a83c-3cfa7ca78aed,c7be82c9-09ce-404c-b775-2131194334ff,d216f8ca-9385-4baa-98de-0f34036337d0,6ea42c14-fafa-4deb-a211-6d6d6bc1d95b,3c69b00c-3204-4392-a65c-57c29ac4512f,90090202-3260-4b9b-8bed-dbe8ba7ee0df,8106c15d-cb85-4585-8ddd-b2e6d846a455,812121f1-1cca-4271-9308-80b85f57fdec,038e7db3-a304-4ec8-a995-f2cae9443022,58075261-c7a3-42df-b09c-51613f712fe8,e224655e-af4a-4d1d-93de-9b2c08f3c25a,fb5ff29b-1542-46d6-8fcf-69ec7552f82d,5e32f949-ab1c-408d-82dc-d380fde9269f,72a331b7-960f-452c-866e-a431e8fbe9cd,66176341-edb5-47c8-af36-c4d204e5fd27,e37dd0cb-7db2-42d7-90ac-0a663fdfec46,49257455-5376-48de-955b-e5ee1baa8ddf,ab9cfc33-9765-483b-a664-0d6fb9637b5c,6c826695-1302-4019-989d-318c498fb478,248e133e-b91d-461b-aa20-8643f2742b67,3ebfd017-94bb-436b-8343-a78253b650e5,340c88f8-7f47-43f2-a350-5cb10b33220d,14b9d05a-0f47-4836-a799-cf8e3597157e,b55657b4-51ef-42be-9a26-d259eb638589,1f2c8767-9cd6-46e3-8fa6-894bb5e0c99f,07e2e242-d8f1-45c5-86eb-a42d8a93bd84,ca13f3b8-684d-4ff0-ad85-6799c925512e,13ab9867-22b2-43e6-8e53-b2858b7c9f12,4fde3ff9-dd5f-4e3f-9733-c523e3448265,d3e4a8cf-f64e-4225-a61f-e2829129bfad,43ec746e-2590-41e5-972f-ffa382e6d355,cfcbe7eb-d9ee-4b22-b790-18cd6fa24f2f,c7eb5d0b-3470-46cd-b9e1-bd6aa5f2951b,aef5ce5f-9ddb-4614-8ef5-ba6fa9f4d445,5dd43abd-5176-4a59-b935-af8be6b5b4da,6ebf7471-8d49-4233-8b3d-edb202eb55f3,f30f8edd-f6b3-4ba7-af7a-d0f7089ade98,ebc669a0-7f56-44ec-9e39-3d38baafb5c7,c9002a85-70ab-41a1-8a6b-ffda3196648f,5960a5be-c147-47cd-a8f4-063008ec1183,c5d267f6-bb0b-43ac-9e99-c456551a3aac,bf7754a3-d0fe-4d5e-998e-1c6576493a77,df15ba2b-ae94-482a-ba33-687445df8906,3012541e-17be-471b-9cad-67f4ce381435,6b357a39-d7e0-4f0a-bd89-4b0756f262f3,067754a2-8a93-40af-918f-95ad7c3d250f,437e1a49-b6af-4f36-b7ac-45786c4731b4,6035d108-046c-44a6-aab9-23a34756ad1a,fd10455a-740a-4222-8ed6-4c35a5f5d7ab,f5483f89-357d-4b00-b2f0-a5e65b59a5c0,a3a8a4b6-5cbe-476b-ac4d-cf478a12138e,98aa2bbe-22cb-41e8-9502-94b9118ad308,531c457b-281a-409d-bd99-5ea62440d2ac,92cffa09-510f-41fe-a252-0ad22a3ddd21,a81ddc19-41cd-4a95-ba39-a6cd23c5cffb,36fcf685-8291-4bf2-8219-7d2e4eb0bf51,9326c9f5-f13b-465a-84d3-fd235cc535fc,5a50dd9a-29b8-4434-a90d-c0147e6c8ce2,a191f7ed-4293-45d0-91b4-7615641d76c7,516e265c-a01c-42d7-86d9-8720317d0562,3984e71b-ef17-41f6-acc7-c73038b80a23,b0c26a32-a220-439f-a77b-16b06bc63ccf,57ada453-e956-4b2c-bcc7-1f118f4a5685,bcbcec0a-3e78-49b0-8cab-f9ee8dadb13d,c8418f14-401b-436a-81e9-307339101e92,cd373a16-edc3-4198-8c77-6b0d4eb55066,0b17a167-9854-4c6a-b141-513df0859a51,3b2aea47-8368-4d36-8f74-55ad1620c321,07efb22d-c58c-4c1b-9879-7d24f778685d,3775d38e-8e7c-40d5-853c-b9c589921a70,7de6ae59-8cb1-4ecd-b6db-f32727dd0a92,224c6d5b-cedf-4f19-aa2f-a1991419e380,222de0cd-4a59-4626-8026-9aee26a30c02,38724ee8-fef4-4d56-ad99-167a98ddb6a0,3749e236-ec0b-455e-8f2f-db3c35edf372,9a316708-7e69-46c8-a932-9436f0ee0426,34fb27d3-01be-4f32-8cd1-6b6e89338d58,b8b952b2-c47b-4823-a605-6d20529ad09f,0301963b-60c4-4df9-814e-42ba91019286,f226207b-603b-4ecd-bfa4-b4e0fcaafd5a,43fa38ac-2e53-472e-98a2-e1404f6db19b,962059f0-7af5-41a2-9854-4cd80d5a84e3,adffc112-eab9-49a7-b54e-d1b8b42e6df6,fc861f3c-ebeb-4923-965d-e41f3b9691fb,97a5ed46-4187-4a1a-8ad3-ef1e7fd5fa90,e500f28d-da21-40b6-b7bb-2a736b789196,ba617778-e1aa-4436-a431-e54f7d12bc10,a823d6c4-6b15-40d4-a803-dcc348d00158,3910f51b-5606-4210-a00d-d0e1c546537b,cdf608bd-db2d-4953-81d1-778686dc8425,bf6ef1eb-0e86-4fce-a704-1d17559ae0b0,bbe3e71f-b94c-4b82-9d84-f2b051fde907,40c8a28e-dd71-4c51-808c-10dcd5a7544f,e1a078a3-9946-4598-bad1-17cfa0c8195d,2c695199-b53c-4430-bdc9-165f46b18bcf,a18da91d-f211-4c3e-99d6-e9d58aad04f6,6dc63280-2200-4be8-aae4-4bf6a13d4118,e3f43570-96e1-4679-88c2-0fce9e9e8259,085ccbb2-b59e-4981-802c-88f0b1e4edd1,e31f3c12-ab8c-435a-9b13-27e7879a70b4,9dd2d9b6-5bad-4430-baef-7ad8297d1fee,1319bc33-1847-451e-b758-1455946341ec,dd004365-74bb-4267-a01a-49784866f8b7,3fac6c0b-f63f-4b1e-b971-ef0b4d0d7769,f6ed712e-94eb-466e-a270-a989cb9fc0f7,88e5b17a-9afa-413a-b3f7-7c6d374ec3f8,80fb48c8-890f-4d7b-b04a-b3fd5e732b0c,03d36885-d5c6-411c-a48e-3970118889f3,6ed478cb-7372-4c7b-b42b-1659dae95a70,d9f38215-858f-4668-9f31-064ab9199403,ec870eaa-ed79-4986-8a4f-eaabee73f4d9,70096de5-5050-43bd-b2fd-9093db3a5128,bd1579ed-d066-4006-b530-c90fd1406bdc,dd5dd495-7511-4a28-8ecc-a98e57f34751,a8097f00-bd67-4fe9-9c07-8303725ce73b,22a72a65-4fd2-4e34-aabc-0fd49de72394,d87831a8-388b-4b47-9b7b-7c5ab1020641,c7ef39ab-424c-4a94-bee1-0c3fb899d957,83c09520-a09f-4f77-9c30-14173343951a,02449af4-caf2-44ba-b05d-70caf7435619,e071ddd7-59e5-4378-a3f0-3a29e232dd5b,0a9f9aad-1c59-46d6-8c75-0f8baa293fa1,ae5d74ab-641c-4bf0-a0c2-3ad7fefc1efe,9c197974-4310-4382-b70f-07569a16c533,9abff918-22aa-49df-a188-69eb6a57db73,3c063b1f-dd57-4148-afb7-0c37606c0f2a,47767548-e566-4512-8092-f1ca45b35e68,daf511bb-a1a3-4482-a3a5-cd0551cd674a,fb3bdfd9-8588-47df-af00-90cbc7f15506,01e7a149-bbd8-4e64-8f08-35cbea2096a0,c684bb5e-94b0-4e5e-a666-1fc715745066,e2e88b26-5201-42cb-8f0d-6e5a7ac2106a,b60c7936-e65d-4eee-a6cf-9185a826af3b,539dd181-cfbb-499f-8f6a-5d90f29d998d,2e26f7e6-2f8d-4705-bc51-e57aa22ce4d9,eae7bf74-705a-4e40-b9b7-7916dc536c72,d1314a8a-d2a0-4eab-841b-94106ad454eb,b8d13eb9-a381-4ddd-820e-29d0f7517e2d,2ba5af18-08c0-4d50-ba05-564a5c68551b,2535558e-4848-4878-9de6-1446c4a0b329,64644fdd-df21-44e7-bf88-18f846b97fe1,caad7194-19f7-4e96-8f7a-df653e8a54ea,9f328761-9c87-45ae-8a09-b46ea64d9465,8dc30cea-1c4f-4a08-a9f3-ed9d9bf4595d,08e0ef2c-39a1-4168-a071-eb7c376ad24f,5634a1c2-8598-4218-b6e2-be0214998d23,b18ccd1b-bacc-4a13-9be3-4abf080184f3,7fb4ece8-9215-41e1-9916-ce092469ee14,c5411d41-50ca-4b2f-8452-baf9de66abd4,c5f376b4-4d33-4747-a5da-03f9ac2992ed,1c78e43c-3063-4b35-881c-b693b637466f,dc10c095-d17e-470b-bc0a-afec6517242e,568f8b52-f7c0-4572-8a4d-419980e736d1,83423c6b-e4a1-4ece-81c9-076c973aa52c,9b751c4a-c797-4cf0-a3cc-e16976936ef7,cd340ab2-d22b-4b2f-95dc-aed98416b892,99a966af-f947-45f2-a6d9-e78329ff540f,fe272fae-69ad-44bf-9ded-5f6fa8354b8b,f29fadc3-bb64-4660-be22-36b1fc94411c,7c2f380d-70cc-4c0d-9268-f99c61bc4f42,02fddf8c-c9d0-4086-a606-681b7f717fc2,79ffb03e-615f-4fb5-9cae-50232c49ca55,5ef2af10-61c6-4690-87ca-7c32614159b9,a06361d6-d1b8-427a-bc80-78fd7693c3dd,164bb6eb-b7fe-48b2-af71-059ba2f646a2,e32441b4-94c1-4a9e-af10-06a5361b12d0,f326f91a-07c0-4348-8d17-829d2d4f6e5e,ee3d8944-f87d-4d07-b844-c8ff1397ae17,c0c82013-b033-4a78-956e-3ac4189da5a2,472a39cc-3a0c-4a42-8851-36962121e3a1,90bbdd92-3719-47c1-ad3b-d16192527c02,26cd3175-f13b-422e-b0b2-9fdfd945fc55,10ba5621-7e2d-4981-b101-7ec23d756eb2,1d1e6088-2993-4554-93a3-97744e1415f1,4053582c-503a-4553-b967-855b506b6702,7c9a514e-b4a1-430a-9ca8-aa3c88fa391f,ab6c8eb3-2b9e-4c0d-abff-07a7e02b2904,3bda6eef-92f9-463c-bf06-e53abf14e6c3,5f711ad7-4649-4a73-9f77-591639005a4d,0cf48b42-1437-4f16-bdd9-b61d9863058d,2b293456-c744-49b1-99f1-4760cc1ba4f9,8e5027e6-1931-4a6d-8742-9d265d5ff729,d31328aa-8f18-468f-8b0e-292d767fe5b3,c9a5fd0b-b85b-48e2-a6c8-9e684eb11300,79bca86a-55d4-4df4-b5b2-c4f96c076998,9c41caa8-6b65-4796-a595-c6751289fa43,d80a0254-5033-4833-b557-d49585b01006,c7be0d9b-67bf-46ee-a7ab-4214dd4a7cca,c05fd1f2-9ed7-40ca-89ce-df4d819c99a0,64e48ec5-253d-4bd8-ba9c-45a33c72ba48,f63ffdf0-b15e-4de1-8365-58ef7c4eb4ee,af6a9f38-1c04-4864-bcf5-5d68e0ec788a,797d2fef-e364-4a77-af83-a72a4de1f3cb,67d3ca73-2419-459a-ba62-cedbf256d330,229c9529-573c-4ef4-85fd-a24ba999f473,f1ea2cae-0e89-40ed-aecd-4cb7bb0b602d,46a52325-d0c2-4c4d-9637-9068d6ffefe5,359e5ac8-b07e-49ee-94fc-ccdf94a08286,52d9848c-2442-4a0c-b218-bc54ce516c74,7d46e271-6c85-4e62-95b8-ba5718925678,30a0609e-cc53-40fd-ba9d-5b364925f466,72ad0861-25db-41c5-af77-e476f4668501,0e14c124-1d16-4b22-8686-f8bfefe7736e,37db5f66-fd0b-40d0-999d-fae7e8f85bc0,112df099-d213-4407-bc2c-dd611b070724,97becffe-b061-4267-8c32-83a88192228e,d8a62dbb-0527-4b20-977c-92fcf26a0079,83bf4e4b-1804-4473-9f09-5b0a59dbcf50,1032d583-7da7-4dd4-b21e-00912d419c6b,295634b6-7ca7-4645-bb51-f00df280e8ef,bf9c7f94-cecd-4cef-ba57-25c6f2e4443a,1c79f128-c512-45c0-99e9-ff666d32aa13,48e64116-0634-4ac3-a2c5-5a7b5bf980bc,20aa12fa-e36f-44be-87e8-dbb5cd2307de,d7e38978-2829-4386-a60b-2eabd99cfb08,89548c9d-9016-434d-9778-bf70f22e4513,a0e9d651-ccd0-4b03-8594-7997403b6ccf,a39d06a8-2f1b-4174-b379-75d5567c02e5,968f8480-b767-4821-95ac-397bb7560864,36dda54d-c218-4e04-bdce-a367ba6fe34f,79c8837a-5a1f-4b4c-9399-425c3069f5e2,afe669a4-92c9-4157-8f48-2a62b590d54a,03a8c88a-2d5a-40b3-a5fc-1ff509d7946e,4dad99e1-34ef-4c21-86f8-1e9726b0a1ec,40ace92c-93d9-43d3-8c05-a7ee639f3244,24fd4aaf-781b-4cab-9d0b-a17ce9a82eed,9101f630-db61-4134-8e1d-c817e032e1b9,0db1c265-d0a6-4688-9b31-5aa855f6c901,b18f663a-99b6-4bd5-9334-cf0018afae12,3cdf9b6f-b9ae-4b0f-a33b-74f3bc7e9d0b,7986a896-e1e4-4992-963f-bac3b4437f21,730e1a95-4814-4767-9253-6ae237a6891c,5902e68e-445e-4fb1-af82-b2b16bd6133d,98170838-4521-4490-9f9c-ef2d584fc974,31044256-52a5-49d9-8f3e-6b2efe88b58d,278bf1af-2ea4-4582-8fe5-26be79c95afe,08bc8b10-ba8d-49c2-85b9-e70a1b846d62,4519d277-c2ec-4e74-9592-e48af94aa243,314421e2-bc7c-4f1b-8837-2e9a4ea3c231,d391d951-4c99-42c5-baa0-8100980a02b3,e4f3c89f-e2df-46ca-901b-3f79874ccf0b,197dc2df-4043-4f68-acfb-761c63845307,baf35519-f46b-4fd6-befe-9b6cf67230c4,ceb03c10-e121-4d59-83c8-8c76d8d21bfe,20ea2d9c-6fe1-4be5-873a-90f149598e5a,70e9cdd9-47ee-41ab-9b0d-c0b069d9fd6f,a7bdbf6d-1606-484c-bc14-a73cd8375375,35abd457-69d3-4491-a997-a40bfb560167,8d0bd59d-60ab-411a-b4ca-1b391baa7cc3,4a98e9ac-733a-4a0f-a72f-dbc5ba49be72,4ccd1e92-b5ec-4352-b25f-117141b8562d,f1cc0e1e-39af-49c4-a4e0-a8323b53a8e6,4a565c58-3531-453e-b62d-331ae1cb7a01,d7c68ab4-69ab-4a45-a977-258b529ef1a1,3d34e49d-47dc-4ade-b2f6-54b5773e2049,614ba762-40eb-46e5-bef6-7059c80eba0e,47aef4f6-9000-4f20-bd7f-4648dfbf64ec,04e8dc35-17ff-4cd9-b627-6403c97e7c24,217b1c2d-90bf-463c-b7fb-8b97fa80b814,7558df03-5987-4db2-8767-bfa9e466dc5e,a5ce99c2-8805-4b30-b6eb-61f5c5bc1f9c,dc17a9dd-783c-446f-b7c9-3fdedabe2544,9db310bc-da6f-4201-9cfd-b4cf68c1bd12,b7a47003-f772-44be-ac39-684ecc5d456b,e8a5a063-2d6b-46fe-9010-45153e8f1494,6167c614-1fdb-46bc-9bd2-4d535ec9eb38,cf41b359-5e88-4182-8ee4-9b6e0ad486d5,501e35db-7ac8-4d06-9b78-ff0231622080,ca643073-dd7f-4d84-b39e-f9afbeed1cfa,efbee04b-2d03-437c-8471-5889ed821f56,2859f499-0d91-451f-9826-86e63a31b5f8,859c119b-1779-4da8-a4db-2293dc0db1a2,c0edf92b-437d-4437-860b-3165f0460fac,27b8191a-07d8-4f54-bef8-d56483dd9c71,c773295a-9b72-46d6-b5a5-41fc8e1d1c46,20d95a97-6342-44ab-85c8-94cd21d1960f,6d816f92-64f8-497f-9a6a-d37d5bd0e65d,4b52fd82-d8a9-4751-89dd-3e826e9ccbe3,afeffc06-d8c4-468e-b0cb-68ebb8fa4a2d,d939cf24-855b-4fb5-a98c-4c3bf8dd096e,8792df26-e469-4fd8-afda-1bc5fa28053d,a5117a69-9a04-42c7-98e8-b0f37faf55cc,f73f072a-b402-47a8-ac7d-fd5a53a83558,bd9bee12-d5e4-4f9d-9894-7359ba0b0c53,69a4d3c6-c89d-4436-953c-01c4739c0930,99db636d-0940-4d07-b8dd-0876ce87e666,f63f4b56-801a-4650-9cbb-0e1f891a05e5,60123141-7fa5-4585-8894-2fbeabf30738,814e720e-5b74-4500-a2f4-850b5df10443,cb38f376-5c09-42e1-977c-a1706cca9fd8,5e643056-5532-4579-9805-5833885f945f,7a3a79e1-f9d6-4248-8f27-d17226a2428e,16810206-f647-46ee-b912-92a0a6bca721,fae40935-d5cf-4113-9c5b-1040f72ade41,54c0f491-6c57-4ac0-a1db-92df5684f79c,e7ea8f25-5198-4142-91e3-01933ac5af67,7a3d57e2-7290-4001-98cd-197a30ea7122,85d48d22-dac9-4d47-9ba1-e209cb11c729,71a1cc0d-e036-441d-a199-b80c134afb9d,e0aa5bd0-e535-40f8-841f-fdf920f89fe5,48ec9650-ea63-4739-9ebb-1c2d60de6a90,f32a42b2-aed1-46f9-921e-429aefd38f10,fbfed7c8-f8a6-4558-9f5d-c3f6ef326b5a,97d9f34c-6387-403c-9135-c5f7b2c3b240,9c466b41-ef9d-4000-a370-7ad3ad9da2b0,998a554f-ee3f-4a4d-bb4f-342d38d08c2c,131c2e50-e956-4387-83e1-6c0b3fefaeda,d20a4446-1c94-4772-a182-83f35da09013,938ef8d5-ee24-446a-a7f2-76eac7a840f9,7d91a59f-fc04-4d81-bc83-8a231f3f72b2,3473671b-a7da-47c3-bebe-edde00ce3331,c11021a1-4ae2-45af-a6cc-4c12c9c71310,7e10235a-0766-41fb-bfb5-8de3e1bf1b67,9f3bd7be-d847-4dda-9adc-25ca97d6a77e,46644e28-6655-4bd0-9550-2d8e6a98f6a6,47ac3f8b-ed77-4dea-90aa-b0b409606e6d,ec7038a1-0519-4e40-aa8e-fc8b84bdccb5,bb92fe8a-ab5d-45f1-8106-8d7730c26549,1aa9f3e7-3ba5-435f-aa42-8d7ab0ce27ef,b9392fba-f41c-4722-b1e3-6894d3bd6634,57513964-59ef-4f89-9538-b7cfde4125cf,e8288bb2-82f9-4be3-8eae-cd09e46322a9,47d06162-95b8-4ccf-b9c9-29bc7c8b4a41,93872c68-c0a9-4e65-ae1c-af8dcbe477df,9df437b5-a49b-4b5f-be51-eeed333cd219,5e417a33-e0b1-4cb3-a05f-d155a3ec3cb3,37e0bb67-f29a-4d51-9f49-bad3e89e5efe,6d02e93c-3a39-4dc4-b473-75e0782b605a,3cf5c812-16f4-49f3-bb08-97be4976047b,280fbb5e-4818-44d7-90cb-07fe7f73b4cc,b7c9b07f-9846-4e5f-be4c-e7418afe8f89,9285708c-b62c-4dd2-8ba2-a0e22b3eea79,0392ea78-eec8-4579-abfb-16c61d2d8194,559f8f76-fec4-4a9c-963d-a40816054849,d1c991f4-a07b-46cd-beeb-c0ed4dbdf1c5,b9a64697-146f-426c-809c-aa6b59e340a1,7d0df5c1-7835-48bb-91b5-05132a1beb0b,5d2044a4-a342-4788-9609-8df584409824,98b79357-c314-4fb3-aaaf-2c3a66996f30,af3aef53-259f-42c9-9f8e-9498923e3100,f414614c-5963-4ec2-8846-2a81c6a64628,7d7b7c48-02aa-44a3-80b3-cfb9160b31df,da1a839d-3e68-4a2f-b1cb-87cecff356c8,a4c5336c-302d-46ad-8435-c600db739f7f,40e30c37-52a3-4278-9284-32b1f821f2e9,52307aa6-c90f-413b-b174-9f31785ddcf2,68321e08-0400-462b-86d8-11fda951a8f7,f17793c4-c425-49db-bd33-b0f6ca6034a9,bf7a5cf9-7844-428d-9767-d7aed46ba966,4e1932be-717a-442d-a747-b3e578b4b6cd,9b0ea4a4-664b-4355-b682-74a3a491b7c2,5b22b3d2-03f6-460f-a664-50d67967f4df,1b56a380-1fd9-46e8-a854-cf05a336505a,f7a05d27-06fe-4a59-a4e2-2aa3c77b4e10,1ec3a9f7-5eb9-4bd4-9af9-3462a881de57,f6c0d806-272b-4119-9dfd-f6ead4008336,681ef2a1-3340-4cb9-b923-aace0521af8e,76cc9c82-da09-4b6a-876c-d6be6f6bdd61,ac2d3dbe-b94d-43a4-8aed-c28dd3711149,c43c3299-6a11-40b9-a605-884cdaff0deb,9e4aed27-c16d-440f-a853-9fa993d62013,7ab239d3-b4a5-4021-a651-e537fa9f54ba,21dbc628-31ac-4ce2-99db-d79efaff5e23,4c534ddb-0956-4212-8b53-a9fe68c41753,d6e70d97-c779-4c5a-8090-c78c2d242062,671e86a7-2e61-45f0-9b70-afc2cc527633,4b6c424b-e6dc-45d3-9e5f-526d01bc1cc1,e5607746-ebfd-422f-b307-a9fe7853d759,f2a4dabd-605b-4cd8-8844-8321f70c2246,846a361d-a8fa-49d2-8244-64d6584c0da6,07ee6939-5d22-4185-8cff-f0c14a798b78,4dce42e3-7e4b-419e-84e3-5a3cb3044757,83b6a5f0-06a9-42e6-ae54-b63bbb0acdf9,4766e4ec-dc66-439b-9b8b-62bcfb020e38,77885fa3-3a48-42b4-befb-b8243b3c89bb,5ed474ba-7d5f-455b-a751-f91bdb109cb5,5a9bf577-2940-4bac-933f-d7a01ebf86ff,93f4d3e5-fb99-4b04-8102-0cc8648966b0,736d0a44-ca89-44e6-b5c1-be153f849177,a37be9ce-eb13-4250-838a-d528abb0a471,110e2014-a721-4466-89b6-5c92fa939f81,94179e28-9d6a-4bdb-8b03-0d439948b509,283bdb2a-b4a8-4f96-bd13-c913ac405070,e1d2c5c9-8c52-4913-a30c-6c0940bcb99b,5666e235-18b8-414d-81eb-bfeb682c50b2,8d4088ae-274c-4bfa-ad18-1f5295b27d0f,f2bf92a6-5d9b-4338-996d-3b963cc1f8f8,3fcbf7ae-45f1-4f84-bfe6-a32c77e842b4,fb00928a-5b85-492c-ab58-9d9df16b7043,a6281cc3-38cf-4772-9d1f-65179c1b5f27,c647c711-a2dc-4908-9798-def8d8b5a8f9,d3b8fee7-5f65-4268-aecf-fe77ca9604f2,4ff90823-2967-4aeb-aac7-b9a719711f57,8c8a7224-397b-452b-82d9-50b52e3442e7,ad60c477-4b7c-46d1-bd60-bc3f6d7f9040,fda7770a-dfb6-40d7-93d6-7f4de18cd06f,e0c15bee-bd84-40df-976f-5b0da25d1166,84276fad-4bbc-417f-9d27-e45a9e3c3e31,d6f6725c-049f-4771-bd01-f760c162f96b,0bd5e8bd-dda5-433d-ba73-d5e38d9d3ba1,349112de-3811-4337-a074-697a86994752,c5b96dd3-cebb-433c-9028-e2527787946a,a12acbd4-159e-4abb-a037-40ac27eb2aed,4889bed4-5fa3-4ab1-8412-7a40235e27a4,b8c06568-be94-430b-b389-18ac4251b876,5cbdede7-a561-479d-81ba-e5ee14f88ca1,a4cf23e1-742f-4f0f-b58a-c5a9762d3f28,5215b988-380e-4822-a6aa-5cbdee9a2c24,f16c7930-283e-4a20-916d-57049a18546d,8a43ea3a-04ca-4cb8-a185-b752729da1cf,69b5742e-542e-464e-b694-843008481c85,abb62803-1dec-4c4f-b1e8-56fd5d8dcaac,d5751a0e-7bc7-4060-8aad-148d009d8116,aa235aa2-a5f3-4140-9a8b-54e5019a11ac,768f8cfe-1684-4bcd-af78-955392e2c73d,d26641ac-3263-4c6c-b62c-c173320f4043,3ade5c34-2710-4bf0-8115-4175ca3f64a6,d9b9828e-e720-4d44-b542-671b481cc0d7,34b8a82c-89ea-439c-913e-bb2ad180a0d2,d7e7ca21-429f-4501-a1c0-47455e2b684b,b72179cc-7e76-4e44-b5f7-eff314c7d6b0,4969f4a2-2b2e-469e-8f11-b2d16d538db3,9fdb532b-c75c-4cda-a3b2-8efe1f4f332f,23afd3e7-3001-4ba1-8e60-372175d78a90,a10f0671-1a41-49a7-8bff-a0e4fe3eb373,6952a409-ec6e-4d9b-84d8-448029554cd3,7878b702-07d3-4cb8-a9c5-fe191aad21cc,3a649455-08d8-4f7e-bfcc-8bc2a1adaa0d,e69622ae-5bb9-4b2b-8e85-85afd4fd17a6,7b946bd2-ef2e-4bd1-8ee0-338be518bcb3,7ebe1ea5-70e2-465f-913e-f49e77817114,8a96aa00-77a5-4160-82d5-34b03167cef6,62a3307d-ec57-4a60-bdd7-4995c94b4d2c,73967be2-e321-47ba-b67c-605258f1aaa5,6461799b-2ff6-49ef-8f39-e1aa7a615f60,97e043ff-2dd5-4c5f-8cd4-9f401471749c,c3148b0a-46c9-43c7-bca2-dbd1fae9b9c9,b902a2d9-6223-401b-8ff2-bc60a49551c3,1116bc99-375e-4088-b40c-4640ed51d707,6f76a634-afdc-4f51-874a-2fc4a3aa2b7c,6cdb91ab-e661-41ee-b35f-c889df0feffc,dfad9430-d902-44d6-96ba-057afd6e9ba0,333a1cf9-9767-46ec-9239-efd7c33c372c,2780adc1-4a5c-4429-afbc-b4419aab00f5,90b8e5a6-7471-4efc-b253-63a753407666,920b9947-9b5c-4db8-8bc3-789416e60794,28a78409-4a41-4866-b554-1a2bf986127a,d1cbbd05-af16-40fa-b073-59a5ba616a36,4f6c7daa-cbc0-4eef-93de-a0ce4e7d3619,65b4af1d-68ce-46b2-bdad-6d5245138a79,b9f3ad3a-3da1-4257-9fd3-eceb0b6d81fe,b13c9312-4a8b-4e07-aa3c-84770a9475a5,77d429ab-350d-4d74-a836-44d963852267,2f520fff-7c64-41cb-8fd4-797a1c217699,b5037d40-111b-4eb8-9b70-88dfc2696dde,53f63377-30ac-42a0-b907-8bbc3e4f785b,fbd8ea54-90f8-468f-aa24-8364343d3aef,94c48e31-db3b-4fc2-b2a5-a838a962c138,f762a62f-b272-4f2a-a561-d6701d8e0384,1a7b489f-dd11-41ab-a63a-c88f83ecd1a0,f05c96be-6954-48c7-8926-4664328332f8,e3daf6ee-54d4-4ea8-823b-fe07ef305042,2b0b1e1b-f889-4f28-920f-288626f20423,4b689974-0baa-474d-9e93-41c06c357ed6,2783e83f-56f5-4ec0-bbb8-7b72d5b713d9,e883280f-7f83-4b28-83bd-c8954a23f772,523b96a6-494a-4713-b68e-cb398f6c32df,d272b8fe-cbc9-4ef5-99b6-2233d3b90014,ea23e59b-2f56-4099-beaa-d845fd6b728d,a5452c59-c660-4f22-81eb-6fc4c4cfd9f3,f8d0173d-37a0-490b-b783-38b1ee43e76f,7edfc08d-06b4-4d18-b7f7-82e94466acbd,a779065f-f48c-46ea-897e-9fb649a91606,0043eb6d-b421-4368-877e-ec2337f0c4b6,13bda692-bc86-4174-9138-18390166529f,58d561d4-d635-4879-859b-5f3451039490,a30882eb-6b6d-4067-ac03-0a294692be61,d8242a1a-af73-463d-830c-3cc30d0cda38,c15df5db-eda0-4cf7-8774-b90a3cab146d,c3404284-560a-4750-8702-8ccf2bdf523a,1dfad329-a7e6-49ec-b0c7-a6b767ba19c4,5eb2b0ff-ef70-4a9e-9729-ff57c083a0a1,cc256c53-b096-4f4b-8a08-87a23fda55d2,c8d48a04-2264-4f09-8193-ee14d21a035c,5df0d584-ba45-4b1b-85d7-21eada44cd9f,33401a69-1fbc-472b-af7c-4746bb7277ec,4e098114-412a-494a-9d94-718d28a1db5c,653eb115-0761-4ec8-8aef-af35a6741fd7,d7eaddad-2e49-4fa0-bcca-a02da3489fdb,1e05f12b-bba2-49b2-8da4-aded139c989a,4bd8a015-ea8b-47b7-a0d1-a80a1d0d0a75,103ee4e1-5fff-48bf-abd4-12483565bfd0,5b88ca56-146d-4ee3-bc1b-65efa00f00c7,67d19656-1c41-4cae-a7cf-3fa3f5f7cf4c,aa8d8b88-41e7-4a3f-b609-61be3d205e86,97d201f5-9421-4512-9361-f890f6bfec8e,2428ff9e-d601-4240-8c8b-851df507afc2,4ff2c514-61c3-4fcf-9be5-7771914d13bb,79664c52-fbf1-463a-80af-cc5793f84e81,41acc8ba-722a-490c-9e62-92a0601957f2,5a9b449e-a08d-40ea-bc42-8cfd172af9c9,7dfca409-a631-46ca-a78a-24bc65664676,76896308-3bf9-4a06-8739-c964b30be66e,71012cbf-b043-46d8-81e2-4d6fab327ca5,eba993ce-cdb2-4a2d-85be-ef3670c819c0,7e4494c9-668d-4ca5-b094-9aeb017fd3cf,bf2b0c8b-17cc-4952-8c8e-108d25ab5fd2,670f2336-4bd0-413b-9e49-ca1a955e198b,3ad01770-5fda-45f7-8621-76c3a4a4c7f4,b96b749e-a3a5-4e92-b7e9-05ee52a8d1cc,5b0dd74b-7d4f-454a-8899-5a729373573b,350b549a-3ad4-46e6-9b33-e62132a56d5b,afeb352e-6a08-4ff4-a72b-d1a4a5d47d11,9c878d3b-7553-49d4-9ee3-4c38a824649d,c7725dbf-94d7-4ecc-82b7-651784c81ad8,85421575-243f-45ee-811f-615b44a59dc0,fcbd297a-b9cd-4e32-94d7-5a50ee110725,d14dd9de-9ac5-4922-a8df-7bd51c6dc07e,0bc84fe4-76ce-4f6f-9ee3-5533fccfb162,2b4bdc2c-c3d6-4362-bc2a-53e3cceaf3a2,e519e2ca-cc05-46eb-8552-39196d7d506d,667daeed-8400-49a8-8ccc-f3410c3d02bf,2e4424ee-c9bf-45c1-888f-b79351822394,a4d384fe-fef7-425f-8e50-8eb97199ec3b,94678939-0a9e-423e-af6e-4783eab92f42,f39d1ceb-ea88-4939-947f-059b479849f1,8ddd5f16-da3f-4fb3-8345-36131bbe128a,4fb8553b-a6ff-4684-997a-9f0027b4118e,f7d36c1c-9c4d-4b72-94c8-3e7d2f32b6a3,f58ac6bf-7af3-4454-9644-84a75c453df7,328cee82-e7df-46ac-bf24-0c7d824536b4,f37c6176-5354-4f17-ac4f-17f489caf4f9,45b41b1d-4c5c-4985-9de6-e5d2816d06e1,9e700e63-fae6-446a-b130-2b0f26513b6c,7eed6988-8530-4937-9ecc-e23b40e61d70,bd73da80-00e4-4951-9dc8-0495dfa57f08,cf492dc2-b2eb-4539-bf48-1eea489e8244,5a2f4729-4481-40ea-8a8a-6f918280b744,52eb4e93-aad1-4cce-b91d-8c28cd09a3b2,a4c1b836-1ee8-41d1-bfd7-8957426a2c5b,78525e32-7951-4694-b71d-f3cfe8aea1e9,0b648f24-38a4-4f57-bb92-67cba9a550e8,b1a721a2-2829-4125-a1c2-e5b363daff02,038a36d3-bee5-4d33-a01f-346af97a9f65,b8eff1e2-dd1d-455f-94a4-7638923bff32,0b4625f0-8394-40bd-b71b-95473e9632c2,4fc15c0f-5d5a-465d-8713-b3fd47ff75bc,30fca634-b367-4f69-b54b-520801e79012,84dbb79f-606e-4aa1-8e7e-4b33e79586fe,e4c3a8bd-63f3-4e79-a67f-de1c1fda6237,0f281746-b559-4331-a5f3-1e2d6262be16,c1b29240-865b-48ec-aaa8-71e2a0823662,7776a069-53a2-481c-94fc-0e1fdc6f2907,29e3f01d-58f3-45c0-a478-4d269e3b9fcd,3398e2de-9d38-4a76-b59c-7f3870cca347,c55ff0b8-8680-400b-a57a-e9d4fe889d3b,1b20dda9-c452-4ec1-b5bc-0309789d8115,9899a043-0e8e-41f4-87bc-1d550aeade1a,e966b5fb-ccad-4b1a-8f0b-881d0ecea77e,b6e65a6f-60fe-450e-8aa3-db534f0ee017,71b2a6bb-928b-4c7a-bc0f-9aea99884dca,da7c9964-8861-43d9-bddb-534686821801,1ee8103d-507a-40ec-8d76-8708f2684e09,180dd8f4-1a2f-467d-abb7-e259fef3ccf0,b6218772-357f-436d-a758-2dc89e94acc7,c70c2ad1-b50b-4b55-a35d-b3fb6ab1258a,8b01d466-f59f-4308-b149-67c44da2cad9,cb840bf0-977a-42f7-a5b2-ab3914de26e3,7bda077e-0671-4cb9-96be-a750273635fc,b3e38736-d119-4b52-95c9-92da747211a3,3989277f-dd80-42cc-ae3f-7a0945c30a6a,6be9828b-7abc-46e5-bff7-40cf12524b9d,0921189a-78e2-42d1-88c3-faab6d22c795,74381a70-dae4-4b28-a74e-91f2fd507d40,a16a5e5f-3e15-415f-872f-b9856720263b,174886cd-35f2-4604-aa9d-86054fb5f0ba,144c65a7-d710-493f-b887-62905ef4dec4,7408d5eb-bb31-4cb0-bba8-89bdf0086de1,913ea8db-a925-4a68-8ce9-efd72e2d8eb5,53c9631e-59b1-4aec-b273-61c16ae1b75d,81de0ed0-33a1-4547-b3fa-c87be4053f0d,2f9ad2f3-b990-4f7e-9462-7b524175eed4,8baf0c42-7455-4e3c-acd5-bbf885c59569,0dbd7bc2-a8bb-4883-b023-9ab607a97dd0,96d9f3ac-76b6-43a7-a857-e54b8f94043f,886c1813-3c5d-46c5-9612-c0993c214fd4,cd4cc138-f059-4b2e-a894-d6c10e9063e8,c1ea0c97-360d-4a89-909a-550128b1ac18,18a46e62-f501-41ac-98fb-69347503331f,0d7422f1-4644-45e1-b19a-8af295b356f8,64c64ad5-9ea9-4e4b-8323-9930f179bb4a,d0eaecab-d5eb-43e8-a924-f1018ab8742a,22e1a640-36e0-4559-b382-3d37b003e0a6,b22822cd-0dc9-4d0f-aa21-1ffaf97f357e,1a7b3a4b-e2bb-454a-8fba-130d033e11d5,627873c2-45e7-4bca-894f-00bcff8dcbf4,f255e89c-3c2c-4598-94ab-146086e13420,9cdbbced-e604-4420-9999-3811366e61f1,7f861ff1-458c-49dc-89ca-f28efb058097,3e31f523-3239-49b3-b1a4-27e66f64b0a6,a546ee50-08cc-43e2-99be-d81f05916a35,4ab06016-5e95-4c2e-a767-14931bbeb860,b1e58fe1-f38f-4b6a-a287-917400a41e4e,3fc7e974-a463-4577-b211-c19011998b8e,925de240-7b06-4b8a-b83e-847f4bddf83a,d8dbfd33-7f1e-49a8-90b2-7525b0f496b9,21c8ab4c-d7cb-4bc2-9f87-3c0004b47675,60a587f0-b6e9-495d-ad74-46e3d5094154,b99c4c3a-0556-48aa-9503-f0db3582cea1,b2b526b2-fb1a-45b3-bab5-49628998bb2e,1b10c2b0-2221-4ce4-b972-809b1eb8dfc1,1ab69d0c-afad-4e83-8b2f-c1c6ab074ed2,491ddb04-8768-4dc2-a858-4fc73a8389b2,1e5f6e86-f84b-49c6-b66c-f5715ae71321,4ed80cee-02c8-44fe-a559-25fe3fc1d8b7,31cace95-64d4-4037-acbb-28b37a78a956,a746cebf-ed0b-4e9f-a087-26f6971a51fb,2a87484b-6825-4514-9a2c-dc6f916b62eb,ff7ffd01-ad28-49d4-a502-b8cd2d5f0eab,a9219df1-f962-45d8-9dea-aca44b06b15b,ef92398c-2056-4aee-836d-c7280a081c3f,2a679038-1e45-4956-8062-3b90320f026d,f4426f18-734d-4973-8f22-21694df6ba21,26c2ee48-e978-45a6-acb8-d7bfbd305a7c,e172e74d-8f63-43e1-b08c-d2633288a301,79a10a4e-6377-4f71-96f2-fd81c4fb3ee8,680919b3-ec53-42c0-9643-d3adb50e854f,06ff6066-c13c-4885-bbf2-f36a4c4b4621,f06d669c-4043-4ec2-9e80-3a9590577080,b8074494-cefb-43f0-bd49-04dbbb41d245,debebff4-6dc0-4eea-a40f-d964a3f59bf1,eeaa02e7-3c87-4924-99db-4e4b438ca8d4,83dc9db6-fc04-4d0e-b147-409f20880e44,712acd4e-8aa6-437a-b2fc-9580e6658808,2959b3af-dd32-49d5-885d-40138b62f6e4,f4888e58-4311-4ca5-9543-3f4217aa543f,1215a3ea-46ce-459a-9ae7-ea281027e2be,92251e30-8048-4094-8556-08ebfe247bce,d14cbe85-7c03-4c78-8c44-fe3627a87f15,fb8b07ca-bf61-4e3b-bc5c-b64e1ca21b21,4b3f8c4e-9d1a-451f-86ba-940ba66cfdb2,8e2d6ea3-c5ce-4314-91f1-efbb55a32ce4,4eae6bf3-7847-427f-99bf-7a1a7c00f75f,e14d37e5-aac9-43f9-86f4-ec0ebf3f61c2,995b4bfd-e2bc-467d-9b23-357ff2ac9f81,e23ada3c-7475-4302-8230-c1959d4bb656,a39cdbc5-0181-4c28-80d3-666c17187bc0,b980b46e-4cd3-49d2-ac5e-611054093b0c,991b5151-6efc-4b7c-a5fa-777f3eb39982,c4c97632-1317-481e-ba46-e27e7a03247c,9c8fb147-3a8b-4673-87da-cfc97a777c44,1aebdbc4-1486-4894-b06e-2fbea9455e25,f133ed40-fb14-44d8-b18f-76f4f792bc56,18588969-3a27-4ca2-af25-692f7fbb6b64,1e74ce40-e9f7-472e-862e-d0d01232fbad,989bf356-20f4-400d-949f-2caa5c5df1ea,12c24d69-afa2-40c2-b719-37b2ee59fb00,54a75e3a-5f05-472a-99fa-456ccb3130d5,842b7d4a-986f-4534-9413-8dbbea498738,0eb49853-343e-45d9-9d20-7672dc3b1a4d,2e8ee960-e4df-45c0-b424-b4c2a8c25ca9,38efde09-5a43-4e18-95e8-098cfdd092f6,c3ee9a1b-33b2-4e7b-95e1-7e4b9134fa64,3e2a624b-a751-4405-bf95-0612622f14d5,3af8934e-ec16-4979-b563-f3bbe5a879c3,7f082375-7e12-4197-8cb3-d37d53f3a0ce,5d5ea8d4-f3fc-41ac-b3fd-891be5879585,61981d0f-08a1-4465-8dfb-537e986e7fb8,8d9825ef-f2d7-44c0-9449-69c527a5c1c8,3bbbf45c-c222-46ed-a134-df6ce0873cfd,1a4c9594-7480-42b1-8aa2-1d52fc3c751b,51fa5df1-39cd-4794-9b18-b90f1a97cb58,a6e213b0-2b49-48e8-bf8b-3239b0b4102f,0e6d9773-dbf6-4965-9772-966337e25a51,940c2b02-b7e3-4a0e-8bba-c55dfd5c4c98,41594360-2030-43ef-b461-4e4d112db68a,0cd3ae0c-0ac3-44b4-a2b7-01d91d361d06,3813dbd1-6d61-47b3-a082-41e22f54cd7c,dc44bb1e-c882-421c-9d57-c335e48df818,67534e42-e042-4cf5-84cd-a3055e438956,e620b6ca-1590-4c14-b150-40fe8993c464,069d38fb-ac84-4b1a-81c3-69005452904b,40e9b22d-4afa-45aa-af13-1c1f37eba67b,409af3e6-d938-4b9b-b1c4-75b86d96f564,e93a8fee-6c45-4adb-b12b-58f053af4d3d,0175fb21-4d84-4d8d-940d-d03c99b67d09,394c169e-6fd7-4a3c-943e-75678410970c,41563ab6-795f-4394-9b29-aed954bb6061,a6454876-c664-4343-a95b-b868369dd22a,6509f991-55f8-4bfa-868c-37c7c5702781,e079cca7-597c-49e2-82b4-80b610de1fd2,a5baf192-9f80-47c9-acf3-d9f0a90f9bbf,e9dabcb3-5e05-460f-80f4-184c2bfb3a2e,5aaea4c1-d035-4307-910a-d9f2104e0963,72e6135b-2854-48b0-a337-5be1c23aaeb6,5319f485-e2ae-4de8-8a8a-70ebc8e08022,8991b221-bd33-4d1a-8d70-58bc605af86b,d69ab158-a4c7-4319-9d6f-74de9566e628,4073f9dc-ffc2-49c0-8415-684247774aa7,94f52dbc-8fbb-4e1d-af85-ec119c0088bb,60ddcea4-5ae7-468e-8507-8f0bea06af0c,059e8fc3-b206-400a-b576-40da25ae36d9,eb06354c-aed6-4caa-bd90-edc545ccb2f9,890c5a29-e082-444b-bde4-7164fcc6f15d,6b722ecc-6871-40c8-a975-7c961c3e4ada,b9d4234c-68fe-48c1-8a83-2b1e138b773d,0505cb35-eada-4f68-93fc-63d16c800f16,2e6d2272-23ec-46fd-aaf5-adbccdb3547a,a4d488d8-970e-427d-82b9-19020315505b,d04980c9-91b2-4635-87ac-b8fddfd59187,4a54dc68-b7ab-436e-894a-2019aa689676,e0996b53-cb82-47b0-803e-9bfd42128198,afba2f44-6178-407b-9549-ad5e0329349b,dbbf68a5-2b48-48c5-bf43-30322235c678,4e9b8524-a488-4784-9161-2622938fbdd0,5d48a928-877c-4136-b3ed-8e3bc2130d2a,b27aeb6f-396a-4d68-9849-e9552df10e75,9b3706da-60fb-444d-a38c-a3beca85f7a5,67cab2fd-a50a-47f1-b798-c864243c3901,a2c9ac3e-e541-4e6c-8f8d-5e89168c9f7c,c074b15f-f0ec-402f-9d62-32a3d2c9966b,d64dd4ea-bba6-4ae9-a06c-03abec0038c6,dc389bf9-ff26-4e4a-b1f6-b48cac988eae,8bc7495f-f542-43a0-ac0e-daf08f39e125,3e21eef2-7dc9-49d8-a517-dad61d82e569,1541ca04-2d55-4e64-bc23-5d0a3c8bc0b1,9e19f7d1-a1be-48c0-82c7-cf03ee39fead,8bcf1b13-aecc-4d38-a84f-ec88481798ef,7b1c5d11-6a68-4cf3-9714-2a62c77e46b9,8cf33636-efbe-486d-ab00-26b03408b5e1,ed49f1f9-6f32-4e34-9fbf-2562ead13bdd,da5dddbe-6cfa-4c0d-82ae-737a525139c8,7bc30647-9bce-473d-b7e1-0e6661dbcaeb,bb24202e-8b1c-4735-9c82-20846f6082c8,58d531ba-4dd2-4620-b861-35668082bcf3,653cf8c3-6ae6-4ab7-9145-0b3bed10cd22,0eb9ffad-e0de-4180-bd7b-a12fdc3b1b5d,ab399735-5d9f-4561-a817-72d7415d6bcf,edbf5039-64ba-4081-b9c2-e9a7c02abe0d,d8b0e9a5-2256-4d4c-902c-241ab1518c4b,3070ddb8-4d8c-47ea-9c5b-56aa4da0b82c,a63e9979-2285-4834-a611-7a3fefb242f1,99a8c28c-c44f-4b7a-984a-4a08bd349587,db69311c-d59b-4049-85f2-38b448d89a42,6b2ec1fa-f2fc-4b92-a618-b5969b226334,880fc9a9-dd97-4f87-8130-5d4c895b778d,f83d3259-cef8-4f03-9876-966279fe1a01,b3c980f5-77be-4888-90e2-d234a7911822,2bd65dd2-845c-4fd7-869f-4f6ddef31b4e,237df8ae-4247-4aaf-9dbb-3ed0f7ff169b,08d99361-dd47-49a4-80c4-907fad99c60b,a81ed5c2-db7a-416f-aeba-58e00ad8afd3,83e91b8e-6eaa-4160-b9f6-20f25d7ddf4f,586308fc-542c-41ec-8786-24b21db7c878,f78d7c2a-dba0-4185-b7a1-4deea1e466e3,d03e15b0-572c-4f70-a872-eda572e5ea23,639e3984-1733-4c83-bf0a-07e050e02530,291d5b31-bc74-4675-98fe-afc730ac58cb,274c02ab-f477-40a1-b7e4-1761f41837f2,e78d77f4-d629-48bf-9e4b-4e1457866314,46e1766c-1d9c-4e64-8ed1-b7eb5dd5d84f,0ce12695-8588-476f-afe3-1f55d191472e,24f9434f-6ca7-4f2f-8108-f281f6302f85,910d76cd-adbc-48a0-bbbf-f31e7cfd1d91,a08b9c51-513f-4926-be77-87492ade44b5,43e586f8-59db-42ef-a9b9-fa0e5b7e53da,f8e666dc-1f2d-485f-accb-5447a5b978ee,04c6585f-5d0a-4796-a639-58419433681e,6eeea0aa-7d51-4b8e-a8f3-6120095f1114,8b83f76d-177d-4e88-87e6-83e0bbaba765,399b2832-cffc-44b8-9b11-5b9064e223cf,afb50cf5-0e1e-44a1-b381-cbdb57005c52,818264d3-fcbd-49fb-80ae-a5826959598b,4e1ad588-987e-4315-af47-838406766e6d,4db96db2-3f89-4908-a52a-614aa0c29c72,b7d91f85-9e67-43db-a727-ab0d39585c7b,4e42a72b-26ad-4f15-867f-be95cfa38a36,42332bcf-e4e7-467a-8bc5-77f1087b430f,d3741f54-16f2-42a8-84c4-3a35be17cc56,4f6da2fa-c919-47a2-a00b-f62582ce8a3e,142845bd-77b3-4f39-b703-331fe418228f,2e9a5419-12e2-4bcd-8e93-cdea72bcceb7,fae599f4-8675-468c-93ca-cb9bf272f87b,06fe2690-14cb-4fd1-97cc-9c5f95f16fff,10ca1658-cef1-49ab-93bd-d00f7127508b,2ba933eb-fba8-4cac-8448-8cf4540f8415,9353df06-11d7-48bc-ac78-d7ed68343b70,33bcd385-521d-499f-bf37-e1bf83f16887,2b6190a8-0d5b-4494-850c-7d5338863710,05104dcb-869a-4f2f-81de-88e3c86960ba,addd0974-c58d-4e8e-824e-f3b1517ce79b,e57af4b2-787b-4fbf-bc7c-5bad8ae1da4b,be420892-8759-475a-978d-9a73fa7b9e55,8fe6fa86-be67-4ad0-8fd8-f820cbea7c99,438e54e3-90f5-4fd2-aaed-e0413347729a,8447638f-b6aa-47b9-bf6f-bc5de50b35ad,7f077245-1e5c-437c-b412-9397251972b6,2a3bb685-50ce-4ab9-b832-e77e242fe194,d96ea691-41d6-453d-8b2c-b90af6bc0358,2e5695e0-3f9e-4d24-aff2-c9c80fe8290d,b36cd394-ed4d-4f3c-8c00-989675be7163,3e427721-171a-483e-9489-af5c24d52453,faf64835-5240-4774-ac2e-83fafae5ef50,dec7bf46-f266-4b18-acbe-17dc2698ee3d,bef58aea-18fb-4f6a-8a02-d61af88b16f5,41f3ca8c-3507-4e86-bacd-ef0ddafc0f36,e3fe7192-0455-46e0-9edb-130793e354ec,b880444b-82f3-4db6-b47b-d5971f6c19e5,e2084654-1b6d-43ab-b1c0-89202e324de7,e254d2c1-92c4-44c1-91a3-f069e1783329,45c01db1-d9b6-41d7-9563-097885474728,44852910-213a-4abd-9857-41af2a73fb38,7f1f73ec-aadb-4257-ac50-7722e331e07a,f7d474b0-4b06-4788-a8f2-6a8e131c2894,91802393-26d4-47af-b9a5-36725bd6271b,0414ff35-3373-4c36-b5d4-7a6408f1878d,bb94292b-ce84-4243-94f4-2fc121d78ae9,b55010a5-6013-4388-91b9-d9681484c0cf,c9478fdc-270d-470c-ad5a-f4ef1e3bf476,c3c9472f-fdfc-426f-b842-c5c7594f84a6,c3cd5c60-01f5-4817-9191-46ea865de9c0,b3dcea29-933c-48f7-9f79-72d0db74eee8,a150a8f3-62ff-4af0-9486-81910596ffcc,f7860444-a929-423e-999e-7183005ea46e,a051966c-7042-463f-aa56-69e92faea6e8,da955f83-dac7-4a99-bb9c-d7982c93eaa8,642bef68-aedd-4b42-a113-2c61de6d3110,72259169-a232-4c25-ad93-6971ed3be019,46723011-4199-4b17-844c-8435638031cd,3dac5c92-25e8-4330-8659-3c347599017f,ca3a1adf-677b-422f-ba34-d6acaf9c876c,2d95bd37-f673-4662-81f1-94f4b6ae617d,3f481821-0b3d-435b-a9e4-8e99e9aaf498,75eed77e-f257-4368-b1be-ce81bfb69caa,ac4ebb5c-19c3-4f22-9de1-db3a82d9ae07,b357efe0-7ff2-43dd-87d4-5e44393a51ec,2be97b60-4718-490c-83ed-dca17ba16d9c,37c5c89f-0933-4bb8-a302-5dd9488a8980,9d596ae1-f19f-443a-ab3c-fdc9247550ab,60873fd4-f7e4-43ef-8bdb-3ef20a973bd5,9d7bd91f-5f2e-43cd-9d8b-bdefff085561,34882574-ff98-4ccf-a5e0-19f1960f8464,088412fa-9759-4178-9c81-c3419f97f6a7,a3c1363d-154e-4737-b0b6-e2006f0eef87,5c7e2e64-f2ff-4f50-9a4c-674fca7cafab,cf4f89df-2efe-4e5f-9095-e41f78bebadd,6803f988-ee56-4883-bec4-b14484510cef,2f2db474-bfdb-4506-9dcb-a0153e5baf6a,fd3a87f7-5415-4004-a649-9907075f83e2,263280ec-1d3a-4eeb-9962-ce1519332af4,6cc5e0de-6b46-4dcb-849c-8f7bfe5b93e3,ebabd84c-548d-4037-b3fa-f6115c6f9a33,71dc4d44-8f1b-44b9-bafc-c415c8b7ece3,6f2fdf16-41b9-40a5-9087-aaefa4a6a937,afafbbe2-2d70-4716-9642-f00e9301a9e6,94a46a9b-02ef-4f37-8fdf-b1b26efc3192,f449d57f-4fb7-4fda-b39d-945bab45fc44,ce30b072-620e-47aa-934b-8d60712e432f,165838b4-2feb-4d6f-b619-f4e1fc445e75,6f554a3b-f23f-4426-aba8-91ce0b426f1c,ca9de580-283b-47bd-a2f6-604abd6dd62c,74ccc7cb-f0f6-46aa-88a6-47c7b36abcde,871c90ab-2996-43d8-968f-5d936c0137bf,5ca1d9dc-a1e2-4092-b0b9-f2cd82ae8cca,576158a2-4404-4c04-a432-000230ef3f3e,525c4442-a4eb-405f-87c6-81792cf0b2a3,04bcac0a-b189-480b-948b-e5e1058f4e1e,513cbddd-2dc9-44b7-8f06-d42a60f9b329,acd477d3-8ac0-4957-b9ff-f913d79271fa,166d6926-abd6-4c71-a98d-d7cef7f57e4f,eda89c47-38a6-488d-b423-464668de6bae,1e16c58f-2ccf-4ae2-b69a-a6b8395f593a,0b4c8d6b-4e1d-4ad9-a2a9-8a63075beb80,f030da35-3e31-41f8-9fe1-e289868e9fce,38748223-ff73-48ed-92c3-b6e43d01ef18,7201404c-ecd0-4820-a042-04793b23bb20,c92adaad-21c1-4d01-b472-3b260f17eb7f,69f0f689-c3b3-4311-9cf9-232b8c92999f,c5a903af-e19d-40f5-b5bb-dfe2a3cf5a5e,92d82956-1398-4879-af0f-28c6cc569f77,3ab09b87-687e-49fe-82bf-af5e07837cf3,87a80b88-0a90-4f34-95f1-ff0517536613,6a8461b2-b6de-4094-be26-14f8b0868596,e5247ab9-d1ec-4c43-82c3-10c8e0523ae5,2f6f9fd1-1236-48df-8c37-011e3cfee1b6,75e9dfc0-6d95-48b7-ba7a-8903c667c7b1,160759ff-44a1-417c-8a82-cab244d064ed,6a304e05-f42f-44f9-a54e-db751d8cc21c,9c1b4724-a460-4b09-815d-f22181e81e5b,52a0d8bd-49d3-4a38-86f2-233c17e09d03,098b0e25-cf46-418b-96a6-51d44cb963c5,48237719-c785-43fb-a8b1-0ee52da3e896,26cdb754-b36b-4361-a36c-6bbd5548d21b,6f2a29ae-65c4-4a69-872d-8aae177f1482,3bd9f5b5-ca59-405f-bfc4-1c85339cbb83,a760ae39-8edf-4372-af9d-57eef82c1527,29f29c37-7761-44f6-960f-eb5c9f072629,b14b5af6-fd15-4f72-b099-a2fadae9364d,7d412374-e688-4a20-b72a-9f2879278d98,79cba7bb-533a-4f4c-8b3f-91b52629bfd3,12d9b178-4179-4e05-b73a-375e90c0f8f5,4944328e-2603-4705-8d2a-7a294fbd8b73,7bac9395-8b30-4a20-a303-9732eb183ebc,04d4537e-2297-4928-83fd-37376b059ceb,93aec3a6-9bc3-4836-96af-c3d9149983a7,cb281c40-7ed1-4f69-884a-8d5c9e51a9e4,6a7fde97-eba0-4c61-9754-54576e17de57,c62afada-03bf-47a1-845c-369ddde7f18c,d9b0e124-9646-4735-b2a8-07dd270ff947,6b43e203-4f97-4b9f-96b8-6c4fe6d9fa2a,e6e663bd-ecb6-404f-b067-eb6e9e7a4753,69962384-f967-4ff4-8a5e-6dfe65f747dc,cacb015b-7053-4d39-916e-1ca8ae4ca635,d4a7bada-081d-42ba-a266-08fe0f3cf81f,a47e9d35-572b-437f-bea7-fb740e62ec5a,1af763fc-449d-460d-8021-b42ba715395f,90b644b7-224a-4f11-b460-9db59ce0f3d2,4b494ce0-d760-4e44-8e6d-46d0f921faec,d6947320-7b0b-4cde-adae-135cd5742594,f0d77861-1fb9-4296-8ef6-576686983e81,c8356f97-e8de-415c-9ca3-5f40cce0447a,d27f370b-91c1-4064-92f2-6b44da96db09,40ee704d-b5ba-4c11-948e-17e05092f570,191db066-de18-4670-b95b-0e73410eab10,87117b53-c25a-4668-83d5-d234b34e20c2,1f390e37-3718-4e5f-b775-2f9812c99bd0,56d39abb-d3ac-4a68-bf4b-4bb31c6a44f1,537b1ae4-592f-44a4-8c58-06a3624cc207,77e7c6cd-1403-4c17-8b59-917e7c9ae2f6,7d725dc2-1c12-4a5f-bc1a-cc2d40bb507b,6a6f8429-364f-4c6c-aa1c-14f0528f4a83,fa721a34-d12c-4ff2-a344-5dc31f97d7f7,6dc08ce3-1ee8-4a31-8d82-54ae7851e180,d79ffb72-652b-4478-b7c3-2eb8986aaca5,092545fc-8bc6-4cb0-a710-29fb85399a19,e1685b0a-1592-4e05-a625-9e40a2319a9d,c4ac174e-e73a-4393-83bb-bf2f765d9055,865dde68-c8b5-4908-9dfb-119f46fa506f,8f63d881-c38b-4a30-8467-a794d09a6190,96671d82-faf4-458f-a752-966324a8e892,89aff5b3-28b6-4d67-9e95-8382c734b5d1,1b11d5ce-1418-4e4f-be1f-9f0a22875696,6a52da2f-b1a1-4e30-9180-853fd472a52c,42c5ccac-d4b0-4481-92e3-9f8ebdf0ae09,f852d3f3-0eac-4d41-907f-76c816c6853b,1b79008d-c474-444d-8606-76ebf35995e5,e2a32e3b-4f0b-4b86-9963-4a6ccb4b0b81,0ae66a75-84e5-46af-8e85-6996723e1e60,3083fdec-48b4-44f4-9689-a2764db19e36,09d88a7a-a334-4405-bd31-6b24160ce1ee,271e93bf-99d1-4824-843e-92709f54ff8d,0831defa-b9c4-4049-a5b0-21ed865e5cda,a23c8d5f-eae2-4ca7-857d-d1ab3c2b091e,a02d1a94-c55d-4dae-a89b-1d45fa5ec19a,1c4285f0-fcbc-4f1c-b34f-21064a0b5271,eebf34c6-1a02-43be-9017-13659cfb9066,136602b5-f825-4b93-856b-837212116c0f,df4a27ba-4fbe-4e59-80f1-1ab8545aaf2b,2f385c3d-a052-4a6d-879a-c992d74ce7c9,2bc7c286-6903-48a2-a845-1e0aa50c46cf,b5c43544-915e-46aa-af82-0b5246aa0fb0,b09bc0b1-1571-48ea-9365-9735d5024f7d,86826235-711b-4a7b-a949-11cefb8e39ca,cc3ab3fa-c256-4772-8cbc-cce7b74c4baf,2a8b30e0-6f16-4663-a680-78464d5d81a9,7c592af8-b665-4378-9181-de3f6eac12b9,7c3586c4-0dff-4595-b49a-09d9fb73d7ef,5fbdccb3-9cb3-49d2-8f6b-93b7f09cf728,d8314b8c-abf1-4525-921b-78d60105745a,ab21c6a4-f0bc-4094-a7c8-90f164f51f5a,5b744c4e-cddd-45b0-a961-a6a0d2967bc5,7ab6b962-4621-4039-a924-a8a8d33fa482,4cad6487-ecb4-44fa-a111-2f233e0df81b,9a8f9352-b4a0-4468-a14e-281e599604a5,039387f0-df00-4d68-8c78-db3e6710fd4b,bb41e9e8-0915-42b8-9af2-3a705f827073,7840bbc8-a37f-4650-93e2-3b16b8275608,aed42ff4-6b19-4e80-b248-44bd91eeccd5,bca2ebfa-ae71-4e6f-9c56-b8c00ad0df6c,6d290fec-2e4d-494c-8ebc-00a63a5b1192,aa30e2e7-7643-43a4-9fc8-5c2517e757ec,e886ce8e-76b1-44cb-a25c-e11940a94471,57dc0a12-28bb-49d6-a77a-f8f54b14be98,7ef150a9-74b9-4d4b-ac50-22038593da42,9387832b-57bc-4cd9-8ac7-8bcccdb75fd3,f5e0b185-7c72-4b1b-bbf6-b5cd9f9db4da,78e3cac2-6c9a-4225-b46c-25047774c842,69a49e58-6e6a-4977-9822-242cec2d0e3e,bb3bc15e-0cc6-4379-8d0f-691b073bbe59,66ec485e-a473-4812-a169-d79ab74fb54a,5c03253e-19e2-4428-b008-86d46b9f5de7,1b136aef-26a1-4be0-bafd-35fc267afabc,d0a0baed-0b01-4ab0-a921-ce267fd005d3,5eb3f2f6-9127-4f83-a545-a98a090a301f,b631f409-30eb-48bd-8629-e675a5b21756,c1afc54d-8d19-42d2-bf20-57dfdf4e8039,cc1dbaf3-26f3-47b3-884d-72b9de0efbb8,c75fa52c-68b2-4164-884f-0a4119d9f153,36d0e892-6c96-4057-a8c2-5191baf7732a,8a13cf8f-cc2a-46f7-950e-281658deba2d,63c228b5-f1e4-4315-90c0-32e0a6d92bb8,8c195c07-36bd-451e-b8ef-b5ac1483e8cc,de733de4-27b0-4e22-a6ea-637d640d7b79,5ce63d97-aa8b-4a66-bd33-c5cb47ad36d7,a170b45d-f002-4b12-af94-b1a9187d58c2,1d255e59-8d80-4c62-93c0-60c07c000efa,8d01ca86-ea16-469a-a1eb-7e21aa570613,c1abc553-7663-4912-8b34-7d519754210c,5c15425f-e560-49e3-8070-316bdfc9c89e,bdab9533-3f41-4d10-ab02-4bbc04058a24,4e5a7580-588a-48c4-ba45-5b4db2687b13,43ca18f5-1dd4-4279-befe-234529f5b94a,7eedfc5c-f68e-44cd-a3e9-b034a1e5d110,f99784c8-0297-4777-b38f-3c38a0925b19,e876428d-a724-473e-b7ac-500e1cdf0c5e,21265435-16c8-476d-9f58-23fcd9b19308,31b456fa-9563-4531-9e18-38614d28af4b,15b2ada5-9a95-40ce-951d-c3753209abe2,8609ae3a-af9a-4a9b-a84c-8ade125ff053,f2b5476b-b8c0-4857-adf1-d272fe95a5af,ffa5ac65-4b6b-4dc0-b64c-67db5616cec6,80501b05-2208-4f00-9c20-12c98fb77ad3,939cee2c-795d-4fee-9e27-0a72d4f63ca3,0e32933b-78b8-4f3f-9c7a-dd037e2e9ede,f8d01439-5cc0-495e-80e1-497e5c4a71d8,65172efa-5f5d-4716-9ba7-4b987336055b,1c0c3bdd-4b95-438d-93d1-9d137721d786,9ce163f2-cf11-470a-9fe8-6607a502681a,2d204486-c0fe-4ecd-892c-d4b20c9c0f57,cbd17f15-7da7-4010-8c3c-ddab182e7e28,cba8a632-53aa-4979-9051-e80f460e389a,b7b6624f-af81-4d58-8e16-2a11c870fe78,2027617f-529c-4016-ae95-c04f8bc0c2ce,7796f4e5-7f7b-40a9-a823-7ee1f3cda77f,72102648-18e8-40fc-a959-b4e7a8ad2244,c51309f0-2fef-4bad-8559-ab6037ea9922,7f243fe4-0b1d-4c9e-a2ce-4d2b46dd1eee,af418229-a174-40f4-a549-a0b383e0e88f,ef828f16-5179-46f6-bef9-269c41839314,289c28ff-f1d5-4f22-b08c-3f51dd831574,30d5863b-8a09-4882-87eb-547eb661df96,c3748e91-82e5-44d0-8154-e37027a826f4,8f906eb0-8c1b-4e53-86ce-8eefd16a5a8b,84cc9e06-6210-4032-bdd2-95eba33a0c17,75590837-fd2b-454e-b63c-32360925c3ba,d007cd4b-889e-4ea9-8b27-4713d7f91d95,86e4dcf7-d32c-4bcf-8767-ed9c0327f126,7e1b5a04-0abf-4420-a297-cbb1b7def121,0996eeef-d069-49ec-abd2-b32c386307e8,3c0774d6-4e9b-4c1e-bd53-2627dd17f04a,5d9a9435-473d-4bd6-97fd-4c66aee67e16,fa4d3f4f-0727-444e-b117-5788149fb908,96a790ac-c6f6-4b4b-996e-d4a9b4b31e16,3d33d718-c664-4b29-ae65-34c61ae0d227,6b50c5e7-b948-4ca2-9a98-c32a791ab236,39a4ceb0-e5db-4571-b6e5-10b70f99f5b5,c3a15f81-9757-483f-bbcb-38ca1831148d,8ca077ef-b208-4bcc-b819-a5dc82841364,a2a477ca-273e-4a8c-b717-a7be6b114b14,746a0ab4-2576-473d-8804-6c736b8e5145,2c49d15d-b069-416f-bafa-94dc7c85c6d7,823f386a-e2bc-4b80-8df2-d161822b0db8,a827ad52-45c7-4aa3-94d0-308bbe682c37,f79fb81f-9d12-4080-a70f-b2e6a9d53693,22f8117b-5293-4748-b579-380bf78bc2fa,2203a10a-cb57-42e7-a892-0a06b2ecf123,d6a5b063-37a5-46eb-8897-45c6e0c76011,898d1aed-4cac-47a7-be97-daed67cb70d7,34f2716d-9fa3-4dfc-af3a-d10d91a61e38,5bb39ef8-6c32-44d3-8ea8-7a0c33580d9e,015f5e21-6258-4311-a91d-ac13a738558f,e3df7521-5a10-405c-b98a-8c82f252f020,58a69797-516f-4f2a-a88b-6db670b4393e,2ab64794-13fb-4f6b-b397-6c074e7e0fa6,b195c488-887c-41a2-8c92-959d56ae9e66,2073ebd6-3d41-4156-a239-dbfee29c0581,8d4743cf-5476-4674-9552-8dfb379345e3,9770db43-3197-4427-ac99-8ba8d64d2083,ddbda089-9f07-4c9e-bfa2-77f6d8266657,b7caaf42-9e1f-4027-9828-a74800ab4943,7a29ceb9-d0f0-4434-9a0f-0c501f6e86d5,1b9e0326-13d7-4510-a006-3eba41602f7f,2e6cc9e8-d3b8-44e8-ad7c-aba197d3d7b3,ebc39849-9f5f-40f4-8d95-27ede2c691b6,f91912b5-40b8-484c-919a-0a372540298f,dd9bfe58-6996-4cf1-9f6f-b9dbd4aa95d9,2c1b7094-1b51-4221-bdce-9a45ba62039c,6f938023-d8e9-4c18-8669-abeb8ebb630a,a6147c98-d9f8-4573-a94a-5eaccb3b2f35,9c50617d-e817-4896-b09c-d01491a6e97d,d5eae6e6-667b-4d1b-95e6-60e9d50aa2bd,9da12402-27aa-4eaa-af86-ee5bb12aafb4,459986d5-83b5-4927-830c-80c79b2e8c17,7b7db781-60c2-4c7b-b6bc-256c7d5a83d7,7505f5aa-e357-4f89-bd51-f58fa40d87d6,0b6afe29-58ac-49c7-83a1-202242f75349,ee51b874-98eb-4d47-be0d-e223e7e54d5e,1c158461-4faa-4256-81c3-f0b1c93daf37,a6365854-6317-463c-8b4f-ca382bf79238,59c97a2d-43ec-466f-b9e7-a0950335eddf,83cdb6a9-9ad2-4b1d-b790-4d58d7071f68,f0059ee0-d86a-4190-b00b-8cb05d3ffd81,dd68375a-e785-44a4-8de3-dc3c8c1f2ec0,fb9928b7-0469-4969-85bd-ee520ebba99a,83778b85-d7c9-40d4-842a-6692436b4e58,82c19bf1-751d-4841-9b72-dc77995c58e5,ff4af474-a231-4583-afe4-8f7ccbc634d7,eafd327a-0800-41af-af82-83193b20ef8e,224153dd-1058-4e3a-b674-feedc81306d2,b678e1fd-d5e1-41a6-8b2c-46bcbd43c3b3,d6bd99d2-7b7e-48b0-b1f8-d615d9b769ee,5c182e7c-992f-4759-9322-fb09d7a62f7a,ea116e84-28e6-4629-8fb8-afe22e660cab,2aa89ec3-9dab-4c15-baed-23a7cb34e0b6,776f297d-a2c0-4b75-9462-f3d0ee04c744,2c5657c6-2850-489a-9dd8-ffd9c7b413e5,0d6aca55-f900-48f7-9178-f4c9ab31bc17,c51ae1fc-6fdd-4ded-8a48-43c1402f089c,b4f63255-7794-46da-8efe-2cb5ac3e8a3a,121dc7a6-eafd-492a-82e3-f2dde62804a3,4493b482-f13b-43fd-89f1-62eb7f040c39,f42f41ee-b4d7-411d-a300-5e9f71fc9192,2a13df60-469c-4833-9cd2-899d5d735f20,5351d664-fd73-47e5-835e-0527c338c6f1,616da815-d9f5-48f0-88ef-b0415e826b71,66bc9d65-1a3c-45f2-ae36-d03edd963132,95dbad44-6db1-4da5-9147-d009321bfd64,efbcffa4-ece6-4613-867e-173a789af993,dbda9387-83e3-411a-a628-27dd86b0fc27,c6e92684-7da2-483c-a5e4-11010171514f,8edb9a92-fc30-4f44-9c86-19c70f7fce4b,69890238-6409-43bd-ae69-b92c007acdac,cd2874de-6106-4bc1-9a6b-e1463033b97b,58b26635-fdb4-4410-9d77-4701846aa78a,6d26eab8-56af-4e08-8e30-96fbefaab730,183bbb0a-2747-49f2-87f0-2a8250a97ffb,046411b6-9209-48ee-b294-cf9c2f94d1b2,ad9800f3-d1bd-4bf5-bbb5-7b594bb7fdcc,803411be-7118-421e-a91a-9bec429f9473,193621fc-b967-4d6d-a5fb-77e1ca9d0ee0,85239246-3121-4b11-a172-690f240f6193,0b42e71d-c1f3-4269-aade-adf953d19ea2,b154d17e-6dfb-4d0d-9fb0-74e638fdbaed,e73802a4-6e3a-4bfb-8a70-e6a085d9082c,6f482dc4-0767-4ff8-85e8-626d9ac48d8b,54cfb665-e91b-4f1d-ae30-0bad5e5969c7,ec4d7547-7c87-43e7-b59c-e4ca2714a582,ca7f37e3-fc9c-428d-9704-56eb81469f65,80ace610-3ad7-4df6-8c58-d8be40b6da8c,410d0e20-fe49-4831-ab07-2c90ed3a2d0a,cca35ed8-bffc-48dc-a369-0883fa3d06ee,0eb9735d-cc8a-4f82-a085-7e628a5c7b66,9cb32f20-de98-4037-a9fa-86ae19e3eb75,bc9745cb-6572-40e1-bdef-0bef1de8f700,64dbfa71-7fe1-41f6-9077-ec2ce8122323,d7d26883-f301-4cd6-871a-3f08a25ea718,88133d09-9208-49ef-a43b-aeda6f2b12ae,ae1a0f86-798e-4d43-b4d9-b3ec7965d3b6,600c7073-061f-43bb-b03e-dcecfcdf7b7d,ec9b10aa-9ae4-4665-9f56-65c40c015069,f5cde553-8c54-4036-ae58-4bfb4105a300,bccb876d-fdde-49d1-97b1-cf597cd6a634,37b34bbb-ea21-4c45-a33b-4198818a49cb,2d40cca2-2d58-4eaa-906e-4638683b3612,0356b6f4-363f-4bb5-9657-b9af29be6b5b,6cf4fa07-70ec-4179-a41e-c1e6fbd9cc4f,383a747e-a429-4581-a291-906635c8b2c1,53effb2e-12af-4343-af51-c6c31f4b9ad2,8906fb18-ff2e-419d-b4fd-aa627447e814,e1040e96-e620-4984-9ff4-992aaa21c334,8b0034b1-a52e-4d89-a4be-d18202152dcd,9bbff3d5-fa79-4c78-a6f9-31478925d659,e686c18f-07a5-4741-bfaf-15b163a19ece,0f099cdb-66b6-4c44-82f8-7761b9fa2bec,630b8cd0-6d85-428b-b58d-5acacbc259a4,7e451ddf-c6bb-4dcf-af87-9ea93e29b788,35bae9c7-6483-4a82-8888-2912005bc237,fbdc2c28-a961-4664-96bd-8410bea12a6f,dbad9f78-a6af-44f9-a335-3d20886c2bef,1801a14c-55bd-45ba-b5e0-97aeca70a765,9c433f45-5a46-4f70-b439-138efefa517f,5d057305-ce70-48dd-881f-51441a0bc0df,4be61bbe-419d-4563-88be-7d35f68da492,5ee82dee-8b4f-482e-9d6f-55e48954f826,084ed80e-ae1e-4fc1-8269-212e2a84a924,cbd0eff6-313c-465c-b452-af9c222ce19b,fe53ee71-a44e-41e2-b81e-845e3cdca7de,64c4abef-5a4c-4b97-8121-ec0e5afd8656,a5cae7af-1432-4c99-ae6f-f2668b5333d4,465570c2-901b-447b-a533-9d120c987560,7a8acb9c-cf60-45cd-b7a0-3ff5b7ce51b3,14c2eafa-0006-4fb0-bd21-ab2f7a2055d4,76ddf654-19b5-4ef4-9b91-f628be36ef4a,00fc592d-a28c-4567-af7a-02b80a198615,eb5986a2-cc58-4112-bad0-83ffe1b72685,fdcb44a2-cb05-4b22-a89b-164e97479f55,2c92c432-b863-474d-9ee2-70858f286c83,92786420-6c66-4e2c-92c9-6cfc6ab39680,aa189b77-2e0b-44b8-b652-c91a67a96b46,afe57c11-cf38-4ed6-a3ad-e3e207a81749,418c85bd-22ce-4ea2-bef2-1103a0daa745,a1499ddf-db97-430e-966e-7e2a58e6768b,b24dbf16-70dd-4a19-a35c-317b21819956,411b74ee-a143-49c0-a292-c621f69dae5f,8891d2c9-d647-460c-889a-688dbeca708d,edf4f8dc-5fe6-4935-bee1-ca504d2b1f12,69a22319-b9e5-46e4-96c1-537e9795e264,d93c1cca-bbe2-448b-9815-6db48ce99490,4f0fe87b-628c-4b95-bdce-062d9372ec27,31171d78-8e22-4a8f-b513-f3b4b3b11387,3067913c-73e1-4028-9bb0-ddfb783f38b6,3447ed54-5bee-4f91-ae45-be0e141bcdb3,942d6f49-2576-432b-ad6e-5c5e3287af5e,bed22f3e-be1f-4942-bea8-2fdeb7ce33b5,6eb3bd91-30a6-4fbf-8e6b-092686e4a7ca,bec064b2-4d82-4490-8311-d77e3d0ce51c,bfd811c2-2693-434e-9aba-87e4672a58c7,c10545f4-621f-4627-ba9a-a9130bf85a46,7b39fe66-a6e7-4d85-b9cd-90b9d0fdb27d,05293d5c-1b76-4d85-b116-2da6bd43debc,dedc9150-d27e-45c5-9902-0eef59da30a3,8f6eb8e6-ece8-494d-a0b7-eef2d05e036f,26f61b74-90f9-4941-bae9-7da153ab96a9,336833c0-a712-4f9c-93ff-bbbd2ed40cce,057bf213-3c97-4079-a7d3-7a64d105b002,a08ffe4b-8cba-4d5a-90b0-edfb478bc473,f8d159cd-4c3e-4ce1-9b68-64192a2bb428,e5281d45-087a-4838-acba-7bbf2320644d,50b8d561-bff7-479b-9a07-9c0b5ac04e5c,7f113511-b294-4363-99e9-23d4d17e86a0,02aeb049-9dd5-4330-b21e-c6db5f79595b,1ed5f687-b8f5-466b-b075-1e2b49b5862d,27bae0a7-5d8b-4902-a736-5690844d5c02,2be53cfc-d5ec-4a61-9cbe-4ddee57b552f,164bf61c-a853-46e3-b0d2-3763379393d5,5ab9f596-845d-4132-841f-4b3a5c156ca9,b4a521a9-5a88-4831-b81a-47504d1da7ad,4a0fb99b-37fa-4967-9873-e4e1c349b1ea,e15fdf02-1093-4fa2-9e82-137a4f9fd89f,48550b3e-de25-4f67-8ef8-630fc49ee61a,b43f587c-7619-4dd6-ab30-64c723f89ff6,d1eacb1a-b413-404d-b890-b179ea6c399d,4a2e13c1-3f85-4a83-a2d9-d5fa838e96ca,2bcd7d07-c551-469d-adec-89a2cc083ebc,a77bd0e1-0931-44cd-86cb-a956e071b1e6,74327970-9087-4ac4-a7e7-0a0dff8e9b16,0036400a-9ee9-4b80-9f47-97456dd1cde1,d922d810-9984-4cb5-806e-b8dae70ba8c8,608b7216-5a93-4e2a-83d4-ec2d2a742d24,806a92f6-60e4-40a7-a7e1-73b6fab43061,ec5242d9-968b-468d-9d1a-d2d224034718,55acc34d-7e8a-4aa7-838f-7d57ffc31fcb,d8443d75-1cbd-412d-8166-0e38c307b50e,f97d6a7d-c794-47fb-b6d0-c081ff3db145,6c91aa26-e0c9-4f37-aae2-50aa734a6ff4,57b50376-0845-4c0e-97ce-130b1c75cb2e,6827eefa-1bbc-4bd6-bcea-e49acf0e4128,3e410b51-868a-49cb-956a-9529c8753f3b,ac052b44-02d1-4d50-9400-567c0d39940f,99b187f8-3c5e-47a9-b938-f1636aedb391,a5222fef-b619-44af-bfc5-695fe078b610,9306a81d-8858-4423-9ce7-a1f35ca18fea,f0a58515-9b22-4fb8-9744-4a930d0baef9,6a2f1c2c-21eb-4316-b4a2-9f5ab936dbad,50e48c3b-39e9-441d-b0f4-1f03d84034bb,f5016718-4988-44f4-b0e5-7ebe4dabbf92,3ac873b2-a692-4217-9561-dcda91fefb0b,c31e6cb2-81cf-41da-a491-c1e2366ce408,ea88dc7b-3c6c-45e5-8cc7-cc6aecf0d855,a12d3261-5b79-44d2-8ad7-bd9faf538a99,8432d84e-b7ae-4c93-b798-69854aef6f7b,35518550-ec83-4fd9-b1c7-87dbbb79142d,71f05081-4a76-4fc0-a1de-854703049a9e,0f484082-8d49-4bd6-b0d4-c27f5a1b4f67,c9f67bb4-9e7b-4979-9f2a-98e3af0be6c1,03fbf3bc-d1f3-45d0-9954-c6edac29ae63,88adfcde-58bf-43b1-a8ed-748e464752af,474a3ad5-409f-4ccb-906d-0228eeed9202,01311ff8-6c29-4dd2-ad80-fd45e4cf2687,fda2b6bf-e230-4f4e-814e-ab477bfe1231,a1d5f73e-6582-403e-9938-0e9864e6a0bf,2d1a9e9a-0c22-469d-aa38-2f8880e36e0a,870d6f7f-324c-46a0-9850-1d2d3e7edc86,7fb96b97-df01-4e05-b7ae-863d61db5973,c24ae72d-14a5-4ca6-ac45-80b266e60e3b,8d7782d1-9aa2-4cca-8966-d061a10dbccb,432c4efc-2768-4da2-aabf-955d4a4250f8,261bd868-1a2f-49c9-b103-0a5b7dbc30b0,1d7867f7-35c3-4622-87dc-4da8e2fe602c,55c6093f-a7ff-4d7f-8576-d5a36bb8a446,ed372007-e420-4f2b-81d5-cfa680944c2c,1e111379-ca8c-4c89-b1b8-0e54fedbd7c4,5824bf3d-8108-4660-a16f-006288fe64e0,060a3970-cef0-498b-b279-8f984bff8578,114d295a-5208-4736-887b-e8b09f815632,72b8e049-31f0-4110-bfba-490c390e99fe,0da484f2-4263-45f9-a0ba-3dbf21c689ad,3a328506-e0d1-4317-b73f-6d7213ed929f,961204bd-1772-49b0-ad46-d214dadb7072,50e976c3-16e2-4d21-9152-7f6a2f77b930,6f9ce5da-afe4-460d-a1e8-59dcafc46220,9dd49afc-a103-4cb3-8a01-f024a2afa8bd,46e920eb-accd-4eef-a8f9-a115b44de953,5c8346ac-78a1-4eb7-a0cf-d3f0360d8ba3,3a5bc51d-f894-4c17-b3cc-49af1e6fd692,68456f92-7b3e-48ca-8a87-04470095c1e2,6beda8df-d7e8-42b0-91d0-2d1e60268532,0f549ec0-2f33-4885-8f1a-ab2f3ad80698,801ad806-745c-4584-ba9f-6bea9c15ee07,fd80ab80-47b5-4674-acc7-fefc383b8114,4917e0cf-0ddf-4192-8a1a-b7f47747952e,f034ca96-ea59-4e8a-acb2-2485114de369,72e127c9-d0cd-4a87-8a92-bf491742fb0d,929a9f17-fd9f-4363-abf3-65f00a5f9268,a6bffc19-e817-4186-8d72-8a091ce78161,ed3de48e-23c7-4594-b932-f901af297c45,ceccea8a-1d74-45bf-8ff4-43614b5874a4,6bab4ba3-722e-43dc-b4a1-6b67ca6b4f7d,af9c7cb1-d674-4431-a6c3-1086bd8a835b,803304b3-7926-405d-b099-7a76aba17b79,bbb44240-ac0f-4c76-8333-826c2aeafa3b,146514c1-a767-4413-931f-52b07dec9997,d4597536-8ce4-4243-9864-fade6b8a5415,18b88d84-160a-4235-9beb-5d70f66ef77a,7374d47b-e6ae-42d5-99b2-bc120a84315b,a53d19e4-4df8-463c-bc4b-079da793625e,6bc44e2c-3e32-4987-a61e-48565075f246,fc008a23-6e19-4294-9888-985cb730a962,360a4686-b3ec-4098-8f22-01518d7c31ec,1063c5a2-5308-4c35-8b3b-d4ee530d9536,b5f2c225-1db2-474f-acfe-f0eb9414edba,290236d6-8d9d-4707-b0e4-4d105e0e4420,f52404ae-de4f-4bec-9c39-4af257607ea3,fabea5b3-bb0e-44a9-8441-45a12d0451b0,9e01d1bb-d262-4fec-a01b-cb5973108501,fc0c6101-3f9b-4079-88a4-f86040970c7f,c53c1507-e858-41d2-b8c9-dc8eb6602f29,c101911b-c278-4249-9159-e6e1a70b2cfa,129a768c-fcea-4b55-a040-5aacee837949,7e614d70-017e-4d8b-aa77-953c6c2b4af0,ab58740f-0727-4fbd-9c46-3870b62ef951,b6b8928c-eb25-402e-911c-03d517226dc8,ad0db166-a722-400d-b859-a98a43935510,a663495b-f98b-4383-bd84-0829d8136898,7241561e-d316-4275-8820-3c99ca8195ad,14404a40-236a-4126-8cc9-573c66e877cf,01b7f1e9-c953-4ca6-a7ef-f9e6126d2454,acff468f-bf98-4306-82c6-7602d8230dc3,b7583dca-6983-4a10-b9b5-1a76f2b69d91,b857f75f-e2c3-4c48-8304-75fd27c4d644,dee9ebdc-b1cf-4855-8cd4-8b18c17b6f2b,99c8f286-3832-4abd-bc45-5d44f698876e,01cb31b3-ff8e-4084-962e-5ad5446abe02,bbd252e4-5386-4557-a723-12d6566fef6f,79238de7-4b34-4cdf-a0b2-3f121c001dd5,47ef66b0-4b23-41aa-a4bd-0117b7aa3c18,d661fbfd-f646-4956-b07c-a18304869acb,cd41c942-8ffb-4f8e-8d12-09f943c0dc46,6547103c-7d92-4853-ab47-3967f0a35990,04b1013c-80d8-4cd2-be76-4056b1c832ba,04e95582-519f-4a4a-85ae-3201c65f7268,e31051d0-3fc7-4f86-a877-0c72796ba6a7,52ba1a27-53c8-4289-8b73-9f413c3bcb2c,8e622398-d6e5-4e52-a0ad-40ea898496a1,9ae42dff-34d4-4f62-a922-2f7caeb02da1,18f71046-5417-43c8-9123-f3e628b43d51,feb700a1-1c01-4a30-9788-6427a26eb654,0ca01310-eb86-4143-9a7d-97aeb25ab9c8,31a252e8-53b6-4da1-b042-831f3c8f8f62,f3afe4cb-e39e-4d1c-aa4a-034ecc6fd2b6,cd0e489b-5628-4073-b91b-dc30bd89527f,5c4b7727-eeec-4647-aeff-d204612dfbfb,34d57cae-ff1b-4700-b83c-8679d36846fe,c1e9ed30-11d1-4d29-8360-919412df4a5a,3d6907fc-da56-48b3-bc67-8f102722ae61,ea6bfa76-6825-4e9b-b0b7-f7992c13635f,680420e4-068d-479b-a7a2-60e3c7023771,f239ccb5-7735-4f40-a8a5-040b3914bc1c,ee24f447-dec3-441a-884f-0272ab40b1b3,b066ebda-f7ca-4551-86ac-d6fd5565f8d3,3aa5b6cf-b49b-4699-84c1-e952c1d79f1f,f3cd85ab-033b-4e21-9933-6a2c7e7deb00,c7ec0a36-4397-4d91-915d-8a4edce9c68a,958331c9-04ea-4c0a-96dd-6890b68f40e5,ae1b7485-5259-4867-ba3c-23bf33712ec8,28336c46-2bf1-42b4-9bb5-e377fe542ba6,f1d0ab5c-17cd-4187-a1d1-6b6e43e78057,7e21be7e-cfeb-4037-91fe-7ec06da29fd5,23ec1e61-399f-4418-bc30-df4dcb376748,1cc2b02f-3d5e-42e8-a57a-3ab1ccc1ca13,84cc370b-3909-4a5c-8e38-4b8d61c9708c,d9730530-3ac0-4058-ae7b-78b2765c0ff5,8b81c8d3-679e-4e5e-9b45-fdac1ce5de6d,fe71d49e-14e4-4399-946a-bdbc6d82e1c0,ad4d8dc3-e22c-43a4-bb1d-d3f14c0c5b10,b3d7b7de-2165-4b0c-9679-904c6028699d,e4820285-810a-4663-bd9d-1ef066918f78,9d3364a5-860c-4c5b-aac3-b8f8047cddf3,642c441c-bd7d-41a0-bc17-7f2afec42070,1c8665ed-aaba-4f29-974c-02b01dc4ece5,e2016dd1-592a-4da8-9341-280ce0ac05d4,030ab1f1-da80-4a9f-9276-f5dd53feab4e,c286e8c6-dabc-4919-9870-88eb267141cc,3b9c8578-2a48-4b53-a5cf-d05c30441690,c659552b-ab89-4042-a61f-f4f122322fc0,f7f4c222-bf60-4450-9dc8-45eb4b4c4ccc,aba8a522-46c2-49b5-9c5c-e9fe9ba86a5e,e30978ac-368c-4efe-9467-2cee22c5b49d,994a8f94-11cd-4759-9db4-2775d655c89a,28547d64-8924-4782-b2bb-baf276d13ee5,f974046c-44a0-4191-ac81-e4f2c5fd4daa,ed0d57b4-e700-4261-b690-f1585cf34025,2a510da6-7958-44f7-a748-28df2b26edb0,497e1559-9e48-4bb0-96f2-f95180890836,b746be8d-abc5-41ea-a1e0-dbd332f6a3e9,6d9ce35e-bcba-4631-bf3f-a09fefc04f26,448092f0-9ac2-42c8-a12f-c030fdc7cd05,1db72170-1f9d-4042-b019-258ec00ee0f9,a8c9cd19-d965-4855-ad13-762fe6b9ce06,fc912fd8-bc63-4558-a5c0-f48f3c8721c4,4366ed4f-3db8-48fa-9fa9-c52e3f425c1c,5d21358f-3128-45f5-8c91-678b8aaf2115,bc812b3d-68c0-4e9c-a13a-3e315ef4dfda,f51454b1-6c5a-4d46-ad84-f23cb762512e,b4656b2d-0df6-46c9-84fd-4674b4907c0e,b36a0287-9b2c-423b-9e81-c7ca55c87a18,5d4fb34e-560b-49a4-8484-789ecaac258a,74ddf15c-1544-4987-979b-ae78f210b4d3,b191de8c-8da8-41c8-b294-5e643ab92194,30b9ad7b-f373-4382-b906-2d53bc7d46cf,6daab83a-178c-448d-835b-5a94ad17dce3,ed4741a6-6276-4fea-b18b-4d1142e13f60,bbe2eef0-bd12-438d-8de5-8d30f22347b9,92fe0eaa-8ebe-4549-9fdb-ed934ab07057,27d81f55-8e8f-4670-94ef-77f3e76eb57d,08f93c4a-3fa8-4b32-884f-0f9c7c8fbb4e,39989dba-c971-4601-9bb0-1aaae55480cd,e5cf2043-65dc-4125-a2e3-9a7bc1d4f792,ff9b4f1a-42b3-4959-a5be-b640fdcb3467,a8059735-4e7f-437c-9f7b-6d8fbf0c87ce,d3bcd38e-eeec-4431-b263-6ec84d750f11,59f0e1dc-988d-4cb7-98e5-df3013136d8a,5871b91c-1407-4ee2-b57f-7e564e124f27,e1e00330-706a-4f3b-8d80-d778acc4f2b5,537db21f-b3e3-47e8-b5a9-972d6cf441c3,10690cab-b59c-420d-928e-02ed7f9e0f63,c666d8b1-aaf3-436d-8543-9b8421de8d24,83f3630c-9ec4-4d58-b611-cb06aeef58ec,009683be-87de-4ae4-9e70-ff5e408e0aee,90a550f6-ffa7-4d3a-8556-d1141f7bfa51,f1f478c7-40af-4630-909a-44dab9176d82,397c6d2e-bed3-4e11-a8ac-557050d971b0,9120a2d5-0205-44b3-9030-68b94be19000,8ac057a7-181d-4a18-b9da-9a3872151a55,c82183b0-3bd8-49f8-b0e6-75cbe596803f,0c68497a-ccc7-48a6-9639-b45e3e11811a,20a9d51f-b1a4-4c25-bcba-d7db2b8d7ec1,29f3a4b5-5b05-4dbf-b0e3-fc6cfa75b779,b271514f-2b1d-49a3-979b-8a831c245521,5d35c4c8-28d1-4485-93f8-1dd06e1f7be4,995014f3-5ab6-432d-80af-f690354bb3ee,ceda86a4-a659-4a74-9b6c-86c66e27252d,337de47f-4f0a-4004-a049-70154a53e692,cdfeb7ab-ab6f-430b-9ee4-8a3ae9257fca,972dd614-d643-4973-be72-f5e6d161581a,11431755-5ff8-4913-9fd9-e2ad102926a2,9202792d-ffa5-4a44-82d8-5e7417d6005b,fb257b1b-af8f-48bf-849b-8c8a5f104786,df24a33a-fbe1-48d3-8ed6-d76ae5bf6bb6,21196561-9a84-42b4-9b98-f49f25e19c48,9787e902-b15f-426b-95e5-a90d47949419,1ac00445-1328-41b8-9d11-3fdd505fd998,e1c7bbe2-f3e8-499a-b68e-f191889cca8d,16f72b35-4bcc-4633-9f1c-6ccea1b15979,b6bfc80f-51ef-4370-9063-9b3a2d50d30f,0377fc6a-de50-4424-bdb1-4b1482ee3262,a230bc46-426f-45f3-8a0d-bdcd1df11d69,a3d20c9e-da84-4d52-b5dd-e7b9a391eef7,287a1128-5076-4c58-b7ee-1a2f8ba8113d,1a9676a6-7386-4b62-9eea-335ddbc3b8e1,4f423518-6325-4422-b639-a1d1e6330058,9bf6807c-2839-47e2-96d5-de9ea23c6c02,2ec379eb-72aa-43b9-ae24-5ea02937f9f7,224d4c28-4c58-4a6c-8d67-9a936039e336,8a0372ea-5c3a-41bb-b80a-c8acafc6068c,9ebf6d9d-eb2e-465e-8245-5ce9e42e573a,a58bd58d-2706-4430-ac1d-1e07a21dc2a0,df863077-7386-4677-bec6-7f8e92c20a6b,61e7abda-38d3-4a0f-bc39-8548e45ec049,73d44cb5-22f9-409b-b573-8cf12ca9979c,4937e332-148f-4e76-88e8-21a0c32cb7a0,440de11f-3ce1-4a39-94c1-9c656d8c7a3a,7e891f47-a784-45d0-89eb-a3aed15b5b22,5c084efd-7b94-47a5-8243-1cab88f0021b,4b9a867e-7f82-43bf-a143-81b297daf8a0,9c3e2f5e-fc87-4a33-a6d9-fe7e1a8c951a,eeeee441-c95b-41ac-9cd6-b540e69e68c0,3248c759-c7bf-45d0-8865-b8061bca9389,f83f7a82-c713-4052-9847-99c70a3654f5,93733baf-b752-4393-bcb5-60d7e1e7ecbd,71325a7d-fe9e-40b4-8fff-d7bd2ad92acd,55964a9d-3d8b-4465-817c-bb72d961e670,16eb4d66-856c-4ed7-84eb-e2b738e8cc05,86296170-9d02-41ba-b86d-32bea5ad2c91,1b11f909-e2a7-4020-8e22-12745c64c5a8,6233a638-6e80-40cf-8807-04fbb562b028,cc2efee4-44f0-4f8f-99be-5cb120b269f4,b8177a5a-ae1a-49c7-9dbe-77c7aedd11e9,09076f6f-089b-4844-b9f1-c6964b840967,fee2aa27-9f59-4fb1-ab1b-627fe5c2435d,0786fdc1-4e22-4bd3-8efd-f5fbb2d35c5b,4bc420b8-ca38-4386-82f4-d893860d7f28,382261ed-6df7-4781-813d-53412441445e,d76e05d3-012d-4eb8-adb8-9bb372b65846,dbe18ff0-15c4-48a7-9c7f-1c1632fa09dc,e74671dc-ca93-4bfb-ad77-9f3d291382fa,33ef977a-f1f7-42b2-9cc6-2e088cc090c5,5f63b141-c611-4879-a519-9c9bd680cb3e,073681ea-21a8-497f-b57b-c5ddb32352d0,3f14d119-808e-491e-905c-d6f1d29ea449,1d8dcfcb-4f1c-4002-bbda-55a2f1aa566c,29332a8c-c8bc-4668-b258-16b76f09baaa,3cbda4f1-30c6-44a3-a54d-c0c7fc743d68,f5358d7d-9d6a-4a23-9fe3-d499552fc824,1bd261b0-cb37-4a21-9499-f6e5fe2001aa,f8a4e134-5001-47ac-8ff1-bb3563059c37,0bbdfb64-2efa-4109-97db-b5d8f6b52842,88514b0d-d66e-4bf3-9721-208aebff2f4d,22f45bfa-9de8-4696-b6dc-d9d0eb822dea,801ca209-0419-417f-b76e-70976db5495e,b3bc2c02-e5ee-441a-8e60-d39707281459,48ad8d13-cd41-42a6-8f3e-4d79ab45861e,d72e735a-008a-464e-9d65-06288121e2a9,76ba60f7-0298-4720-a01a-227c53c19fee,293b3bb4-498f-41ca-83d8-c04cb21398fd,09d79f94-ac51-4f72-8401-ff497294fc3c,c0885965-147f-4205-92af-8745441d81d2,11b31859-bfcc-4fa4-ae70-43a6175216fb,5e24d8ca-a6c9-433b-aa69-cb9b09cd6c6e,eae89de7-6fe8-43c4-9fd9-c5692802d50e,d174a30f-cfc5-45ac-a0fa-e13fea33d466,9d512ea7-70df-46f5-8378-76ba2e7da0cb,9e85f1be-9166-4b3f-ac19-e66701d723ce,c975e775-3122-438a-84a8-72e27f71808f,b78e84fe-4531-4d46-93d8-7e5493200221,782fee5e-b4ae-465d-bed3-9b538bf06c97,75e77f9b-77c6-4f0c-a084-09926551427b,a61488da-243f-46cf-9b27-332b95d16aff,fce1e1e9-4a1a-4cd2-8364-2f444712e819,e1001de5-1143-4207-a835-b227f2348cd1,c2fd9cd7-1a14-42ce-a6df-65f50eefef2b,a2e8953a-5991-438a-8efe-d685c710a084,e14f47a6-a96a-4011-a0b5-7260a1901ded,cc8d5476-3d05-4510-a597-188aef112d3a,6d6727e5-5bcb-44ee-854f-8540392fcfff,b9dd6b13-6ed9-4a12-9756-0298a09428c4,9151848d-2783-4f22-9ac9-4dc39c7591f8,7e1fb6ca-14c9-4178-a40f-2a1b3857d530,d952883c-dbca-4f61-9f6f-d959d34e7379,dbf0593e-67b6-4163-be95-b08058a31f0b,33078159-36dd-43ec-aaa6-b2838c28257b,e7648065-035b-48af-9ad4-20bd7fae769b,ddaeffbc-2224-4b3b-8436-ddeeb675ede2,7c6f672c-b8a3-4fb8-9817-84a05768d9f2,d8d8aacd-c4eb-4f20-a8d6-aae3e19b0fd6,50d373fc-b738-4759-af60-90e4d2ce4ad2,a1a5cf8b-3c16-4b20-8ce8-1f52987c19a6,933bfa02-9fc8-40ea-bda1-663099bc8b32,2d38af75-5549-45db-9005-3e287dde204d,09787a65-d21a-49ed-8e70-22398a56fad9,1b104332-55f3-4a33-bb54-a7dc450f48b5,d0cfae09-22b9-4d78-9f86-0d3220f1667b,2446c6b2-bcac-4882-b881-f5d6a01116c8,f9c0ccd1-3300-4778-8224-01cc5aa7e981,43e53a5d-230a-4592-b611-749303c12c1b,28c8b082-9188-4742-a6cb-90a744ae84ad,de069cbc-4adf-42a4-96a4-74743f43f07a,d11520b1-f20e-4035-b50b-e74ac8105492,fc3e620f-0ee9-41e6-b4bd-7f8bb606df93,4071c78f-9d88-447a-a5c9-b5b03041313c,bc5d61b2-ad51-4f7e-b980-c37ce073583e,97e71318-9cce-4599-9125-941814475ea8,f0ca3280-1329-4ebc-83ce-4246966b7879,cbb2618d-c315-4cbe-9894-3a5c2cee2229,6b707097-30fa-4f12-8dec-fe27cf70bd60,d1bdbace-3cbd-406e-b437-a51102850d7c,8651185c-0c94-4724-9b4e-8288d3f32519,d9b64513-c62f-4abe-b411-007f95b0f523,c11bb0b4-846a-49d1-8289-0dfa9ad77338,45f500de-f21a-42ad-a45d-7f3d675d4abc,4f7e19af-6551-4beb-aa9f-651ea82b7141,ce42d3eb-ad3f-43a3-b70c-646a48cc5cad,eb81a0ba-8984-476d-942b-8ced5ce99699,4c17473e-0cf3-431f-8cc7-acbe9783ae49,975076bb-d02e-4c44-9a4a-8bdc13a114e1,889aebb4-cc23-4141-b98b-8b9ee809fe5b,ec583f17-7fee-412c-978e-dd7a42a78c1b,7c3cf446-3103-43dd-84b9-a5c1f6b8bac3,d4d5a3a6-1374-4516-9fb5-de9f82df4e0b,eb70cffd-7406-408e-9360-9ffe1c2918fe,47484c33-045e-4a44-b30b-0deb644159f4,952c215d-d1e5-4596-851b-98291f3da19c,4e0d53b6-850c-4bfc-b5e1-090d8b058926,fdb66d91-5232-4625-b161-71feca9321ac,8ba43541-251c-4941-ba96-0f1f60838229,f8c64e18-5ed3-4417-9b97-dbdc4f59915f,63624eac-4f85-4f5a-b89a-f48fa0e990e5,08048bed-ea9f-40cd-bf0e-e2486c28d902,85ae050b-66c6-409f-8493-369c92b57c52,cb2b1421-b780-4b20-9ab8-7fdecd6eb3a8,1a12a7e5-60ac-42c2-944c-08d099e89e6d,f48b30cd-797a-42ac-95be-f96d2fa3ce00,67c5001d-6790-4de8-a43c-d49ca482be24,0fbc7da2-9199-42b5-8315-12e2b728defa,86d50c2f-b108-4f78-b662-17d6e5a5d141,743b65a2-2107-4d33-ae3f-a0a9be9c2060,88c3112a-ca81-4f7e-8e8c-440088a39ba4,33c98422-f466-4ca3-a855-9475d19791b6,841804d5-dc92-4bae-bee9-a2973849d6dd,9c281cf2-b50c-4fac-b176-19adea7ee337,a48bc7b5-0acb-4005-97a0-603e14e565f0,1df6012e-0a28-4f70-ae5c-f14eedfa1dcd,eb10d51e-7b75-4c50-bf80-db7a7b0930a7,30d1ed18-cab9-47fc-9b63-2db35222be03,ee2420eb-3029-43f4-842e-91c59fe5d633,b89942b5-7540-4054-978a-7a06d992e6c7,2771aed2-4a67-4692-9328-bd43fe943bf2,fc024896-01ca-4a00-9b9a-157fc74a6c94,7226e3cf-3e52-4e83-92c8-e1d0a3b4bdcc,66b7b6c4-b63c-4a9d-b168-20c381d730ad,cf6e4f47-ab87-41c8-a7cf-f2c72f9c672c,a2b97af9-7982-4528-964e-418b300451fc,da8581a8-4f67-45b4-8f46-e1a5cc42d7ac,a952b3b3-5741-4471-be32-7fc3658f2088,cbd4fa62-07e8-4796-acd8-b74ccf46cac7,a2b55e93-8bd8-4eeb-88c7-83c81a9e9a04,257d910f-091a-4548-8a50-0c547bcd38d4,a0f79673-bc76-4b7b-bf16-f649c408b4a1,0075565c-4b94-4c96-9337-629ca52378bd,730b61fe-5f70-4b85-9bac-62b4910c8670,302dc219-1c4d-49ce-b028-b3fc6b1e928c,544e4d7e-cb5a-4fee-b34b-43e8a8f872ed,b18f4d3d-a06d-478d-aef0-943d56af600f,b21215d9-3987-4313-ab13-129519dce17f,9ff6e038-2661-49fa-a267-d634d3ba05f7,db50ffbb-e04b-4e47-895b-ded037e0c129,9113a634-e712-4e99-ba0d-885d6a870f70,db96f558-b16d-4d55-b88b-a03e67d49953,eefd0f6f-35b7-4a9c-a119-29d838d66649,6c04f89a-79aa-491e-bc5e-d37212b4904f,c44b3a99-7803-436d-a018-82ed9016dcc9,bfb846d0-4ea7-47db-b7ab-625a990871fe,eeb46c5c-9534-4259-9e32-719e181ef00d,6e763498-deef-4f25-8add-c53bc62cff73,75a6d303-9cc3-4809-8bd1-a0e7df127df8,251e281c-cbb2-400e-9e5d-e7fff7409e10,32c0d6e8-5d60-4939-8832-10e103d5f07e,441fab5a-6b79-4bbd-8860-0e1e2a7c49d4,0e5a0e5c-1057-4db8-83ed-8e81252dffbb,0959621c-fd4c-4d40-b0e6-d911fdaa91e0,83d8ed09-2e19-4c5c-b047-a5a894db8356,544a647b-d7ca-4582-8d54-dbe3256b86b1,cb812f21-78f9-4071-8165-84c307f1f6a8,fa2cbede-ba8c-417c-8de8-7c62158b6b8e,f88891ab-431d-4148-9d89-e11f3df7ba77,64e058af-72e3-4be6-beef-5dac0b01a1c0,f43554c8-78e2-4edd-a5bc-3ec630bd86a0,6db04579-5149-4af1-ab77-39fc9508d9b8,5c41c784-e129-4bc3-ba59-c195b0907c35,9b9ad353-3fa7-40b5-8ded-706e468d26b3,7d9037d7-8007-49ff-abe0-1b447fddc863,4d085d48-9294-4ee4-8c80-6248d7f66ccd,1c213db5-4166-4f45-aaac-7d345d7fed8f,f8df63c9-126a-4046-9f4d-69ce2d6c8fd0,de446beb-3384-4b3d-9e83-6766518fee23,8194c0e1-c573-4aa2-9932-ea280e002bf1,85b24b4f-d61a-4610-8404-6e0b895174a1,1204eb35-9e0c-4e53-9367-251ea10816c4,9c061351-eb89-4c5b-a01c-2f213db08c30,8e16aa8d-d28c-4a7b-b7b3-eaeecf4c1730,631dd145-9176-43bb-889b-b37752b1442d,67790b72-b27f-45ac-93b5-2187ee5523ef,325add7c-22cf-4a7a-951b-4cec3f57f6bf,df69281f-80bb-431c-9ce8-d3bebbb53a09,f48af738-07bb-4f15-bf12-e40f9624d223,1d2b0210-a31b-4bce-92b2-06a6058e99b2,17da647e-7ab4-4d38-8654-6e61aa78734e,729a9ab3-c1e3-4b3e-82b6-129704775d5b,a1fb4ea9-843a-4fd9-829f-8bbd4fa14aec,ef3a7231-76ff-4f2c-b7db-b636009839d4,0908893c-8d29-4d25-a6c9-e1124feb6c26,8b75a9af-decc-4bb7-a774-9e355d128bff,6c0989cf-84b7-49e9-85b3-e65fcacc21db,6af9c303-babf-49da-b479-8c9275166af1,980e5550-674f-4f66-855d-2de1c132fb2b,8297887b-107a-4f5b-884e-6370aa8b9b68,db9c4430-0fc1-4945-84a0-336b9e595d97,87365e1a-cf68-4d97-8f27-63fbf6df26db,4cbb5452-6d42-4903-b4c3-af5032335df2,9cdf0657-aa56-4c08-a672-2e985b21d36e,71f1d3a5-0962-49fb-83ad-a39587a1cf39,40fcafd5-5f6b-4c5d-bd60-0fdb34094a47,93fdb16a-3bd8-44cf-bdf9-6db9d441491e,5fd7e363-cc0d-4504-bb0c-0c0faf281ed9,e7b366e8-091f-4cd7-9f88-d8c103dc1a41,ac79c810-ae14-4c6f-b36f-e247aa81f687,34acf013-7344-426d-9bf8-b7ac83453aaf,8a9b1bcf-c0c6-412e-af3a-097bd1a86bab,faa4a54c-d66e-41b6-adea-d7ab058323ee,3c29d078-ca16-4617-9514-1a64a942e6cc,ce853031-e49e-4bbe-a731-ba6ffed57950,a89752d7-491f-44ca-beb7-e52dd22de377,328c28c3-6702-47e4-abcc-d16fe3e39656,68e19f49-e308-4e01-8e0c-60e2c83b8e43,ca79635c-2a5e-41c9-a2e3-9f996359bb06,34e4c03a-1381-4b54-b63b-b95c22e20fad,71c62e56-0aa2-4fca-a5cd-9e811d3c96b7,f9952cff-a2bb-466a-919c-61d4b3245f92,617fbd3d-6406-44ca-a895-fddccbd0eace,15df9253-9e39-48bb-9aef-c26ab5689704,dd9c0fea-cc46-4a16-876e-70829e78a129,2907b37e-b37b-49f4-9103-de77c07816bb,b847b359-f936-4fd2-8494-5a2d1a29ad5b,cccb14ba-709f-47d2-8318-708022dc52da,62787a4a-53f9-4026-8005-6d9e5e9c5451,9812d522-6157-48f1-8945-fb3783ab3d17,f40d50f4-2bd0-4765-96ec-480bb7c6875f,9271141e-d663-4ee6-b7ea-8a55f0f2a7e1,f874d119-54e6-4929-a289-1a3564c1425b,7b122b59-8500-4a6a-8b6e-436998b20fc5,ac0efe9c-6033-45f3-aa1d-107767e68933,0f69f2e0-74c0-464c-85bd-f4bb83df8879,ce3ca6f7-9b54-45fc-b03d-55d4b24c1dcc,2d968b37-6913-419d-a911-e5e3d3926769,49b5f95e-e242-4b51-90a9-7b12a31425f3,09ed7bc7-9ff8-4fca-ac84-6e5f1f109343,7f9e8bac-8de9-4002-b88d-55ce816d59ac,ca26b89e-56a3-481e-a522-b9ddd6973272,f02c84dc-70bd-4c4f-b5dc-4018328e229e,ccc1a5ff-f3a4-4574-9282-6951154e803b,b1182d16-2861-42c4-a29c-2b02de8b3f31,5bff9a74-9cb6-4bed-a578-8d49d8c75537,ba113494-2bce-449e-969c-4cc43fa83dab,53c86790-60db-4906-a199-f417fe01818c,4f7f8f18-0cad-49ee-a9f7-51669777465e,dc745d0b-54ae-456d-8a1a-04a851518823,3e85b347-73e9-4b13-915e-8eb7bafef60c,7da71bae-5323-4bf9-b5f3-27edd19be0d0,25c7f50f-1ddb-46dd-b2e3-376364c73c64,8fa03188-bc02-4a34-9ae2-831b16238eb3,02f1ad32-6116-4e7d-9338-53ccb90a8e4c,5c8b08ba-6375-400b-a574-a9a6bff44605,8d11dc67-98f8-4b60-9d12-3accf21ce5f4,ac54c356-115c-4d10-9fd9-4cf7d1d714ca,412dbc14-f794-40a5-8ed2-4d838def2b0b,713ec106-8899-4ff2-b3ea-fab8dda43bff,ce3bdb6c-29f8-4be2-99b0-ac27aae35a47,e9827a5a-9f6d-4413-b75c-13ddafb10826,c6d01766-87cb-4359-8740-22b0cd57c089,3f5ac2e3-643e-41ab-bbc9-c984d76101e0,169f07d7-2072-4fe1-823e-6e3a9d18ffb6,afa5e963-32b0-4134-9dd3-9b7b1bdb0468,2dd997ed-0f06-4c2f-8db0-6ede2836ac27,a3a97ab8-d25d-472e-9dce-7adb55b5ce5f,3e3e1fde-be1c-4f6b-9c9b-e6547ab75111,19713683-95e9-47a2-a5b5-a2540978c329,05b5f7d7-8960-4130-8e13-dab66a38c5ed,6dc3cb99-b314-4515-92e8-f6bb3832e843,e6df44f4-5c97-42d0-90ae-0b4f89100d4e,b81d51d7-df3d-4bf8-b9e6-df22b80d6849,c1930bd2-fd67-4a78-9d22-cee849db0172,29187c6a-e266-4ce7-b87a-59f390116b30,14af88e7-ac80-4bf3-b1a0-c077be6e1971,00a7f31b-9000-4b84-bfe7-160407a8c624,07cd44e2-a8d9-4f3e-a5fb-ed59d08c6e7b,51a08886-f1d0-4956-9e3f-d6b0606a4d12,cc893355-acf7-4a55-90f7-cfe5223f8ea6,66ea6c67-c291-4aeb-a36e-bed5b72cdd61,b31ec8c7-2149-45db-87bf-bbaa1380097b,2be02981-d27e-40cc-ac50-259656fffa69,33489476-db0b-44c3-ae9f-d39d9a5d698d,02d63230-1148-466c-8378-95cd7e7a6e1f,ca2be3f5-9a1b-4e44-92f2-0254c29ad0da,dd00501e-8b98-4858-a942-9f025b6d478b,fb86ebac-f2b8-42f0-93e7-9192f8b1f562,89eb8ea3-740e-409a-abf8-53738520061e,0081d59a-ce16-417d-9287-830c19110600,46c9872b-a314-4cd0-9901-3080934c8516,73744808-49ce-403a-ba81-b25439acc0e8,86687b99-b74e-4272-98ed-bc8682389e6b,85092350-0b25-4175-83cc-cdfd6158d44c,982792e6-a740-4055-bc27-022556443cdf,430730d0-db0d-476c-9b22-62388e4c8823,2951b985-4e0f-45fc-b4ec-98e6bc1d7a5a,297af02d-02f8-4c6a-ae89-70b90323876d,a81129dd-8aef-4f1b-93ba-63d01abdecae,e212d30c-9cc5-475b-bfef-b0a5b8f800a6,530213b8-4bdf-44db-8491-ed90c4576043,e94beafa-4ba2-4f5d-b976-6aa94e1a4e6b,be70c7fd-66ec-4e25-9899-efd37e182cde,59e66889-8198-492b-b087-16a4d7284fcf,e5ce70d5-6a37-4114-a024-19d496733ade,ed0f2e84-c3a9-482c-9008-54ff636b73ba,3523d6e3-7e34-49fd-b159-8ab44537a8b8,7c3bd9b7-41a6-4976-bc25-7649693f6ce3,1694b413-764c-4d8c-b4d8-5ba7e3611e57,12fa6271-15c2-4810-a6d2-c0bf0c216b79,0305ec1a-3564-4277-b9f1-17fb0399538d,a795f4c5-7726-4a9b-b088-b2e77a675ab7,1d3a0634-7618-4f3c-ba4c-a50eb6b45787,5d420c91-39c2-4669-9fb6-ec20c2805fba,ccebeeb0-a62e-4a62-8701-040be8867139,525e65aa-ce8c-4e22-b6a6-01501784e448,fbf61595-febb-4d1c-8fa2-66a9562a2762,55ecb4a8-0c8c-49e0-9539-f6595acb1360,65a6a70f-1c79-436a-840e-1d3c452b9c55,7d68e352-98da-4998-bcc0-52e7894b0587,e57f8c6c-084c-408e-a5d6-c75a8ab0775d,e0ca30e0-441e-41ac-ade0-da83daf32c69,08fa6575-9f2c-4222-aeb9-d36a506736be,5ea1e708-4929-4cd6-8e06-7191cf0885b1,012805a6-437f-4902-b610-5d43de6fca49,27cc7caa-c5e2-4924-8086-c98c2ee1fd43,b8f9614d-a0f3-4285-af17-538565bbf67c,41776a84-f301-48f0-8e72-ffdff9318d80,26f019bb-f219-4e1b-b3fc-65aa72007503,0dca1ef8-c732-486b-a5e6-03b1603e8a82,bbe3725e-78ca-4bb8-8fee-dccf6b4eac05,74e84097-6e5a-49d4-91a1-338e85daa288,a4e32302-8417-40d8-b84d-35fbb21be97c,802bdb09-114e-4fed-9e44-ffe79407ed9a,f89e794d-e15c-4f02-8530-8c57075830ed,c1502f74-2c86-4def-a9d2-b461ac899a3d,af902588-4ccd-407c-ae47-157e561ec282,52a2a31d-5afd-47e3-84c0-6bd6bcfcbddc,4a9f254f-39fe-4142-b706-9350fcd35989,40706645-eb67-4bcb-8f84-d6b1f8fc577d,ed51677a-5ac8-453c-b79c-1e5bea493d06,12a4761f-a5d7-4e01-a46a-c12367bfffae,0d0814d7-0ef5-4b82-a5b5-752afead4dff,c533a917-9982-421b-9a80-539df0d5a968,8ff1a9ec-8c68-4742-857b-17aa997ca335,cba9899b-1e16-4530-b79d-b0fe6bfa65a4,9e45a7e9-10fa-461d-9588-5f06b5c09ad5,d7fb52ed-caa5-43cd-ab84-28178977a8d9,a4e9f548-a15b-45b1-89a7-66870e8ec3bf,a21b12fb-0c13-4ca8-9ce5-4d2495f8a17c,036500e1-e453-4881-821d-034dabda09d9,0b7ce7e1-5db4-43ca-9c2f-c2d59550798c,501d082c-002c-4edb-ac75-d0e556658d3f,ce4f8f01-0682-47b2-a1ed-25d7ee2644e6,82578dd7-e11b-44e4-a7d7-024ab629b5af,72f4d48d-5ee8-44e4-81e3-ee34cfd9faf7,b70fdc69-23d3-4644-88b2-2a0b2123a66d,e9043059-5f04-44fe-92d7-d37ea1a4ad5b,2cacc12e-a8ee-473a-8546-3a0967301bcd,371f1b88-1c9c-42d0-9bf0-404bfcda5e52,81327baf-ff31-4d06-ad09-c2f8f5ef7a5c,7ef46287-a201-4928-9090-6209b60c5cf1,16855fde-075b-40fc-b6d5-41fd2e0f5a07,7170ed81-babe-41bb-8990-cb9cdd09f27e,4ded3d59-2920-4ad0-b518-70e2c6eb552c,64a9591e-af7e-4204-85e1-a2d6e5f3f06b,85366857-5a14-45c8-bb3a-b47d8de31183,3b77966c-6db1-49a8-8ae3-5d6a47e77777,620c7951-eff8-4ecc-b361-1909edb7f7c3,1e65000b-6266-4f91-a7d6-5897f0a0fb73,56995465-5e14-45c5-893e-17afc1748b54,b8e38365-1c25-4e9f-96dd-f18aff397ed8,7e896c02-4e05-4d88-8889-edfd054db8ac,829b5a40-e5b9-4033-a98c-3b33d46f98a7,6b2d4133-a33d-40bb-9530-5812a6bdea60,cb30f4b1-f125-4712-b14e-afb993037d6f,7056acab-9261-4df7-bb96-b8be2f66f4ac,31153c7d-dd09-4d30-b72a-ad8213857034,686bf0c8-2ebe-4607-bec9-9a6dc4d3fb54,10479acc-0fef-42ba-b88f-5a9f4aa9af9b,027e493f-fe2e-4975-909a-1cd1d1514d14,4f8e75f2-9692-40e0-af14-ee222fa12d19,9a5f0253-9345-40a6-b617-2f0973de1bf5,3d9d26e2-e433-4e3c-a108-4815551a66fa,ce3097a1-506b-4990-badc-6ed034bea89c,76b13629-13f6-4cd0-be11-975edcb2a470,06d3192e-b3f1-477e-be96-0052b8099224,5c7738f3-1d3f-4f0a-983e-749311d164e3,826bd352-2e01-4019-8ecf-d27780c3cf9b,f3aad957-d143-46e5-9b35-90fe2750c1e2,f7fb169f-0778-42ec-bba1-357ed61a5520,d724af55-336c-450c-b151-376afcffdec2,94185105-1635-4606-ab02-ea1ca06d9dbb,e8f71f1b-f10b-4dfd-afcc-8b1613023ca5,7faa1b24-3a40-486d-9d08-a84ab1a22f36,9e03abc9-4e31-4e5f-ab02-40218192dbed,bdc4bad8-bbe2-4995-a36b-2928b6c09132,46a4f766-3a8d-4164-8c95-0c5ba9d49aa4,36b15213-496a-4640-bd0a-f3cb703f7428,00cc938f-d4d3-4d80-8b0e-431e69c57ef9,97bc5c34-9c6d-4c2f-a497-4082cfac2eb9,e46a71d2-9d8a-4b4d-ac6d-3c614cc8ebd0,acfa32c6-aa26-47c3-8cce-fa1f2cea8f20,3786a4d4-0ccf-42bc-b2ce-a0046d75800d,21a648d8-5af8-4a40-bdbd-2fb5c61389ea,c23cd859-750f-4d4d-8884-11666b0e9516,672f3b05-fb2e-4690-a313-a777e12b68b5,0dd3dd9b-21fc-4294-a54c-0ee7ab0dba48,9bb3e5ca-4ce7-4b1f-afd4-978a0aec37a0,7bef2f22-e43f-462a-8ea9-8f0dff2e40de,86e3d867-3901-4026-a1ce-c5a34dc018e1,462aaafc-6601-4602-a9c4-b2c934743edc,ff4374b0-e8c3-41fe-8bcf-9d639c211ec5,67d2f71b-fb06-4fc1-803e-8adcb346238c,868d0d14-d444-4758-a39e-44804cb605c0,ce08bfa6-4c2a-461c-94a0-2d3d1d2e7277,d4a5a78c-3d56-4494-9cee-e26764e46cf4,d7c453b9-4391-42c5-91ae-75a8a1e634a2,c42f5862-6af7-4647-a19f-a3fad20d93ca,69882606-16ee-4674-bd8e-164cd6e2f5de,e167a9c7-7e21-423a-aa79-ad54bfcfb66a,7bac9146-07a9-42d7-8db4-1a05deb25738,85f30975-2ee9-496d-92a9-a82948a38be2,e3f8d091-bd77-4090-8df9-5cd2cf2f43d7,adc00a03-1d72-48e1-98f4-81e82c5ac69c,38dfbb97-50e2-411a-96fa-d03945ee30dc,4059ee53-ad10-498a-9370-e5e5e2702284,267c4fb7-c82b-459f-96e6-482b723c8a8b,1691b47f-87be-426c-bbeb-136bfceab931,0b4dd425-fe88-44d2-b9c7-c1c591bdda88,1e67a583-c2e6-4c30-aa65-ecd751e629fb,41693a25-5cf7-4750-a548-586e9d351404,79296272-e9d5-4a0f-94a4-a19723ed992a,bd0cc5ce-1001-4957-98b6-955ddfe34246,2e8113a4-0887-4dcf-bcb1-548dbb205dc1,94d2c627-844e-4d17-a8d7-3c27c4bda866,a04b85a4-7d4f-4f1d-99be-8d558545b7f6,73ff6e8a-e356-4c4c-9c12-24c17059bc06,5d3166f6-c6e9-48a0-9727-4ec94ed09129,3aeb12e8-f8f9-4efb-a14f-f5cea335b361,4e538747-d30f-43b2-9dcd-514d062e1953,f47c45eb-4db3-425f-9bc9-f958476d7a93,7b09f49f-20af-461a-b253-06f679145a6f,c25372f7-5fce-4f1c-b803-c1349e2db945,7a89f4ce-bc37-4059-9e6d-99bfea36e561,7b71ec7b-b455-45ee-99d6-04018002f4b0,4ead9697-98f6-473b-ba04-9bbb0d84135f,65f2be1d-25ff-4a4f-ba86-f5d2be92e2d3,daf304a7-292b-4573-a9df-38736fd4fc41,cede43a6-501b-4551-98b9-d167d6a4dbf2,1952503e-039e-4c50-ad26-4923b9850615,ab88d752-1e0e-4261-9a75-4e20c08210e3,ea3e80c2-9022-4cf2-ab62-8d5d28e8bb31,f0b75ee1-d0ab-4352-aa1f-aba176d08473,a7ccba9a-fb61-476f-97a0-8d501fc0100e,15d94074-0939-4fd6-8f73-f098378126cc,90169bed-950f-456c-b8e3-03b3c8300956,6c1f26de-53f0-4c46-bc30-73721215befb,74af754d-f6fe-400c-9714-7ef2fc52f492,e791b3ce-f589-4f66-a615-acdd3523b362,b1a37fc5-3283-451b-8d73-f9357f5a4beb,3e847526-3066-4fc9-92d9-30b17d8305d4,8d81e129-988d-41b9-9d2b-de6595a5c3c2,e02cb250-fac2-469a-9e1e-4208eb911d55,1e288f08-e951-4f63-99e2-46386787d54b,63cd17fb-5140-4167-acfc-b8d965dddedc,6ff06cc2-e968-4d72-b9e4-02447afac0e6,dfa91bc7-cd16-4510-9a2c-8a3d0d4cfd64,3eddb5b8-7a22-46c1-bc24-f9da5253add7,373b3a8a-f6eb-44b6-a321-473acdba3d19,716ddf11-c4c8-4ab3-8d63-5809b5cad05f,3a020d89-82c8-4fb7-9c99-c97a5186e308,54bc2c84-04b8-45f2-a999-101342e65f9b,89e75ad5-b60e-4368-b60b-4028cf854e52,37368b23-1fbb-4b3c-9580-d9afcf9af66e,02931367-d6ed-4be2-bf2b-f23b0b2677c5,5cfa346c-721a-4bdf-bca4-c25b8b334abe,f525c89f-afcb-4036-ad83-63e2d8fa818f,7b063973-1d7d-448e-a318-b40f4392c30f,bf46e975-24ea-4e81-b8f8-b41b23929739,72757345-8a21-412e-83e8-4726011dc709,5210c998-b4be-493d-b240-ea27cc0f6bfc,209f6f31-d490-4a7c-a6e9-42291ae3842a,81b8d1df-3e5a-405a-babb-98397658f07f,602af4af-67e7-470f-bc68-fd53a911b61e,e1123b75-e90c-40d1-84d2-669f38741486,2e9a3e96-e73d-491e-be4d-ad8f184ff4e6,86ef54b9-60ac-40b8-8dd6-53c5084c86cb,25c2f465-80b7-4a16-952f-aa6defb5075f,2c2032db-4d83-4ea1-9b75-3a7974c254b3,87388e3d-0c2d-4d1b-82f1-92e2aae75a78,4c84e5fe-0d61-42f0-bdcf-3e9509a86b76,4552f78b-9870-49ca-a3fe-9fc044f1863f,02150da9-e8e0-47a8-b779-83177e9dc2c6,6477e8cc-348f-4d99-a180-e898dc000e66,8aaa0d72-5f4c-47f2-9582-a853e2e094a4,e9e69354-8731-4732-bff4-8ced263868c7,c849c156-68d5-433a-95ec-521cd2efe3c8,415fcfce-492b-42e7-9294-ed4b5192876e,2be0ce5e-574a-48c2-b4f8-6b79188afb5d,ee93ac75-2789-4585-908b-e8d4a934b672,ca773020-92a9-4b43-a1b1-eca30ce0eda0,33388302-ae50-4485-83b2-42edf3fe6042,657ec425-67c1-4f6c-8665-15678e8a83d3,ecee7731-46d0-4292-91aa-d47c40e9b8e2,647afbc7-9807-4f99-a92b-5deb16e4306c,e48ae307-a7d0-4cc1-a06a-ea45748bdb1c,3d965923-b8e0-44ff-86a7-7934e0f79631,5c81b78a-67b0-4aab-a1e2-a4b6e4ddc99d,91cafa7e-6930-4a7b-b02d-cf930f48b6b4,070dc4b4-e75f-4893-87dd-3787ae76568b,60c600ca-9f0e-44db-8a72-655b43ac79d9,859c94e1-5298-4648-83cd-b33aa62637a9,e832830b-c14d-4cfe-8ed1-00902410b631,e7d16216-27ea-44d6-9b88-e515d3b9db3b,d02487d8-3d7e-4c14-a279-a986ee2f3a8f,7b99d9f2-0093-4ceb-b406-48da10eb3395,1510c45c-df9e-4090-bb6a-57d65bdc184c,b1b68936-57cd-4238-bb54-e83a98e0dd84,24e43e4d-72df-45da-a832-72b88cdd3b4d,9e213ed4-a297-4efd-b4e6-def837e6a3b2,6d55bae9-8b10-4185-88b1-820c3cfc77d4,66557bf0-8de0-4d04-a859-b0a85b7194d3,dac9ae21-419b-4175-b7fd-8748a840f1b9,9efc44cd-1fba-4f1a-8708-007bfbec895f,d848c72b-0d6d-44e6-944d-8aaaf3350827,0501bd52-4874-46f3-be44-965005dffcdf,35b070ff-b16c-45dd-aea0-14f52a3fa22a,a21f9210-397b-47a8-85c2-f484a187474d,a403d30c-0e0c-4152-a42c-ade05191c7c9,44ca40ab-0e50-4af8-ae0a-2808dc38c6df,2b3ca1df-5d03-40b8-989f-41b64a6b975c,7d8e19fc-c5f7-42d1-8279-da7c5712a663,8c93c67e-54f8-4278-ac27-8165d86e44b6,4caf201c-3e1e-4416-8c0c-9d4c39a60d96,14b7392e-efec-4335-8a6f-4b67440c7acf,a6ead378-4080-4e3b-932c-98fa12fa3d6f,72355866-a143-4b6a-87af-7cac4c28209a,5854f106-8141-4e49-bc33-5318d56dedaf,f87efab2-9c0d-492e-ac66-e0ef5646ae0d,7a7f4599-e220-4b3c-a7bc-c2d75b269058,5a550dc5-6969-4e50-8fef-8c5ea4e69752,966e7a7f-f9d4-4e61-a22b-79ffc32512a0,77ded99f-ca66-4f2c-b9e9-03d6e668c5fb,cca90344-b770-4e2d-bfa4-fea123059d8e,88ae1b02-67de-477b-a643-8d2286886e1a,7726c525-0636-4d79-a0bc-9c786eef1e37,f9071725-f5b1-48d1-9c4b-af625e5720e1,ae51504f-394d-4b31-ae35-f673753a4252,a7cb39e2-6434-4f62-a6b7-dd543ac76a49,d1eba48f-4b09-473d-8d84-3128fb3dad52,30faa3e1-23c6-4210-9c7a-f8d9b0125f77,629aed0d-16cb-46cb-ae80-d486336e6bd9,ab28aaf0-0655-40bd-b9e3-fe037bd5952d,b6cf2f12-f584-4651-9c25-9f89cd7276b6,df68cfa0-2be7-46a5-a47e-948544bc7bf4,1e7870b3-3883-4564-9741-9f178bbb4da2,60ada6ca-9240-4b3f-80a1-dff8a7c06223,fe3c543e-21d3-4d61-b136-81b52b9065b3,7e6c6cd2-5872-4c1e-be6f-e1025db733f3,df5cf4c2-9ade-4c8a-a8c8-f00b1e77d0d6,c86bdf15-a613-43a4-bb8a-e412b5d93758,2e4c4a50-3ea5-48d5-a96d-79e279893426,2d8284bf-29e2-494e-bdc4-b39c27228d3f,6c7c8140-5584-4722-8671-f30c5126394d,3d281c77-71c2-4a7a-92df-0da4b13029f5,171af8fa-0ba0-4b4d-8712-f353fe98df21,26306f5f-2fd7-4da4-8ded-ebfad5a7a509,f639f1c7-1095-4499-99a0-e51f027ae88e,124f1774-44cf-4397-b3e5-68952965bdec,4893c081-2d26-4a45-9e36-140335442683,7006854f-0991-4882-9ceb-c369b4786189,1c20de1b-b506-4328-a044-c1466b5e6080,a1dfb460-cd34-4238-97dd-721c379f7b5b,b1254ec6-ba3a-4187-b462-5e7912966507,9e94d918-ee03-48a8-a9bc-df9e3cad4495,22b48cac-f879-4e7d-b3a1-28cca8d75bb5,069d8a39-e29c-4631-84ca-84150f458af0,cd40fe75-4223-4935-a74d-1e5025860a9a,33e08bce-e3ea-4aa7-8fb9-795aee4724a4,2f29dd47-6640-499a-aaa8-35ef1fb3e309,0d09acd5-bd78-4012-987d-4887d33e197d,2b24758a-1367-4edc-8e68-3a4e7d0f27f3,68331db4-6d1b-4846-8da9-fd323d51ac71,02b242e7-9e3d-45c1-a989-b7a2401f792b,7f97341e-b041-4141-8807-db1dc7c94ca7,0e283183-6359-4452-a0ce-74b4a9bc0e47,a699eddd-5ea8-403e-bc70-3410d029d97b,58115ef7-6067-48c6-8a68-8391203f289b,37967721-b196-4754-a6bd-3e55a0b30c3a,05df53d9-e97c-4599-a668-f6b5e879a0b3,92de1a53-5815-4b01-8d02-9e9a921c17ef,b83fdb44-1885-4597-bda5-8885eca8f8bf,8697a3ff-2a5d-4664-8bc2-b79809fbe957,073550f9-a4d1-441e-a63a-032e9ab8a4cd,f8ee8d4c-da39-49c0-bb33-d5185834c867,e524a2ef-7f1f-44a5-a4bc-89f0b1f195de,64c89449-3838-4bf7-9513-0017b85f4486,02107dcb-0a54-4cc6-ba20-5ba03852f4e9,5d791066-a7c5-4e71-a9e7-481e5b1436c8,84a2edc1-4e86-4cd8-ba3a-59c5ba985dda,c76f47a1-0598-4a45-883b-cb09728ea928,4cae9078-f04e-45f7-abcd-513a0ef33f3c,5f0c8f21-6c62-47de-8a03-36e0f81af8d7,d1101a15-2cee-461d-b769-64a94dc32efe,3d1ff884-87b6-4683-b88b-c0f66afdff89,12b580f7-088c-4090-a93e-a94b5febfd9f,6c833b47-ebb0-4eb8-91fa-c2104a78506a,0e9896f5-e1d6-4e56-a430-97d8225adac7,2463472f-7bbf-4367-bab2-b30c5d3126e3,1b48c59d-da7a-45b5-a6b4-cb23292b34d9,0e0e53b0-abe7-458d-8d3d-30038701f152,5553b3bf-e5ff-41ec-832a-5e76caad1602,fc55c293-acae-4ee5-9300-617ba59e5134,741a11ca-c275-472d-a1a0-41c7bbc528b5,8137eea3-d910-4891-9c5d-d421404a1e0d,a430d6b2-d6ec-453e-a8f7-9e72acbe8f88,45e4264c-ecd0-4da4-b08c-7be74f75ae3b,20f2391d-0ea4-45a9-941f-1f0bf4b04365,95d35495-4ce1-4156-af6a-868c9df294bb,8a2c3122-9efa-422f-b5f4-33f410a9f553,df010a16-9ef1-4b0c-9211-32f1cde7fb99,f890e54b-07de-47ae-91f8-8e014ce63366,d9e6f27c-45d1-4d26-bc12-c543f7faa879,b05b3138-6b34-4729-83a9-73b9de03a5ba,c0fffc5a-06ff-4a6c-9123-f214f960a976,24d58bde-ec70-463e-8b02-50ec321866c9,1fe8d5b6-4fec-443e-ac70-8fc9cd01e73e,ba522855-6db0-4fcd-9dfd-324b07975c5e,42a4ba05-1a8c-449a-be7f-a38a25c37f22,63bec734-77a6-4a20-a0d1-4c734fd99b79,05f999d4-a311-40e9-af24-b23830b15340,fb8702e6-a9ad-4077-984e-fc92964cf7e6,d367496b-9641-4e7b-99c9-50e19b10e004,3224cbea-1a7e-4dfa-b8b4-474cfb32b2f5,fea036f6-f73d-46d8-ab17-659b8d5d0797,dce5499f-0cbf-4b5a-888b-8de5db796cbe,75aba260-a468-4799-bd26-ce175b92ce6f,5ddb425f-7990-4896-8356-f87dd71a00c8,b4238c61-d28e-400a-9513-5a336b0a292a,72013314-9625-4628-8e2f-ae88dd5c65e3,3e8ede77-bffb-4c9b-8b16-f164ecca2446,43738cda-188e-49bf-b3eb-ec6cc8ccfd91,dabda16a-5e97-4fa1-8fc5-1f9b33643835,f7e34792-814e-4e84-ac98-3d60dcbce79d,3d272e28-1f59-4a8d-910e-13fca101f650,4c0cfb30-23ea-4192-9b28-26410a8ab117,e5eb8779-94a5-42c4-822e-969596b6b977,acacbf25-7fca-4cd5-b964-b25220ad70b3,b0fc44b1-e6c9-4542-ab74-2e191e871d95,6473f755-a8ee-46ef-b2d9-4a9e32c384ab,42f5b849-caf7-47ae-9783-95b3a71cc815,4c1c6588-4cf9-4fca-9313-ccb1cf859b5d,27382f50-4443-4f53-b3ce-01ef12689b52,ec92a4eb-5257-4adb-aa44-116721436e11,865a9e55-e0d1-43b3-a570-ac6a3194a97e,cf48a611-867e-4342-b601-fa7f9e35ebca,50c836a3-cdc5-42e2-ad5c-f703b2837dcd,abfa691b-82aa-417f-bcb0-68e7be37483c,7e4b96b5-4dce-4015-95dc-7502ea975908,6bdd5daf-43e4-448c-b92b-96fadf280efd,ad5cb5d0-2bdf-4c93-836a-5168c197cc30,fd557a20-46c4-4260-8d42-9b5d0c86c19d,7a12a6d4-d8f6-4c08-a4e7-b3a510cf75cf,0f75f0cc-44a6-4d00-9c1c-7d1f805c3cf4,09cb7db6-b69a-408f-b750-2e65255a8b8e,9b9a2fb6-609a-4ffa-aecd-3f1526f74a7f,334dd35a-c2fc-401c-bbe9-b7d31c72db59,a18901df-d495-4b57-872a-27be580004ce,2ace75ed-afd0-4611-ba9b-b1d8eea17a54,b179627e-d96f-4183-a96e-4a47c0c5fc1c,42e59449-8e57-4991-9ef8-cc808b82b77e,bfd57938-7f6f-4dfe-a989-ffeb1c58a717,9a9fa73c-0252-4817-96ba-660ec5273b8e,a339ce3c-b1a4-4b10-911d-448e8e950f09,a8e91baf-e313-4e4c-98ee-48840452b724,bfe4c9d5-bb4a-4d4c-ab39-df387ae07859,5e97a879-3de4-4da5-9158-bd849ff0e01a,53d1b79b-58d4-4818-ba2c-4cc7410c8861,ed3d111c-1830-4e6a-a4a1-f04bd6ed5957,bde82cd4-9da4-4759-b346-dea1cff20f85,3715d440-88e4-4213-a71d-254981ed45a7,bd257871-b9c0-423e-8662-df29a508da23,3c0c52e9-58aa-4ab2-85d1-fafbb8734895,6853843d-81be-4495-981d-1ef2bea9c2e1,5873671f-c0d2-4904-95bc-9cdc145973d6,f146948e-ed79-4284-904f-eed4179b3669,ea0647e2-0117-4236-bc7e-88cf2b946c74,e8ff9240-6b74-4712-accf-1fb2ff367424,7a7078d7-ad37-4742-bddd-cb0d72b53655,d7bd3eec-7815-4de2-9189-aeb6577f1d85,cc6b1789-2a06-428b-9423-4e9cd34bd0fb,13f2e830-dc99-4271-b0f7-aa8df430025f,784d312d-2dfc-4faf-97a7-6acfe566c571,3193813d-6b3e-4163-b927-b8cf4c74f261,9b1d3ad1-b774-4365-911b-3b1b110e8ba6,ddc50e06-bfaa-4e79-8ee9-87c33793820a,8184cb5a-7d30-448a-8d73-a0700732133b,d1e93c11-a047-4fdb-b233-9260995e3b9d,40c04132-7717-4df0-91b5-84bea93ed051,589adec4-f1e3-49ca-8257-383dee32c161,35a2cd20-00bc-4b9f-9452-9212e5c5cbad,4edbb16b-f184-4aa6-bb15-f9032a921ba8,406c15d8-12f9-4e55-8ce6-51ee0f8af0ef,c0c6a19d-c2d3-40b0-858f-7879b2e141e7,373f07aa-2ab1-4426-9992-faa2d8b632c2,fb795899-8b81-4feb-9d23-28135c1ae4c2,da8ac2fd-9c3b-4139-8529-7f85be20f595,fb8e1bbc-bf17-4878-9975-1ec0dc0bc22c,d3b16eff-c3f9-4763-806f-6be0a802605f,a320deb1-0191-42f7-b350-59c90a0de360,d12b323c-c1b8-42b9-a39f-55f5739a934c,b9dd27e3-c4a0-4dca-bcd6-ccc66d015738,7be8d665-a319-483f-b7a0-02bebf983b6d,e29db249-1e93-408d-b6e0-cbb550a26de6,3919d5cc-12f1-4ecb-8aa4-3afc4727bcc2,d02d4e97-26c3-4570-8735-bbfe819c541f,e1c9129d-d754-4374-b054-3b79b57a27f7,446768b8-8fba-423e-8437-d9ce9aa3263f,01aec05a-0a6f-4362-89e3-813fa6167def,a0abac79-4bba-4bfd-8e00-b5cc23692fec,19b983de-7118-4162-9a33-0158abc1c54f,9ee6a85f-a1d1-496b-962c-9739b04f818f,2e392bd0-1282-49ee-bace-031d32175fd7,406ab228-6fa9-4e70-98c4-f3af50fa19b0,7d7ba24b-84a8-4f99-ac75-8ffdc42665a3,d246b0f2-254d-424f-9fe1-c4aced1af417,029154e4-86b4-46a8-a62e-2edf3b9737ad,1e328db8-36a0-4d6b-888f-931ed0ed50eb,410f13c3-e3b1-42b8-bb8b-44423b832043,fb647e38-c3d3-43a5-905d-bc2cbb7ab24e,f4d73b9f-08df-4464-be76-3e2f45a6bcd4,c498def3-e59d-4642-9b5b-8a0f21cf71a9,0ff7d4a8-c0dc-4fa1-b4ba-9c839473c960,eda08d5f-170c-4ac8-ab75-f451f0fe2000,9a4bb56c-75d4-4556-a2b3-a4b652b25e29,57cd0e9f-bbd4-47b7-9b03-369fbf3a7828,e5760a75-ed59-4cf7-b78c-9324a38abf71,7e314d6e-cd1a-4f16-9f3a-ad0556c86401,3fcb487f-5d4c-43fa-a7c1-d7f799e7ce40,2a5a06ab-879e-4848-824f-faacdaf1dd82,152f74c6-b967-410c-bfab-c08c0d793dff,8c5de175-83b0-44ec-996c-7ee414a8478b,0e672399-5ff6-4146-843b-fe72f28173e5,658ee575-bc9f-4849-8fa2-d2c0007770d4,8d1b712c-cb3c-40be-b7f6-64de2702d610,332e428d-0850-4cd0-8f80-51fef6b4a912,2d0ae8c6-a9af-4148-8062-36adc7c379fb,5279f6cb-6499-4cc9-9881-5207363f7655,4aa193af-8845-4625-9b72-6add6c2ed6af,11415552-e3fd-4793-9a33-e1185e0aa4d7,476c7169-bfcf-4bba-9e63-874d63e97c7d,dc044b42-dbea-4746-b925-c353dbdd0863,214cabe2-bf7e-4010-b725-7320e44b6fc3,9fc51c8d-5dbc-4635-97be-c885226b2ac4,a9ae22e7-c658-47c2-8cd1-f15e2f8ad21e,bc94969d-3698-4b15-8d0e-525dae684cbe,c2d08656-a57b-485f-b9d8-f69f734d4b0d,d4596619-3a8f-4a31-a2e0-7ed0b82aae20,e229ef32-3b4b-4388-b78a-780f8d7aa163,2aa53556-91d1-4a7f-b9df-adaa08568280,96e5ffc0-c898-406b-b95b-11e118d134ed,db1ef6f3-cf3e-41eb-88c5-248f2f0f4031,4aa3fadc-00df-4d2e-ac59-306d09985ab2,2cba874d-3a13-4222-b288-87504aa1da3e,844b3e8e-97d3-423f-b6fa-92f38813c9c4,f02134e0-94a3-45b0-884f-36d1d5777571,926817db-327a-449a-9b02-32c18d00e827,2cf4eb5f-05b5-4364-a045-0188a05d714c,361a4a1b-aa1c-40c5-9087-33be242f874f,2169fedf-900a-4232-bed7-1d7053b25b36,b38676d4-0f64-4ec7-9608-4796f3c46f50,352d9e91-ca00-48a4-89b2-fb287c025b32,7c978d8d-a864-436e-b5b6-82f29c9bdfcc,6642e5e5-c2db-4c32-8523-ec252e4a01e8,0e0163dd-7f1a-498a-a43a-d4f9a1bb6a7b,d7cbad7b-b835-47e4-a550-36e24b6b4edd,99b6c935-97b4-4b57-adc6-0e1c0a4b8669,c8ada53b-a172-4c75-b9bb-d5071886ce31,0b4ed589-9b86-4e92-a133-f90653f43507,ed45acf2-cf96-481d-93c4-8804d8091ca5,07ef164f-69da-44e9-8105-e39e6939650a,7d424af3-c2c3-4c79-83b7-e5e62c08442c,7a63bfe7-55b2-4e51-9342-f04e6e8a50cf,ed858b90-7a6a-45ac-a58a-3db8b5f5e6d5,4edf4810-92a6-44e5-a8ca-f83c44151a09,4b7827dd-6148-4693-8c6d-e75cca1370d9,5569ddb2-7a28-4a38-86a4-809fd121e833,6f56eaed-2661-4e9d-8771-dd9f6fd3d485,e5cce6b8-d324-44a5-8604-e52854044b4b,8ad09b3b-cb16-461f-8370-5e10b7744a52,9225727e-dde0-4dc6-97f5-f561b3edfa55,13b1e5b2-5973-4b86-a89c-f902ebc9d576,795cb71e-4011-44b8-ba01-7527166836a0,932a9e30-9f3a-45cb-8802-8973b1ab69ef,238e302a-04f8-469a-ae84-0806ad43ab8b,0234376e-c7e0-4cb1-9668-1802223d6ff6,ac290c80-16df-40a0-b2a5-e502b3722e09,5729a812-e08f-4dbb-a1c3-f77a1116876b,56653820-5872-4ac2-b369-9d1b28a8b71e,6b06876c-3e8e-4d36-bf7d-7b982395be67,488559c1-da31-49e1-b9cf-41fb2924f71d,3e854855-d69d-4b99-9a95-fce706993b19,aa53ea55-5705-4b98-91ad-b76cbb195f69,e6213d8b-5e5b-42d5-931a-239646ec2d5a,c7d5589c-5e51-4782-b42b-1ae24173022b,6774c5c4-9ce7-4f8e-ba36-db64cec099b2,c0cba529-b01b-4aba-8cc7-fe3dfa20de24,7098f53b-1230-4c32-a07e-86fe31ce9d16,696ec537-4149-4e67-9a11-281984b49ab9,6d11759c-59f5-4d08-97ce-bb7a56a0bcc1,a76c0301-3245-45e9-acc8-a59a728543c9,8247f162-f67d-43e1-9787-1f5973733b3a,604c64e9-32ac-4e90-95fc-36dfda0bfba6,08f109c3-f851-4eeb-b83a-c1c24df7d99f,831a9660-6ae3-4643-8213-fdfca5d5f9dc,b59559b0-4511-4720-8db2-fb2e49ffcc89,41fe88ca-a98b-4796-82ac-a7abaa313974,5b79c8c1-9297-478b-be1e-bc34b0cec77f,78997de5-bed9-469d-81c9-9fc7b425949d,400f0c26-39af-4023-9881-0cd9d908e5fc,51510fc2-af95-435d-b204-5ae4eddd3ffc,cfd87b9c-87e7-4af7-87df-1ce83a3a66bb,0678a54c-8cd5-42d4-91cf-d507d530b937,85525711-8d2f-435a-b70e-8d172ad39478,1c8ddd9c-df17-4a7c-b4d7-f29dcaaebdd4,d8b44e47-c423-407d-acc8-c90956478119,b6db1832-b049-4538-92ef-29a0020d0b8f,e57dd127-d065-472b-b2d6-ad27eba187b8,aa369e69-bde9-4517-976e-e28aaf52f5e5,c05a34de-9aa6-4298-91ef-8227c9d1119a,74d97da4-ab55-486e-b724-58d495546c25,5c667d5c-9f44-45d5-83af-eaa9440fbd8a,f7dba51d-0db1-407b-a68c-0874fd4f4480,b1817b30-e576-4626-bfc0-059ae5a2ac35,9f89ef61-5f05-480b-a209-d1228e778be8,b2ceca2f-4583-4a41-a3fe-9ad7af2936ba,bad093b3-aec0-4e3f-acb1-fb3b7de4ef81,202d68a4-6b55-46dc-915c-5e98b1efcfab,25a61c0b-7fdf-4539-bc27-1bfed664f396,838e593b-3bfc-4986-8bdc-f6f12620e217,f3437dc9-75c1-4173-a9d5-8c1d35af79e7,66b25f4a-5fc1-49e1-a902-bf52339544b0,9e79a18a-0174-4bf0-9591-4eb9ff0e24da,fb7e81ce-58e3-4cdd-a298-7366c06e6c02,f3f5f44a-af06-48f8-b7b5-40a5900300ba,199193eb-928f-42b0-a746-a5fb2bcc7111,61a08af7-51f7-44e5-a337-7d3b635b5807,6d77dd2b-2a81-483a-9fb6-20d10bba4887,db9df0f6-a908-43a1-b029-cb26d5475cb5,e09984ea-00ae-47ea-9c14-650a347df3d7,96662255-315a-4988-9dc0-78ad5d03054f,9e7730a7-b33a-4b04-9219-28e27e516c55,b3911928-16cc-465d-a5cd-915ad366a7ab,68bad2a4-45ec-4c4f-b147-cfe74a5c5b02,ef4a8070-b5d5-40ec-8178-d0cb0677616c,e4c4fe87-08e6-4a4c-a400-54dfc9e45406,7f0561e8-273d-49dd-9966-3ee228488c42,a47d4b1e-4336-4806-9ab3-38038b945428,48579c9a-4f32-41ae-8a30-32789be57d64,b95bfa8c-39eb-4113-93a1-4788f38bb1d3,bac2e694-a77d-4f63-be4f-d2c439734161,8433a3f3-9808-4cdf-a427-c6c563a60899,041d1808-4fce-4cef-bf47-ed4f5034310e,73f3c48f-e5e0-4216-adaa-41e58b80050f,86933292-02bc-4383-a464-844c2636ba17,ca259888-eced-4af8-915a-6649dd1e1ab0,3083d6d0-3835-438d-94fd-355887966ffe,57f0cf40-437d-4a18-9d08-b032fd3bdc14,2df359f3-ae8a-4384-bb6d-d76192ef1ae4,e016c55a-9b0c-4ad8-a723-c39aa541a188,df6d5e78-6aca-48aa-80ee-7efe4923302c,a5a44402-6622-432f-8bdd-b07e49135bde,80ecfce8-6207-4c41-a481-852025f1b667,72874ea0-a61a-4641-9c0c-b581bfda378f,b1d48050-fd7e-4e7e-905e-15f0fd51815c,361a0cec-00f7-4822-accf-c146a31f7dbe,c6fb34fe-4cc3-422d-b200-7dfa63b47b55,bb3773ea-14f8-4104-b027-bc8c0f863dd6,f6c61071-d495-4b9b-82a2-947f2314a832,5464c134-d51a-4c27-8c79-63c6d40a1735,668381e2-1168-4f6f-a295-793264c880ce,975a5142-fbad-4498-8e01-b3c0e3c94e46,9845bdcc-5aef-406f-b809-3b7135b6e853,81f397ed-ae18-4d11-a3a1-85ebac1fc20f,b2fdcedd-9b87-471d-9852-e68e6c21e9ff,dd5b195f-8916-41e0-8aaf-8f6e327f105d,56150f98-e8df-4210-b995-f2961110edde,f17bbdb2-f5a9-4816-8f3f-4ff3241b2bd7,0bf93aec-34d3-46e7-8134-a0e31a6d88ec,b23c4059-1562-4891-8354-cb3e498943d2,e9757021-6644-48f5-95f5-6ea11d58d6df,66874c22-0233-4e16-bb3a-bcbf5db5be4a,cb59cb93-0483-43f7-a418-9c9e2f5591e9,3e32837c-cf8a-4cc3-9b25-ae8c2a5aaf15,ebb05954-dc2d-41c0-bc39-22d0325f0a2d,a71f071c-402e-4175-9948-0cf8b2a489f3,ce852907-a943-4fae-a761-d309758c138c,440a15c7-a31e-4bb8-82c9-43307d0f6bc1,b34b1a0b-183e-4f3f-8002-b7357e986d19,d71e4e89-4c13-4720-ae97-e654a6928bc1,217a508e-9a34-48e5-ae65-553c8ef45009,79ea7fa2-0e6e-45ee-aec3-8186334a9b4c,f34f2a35-3f6b-4d52-bbb4-acba1f5a2a68,b15c6d8b-4fcc-4170-a26f-78888f1b6ae7,41eb6107-3359-4faa-a5b1-8c222954be75,d5218fbd-680e-45b8-9cbb-042eaf7448db,58cda88a-d77d-4083-a807-7997ee6824b3,2198ff06-76c0-4c28-97be-f728f0a755bf,0127481b-8b8a-43a7-a953-fb755bbcd155,def7fef3-f27e-4fab-9410-88c40d9fb984,35105754-d3bf-4daa-9b6f-b2f970d7e69d,f111c8fa-6ed1-42c1-84ba-ccfb4fe43e15,f68e8155-7a26-4bc8-b7b3-18b5cc9267d8,4e4d6ea1-6d79-45d7-b5fa-c6e22599406d,c800aac5-8223-4228-b1d4-3958ed8f6193,1ed87996-a094-40ad-ac27-3772de0126eb,d3455a19-01e3-40f1-9b2d-5ca3a1ed0f27,85d26804-1d77-425a-8255-1bb2e698f148,52727569-bc7a-415f-ba61-b17e2f6b42f7,3115d7cf-2d8c-4edc-ae6a-d533c80eb067,a99e3f76-828a-44a8-a94a-7ac9a88a9ae3,e8704df8-74ae-433c-8e7e-3202bea25e0a,257b3e79-19bd-4cd4-8216-3a522dc01129,3b44fd04-fc89-49bf-89ea-372dedf13e0e,7488cc30-6533-4b1d-896b-1fc3ffe3893f,c183e59c-9d74-49e6-b77e-75f7b14573bf,1000286b-f73f-4355-8f70-a331ede6db59,b56dd240-e9a5-4e92-99ee-3caab5c3eeda,874bf215-c2f3-4aac-abea-ada1e75e2346,41660436-a304-4545-920f-5aa9e8fbae39,74c3ce87-ce84-4450-912d-0a9c2e587f2a,b489cf29-27a3-40c9-8d79-a0d102cfc6f3,4b04b0e0-2d01-4aec-9489-a374c9cb3658,edaf57da-1220-431e-bae7-801a2c5246cf,e4346865-08a5-4ec7-9ad6-e90f63bae646,69672d39-9d96-4c34-ba00-30aff1b31bce,22d39f80-8080-482c-88ad-44897f5c0617,4d68d9b2-b693-4b21-bbd0-fec7e79d46af,0148950f-187e-4b19-88a8-dd1b8414f930,0c7ba56c-46fb-439c-a3d9-c00e4f46c86f,a2ea002b-8cf9-4148-81ee-99e1cb482f56,eb081eb3-7bc0-4a3c-8587-55b12d3a26d3,9af37292-ef01-4c7d-9dc7-1b4ca5a00bce,7503327d-38c3-430d-a1db-c06ef58d44f1,65d2e7f7-dbc1-45dc-a519-1885b9a20c6b,cc293cf4-8bbc-4d73-a972-c4f1a74c8793,7427d2fc-2ee7-4f88-a8e7-eb3b4e5ae0a8,1cba196e-83cb-41a9-b51b-db4717915657,30de6ad7-4e0b-4d0c-b7d3-e20292327015,ddae315e-401f-465f-af81-c655fdbfc84e,489de543-9d92-4ad0-90fa-35cb02e2b8b4,a6708a10-5bf4-4cad-80b8-872e2897b1f8,8ebadb84-6edd-4974-99e4-0431185527fb,f346b363-67d9-4e33-a646-58ecf7c8dc67,11c35031-e1c8-47db-b9f5-d340e77de1db,3a267018-7f16-4e72-a3d3-4284e159052a,0cabe146-84dc-4f45-9d46-20b494e363d2,84bbc0ee-6540-4359-a23a-afee78f66c0b,e64b7bfb-b860-4f5f-bee9-2924b228b85a,64b7e706-1f6f-453b-a601-e2d9cc7134f4,c9bdcce0-63a3-4a13-8360-de29d517c1ba,06b202ba-b94b-4d97-940d-2263ccd78fdf,12bfa6c6-86bd-4584-b55b-2ec95aaff810,2bdea1ec-42d6-4974-b419-132b81d04e1c,14aa1d4d-1510-498d-bc51-204eea04f1c3,fe73ac8c-f1ed-4064-9cc4-9b41c6512d7d,7e6b7937-14dd-4dd7-85d4-838e803ed260,eadd55db-4b33-4e7d-9e02-81ccbde9a114,68e7733c-d06e-409c-8a1c-522f2cd64443,8207bcc4-c8f0-452d-8f06-81657fa52842,34ad142e-8e97-4880-ac57-6ef6d76c3da2,172a48ba-eb02-4af8-a817-8c3de7417f0d,01e5b795-019a-4f78-a977-1aa424c6d16c,bb6a1059-1787-417f-a2e9-4a2698399633,0c20a0c7-de5f-4834-8803-955c9b63e821,ab6ad45a-2419-4a38-9ca6-db25d961163a,ca6014a2-be50-4c0a-a36f-dbe32eb23ec5,2865699b-4e4c-459d-9d2a-be12b46280d3,907e0be0-a8c8-458a-9941-f7b5dbaf96b9,0c4db28a-43eb-4fdc-a5a4-55446831a8e8,1a932acd-790f-4fac-9f33-c5062bd8605b,9df54c03-17fe-44d8-92fb-d1288ea3f66f,0c5cb160-a2f5-4a26-bc41-9d8ebf05f04a,4e98a6f0-81cf-4eef-addf-5bc77741debf,f8f8d27e-adc2-4d51-a131-c8cddbe08de3,ee79647e-f69a-4756-8f8b-707d84499320,0671dc9b-1971-4c95-bf26-429059dc0d68,83bc307f-dc5a-428a-8812-91f05d4d1a24,84ff23c6-9d85-4a7e-b543-835a4fdd5fd1,4f481f42-dcf4-4bcb-8592-962302bf7b8b,d3ce7405-28a1-4077-ab19-e7f75c930f9d,af4c90dd-9a19-48d5-8eff-79609e309e5d,f494ed60-15ff-4451-a7b8-4206019520dd,fcab8116-615a-4a2a-9368-001b8b5fa1e5,172c351a-b34b-4b72-9507-3aff5aa055df,86ac44e6-a08a-4fa4-b4bf-291172a5f472,3d36eb67-a130-428e-b45c-f0f84d1e6fdc,b1be6a1a-e907-4641-817f-6020afc0cdbe,687ef28c-8627-44db-a267-099f4ec04f05,3882e7e8-dad1-4541-a918-d6c5eef37a0c,c563b2f3-1807-4c74-a90a-a5813023ec92,3fffcd40-a195-4097-af83-e257b0af09f5,8d183e2f-eb0f-4507-99b5-106d07bfd5a3,c08c7bd3-bfd2-4e28-99bc-2a282f20bb1c,13169b45-99f2-4254-b5fd-0c02e4f79b6e,57e498fb-1fa6-4fa4-93cf-a542506fdd6d,ee219221-373b-4edb-9ce7-99cef2fa49c8,95da2c59-c214-415f-bb90-5126835a2817,2bea57e3-023a-480d-805f-e041376f6b0e,2863cd94-b7f4-4ce9-b2f5-b8a7765d9372,0be3007c-7d10-4a5d-8b86-805ca708ce9c,beb8f938-9e47-4819-8f8b-5b69a1f5b9b3,3b57dfea-1538-4a98-8771-6222f555c094,9e41e754-74bc-49e3-8323-217bd6c9111f,a595c882-f6b9-49e9-a3e0-2a7e9ac83fcb,104e981d-ba60-47ad-aa4f-dfaa2ed90db6,f85faa4f-c2eb-4f4d-8dd8-a872fcf66fdd,9c7c9495-cbc5-402c-ae79-3b23125ced22,341e9651-3516-4b89-a737-0c6fcd3251fb,d00851c5-655b-4967-8310-351c668a0940,f709b667-6a3c-40a1-8a4d-77a0cc538cd4,ec112fac-c446-4f02-ab80-e9f2bd604358,8317ea97-321c-4e00-a957-f664257dac80,a703d9f7-709f-4d2b-a745-e62bd38dddf5,f2ccb61b-6bec-4141-aa98-8099c49f0a11,b55bc9be-90a8-4544-94ad-ded805d6664e,c613ea52-449a-4ee4-9ed0-f824bd1d9e49,53dc6a0e-66ea-47ac-bdc9-9784f4d8e51f,3e4ce08f-bf17-4e27-b2b0-57c638cf98a3,060b2167-089d-47fc-9962-73d80b233c49,506374a5-e627-456b-b879-25ba35ba5662,7f93de85-16d0-4bc7-a4a3-44b7acd1212b,08bc42bc-e9f2-4546-8c57-18079752f96e,3f9597a5-2370-496e-9dec-c6c69ef77d34,20eebcf9-611e-471a-9282-09dfa8f3ddaa,76c535fb-541a-45fb-a7b2-4647df0b7d37,ed9d8c8b-f0cd-4441-89cd-7167514d31f5,9b295882-d7ff-47de-962e-9dca982e36fb,30e5bea8-c302-475e-a4ed-0a1f4f1bd6a3,d089c11a-a7fd-4fe9-83e2-71938beafbe8,8b978a45-ce3f-4dad-89c7-fa9c63cd02d7,138ae68f-96d6-457e-9b4a-7e62261cb756,4822b3ff-a4b7-4550-80cd-a9f67243f396,2053c054-0113-4763-894c-eae722d224ad,2f8892f5-fdbc-4eae-8451-aaf67364350e,45c6746f-d9fb-487c-bafc-77eeea5528d0,e1f756b0-e578-42df-9317-144f3f656151,c8b78bed-7f3f-4bf8-875a-1303b633738b,374c3a29-4832-429b-b760-12843d34fe36,be32d21c-778c-4152-8946-8351cab30a9d,97ae231b-43cf-4e4a-8690-354333d76978,bf041bd6-b0bc-42a5-a670-8bd237df3787,3380e4ee-127a-41a8-bbaf-fd6bddcc7863,00857c95-3f8f-4329-81b4-5c4d2c94c72d,46fdb04b-fc9d-406f-820e-6c4494a4c550,c7cec750-1479-4ac2-b2f7-63fa1b3ca687,24332b2f-9920-408a-9e2c-39f69d3f038d,8ba8adfc-e034-46ed-8958-5ad29fd00dc3,4a1090f2-ee40-4de0-b57b-29c17b24a1ec,fa2cfbcd-274f-4d89-8eb2-a033634428d8,9ea6eeb7-62cb-496e-bcdf-e342c4581460,7e4e478c-9ee9-43a6-af8b-c6e375204cd8,31483d75-cdd6-455e-b06b-496a83ce0ca7,5ad61fbc-253d-4675-894b-cdbc6723822a,cca60838-0b4c-4432-aae6-9a17d16f63d4,84424d88-50a0-444a-8f7e-22e04cf3da2c,a3c692f9-a528-42aa-8507-1e99299de00f,21fc62d4-90f1-41b6-90a4-abd6e62cf208,5026ac61-ca67-42ec-a45c-e2c0e0a945c4,3a711797-94b0-4472-bbf9-c3973a55b725,d4eab0f6-e1f7-443f-8904-17b1b062c966,73031ccf-b9fc-4e76-baea-b90d81b47775,af3aaa0f-1b20-492b-ac4b-ffc5273421d3,21897438-0838-4e68-bc59-e561593bfed2,b56c46d7-2390-443c-bbe2-338fad105f53,8f26c802-7395-4bfc-ba0b-e5a7afff98c6,53c12537-4881-4af7-9727-7bb4f39b19e0,f43f30ec-4a74-4a62-ad81-c762ed45494c,7a2f0e5b-e200-4ace-aa6b-50bd733b7d4c,cfa71422-cada-41c7-ac8f-ff95ddfcfedb,0d0b7c10-11d1-4a26-85be-64da3fe88a5f,58f4f0d5-1786-4bef-83b5-a32463b412bc,8ca69f9d-dd2c-4564-9521-36066208b28a,a27fc494-05d7-460b-925f-5639ab2672f6,041f04d5-3bb8-4b84-b04f-06cf27a8dff4,ed6c1865-d31f-47f4-a72a-2cba6d23e386,efdf90ab-3455-44d1-9ce6-aa81152c528f,41ee1d71-f1fd-4b89-8b45-053b38db17fb,c2c03c8f-5e19-43d9-8ce9-a1206ab0d728,a569cd5f-0229-4ccf-888d-b7301b9ed5a4,6db54ed6-4db0-441d-9867-a4027d12b613,562f72bf-8552-447e-96ec-a33eca5fcc52,e28c0093-3d2b-4683-ae6e-d8667dd1a6a4,53ded966-284b-4c85-83eb-5ac6f71b489f,b150a09a-826e-4523-85bb-7083e6d2b327,55a0c337-0162-4d42-81fb-7e37cfdbf54a,4011dc54-2d27-4495-a1d2-05f1e1e56f22,3ba2f622-be74-4f83-a19b-ceed0b7be329,7e355098-8653-4258-8cca-7737234aae6a,268c2cbb-ea9a-4dd2-a0df-ff824eab0606,ff6ff695-fd10-4597-ba9f-e156896243f5,e2344605-9b06-4e0c-bc0d-b7b9a23e892f,da760cec-7150-4046-a262-f15963d627da,8510a86f-ece4-4a8e-91e6-1c36666aa6eb,406aec20-956f-4544-baa4-7a23372550f2,884be4cc-5ae9-4744-b262-20953fe88c04,8211902d-f1ec-4839-a715-04fc5aa17ea0,206c350b-a2ba-42b6-afc5-e8986b79fe9e,4cd8c012-6bb9-499f-a994-ed1291c97e49,d07ca422-da95-446a-96ee-253024381276,f3622cf7-a5ca-44ca-b39a-698e515100d6,bc4e61ea-fcfe-4377-95c4-4d35a209e7cb,36b89474-cd7a-49d4-9ba6-09687ba0272d,3414f7ec-18fd-46e5-b1f1-4acb2bbfb9db,0b2411ff-accb-489f-8f2d-ac676ec87eb0,8eeecd52-aeb9-47d0-a3b8-e3d90a0ebba0,5faf04eb-8f7b-4379-ab7a-1c58e3ee4400,6dc826fc-e686-492c-845e-a907442a0dae,ffb0cc30-f6d5-44ca-a600-5861df68fc6d,1145d77a-cb7b-4157-a349-70bbbd112cea,f8c29ed0-6bcd-4bd5-bfd2-71129a609ac6,69a5d523-e1db-436d-be41-8dadf453e07e,783f0d61-e7d7-4077-8690-8fd7b8ff40ad,3640822e-bdb0-4e89-bcb6-4ed68fd5ae70,1dbdb6d5-d445-4f5b-9f53-c194bdfc8de5,f8a4b6cc-ca27-43ce-ae2f-b7f1f14737f1,abf0acdc-665f-4f9e-b90a-a1a5cd19ed6b,c54f4e4b-56cc-4c13-9587-a057a58348dc,9501d9f6-3999-4725-bf4c-05d5c294d5b4,92aac7f3-3ecc-4be3-b61c-cf34cd0f9046,d45f6fea-3053-4894-93d6-64f3aded32bf,d08ced8b-53f1-4b40-8c7d-9452e04c68ac,6c6d56d9-4080-462c-990b-c4b2c71a0dee,98395960-534b-476e-ab47-981af1d5f8be,fb6c0026-11ac-429f-9530-3232b0ec2385,f5527a7e-1e63-417a-8a57-86d465bfc344,91e23ffe-867e-4168-b005-9914a891026c,8390e9ad-ed6b-4734-ba89-f4bcb61a5b13,abde6d14-8630-4ec9-840b-0f1f4292687e,52907db8-e1e2-4a17-aea0-824ba262d6eb,5bb12464-f779-47bc-97bd-2656fd013aba,5ec347b3-a664-477b-aeb5-8c9852ea8871,769b785c-ded3-4389-99a8-1782065b6f5b,31ddd916-a6d4-4899-9903-8127a2db236b,9fa21a2f-fbdc-415f-98b2-a60eefd435d4,476de522-1ab2-422a-81fb-a7d29a5ffbb4,a35833b6-225a-4b77-a04c-dda09f14f4bc,4c74e543-1048-4cf3-8660-2199ce39e91e,54319ac8-a6d6-4284-92c5-22590889dad8,3c845531-14ee-499e-af94-a81c03ec066f,38026882-2652-4f06-9733-edfdf1b3a7f5,07da13fd-5c0b-41bf-bfdf-fe2ff90a94f1,bf8e5f27-d0e6-401b-a958-a6a845642efc,db810e45-c757-4120-9a8a-54813be79710,be7c57f5-8654-41a0-ab39-3b03ba5b4f82,dfb78cd5-492e-4e26-afe7-af53143fda01,c9780464-f851-4d4f-8c76-8e80a57d69a4,12636f3c-653c-4e47-a05b-1bc5fd25c2b8,23cd3e9e-2d8c-4cab-882e-d0f6a71c9177,160da3b9-0540-4a3a-ba05-e197103ef3c2,a8e79884-5a0e-4721-b4fd-9d27654b925a,99719c80-d2c5-4fbb-b087-77d1a2bb57b5,42dffc69-32fc-4c48-a4f5-9a168c094c0a,bbf9695c-a91b-487e-b7ee-9525c1046064,52b1b5f4-705e-4fe1-a1fa-9ae6fb78154d,be7dab4d-2e72-4b43-8c8d-deb2bcabee44,75a756d4-9f92-437a-9c4f-5824884fbced,51a87483-8654-4def-996b-e8e06d6bc7fe,61bab545-fc11-4794-8b9b-efab194462aa,aded8616-dd7d-4bce-9c4f-ff690a1482d2,66e6dfa9-e110-4511-b7d6-2b4146254fba,6d99311a-20a4-4154-a9db-65a230ea4c92,64796514-e8e5-4c03-ae16-49c0b97bca77,a81d48bc-17f8-4341-9551-1d7f2843cb3e,152fd65c-c99c-4ebf-a0c5-e9f398caa121,0d9f3e0b-b2db-4e8c-bf06-10c89754bc63,9644acbf-d3ba-42b0-9a93-7d1341093786,2b4c5228-6196-4dcf-9fb4-3547ae70f87b,976861e3-d0eb-4060-aba3-c9843749e0b4,a1390467-f50c-4c6a-afc7-d9fc60de6890,315dcfab-77c7-4426-b2fa-ccd477f08269,5a31289d-8128-424f-b5ba-f2a5a209e18d,0862321f-6df6-412b-b90f-aed92be7a07f,ec1fffe0-a1aa-4c6b-9733-1bdaa395fff7,1c8f59aa-2c48-40b1-aacb-ac1cb50b0c06,9a506cf0-0183-419b-a36a-cf521e6260ee,741cfcf8-7748-49a5-baa8-575555da45d0,08292480-89ed-4183-b0f4-1c8dba303e51,e1eac7c2-4af3-48ef-bc4e-b586b8738c8d,85e52ff4-c719-4153-8bcd-d56e78bc5498,f6f9f6e6-0ec2-476d-969e-34b2c0129200,38900d70-7bda-430a-8eb2-6c825aefa5ae,41aa6919-0901-487b-a5cb-f51ab0df47ce,f4b69b62-7f4d-4350-b59e-79fc77c27c28,e8431bb1-971a-4c5c-8470-b99eafe9f622,f4a458fd-721e-4129-abb3-1b21f675923e,d3a7c198-e9a9-4708-9c8e-a4bad2d0a98b,8729f0c6-6d84-40d5-b7b6-115807faa2b7,b6133fc5-7331-4a6c-9eb6-f03f026d89fa,a097fca9-1954-4290-9942-c226efd42d61,3186f339-8390-45ec-bea3-637142047d68,16e131b9-87d2-41bf-b3dc-d564f22f45c4,33e3c76e-4c09-4278-8de6-5a879aa0c41d,8301dee1-a6eb-452e-8467-ef809f027978,610fef93-55e5-4e1a-a1a2-f7fe407f3ea6,f1dfc862-d79e-472e-b89e-927a31aad00d,cdd845d2-e84a-4836-aee8-443bf45ba87b,929b4081-4780-40ee-a1e6-9d050f9c5c98,d6c21123-501c-4f93-b1ea-2fcc833eda88,e5202c3a-f79f-4093-828b-ffd5905dd5a1,af96983b-ff8e-4325-8706-3dcd2c3e1d05,c3fc0a1e-570b-4d40-9e7e-c868412e2093,a3e7bfeb-2562-46f3-b2de-fd1fed1c2ac4,32881d0d-a370-4332-ab64-c91a232cd028,f5dbc369-4455-46ff-96e3-bf2cf8f252e5,e4d438d4-fd26-436e-a79b-429ff1475e60,b2651dca-7b6c-427e-b905-0b887a452869,c8832667-6c3d-4fb3-8779-a9c48b8251b6,bb4c1eea-3f3f-4649-bd7e-11df9287df2a,9f48c089-0090-4bd9-abf4-c2bb30c25a06,1e20c8d9-7856-4ff7-93ab-3ad533c89e86,22b49f35-527b-470c-9eff-5251e1bd0371,d618b17f-939f-427b-804e-63da69efeb93,affe88f1-bde6-4860-9925-54b43bae4686,03e79f48-e142-4f0e-a792-da647b1e87f0,a34f1420-2680-4146-a394-67a11ee92e99,aeba1089-bcf5-41c1-81fd-273d5aeb4610,031493eb-76f0-4532-9d16-d59979c11a65,28cb8914-0539-4618-8d11-ce0112db69d9,b8f2cf55-6003-49c6-9868-5283eece6e6a,9fe0b584-3d3d-41d0-8f0b-52c545a7558b,928e402d-7943-40d1-893f-a205a39535cd,5a9b959a-2965-4c3b-ad54-31cf3cfc9ae4,2e5fb01a-8eaa-4ce2-aeb1-1e91a777253b,ff2ae65d-df04-4f72-a2d5-6a7b6fc801ee,ec2d4a82-adf5-40fa-848f-3c997abd033c,3aadf976-4d87-43b1-b32b-03715070455e,f7dc0d79-cc5f-4cd3-8ed1-fbd4825629ce,d11be24d-bda3-42bd-8495-cac28740e0cf,f8159988-17e8-490c-adc2-e75a0ee63d8e,44c1f4dc-336d-4206-96d4-0955021d9c63,adf292ea-81b2-4f18-8eb9-a057fe0bf7b2,f18a8a69-e881-4615-9222-b87d613cb168,fe49ddbf-53df-48f8-8f1c-e46eee3fa968,58cc95c8-8a8f-47d0-a67f-a3f4c86252b7,c38a18f0-06bc-4ff1-95c1-2f1b187f435a,510ea7f7-cf42-4fe8-b18b-81c24447f920,6ae718f5-ba80-4135-bed3-8639051334a9,4419a079-7e34-4227-9feb-7b49a089ace6,58be9314-a78e-4b34-98d7-c0f2c7d2655a,51c0a77c-c50a-4178-996e-22a1a6a6c2d5,6fd15fc7-efc6-4706-a5ad-c33ad3edb52b,aa15a919-80a8-4828-a0ee-24ddb205865b,0ba101ad-e7dc-4019-b971-6225964462e0,667e51f6-eb8c-4226-8f7e-de2a5b8a32ad,d82e3555-2d60-4805-834a-4b627fab6206,573f0da7-e14e-473b-981d-f72142ad4ab6,d91ec375-faa4-4b7b-9ee8-b33f3d3729af,326911e7-1b6d-4717-bbbc-7c1259db0f33,d79bb354-3c0c-4610-8752-87df8cc0d1f7,724a33f3-aaad-47c0-8943-e9eb57d9707f,24965439-dc21-422d-8754-0273aa6dd10a,b65d7f8b-d8c0-4638-8b50-63e65fe6115f,2d860847-6f3b-4054-8ab8-65b297b73938,59124a64-6432-4b6e-8fd0-fc51fcaf6131,ab42bbe3-3f61-4ec0-b350-644d7a80e755,6d436085-93b1-405e-8f7d-d6bbfcc7f9de,15327f7a-ac6f-4a82-a51a-4eed276c975b,c1555ce2-d63b-453f-8597-5184ae51d8ec,4f2aa74f-a32d-46b8-b2e7-8f0f05abc94d,23a3a388-59ff-43f9-b9b6-d0712f96d2cc,117ab1fe-4fc3-4a0c-8868-fd003350d26e,24ca7fe2-e116-4714-9220-e61ba610b7d2,2a98888d-0d56-4e0b-89f5-5b3f75716cf6,7ccb6f6d-fb69-4711-982d-1e7e3cb67383,91e18da9-8b3a-40a7-aa80-1769f50d2f57,e9dc112d-291d-4f08-98af-5374bee76c3c,34792f59-43c2-4d55-8212-06dfc0db06fa,4780c44a-7d8e-47a1-bc49-d8e6133bcfa9,6e38a1b7-5ba2-4be6-810d-d737f7bf7002,333dcf42-5773-4bb5-9ce2-9083b2da1a12,30ff20ac-bee6-445c-bae3-c07ee94c7f03,2015e902-2959-46d1-be9e-be3a35c052d0,02a1fb6e-d901-4484-8821-b2897d95c84b,0704b8be-316f-445b-a3c0-e15eac1de63d,75fdea88-3dc0-40ce-9db0-3a0fb9fdd028,1ff3ccf0-099f-4819-ad50-63eb7b80a072,94c520fc-ee01-4bfe-9276-bd8e9c8fb057,0bebc64a-5fd4-415a-8b6b-4d56021e7329,af9c51dc-9a04-438f-85a7-edd979c53889,9f5a469a-c108-4f7b-a298-e4cc5ee38d26,a6d0c643-2c30-4170-8a94-19f360dd9510,ce67186a-db24-459b-87a2-8bb6e3203639,72da3687-6a81-4092-8565-dfd1d1c451fa,fb191799-9e68-4e49-8b20-bf226222e974,ceafbbee-74e7-4698-8d05-496d78aa596a,8ff7eecd-68de-42c3-8ceb-3818edbe5f5f,1cae1a2a-6e0d-4f4b-8c41-d67d0f855675,1baf48b7-b06f-4b1a-a141-19cf93590f03,22403e19-59a1-4b77-b493-79a1ab7456ab,e73c3a97-1aa4-4279-8eee-202de4717712,b5dd567b-7e2c-4448-963d-3e4da761047e,e646f48b-6df5-4ab6-bcde-a67106480c2d,c1c18a29-1b8e-49b6-9b6a-e0629a730f0b,6e447fc9-f284-44e8-8c16-0da0960147c1,aa82f6cd-c8f0-47fa-88b5-f6dead08deb3,8e47bd1a-5c23-4076-a756-b483be4bd32d,306c7e46-9caa-4387-ac4b-8f1e44ccc109,1d552d9a-91a8-4935-9320-7007120eb986,2bd7ea30-07e6-4b55-9c75-4d5cc1f10d97,4f71456d-e13e-488a-9b93-26df62b212bf,beaedb08-fdf3-4cf7-bdd8-39b6c5f8dde6,18924a07-7789-44f2-8692-cb094c39880f,d45d22e0-8d8c-46a0-bdea-f513bd2c363d,881f33be-07d7-4cfb-884f-da9a5e7dbdeb,352fd074-e2fa-4d05-8a23-9eda6759c3fe,829f63f3-d22b-44ce-b8cf-41ec2cedc8df,822bed28-d46d-4d55-bbcb-b3ab3af18716,b5d4c794-c281-43c8-b2bf-c34b7844cc9d,598961a5-27ed-4ad0-8582-62197e24b5b4,ad681e86-3447-4643-9697-86c3f200e5c7,46c132bd-7ace-44e6-b44a-ef1c1fc2f5a5,196adc3d-f799-42c0-b49d-452a460764da,8d707f1f-5758-461a-ab0f-f9e19b2c4c65,7b0cfdbd-b06e-4d19-b0ad-678dc742f7b9,a1358f75-7854-4828-a62d-ddd17c2ac1df,c846935a-10ce-4309-934b-be02f0878ec4,fe3e8a09-511e-49a6-98eb-a1cb134c7908,96e7f58c-2f97-4d02-a99e-26c4d03e9e0f,9e0bc3d9-e227-4819-8e62-56b27894e65b,b7ee95a6-f517-4b91-b7e2-fecaf713c25e,fb0eda1b-fbe7-4fa1-9593-0e4573ea70f1,733c6a56-2b07-4284-b435-623584613199,2f3f313b-3c21-4c48-87d9-592c0b1b4cc0,576294da-401c-43e0-8f77-0ba61021b20b,8d624daf-fb0f-4c2c-b81b-b592f8e7b092,921e577e-440f-47ad-961d-89fce3ad5633,3c7de956-3d7b-4bd4-b60e-b6661a460b69,88108dce-dcd6-4177-9487-e720bdc545e4,6a485f01-20dc-469b-92eb-9998ee5f0df3,e7b8355f-5e82-469c-aabd-58677fc26215,35fe4d43-cd7e-47a6-947f-bfe215b2ed55,7488da7c-9ff6-4c11-bfcc-aacceb8d43ff,e597b693-e8a4-4d43-b6d5-61a27614913c,93966be8-9cf5-4274-9af0-222fb558ec13,c420ae0f-7c31-410e-ac99-ec87bd0f0f49,23f69bb4-4715-498d-91bf-fcc33426d166,5050c0d5-091e-4b22-b662-6556b0482db4,b5bc844e-fcac-41e8-8545-09f50bda22ee,8d6d6193-6747-428b-8f58-6b1ed0bded2d,e9790843-d9ef-422a-80ae-fd65f511dd9f,d89338b0-945a-4e6a-8b2a-2927f9f0edbf,c052ebd4-3558-4c67-be88-5643a54e4013,56b6a6d9-7427-44d6-b179-c94514f187ce,e6fb51bf-efef-409e-920b-4347c789cceb,73eb59af-1e1e-40c2-a745-0e6f53fe47c0,6d63f4a0-fa87-4ce4-88a4-3d74179a9dc0,1063f3fd-c826-4b9b-99dc-1940c557b74b,678a939a-bf90-475d-94a6-3205fd52e442,4c02e494-5985-4a5f-9419-617223aa1e6b,bb7732fa-0dd8-4ecb-bcdb-f4dc2fcd0449,cf8b8589-74a0-4284-bcaf-e5c9ad9d8ee5,f266ef6e-0e76-423d-8f56-6319e24418c2,d3033f54-054e-4922-ac55-104be2094f99,97a3effb-9e1b-4e6e-801c-177d6ed4b8d2,c6a946c1-b51c-49f5-919a-88b2e82e3946,02157af5-cea0-4a37-a617-f0a6c458358b,7b32ef4d-182a-418b-b7c5-3386652fc688,0636db00-3b0c-4543-9eba-e5c8edb27c91,3a42b240-a56d-4280-aaf3-5f52709b1b59,32f0e83c-f0ad-45b2-8a6d-0a3d89f2dbbd,cb6101ec-eaa5-4f44-a4f3-cca7e9a89675,3d95fff3-5e9e-4a98-9456-f48a9794fe25,cee6da37-9b12-4bd6-b990-a1ec6f90fbec,49d7eb56-814f-455d-af97-17cc24fa00dc,2a6ad886-1586-453c-bc2d-61fd19603a0d,3551a05e-ac22-45ee-8bef-cd1892da1aba,7170e630-f090-42bb-a93c-c5945b6d9eec,81a90eb8-fab6-4fb2-b9f5-2f954a0b7810,7774f612-e305-40fd-816a-ce073f9be5e8,09722d10-a402-4c53-a5e8-dcc0bec6bc97,9e342dea-8f3c-49b7-a5ce-067b0923ec15,4fe2698f-e922-4a49-afb7-a56ec7103bd1,c17714bf-15c2-4ce6-a6a7-95a8809d480f,5cab2e62-c59a-41ef-9387-fb9212672324,ec1a6495-6751-44fd-8427-dd089bd26e68,725cbe41-e75a-422b-ba59-1d0a528bae0f,f36ff3d5-ace9-43f4-8555-d183cb5f1ca0,9683935e-8886-44d9-b4b0-37d70b5de6eb,62bb2434-7382-45bd-9c05-ee887d6fd879,b10e5362-544d-4b36-929d-7b49dee6b256,98d58bcc-2cc7-43e7-9789-b36aebf934bf,88744566-af88-41bd-b94b-397e63004602,04f88b4a-1b70-406f-8094-2250aed03abd,e10f0733-f713-4405-95a8-72a3e6264fc6,7c1a9b1a-e4b1-4a3c-be47-a1372166533c,861db9b9-ca0b-4563-862c-c23fb9298f92,1d3106d0-4e73-44da-b0bb-6581ea835259,56ef188e-cb0a-441c-ba19-635879bc87d1,e8086522-4c23-4983-9ed6-96cef2a69c27,6fe9c4aa-8d70-4ffc-b529-ad501bc0f9a8,35e3f422-0481-491c-9815-2d4536502eb5,d7269f14-b10e-4b18-80e6-b0252a879808,c81d642d-694c-4eec-a19e-432ef85c6b28,ed33a2ac-8c9b-44bc-ac9f-194838d4f5e4,f819c61e-ea46-43e5-950d-0b316fc588e7,db13ac57-27c8-441c-a787-2d6e1de3be36,f88398f0-df94-4996-9b5f-eb59f9d0fb16,42072982-e48b-4224-aaea-a84c9e535460,95152fed-5d15-48bc-8bcb-0ce305f17fe1,a001df18-ef92-4f6b-9568-11c1d83279d3,ae4ab7ba-f591-4cae-9501-1ce7f2d911e6,08f9f8f1-d463-4990-aa8c-a438cc587f42,afc85f31-451f-4621-8d4a-8a9e5e15aa2b,20de3e4c-03b5-4c0f-909b-b843850abc83,af612df7-1b08-4b98-8c4e-8a5c5fad2e68,94c11af0-3d0e-480a-bbb5-605e665faac3,38127dc3-dd87-4581-b5cb-9f90a58aa524,0e0a97c3-9d0b-4b9e-b289-225be53a79a8,46c9e4da-f751-4e57-9961-a6707710619a,ab66dce8-b1c4-4ddf-a359-0ad11095ff72,b8d3652d-4ee9-4be7-a7f7-03854d9c524f,9be61312-05fd-47a1-988f-d96f3fdc39cd,33198bc0-7371-4917-8188-f3513582ed3a,ffdc7a2a-08d9-4d52-8dee-e26f16ac8950,b047fa9a-a3c7-4d44-8761-750afe8d72d3,ebe4814f-0845-452d-a1ad-54c173835fa2,5f9a81c0-a540-49f5-b65c-d6908630b8a8,877c3d43-5ec2-4d76-8cc2-e8b4cc76943c,1abf9886-430a-4d27-9b3e-1899d9c4e701,0db36d5e-9f48-42b0-8dc9-2a0a7bd7c8b6,0cfcbdc6-59a0-4429-83ba-abb0d14ef379,47a33c65-8341-41a6-b724-1ba6bf93dddc,ba5c95f4-7a3a-4a95-a3ae-6cbe1a68aedc,9ce8ba5f-9e88-47a0-8c83-871dfa21ff2e,789bac04-2317-4d59-b8a1-e5a0e0db2c99,a830b1fb-3501-4451-a37c-bbc0fd74e5ac,6c10833e-a34a-4b8d-b561-c6f3183fcd36,ee4c1892-15d7-44d1-9475-1a2504ede3dd,7b25814b-e029-424e-bcc2-c7f222f2a5f0,00e5876f-3012-4f0c-901c-daabe6fcc004,9a5d1c6b-8e25-44a5-b3b9-8867318792b4,e81a177d-cd27-4156-a9c0-b0ae021a90e8,6d9586ef-462f-4b46-9b6f-6f3227c7c848,fdf49139-037b-4e03-8127-7b5fe84e25d4,1017287b-c929-4581-9b3d-9df22464f786,bed434bc-ba76-424a-b309-a4b487e162fc,f1504f7f-db7f-4f4d-bf68-7be422afffd3,90df5608-41dd-49db-9343-366ad746da24,7c4b1382-29ac-4e5f-8009-13bd524c0988,4c2e207e-3de6-4be9-8acd-f467dabc90c7,20667275-dfb7-4385-a691-94968fe7ebc9,2be29b57-eb5a-4cfc-994e-d3f99b769cb8,344d2c27-cccf-4b3b-971c-c7abb1cd296f,5f69d59c-fcc7-4f60-abb9-fe0e03614dc2,5917058d-29ff-4822-a799-7b91e459f391,b09d4c36-329f-44cf-8cbf-5191ff461c27,69e4972e-65a0-42fe-a9f5-39c7697fd11b,0da0923d-b06f-455e-adc8-b1a25fae8f2b,65f2ecbf-e8b3-424e-8c7e-34ce13845626,a9d2d0d2-1d6f-4c23-97db-7e7afb82065e,bc911e20-3907-4fab-a0c8-18ed25ec2f30,fda94caa-673d-4e9c-bb5b-5b80aefaf6df,58247e66-20b4-4598-9ca3-d29f42540476,9f8af3d6-4a97-4bbc-8f68-aed41c40e602,ba942bf3-9e2c-455b-9618-26cc96af3d60,d131d6d5-dcb6-4248-8104-96252b90cec3,8fceb309-c24c-4601-a2b5-23b880720b6a,732c3cdf-f3e4-420f-aed2-86f0ee482912,0b62c05f-bb2f-479b-aa4c-22e4267552c7,800000a4-5d33-4d45-84a4-6600cb3020b1,b5f1ff32-fa7c-4070-8d4a-d0051af5d4f4,42eb072a-e323-450e-86ee-228a40485e99,0bb43ab9-8255-4ad9-a568-b880bea618b1,eeff992c-0517-4185-b4fe-de73ed843be9,2f7db980-c055-4afe-8254-c088417fe0ef,82ef6217-f2c7-4139-a15e-085f91c2116e,b24b1989-7503-4421-a942-1ed8d784f1cb,174e4ead-85df-495b-a66c-10d2c38bc8bf,c17bfd8c-b06a-44bf-8300-173dfc3a19b8,a3bf732a-484c-4d38-a340-2e54c76d3af2,d9f0de9d-3c42-49a6-937f-1cd22d236d15,ff089b25-7deb-47f0-a1be-53e1e8960c9d,3e90aa56-a107-48c7-8588-3a33520a3b74,bd813b2a-085e-44d9-aac7-9bf9e1562b86,a044bc3f-225c-4367-962a-f8ef4aae5e74,c9835e47-a012-45db-82da-a52b3aaac83c,b2754ad4-22a4-4181-bbcb-319728c61d9a,656a88c0-ba29-445d-9463-20add2c1ca98,2981467c-ad01-427e-b128-d86faddc8c9d,c5941dea-b068-4a4b-943e-5f181ac56de0,eef6cc7f-af9b-4bfc-81b5-692504163574,9d196289-8343-4106-80cf-dddeefd71e23,4ea1ae21-75de-4c32-9539-73dc6bf7f870,53569a54-03a8-42cd-bcf2-16115c0a072e,281aefdf-a9f9-4693-b093-285698571bee,6b0796d9-bd94-49ab-8277-43558915e7ee,bf824300-b67c-43eb-9b61-0a5faa5926ba,d4dd3f7d-8378-4d54-9737-6ed04272529b,d5318bc2-3ffc-4397-9154-6194bedc0acf,a9d87021-2071-44d8-b9df-0a3a058eec55,0d8de49e-49d9-4b3c-9c78-4d20413d7020,61216931-dae2-4e37-b5fb-797c3411a67d,89451734-919b-40b1-b8de-51440f73d60a,4bf3215b-ab5c-4012-a2b0-b8818b0d4871,548142e8-d34f-4a9a-bdea-e2fcc6e520a8,0b11e599-4e85-43db-baa0-54fab96b377a,710c9059-33eb-4b64-aebe-ac16fd3090ec,4f48ef1f-dbc7-413b-ae95-6c59e9473e72,4f26f444-f81c-43e1-babd-7cc8d2dcdc24,8957db44-79bf-4432-a163-e3f99f4eb7a2,7835953a-5c77-43ee-a14e-1a673c560953,0e98ab9c-6a46-48fe-81ff-9fa6628265ff,3a3fdec1-e0a8-4f4f-84e1-f9972a28620c,1383dc06-eb39-486e-a9d7-62600ac603df,515016b0-960e-4770-b2dc-1e71c10d363d,fcb3f71b-a634-49d6-bc2a-4222f8a10687,3b368e66-4c1d-4fd2-8879-f0792fd40f73,452fe1f2-587c-4700-b1ac-baf60e2ad950,1d5a3745-99df-4cc1-bc9b-46b14f630d4f,9e510bcb-81cf-4104-9c60-b44fcf92514a,adcdb007-8232-468f-bb96-5769fccdb722,d27d5411-3c2d-4a6e-9db4-6efabb6a0726,1257fce6-c4aa-40ee-ae2e-781f652c0bc9,0e934acc-da5d-446e-aa22-e862119b7ba0,a31d69f2-4cae-4a94-a7ed-e702b3620f17,4a9e71eb-82c4-4bab-b954-19d716e9e527,1da95148-8fd7-48cd-870f-b8499c5b3ebb,b6db6d12-cc5d-470f-a981-0978e7aa6e5f,c7f2237f-613f-42b2-950a-ee4b3e4d541d,01e2f5c7-11e7-4a75-bcf7-82706b78807e,246b054b-6e1a-4776-9d28-c2a689371aa5,9a173c24-15cd-481d-a626-f59be884072e,29e6c38e-a4cd-4f14-945e-af1f28a861f7,58abfb2b-47f8-4590-b4f4-5da678892d3c,0a199371-f4dc-458f-9e65-938b08e5e65c,40e8411e-c9a9-4cec-9fb2-d9535472de23,43efc7cc-984e-4bcf-b0c3-91fcaa7583e0,31beef42-610d-4f98-8cf5-988d81af16d5,f89b4c03-4838-41d1-ac54-4f061916dd44,5c06c1e5-9370-49c0-aaf6-9a3f5f47b5ca,67fe0982-aab4-4463-8e1a-92f199e16e02,77993563-f5be-425d-bd1b-56d461a78fd7,3c8c71b7-085e-44d1-9aa5-de59f9a2fb11,b7fb1a87-877e-4d6b-b89d-6ff8dc0db12a,8b33ec5c-5ebb-4ee0-af1d-4ea3e9bca991,8acdfeb3-a651-4ef5-b223-9d3d817aba6f,dd4b4c55-bf4e-4732-a0a4-512624581792,42ae699c-e8b8-4369-889c-66857a542358,1370e61e-331e-4b33-b6e8-9a3cc472a286,246788cb-0b77-42e6-8fe7-333e0a0c59fe,e0e5e6e4-98c3-411a-b7ec-0ce24b2612d4,1fb1ece9-63f8-4c22-aa72-673cf2331f73,9b854594-9a3f-4ce7-90e8-ebc5b2ed9bff,2cab8483-9cb9-48ab-9645-43afa31e4ad2,90a6d541-fc06-41fb-a6cf-6eab00a69f52,3e889e3f-31cc-4678-a63f-1e10ae0c341b,6cff5032-6829-4344-8f71-7337debf868a,c4006aef-5246-421a-a772-d6603474db9a,490fa205-c85e-4bc9-82b9-0cfce4fe7ffd,7bb57544-c728-4b45-8c99-21d15828937b,aabfe222-6203-4089-abcf-e32ded0896a0,c0451fc3-054f-4480-a35d-07cda2a84980,8f83c694-85f7-4446-82aa-39d88266b8c5,a3c0dbec-f2ab-49b2-b7b2-54d067dc65b9,fcc82e88-da55-4c7c-8d3b-a8ce22cbc39e,35861820-eb14-4e05-8089-595733eb82f6,b900eee6-df7c-4c71-879a-6739e29a3fd1,297015a0-747b-44ab-9a96-91413e07d912,f2bf958d-3412-480e-8f32-6825695d3909,f9997dc7-9467-4d73-9795-cd6ea720ad92,7a2f91ee-14d7-4081-adc1-56bb3bbd80fa,d47ca0e1-7a94-487c-8854-7c570cacd839,ef194a13-cd2e-48de-b323-468524247f3b,fe01f7ac-d89d-4759-8d24-0fdf66cb8236,cf094920-2a48-4744-8977-a3f3a455dbce,1a9dc90c-a085-45d1-b3a9-f56d4c109e93,da4849a6-94e0-420f-b6c6-7034c85515df,66cdaa50-09de-4c87-8fed-e09a3d3a7503,06525d06-8b94-45e1-ab30-4f4e19e0c9e2,ffb38c2c-85e9-43e3-b862-8b49e7ac3571,27e862cb-4d90-4ce5-903a-7a3e3cf718fc,40ef6377-73e4-461c-b1c6-43f1dc7b9e3c,addff032-0dbe-49c2-b7f0-133e1756ec16,05890640-c46f-47a5-865f-62dd33d9de10,9110f24f-f801-48a7-a43e-f3ea117459ec,016f1c4e-0f67-47e6-b742-67e6b6f0dcf2,f884439c-676f-4d99-8d5d-84a864add7de,0991afc5-341e-485c-ade6-5a5b4a5b7f15,464b7936-26da-411a-b6d1-293cadb9665f,ffbf8854-6032-4246-9b3d-06638a01e472,b09abb36-0149-4078-a270-98e06fd54853,45b9be3b-2d78-41d4-92aa-a4d8ada01416,f7ba3d0c-9d29-4661-963b-9926ce9f1796,bc730e3f-5aa3-4c1a-8dad-0768b528e2fe,fa237856-dddf-4f25-a3c0-be63f3125efb,0d3d0f08-05d2-4602-b476-991a1b61883d,1c63783c-7abf-47d0-991f-1570e7999d6c,7ed1c9b2-9d77-49c9-adc0-707223ff7459,5ab88eba-da2d-436d-8379-2241238cbe4c,6c909f11-1f04-4e67-89f9-1e7c6ae53bf9,90baeda9-1310-4d0b-8d0c-49b62623d599,cb4143ac-6572-4546-a790-0caae9645ba8,f309a241-0d25-489a-b50b-45004680d439,05d21912-4d5b-4817-b911-33d3f98343f9,e08ef22d-3da3-4bfd-be6c-f5458c5ea803,a143f10e-b88c-4e1d-aeb5-59711d9c6b68,f0fdc121-a637-4485-bb45-6f5fa537f75e,5490c50e-78b8-4c28-95a5-62d53f2f0dea,9bf5a788-e7fd-4d38-9c62-ba2d16831c54,39395541-a7f1-4307-85d2-fc46d53be751,f49858d6-2bf6-4490-ae2e-38d6151e9b47,ceef53d4-c040-4c68-934d-26df3578a8e9,47693422-e016-4c56-8b41-0e9c4ff29650,fd1653f4-4a99-499a-b4ea-6eba5c5c9fa5,89fa0315-a21a-4672-a7e7-16abff09d2c5,58fab243-42e9-4974-89c9-a0d17e52d7a2,8c6a7f7a-4835-4579-bb8e-9829c95c9625,94df23cf-7d32-408c-8c39-95e81dbe8d13,2d38cfb8-e14a-4ea1-95a2-96a765752637,d870dd68-6a6a-49c6-8730-eab299605f97,f248abf9-9dbf-4a11-af9a-7fbfd1ba390a,f3c31d60-9160-41f8-b6e9-e583cf2413f3,2940ad75-e7f0-4d33-871a-c4eb5e8db9b6,38fe5dbf-ae54-48c4-809a-aac1c1fbf059,71da2d89-89fa-4d65-b432-b4ba3f0b5f1e,20d07547-044b-4dcc-a3ce-d4dea9f56787,9876a3a2-4f24-42aa-924d-a4a6c8521627,2350a1d5-69fc-4483-9512-c71a24255b71,bcdff2b4-e96f-467e-8400-474de465ea3c,a5e7267c-00bd-4604-9af8-782ebc5bef23,fa589192-fd85-40d6-8159-97bf25618df4,fd4cba03-04b7-49a0-9819-173f50a7dc30,a8ff181f-5445-459c-92a2-6ea0bf4ef3ad,9b315075-041f-434b-9596-d1f90d8d471b,de2f180d-241e-4651-a9c9-6dc22c5161de,38cb2cb8-90d3-4953-bef2-739df78d0307,f60f26de-43fc-4ce6-922a-1c4e45eeedd6,35113700-d197-421e-b6de-e7357b8a9dfa,ad51e505-7e72-4465-b427-3695763142b4,8e0b6649-6ef9-4325-af1b-474ebf5832e5,959e4441-9f95-43cb-8374-032600466822,5896c0a8-bdee-4b4c-ad8b-d0619c31e33b,39a0a2c9-40ea-4be3-9a48-04b83292c0bb,36f285be-ef22-45d4-8d9a-2ac913c1df1e,cd888a66-7cb8-4537-8730-2b7fdb1c1a9f,0bb81fb8-1376-44d2-90be-c73a80f98992,2c190751-07fa-45a5-b48b-9d838e1fbe80,f9bac232-f147-4a7f-b5f6-dbf80453e9c4,f898f3f6-8c7e-4a83-be20-7e59e8dd16a5,faa03720-6303-4836-99f6-17299083a4e6,edf9665d-f3e9-4cda-997e-37769708c135,62cb4f15-eaff-4a4e-a3dd-f88b5f0f200e,0100583b-55ca-497d-9cd0-8b1d4cde37e4,0fdad1b3-19e8-4b40-8dd2-ff537f07fb41,f293e35e-13fe-46f4-b476-b7e58ee2e230,14d9e71a-bb5c-45ef-bfd0-4a637d94df64,f012634f-e980-468f-9f16-8966aaf7aafd,f93f3bde-4a73-4fbf-aeca-5129e76811c6,3f04a7e9-8d6c-4c81-b441-0f6e21c379ce,93af3db1-3799-486e-91a3-326213a4bbd7,ae3780aa-2c81-4454-b2b5-89bcaf2b3fa6,5bd84038-11c0-4c2b-9b93-9793569844a6,0a43b5ae-1b03-4d92-9fd4-413b512367f7,e8f031f6-141f-4cc2-b9a8-bc5110225a53,202d47a7-7cb2-44a0-89c0-ddef3b7a266d,d1f6f203-c6af-4a98-bd7d-2259298a087b,5b0496fa-8e2e-485e-8183-c20b512e50cc,3bb773b7-2810-4ca3-bce8-d126ccf6189a,f9d3a012-9330-4631-833b-958fed218e6a,db84abca-fac7-475c-9b26-6501cb59f038,cd18dd68-7626-446d-9bb9-c7b1983ae655,8b625a36-98d4-41e7-8c76-b9fc9a000f99,1fd0801b-ca3b-4171-906b-5ac5ab2ff7f8,4b2c8c03-afcd-4948-929e-3ac44e4374fc,4b586c21-b05e-4385-946f-974cc1c8be4c,14991883-8750-4de8-a29d-555fd687378e,db000891-2cab-4fe8-a482-dccde4a6a32b,0d4bdd70-87b0-46d7-9a23-f7110bda0bc4,c6700422-54a9-4571-be2e-40f73ebc755b,24346c45-e25b-4844-966f-1bf5f68e4909,8877afc5-1ac3-4901-9a52-09a3930c795b,0284953f-7fd2-4d1b-952b-3fa9a335af4e,9aa9a6dc-9801-4632-b8a3-f650dad2509b,6e0bf7c6-3e45-47d4-af9f-d73346871391,0bcf135d-2fa9-4dd2-9a66-48299dbb3474,91260e85-6bf5-40eb-b955-9a0bf976e443,d4dfd9ff-8627-4798-bf9c-33b55804aacf,f6ebe9cb-a7ac-4f6e-934b-d714ce097498,7f448f14-9126-47b3-82fd-1b4b8f1d7416,ea23e0a2-6e00-481e-852e-0890127dc5c3,fa40db86-76dd-4302-a3da-d1e719eea0f3,03902cbd-0c65-4728-b8b6-3e1ba5f19991,070860cd-1a11-4aa8-a81b-d0e35a656c25,41b4538c-9cca-46be-9b37-4b17c5580b18,786f3335-7ef8-4fc6-b4a5-0655f22b275c,13aecab7-f02c-4f17-b51d-049082095361,13895f3e-c3d5-4e43-8694-ceef296402f8,96152d82-6b37-4834-8f8d-caa386a3d38c,6ae98b31-e8c4-405b-a61a-418e81d32ce8,73eb31f7-b4c7-4f72-9c52-18dfb41d506f,c63c7ca9-04ee-41f5-9599-a8f2447b230b,5f70e230-6c1b-4d1f-9bdd-29f93993b9d1,3e58e424-e286-4ef9-a2b7-ee4a04afd576,6d502578-323f-4a3a-8f73-ca3d97af2b13,35068fec-7ad8-4854-a2d2-505345494f87,d581c911-4fc8-4a2f-a51c-6c9cc1ef836f,eddc83cf-e45f-4b8f-9933-fb6ab93276cc,3d0befa4-1305-4a36-9a43-248584b4fb6e,14fa7317-7f7a-4650-b6a1-13be2fb4fedc,eee016e6-5b42-424d-94cd-da00bdff2730,b61c9370-65de-4af9-bdc7-ca0c3fa4f418,5e58c398-308b-40d8-b451-fba25185adb6,088a8c4d-3f4d-47fd-9445-00090c5090e5,5fe7690d-a8f5-4551-90b1-96eb3620ad45,a7b5f041-38d2-456b-930a-6a8da0c8723d,20d26ec3-e53d-4c13-9f30-ab964d4c599c,ec48fdff-1195-47c5-a4d0-3ff7003e277c,19572943-5301-4962-961c-3cfd28c1f558,6692954f-f9d1-4b94-98c4-4e7140f9e2b0,2031ea10-b23e-4dc4-ae01-a9600602a287,162436b1-e58f-49ad-bd7f-940a45a65e71,9b5efe6f-8294-478d-bc59-a5aed4702248,82b227e2-964d-4e6c-95fe-d62168e04c24,2f2babd2-6366-4156-b2ca-d7a4f2c94f50,08d5b328-8299-4509-98a6-557e1d120234,fd40002e-9715-47a3-b42a-aa9dd8c4e203,4b50f212-71e0-4160-93ef-3cc9392e846c,5b2dc1a7-7e9a-42c6-af1e-065e2a1584bb,cf5c28b1-6476-4e71-982c-4e3b245500a4,b0772ea8-9aa5-4b2b-b4d3-cfc546a342ac,cfbb4e86-ae84-4e80-a6a3-d311cc2c4870,83a335cc-49d0-49a2-a806-84df084319f5,80035ee5-e1be-454b-ae53-b9fca7b495d1,70e4585f-807f-40eb-b09b-921dc9834600,7c860b89-147a-43cd-83e2-48651c2e5ffa,54e7425e-7ca0-413d-b164-21eb57b4497c,260ee693-82a5-41cb-885a-b7261ff84420,e6ed5729-2378-4426-8447-b51e85f651d1,dcc56835-2cc1-40ef-b011-6c10baee38c7,ecbf8eb0-4a85-484a-a5b2-806fa6c8ad02,a336d555-759e-4b4f-96d5-169836074fdc,c1847136-4f2f-44a9-9408-3da748fb6416,b55a2fe2-fb39-4ff0-aac0-7cf5136d742c,29929df3-a42f-49a5-b6a1-79fc24c0a19a,abdbf314-420c-4053-ac54-2f5893a4b1c2,2a0a0e3c-03c6-43e3-8dbc-e052e82ee018,f9d7bd9a-29d2-4ca0-8320-8445e41e5680,a89bb138-4e95-4ceb-a43b-f42a3e2786ce,d3bb6739-65e8-4967-b838-2685e4859088,8a912702-8c75-4ce2-bd6d-94c868f2e7ec,f7620730-ed83-4c2d-b285-391a54d8ea03,13c78400-f3be-4a9d-8106-1fc3fd92855f,0ec04cbb-82ed-4766-a861-018ec19e3f9a,05504b5d-e756-40b0-ab2a-268eac8a15e9,37e6220b-d077-4b13-9c05-3f064be008c7,ff7042a4-76b9-42ae-a695-2abca73b133f,5dac95bf-50e6-4521-a867-f1b6076826b5,659aa6e6-4f92-440b-9706-847a6c714968,f00de72c-95fd-4ec8-b8d6-bf0b9c1692db,91a933ea-3d9f-4735-a394-b9cc0605bbc0,d53dfb8e-f95f-4d85-a277-1702d3c57e98,97723063-0fb4-4ff7-a76e-be346dc85304,581c953f-599e-40c5-a52c-1b083510e384,e2d716e9-a38e-4183-85e6-595fec5a7fb0,cf3a8181-3005-4bc9-9f2a-05acad4fd450,c35712b8-986a-4514-abab-e2ffb3dedf65,c4719e62-20ee-48b2-8ded-06d37260bbf8,bf5701dd-e36e-4ec4-8c06-37f7978b5c3e,482fe3d3-d7bc-4f0d-9722-fb72aa565acd,b9bd2847-8f65-410f-9311-177cd5cf40d4,86c09700-9250-482c-ab57-dbd906b7ae6d,e8aad8ea-6b16-4e55-af9d-6eaa6ebac655,2b81094f-9bf9-4ee5-b503-2fe6c7bf9ac6,e023dd08-888a-4df8-b88c-c08bd22fe6a6,8e63b763-701f-4bdb-bd11-df0017e55aa2,595b7c8d-f12e-464a-8863-2a706d616f78,d45465bf-d596-47c1-8efa-ec3a2525be95,37d51b76-70a1-40e5-b9bd-3b05c602c466,d1ba8ef0-fc86-46f9-bae8-f84e06dfa33c,6ae93b44-d328-477c-8530-d2af3aca3df8,b8831451-432d-4cb3-8a0e-17fccb9d12c5,d8714afd-468e-49ca-b312-90fa64a9f285,d23217c6-b8c0-4e95-a168-0c6168306dd7,b4131a73-556b-465b-b79d-6cd86f7ec21a,7b1f2ed7-540a-4e79-bf56-beae1cf889ef,d8702a5f-b685-4920-ab5a-f0a55c3ead1a,3c877984-7eee-4bdd-92dc-d01fbb24567d,8ff5e236-4881-4598-a3fb-9a36415e90ab,4980e480-17ae-45d4-a585-c0cc75e866f9,68679e35-260c-4e2f-9b71-c892d8a102ba,0023b415-c3dd-4e95-b28c-81e5e561f242,55425c27-1a3c-4b04-85b8-7a1cb7d3dcba,e3bd8790-749a-44bc-b50c-e966b38b7a67,b55cddb4-cd24-4d80-8b27-6a3262a8cf19,9f4949c8-ef34-4792-b278-cda77015e412,8b65e224-3e4b-480f-8f3b-e77ccd79c6b9,1bfb2aaf-e87d-4d09-9926-8333a9002126,2b19e262-1459-43ad-9629-85578f4929e5,94977c76-6e4b-4eba-b40f-23ac9ef5519b,4bfbe558-3ac0-443c-b237-6b3273ea7697,e4f36b93-8d91-4248-9a38-7f13eb3f7f82,4df4acb2-0959-467a-9263-d00ee2ad8711,188c26dd-0611-42ed-997e-9b3a7acb8643,d604f2a0-6b68-4f2d-9691-c8ad16bee16e,be5040a7-7525-449c-bf58-d05abe49927b,3a20749a-a990-4a2f-a711-5c82bbecf4b0,8831b03f-8b8a-4c88-81e1-6c03c8904e00,a307b6a9-fa8e-4237-8cf8-39c8a9ca3df7,b025a755-9f8c-4891-80e1-fbbbaa428442,63a75ecc-a003-46c2-a086-7a74112e35f0,e0b49249-5d1c-4e10-b31c-7f807c9970e6,9003c488-4daf-4825-bbb4-14d207fae29a,f63b7cc9-e5fd-43d4-bc7a-cfe32c5731ca,6e50a1c7-8fe8-4ee2-91f3-7a78c768af66,3f4e84a5-9550-44d3-bb2f-7edb4a60a8b6,1f3975cf-47dd-4371-8433-895a50a3f280,f93c855d-3a31-4192-a83b-450b9fec3141,45bcf262-7997-487d-82e8-ecadc7d1f46e,33cbfe49-734b-49f1-8deb-da7e19995964,20a39c0c-bf7a-44d3-868c-2e84b15c7e16,15f9601d-1dc8-4474-9211-c668e9023bf7,1496a8a2-809c-4b2e-a1ca-b91476ad446d,f94e5c15-3259-4fb5-8431-5a701f5ea256,4c85c0c7-ae35-41ac-997b-099dfaf0ce00,66fb6aea-9e72-4d6b-b701-dc1591bfc1db,aa514cdd-a416-4b01-b7f3-4252ce66c33d,e7c24c11-ab01-4cd8-9511-a7cbfc36a0f0,174f042a-16cc-4ddc-bfba-269c1b15bed9,180223cb-a12c-4a44-b1af-a36cd7295deb,b7287dee-85d0-4a6d-99de-7a7d9090acb5,a1054c67-8f2f-4a2d-9af1-48a9f7b0f461,4c29ffa6-6028-4925-adce-47e4ce7e9385,034bf26a-c5fa-4c55-83b1-108988c53bbb,33677423-da96-4d2c-ac84-e12ba3ba8b1a,3593f50c-751d-43b2-8f87-65c2d34a68b2,5ca0e0c8-d752-42c6-bcbf-2d463e9d5bfc,df55faef-0616-415f-b1bb-fcc2fd9f807c,625f9161-caf8-4b8a-a4f0-fc962bc5cb6c,1be83812-a0e9-4c18-84a5-3e53c2475f44,0420d3e7-7ef8-4078-ba2d-36a90920729e,0509a6ff-2518-4a8f-bc6b-89e069d4786c,f6494ea1-cf2d-4907-8a04-f0836d3ecf83,ef08ebfb-a88c-4bd7-ae5a-43c5f52b6ce8,34ecac00-9218-44f8-9eea-bb896da1d23c,03837a53-6c18-43bc-9b65-3dfd14a0a4f1,e474eda4-4a80-46fa-9b11-0c1f316478d5,f64af71e-38fd-4b08-9541-b20322f31688,1490b091-7fed-47d8-bd08-7b0661cec2c9,833e52ef-8d96-44e6-bcf8-c8664a9cabec,7466fbd6-6972-422a-8bb1-c1e1da5e55f1,a0b9764a-8e26-4e01-b135-007b1de8d6a5,5b844b82-aef0-4396-b078-3521ef5f4380,ec8389f0-b6e8-46a3-bf56-385232677b07,c98ca31a-b3d9-45bc-8ca3-1da467f1e538,c079f8ce-0ac7-4f3c-a880-cf9b0c82ea9b,24982c07-7c32-461e-b628-2a741150153b,6c54a21e-47cc-4c83-9157-71c1bea1e3a0,1b12b872-221c-488e-934e-2e94b46c8314,d0acf7b7-adb1-4cd4-8111-b3df713cefe4,68eb2686-3bb9-4b14-9a26-0b6f840becf3,4a2fce00-3d86-4b70-abe5-ff2a8e143fcc,100d2c53-cf66-4ac1-8dd0-44665cd38f5a,d6905333-d0d8-4b4c-b2e8-011a5cb3b66a,434646ef-50d8-4ac1-9d4d-a9718d9592a6,ecf16116-d714-4056-8920-13eb79a90a0b,274449f6-7122-41c3-b5df-583e8b5ff7fa,e073aaa9-2a17-4c16-9b3c-3a89645e4301,e956fec0-fdab-4413-8cd7-b5ae0f865448,e0c3fe9c-8869-4fd3-a983-afc7f81597d2,5a5bc176-acde-4017-a37a-409ceb4f3b26,1fe5e2d7-b29e-4d6a-b683-8f2223661492,1101a082-878b-4437-892f-da93472e876a,16c48180-c133-4541-91ac-005ade458009,ec2365dd-e949-42dc-a60b-46c8e0322982,65845ecf-488d-4a00-86b0-01dc5ded6aa1,728a9df3-f305-42c3-a066-21b3871bbf9f,11f35339-0634-4ec7-a975-222f5e756db5,f742bce1-2b4c-47aa-b3db-30018b247c62,3f3f79cc-4ce6-48d3-a7b7-3c1943de0e16,044fb867-c4cc-4f3b-ae90-ded20aeff2c7,80ba0f75-e037-46af-a8db-6ddde56b045f,7dcfac7b-b07b-4e3e-ae41-96c23547c323,cca3443a-8b78-4406-a42d-5aed441e959f,18379610-da06-43d6-b820-599447ffd34a,c545633c-1349-4dcd-8c33-90539d528f92,67a97487-9b60-4a2a-b42f-2c2f0e8a1a3e,ce581b58-c40d-406c-986c-282384fe8b69,35a752b3-1fe6-484d-ba0f-15755197fb25,cdbdb871-fa93-4bc5-b683-6a7df65dfb0b,72be1425-035a-4353-9769-8c5245d29224,f1b8f248-490e-4494-8612-92e69faa2a18,f058b4e6-896b-4630-ad59-f3e21b4da646,44255fab-05ce-41dd-8f46-57bd87189055,0bf82aee-9ef6-4789-86f1-cc5e571a615f,44235ba6-b8d2-45a8-9acc-cf9289bffa15,58a60c35-f404-4665-b48c-b6379f67a13d,e8dc43c9-6634-4af2-a31b-7cc8dce3e0ed,3cb9190a-79ae-42b1-9c02-b73ec064f84b,a5178fb9-8208-40b2-a053-432494dc8946,30dbd6f5-7134-4640-b556-4fd1e589beaa,ad305b0f-fb79-4450-811a-ad3b48b78991,136e440f-2058-47b2-833a-5c0f435e999e,77648f8c-b9a6-45a0-b40b-96bb2b9772b0,18c08f35-6387-4576-b1d0-d5b95930e09c,bab91807-109a-4dd6-ab19-b91eddbeac9b,bb03caaa-498e-4335-a52f-a7107d3691fa,83b2a53b-f9e4-4b30-ae18-29f09c504734,d821e46b-33e5-4cf5-b082-37074ba493e4,ca24fc8b-0cbf-4d0c-8f10-5c25c6e55e22,c813f3d6-a36f-4c07-84fe-a0e183923263,afaa4e61-2bc6-45e0-989a-aebddc64c845,87c750d3-02c8-43a7-a892-56b86dd35be3,02aab03a-dd53-4022-93d9-abf240da9eba,3f695ef1-910c-4c32-91df-1eb16250eb67,68f6a7dc-bd12-4332-babd-341757ac3bdd,9e5180b7-190c-4e7f-b657-a718e0ac87e5,05da573c-7a56-4421-8ed4-342fccdc7404,a2fdaf36-3c8e-4fc7-ae82-5e668f9cbd4e,1a245dda-eba1-4780-a9d6-7e3f38b447dd,2fbfad47-2c4a-4960-9ee5-fa18b2e399d7,fdee3540-728c-4264-a8bf-cfb521c44b00,2dac5722-0b42-4fd6-b295-b1b2c474a5a9,d958abf2-6101-4253-8779-e43274680352,38cbd366-f1e0-458d-977e-1296488043a4,f37ef83a-6c60-4dfa-b269-ce9bdde4d2b8,923d1e86-8c7b-4fb4-9083-86540b52d0a5,e176f9be-cd5e-40a9-90ca-7f50a807ad95,bf9942d7-c65a-465e-8ce5-9c3320962c11,a794c0b3-082b-431e-91ee-f868de91b9e1,43d89d5e-a240-4773-9d73-97edabe44c48,a5461fb2-6f53-43b3-b304-fd4fb3191f11,906f8def-18c0-459e-bc37-1c52ac58936d,c671d889-7367-49df-bbdc-80b5e789b590,87119b59-f0b2-4868-8e6c-7a1ac4e367fd,da7c6b4b-740f-47fa-8fcb-7f11b51e4364,89a52f97-1ebd-487c-acde-e55fec785b48,107280b3-fac4-46ae-8573-e636a977f586,5af1d2f7-6525-4620-875d-c41f8092c263,25a278e6-ea95-4f9c-b6c1-d5747af0cb24,0bd78936-9cc2-4e33-9e45-835907011d33,5251f907-21e2-4396-8e74-f444c6bc6fdc,981d257b-b14e-4972-bc3b-f8ca3c07c35d,c1189db3-538a-4361-ac46-135e2f595c7f,2609c4ae-4d52-444a-9018-4519838417c0,411497cc-f8ef-4417-8982-f82f95a63681,550826cd-6c46-47fa-b836-0443ea7c0103,512b20fa-ee28-46b1-9b68-7fda1a650322,2c823ec7-a561-4bbd-949d-711fedc851ec,79ad54b8-3d7c-49b9-9666-c2569dd05d6e,b06c8c53-f4e2-4141-b034-c15f1a7cb152,631d46eb-41a3-42c5-b344-5c281a3e849f,de3c6d96-a584-43c5-a325-c252b316ab6c,a9ffe970-af29-44b4-b321-1aeccb404a8c,2b92353d-1e75-4662-92ee-fb6ac552ef76,b9851b8e-eab0-402b-8734-5ee30b2180da,381ea89f-9171-40c4-8a17-0671a08fd3d6,ea9d4626-18fe-48d3-a1ac-659f0fc9db9a,da63dbec-fca0-49d5-928d-991fe8138d7f,5c1fbfdf-26f5-4779-8772-3dcbc58470b5,018d3074-f991-4a95-82e2-d51f038a5752,84c13b3f-4278-4b86-b0de-f073f2f02b77,5738c1b4-0d86-4a73-82e6-8b74f7453162,dc60cc1a-7a13-4e63-86da-a2482e840424,9e6cac0b-211a-4dde-bbbe-ec668d98f2f9,e02aab67-0b8a-441f-8bac-6360001b1ffb,01323ca8-b813-4e12-8f99-cbdf6c060256,0b7538bc-604e-47f5-a2d8-73000e73cc70,db2495f7-0e83-41ed-92d3-4b1baf4669ff,ce1cbcd3-2156-4ba5-8be6-167335aa02df,0adb595a-fe6f-428c-8420-dd0462513d8a,f61fd1dd-9abe-43d0-b282-09307a8737ba,91f99667-78fb-47a2-bcae-83d273c3bae8,6474cbee-ad8d-43f5-99a9-e32bcd6ce71a,71a33912-76e2-4879-8906-a617a882b859,609e7210-9685-440c-9fdb-5e9dba5f89e4,8040e593-1a77-44a4-b601-9da407085b05,edaae0a0-9458-464d-b24a-4d8b8a7549d9,d2939f94-e769-4be3-9c97-eceb48c90d7d,e1ef3eec-8b82-4eb0-8be4-e1585280a8dd,9b4012b7-1bb8-47c3-9925-5ea6c55f8f0e,5569e1a6-0871-4897-90ab-c440aca6692d,1e04de2b-73c3-4716-a5e1-fb0ace51bd45,e85541e2-f8d6-4bd5-9b2a-a490fe61d1c5,1f9c25ea-5d2a-4fd6-876f-6f3fcffa5d05,d526f40e-0c29-47e0-b4ee-5f1667f93a3b,bd2a4bde-5043-413d-ae57-cbf57df4b47b,2c41ff85-1e92-42b3-9ff3-9ce12e28eb46,bd840d8b-42c1-46c3-b620-70e55db1fb20,5b2a2141-a631-4c24-af37-165417214a80,e3026048-6de7-4166-abc0-d038f2e853fb,1deae309-4dd5-422e-9ef5-b84cbce58802,27b237a8-d520-406f-be05-13008c0c1d46,cb3cdd91-242f-4031-8d91-25fa51114363,eabebbee-0b1f-4aa5-b402-45c4f9d30e12,c3ca278f-ad71-423c-be7f-94652843f36a,d98ccb23-91c8-4009-a1cc-f68d9a92cf27,63fe6658-76b2-4ce0-800f-b1db820212c4,1ca3b3d7-458d-4891-b5a9-0ce48e6b6317,fe1a0dc3-3e9e-4853-bece-29eac4413864,e7fea117-40c2-4a2f-beaf-31aae4bb7e7e,47398e09-d338-4a56-af35-8e0fe9292d99,b0d04ea5-abe5-4902-abee-b9934ce69d61,9cc22d0d-6a87-4240-a828-7dd1576494d1,3e135f46-202f-4a1d-9e6e-4e8776bd90bd,86b56925-b4ac-4776-a076-ad78cc996d15,7f3a04b1-ed27-47b5-83a1-27e98b1d41a0,1d6a2543-d532-4e35-90a9-1ed5dcc0affb,a2464d2f-cc1a-4eec-831f-6707009dadab,617ed8ed-f989-4fb6-a279-7f0b51c835c6,93f9c7c4-1151-4afc-9a56-dfdf4af676a8,3c88ad74-aaa0-49a6-8f10-e1b27168f6d5,fa979dd4-9241-4723-a4d9-240a8e50f1ad,314a89e2-953b-4a0d-9bd6-54aa22572c6f,32a0f86f-df03-4ad8-9c36-751f61089557,35c8c0f7-b40b-435f-9a1d-752bb0f74b09,b182712b-9c5c-4756-86a5-5a1124f1ced3,4b9e7f2f-3567-4d56-9be2-5db09137fa8c,f0095d17-24b3-48ad-b3df-65a3ee9e8a05,dae97570-ba50-4494-9bb3-83e6ad86e7d3,c0f67f39-324c-49fc-9138-63d7edac4de3,8a991f31-7592-4a56-8d2f-3cabe66031d9,c38c7633-fa82-42f3-a621-fd38077975dc,3b64b82b-a17c-4956-98e1-91a74a006f8c,04564917-1a4d-4396-b403-e6565060a204,0935b264-6b9f-4ffe-b7f1-4dc67227ed20,da499df6-9a97-47e2-b4fa-fb04fbd1d9c3,86bcf503-95f1-45b8-9956-4fbe5b21df8b,ee7ffc45-abcc-40be-a5fc-fa752249ff94,6ac10fed-2db2-4e4d-bd84-56090aed60e4,ed583ecd-3c6d-456e-831b-93131b8f0b4e,8a4d3d19-f1ab-4bec-bc2a-98e96f426e14,c093ffa1-0c50-41b5-af25-fbc9eb6c705e,9cb2df2d-4ffc-49e5-8c19-ef38bfc41621,0769fd3b-8b60-453c-baf6-2560dbdf8676,205b7e9d-9ef5-4acf-b173-079a2724fd44,80cecb56-d2d1-4631-b25f-08dd9d529bb9,48ae790d-f2e6-4811-b1c6-d71efa59ef88,a6f0d7af-b3bf-43e1-9d37-ecaadabb2dec,75cd2a8b-fd55-4962-8923-756816c83c37,dd02cbea-1fb7-4009-8888-c6ac27de6d7d,811993d0-1543-47f7-9f55-5726f903aa2a,3cbaddbb-58ee-4e26-bedf-7faaa20152ed,a68f397e-410e-46e9-a1fa-2e432d3613d2,b7d0b0fe-2b07-4c5a-b5f5-24ccd782f01b,32dec600-e58f-4ba9-9cb2-67716701866f,cecf62b8-7ad5-4602-80c8-c554b34b80f4,bc13ec40-7bfe-4b8d-b1f0-c918fd60dbf4,6014167e-a879-4071-9198-2fbe7569be2f,9a9eb954-09f3-429d-be77-e58a4a6dd1af,57ab4334-bfe6-4be4-8d24-e5324083f52b,31a18eff-7b79-41a9-ac13-8be1a9ab5c05,9fc764df-8535-4767-80da-82d037a953b8,589d3659-1ba2-455d-bd96-c880c6aa0461,8d487a7e-2468-4ded-aaf0-c5c965e84d0e,48f785c8-b65f-49b5-a82b-c6e6b8579cd2,45314680-c7eb-4e5e-a81f-33f1f43daaf8,6e2ebb2d-e97c-41ec-9268-a0de1de35292,5519666d-c6d1-4fe5-9f20-b8ed73c23b56,ae89c558-3bee-4e54-a7e3-a3800be6b72f,d35b278d-3539-4ed8-9abb-8488eb02ee6f,47e0a5ea-db9e-4622-9bc5-356e247da304,2223c7b1-5b25-4623-addf-7bcbf71a8fac,d726696b-087e-4e59-b1a3-b2ef94041678,0b2b1676-029d-436e-8e56-65c831501d76,b13debf2-d4d9-431b-8c73-c3785b006b52,8e9f70c1-a7b1-461e-b7ba-6c34b072c6ca,c7568127-5ff2-4481-83a9-7a53bb9da0c5,d286e402-d395-441e-b0b4-92530ca2f13a,9ef828fa-1fb0-45b7-80f4-eb22bc817b28,5fdea5de-9e51-45ee-aa4b-8fc88703e95e,7452c2c8-84cd-4df3-a7a6-6e0862d2ecba,b9edadae-66a0-4308-9871-a3a5ceda9ce7,afbbbe1c-84fc-4543-98ac-e80edd6cf257,bd5a3602-e5d2-4c40-bc74-63c08a743bdc,cc7c1d85-72d9-47a9-8796-7385caf80bde,86cfd96d-b302-41a1-961e-e0d3e015eef4,ac797279-26f1-4641-9a69-a9d6af5a0f6a,da2711ed-e595-4735-8bf5-5bb20be40940,9cb4d9f2-7f74-4037-8b1e-3c2beed84e18,21ee98a1-337f-420d-88b4-4d53e94d0717,dd2875ab-480a-450e-9077-7361f5862207,3982c58d-8d12-4dae-a94d-4807c67bfdf3,c289e693-bc6e-4a04-bcdc-de06fad841bd,a8d83476-d05d-4658-abc3-47ab3ce7bd77,06cff15c-d220-43c5-a811-fb15a3814e53,a78acfea-889c-463d-81f7-42061813e7f6,e52f4362-210a-4116-8d8b-1c3c05bf82fa,899f24ef-03bd-4d59-b160-6d342f7063e8,c86ad5c9-3632-4c15-ac74-2f117df9caec,100037ff-657b-4d21-bb98-e4de56ee3b72,d34002f0-d583-432f-9239-b84d056a5375,52375893-79e5-49c9-b57b-81d66e60bdd0,d16a2ff1-8050-4f41-9bc4-ba7b5a280b04,ad2e9e7b-4f4e-4864-930a-f7157bf6ac4f,ad3a01fc-c242-4e0c-bb8b-57f1c8993ce7,4e7c6f4d-2a9b-416c-aaac-d2a6c686487c,8bd10511-4e33-4350-9ade-21dfd0cc9136,4c71bb33-250b-4021-b2ec-532855347913,41a8b130-5a7a-4880-85d1-f79d659d7292,80333b8f-a38b-4385-a9cc-ab958808babb,d6b7d015-9ca3-4155-a172-817eb07aaa6d,98b57cbc-7fa5-4034-8127-a565325b3a3b,7b28594e-a400-4206-9003-2cf2ce0c8891,1acff7d3-fc33-4cca-8984-2b9c30026dd2,12cd4baf-cb6a-4c52-9a4c-8fb2cfa974f6,643fa794-dfa5-4df6-9cf3-b11f7936115b,d1d65d3c-2153-4373-aeb3-8c1ae0dab4e4,1d9012e0-3951-4b78-aea4-f5adfef71232,6bbfc331-ee28-46bb-a9e2-4eddf8633ff6,3dc31827-adb6-40bf-b031-d8d1d0469b0c,3ec95a10-a1da-40c9-9133-77c23e63f6e7,676cdf4d-0bcd-44a1-917a-a8519d1a8dad,562cfcca-bdd3-4d90-ac4d-7a9be2b9b299,4c58c2d5-1d2c-4c00-af2a-d0a9ef567a3b,d58c34c7-dc43-4ffb-a0c9-359190e7c6c6,585c6950-ba63-4457-b087-ca22f81e9597,3025783a-3080-45bc-b2ab-ce725260fb96,7e968520-c544-45e2-a5c3-0e2f7b4dd14e,99f03795-94b9-4d7c-8a7e-4b7f51c5eaa8,0bb77bc4-ac33-4abd-a71a-1012e607bc72,cac98114-02ab-4955-bf1b-6d8f173443c6,a861fb7e-24c3-4eb3-8dfe-852b29740a84,2602ff11-ca7e-4590-a384-03385663836f,88a23fd9-2178-4e57-8b36-77c68a0c00db,89e9a046-050b-42fa-88ab-543cd7a7e94f,7273b9aa-c30f-425f-8c79-db282215ffa1,a9d0bb9a-5a71-46d8-87b9-7bff2c275e37,3e2f5cbf-1d4b-4ed3-b23b-3a045ec1a032,317afb4f-8381-459a-885f-80b590877b47,4b57d5a5-4de6-4ed6-810e-8846ec98c7f9,150755d1-4e7b-4c0e-9343-5f89fc02871f,d9e962e3-434d-4960-9890-bda7bcb26ff8,a60f7372-7c9f-4a14-832a-fd9e32481fc7,70b60134-5a53-4196-9c97-92e4df6589fc,f5f33ebe-a077-4f37-9ba7-0202148f8705,cf7989fd-4ae9-449c-b05a-8436702ab936,55bf4083-ce3e-49fe-be1c-0d8965e3c555,1d0f380b-7847-425e-85a9-0b1c4ca635a5,d95f57b2-1304-4d36-b7e3-46436845160a,fc348d86-c53f-4efa-b2ef-926d9eb65bf8,0ec09779-4622-4bd5-9131-48866602e60a,d79dc104-a963-4ba9-986e-2f86c9ce233a,fe5318ca-e355-4d95-a95c-4946c4bb5296,003dee7d-ffc8-4ccd-982f-e02e333f9054,3e10be69-80ab-4a3e-929d-411f9968538a,a46ad5b6-ad40-48c5-b11a-08686ff1c98b,5650e7f7-97b9-472d-b7ea-e439bd301f04,df0ac2fc-2c61-41dc-b1d6-63c2c3a4c073,0162fda7-fb63-439d-9f5d-2cd9f0a9c827,61fe11c8-3bf9-4101-a49e-7c04739cd68d,a81afbb3-173c-4ffa-ad09-49ba5d338a9a,56565a36-942f-4887-b7ea-3d727fb370b6,c3de53c9-390e-46a3-93f1-b1950fb11a33,3c823968-1961-4bf3-9b4f-10e6c78071da,ad72ce78-5226-43b4-b3e0-b8d7de888209,ec34a225-0315-4336-addc-4484697fd8bd,50a4b728-4005-4fad-bfda-047592908491,8f6fc028-f9c6-4a95-be36-e3c027da8404,7917e061-9789-4b69-9ee1-c8c3ba965960,02000eec-9d5f-45d6-bf2b-de59452cee4a,b849ea71-8a4e-4043-b155-0bbb5c356a9b,e5567759-50ce-452a-9c38-21a8a6f42ab3,d23701ab-8f5e-496e-831c-f529283b6b76,d1fd1047-5bd3-4696-bbb5-695ea1b8913b,b367cced-112b-4b7a-91d5-c106f61fab19,f9198fdb-040e-45e6-be1d-372056e47624,1ae9dfb3-5e0e-4e3b-bf9a-27423c354eaf,01746c60-fafb-466b-af6e-42b11e8f4496,8160647c-964a-4e2c-b2c3-d1d877fa7857,087fa8a7-120b-4ad7-9abb-7e8be46433ce,0ac0d723-e8e4-4436-8537-266d235e3e39,02119b2a-d794-4f8a-b6f7-25f679146d16,b31fb400-8235-4316-8208-a5a1e437dce6,7da4e7ad-ee5a-48d7-ba4d-67ade956e91e,74f7aa15-7c43-45f1-b5e8-5c1bca34f3c4,b35881ec-70ef-489b-ae41-d230f8a983dd,6d9f4022-571c-4484-9153-69d27af19ab2,543a29e2-db07-4142-bcf2-81566cdb2035,07bb4fa8-da69-407c-be98-9e67373f86e3,54961736-a077-4652-8fd6-8483fb243599,dfd88b91-f2e1-4a10-8d42-2ef2ed8e4d59,efc9d8f1-496d-4cad-b15d-cc5886009130,09b2d21d-6de8-4174-b9c2-3dd108f26357,f918def5-4aba-44fa-8bdd-e4ce4da60540,457e7d3a-4560-41b4-82c9-e97634a23ab8,076712c4-52f9-49bc-bd8f-ff94e7b9791d,537c52eb-bdbe-4100-813b-d9939ce278e9,b3fb3200-cac3-4398-a696-627ed2591af0,7e8d6a6a-9727-476f-9568-b138ecb5dc07,3c7438fc-235c-413d-abf3-31ec5ae80ede,39db10aa-5e15-467e-9fdf-33528089a8d9,0782b45a-0c18-4666-b504-9b93f3e1662e,1fccf8b6-bc12-4f2e-ae6b-34e5454279fc,a9ca7bb4-9f23-4837-b69a-1d579f3a6858,3617e4fb-0a23-407d-9de8-796a5238fec3,f5b02305-a2cb-4fbf-b47a-018fe5541e8a,bd48f425-b982-47a4-8179-68d242b33702,a24d768b-fa31-4319-a030-e0bc3e9efdc7,941efe9e-fa75-4a2d-ac0f-0d4015b6683a,bb0d209d-35f0-4f8b-b7be-a0a65171e450,c11434fc-0123-49d5-800b-5b1d51c80da1,09d09786-bc97-4c66-aed6-4c5bb3a7d95c,183a0be0-b7e1-4670-a8ed-304c54648786,14051333-990f-42ef-87ed-9eb63a10d0c1,cdc3190f-de46-4c14-a418-3a43abf9419d,c554c386-5636-40b8-8b8a-f5cc2ef0f36a,d8092c71-4227-4f89-98fd-08ccaa8d701b,145092dc-ed00-4b3a-ae24-4b9dae85aab8,8b2ee91f-f27d-41d8-8f3d-25d43df262d7,b2a63f4f-5bf0-4e35-ab71-d544d0669e13,dc26ec2c-1019-461d-8bd8-ca11b5bd4946,10a88ee1-7198-4bb7-ae95-2cb3c4065bc4,a1f7aa44-53e6-4202-ae3e-23b7338d53d3,38f161b2-eb08-4e10-9676-936eb01f3e3d,2885c831-be83-4b41-a006-0db2b71aec81,88357425-716f-43bf-9a4a-53d3bdd6be74,ce3baa42-9ea7-440d-aefb-852ef0ae12ec,d1056e68-9e93-46d1-a7b1-6afcc0a8caa7,75fa9f2a-ea85-4d6e-a368-d5adee78a38a,6eab9c73-5c35-4230-9ca6-c59d4fcf608e,2c517626-73a5-4b90-9dc4-bac2f1f92cf0,ee0f1e1b-810b-4fc7-8f5f-4edb772deb51,416c99b0-8b2c-4034-8825-29cba4f2c8a6,5092cbe3-add0-412b-8b3d-3da3ebbda5ef,0416fc3b-61da-47ff-97bd-81540e601f3a,6a32ed50-38e1-4b1a-80d5-07a5dd60e103,eb6a0d83-e050-4d4a-b111-666fc72cdec4,44de06ec-0acc-444f-9bd6-52aa52bfed24,c32efaea-b15e-46dc-b9cd-5d2e7ab7896b,a8170179-caa2-4d95-b5b5-bf8f1b1aa5d7,a0699208-5f5a-430f-8cb2-180e38a4b3c3,b1eef6c3-25b6-4ad6-8ae5-66ebc53e7d06,270e8caf-92d7-4d36-ae3f-63b7cf89cc3f,cc87d250-4d9c-4347-b662-73546efaa0dc,bba064ad-3120-4898-b923-019a550ed3a6,4c073d55-e42e-49ea-8fd3-d5a6f9fb6b64,9f649a50-02f4-40aa-a03b-70cfdd2b9e94,ba031ef4-261b-4928-b418-adb5f655794a,7e323560-ca2f-430d-8a8c-77438e6e88e9,b85c05ee-cfc5-4770-9af5-829bddcf1b6e,0d213531-b129-40b8-9b52-f560fc4e0c54,52c47ec7-5ca6-47dd-9b79-ce3e306c2e4e,f02801c1-9a0a-4f4a-ad8e-c36f0d36872c,8214708a-aa02-4999-9874-0f00c0328437,5da839af-5c6d-40d4-84b1-2689788148d6,c48eb628-4cec-41ed-8f39-a704ed20a4ed,322c9846-c818-4e3a-8176-1d2b1ba38fb6,11950be3-bd79-4b9a-883d-e3f8c8f20c38,881a61e1-1afb-47be-93b1-49d05241ce4a,fecc0e88-31b9-44ef-86c2-0f22ee124291,c5007a35-1baf-485d-abde-5028710a231c,3790279f-6486-45de-a5f6-e9375a626bd8,bd73b48c-76d1-4fca-a3e6-dd70d5f6c831,cbbdd6f0-96b6-4423-8fbf-da37e21b6826,89bcc124-efca-4807-94ca-d657deef0bf2,4dda786c-c1a3-4d3c-93dc-463e12c25203,cae2636d-c897-4617-8e0d-b85b3cde91b6,b1e23822-731b-4258-a12d-9dee19d503fa,5343ead6-0c84-44a6-9294-64edc2f2a405,7b9c8dfc-e6ea-43aa-9a9b-8ea1c0b8027e,4413537d-367e-448f-aa6d-fdce6a8a7561,0b219e84-d9aa-473e-afee-d9946d39fadb,c2b68c02-1243-4828-815b-5495664ccddb,77f9b4ff-f7d7-4b50-bd91-1a0d7817fe3a,194b1359-a399-492e-ad2c-9e13223f83ed,9bd2fd6c-f4ec-496a-8a87-5873e63adf37,edbc2331-3c4d-498c-89fe-a66dcb20f04f,76e91d8c-7377-4e04-b3f4-7af01f66c667,612c21f2-76f2-4ce4-aaf5-107e0cbdd9f5,3d54c0dd-8c38-4cfe-95f2-65d5feb94561,df3e094e-ee83-4090-ab18-3f5c0debcba7,470b79de-a492-409f-ad6f-f7fbcabb1502,6eca29da-313a-4778-88f2-9902947b6802,efe769d2-dbc1-4c47-a370-040fb9c4b849,e9a52769-44a3-4854-a5fe-effcacb50f38,1222c81e-bc6f-452f-b222-cf59d047849a,e35c4e3e-d153-4732-ae8a-a1571cf7ddd8,a69d3e6c-677e-40e4-9567-f0e17c02d592,fc7bd258-3eb3-4ce6-9d8f-40f2c0b55fb9,733f18ec-ceb1-4ed4-8067-050b8b8546d8,62b2d635-ba4b-477b-811e-894958c307c2,81ef28c2-48c7-4b93-9600-61c15a3bc0da,ad935650-15b7-4e11-b98d-624c22eeaa7f,349c1b5b-02c0-4db2-9025-925e457a4083,8d1a06ee-024b-45fa-a832-a9c54c92c462,85e25aa8-5205-46be-b157-5d0768b05fcb,4094a96c-0a93-49c6-bc50-32eadc65b007,812bf5b8-4200-4a81-b0bd-ac8dec832909,0020fce9-509f-4817-a674-6f03d9b41509,0de33900-5289-44f8-bae3-7fdc717551da,5979de92-c1b9-496f-bf32-bcb586a90109,56a0b4bd-2c85-4518-80d3-c9a109c83565,00c20b2c-da13-4b01-b561-decce88ab7a6,680ebd24-b8f5-4017-9382-117048fec09f,c01d78d7-88f3-444c-bc40-7d43a371f57d,3a96ff3c-c647-4a1a-b855-1d9a98462913,a254eaa7-4a13-4927-9f71-0e4bce6008e1,69a53d6b-1998-4ed2-a16d-f22a736db83f,9f14efc3-2a71-4f84-8d5b-6f8e0f73490b,b5af4ef2-f88a-460f-9ce4-f821c4efe664,bdf3031c-9daa-40f3-842c-5ba7ae1c0688,a06a11a7-fb13-4e68-a9e5-3fe573e99ba6,5da1c3e9-8201-42d6-8d79-b6d419e814e4,34014c18-6613-4319-bce4-148ba9a61e31,cbfb2c98-f4ff-4d9a-9f1c-4707e4ab1493,347b73a5-fb82-49c6-b2c3-900b271a301b,c2ae58dc-2794-4a14-9d22-24d098e0e39d,d678324c-6607-43db-9482-91740eb453db,b93895d8-b932-4b82-96e7-4d95612349d1,f3db3e8e-cdab-4b76-bbaa-2af0f20a1d18,bf9326e8-1ee5-4527-a397-c451542fc1a8,7b955a19-565a-42b3-9cc8-5ae098f8104d,5ce66b55-9d12-41a8-8b80-a872c7fa1d4c,659da973-82e8-4629-9c94-1c78f1b6a573,2c93ca8c-4aa0-4d97-98f8-65ca05dd7e90,10584b89-63c8-4398-bc5b-99bfa90c085e,6712e6ae-3876-4eb8-925e-260364ea1760,5fe94696-b2c2-4f88-aee2-4d0326719960,5b492a96-767b-4e88-983f-1be52d9d80af,d5abb1a6-550a-42c6-9dc8-efe4ae27f640,862f6e9b-4424-417f-80bb-f482796bb739,d31e135f-9948-4b1d-94db-ed32ef4e4959,70d12ac9-836e-432f-ba88-326adc9fb60e,c2a4ec0e-1f13-494a-a392-bc53f1c05e27,ee15539b-717e-4642-8a78-d41601999724,334a5c3c-dc79-4052-8a7e-c81e04594ad0,46d96d80-57bc-4310-8c09-0377e6576af0,f046987e-ed0b-46a7-b138-77f7ad726a12,e0fc7f23-30e0-4c3f-8463-67f979cfc358,18906f75-977a-48d1-a4bc-1f986a8bdcdb,4491d4d3-c488-4c48-b7e4-7865e68e2378,e951b788-4226-4746-9b11-911558f87ab6,dd1aa395-5c2b-4dfe-bc6f-8b53bb044b8e,3058ff7a-66ad-467e-97a3-5ce8f096be1d,a23de651-60fc-48c4-ba76-8f14fda79d3e,fe911be9-4d83-441d-8b3f-a8dcc6f857e8,23e52f40-8086-45db-9960-1496f994af95,054f18ed-58ff-49ca-9ec0-7de75d44e91f,63baf3a2-0438-46fd-acd7-5c200fcb2d59,5a62c7a9-f32e-483b-af66-c8fbcafa11b5,194685d2-9c30-4a3f-847d-3e5932bf66d3,54147464-bb45-472c-b93b-a899f94fa48b,43283ab6-de4b-4d40-9e63-8f86a15aaf56,942d62f7-e382-429a-97ab-eafd08f591e0,3e4f0eaa-65da-4224-8a3f-15433f3287c8,720df7a2-b04b-4075-b50b-db5af215aeab,eb0c32cd-5163-46aa-b5af-573b843c1f5d,c675ca44-113d-4610-b68f-ce7c2d07e333,7d61f79f-4547-4800-8a29-767cb86bd221,0f0b881f-17c0-4a53-84fd-f5d21259c7a3,53f3fad1-075f-4747-a4cf-efcea0e6d0ca,167e9b24-f1ed-47da-a405-2604450bf8c2,0549e15c-c395-439c-841e-3d2a5b4134ad,031eaf4a-ad36-4e11-a920-e3efec4ee0c6,ba330774-21a2-4bab-b18e-878f0278a0e2,f52cfd5f-60bf-4b23-9b27-67b1defb5b27,52fa0c85-173e-4966-9b8e-00ccbcc9a9d0,a510855f-ac2b-40a6-ae06-600295277fec,ed9a1e3a-2fe0-4d99-98b0-a54e35640526,a6bc24f6-be3a-4639-82a3-eac0aa6040a4,bf65911e-9b42-4a24-a396-5ade9391bf1e,758eb270-bef5-4210-85b9-45186a388a96,8a15647b-482a-46c0-846b-1e549c57225a,5e518155-d749-4736-aa06-cc945423ac38,ced2e7e5-01be-433b-809e-82e337d095fe,c101af18-2150-4453-b47d-7f9c0bd4e4de,044f871a-76b4-42a6-a3cb-6a96414bfcd5,257dae7d-df8f-4ba6-a569-f52c7aa338ba,1713c57e-f6c0-4d76-97cf-518a6339dfb6,5956306b-b450-4c3e-8cd2-45db5af00bad,521e1b2e-87d1-47e7-8d42-f2b104fe46ec,fa913a8b-eb42-49c1-a29e-754168d1e6d7,c60796bc-a1da-434f-8542-14dbd3d635cd,549e98ea-d39f-45a9-b0a3-3cd39661fcd6,6a81b6b3-2c9e-44e4-b82a-dc393e73a851,b16e6f8c-7849-44e5-b612-edb27fc308dd,75de35a2-d0fe-469d-af67-0da473729e8a,ae313b8a-d3e0-42c0-9c97-842555a1beef,ab4bade2-0cae-4a22-97c3-5262a21cd073,f86716cd-31f6-4dec-98dd-7cd7922f0daf,3615e868-0317-4f59-9944-437dd6b09072,ecb98a3c-5369-47c1-8e1c-dd47d4d447bf,cf7b645b-d41d-46fd-9692-55260cd0d12a,1e4b56c3-4bfc-4a6e-974b-a290c1839f6b,3447d972-160f-4acd-a5d0-88859e0d7558,4d5e3d18-d80b-4844-abfb-9fef65b682f4,19b857c8-aa34-4413-ab0b-4fa01bbea155,20af03da-55ed-4a13-84f8-b9b1de20878a,487dc4cb-01c9-441c-a25c-606b541d1a93,209f9c36-f0e0-4f9e-be17-f959c9f51e01,94b77cb5-db51-4f19-8a23-94ead465a982,b976eb26-a1ec-41e3-98ad-86ffa0900bda,8064fdde-3f31-494d-bd40-c9d51f3250ff,2a4d8691-ab8b-4d13-bc51-8d9ebedb9cab,1739c6fb-0cf2-48fd-8edd-8b50c644656a,dcc38ce5-3c10-4b27-819d-c6e66da2cade,845fcda7-e683-4071-9112-9d966b91231d,582ec112-60be-44fd-a6bd-01101859f5fd,7f3e6ab3-6e69-46d3-adc2-3650472d9146,5531e363-62bb-4a6d-8a98-8c3fba8fd3a1,aeb0ccdd-e62d-4a2c-bc48-1bf828b9568d,6fa3936c-c625-42d4-8446-ff3cad929ff1,82ee1d22-0850-453a-adb6-c6150f0b32e5,039cd81a-7174-4ea0-920e-7c2cf6a6f622,85eb1f22-d8d2-4a5a-b7a8-f47ea9b2ecf3,cdc6c560-2bfd-45b7-9db3-f32c4c2f24ed,386fab8d-df6d-4475-9906-1448326f3a8e,e13376c5-7660-41e1-a679-fd613265d307,0d7da31c-648f-4d77-baa4-383118a51ddc,d9456786-1011-40f7-9f09-d06448a80947,d61a4c09-8f35-4c7b-80f7-8f0f5a4ccf16,46173cb6-df19-4781-ac12-a054faaecde4,bb8309c1-221b-4816-b11e-2031f4a3e191,14906a01-e6ca-4964-8a78-0bc6684cf8a2,e365ea5d-57fe-4d23-ba2d-342e3a2e604a,f6c63020-fc5f-453b-a0ff-23b8bec82b0a,16b43283-3ed4-4a2e-b82a-34deaa78367e,f8abf765-d5b6-4130-b2ac-4d0eca01e5fc,e801b980-9632-4b0c-a50c-084d0d68223d,a2e43349-4155-4dfb-bae6-03ef7af6e89b,aea4d867-cd05-4a7a-8f38-5f9874ffe802,2b724b44-ddae-4b9e-807a-b737a29e1094,188c2bcd-2fec-4f76-9251-5577584f2f80,e29300ae-799c-48e5-8341-9ba940db8d39,3ecec7cc-e5e7-47ea-8370-311b23cdf4f2,20953f9f-2f1c-464e-a8ed-e42d1f3b0b22,d6b338be-ad0b-422f-aed5-ebcd772e7a3e,0e570f44-59c9-46b4-9dac-5e7b72dc9b3a,0ef09996-bedd-4759-a18c-25744a77467e,0f1e7ed6-fcbb-45ba-afa4-59c3272da147,86e8a24c-59b9-42c3-af59-bc6e7b898963,699cf2a7-d3b6-403c-bc91-e3e5343043b8,21e90d83-0c15-4a04-8716-e019dee854cf,f9b436b3-5726-419d-8d61-cbc63c6c61c9,a7a4a32b-d91e-4430-a235-b21b98382194,faddfa35-2afd-45f4-b327-cf3c85526089,53c30f54-88bd-4f52-a778-4c532ade98bc,7da7f934-730c-448d-a578-bf525b3b0563,7d64b13c-83d7-4977-9ca4-4e9dd29137ec,42839029-ee83-48fd-aa2f-c6f88bf7d1cd,edb361f9-7e64-47e4-ad3f-6943a7448ed3,4e73fb36-8922-4cc3-b1a1-c74e60d2b0fa,2f7a0028-986f-4882-ba7b-990886ffd7e2,bcf2f1b4-530a-450f-9e6c-f67c5936544c,d0e5ef2a-e569-44ae-9235-1b44f99a0ceb,bae98541-3c92-4925-af89-fd2cc8f5bcf2,6526ea0f-47ad-4b4c-8780-28077c75ee46,aa45eeb7-7649-4b09-a08e-d6b031941766,4dcf05fe-f966-4526-972a-6edb27cb71f9,c1395f4f-046a-4d7a-88e1-b07a13c36bbe,c30a4d40-661f-4eda-aea5-34f6a1a6c3cf,c6682b94-9d23-4533-902d-68c754157273,ec8d2005-be86-4177-b0b2-d9f6f75bb420,63b2684e-af7e-4ebf-886b-91b215bec1d0,6ef4deac-8a2c-4b15-bb75-cfbba22859e2,bfcb9681-9aeb-440d-9343-a2716a5fced6,9b6b8e3e-bdf5-485c-b2ec-28b5fd320ac9,06fd15b4-d649-4e15-96ca-20a31631f2de,67ff9ef9-059d-4c9d-a847-68be44a2d1c0,982ea18f-64ad-4715-b0cc-6cd2a7142b64,250055c2-f8fd-4272-ac5f-29add2c0fac8,86ad47bd-de3f-48a3-b62e-8bac9b211308,7f9c0bc4-5109-4082-8726-a2f0abba78a8,848ff1ad-5b81-4b3d-b5aa-000a0e63a4c2,6f1b630d-5835-429a-8675-8f4ab583adf0,4683e6e1-c2eb-4dd9-bdc8-8be18042b653,9777373a-92ad-42c6-ad9b-2c01462d00dc,35c349e5-6365-4066-8e93-c330d6609609,5db241ce-5132-4db2-a2bd-3d02b4368e6a,944cd7d5-44f1-4545-9cf9-536cef0cac6c,27ff60f8-bb82-48a9-b6df-5534a23d44eb,db874a0d-fac9-4937-bccb-8c019545a375,98cb0e56-efeb-45b5-9eed-d0c6add4189e,26a1e9c9-a04d-4f8b-8a55-a50018818e6a,e9fbaf41-8e98-4343-957e-7235f0bde924,b4ae60ec-9948-4f63-b9c8-1e0cf6e44d5c,b076efb8-e67c-4061-b922-fed47f1b41ce,65fa8cbd-be03-41b0-816d-acd8a85bdcf6,857b03fe-58be-40d6-ad42-39d9bf6851e6,735529b5-503b-431b-9703-15dc52f0fb98,cd625f95-1748-4791-b2ab-b69d8f171e31,fedf2321-a81c-49fa-8dbd-bdabd95ef329,a261a45d-4b84-437e-8202-0bd168b381d1,a4abb991-86ae-4a92-a6dd-32fa6830734c,c50a746c-7afa-4c88-b6c4-2dc61df1ca18,4bd9d842-3951-4460-bb63-d125457c9b75,22f3eef2-f565-43c7-897e-b0e44b16e56f,71605bf8-944c-4671-aed6-928d553867af,031fa422-f3bd-4524-8f14-6944fe1c95e1,abf944b1-aad9-45dd-9e56-f90ff1b6bdf8,f84a7b1b-f51b-4402-a6d7-ea9df7303e3b,21fae68f-f128-4098-b9e7-ba72d33292a8,585de744-e98f-4da0-9cea-ff07c057a4cd,7dd5534c-2b2b-48f5-969f-f794eb9a49c2,76707d03-95cf-40da-a1ea-633e732da36f,f52b3974-af4e-4cc6-be3b-57089b0c6ebe,692aacd1-9324-4d78-945a-235d9fdef5e8,071f0341-cfaa-4808-8a80-d39625163140,546f4da9-c641-496c-9c21-ec0e00404dbf,37763df8-eb8a-4705-ac15-223ceaee5d82,c039ef98-06fd-4fcf-83fc-6550f0f9975a,73b27612-441c-4109-8c6d-996946493d65,8dc98708-5a0f-4340-9b1b-1149619b20bc,22634b61-3b46-4802-abec-753dc3cd2bf6,2ee95110-6edf-40b9-acc8-ee46e1426d7d,096c8167-5ab4-4b5a-97e2-d5d81228802c,d15946bf-14d0-448c-a077-ecde8f71e2ee,67228d03-9a02-4dd8-9e91-c289c058d18a,fd605a75-275f-4e33-bbd7-e2e244f1bce3,aafca521-c86b-4384-8f06-ff5293753ca1,7b3e0375-7224-44a1-b9f0-4ae06471d9c4,5fe2bfd2-6c18-4c6f-ae6f-e9750dc5acfe,130eaf5d-3f4a-4cc3-a672-ec92921a58aa,3252e2e4-079e-4b8a-b785-47a6f26d410a,8f771aca-0f4a-4d23-8d45-90e30b90a725,e27738dd-7321-4962-9e24-23f15446e836,3f5e5f62-4796-4c5e-8aae-1bdaa6f09070,173e8ea7-6cfd-4285-bdd2-ff67ee16f582,c22131a3-2811-4ecd-977c-63d964ba9d23,f67bd78d-6d6b-43dd-a23e-5bb63d24249b,45e2a3f5-873f-467f-ac5a-8b03e201266a,b9b71579-564d-4fd5-b9fa-7789e2668003,ded0d7b6-1b98-4c0a-857b-ea5e2fa4f875,063afc79-6d29-44ac-9c12-fdf6d7591734,1796bc79-d673-434b-a632-040c464ee78a,64bf00ad-3356-4285-9bd5-f97ac0de5eee,2eb5ecf7-98c8-4ab1-889d-ecfe714836b8,ae159a06-d150-4b93-b351-00987e5fc367,95702b79-6c26-4442-ad3d-bbe9a05baee4,7a1a9ed2-6861-4979-99aa-902186c4b246,d1142152-519b-46a2-8940-f7ad7713d8d7,ee1a2a8b-9c63-40c5-af3f-3b5277a783df,bdf72fba-86f6-4de7-89bf-c4d14dde65d5,371b24f4-237f-4728-a6cb-bfcfc8ff264b,bb0b68de-2922-4886-8593-efa09495bf58,d2a45083-cf35-4aa9-b499-56a2500579b6,f7f7e7ab-27d9-4933-a7dc-c876713db822,ddd35a30-63de-44a1-98dd-ad3ca992da7b,58fb1555-bb40-49d6-8093-e822ba4020e5,4d8743cf-c6a5-471a-9036-31f2bcc16909,48af1c21-2f40-40ed-950a-9199a8154463,d3821b96-f43b-4355-9695-ccf893236456,81213f38-9552-457f-83ed-11cedd709b02,bbb5649f-3e0e-4626-8f93-36273ee5cabb,b1011d77-2d39-45fc-89df-73945ee39fa4,067b413c-bb73-41fa-bbff-30d132a0c841,8832a660-e10c-4f9b-8785-b38e358e2cb2,2d4f5626-c482-481f-8af1-8b902ecce1f9,e4c5a124-ee28-4ee2-81b2-2d07d283010b,37d45655-09d0-443e-9d18-41c9247127f4,f078ec73-1879-475e-95d2-23f2f091973d,af8a5c92-40fb-4813-bd31-1273b01563df,9527a8cc-abff-4204-b598-8f6e2810c0b3,ebfefa2a-8f4f-4878-90a2-eec5549094af,8971b6b3-b2c5-4528-aed6-9273b559f19e,82194ffc-b5e2-428b-9745-fea21c0f73fd,28693b6f-baed-4af4-be51-4c05aeded2d5,4a4f028b-b0c1-4e57-b83a-78827c53c7b4,6728fe8a-b389-4fb7-8946-5de4d18809aa,4c949911-b7ad-46c7-aa45-85a2156872a1,9e2a07b5-3863-43f1-94ed-e40088303b10,ada4e8f8-e8d4-4e4d-a1ca-598178840366,4b719886-3d09-45e5-a9b8-9a5293727001,0bc3a0da-0904-4484-8c27-69b082cef641,ad68fea7-6a63-46c1-89de-7576cba5740c,fd721dd8-8c12-4627-88b5-d018f443e2ca,ce72057d-5497-4710-b7a4-18ab26c1db15,3e4c06da-93ae-4322-a7a5-50113a977a6f,747845df-7d34-4f58-9743-deda7be1e36a,dff2307c-4198-48d3-bf94-4f27c22c7872,20c5a7ea-28e3-4418-97db-8ec8948a7360,5fc1e5a7-6a01-45e4-835a-dcabbf8343d7,68992c72-e190-4f63-bd04-fc80cccbe59a,7121af7d-6618-4138-aa65-55c04e51e16f,3c8f93f0-e8e4-4d5f-966c-e1b09a62802d,75e6fc8b-227f-44cd-9683-24920d5b9e57,9a950dc1-935f-4069-803e-2d615141230a,853583ea-ca26-4853-819b-7727840bcece,08b5a200-17aa-48da-b8e3-c82e2f6ef598,843d4071-4c2d-4351-b817-397bb7be37ba,f5cb98f2-a33d-4fd4-b83b-8d9342f33cfe,6e57bdcc-565c-4266-b9c6-e1c815a3adff,d659a679-7b5d-4f22-9c4c-7dfa0d1b6019,65aceb55-bd5e-4dda-834c-cd746f77a377,6bcde0fe-1131-41c3-948d-44c058464301,9f18c0a2-9f55-4f1e-bf8d-0449da1fd5a6,6f66490e-1219-4b8d-b09d-35312b4a0403,f3d4398a-e3d3-4023-84d4-4e5e13892603,9446f66c-edda-47e6-9c06-7603fd7cc5e3,62414bad-7a5a-400a-ac58-1504cbf0ffca,6e61b2a1-0805-4e74-90b7-590f4fe2f68d,7d34ff11-7c85-4281-bf63-fd61d2497cbc,2fcd41a7-9fba-486f-ace9-7e84e6c36b53,9b56a8f3-dfb2-4343-813f-c98f03769490,026cf5c8-42fb-4ba1-903d-cb2cc400773a,10f8c26e-c4e4-4f6a-972b-2e16bfb0cd80,6b0b13fe-7615-4781-849b-f75a171124b6,c8e5e163-a6fa-483e-8a9f-80677260952c,4bea7804-3dd3-4fd8-b85f-d35203175e91,413c367a-6742-4a9c-8438-68309856377c,0344742d-803d-4994-9cea-bb16a881ccc5,61ba3eb9-7d94-4e28-a9ef-472ee2ada142,ed766920-0188-4761-9443-ee51741408aa,7f2d1f5a-699b-4999-b6f8-1c5e8b468f4a,ff6b183a-d28b-471c-bd28-b2c3ae564b31,3bf944ed-7520-4d73-9a19-596925ff13d3,5d5803fc-2278-4bee-9a2f-348e7086f32b,8400fc00-61cf-40b6-8a16-9e8c0d567d97,73e8f920-fb76-4e0a-93c4-fc7cd5e80bb4,4f825ab9-4df3-4732-ae8e-21422a6dbc2d,51f2d36d-793f-4a77-9e10-ff58c1af00af,a129fe19-f439-4303-b1d2-f1ad33461c2a,2381fe39-b325-4184-ab87-4604cdc2e57a,5e489884-5a14-46b7-8fae-691619f74b20,4bdf81fa-9b24-4903-b5a0-48dda503837f,d073ab0b-473f-4283-bb9b-e1a789cdad91,d68f27ae-fb51-442f-9f2c-e24257ecace3,9b508230-52ce-44f0-8548-2faf76fa3023,e4779633-779d-424d-8ba7-b32bcf79ecda,c1e5a1b7-2059-44a4-9cef-b3dc8c05fe5b,165a9740-0df8-4b67-9593-5c959f6d1b9e,1216c733-2922-4bde-a63b-866b5a17ef49,e1d81e08-5663-4f9a-8e4d-6a7321b4e2ee,ff42a8ba-225a-40e5-bb43-386967e0b6a7,abeb0bb1-a379-4433-b81c-26baf5f8afa8,f19b1cc4-a4f8-44db-81de-c90168364afd,b3a4e03a-6222-4153-8ede-394c608fd783,7b2ce3f3-4e98-48cc-bde5-9def9a59bf9c,e610cee1-260c-4295-9cbc-969b5e14cb89,ca153ccc-5339-42e1-90f7-b14144496751,77767b7a-56df-4d9a-b6f5-339f2dedc77b,90f0b080-d227-4fc5-8cc9-913ad95d0d5a,c4833675-03a4-44c3-bb89-97eb1b5777d6,838bc161-d875-45b4-bc34-d913f3c50fe8,aaf35fda-e042-40ac-829f-eda848c593fe,8e5c1744-3fbe-4816-8bcd-79480fe8840d,e5f5ae47-ff72-487c-96d3-2b84b32e4d23,89848457-1f64-4443-aebd-46be4377b1ee,ddb6efc4-2abe-4680-94e5-2447dd3417d7,1b7fe3b9-beb7-4a07-96d3-ffab8af5c81d,0250150f-64ab-4d2b-9f3d-633f2351aa4f,77fc5365-8e2f-46f3-ab06-364576f4141a,cd3f60bd-3f31-4630-b845-b351964f3d25,54eeef63-2954-4634-be92-3fd21f006bc1,0857f0f5-da6e-44a2-bf1e-2fa8fa8ecc32,f7243995-6e52-4ef8-8378-82bdf1722a45,ecd9e3dc-80d9-49a9-973b-0255d881948a,5e8c3bde-45aa-4676-a249-8e2a07e2693b,be178c5a-df66-4dcd-94eb-c06557f1f24c,18079828-3aea-44b6-853d-4c5e87fcc28b,c375e663-7ff4-456a-a8f5-cd5edf28620a,18402e73-b183-4c12-a4a9-87c897b9795f,69359474-b20a-4cc9-aa5a-a58df6025135,50bf720a-4312-4f38-88df-adfacb22a3ac,bf55d444-0679-46ed-bd7c-8511c0aa3c61,5a5979c6-6f70-47f1-a24e-e5a8be40fb5c,910afdbc-bd08-4757-a24c-204782409900,b43669dc-878f-4e59-be64-83613b9bd81b,dbe13caf-4605-46c8-84e2-4a6e7a48fa0c,9dbf058a-cd62-4bbf-a050-b5adad54d184,00d622de-aff4-4b17-8b77-23332d8c1315,29311973-fc45-4246-8dba-1b83a921985a,34a47980-4a42-4643-9f71-40b557a67726,783d845d-e246-4671-822e-9a19930f6249,18e06b11-ea97-4ae6-9e31-55b51078e348,9e83d187-1e2a-4244-8e15-b2281b1ee413,c4147672-88bd-4516-acb1-2e5ef58ef0da,4944f669-e34d-495d-830e-3591750edd6b,ef988030-e05c-48ff-b68e-aea5a546485c,7c8ca623-87f4-4178-b234-39747d48926e,d0b53768-6ba0-47ee-9341-4074d506cbf9,9ff16cdf-d96b-470f-ae18-8838fd282dc5,a55ff754-3509-44f3-8788-75066e7249cd,e43e8160-429e-4c8a-b685-4d8a86db8e7c,5d7c7d82-fde5-496b-b6d3-802cde8c3526,741067d8-2a4c-4a78-9bbf-ea6c88b14c74,f1861a2b-1e43-4cf6-92e5-0552f08e6cd3,800cc8bc-adee-4f15-89aa-59a0a8f511cf,ef5b7d6e-a9ab-40d8-937c-2de096acc15b,d7812eef-f269-4a65-828c-aa1a8a3b73c3,dc9447f0-f0ac-440a-9f8e-e987220ec21c,2e6e5fc1-98ea-498b-844c-bfdfcc6adc09,29b6dc36-e1bf-4e80-accc-bf942c75fe90,dc0fce4f-4668-41fd-80f1-46209868f063,88d40e96-4e19-4cb7-9626-09eb70f1f2aa,da1cbc3d-c75c-4cbe-83be-0640db104d88,4fbfa416-34fe-494b-9081-7e9dba5f892d,824d79f1-c5f4-4647-9302-70b7972082c4,24a5429d-3479-4906-9a5d-89759936a9cd,9883f983-dc55-43ed-9c7c-a1d3909e8cd3,903edda1-4d99-46ac-83a8-3e707694911f,0a216e35-faaa-475e-9d02-8fe34f296d5d,dafa9068-b1fc-4b03-9dad-6f225b25d0b7,35f1edc1-fc4a-4402-9400-668ecda0a65b,1770b0e1-ac4b-44b2-a70e-a1245c6a5d9b,bcacc035-d908-4c26-b966-1358b3e5011d,d002babc-46c8-4d1b-9f38-17fc1f12c5e3,8686d7d2-deb9-460f-b06c-912515cb7c4f,f71e5822-e5e7-46b1-ade8-ad1897cb9708,fbf9083c-f93b-46cb-b2e9-c8b6736123b6,e55615fe-d734-4339-abc1-74745462f1c6,d43942b4-2bc6-460a-b40c-b94f3c01b882,2b00281f-491e-4663-9414-ae7ff159ea4c,b8bde75a-af9a-44b6-9d09-d89ca1c02ba5,e6c40ac7-ef33-4dc1-bb43-31ff2e1040c5,29466dd2-70ea-4c6e-bbe4-c819092da0f1,bffb5fb6-6a25-4f72-9de8-12743bedb9c6,674d05a4-d34f-4211-bf63-579cb59645eb,3967385c-cc62-4a77-a68e-01f743d4589f,9bdd1220-cd07-46d4-bfb4-e943ec0486f1,f92a3707-6bcf-4c60-8a5b-66e0db11d8fc,f1d37c4f-20e1-4b66-94e2-a15687552893,eb7f2c17-2c78-45b4-a3a2-bcb5bd5f866b,2987c12a-27b7-46ff-99aa-fdda980199f1,b6afd1d1-90cc-43f9-b77d-6f5c7e9ab056,b1962a57-e33a-4575-97f3-0765e16b39a0,bfebdfb9-625c-491b-ae14-5e52c2f5558b,19ad729b-9f93-43f0-b27f-7c145c8cd26a,c93eeea9-245c-41f9-8e41-a97a0a86e2c5,bc885a99-f6a8-4005-8236-81036fe1bcc4,9b3ed3c3-b4e6-42d2-8ed6-c15826728d56,698d00f0-a3aa-43bb-981b-08f0a90f7d49,8a8f536c-2479-478b-80ea-f5f7d30b77e7,b62c1562-ba60-4f0c-b85b-a17d60bee807,f964a3c3-6903-42bd-996f-4e6ec279d639,f529295b-2ece-474c-bd1f-b32addee6c42,9afff2cc-338f-41e4-97fa-4731da923614,273c6397-7129-42df-ae9b-14a7a9499361,3afe7f17-29de-4766-9bc8-dff535daad24,a5afd719-ff49-4710-9055-e40f8a3eeb58,6151cda3-ed90-4e3b-9d46-191fd9593d7b,dd7ce90c-bd5c-4c9b-80d0-9f83e7cb8019,ccc5f4b8-06f3-4ee1-9acb-e9593f5675a9,fba94c2a-49bb-4cdd-857d-c48fac97c93f,b4e6cd4a-90fc-4cb4-ae10-3fb312364b7f,35b7ffa0-4d57-4086-94f1-6b0718619427,4ab761db-1dab-42fb-960e-0a2c9a41ce0e,1aacbc4b-7c44-4fd8-bdf5-e667739c664a,aeeeda83-fd89-4b9d-a91d-a35bb90646fd,fb5c6380-2a8d-42c6-b4e2-1a4c453b38df,0a116631-d9cb-44d5-b8de-a62dc19ef20b,caaf6411-c242-4d65-8a32-b54c1768e69c,0faff613-e875-4b3a-ae63-6402aa5f2a18,38fabf63-9e0a-4840-80f7-ee978f2cb2c8,37f0ea25-b723-419c-8083-a6157edfd88f,c151efb0-1b02-48fa-a819-fea7c39c84c1,b21b1630-ebb8-4ae0-a5c2-e4fa2e49bd45,a5037031-8af4-4e8a-be53-90a85ddc9455,a8bddf91-063b-41a3-ac0c-361f2c241dbf,20da396c-422d-477f-a375-363948a5cd24,07dd07f8-c060-48fe-9ed7-066bf23bf3cd,bb358b39-8af2-4d57-bc05-7df253b2dc3e,b522122a-3eb3-496f-b87c-21ee467ceb2c,d61f2047-78e9-454f-9fad-002f94f0cc42,2cdaa815-922b-4609-8e2b-a65d51a59bc6,37f3bc82-cf6c-49b8-ab8a-9473550031f4,52383723-0dc2-41c6-b2a7-ee86218db3e0,d543c15d-4d7e-4b68-a4d7-bcd296515937,68ef0a77-bdab-4ac6-83c8-4852aa023b6c,d28c445e-9672-4b9d-98ae-ab9235962af8,1dd4dcc0-e052-44b1-b8b2-fa9756bdadbe,c8ede05a-698c-456d-bfc0-a2d12b402b85,37af61d7-726a-4391-9f45-8f43272cff40,af6c0a88-509a-46a4-bb70-c57145d2a351,5f7b732b-6506-4339-9087-2db117db6a01,7e9e5b1f-547e-41ec-897b-43b4cbf797ba,e79fc388-a7c6-4413-8605-1d4f02006cb5,aad4c6ab-878a-4c46-b473-5e9f33ab6c69,a0dff5a8-06a3-48bf-b9f4-e90d5b8fb50b,048d9d10-ebdc-4617-9988-40ef1e707934,6d50ff5b-31ab-4c06-afa6-e49656a810ee,b199c956-2836-40e3-b0a7-86509c48a587,07f3d1b9-7224-478c-ba61-5c85d084d051,c1384c66-0787-4cff-bcc9-f7e0d2926a2a,4c27ec99-e8e6-4666-bccf-f9b41765a262,463f63b0-01fa-4708-a29f-6b0aed233912,cc2e3749-470f-4d8e-bb05-9afeab0bb543,7c2022a5-c11f-4d5b-8153-bd3844ecf8e3,892f2a97-2c0b-45dd-8500-53a92eea2c84,bd79e686-126b-41bf-818d-1dda52905f19,2e71ebd9-8dd7-44fb-aa78-6f443ba1d390,d5010d42-0666-45ba-85f6-4363183040d6,a69f8af7-f672-450e-b211-96ef6c876e46,a87fd470-888e-466e-bb04-de24eb8e0a3d,a7b2608a-3cd0-4484-899b-f699075f6a27,dbd3c431-1673-4b2e-b00b-30488acc45f3,853c7e1e-986b-4156-b46d-f0b48e12824b,37ebf2a9-1d69-49f6-9aed-2a7af46b0227,255b64c4-d4f4-4115-bd17-2d2165268b5d,5441cffc-61b3-4296-a800-39c4cff3b9f6,b45c86d3-263d-4fec-820e-8440c3037691,811d3764-15e2-4641-bcf8-b95f71d93c45,aed4ed15-137d-4e9f-a9be-f910086d6327,fdc0686e-321b-491e-9e8a-7616471202bb,f262012f-8dac-4bfa-bab0-999c44d9eb4c,ff43fc27-4b73-4afa-9a5f-0e2e0255a60b,610625bf-3fe0-4c38-85aa-4096cc48c0aa,7353fd2e-e8bd-4f70-b549-e248f2396b7f,112ee637-6cb9-4691-a2b5-a5b0cb88f438,9b89b2dd-a1ad-48a4-a5ec-a6ecbe897b19,3411e4a7-a355-41f3-86ff-54d5a99b565a,e70b590c-a200-499d-acc2-14d889a9b4c2,cf051bfb-484d-4aeb-bb32-6ca33da5ab37,76a7ab2e-6b33-49e0-adb0-2921b3971a54,2b62680f-5dce-4892-a0d2-e7dc09ef39c2,8378eb8b-64a1-4616-a71b-f23c09ef1f7f,1560a68a-ad4b-4c7f-8bc5-de1d134931d0,c07dfab2-d76a-49b9-b001-cd72ff1e3827,83178f43-a9a8-44c7-baf2-979bb7eda666,37377a7a-73cf-425c-9c89-5564b7a5101a,013cf13e-6bba-482e-a1a7-b4ffb0f583b4,7a833578-463f-49e4-89aa-68466816054d,71fb61f3-0845-4207-b4c3-735beaa71d6f,4d9bcfb8-65fe-446f-bdf5-176e6fee13d8,30fe925f-3eab-478b-a4bc-a15135f5ad3a,440377a6-95e0-432a-89d6-a0a06831d2cc,19450f57-394c-4c5a-b392-738182f2e6d8,682a0174-f0b6-4ac7-b500-2d41fd6b4d20,436e2c94-0295-4df7-8efd-6ae2fe63bd67,58531b35-43e6-48dc-8e8c-a39a9eb5e0e8,49a8afeb-fbb1-4458-b312-bb607cc79db5,99bf7ae9-d744-4946-ba31-a57c9ffa388d,10f65f39-93d5-4764-b331-1898582210b7,ef5ae3fe-794b-4001-85d3-cda4f0c5db68,9c7e14f3-01b9-497b-a019-ed61f0a66fe5,ef5624ee-5232-4027-a4b3-3ff58b013ec9,6c2bbf36-211d-4f67-8594-1cca517ae126,18b8c917-b629-4dbe-b9aa-ca102c196c97,d9688432-9659-4651-af9c-aa431eafdb88,fc5a6e37-ed02-43a5-b53e-e138536c5f4b,41da6950-9499-4d1c-8e45-9e14c14dba00,152386a8-15ae-4f31-adbc-3893a66adc06,c20c99e4-d21f-450f-b2be-f30d1a07adfa,fcaa6d44-140a-4060-b739-14be778f95e2,7c2e5051-92ab-4054-b7b4-b29afea4b10a,88275ca0-8383-414b-92bb-a716be67b32d,f8dad5a7-817a-4367-92c1-0d7438135eb5,f658b4df-bb98-4ae8-9bb9-3063a3d96cc9,3fcd51a1-3f80-4fa1-9d2d-a2ab37572b9b,12522859-7886-4e42-a942-9b986c9edc13,32c0f841-2e96-42a7-8e42-1375d4f928a7,53693c87-4ade-43a6-aefa-32d878d4677f,0759735c-336a-4484-ba1f-ef8188e77dd0,119137b2-b862-46d2-8d60-7bbe736b566d,b0ccb655-366a-4672-ad3c-cd6ea2949dfe,2ea4d805-d17e-4c9f-a11e-d79bdfb1ce50,27b53860-a399-4be9-8931-e532fcf1658f,e13e52f9-bf3b-4031-b1b6-e5e6c0f16e60,c83fa34c-11ce-444c-98dc-fad956ade0be,14ca096a-11f3-45f7-9a6f-6a93cabdba80,d8abcdb0-00d7-4e2a-8bfa-372322f7a7b5,0d6f0f93-874e-4b1c-b851-a29b1331d607,d22ba1ef-18e2-4cd7-8784-bf4535bc067d,4b1cd3d5-d894-4ad4-94fc-acd00439e6fe,1a3f8ed3-9598-4683-8d7b-13f8b77dc5ba,63712e9d-9fad-4259-9957-5f6f2bf90acf,284ff2f7-d640-4a8a-a7a9-e506967466d4,b7eda31e-1520-4272-a64d-fc5168656253,5c6476ca-f1a1-4ac0-be93-11c3150537cb,ae706527-b44e-4510-b3d3-86215f799161,3f28884f-f7bd-414f-bf34-6b51de4f8328,cced58fa-72f5-4266-8043-426fce97dde2,4db85b22-ab06-45d7-9251-c8ff2dcd13b7,84174158-ac43-4cbb-92c9-b803fa04e082,8e4d529e-08d4-4d6b-aa45-4c4accf5b448,39264e50-07e3-4aaf-b9ef-f10b78ac3650,79b2331e-27e0-4ee4-887a-4b55ed3b32a9,c6a40b71-885f-4c09-99b5-e7fa25a5cd8f,e0768096-6700-4b5e-812c-6140074fc07d,ddd6a3d7-1951-4489-9aaa-e39237dde985,c3c2aea1-74bd-4514-9ba7-300cfada877c,d5e1c600-1a4a-4c33-9029-f26930044264,86c024a9-09e7-4ff7-b93c-77c6e7e0bf69,ab7e98a9-5cd3-42d2-88e3-6245fcd692e1,1ea6ea83-53c2-475f-93b6-943c5b1525a2,6c9a1b1c-e93a-4497-ab1b-0d78447da121,745bd03e-e867-444a-824e-3eaf567b2209,303246eb-2ccb-42f9-a0a8-34f80b25188d,2476e8a6-6d26-436e-b90d-18ed9f4560e6,2aefac04-7108-463a-a63f-a2ca7f1b2b1c,312b87ad-a9b5-4b18-b889-1b404294698f,fc8c9138-8aa4-4981-b294-06538d32d07d,3ed45723-ae9d-498d-b76c-bcd4725c787c,959da119-df53-4961-b2b4-9b07ce2ef388,c2a57300-1358-43a9-afff-497fcc904f75,e11898ce-d070-4867-ac5b-b7d54f394c5b,2f667d7c-7727-4c0a-a53b-1bd966a00c1e,a25f3c27-430e-4d59-a98e-08201a2b8bc7,eb2e716c-8459-480d-ba25-6731760b3157,9c57f3a9-6922-40db-af26-c9996b4a2a5b,1ab139b4-7486-4dc8-a3a7-917e415da68a,5c9f2d9a-b482-4d3b-a682-02e829333fba,b7b48431-578a-4c07-8d76-6bb6b395353b,5b858b1b-100b-43f8-866e-cbbddd01d669,a456b0ef-ea4f-4729-a61a-1d5b591e5a02,27722554-a927-4aaa-af43-a1b81c86686c,0efcab1e-34a1-4182-ad8d-0f355a57c1bf,4092679e-c64b-41d5-89ac-9e22ca9097d1,5dc8cec8-3d2e-42b3-a66c-e4717ad38095,00f485ef-1749-4d73-8532-ec8b3dc3db13,7da1b806-a3c3-48b3-89f9-a0aee1401804,dbb6eeba-c516-4084-a210-8efe47ee1ce2,69769792-eb30-4c65-ad3b-65bf76a596e1,da1ad143-8970-4b28-85e6-9b2360b18c8d,4b1302cd-3365-4044-9bd3-a30bc044a7ac,07ee43d6-b725-4fad-8ef9-964e3d88d055,21ae08b3-b704-4cb1-93f0-8f0c48f14607,b98ec2de-bb5b-44c2-bdb0-dc4639f69295,bcad70d8-b37f-4dcf-9d0f-dcf8eb78f452,da1d0576-9eed-44ab-a908-073903a9492f,cfde4f18-2ead-4024-b302-505077b64688,3e1dd588-d118-461a-81d4-cde952cb463c,9de294db-ab8c-4329-b220-f73fa3f5d164,3da353ee-6aa3-41d9-a353-f4c687f5c8e0,1ac8d36c-0c4e-4e6d-bb2e-8a46a4e5921a,b62e53ee-c3c8-4d99-b840-a6f5bb3dfc93,d12c92ec-1fa1-4336-a6d0-d5be44a05a94,58dc037b-d45a-43b6-a0d9-abf5abf090c5,5df0538a-ac15-498c-918c-749fc9d59084,9e5a500c-9d19-438b-baca-03e181069735,d3b14ba3-bc3d-4a53-a6ee-c53af4e897c3,f7b354ad-471b-4ce0-8c15-0120570a2ea6,91838220-c9db-4e48-9d36-c7328a5429b6,a5c30c50-18fc-4b0e-9b63-43312a13f024,c405d017-de82-493f-a338-7b589be0d6c9,e91fda97-251d-4f5c-94b3-0aeb0bb015d0,5c4760c0-f7ed-4b5f-a245-e6e4581c8f89,cf46d3e2-07ed-4c61-a182-c7607bc8e46a,a96bd987-8341-46c8-9c2a-a02635f0c11f,2c890f4b-b7fc-45a1-8c26-e975570602e5,07e38bb0-64c4-408f-933a-8265415efa09,42af6979-c3f2-413a-86d9-229fa335a306,4b0f37be-4bef-4230-9abb-11dd81f72b6a,440393c9-d019-47d0-933e-bd0a8ae5440f,4ccd32c6-156f-4fe7-8b1b-43c5cd336b8f,5d8cf5a0-cc69-4666-ae48-18628ad7ce0f,f42bf39a-1dad-48ca-aae9-ab52a1c47b3b,e338b701-170e-4f54-893d-3927514bd6ad,c9abd66c-d01b-45f1-aa35-725c6e80e9a7,72e3c648-2a3d-4ede-bae4-3fab9d78d445,8db478eb-59ce-4172-85fb-c8196f524f99,841131ba-e346-4061-9ab9-230dd05e2b3b,83b9bf4f-3be4-458b-a745-11c7dc57cb4c,9b914b4f-c011-4546-954c-b725b6fed20f,1336313c-6d69-4d04-860c-c1ff979643d5,5733c4d7-16d0-4b17-90e4-036bb883c81c,0040794f-30fe-4489-b684-75ef9cb1f233,a9dd422e-8f8b-4d90-b185-5872da0d2160,3c147abd-cfc8-4074-bd25-0e5c206b115d,82a166b1-21d7-4e95-b755-cbb094ee66e7,026b57e3-a2e8-4025-9b09-d9b3a0f93bc8,d790d270-acb3-4ca0-9624-31184005021c,57109b6c-b180-4765-8dda-30ea0be6b3d7,d3d89f5c-3427-49f1-9867-9d5ed6f6660d,878fe4a3-896c-4ff9-90a2-fe0f7c0c12f8,b76a1e9c-2356-4201-9f4e-51e8058cbe0d,172bb87d-9eba-4b13-9f15-523dfba77942,9675b193-430c-4d11-af99-8a53cc633cb5,e883e67d-598a-4058-90fc-f0332e493ff0,43bfb666-8673-4b55-8f7e-49a36f6baec2,bc96fc5a-1c47-4224-aa15-31a8ace7121d,696b5db1-7ed2-40f6-9c6a-7e728ea376f7,d340aa58-0f84-407f-a06c-d3399ab63cf4,cdf29acf-b4e6-4e31-b3f3-4e736b231020,3a905c9e-f1a3-4775-ad8a-03702f9d2463,c6e34b42-1bbd-40c2-b01c-5e88a48bf583,b87b2758-badd-41f4-a15d-5c12db2dbf76,f5a9eca5-d596-4660-9907-8a090c5db3ef,0c7c51d1-602d-4aaf-ab45-cac0f457822d,92f77802-7e09-4840-8ecc-40b6592a78e8,dc75f1ea-988d-4237-a6e8-c0a3abf89501,77b0f342-b973-4b61-8316-36e8790bac5f,ef5b59b8-bf94-45fd-bed7-4cb24469e5f0,5b44cbe0-5bcf-4ee7-8002-a739c39b52e8,174d91fa-e0ef-4896-86a9-26b117d5ff2d,1370b40d-edb9-4001-a8ba-ec5dc4d7b534,0ecd7c7e-f133-4eb8-9d9d-32a9f97c473f,364c956d-527b-44b2-9290-54cb8e13a28a,c0a17859-a9b0-40b0-8fc3-c73dc41de20a,d0eff689-1540-460a-a91d-d1d99f3fad10,bf0b4a82-4a3d-4a41-888b-10446f959311,583d057b-7bcd-4172-877a-9525dbde91f2,9a586f40-f928-496f-8198-4f9f74f21ac1,84d8cbcb-72bc-424b-8107-09fc098da73d,7a24d7f9-e1f6-41cb-aae0-dd5acab127ae,b251ed62-f601-458f-b093-26f69b7463bb,b0e38b44-90e0-49cf-b76f-b5cf7fbc7d7b,d18e3faf-1b8e-4736-971b-6434478d9b85,43a7531c-8f46-4a4e-a3cc-b13ec824982a,b3cc67e6-e99d-4797-a152-0b8c6d7ae17e,cee6b088-a3df-40fc-9008-56c061dbbe95,61afb665-cce4-40b1-bf2c-ac7671f80721,bdbf968b-3170-41cf-9089-b6d76dfbf370,0df7d9b8-773b-4b47-b533-8bb0a923faeb,f6bfc25c-1513-4ec9-a3a9-3484af11f10a,c51b971e-745b-4121-afdf-4799cd48b629,8f47c728-168f-43f5-b827-c76de9181db6,60c26513-c860-4cc6-b059-1468f0f94f7a,1c5bfd14-a0e9-4d6c-b285-b377157f218b,9cfb0d9f-e873-4563-9ff3-87c870dbd4ff,d5405bce-82a6-4303-87e3-9ee3cfa97dab,04ebeb25-906b-4100-a784-057bbb467824,78c6090a-ac0d-49d5-abc2-038b74fcaed5,a77ece3c-f7b8-46dd-86ef-18e972b3003b,0f3089d9-c015-4b7c-baf1-5465430f8686,83d7b279-edc1-4b3f-82c9-3381224dbd23,d1c7a6a2-b84e-4957-ad85-2cdf0f3cb38d,cec3ba29-71a7-47a7-8888-c3a5d46ba798,7c9e40e2-aaeb-48d0-9f03-ce64f092ac0b,0cadbdb4-ee68-4967-985b-f4914d713ba6,92211f9e-7018-49e6-a44f-ffb09c300784,6536436e-c851-40cd-a8ef-17daa8af07a9,a2845090-b761-4b5e-803a-d3f3ed3c2e85,37e91f08-6afe-44b9-9130-2c85999042f7,cf804b89-2120-49ba-b06e-53c283be5452,84a2b32e-046c-4e15-ad5d-7858f95b58b0,5b18733b-2cd0-48c0-839c-56d95eb44277,281a1854-2bfe-489a-89c3-4439bb361098,50f61e83-a839-4ff7-9816-89041228d58e,b4832408-bf10-4c97-bcc2-aa157f6904a0,9ea5e265-c7f7-4c50-8e4f-efc66ceac650,df581fa3-6d4c-4a87-b657-5173bf280abb,542e8957-c5d5-4d0a-bc7b-c4110d99744c,d0eed8ca-dd99-418d-9f72-d71fb2da87b1,e00da00a-a191-4eaf-a66c-9095e5b8905b,132cfd08-e815-48cf-9603-a048bf9d8cf2,76fe69d1-9d8e-4b69-8766-69d8eaa291a7,fbaf5ddf-acf7-40ba-a5c6-5bf27c58b3a7,1cce4792-4ec7-4a7a-8f0c-58962b6bf6df,07417666-0a6a-44a8-87f2-6013b84b7c79,beb89748-4a46-4977-bc5d-5b61aa57b709,3c99f72e-c623-4c36-8075-36ae62c05681,0a3447de-aee9-4266-9812-9332ca948519,623a1d82-d885-4db3-9e9f-41da279bab61,4bb2ba8e-e98c-40f7-ae1f-6ec67ac7bad1,4455666a-92ce-4c16-9a65-fe60058cd42a,a90f808d-5437-4540-80d9-5af79b74d661,4aecba4d-91a8-4d09-8c39-85e3aa197357,ce785e26-4e3f-471e-a4bc-283140554fa3,2d07b2ec-53aa-4808-932c-fad64eee025d,80787a64-041d-4867-b2aa-fe859df0f02e,25e255a5-05fb-4ff1-b87c-5125e620ea60,931c500b-7825-4bf9-917b-d95d7df7dab3,c5e033eb-84f6-4249-9341-ca42fcfa136a,cb624953-53c2-49ad-b3db-535d8c6e1d2d,129ec8b8-7553-4401-9115-f724793f4840,fd8dcaef-f5bf-4716-b5ad-afdeb268e21c,1bd6090a-14a0-4ff7-bb28-6e4c5c034842,eb169a85-efdd-4124-a85d-761ee33a9e8d,d2e0ee7c-528d-4a09-9b48-23e6a03f9395,42f4f2a5-e46c-41fc-8217-fd7caa67f0aa,706b84c4-a430-46c5-8d3f-cfa6c592e53a,0c4f7b43-3a96-43f3-9b3e-1a03365efa6d,6494b806-3eb9-400a-baff-3555700e5ba9,0f653dd6-b7a5-4d1b-9144-b697cefe1d42,4b23e5c4-607a-49aa-98da-ee05b4015f26,2b3ee6a0-1bbb-46cd-a8fc-0202626e033c,c7bb5a10-6ad6-4c48-85d8-1fd981df181b,6bf51e95-619d-407d-9b44-28b4c4a87070,3f73b63f-35a4-46ce-b671-80a9e093df10,561ae5cb-051a-4e27-8967-1fc2605491b5,4bc0f9a7-c6e7-4511-aba3-6c858e159b43,f7bf7e43-ea96-47cb-bb81-4e0282b4f707,879aebef-e87a-4c0f-a7da-16bcc2e72e9b,08b9768e-f686-435f-ac4d-d76a0fcb3bf7,baf468ff-9700-4be9-a314-ea2886b36010,597dee33-010d-4fcd-9310-f73707f5b0dd,6050eacb-9ed2-4f13-adad-567bf2e09c00,13df06b5-9761-4b52-92f1-e735636cab61,949078ce-ae46-444b-a348-3182ae3c8bec,43a2a433-7cb1-42ba-8693-ca21abd5bd2e,e3ed4f56-a45a-4cf4-a9b2-3e38456b4c5a,606d6195-76f4-44eb-a19e-e8b226f0f10e,3a3c15cd-ec7e-4776-addd-92dbf19cf410,a2f31278-8617-43cf-8810-7a4fdc07478a,c1effb4b-e7b5-496a-9a52-b20ffdb8f202,edd525ef-270f-49aa-aef2-fe6df5f9f2af,159015bb-fc81-4477-979a-0ddbcd892633,c1b2185b-fcce-4080-b1c2-969e527c38c2,8c9efacd-f4ee-46d1-a795-5c7ec26ae391,7d05878a-84db-4400-b8ff-dab2bc8db26a,f22e58a3-6fa1-4b6e-8c68-64dc04c40a62,85ec54c5-05a1-40bd-9a87-d93b6d2ad3a0,1b23f2a9-4201-4ae4-9817-04f1d022c850,1d0bd7f9-c563-4445-84ce-7e7ffd639e70,469f348b-1bf0-4adc-b88b-b0c06b360fd5,e5d7217b-82f0-4db9-bf0d-f6f242c06028,0106a3dd-fb7a-437b-a336-be1997a62486,2101a5fd-1a80-40dc-a00e-113089923dd1,34afe21e-093f-415b-a589-4ae87e6f3183,78986817-ea6e-42ee-99b9-d64fba12d887,6a0b2746-5f81-4fea-b2e3-1fd4168b4d63,9553a47a-ccd8-4e72-8fc3-e1ec906e0dab,010ef401-5fc0-4e6d-9ed3-3da0807455cb,3e44a02f-a1fb-4cdc-9005-555afef9c32c,57d8ad03-c61e-4b5a-acf2-eb5cd4d7a2e8,372d4e46-fe6c-43c9-b4a7-1d1e1cce68f7,62a6eaeb-1e5a-4297-8ebd-5a37499ec651,2ae1d81b-77d0-46ab-a56b-4ad0fadaca5a,be63e510-ada8-4f1f-a4e3-4ea6a26d09c0,74667e98-264f-4faa-b3ef-2131ae61939e,f686e357-2635-42c0-9050-fa6d81b18e6e,a9eeab1f-e21d-422d-9269-413fea1832ba,10405030-62a3-4695-bf99-8e83b7989d9f,e36eaac2-3c77-49f1-b236-9ef8d73b5e1e,0a9148cf-3c30-49b5-b170-4f9864ce6cb7,5466c6ea-c87a-4594-ba1d-5c0640e17fab,b04a7f1f-080f-4cdc-8a25-7fcb3171d128,48d8b490-5de1-40d7-b412-5b09eec24e5a,a47fa4d4-1164-4643-8249-934428baa1a9,c4e7637f-ccd9-4821-847a-e8205887b01a,bbae41bf-51f1-49f2-ac1a-65dcc6d3dac6,a38d2399-4b2b-4057-9924-bb2ec6d3de04,8dbd0c05-b0c8-4e65-9cbd-eeb4389beff1,6cd173ec-5a14-484c-bdca-76919c7a6c1c,a567f65f-989b-468d-91cd-a48653eebeb0,0cbd9bc6-393c-4411-a7b3-3a7d457a515c,63183622-14fe-412e-91df-6cfb3c0e7d9c,eed96901-5a60-4fb3-8187-2541338e7c5f,7fddccf0-67f9-4388-81a9-6fb77e1a13bc,a4536fd5-41fe-4caa-bb83-51b0aaf78b7d,a73a10a6-b723-4d9c-8a14-79e4d3d72250,beb02eb2-df56-4a08-a46e-3100406b4f0c,7f50cc99-676a-4551-a5bc-7727aea7bb02,0e48dd73-dfd2-4b7a-9c6b-131aa239b6c9,37da099e-75cd-4552-b8cc-c44925ba7a3a,19d5b714-7f14-470d-873e-feb7808fd8c4,61ddf9df-d046-4f20-a14c-cfe804d31dcd,1a608414-5547-4a15-b6b4-8080f978d188,64f3de25-799e-4a32-bf7e-b18b663daf33,39ce13c2-eb54-4496-8ada-c69aefdfec49,27eb6026-2b0d-4b04-91b0-dee439dcc908,c61d0d22-78d4-4254-a0fd-16455639265b,bb815db2-507e-408f-88e0-b4a0be7aeec2,37eb635f-506d-45c4-b8cc-3ea61a10b088,72cd52ae-369d-4f30-ae8e-4ff1877ee092,1f00a652-8ee4-4546-a713-fdd9ab423cad,7f9ce2ae-a277-4326-b0c0-41e89da5d135,65eb62b8-16af-4f45-bb42-7c26565d3499,a677781e-9858-41d9-9554-842e6971c6a4,ed03d07e-a2dd-4c33-85cc-90e327715497,1c5127cf-bb88-422b-860b-9d573da66636,1651450d-b2cc-4bb4-91f2-4cb4c9e63fc5,125483a7-7be3-48d3-9a53-233562a5af00,5b2deb06-6bf1-4e9e-aefa-4048b0e357bd,6fa8266b-79b9-476e-bcb7-abeacc55febc,07889d57-4bab-4c7b-8f8b-2a1e18e95261,43c948ec-6188-4e6f-b9ed-0fd88f7f2e9c,5ce32a88-c3fe-4ccf-bd67-f5cf61d71bb0,8e06a428-515f-4def-a9d5-c44025176e35,ebd7a3db-9085-47b4-8abb-8b6105575f13,ae8f13e5-0a42-4a63-a147-9697f56a8f9f,e0da5a25-c6ab-44a7-a5df-257cd27a3a67,f21f47e6-ac6c-473e-9e65-a174f43e261d,abba3673-13cd-4f6f-9f96-f3ab2e01c834,b29b626d-8509-4846-a265-83e8c069b172,6004f729-a393-44c9-8dbc-d4a9c21ebcb4,78cca668-860b-4c07-8455-5acf4d9bb66c,9a53f0b0-5c4a-4938-8640-d743a9c6121e,e5e1aa05-ef40-4cab-8876-de31395bf09e,197eff95-2996-4596-a058-148cd16b21d1,98d5ca62-5d6d-459a-ac14-55718626b97b,8ba26fc4-a002-460a-b047-b5436550672c,5127954e-72b9-470d-8e02-894f23e9fa93,c2250293-98ad-4676-815e-af5f92d97a07,b211167e-0b7a-40c8-bd38-60f4e33caf41,3bdf6794-b910-423c-a8a2-cb38072b4221,f35ba2fd-4492-4002-a909-ddf13a5e6d8f,99ec226a-0f68-490e-ba54-75e8aebcf10a,67e6833a-8d18-4937-a001-85b47ea9da78,e901d9ec-8d26-49aa-9b7b-a50730fc9b25,e4230f08-8ac5-4d0b-bddc-65e2f62f1554,f1f5d5de-1db7-4272-8310-e72dc5d06803,3ddd3174-d00c-4722-97b7-cec9f49f419c,64d492fc-bcb7-49d2-9a57-2d11307b91b7,a43ce48e-a693-4223-b671-d6aea068bf27,3ae08db0-c706-4fcb-8bd2-656c21eda97a,229bb365-daef-406b-8d78-a593cb827c8a,b5403174-8cf0-4288-a635-2a992decdbdd,0b3536fc-51de-4ffa-82c8-9a3af77f438e,929bb414-0021-43cd-b992-55d469d81957,710955df-c790-4b8b-9bc0-5dfcb0f9047b,ae08940d-5786-4789-b47f-d60c305a52cb,f05f1dab-db61-4a0f-adc1-b455b60f43bd,2c6ab54e-d328-4aea-bcb3-02a16d55f259,ead6dd60-241a-4e98-9dfb-b213ca986a87,bfc01e9a-754a-4d65-af7d-9f129f129dd4,d4d243fd-838e-469f-86e3-18f35fc6b57e,b3baf4cf-ba0d-4540-bb02-7d7eeeafe893,612637bd-926a-4234-adad-1953fd667cd3,f086bc7e-96b1-401b-a873-799490a0c816,51f85013-4fdc-4fb0-94ba-3610502245cd,aa517edc-662c-41ae-a43d-b7cd113e0a36,2c688415-8b93-4570-8a0c-494b2e105239,f68350a8-e4ee-4e19-8fbc-87649b36a692,2314580b-fede-42dd-a80c-916226977409,2786c3a0-0f9d-4109-97b3-f4d267610756,eb289660-7c2c-4168-a11e-7a57ebbd11fb,9132dbb8-2794-42e0-b9f2-1a2007745a1f,e9563cf8-7b4a-48c7-b421-ee6802b7b297,091a07ba-b971-4753-af79-af9e2c8a9961,44605e73-1a91-4129-8dcf-c5a7af820269,44a26c21-2da2-4362-9000-7adec104a899,931a0b93-3f79-4091-ab61-7b52ecc88200,ed124b10-a65e-4b74-a40d-5eba29ceb48f,1b2b1919-054b-4cd6-81a4-dcae8182fd3e,f6501934-c023-4b78-b4ad-ec5a0ae98d18,c876117b-8e8d-48fe-8ccd-c19b1bcf5061,8c50b644-feeb-4e54-9723-4c5098252377,be062713-ecb2-4ac3-9a53-a2ea9f8f18e6,d2f088df-8a1d-4d41-801b-e6dcec52d3da,6caf2c7d-29c9-4fa9-8a43-edf6aa10eed3,a1f88828-241c-4d5b-959f-bdcf5a0fbf93,f9cbf0e4-776c-4346-949f-51af533b9cee,0866cfbf-908d-45f8-a6e7-a675a5cf7b71,45f4fe7b-c802-493b-9c3e-a6517e9c4600,84c2d35c-bbd5-491a-ac65-56ca73e3de5d,83e64c49-debc-48be-82ed-6c582105b917,aa5157c3-265b-48b3-a60d-64823759f1af,8f44ceff-048d-4acb-872f-999b46b0795c,0bedafde-954c-453d-9b65-feaec0d62fcc,d9942f69-2a1d-4256-a27e-06f2ec02e30c,abda6430-5321-41c6-87a7-200b5ddc6fe8,bb3cc2e2-2e0f-4707-949f-41a92eabff76,3ccd7e89-85b6-4126-aa49-3513b87abf16,2ee74956-907e-4bde-b44a-b86bb14a055d,ec1c7691-03f6-431f-aef5-38285a55ddba,fd7714c1-a5fa-4293-91c0-ce5f5154e1ee,cb5baecf-0277-44ea-96ed-00517762932f,b8804abf-e2b0-476e-b856-ecfeee62204e,293eb08c-76fb-4599-ad44-2b060ea40f4b,f08ca988-a76c-4f40-86ca-f03ef79faa77,e0fbb2b1-b46c-403a-bbd9-e4c9cce16943,17917000-0361-46b2-b644-68e0ff994bad,5e61b47b-5598-49e1-9672-d9fa16cbf1c2,ced9c6ce-6b2e-4b10-aeae-f3883e4fb4ef,c0103d52-2403-4f65-841f-8896e41d3e7f,2f038b2f-2585-474c-afd9-690cec622b4f,fa797d09-2cf6-4d2c-a044-a83d6cc2196d,49d73365-1308-4282-9b99-2155984c9f96,e42e2a62-1407-4197-91cf-3681d0b8b192,c706e65e-7416-4181-af7b-a129b5eef080,1f4c10d3-ae8c-41e6-b7ba-48bb43ab87db,44b3f0c0-19b5-47b9-9407-d263e6d790c5,119c2321-a4b9-4111-abb9-362de4684d88,ab2141ee-eb95-45de-8c96-fab538bbd546,46508de1-b3c2-4341-ad88-530f6350c745,8a0f9ae9-b312-4c6b-8f5f-30a942c4c88f,a2101d3c-e199-4156-9927-9d2ecd5d245d,4b863419-6eb0-4140-b0f4-f5d7da1b6f2f,b61e2cdc-ff63-4665-a2b9-6cfdccfd21f0,8627c7ac-b25b-4bb2-849b-0df133d85878,b8ffa751-ce38-4d1a-b266-1eafac7a3477,94012c89-9b93-46e7-bc18-148756b6cca4,2d016ca4-8e9d-4a51-a746-af62d5e4cf69,09dfd02a-75e7-402e-9f06-15ad1b52d0d4,4d6a20a4-3b35-4649-8652-af62bebf174e,1721e105-2cc8-4277-bb1e-3bd78593507a,8bbf1a27-85cd-427a-a364-31f27fff856b,66866ac7-ea23-46a2-9732-7af008a428d6,fade752d-3d75-47ea-9fac-107d29f90c0d,4031f1ab-540a-4131-b5a6-b29d8db96e41,38cad8e4-7927-4540-bfdc-bbdac2517b39,91736d29-b408-4c5f-9331-a8fc267a82e7,f093c0a5-4a83-4081-b231-5999710afe76,3bd4b3ae-6010-4659-984c-e5b4955bba22,8dd562bd-4fa9-4b51-a070-b57c734f4bc0,33f8ca11-910f-4ab6-99c5-5b9f669cddfb,ea688a19-0214-401c-9621-ee870f18c275,6bf374f2-bb3f-4f03-9897-4af322f33b48,8c8d8011-1e21-402f-aeb3-b0fed6adcae6,d4cb7313-126d-4d9f-9b88-faade70df389,81248f22-2c73-43e7-86e5-74b375247ec0,e650c6cb-e720-4491-8f58-43bfd946a3e7,03ef1e53-4657-4684-b916-9543bdecd180,6c830880-3604-44b7-ac56-369fad145fcd,95656bbd-a688-425d-9d45-ffa0b7a01346,d0d2c42d-66ff-492a-8eb5-330894a91f77,ebe0b15b-2f86-4378-a054-e2e44d8d7e67,b4378396-41df-4db1-885b-0bf468ed7a72,df7692c8-00da-4305-8807-58e931fa862f,c03af095-f5ba-4f42-b135-10dd933475c6,51adc9b5-6c27-42d3-ba4b-b26d4ae404ee,2f23a028-8b57-4a34-a2fa-c50309770a13,7ac713b8-cc63-40dc-9908-5b2f050ef2fe,23ffaf4b-61ff-4e37-bac0-dffe2463753e,f696bb4f-3563-41db-bf06-ee83860c18ae,6e896df6-98ed-4f0b-a863-dd39e84dd942,e2479f53-a8ac-4a9b-ba9d-d5ccaec8f0b2,b1caa6a6-ac39-40d2-8a15-1f0f5e2b4777,e7f9d167-bdbb-42da-aacd-f30572d4c492,c1376ce5-2e6a-4036-9734-7d4af5b83a58,b7f7ffea-9113-44f7-997c-81f48863f8a0,a4bbf210-529d-4d17-a87b-686cf9c61058,fc823c53-0046-43a9-9f89-20ce4909da11,f9ab166c-b543-4527-a8d0-e31791b45d1d,fc17381e-07d5-4a92-9ba7-c27c72778d05,b10a2d6e-55c6-478d-adf6-8cdb511793fa,1a033bb5-2d39-4ba2-9e81-7ec965625bd9,f7243f3b-bb70-4a40-99b8-f3c8da0a2c54,1884caa7-8e2c-4a23-9ef4-745546ab39f0,3e709efc-2690-48c7-b49c-459f0c10c4c4,bdc15bbd-7c47-48d0-a7a3-52be33acc6a8,35cb456c-ce82-47a1-9a96-bb56d72210b8,40139ccf-f6ce-41af-bb55-8692dbb5875b,33d9b779-f5bf-4286-9dce-f84b354ed994,f7c4ea48-fd18-42df-9b63-769414caee2a,1c7fd3b4-7d48-41cc-87f3-8189bb10fc39,97060991-8dad-47a0-82da-2660c8d037b6,73849252-6d07-431f-824f-69c657cb465f,cb742b37-39d6-4f33-8f5b-6252069bb9a6,74b9335e-37a2-4c95-a35c-85e40b568cc1,cbcc859f-67b7-4f11-905b-31bbdc8036f7,991f5d51-9b2c-44db-b4f7-b7e6c39a6d44,8165af0a-a8f8-4edf-b683-9e5b5cd65cc8,f94210b6-3493-4f29-99f5-5556214c67be,ac9e3afb-1f5b-439c-a73e-7cbc01f3316d,fb164178-bda9-44dc-bfde-d0767ddbc337,eee104b7-2af9-44bb-872d-e541bab143c3,1e0061b8-3329-4ed7-b52c-3c6822582c3d,42b15855-fd4e-4f9c-842b-bee25f3910ec,6b5f0e00-2363-4937-81f0-73a90c046a92,1bd306e4-e8ea-4232-aafb-cf70a2bd48dc,67969dab-fce8-4a8e-8d77-5979991e8d0b,c43caba3-f4ce-4f08-afb7-8b23fc63df9a,c79c7ca1-326e-447f-a490-395016483003,e06ccbc3-8d61-4ba4-9913-3a66d5628ce0,d3b1c9d2-7955-4a13-9864-4083d88f12d2,0f5f1f25-5bc2-45fd-9927-56ce5e95097d,ec65271a-659c-4514-a467-a8edf7196754,80511c10-ab27-4df3-b890-0acdeef3336f,df586d71-4020-4630-8daf-fe2a3764ee39,f6446ea0-0493-47e3-83d6-09f3788b78b4,92c0b07f-ce96-4526-a764-05dcc4689160,511c7704-0d40-433b-806e-f9cc241040a0,f137418e-4a3f-4118-b1f2-2cd7a16092e7,965a18d1-1ff0-4d2c-bd6c-87c5f33fcd11,88a5a9a8-552c-4348-9be9-c8d176799e33,6458852f-6e4c-4cf7-afcc-6dac0d2cc321,82f7157c-e373-45ba-a4d7-4ba00762dee6,5805d834-f3e9-44c3-bc68-07e70d6d0bfb,ce9cd7fe-df1a-4fbe-b2b2-b1f3d7029611,0610b29a-1146-4e22-8280-cd32cec32c59,1a1523c9-675e-46b1-a123-cd48e0116b8f,c82eb596-054f-49d3-9cc9-d8117422acd4,747cd233-1e2c-4f54-9dec-a1050962d8d6,5888d788-b309-4bcf-bbe0-8559eea57a72,463347f2-ae3f-411c-b22c-caf578ff2644,ee694e2a-7201-4e2c-8e3f-f7c67c4828c9,50e48594-8568-406d-aad6-69e696ee77c7,cf8a2c0d-f70c-4e06-ab54-2935de2edde9,0ad25d93-f250-4664-b76b-4404eed67cd6,65f5fca6-94b6-4b84-a2ba-4a352b0b95db,462c784a-d8c9-4e95-b8f1-dc258439ffb6,1f03b212-a6fc-4871-ad18-047a0ece5bc2,25ea1bf2-23d3-4c8b-a0f8-436bb03fea1c,9eb0477e-7225-4128-9bd2-e9fbca270415,94f030e4-1e9e-4860-bbb3-e876924b4003,700be740-fde9-4f8b-a50e-4d12ddc106ef,f3d411d7-7f7e-439a-b8b5-7d95ef900976,e1415158-5c9c-4a81-91e4-b7133f8a9df0,e2781754-9cb5-47cf-acee-ed9025f33696,35f8c6a8-e27b-4bc3-bf41-03d79840aff7,064e31b3-04d9-45d1-b8bd-ce2f5660248d,a162af02-0404-4ff0-81fe-68ec4bd87cec,0116d192-e75c-4c7a-968e-b8468b781b26,9f109f14-254e-4d07-b604-e1b5f995f559,fbe074b0-9ce8-441f-a074-90af9660d14d,7222ee40-58bf-4f01-bc69-9c04e7793dbb,c86d44f1-79fb-4b2d-afc6-6de03f37adce,878c1a8e-7669-4f3c-9d89-34ea06ca4f8c,ec8ba424-f280-45e7-ac35-d59af643a908,aad5bcf0-3830-4a3b-afce-efb537edf884,2b8f5c96-7d03-4bdd-8c79-a1d93fac3f1d,b23199de-b682-44d6-90a8-855bbbf18438,bfe2fe5c-0740-4f7d-a878-d13fb434ed9e,096389b5-fc48-41e6-8ce9-e51df1824277,82eed936-fbbf-4c0a-aa08-451aed84e9d0,c3772287-8334-4a0b-be1c-22303b723dfd,dc864a9e-6374-491e-b298-5c11e5708993,8a669f88-20bc-46ca-955c-50196ebe6ef2,a63adb90-a8d9-4c6e-b6c1-1671a0e1e7e9,385282f1-119a-4705-a67b-5df6c5ee07a7,85b428ca-fd0b-43bd-8e91-6ffeb51c134d,c8c63fa9-eb81-4343-976e-5e88c2fcc02e,c5a74f96-db29-4ae0-890f-0c2e9225a8d3,44ae41fa-cf60-4e13-93da-e5d0f4f32270,240c7756-b688-44ec-9fc1-50c669f485a5,566bfcf5-4d59-4704-bf8c-81d9fc74cedf,1309886e-adb4-4cce-890a-3f6a9e79ed7c,b03feecb-f325-4d04-8194-ada62896a3c4,543a5b68-44b4-49ae-81c3-1315ab588b35,0a6e44df-6866-4941-a1bc-52bb51f41e95,e53eaad8-c108-461d-a139-bf16e80644ce,aeccd5e7-3204-4a4b-a5bf-47a77fedbdbf,879ae80f-f54b-42a5-bbaf-ce574a222824,8405a234-13cc-4165-8bf8-cbcf31f6d32f,083df7d5-9077-4b5f-aeae-7455f8ede091,8ff01e30-c67a-4bb3-93a1-63f434590f88,132ea99e-fc9c-429b-92fc-7210d4f3bb1f,c5c457ac-90ae-4bab-8e8b-a41208590f97,3cba888f-dee4-4e6f-acd9-e56537faf3bb,443d0c41-7f49-47a7-a7f9-e31103b3adc3,d6cc4cc4-fb06-444c-a0b0-11d9a80ad35f,84c0b294-fc06-49a1-8d84-0e9539189333,f28354ba-36c6-4b5c-af8d-a216449ac5e7,f71b8007-51dc-4d73-b1fd-0004a97397f2,0fc35448-7ee6-41ce-bcdb-aecf2401267e,2ac9a8b0-b15d-4235-bd9b-6c966bd7fc5d,69967d62-daff-4315-8ce3-b6754f576530,e0f917e7-2d15-447f-b445-b993c286a685,94313f53-d08d-4d30-8b49-25f501ecfdba,6399062e-991f-4141-a0e5-bc8f8b3ef135,895601d2-63cc-43ac-8fb8-249ed44c9cde,283e0e6b-409f-4041-8d30-bac48af40d93,205f65d4-d380-4112-a0c8-87dfb86e48aa,4d6622a5-2b70-4cc9-8fdb-d056d3b72e2e,8f28a478-d781-49b1-ac52-0e4455101600,d8c45a2e-cf48-4050-b8c9-2b85108e3348,fa4c47ea-4e52-45a2-a461-09ad273c7dbb,12b8d935-2b61-4a78-ad87-297f529b175a,4599535c-ee8f-47ca-80ad-b7c75263195d,226e7d18-0b23-4b3d-9d06-e4e7e9152fb8,561005f5-211d-4c5c-9165-d5c189c16437,c884883a-8d5d-400e-a248-17bacfd268ec,b1548719-463a-47d8-a24b-fcfb3f22b06e,4605bda7-452e-48b5-995f-64a065ad4e13,bf7d7f7e-55e9-42bf-91cb-c27184400812,774b2631-3d48-46a6-a25a-383a704bdc0f,0451eee6-bc2c-44d3-99b8-fa5f8b361199,fdbb19fc-0216-4022-8681-caf52e0a0d5c,2edef240-fed9-4bd6-b936-e5ff8d0c38be,31b47759-903b-4081-8d1c-dd386a196ca9,729d68f6-26ad-4130-add3-cd15b181cc9e,e698b44a-7ce6-4b5e-a9a5-c1566e152783,ac603639-104e-4c67-88d0-063f00b15aec,a90c4dff-6984-4bd6-a8e4-d4366d7d2ff4,cf71a315-4688-45a5-a42c-5df899f114f6,d0f13019-9875-4b43-b644-fbf51395a635,00d67dec-d052-47ed-9c4f-3921c3e320f1,0c2c12c4-f35a-4cc1-98a3-a3cd7b13ef53,0a61e155-448e-406d-a887-d3287eb37bee,a3778792-039d-4ca7-8a38-86a6e36addea,7e4f8c43-4585-436a-86eb-8fa9f4793b2d,5d27c14b-614c-4b4d-bfaf-860171773431,df5dd0f6-97c8-4f61-98b0-4acc9e40c39a,54366189-0852-4198-be01-eff94831c5f3,9e6f54fc-354d-438f-a84d-441b85d09d82,a45326f9-c138-4b51-852b-b7ff146dc01b,0d386d77-6eeb-468b-86a7-52ffa93f1243,2344ad87-8fa1-4665-89bb-b005e6af39b6,19c4979c-005d-4399-b9bb-2a3eb507de22,ce59e69b-a977-474e-88e5-45a3c56b84a2,55ca66d9-06c6-405f-82fe-1ef7913e10f4,83a38aa3-a682-4140-bac2-72112132ec05,0052ad73-0ae1-4848-965f-d03b4f8f7787,4de40218-8b73-4ff7-ab8e-8c46649910f0,906377c9-28d4-4969-b1b2-395873a6a00c,7a37f522-70ec-42c0-a859-3673b88a9659,2f4416cb-cbc4-4b12-bfb8-a0769f1ada49,7c2e6ee9-7f61-4850-985b-ae1c2072f15e,63d34479-6471-4381-8f81-f404d90bf3b8,30fb70ca-0544-4053-9641-f57d449f19b0,9b19edaf-7063-4b7f-b33f-a7522f9e9d45,b102bf1d-abbf-4a71-ad97-ea08b31f4a69,8024fbf3-f382-49ec-aff3-9c3b44185b94,90af9c7f-1365-47a6-94d2-334565b82bd9,dab739cd-0104-42fe-b1e9-c16d839ba72a,c6a54972-3f32-408a-a3a4-2999eaccdb90,6ccf139e-d9e3-4aff-8074-aed4ccdb5870,ec8efadc-62da-4045-a607-b07cff52a7dd,596f253e-774e-4a30-975b-2ed62372926c,d75d04a8-ce2a-481a-973a-9c90562030b5,f8d5c018-3639-4e52-a4ac-4fb863382603,06f91ac1-35e5-483f-8600-4ea7ac2bad4c,7a7eda26-d7e5-4d5f-b7d6-621533ea76bb,e9d48162-2531-4d53-bf50-33262a97b3bc,b5ef4e29-a5ff-4906-a5c1-8c6a4f421626,817212fa-24ac-42ba-8b58-9bb1c14854ba,316e125a-00e7-42dc-9875-faf415a924c3,4cf808d4-699d-4086-a429-d81792629374,bef7b4d0-bf20-4d7a-9de2-e894899412fa,73fd0bda-0b98-4a7a-8ebb-d179d342532a,d08d603d-a857-41aa-a440-b489727271b4,c5cd8382-dc9c-4457-bfb9-1a016d86fbff,5322403d-2912-4c6e-93c4-686015e52359,97b39af0-0d7a-4bb8-892c-ab614ae8de05,fac10667-6e86-4c66-8677-d7a278b9336b,e204092f-381b-4d09-a34e-1aa13bc7d491,810abc4e-c53a-459b-9a53-58ea1ba7c3a3,70c835ee-bff4-4ae0-8c66-a00cf739beac,ae201bbd-c885-49bd-88fe-ac6d59fd48cd,b1b9a506-e3a7-46ea-b225-a8c611a31b63,053defd8-d96c-4070-bccf-323f099c2c10,d4a8a933-29db-47cf-82de-2795e079e8ed,9e3f4f75-44be-4317-ada1-ec397c365069,c15c6206-1079-4273-a058-1b9b2bbbcf5e,1e4bb0ba-6e19-4c38-ab9f-1350f05f6ae1,d911aab3-51e5-4a11-ad56-0754a9ef27b9,5dffedbd-7e7a-49d4-a3ca-4c95c387777c,fe3c2802-b338-4ce8-bca8-8048f754999e,a28efaa7-2bd6-4b57-a4a8-4585c8670cfe,726dfda8-9c0b-4348-b15a-0935cdc5692e,ed255194-dc31-428b-931e-36d867d7a30d,e7687a83-495b-407e-99d5-7c6802a29cc4,959b9d4e-e8b4-4d18-a70e-922d5d4a9a60,20cef730-fc30-4a1b-a4db-000622166ef5,1dda947c-1b9d-493f-bf29-9f08e6714fd3,9af5d4b4-1197-499c-8afd-6f5c567284b1,b3f54f5a-309b-45cc-8a6d-718a57a4750a,4c1ce35d-9df6-4d4e-9dc8-9b8f523e1177,5af9f786-aafe-463c-b3a8-a6139ba363a8,f2c28059-0d1e-428d-b186-223b5be21ce3,b6bb8079-6144-47ad-9e27-14f47152f833,63ea2491-90a6-4cae-9f94-824d6ae3b6a7,0b82d46c-5464-4ea2-8ab2-31643bea51f5,b44fda07-5142-4a73-99a5-2517afdf4fa3,7b634dc1-f92f-4f55-951d-22d596ef37be,e78f1a39-38fb-4175-bbbd-d1b798b1cd84,59cf1b05-8762-4cc5-930c-06352b006d0e,97d06feb-399a-48ae-821e-6780b8b50163,0197abc4-77de-4d2a-a632-c75e903410eb,1f14f04d-1855-4dbb-bc46-6cea736c67d8,aeb004fa-dfb4-4c71-8d53-d194b6d1ccaf,087ee49a-6b89-4093-a28c-b4c7c11fa612,83395867-9417-4638-bdbb-89511c1a1235,ba853c4a-73f4-4ce7-a2d0-c90c78d5f0cb,9a1374f4-00d1-4003-bac3-d394871cd9a1,4fd8c108-ee5b-45e8-af54-bbca67104a02,47aca60d-6421-42ce-ae45-29d9588310cc,a0658cb7-7fde-4023-8453-19b57b65aed4,eca48e20-f5f2-4b84-a24d-612328ac6210,f8330d1c-c1a4-44c5-b914-60b842f0ed1a,42ab5731-3e0e-4795-ab09-594ef2f36888,9bf025aa-b276-4cc3-80e1-056ddce64ef2,f3df968e-15fa-4796-909e-9e7c5c7c0375,b20423f2-6b7e-432b-82d2-e4d385f1255f,174b3d3f-8de4-4441-a553-6d2ee9d1c68e,beadea19-c892-4fe7-9107-e46069043546,c948acad-dbd7-4282-b199-ef11a933dc41,c157bdfb-e173-4bd1-aa3a-b37daddefe0b,d2bf026a-cd92-4ef7-8853-5e2f3b450878,54e0d7d3-ee06-49df-adec-e319c7e2844c,a44e0e24-7083-48ae-91b2-fc40dcb97c91,befc2697-4808-4359-b0fc-cf24de13d8e7,d774004a-5924-48b8-82e0-1dc50b84ae7f,1fb0f70c-61f0-4e85-9326-cd5a2e3d9aae,8755dc5a-278d-457b-90da-25b2aac24f6b,bfa08e82-f676-40f4-a820-cf66ac7d45ad,a3879aa2-f6dd-41ac-afde-bbcd078391b9,9197f738-d181-42c7-b68b-adc803650e2e,83a20903-7ccb-49a2-82bd-47f58b4ded73,c8e14864-a21e-4972-8b99-92544d896c7b,97875bdd-2539-48e5-b5d0-2442f9b83d0d,1effa2f9-7da5-40c6-8689-376a73adad64,172eab66-c28f-4778-a278-c6c43906f399,49105a9d-e206-4975-b059-ad1a2d5c3eee,ee0565ea-68d3-4f8d-9e85-fbfd07dab8e5,e821e6b5-3448-42be-bb8c-a17973c5505e,06370b26-d137-4872-874b-0ae331a95482,5a3bd602-c6ae-4aa0-82f1-bf3d85492084,31d56255-5529-44fb-b2e1-296f5cf42183,28cfbbb6-cf38-46c7-9bb4-0971ca1c426c,2e7fa2c1-39b9-47cc-bff7-fb73cf09cc9a,84549190-3d34-4659-9a66-0dc3128fc2e2,3ef689ce-7903-4aff-be7a-946e9f41ccc7,f45f8716-9e2a-4b9b-8600-5485922ea525,de54fdc6-f057-4654-be20-043e575f65d5,559adb20-5eb6-4c67-9517-466c730299a0,629da4d8-f0be-448c-b1c6-ee38f155b113,3dbe60ae-90fe-443d-b66e-d60125a2ea00,b1418b85-cb6d-49bc-af54-2fd69b78a527,c7e3f575-5561-4370-b056-13c91b322823,95a70fe1-c25a-4769-801b-7070963ab325,06a10e6a-298d-4d8e-9985-6e6941fae1c0,7a4a0996-d2df-4976-ada1-aa308e3fed92,484d1a4f-bba6-45f4-afc0-9f39a7cf9561,3599955e-619d-4466-856f-fa2d98201375,615138d5-a0a1-4ed7-8c87-c7ac30af03fe,500aef0d-d0fd-44f5-b35d-b651acbd9971,4f6e1fce-0ab2-43b7-bcdb-d87557e0ac28,ff3c0651-5c37-4c85-ae63-05b4cc0348c6,d385d509-9b1f-4c9c-a607-88c52f4af42c,99cc1180-1206-4a02-8f1c-592591437ad8,c85463b3-0008-4b28-b53e-dae747f0cb69,051388c4-df66-4b4d-bd11-081fbde305f4,0dab613c-dd8a-4ce0-86bb-c6cd326a2466,da6b798c-0eb7-4740-b6ff-359e58c08ec1,153596e7-2489-4041-8a93-dfdc92de8f31,184437a8-32cc-4909-abf7-e7126cbca1e1,06935d11-086d-4fef-b7ae-9e2dde2df0f2,b5800285-9adc-4b25-9071-73bc0da297dd,9c7450f8-d82c-4274-bf7b-5c4c99edbc50,253a9aa3-132a-4863-8706-8b84e6a679eb,1f8b94de-a511-451b-841e-e16592016f5a,7e77f6ba-70cc-414d-86c2-94ac579535a1,ebef94a1-4537-4619-885e-91a6ebf9d122,9e5e6ee3-5939-4f5c-b0e8-4f4c187c1eca,68b5745a-8065-406f-9921-7d103aefcb12,2b0cf69e-7698-4f78-9f7b-7e23459db345,a399f9b9-f45b-43ed-951c-980c2bfe8207,fe093201-c4a7-4380-a121-31ae0a360e83,d055c4ac-d5e6-4465-86df-d6d2d3375e4f,7c213428-1c38-48b7-b6ba-43e3faad0c4a,dc8f56d5-51be-45a1-8b36-f8d1e61a429a,6320a06b-8869-4afa-bda9-29d0b809e92f,6b56eb6a-246e-4119-94e3-1b3345295241,a7499cdb-e79f-42ee-9161-16727d23818e,7eb17ba4-2a80-4820-972d-49b35909e6bb,d77a744e-7532-4a73-8bb5-340906983e21,94b46b7c-53b2-4087-8afb-de9ba3a116d1,46e00ec7-29d2-474a-bf4e-d20ba9270ac0,ef4666b9-3d2f-4397-965e-bb836d4e9368,93f0e194-d56f-4e3c-9bca-bc2588cdcf47,121f278a-ae64-497b-a56f-8539ae20f0b6,95abbbd4-b651-4e13-bb6c-65acc3091eee,2edd24d7-ba39-4748-bf39-842f3b9abc8a,ee2be023-1980-4319-bbac-229a9b319476,7a02f09d-0cc7-469c-8c9f-b1ed54bc15df,ca1a9672-7ce7-4e8f-ad78-6f69d236adbb,c935ab9d-5152-44ae-a84e-10953cbcc06c,4cce9582-4f90-48b2-9132-f0506b183089,87c98a8d-23fb-4500-83eb-cc494e7f7ce6,8bf70760-4195-4035-8833-fa27c9fa7ec5,3040ee12-27a7-4e05-971f-c8b3354b31ff,72730bb8-a74a-4c4e-bd9a-85f4f88cc1ba,f6730351-1251-4042-8ca0-4bc4cc398e8f,2d71998d-b584-4f6c-8cd0-6f52e76d75cd,6d812e64-9cbe-41fd-9fec-138a2aaa5093,a80b6796-a5fc-41af-924b-b7c193180e7f,b3c98244-f66f-4732-8c2f-13a14652df77,aed895aa-1c24-4790-8737-39e92a85812e,46de9e2c-a387-4e24-84c5-be0ed14ee1d3,4bb1b6ab-ae93-467f-80d0-1f9f960500dd,5d0e4f4b-3fdf-4370-84f6-b62029784c27,d8afa099-8356-48b5-a8e1-b4f2c7c91928,8a2bc9af-1113-42b7-bb77-7c69a53bcb91,34d63fbd-6867-4509-b175-573e8649d2a0,1eb22aa6-17c4-4b10-9cc6-268ac5e80a5e,918a7e41-bb57-4d6e-bc13-e13647918546,e65f05aa-bf30-42e1-8889-cc3e5245d939,e7709fbf-76a0-47af-8f45-bdc80e049cf5,42a68a61-04e0-409b-85a3-666cca253aa8,18e35a81-ea66-46f8-95a5-f932a4feaf64,5c6213d2-7cf6-4ac7-9f4c-e776acc1ff29,758a6f60-71b7-4b85-a474-28bfced24ca6,dd7ea237-fa01-4b01-b2ed-7b65cf0b908e,78c001b6-1985-468c-b47f-8ab5ed1642ae,10f3292c-c973-4c6b-aa0c-c3030eabed25,44cde9e7-d2db-4a12-aa62-3a70e4740d73,a794f25a-06c5-4461-9323-f6eed7e596da,53296bf0-abbf-4a97-94f1-e0fdbcba2a62,fec930f5-abb7-4a7c-88ce-05b5236d05c5,fb1e4484-dd94-41d8-bd62-eb0393eb28b7,d5a96511-45ac-4ce8-b232-345b16d8c102,0b9fc845-77c0-4370-9c04-6275b2b94042,7a25ddcb-1ee5-413b-aa38-1b43e51b4485,b8e753d7-ccaa-4ccd-b497-b89410754c72,0a71db71-56ae-4612-8272-fe5cb8b38bec,0135d679-72e3-4c9a-acd0-f693a809cbfb,0f63e869-a933-484e-9e24-225aeaebe798,fb63b246-ea23-4fca-9f35-9e161da1adc4,f697d4f8-1bd8-4400-8b4f-dab42efcb816,4fd763ac-632f-481c-8dda-bd071cd9a502,3fe1f985-da07-4972-a9b2-f52554d27732,ec64a8ff-f4c4-4313-b08c-222123e6b0f3,8d7c3534-69c0-48ce-8b45-06134ed13bf9,a468339a-3df1-4dba-a15c-b5f8a933bf85,60f33cdd-7c63-4f78-8f4b-86175125bb73,15e5af96-37df-45f1-963d-a98f28529d8a,9ccdbba6-dd05-49e3-8ad0-0305fa95977f,75f80696-1889-4851-a039-afdaaf872729,94282d82-cb21-4e42-8776-b69a5d0829cc,c5787177-6c59-4f28-aa21-e93b6d2b1231,2ad2bab7-30d2-40b2-8cb3-a9ab724011eb,3c6a2408-0efc-43c0-9799-f3d7126cf7da,a370b881-0440-4f41-af10-317e351e04c7,dca54ed7-d565-42e8-b843-08d4ddbadeab,a817f73c-e608-4997-86ce-5e55cfa9a80a,e30f135d-819a-4d5b-b46a-d0d194470555,9d23955a-5c81-4ba2-b7b8-33901c57f0e6,60eda6e1-1892-446b-bb0f-4d71ff75cd80,6249d4a1-cf9a-411f-9e1a-b7a015b5be70,3d915a51-8e00-49e1-bb7a-7ebefa4f34b4,8cbfd696-a847-48e3-ac75-71ed00933dac,9685d3e8-1c7b-46b8-a3ee-38ed11819141,4f6a2391-448b-4d96-bb06-fd4598554fa5,334f98c6-6caf-4839-b292-7a7468317126,f314aa21-6a44-49e9-93d4-b4438e628d57,6c988465-b67d-4c2a-902f-b60522042ea0,a581f110-f112-4d76-aaaf-697f3493d895,6fb6f7c8-5698-4dca-bf2d-f4a66e13998c,c44fd651-23f9-4a8e-90e7-77f2ce1c660c,dd5a581b-20e9-42c5-bf5e-58fe08062566,049c5f40-9bf8-4b84-aeae-1709b7b163fb,fb75f258-c565-429e-a169-fb92626bfcdd,da9f0aef-40c8-4c01-9779-bd018b02b488,a3cedd5b-7d2c-4228-94ca-ff784741d6d2,736c4c8e-cb64-460b-a812-877ba932bc59,3b65ee32-29c2-497c-b34e-497539e77fe2,8ca9cc48-feae-418b-ab4d-873b9b13670a,3f483d84-9a44-4cf0-981f-f7a0855864ee,9a5e9198-adb7-4bc6-b4d3-41f828128d79,38ad970e-eb79-4d4c-9292-cdb9d85d7f5a,a639439c-c778-460a-b619-ee1a184c62cf,979366ae-1410-4721-a1c2-1d3270ef40f8,d867b087-21d2-450c-98bd-a4907df3f2b1,5d4b3cb8-1eda-487d-9172-2f672d716c23,13fab654-21bc-4c37-aca1-c4345fec99e0,2e3a2c6b-9819-427d-a37f-a0a7e8231cf9,ece2797c-6081-4dc8-bd19-5c15b47b7d55,d15949b7-fc17-4f3d-8960-d15d7e0c938f,b274b938-8311-4e61-929d-ac385718b179,d1668713-18f7-4e2c-ac84-def681b4ae13,f9cd40a5-b54a-4be0-aede-06b79470a66b,3118dc34-c35b-498c-aa4c-7b3be2ccd8d9,15db10b6-e1d0-4aa4-94ed-5fb7cf97e87f,bcff22db-e8cf-4019-9385-63fcb9174df9,62a6ee02-f5d3-4a6c-babd-cce0249f229a,f92da81e-7fea-44cb-88f1-f896acd06548,f4019045-ba62-4ff3-a112-df629c89469c,c890ac2b-e181-4802-8f0a-f76b21d5f80c,17023c1d-c287-40d7-af68-93e2f5e68bb9,7298868f-fc54-41c5-a775-5545dd4b0deb,7a9d7ddf-f4f8-4b49-b4fd-3a653c8cb369,80fdeb47-3aee-48e2-8913-0cc085b36227,691c9972-15ea-42f7-b927-811616537826,451c30f8-7fba-46e5-a6c0-a02d777710ea,848ed4d0-9f0a-40f6-b687-401c2bc4f840,cd3c7223-de80-4ca3-8ff7-373a196f543e,efc5b166-e8af-4bb9-9404-d4580452beac,6eac3e3e-064b-4367-8b14-4cb86f092d6e,f2390492-8a1e-430f-a602-c3438c8580f6,14a32cc3-c49d-4964-90af-0ab7cf299612,9f6c8949-f849-43f3-928f-adc7a3b9d097,e44c351e-98b5-4458-bf9c-f2eb93749859,2b5eb04d-e87d-470f-9b3d-70b8836bad2e,6c314a91-81a2-4ce0-a374-ed686dc2e157,317221c8-7c1f-4f7a-99c3-507f08867b88,901f3bb2-8c3f-428a-9609-10f9ddd9c20e,98e58933-2f3a-46a3-979e-852568855553,2c90685b-abfe-48da-8053-f2f609fa40dd,86403387-0092-4a62-88a0-6e67e49c993c,9e54d1d5-acfb-4810-a986-0b8026d23844,2d97ca6d-62b9-4ffc-8e5d-56bdfa54bf24,6bd24928-6acc-4229-9127-964afbd5c0a3,9fe163a3-c4f3-4497-be93-c3a0f48a654d,e37359c4-c485-4f84-8a12-72bb71c46153,ed003569-aea2-423f-bc13-398302c8b8bd,00563eb3-6182-4d5c-aa24-75448698fd17,d3847e90-2d7b-4dec-9173-914ba2829272,a7f2548b-d392-45c0-91e7-34dd920dae2b,c45f216b-84ef-4997-89ca-dea56a1caefd,1e0ce822-d2bb-43e6-af8a-0a917b07108c,3341367c-5ac4-4901-b63d-61083fac2fa2,95f5c8e1-3ee5-4c3c-b6b4-3748a70a529b,d1fe1652-33f6-4116-9e7d-be05aa576520,97087e2c-924b-409c-896b-bfedff61a2ea,da371447-8070-411b-9914-4ee1976321c1,fc6c08b9-915b-4ab1-b4a2-2ff2dc71199f,46a0bdf1-c622-4e22-aab2-81ee60afe071,64fb02e6-44a9-4b40-850e-1fa2066d891e,7cb53ddf-a61f-44e3-897c-7175cdcc10d4,02d192fd-9261-494d-8740-499ba55a844f,c5065412-08c5-4b84-8107-9c1d78a2dd40,8dbf8ccb-6ee3-4a65-a82f-cf055fa95a40,fecc3f33-1ab2-4df9-84aa-c01871e4c1d7,6e23de4e-b829-41d9-a4f3-66aa09304ac2,879c93e3-eeb4-478e-9f35-4c359de4ca26,c5bf6258-37cb-40ba-8d69-2c1c8b789e46,9740a963-ace5-40f6-90f1-6a852fe0b7ff,053fb938-9deb-45f4-a810-5a4195d34f54,1911cd5b-8116-4c6e-8296-5374c5a85ed7,81991f45-216e-4ee9-a6c1-29512459ced8,d01ce933-5b46-4eaf-b9d9-df4efc32794e,21474044-69cc-41e8-80e4-42371c1503d5,a61ed22f-bf73-4af6-b5fb-8ffe15d8e657,b62d1e3a-6f3a-42f4-81e7-75ed8bac381d,7fac1368-cb4f-4b6a-baa9-bfd7f77c0f67,d099ab36-d9cb-4a45-8abd-1314553779a8,2eb92d5b-58e7-4bd0-b4bd-3512d6a84370,403b7aba-57d0-4019-b68c-ce6e2fb6d92e,d391498a-91bc-4c89-8647-e9ff64016fea,6a16e120-5340-4225-a5d3-a22d49de0980,99f764b5-a5de-4190-9a0f-8d453ff01c0d,45386a16-ba71-4c9f-9cb9-26e1dbb6408f,b56a91ca-c5ef-4018-a303-3d7b97ccb3d1,6b584d49-4258-437c-80d4-2cb762aa5475,43549f07-0993-4cb5-ab72-9dfb70d5686c,4afff610-3b08-4899-a2cc-23a7db82b9ec,b5ad7421-059b-43bb-9483-5ded332eb40b,3e30b8a9-e54e-4767-8bb0-0adba64aba64,9b59cad2-1b1f-4867-9a78-ed4cf62d2a5c,307779be-fe8e-491c-9dd6-fa8d0b2c0853,e8ec16d3-e658-44d8-a882-5a8185d18e39,8ca61cf0-775b-402e-8d2e-72b2604b329a,2dbbb8d5-6474-45d4-a8fe-d7fba57f3513,abaac7c2-06f0-4114-b4bb-ed43935203dc,7da442ab-ffac-4190-8e50-da4962883a3c,201acbea-c0cb-4046-b3d0-5bc37a59d89f,313bf738-7e52-46da-a031-5b45bcdc7beb,d80d885a-07c8-42b3-a4b6-28325a1beedb,e2f118a9-761c-4a37-8c0f-da0ad1f1a875,5acadc0a-8089-474c-9fce-cde66cc9e343,92335f58-d408-46fa-8961-b5f2220cf17d,98cf22d8-0cdb-40bb-bfae-d3cd0952a914,661878ee-13fc-4204-95cc-7d62849b3639,14fa0036-f5a2-4698-b46f-4a350deeb10b,ee6567f4-b35e-4d80-8782-8a960508bd2e,d4a0ff33-98ac-4b8e-b474-6b5f8b821bf5,9141c21d-9150-4ae5-835f-3e02dc988763,c508ecfa-ce7b-41ce-8be0-8984a63583d2,12539431-a1b3-4d66-a7aa-9cbbf34c72cd,df04448e-da27-4b78-9f1c-7406cec4dfdb,464f4850-e85a-4803-9532-d9d7a1e25424,0f8f223d-2a14-4e00-8143-bc2816111be6,52b836d9-cdf6-4663-b3f2-723ee54e3c3e,32bf2d7b-ff57-4305-a1c6-f3deb8e4dab7,923cef1d-ae79-4cc0-a3d6-fe8cab5e9a4f,377ed342-7795-4802-9e77-38fed137a966,414b4ef7-f933-43fd-bb48-72b1972bcec2,baab2847-81fb-4059-b537-38540fb1ac62,1c151a51-3406-4505-b2bd-f345710d6bca,c08e37db-d796-49cd-b66a-226f051f04af,b06d5013-ef85-430c-ac48-d16bf30ce6b8,7b3a4b50-e8df-45f8-9005-e1f60a2e8bc5,94bd61c9-7126-4ed2-9373-c0c7582e4861,677d87cd-2a43-4a6d-b124-7b8694398a0f,acf79051-33ef-4354-b106-5e9acaf982b7,9040ee22-71cd-485c-882b-ae54592ca68e,19d9b5e2-6932-4376-a791-e3018d5ba396,89f7f17b-39b5-40fe-bb4a-e93f87e87cd2,13b8a84d-9e49-44ec-9d93-781595f4ec95,abc8bf53-0a51-4fc4-8427-d9c5598d16be,b83e1a0d-fc97-44cb-baaf-19298722d6e7,8941ad38-746d-488b-95a7-785985fe14af,409ba970-6987-45a2-aff2-c24fbdbb162c,babc9a65-5659-4573-8a5e-60cfcd542abf,0b7cd9ee-df7e-4543-a20b-5c23b47d0016,b339bf90-43b9-4767-8ccc-f46bc58a7de2,fd1e1ac1-b474-4d91-a7c7-9a3e11ef13fd,d27a5fe7-3ff2-4c28-aa46-d4c3807359d8,3c7d70f3-3d91-4424-be39-ceb8f71c5164,185539fb-d202-4767-a822-339da5851bb3,3ef37871-2507-4e44-a3ac-b3eeb4f9ad2a,5b371f99-2c4f-49be-beb0-82fd73241583,f8139332-3515-4694-be16-feba5c8720a2,2ec0a7f5-499e-4712-8b91-d04cc8a0ef8b,630ecf44-aaea-4272-b76c-d93721f273aa,36bde17d-a3f1-4490-8da4-fea883150001,2e64fe34-1b8d-44d4-acbd-6adcf0203318,1c305e37-2d82-4bee-8f71-c16989d4b546,ea893502-d6b6-4bc3-bfd3-ad9d3951e6ec,1f3922ca-33a3-4303-9bda-292a06a563f1,74fa2f34-ca99-4eb8-8d7b-72b082aa09c6,897bc814-db05-44dd-b9cc-c16bbdabdff7,cd41cd84-7964-4903-837b-8fd7af48c3d0,7fa71393-d012-4919-83e4-09bc266518cf,ad47fe8c-bae3-4ca5-bcc4-56bd815882d2,1247a277-426b-4e20-bb82-ff81d9e52c41,c31377b6-8951-4faa-ba66-b04756c70b30,0e40fa71-fe97-4b84-9de8-3422239e7300,b3b18971-ad22-4471-b2b8-cc561e8b07e5,77332cc6-b8cd-415a-af52-9132e4b6038d,a00d433f-c42e-4b35-94fd-b4d377253981,20791f89-20ca-4d83-a3ab-30c0c5dc849c,66f095df-1281-4ce7-8b11-dd0ad1096dd5,b20f1c37-62e8-4f23-9425-46d91c3fa529,c10ea543-5730-458c-a728-2c37265d0fc3,a924298a-d098-4153-96d9-63ef383e4dcf,c166209c-7a12-4fb5-af5b-846b22dc39c1,1f761d41-e7c6-45c5-a797-6832bcf11ac4,bab2329a-c43d-47bc-964a-19d3ead150f9,f4adb92d-fb74-4728-b97a-93f9d48837b8,6b0874d2-3629-4d1f-a5f3-2a358d070a24,343858d7-207d-49ed-96a7-ccdd4935ae50,7d709691-3ddb-4034-8b53-5ea4b437069c,fbb5782c-52ed-4d87-98ef-6b0876cc471f,e35d677b-1700-4bae-bb88-9096dfeb5f3c,79865154-f64f-467d-9be0-fded269ef18c,22541ba9-a752-4244-818b-42e057526aa4,07135fb0-6f42-491d-b971-56abdff34e94,c5b4f015-3b03-4f6b-a293-8348fcde439b,e2b211bb-f7b2-43cf-a437-0efb9df29116,b2967ade-78df-4e14-80d0-93aa607a553e,9aae4096-5925-45fb-a3fd-5e1ffc804ab2,9fcd34f9-8027-4c80-8a0d-05adc8da2905,06065179-c2dd-47a1-87db-8640f82887ac,db700e65-7767-4060-8ce8-4465c87dd5fc,f5bca79c-4ea9-4b7c-914f-883c24d7d914,2763cd33-8097-45dc-93cc-5a103b5ddc1b,bd91d0eb-af35-4a9e-82e0-3158f10ae167,204313c9-f628-41a7-90a7-c1fddf4b008f,568eca0d-b859-4f13-9594-c5b05b730ea8,46a27568-3677-4b6f-bfab-c2a7f4b40cfa,4558489a-0b58-41f3-bbb7-ea6759d902bb,03cf2070-289a-4e69-b5d6-b3df4847aabc,3f364cca-8ba4-4efe-9288-7b95547151de,2e0c34e3-0279-4ae0-a12e-07006555b4a1,e9ee2f27-ffb5-451c-a5a5-ddc1f6e36a4e,385b4112-3db4-4000-a211-bf0eeb0e55bb,1b8091ae-7b2f-4036-bd14-528b59f79002,7f5a1916-d25b-4d2d-803c-dc8e3a48ecea,d171ea81-5739-44d6-bd68-7176ceaf2265,0777f79e-0a9a-4739-bbd4-2cf364054dde,f5762918-1512-40a6-8356-2ab6db374e50,0b541a9b-7304-4224-ad2f-09c6b21b5195,f5bfbbe4-cdec-4dda-9e6b-f08296d972e0,a927f7da-4740-4032-805d-23e482019100,70a809b9-cb62-4a50-9bc6-f5c491fb4ed4,e7a494ee-36df-49cb-8b25-04d1e275f741,1afa36c6-4db8-4c49-8ac7-698ba2be3d0f,715f53d3-bb1f-4728-8cb5-70603f104fe4,c311b8fe-3c89-4003-8909-e9d9fa141428,ecc7b523-6ecb-478c-858d-b58245f39f4d,fb111fc8-eb49-4203-a4b5-62988850e103,ff4a0b64-9970-46ef-a874-5893f3bf53f9,a81797fa-ac44-406d-8be2-ff545d1089f1,4d2a09ef-cf32-4826-83fc-e6920e3ad151,9e0a194b-b933-4b7d-8806-f758583479c8,60deaa73-c90f-4bea-af71-f9b1e92fd5c2,34688089-bae3-4351-a549-7fbe3c22accd,8e36f586-5f42-4cba-baf2-fc1960b79788,ddc754a4-eba5-487e-96f9-188e6ce011e3,29774057-d75b-49da-89a0-43a23428c5f5,114d5166-1716-46fa-a2c8-d44f1d2d6d98,d6fa6c21-f3b4-4eb7-a96e-268de0bd8f3b,5457d157-9075-4d3d-ac70-5223821b550a,f07d30aa-1ca6-470d-a432-2a64dc95e08b,b01e7cc7-9659-4373-bdca-a893482f7a9b,135215f8-7b06-4806-874f-da555bdd019d,50ceb1b4-5bcc-4ca1-8949-4891f7c8fc68,d65865bd-b79c-480d-b234-5e8ec5cc1c66,aed0d1b8-1cab-4f8f-8921-570ad983269a,557a320d-97ce-4ad7-970e-dddb0e7519ee,1a79aaa5-3aef-451f-9048-ce32b3afda86,303d555e-128a-4021-88d1-bb673d78072a,4bbbca3b-d45a-4e76-a78b-6f531c87e72b,262be86f-3bb0-4b1f-b18c-b0193401a589,12756ceb-8d71-4e1f-bcd6-8cfb65a61977,ffd570d4-2253-4f83-bb4a-e00bf713cc60,c980bc1f-371a-4faa-90ca-2073b152c952,ee73d77a-588f-4b8f-9ed6-3c900e946502,19c450d9-d0d5-4a94-bb57-92f3c131b871,ac5567bc-98e1-421a-8305-4360a18cd914,1ea542b2-21b3-4aef-b9c6-7a171b28ea00,128306c3-f37e-40e0-99f8-cc08e97e37c8,ffc2d5ec-8724-46aa-ad81-bae5221255ba,651e1b11-c5b7-44b5-992e-5bbbabab3712,4d71ba28-79f2-4c4a-8671-447ac7c57f9a,17e89525-3e6f-494a-8b6b-3ec57717dbfe,f263cbae-da7f-493e-81ca-07dddf72d55a,ba077cb9-2ec7-4ac3-a623-d32acda04be8,47168597-4f4e-4dc0-a6ef-5a117e526a90,1d1cd308-1e71-4e10-8726-b6a1de973495,14359d93-51f3-4068-80c2-911674ba2f38,1866d31e-749d-4888-a8be-56b270533b42,7f958f55-8ced-4fdb-baa8-033785c79a5e,63c50e38-1657-4987-9ac0-33dda8b6794c,0ea94889-ecaf-417b-b3f4-72cd1fdec5cd,b0369d36-43f8-47bf-a798-c62da6958b0e,bd41e5bb-7026-416f-900b-66a720d5de6c,330a61ca-0ecc-4305-b1d8-d6c4f6628480,30d455b6-7b78-4389-8c8b-612ee987a77b,5288854b-3e2d-46bd-be94-1e5fbefd7f97,c6d89ae1-07e0-4ac7-876c-8a43f92c7a54,86ce2da4-a602-4e90-ad96-3de431916960,cf6a6e49-d256-40d9-af46-444789eaf00c,9f71f82a-196b-4356-ac1b-f974c72ef0a4,742534e0-8e62-4875-a40a-09c214027938,d41e1aa5-d595-4f89-8ad4-4e8764074273,98a4407a-92b5-4e93-8e30-3474afcb1688,72bdabbd-0609-43cd-99d2-2acb62524a64,7a9922be-e409-4424-b0b5-fd33cb1544c8,1d8c893e-0926-4fa4-9895-f03b60ec2551,f3e96266-1cb1-4466-9f15-db2054189c24,851065f1-2e8d-4a5d-ba4d-e905f2670f8f,142e5780-2544-461a-a251-265c1da8a005,0295ab00-3008-438c-84d0-7d39caf0d36e,9776657c-5f77-46e9-a7f7-42b743106d56,5a100902-4191-459f-be7a-ae18b2f19e86,9635e303-b8cd-44e4-b6f0-66883c798024,39b8bf2f-d7f2-4977-b4fa-b5eaa84729b7,68f1dccd-2fe3-4fca-9779-6d92de5946c0,8b520342-e70d-4fe2-b1b2-6dcde7ad6c70,884f2c09-3a4e-44b9-a6e9-4947ac7db05b,60aa922b-4319-4372-97dd-49151ebb5591,b10e0a21-8e45-4866-91bb-455f0b732fb1,737b60a3-18c8-4944-a61f-fac27c3fb5ac,b6c4955c-37a1-4abc-ba9d-b9d440712786,8b523ff3-a44f-4874-9e9c-b6997710e1f0,7017f6e0-f8f5-4c29-b20f-44dcd4f8586d,c0317f47-ba3f-4877-a977-7f72744be37d,6c9ee05b-3b98-47bc-8f01-8b3a0447f0f1,09722868-baa6-43ed-92e5-2129f4d8216a,eb70984a-5346-453a-905b-76cb7e376a1f,bd28e8c1-8319-4dda-ac10-1f97fa277219,f345eac5-e596-427a-8037-0041300a5cde,c1b3e36d-3a1d-4f19-b053-35e48e7935eb,4e8dc698-f68b-464b-9a57-cea2c271a313,f02b3e6b-1241-487d-86a4-9090ae52399f,d6e84842-0f05-4dc7-98f7-6be1c2e09687,9bf62c01-f280-49b9-9ad2-3a7795cf51c9,9e3e1a1f-bfca-4cff-80d2-b53d03593a8e,b4efea31-47e3-4098-9533-597ae9e4af02,85388b9a-ffd1-4f88-bb1f-71c714ebd313,83ac29b2-0bfb-447f-9703-9507f03acf3f,48ac0dc4-f903-496f-8d2f-2586995a70b2,5a3000fb-8c84-4dda-8373-9cb1250e8cf8,08de8426-ddab-4024-aec8-869551d173fa,eaef0fb6-3584-427f-81aa-456e51b1c01a,7937e9fa-e2a5-4139-a996-5035ce9f13a9,c0d43a67-13fb-462e-9635-d9d3dd037aaa,a9a81d9f-56ed-4490-8746-bf3125de7ba3,885e4ad0-92f7-48b2-8aa3-8d957fbbe236,21a4f6bb-7f8e-4185-bfbf-5ed4c8e0f697,d07f2b5a-ccc9-498a-9768-f05fa3c35a48,0790236f-36b8-4825-a31e-ae12cb57d2f4,cfb3f71b-675e-46b9-87c8-2337cfbc47ee,5be9e635-db83-4c2d-849a-5f3338ac9258,2d2e4a2e-2548-4511-90c8-aa396bf1e9fc,c895221b-1898-4137-83d1-a4084196c54e,8a8b6ad5-19a1-4ce2-b770-31d046d37574,f576c7b0-e225-4daf-93c7-e1ba2b065605,2b35b406-69d3-4ab6-b3d7-b73728610efa,55715bea-0483-43ff-a0fd-1742abd189e2,260853e6-2a75-49ac-9534-1920871e95a3,6e311bd5-b5c4-43ec-8572-49661560e2ea,74258ff4-4d70-43d7-92a4-ae07cc74f24c,52b30ec3-60aa-476a-b0ae-edbc62d5c0ea,a4781abc-2707-4b18-804a-e2578f69c238,ee49fdec-19ab-4137-930e-3715ecb3f939,34aae4a9-0cb8-4797-aa3b-b17e49216629,98ac0de9-c7c0-4198-b3bc-0431170739cc,7fabbbbb-7c88-461d-ba63-e924fdfdeebf,f93345d3-3357-47aa-b073-99e7137e09a4,62e2151c-e676-42e3-8d84-c434db14477c,63d4219e-277a-4d0f-b842-e7c97c9f2f26,820c1554-81b4-401b-92f6-c5d7aae85d88,eb332e0d-ca12-425f-bd44-fad509c98efd,083581f7-5aad-47a4-96fb-d62a2bbce0ce,d964e47a-0057-4296-9209-0224c1966ca2,945398f9-2805-49dd-9bd4-fd0e9f689543,71750437-f25a-4a39-8dfa-4bb1b2a7af94,3c870955-90ee-4094-98ea-d889cefc8f19,20f31e92-e04c-49ea-b884-69a0a83b2a60,fefc3996-b466-473e-9fa3-0f1fe8951a9d,67608949-7c22-4ec9-9bd0-9f4875f061b8,38c88a32-0bcf-4f8e-b718-b671a6fcad88,33d78be3-6f52-44d6-9daf-e26ac68798b9,53b86209-7786-4105-ac5a-bace37a57b7d,594e404c-7b2a-4937-a5d9-a6874aa1a0f8,69af2b8e-971f-44ea-8309-e6fa84790bf2,340390ae-cd9b-441c-88f0-b894895da1d0,621235e9-5126-4bf1-be79-d39857c7e10d,2630a65a-de7c-4495-8bd8-67fa0d66e5b3,56d10481-1041-4345-9b2b-41cfe186ba4c,70525e89-002f-413d-b2f7-59342a2764b0,36606866-c79d-444e-89c4-08fd338f4839,e3449666-f4df-4688-8e0a-15766a418839,6dcf6323-6bf8-476e-b120-0aaee8359f11,ab6d9fec-14bd-4b93-9880-4b416a09ced9,07a1254d-aaac-4a6a-9591-11db0b244b0d,3f960d67-c574-4b25-af2d-85b63cb634a3,4c94a8bd-1616-47bc-b419-f84d4521813f,c1325c7e-a1ed-4a70-ac57-66d5d7fe16eb,59cb9f4c-fbc5-4c7d-8e2e-a64ea9e02fad,400d4547-3fd6-496c-8a01-e2c97a5d4a0a,5d5a5a38-9f7a-45f8-8b4f-9d68a7a63188,bac3078e-18e7-436a-a4c9-60d99b85c43b,16c6dc16-1081-4bd4-8cbe-242923c36c36,d70896c7-e1a6-4093-bc0a-cee5c3c41559,e1044a98-86b6-4d15-85f5-88808f8d4930,0a56bebb-5064-4d01-8868-665d6648a4a9,a76a1b09-226e-4a56-a832-a76939c8822a,718a831b-c2a8-4a0f-8777-86c9b73d5dd8,949a66f4-ac17-4a25-84bf-05e64b2fe5ad,cf82765f-aa5c-45d0-a455-fda907943e39,b9a49dc2-814b-48f2-a210-e84fc636f35d,09d4c5ab-f345-4aa4-9d39-c2d3e139ea1d,b6e61813-883e-4576-87c4-7334c91604ae,d3e54efa-2d8b-420d-ac3a-8229ef6da289,75a308fa-b388-4a54-a8e2-efd4c2f402bb,aee49d5f-6365-4ace-9416-1e654882f3dc,54becc03-51ad-4fd6-8d73-49cc38de7d7b,6ccde2be-b1da-4aaa-9377-bd2017dfe2fd,957aefe0-a425-48b0-9031-c40a0165e1c7,5b214a6c-e619-44fe-8992-efd65d2dfaf6,f9a2e80f-60f8-4fe1-af64-f8eb8949cbad,4f073e7f-1344-4150-83ee-e6d79cf79e96,c2f50f46-b48e-4533-aa86-c0d3a4216876,17373ed7-d529-44f7-bdbf-6d9bcbb8566b,5f51a314-1e1c-4855-8e54-e8d5e8864385,3e58fb9e-9b97-45f1-b110-ecca0357b274,4cca29fe-4ddd-4655-9c9f-93651beb94c0,b66e39ac-83c4-4397-adbd-208f1f8eb60a,e6d56234-6758-4099-b2ff-77cb9a46b6f2,438a829e-ede7-4093-b10b-9e89a1dbb78a,61285306-300e-4ea2-96e8-35f30cf3de9e,ab83d5e4-6ea9-4211-a4a3-9459e9b4e475,32fdd4c9-d7e5-43eb-831f-d9ef52d9b4f1,9c07d86d-0ba3-452d-8107-e600b84e6aac,4f1cc565-479a-48e2-a803-dcb3cbb2180d,b61a9eae-edf6-4875-898f-d3fd48386ef4,7a64ed58-16d4-4195-a8db-bef032320778,9877e680-bf13-4cd9-97a4-ad6bb9c8bbab,c8eefad0-e3bf-4962-b227-47fbdc157300,61993e3a-2df4-4bf6-bf91-c0df97b18089,b6db7f95-7af3-44bf-bcfe-606c149a25ce,fddfd446-fa6f-442f-9d38-3e52c3999c78,171dbc05-956a-4f8c-9067-9db854b6e27d,0a6a4893-edc7-4205-a2f0-ffa44b5df823,5181c49e-3805-4be5-8d08-1efffb627600,66839132-80fe-49ab-9ce3-10063093f98e,22512c2f-655d-4760-85e8-13f04c93dd5d,bcc2bac2-28fb-480d-b293-9f8d2faaa4c4,8268af3a-819f-42f5-93fb-de05ab6e561a,10aafa08-6f95-4872-a582-671e40b69bd8,27d017b8-bfde-40e5-8a92-5a9dc6e6339b,0322689f-879d-4e76-9ac1-da36c2c36b12,42ac35bb-6b14-499c-8f83-fe2b70ef6ee1,9ee37f49-f0f2-4ec0-a3aa-6b676c392426,f65d45b3-501b-42f7-ab55-bb81cc2ae234,046c0899-5bfd-45ba-9ae8-6ba93eac4f84,72fd732f-f28c-4583-82f7-dd21a1883206,d22033d7-9329-4f78-afc6-9966a3d3a96c,c41c9601-53bc-42ca-8a0e-9f11e31653fd,c7fef94c-6e10-440a-8082-b5769ce57896,0563553c-7ac3-4abb-abb1-4039d7cc35dd,886a12fc-bb0f-49a6-a572-b3110829bf5f,0feec476-1c88-407f-943a-ee34a1784950,5309d1bb-2b16-40ac-885a-219f7bef02bc,7fd2395a-0a14-4ad6-bb4f-04093bff7c66,f5864256-5eb5-46d0-9143-88539bfbb499,f67ef8c2-5f5a-4e93-925a-bd47cbc6c413,ddbfdadf-3502-4f19-b7e2-04df6cf4eef3,d8b76251-0e1d-46d4-860d-1ce9ad4f4893,612e7910-30e8-4c75-a4f9-71f384b67265,d8eff1fd-92ba-45bb-978f-8f0a640e3429,46ffd41a-594c-4a4a-908a-2968e93b9d95,3d124f3e-3482-49d7-a304-ad09c1cb92c5,396cd58e-1937-48ae-9c4d-92f7815b4e07,bc08f48b-fe6a-455d-88c5-a1cfdfdeac75,534dc150-1092-4dc8-914a-4f97ee104ca0,902f090c-8e48-474f-b69e-f209d59935e4,1972a3c0-3ec6-436d-bd23-04989e7415f8,8a43c637-840e-4773-b119-52482d984b13,1a912c72-492b-423a-8aa4-9636be9d348d,f116487f-f677-4da9-9105-db33cadcfcac,e35ca897-4b81-43e4-a6ea-f54b9520c7c4,5129be1b-243e-4d52-bbdc-e8a8bc89ae7b,74529fc2-d1e6-48c6-b76e-40b963dca582,420b64cd-5d26-4991-925c-50265121e7a0,a982df74-4031-49cd-b302-0770153a62fe,a60076e0-ca3b-4972-9cca-22f278555174,636d18c4-1214-4760-aa9e-f54e4831513e,bf936724-ac89-4491-9754-22087387cc37,b342052c-423d-4360-aebe-78f54bc9142b,95c13a88-5500-477d-b494-420840ab88ba,2e64b368-f3c9-4c18-9055-14f8893c34be,03ee8d54-5bcc-4314-801d-9466bf52f7ab,6be3b53f-f532-4f17-b38f-e5a6d09011bf,61062bf3-3770-4ec4-a1f7-5ce3b3db37f5,62d4c913-8beb-42f8-bbfd-778d1c003548,237bdd1f-bad5-4bc8-a0d1-db236fa20c8e,6c5c51ba-47eb-4311-ba9e-bce591019de5,eabd0d8a-c77f-4ce2-bc43-15ebceccc089,4a7f2861-32ca-4468-92d8-6bdb9d5b280f,ca9133e1-dee3-4e7e-8d71-19dfc868d863,f4dc97ef-5042-408e-91af-98188aaa925d,5ba280f7-2345-4982-895d-5cb184feca6e,70077abc-2358-4d05-874e-79ed3ca24a20,2fb5d8a5-4d03-4046-befe-ee897bbcd8c2,fe04a81e-aa80-453b-9fdd-f2c71a25b71f,a6202f26-99ab-4555-a11c-de81b4632e32,fcb652a1-a0bb-4eb1-bf67-5f4d6e3ed4ac,f5c3ef0d-d1d3-4b57-8a20-ef4ea0709d16,ed9d7847-b77c-481a-b868-0a41ada9a444,c73134ff-d3dd-46d7-bacf-d83eaaac3948,dc53f32e-2edd-4dcb-8d57-f5fe91977db9,45e647f1-64fd-42a5-9424-4c002c594bb5,304370b6-00d0-4554-b471-162ba95afc13,a083dd59-acb0-4c5b-a3c3-6fb70a6e51e2,aca6cf7b-2251-43e1-b78a-5ebbb251c19a,ba84edc7-083c-4b85-b862-521dfb042472,b3b80a45-9d1b-4cf5-b06e-2dfd7a69c151,06bac6bb-67df-40d0-9d73-d67ef85ee3dc,3d2b2244-a327-41e0-980e-dd38e8f30b1a,fa825eb9-1e63-457e-b16c-913a73e12995,63e67d92-df81-402c-b243-d3f34b3b3120,ec6444d0-41a3-4760-b7e9-3c33ebc88d5e,1292d6d9-c3dc-449c-aa8b-38930748ed60,f857505b-a5dc-45ed-96e6-5ec004e0f401,3f5e186b-3f52-46da-b7da-818aab75d384,8bcc9cbc-57c5-4684-aed5-a19273fbfc00,ae11c2ae-4636-4f9a-b5cb-c5e713d96414,e3b5898a-853b-42a1-afb6-0af38fe17312,64d0525c-7d51-455a-b211-0d660f32b4d5,9a95f58e-f77c-451e-94af-2f12515c846a,3c823544-3a1c-41cb-9be6-04c8512abf91,9b5b5e6c-87de-4d14-92cd-6e886a8329ff,a9fdbf95-686d-43b0-a1e6-87facb2ea4ee,dd6d3e14-41b9-4169-ad03-24267a693510,532764d9-9d24-4277-97bb-af6323330f5d,52cd1e85-da9e-4553-b48b-24d39c8a61c4,5e1b2a63-999e-43b8-b314-4f68704a5031,4c96810e-c365-45bb-98f5-205b9a7468a6,5a989203-502c-46dc-8564-666f8201bf06,13c194e6-176e-4532-9cba-44cbb30ee954,25ed49fd-8cfd-4fa5-adf6-2122fa02e83b,94f8945c-8bea-458d-8596-2d8aa06fff9d,1c04d886-613a-42b3-adb1-ea61619fed5c,322a30a8-ecba-43af-a418-cbf240f1a7ee,90618a15-5ef8-4129-bade-fd4d73282db5,c32092c2-5b38-44a3-b676-04f9832ef771,530e8f0c-4e97-4049-9ec9-a41ff1b1be58,78b9e8d2-e30a-44a3-9779-ea942acb434b,b06daec4-3908-4017-ba2a-57942a558e57,322c706a-d056-4b8f-86c8-bdd0db8b2c35,b2009d8c-10de-4cb4-954a-d9ebb3263abf,bf7c518a-5450-4719-b663-d5d7b906b451,88e7e0df-cf03-4e44-9a2f-6a4201fd8bd5,81c1e5ed-19fb-44c2-b7b9-3ad07f1a67e6,10ea9b67-f6a0-46ac-a5d9-e9de9c3f24f5,9ad4542a-c742-4bdf-9065-77b22c529547,112d056f-eff9-48d9-91ed-923704aaece4,f1a8be0c-2431-4348-a364-4415f7b77dcf,b1f315e8-35fb-4426-b904-620eca4189fc,d5e9d6d4-a522-4da2-b8ea-84612a24b0d4,796cc99d-6518-4b1c-b049-27d733123b6a,c07eefe8-494c-44d8-aab5-6476ce5651ff,92ca7d75-9860-435b-a61a-010760bdec3e,f8e2b020-8b58-4b74-aa63-741b525eb8ab,9308c6dc-a779-416d-9466-1e29a09f893e,4ae2d9df-5aea-42cc-a480-9b45a9c6484e,8f47b19a-6abe-4bd6-a96b-24b5636c34d7,b80c57ff-598b-4791-b93a-498b44adda66,c2ef9725-8c64-4772-9123-2dd8e7057790,846fb4bb-a857-45b3-a189-61a1559dd6aa,e5cf53af-0a65-4944-8aa3-aed818b4664e,6053a424-0756-45f3-9183-1029751ab64b,a16aa82d-4b77-47b7-b686-57954d15559e,79b5bea2-5ab3-48ee-a02a-1641c6bda431,066312dc-7436-4ccc-b88d-7cca3f9a43d1,bb5cbf0a-6cc0-4519-89ac-d79e094fc546,e692ea35-1609-40be-b4e0-6b588bbabfc5,d8ba0191-6c70-4b06-8e01-88882a383534,ddf5981d-dfe7-456e-a9d0-5f57ab3edbbc,eee63cb8-0010-47c0-bf06-37b3f699a851,cf0840d6-80e3-4f87-847d-6bb33404e14d,ab056df3-7c3c-4fd2-a34e-a3141262d09f,a0676b79-ae2f-4588-8832-1b36a1dde92d,85fd272f-2ed2-4a5a-9a1a-f75128753012,e8ab3a1d-5c96-47a9-a8f0-40d25335a281,cad1267f-d8a0-4f0a-96de-4d08be6ca397,2d01fb50-016d-47d6-bda2-da763857efb1,9253a229-a0f2-4580-bb46-6b5e60766930,74644084-426e-455e-a496-e31e4e55f985,ab36cd03-ec29-481a-9f80-55a77442c7b6,8b1319e0-cbf0-46a1-9ae7-5c2c4cab1a2e,832fd74d-a655-4145-a83d-e1b597af40ba,30df4039-0fb1-4581-b08a-e5ac8eb56117,8126d81a-906e-4a04-8a6d-222076e7cb87,98e97e08-b33d-4ffa-b860-505c78189c1f,840b8fee-0cbc-43c5-839e-65f00b7e2036,ca3aaa03-9200-46c1-9049-91c9fc1749ff,1c219567-e29d-404f-b457-b46f03deaa13,3c0f078e-8d83-40e0-8da9-8c081ff87ebe,01b3d195-00f9-40cc-a644-ef43100160f6,7211df5d-8fc7-40fb-ac20-19ad2c15d620,c29f811a-2119-4fd0-82e3-f33ab9762536,72d2a5bf-d444-4975-9cbe-4ebef11e061d,cffb662a-a335-4bc6-8f39-153c3bd64d69,874573d5-0933-4daf-9424-be8a618325d9,9246bebf-8de2-4c26-8b25-82f81568c11c,baf5dc29-1f60-4eae-961b-3c83fc8bda4e,1acc199b-9229-45eb-ba17-11fd1cbe3ad0,1eeb31d3-8c7f-45b3-932e-789185a938c6,287be888-e7cd-4755-9bcc-ee7fbc065b3d,f9a96038-4baf-4bd1-8af4-fbc4e937c2ff,d02ee7d8-6fa2-458e-8eb9-ca9c61b75ef9,6e8f9509-e2fd-4792-a083-39feb24a006e,76ad7a0b-3581-46ff-b505-916be2bfb04e,9b729bd9-4f12-4276-a4d6-52296b83909e,93025254-edba-4253-92f2-71fae9fb93f8,5be63ad3-b3aa-42a0-8f65-2d8ccc93567b,078ba76a-5a4b-4cd6-996f-5038a71eefa0,fc4cd06d-1412-4a2d-8a3c-4b2a0eccb0a1,debcac47-9036-4f2d-82f8-4a4004fa44e4,945cfb06-d4c9-45f9-b704-ef6fca4267e9,d6415521-d864-437b-b874-1659c8e16940,32537697-3f02-4fc7-9afe-e04bba864665,be190c17-2704-4d3b-874b-cb944618a6fd,fead6720-371e-4e20-bbdb-dce6b4f642be,bc4b08e8-75a3-47a6-9373-5acf19deaee7,78401da4-d964-44d8-aa5b-d0363e0df3a4,77a4ecca-16e5-4348-b377-0015642c68a9,8938d292-4ba3-40e7-b98f-cf43a8c5af69,0ac76979-ec37-4143-8165-da126727ef85,17d99fe3-2618-4ada-8953-320f05d9eac6,0ae35f48-0fd7-4180-84c5-05df1b8163af,0e04acc7-cf87-43cc-8208-a852d803fa77,9827763d-053f-4f76-9f67-7648a5e74840,e2fecd42-5fb7-47f6-85ab-465f0a6b154b,f3ab90fd-121f-4c3c-96fc-3794f9e5fe3f,22a705a4-c445-4f32-903b-ff39d67008f5,c6d2a163-8ff7-4a5f-b744-0446ab2f7ed8,2be1aad7-0d6d-4ef6-893d-50d7ce110692,69545414-ffb9-4436-8874-92ae6d8fa0ca,7cfbf9ac-a632-4164-8e43-9fe6d54a41d9,95b2135e-a900-4cc6-9e6e-955c4969fb1f,24e0527b-ae8a-473d-8d9c-70f3ea9be2ec,df605e17-6145-4e32-875c-15f8e7ad8e91,8d356a22-d996-4770-b43e-37e20d7a4360,b633ec6e-240e-41ca-8828-95ca861e5776,4037db72-4133-4ce8-8b43-49792de0df81,efc3d4e1-6294-4719-812a-f885dceda943,61afee24-763d-4c07-897c-840194d74cbf,f42b0a05-4eca-4b1d-8648-1401564c1343,879042dc-3d9e-49c1-b515-3640dc0d1243,c2a84ee2-74d0-47d1-856f-145a909a2ffa,3db62f3e-d963-4e4b-b3aa-f8af3a62ae6b,28357036-2163-4adb-965c-2ee7a99253f8,0f8c8845-7e25-432d-b6f5-0f65ecd64c83,36a90cc9-c415-4c0a-94e7-e99009171d4c,1164c749-2b8a-4807-8eeb-c4f9f6beff8b,ffd0b377-4a60-4595-93e3-262925f8cc4f,0195e542-d483-43a1-b83c-d1b9d2880c46,fa2424d5-7d00-421f-83ec-650e09c39f23,8e120fcb-6c6f-4a72-8fc8-5ce552104ecc,e0ce3252-ac69-4cb3-aa5f-68b6efa7671d,c7b8be34-9dd1-4e52-a62a-bff899dfada5,19a749fe-f1ce-4da9-bcd4-1c17d1f3074f,788cd11f-0f73-4a9d-9ab0-ada03bff8270,197111fb-8353-4dda-9f75-978ea8502955,9c89e496-2773-449b-a651-e114d735ef7a,07dd5d35-4fd8-470e-a066-fdb6c8d14ec7,4fe7d53b-7634-46e9-b00c-59d25fa9de35,0d565736-a96a-499a-89bb-1a275e15fc59,b97808af-3c91-4c88-8292-a44c091f67cb,9b278b83-2d6d-4f49-9204-d9a6dafd1bac,7a796559-d8f9-4bd2-9bb9-fa1b1e2b1942,461f9234-cee3-491b-890f-d07de4cf0351,3dca5d5b-ec6d-4165-9040-55cd7975c818,aec410b7-12bb-4c07-ba16-3185209cd7eb,d7587c26-26f2-4dc3-af20-aa56e5b4d33e,d350017e-ba61-43c8-b13d-e96f85f34958,ac90801c-d93b-46c9-8885-241403ab1cbb,0884f654-5ee7-4949-8028-4092e69b9763,b3c0f785-8422-475e-bb47-01a442b34a00,81f33324-3ceb-4862-bcb3-58c30368ae98,8c19e486-914a-4e1f-8d89-7f18af84dbba,248ea956-38c0-4b54-9d12-913cde40be7f,4d420902-387f-4da1-9179-dc739cd3dada,9874c864-d636-4877-a364-19da60d633d4,bc999f3d-005e-4252-a092-edafc042a5d1,9eb00b31-c905-4bbe-8a6d-2d071a967775,0f3c487c-dfe2-4e0c-8c91-066f9c2a771c,e8762b79-5cf3-49d8-aaff-8d1935a866b9,4f2c2f9b-3cc8-4b6d-bfc9-0feee62f6078,4e2c0b4b-c7c5-4819-80c5-8b967a710020,31d738b7-42ae-4284-8ddb-829d3fe07f53,d5aff655-5914-432d-9961-2ab80252a7e8,79f64729-28ac-4918-83a8-d00f1b00867a,b9b9cc53-d0e8-4022-bac5-c0fc2e37fc29,93c472f4-cd56-4246-8982-54d8472f8d94,070f1dd1-ec8b-490e-b6ab-fa9cabf33664,d3c1360d-1beb-4301-b42a-9260b760924e,e639d5f1-9337-4481-8e6e-3fdcaf19dcd7,a6349d66-af63-4093-a100-901d03c92c01,d01a0f89-47c6-468e-a9d1-616080bc8daf,44b9eba9-09b0-4316-b3f3-022bf4d5871b,0b8a4761-270f-49a8-bf49-17d20ca8827f,40bfddd5-1ad1-4e2b-b86e-28cc5adee422,05dd2c54-40b4-4bbb-a8a8-5d9d1e308277,c90f792a-ecc5-471b-8075-f2e89b8925f0,e01b81a3-2d48-4bb0-9057-f70196636bc3,2f764b92-1ab3-4574-af8b-239fe6e988b7,f93eea81-2638-4a5c-95e1-203d3e27556b,cead7c4a-cc46-4e7b-9a25-9286032c0dd9,19292294-9955-4121-bc38-9e46f205a98b,86c533fd-ae1b-4cd9-9d23-ddff3ed0a227,5f9985bb-e1ce-4a1e-8490-8fe5a33febc3,630baafd-bae3-43fc-aaa4-1183ef0a5ad0,d4c1c06d-f5c9-45bf-beaa-6d528712036a,85df72ee-5297-4ad6-a13e-ac915cce2bc0,8ba28406-2575-4cb4-b09e-716a850a2888,3e435d5f-21f6-46c8-982e-06d9d8a96fe5,c3889226-52a1-4059-a2bd-70fdee25155b,050e2e32-c865-4f01-91eb-dc4e06826186,897b49a3-60a2-4e87-a2f0-4dff3c645bdb,ed24af21-5899-4262-8269-b1c0f0efc3f6,b0adacce-cc2a-4110-88ea-d9e66f226fae,302b5683-6fdf-4769-b1b4-d4fb3170a05a,a96fed46-d2a3-404b-adbd-1e7fa3c3b0d3,12a98937-bd79-4f49-8b22-8eeb84af9298,ccec9456-773e-49e0-9b28-5e7c7a18df88,e357fad1-380c-445b-82cd-eeab2975d4aa,3237e1aa-aba9-41b5-b0e9-f2680b9a6143,6c86b6db-ed32-4c1b-af89-bab50ede26f1,979515c0-76be-4b47-ac8a-acf271690ef5,3a04ed25-25ac-42e8-98d4-f7720c6f8e9c,d60ebba0-f1b3-48f5-a660-1948c796fa54,689473e3-5749-4706-8072-f0256b536092,ff2f5ac7-c266-4035-9d04-e8481a8ecaac,f97f213c-d050-45fe-b219-d6ce280dbe84,077eed00-4d8d-4fd8-bf07-5676adec39e8,c56d395d-c0c3-4310-b0f9-4c84872e0872,fc2e834c-ac2b-4682-8a53-c46b8ae6bb8d,0622ac4e-0b83-4c8a-a281-9e5fc8498541,052141dc-6312-4699-bbaa-da6a37097fb2,5418f9b8-e0f4-43e9-b8b0-2081d6d0675b,ef6fc190-576c-4bfc-99e1-d1dc5445b76b,6fcdcdef-3e92-45af-aa1b-c2e9d32858d6,cc7a6785-abdd-475f-b905-d8472b395fc3,dabced68-b21b-4d8e-8dc5-93386f87285a,fa617972-b6cb-448c-861d-b01293a4d120,9b2ee19a-9d8f-4120-9741-9338ef257e41,c249f763-d24b-4afd-81f7-f671aec1998b,9a4d2d0d-d396-4534-95f8-17aeb837b60f,f51394b5-d59c-44d0-9ae7-32e45a973676,e67a5203-c975-4b07-9e49-d94af679a571,5c375a53-0be8-4ec0-9500-4a655a385854,e92a8b2f-9a11-4494-9954-62a6f5901062,a091adf2-4f7b-457b-a20f-7ec3342d9d06,35e1e377-868f-4230-ba5e-5297f2120d52,0bc2a8cc-2383-4675-a1c8-61b46c2bef16,eda03d76-bb6e-46a6-9938-ea224df6a5d4,158190af-add8-4bfe-b321-c45f0677845a,448f5250-43e8-40fb-a8e9-0119a76978a8,236631d1-47e3-43f6-ba3e-386176c98510,94bf6adc-bcc1-4435-967f-c6d75259d8ad,30508c46-c7b6-4330-a798-6a8d8a0df8b2,46d4ae8e-c6d9-41c5-9545-e3bcda6cc4de,a6e05d5f-ffc2-4e96-97c7-ec79e0e455ad,f1447be3-612b-4ab0-b3ac-7841d40aa67c,03479611-eafa-40a3-ae9e-40a41f915cb8,60cf0895-542b-443b-9053-597feee46daa,71a883e3-471e-4362-bc9e-e3b1b7ff4c88,355cb667-1c17-4649-8d30-13f80bc3b78e,5e2735ed-5b74-40d3-be0a-6fd1b50f4448,a84065bd-1095-41f2-846b-ae1c854fde20,ffa2ad03-4bdb-475c-bd20-fb4a34f3719c,bcec0f8e-ec20-401b-866b-a0e71c97e1ec,33901d57-efca-4d47-834e-4a03e97aa4ee,4d6ac33c-19db-4c6c-a1fe-94df358f853b,b0a06b3d-5d00-4908-b99c-57863b63d337,be34f907-f5fd-410c-940b-bee334a94345,c76a457f-ba6f-4f77-bae0-e1862f46f525,3eecdf1e-c369-4428-8cd9-f155082c6dc1,b9489585-ce4d-4691-b82e-fa067d7b4562,26fdf828-f3d7-4e43-8ec3-6e76454f43a3,d11b4ba6-fce8-49e7-9ceb-084c2a7d4be4,9cb2eee3-9d6e-4ae2-9b04-6183d043c13f,c57145ef-a900-4737-82d7-48b5ae5ee111,dbd95698-6853-4944-8299-98cc8824bc4e,aa15f7d9-f712-4b41-a5f3-d4e2d80a670c,4f4a5661-9163-4f52-9c54-f12538dd7889,88c6560a-b4b9-445a-bd3e-269bf5a40fa6,7c118f42-4650-48fe-9d52-7b2b3b140ddc,7202aa29-6b41-4d49-8f64-c0005373e831,b50999eb-c6a4-4305-8f32-c4fac350409e,32f191bd-bbf4-4842-9908-7344f63af2e3,596b251c-1558-4fc7-9409-4e00823bc0cc,a1bf943b-b1e9-4476-b0ab-2e4c19b57844,f301f9e9-a0b1-490a-852b-93392c7d1773,a3d60059-d584-427a-883d-3ac554850f40,cdf06cff-4219-473a-97e1-973b2294a3d2,a058a02d-f8ff-4c67-a005-0ec8b41e0524,6b2fcb39-0362-40d5-a387-9bec481e8fac,20ba5811-d9a3-4548-a320-49a4e9755dfc,07136345-b7c3-4ca8-8897-55844a5e9bfd,23d9f1f9-9290-445e-af76-0ba7accd08dc,2c758ac7-bf09-40e4-b370-359d2c7a6523,8d136f9a-3cf3-42f9-892a-ea42cdd00738,c88a33f0-e59e-4fee-82fb-52db7493eb2f,53cc36ae-a858-499b-9880-4f4508924523,b7c1a749-9550-464c-b0d1-c67f7433e9b2,5b642a0a-512c-402f-b0c5-459f94f44f1c,9aabb78c-f8a8-481d-b215-8ae613da51ee,9849ab50-5b90-4a22-bb96-6348c5b81a1a,068b14f5-9316-459b-9a3e-5ac6a1a9bf12,4649dbdc-3a97-4cbd-8dbe-cae923aedebd,040250f5-2f29-4643-b201-00fc02750ec4,bed5e979-4799-41a7-b9a5-07d5136ad9ec,20a6c57a-9a88-4a38-9caf-8c945244fa2a,8e1e7c6b-c1db-4e64-a5bf-9a1a3b5f30ab,12cae896-b482-4e58-96da-159a2569f824,2947ff87-83cb-46ea-a9f9-972fe0a94336,b4fd4919-22e6-4d5c-b5f8-1740be1a55d2,c8d80160-9774-4d86-8eff-f658d00ea64c,366ac992-eb4c-473e-870a-af08abedb63a,fb73b79c-c8c4-4d87-a42c-2583b0702067,6b73b9f6-b306-48b6-8749-ebaf7ba4172d,e712f16e-b94d-450f-819c-bb543ca87c33,fe0931d0-26b3-4a30-a704-3375c5674273,e85f079c-3b0f-4123-9a29-99d04382e563,55f3ecea-8fb8-4087-9881-9fcc77c4d885,749887cc-fa23-4480-b660-955870e96f90,3ac37ac2-b7df-4f48-bb27-0e4a60923e03,51039d5c-d40d-47d1-9cca-8c9869afc5ac,87357854-26f2-481d-a70d-a2551a7b2293,9c235dd3-8dab-411c-82f7-7df2185335a4,ceb83177-8e38-47d2-bb45-c3e1a176d23f,0f3fb3bb-1c9e-42d0-846b-557d9fb2e8b0,3fa305ec-5988-47a3-9c4a-bb4f2e5b5c8f,744fb08d-e357-46d8-a84d-a9f0cc40ce76,ac4da83b-cdc6-4a9b-b6f2-e44dc77aa22c,72968133-23b5-46df-bfc8-be589bdbb01c,98883bea-61df-40cb-831f-f13613075fee,f409d7cd-4bde-4a67-8f03-302c9fd8e2f1,806ba973-e20f-4806-aa9a-b0dbc02d3c4c,482b08d3-62c4-4784-bda0-22a6157a42ee,fb0cf54f-eacc-4e6a-9a5d-92b0712058c4,44d69617-92fa-44ee-80b4-db2b175ea0fe,a58f4add-2fb1-41f9-81dc-28dd327eba2e,bf014125-062f-414d-9bfe-f8c498022b01,4b7fbf06-eed2-46b9-af6a-ab6aabe74872,4328c159-bfa2-4bd5-9fb9-c1ba25f7b0fd,bca993d7-f435-43dd-a9cf-90234f4e46fa,c135e930-7698-4825-bbd1-ab76d52476d9,858414bc-4b9a-4bea-b891-69a56ef3dd8f,e9c6019a-71de-4cb6-a2b8-78f4fc3a65d5,c079c310-ad5c-428d-8edc-991d118ae835,943caf43-33da-4ebd-9ddc-df6595f9e4c6,f50a17ed-6d57-4eaf-9442-cf57340e02af,2a1b6414-5e10-4ae8-9d01-3a8136ebb67e,e825cfc4-71b2-4516-8089-e7026c4582e8,e465236c-c330-475d-966f-33684005af6f,7ca79b63-5727-45ae-b369-ce53f468a87f,25777930-9705-49e1-9de9-4af78a7b5155,5ad1526b-2baa-4508-b8e3-19018a6ba4a4,0f2c624d-2cd0-4716-85c3-82e60f9de82c,e9c73caa-126b-4ba9-9271-8e03b5901891,b55da44d-3fd4-44e6-866c-32274ed7cca3,05ecc41e-c8f2-430f-abcf-ac3373bb8de4,417b3b91-c1da-41f7-9588-a81620c99cbd,29bf434f-2360-4267-8688-bc1ada2e8712,ae80930d-c5ef-4b1b-9818-6e6594a10532,5099ae92-b968-4978-bd5d-55cc66bf0f68,5ba00f41-a812-4000-9561-90e48ea3a9a9,95d92fc8-234d-48e4-845a-b3117f71cdc0,ad993f77-7205-4cbc-917f-2e9922da4356,2d19c570-8e2e-4cc2-bdb5-7bf982120dad,d2af0189-cc93-4ca8-a625-4cb82ce81a28,69b221da-af0f-48d9-84d0-741d313e0d80,f8f61d80-8f53-4a69-a7b6-f80ba7bda264,bcd2614d-d8d8-4677-bd1e-3fe6f691829b,8c5fab31-3439-42a4-8339-858aedd67f7f,3a43f845-c76d-4f7f-9b2c-e808cccc3ed4,f0da8aee-39aa-4460-a925-5fc2ee22965a,5b9e71ef-11fe-4889-93ab-b951b68a7557,4b48f383-3bd7-44c7-bcaa-9d9ad6ebc289,94a8f1a4-830e-420c-8244-4100eab71e1c,d13f2b69-baa3-4ead-99d1-a2fe62485414,e0033d1c-c7ac-4092-803b-191011dd9b27,91909f09-e540-44af-8b38-5263f8592c2e,3fee1f34-f3dd-42b9-9424-6ec2fe76ce74,6dbe4d4e-ec0d-4641-a89f-3a7293468954,6f93e6f6-124f-462e-b778-96be41e779a2,0d7af699-d21c-4213-ac5d-0b71e5a9fdea,a35a96bf-b081-4361-9834-c3d823f98a1b,0bfd3942-4d0f-492a-9ce9-135e2996c456,f5e76fed-ded5-45ae-916e-c0ee3a52b6d8,8c6838db-99af-419e-9c4f-30d3f6e529ec,f2640c75-aaaf-4e29-9d27-22ccbdd18b7f,866d2753-aeeb-4ebd-9b14-97bd469705a0,0bc657fa-dbfe-47c3-a494-0f2f0b5d3077,d7ed4465-b28b-48b4-8e07-49b0f48c5232,9c75048d-e27c-492c-a1b0-bae760771b73,c86d3035-d6a4-408e-83a5-93092fc2c99d,d3d2ba4c-1877-4d99-9b77-d7da93201c7f,d9e8450f-0564-433b-8d21-165ddba8f4f8,7203ac54-4178-46e5-a383-10099d51e762,11ba524e-90f2-4e09-9993-d1940961576c,2446d0d1-1ef9-4f90-bb11-634785e81477,52bcaef4-8bf3-40ed-bc27-1481c42eb473,4bae5fe6-0805-415c-a340-6cf0a53c92ca,5d820135-0c05-419a-a587-2d1037a74a8d,8f176002-c035-407a-a796-505e61e57c4a,01159cff-1b4c-47d0-a563-a877ab8ec65b,9128cbff-6c77-40fc-a514-6e1008c3b1fb,ef749c06-4dcb-4f08-806e-75e1b76c924d,0fa7e75c-dfb1-407f-903d-0e81a9327ef0,c9142edf-a3a3-46a3-9961-2e01832bdbd4,2f2bba75-f99d-4cc3-be23-87e5d67967da,8273ccc3-8cc3-4410-86b2-6f46c4ab7d6d,a2c4868c-b37e-4467-bb8a-403c26f4249b,76d7c4b5-d009-42af-bb0a-3829d38d469d,13190a80-fa23-423e-a34e-fac6588d6573,0dd6f921-d4c3-49e2-ab68-573b2bc39d97,7bb97665-be6a-4683-9c5e-fdc43165b60c,02099900-d8fb-4a14-b0b8-362ecc6cb95d,a6b9b7db-90c0-4d6c-ab78-c87126492029,b1a5d2e4-db36-4da9-98f5-f10998c9f826,7a3a3b58-ff61-4dea-9297-42bc5a29a5e7,6b0e41aa-98da-4684-b30a-22c9717c82ab,04599b50-5223-4329-8dd5-a24b1894329a,e5614af5-833f-43fe-931c-eff44dd5c2a9,8346cdca-3237-4b5b-b00c-4fe784764f6e,cafa9e18-0f24-4682-b1a3-a3d6d781a86b,b3f9f1c7-97c0-47e7-942e-2d65946f643f,b1e9ae40-86e2-446a-81a9-d182db66748a,ed896cf8-40b3-4acf-b98d-8bb583edee5d,c404c3a5-8399-4688-9557-44323cf5bc1e,869e87d1-e312-4770-a267-9c90c9f57b7f,7eae5ea7-ebc6-401e-95fd-67b5cfa7d9d0,5d9c3092-b080-4d90-8365-bb73d882d1e9,e7ddc64a-85ab-49df-8196-cf5c295e4760,1b636f26-8545-4280-bcad-3b2998023ba5,b13eefb6-4dce-41b6-b828-e34ed11ea55b,92d6fee8-c3d5-41c8-8fdd-a57efae28908,f51811a2-80d7-4375-9622-7a41adf3bc74,946f3c62-4a46-4abf-852b-20f1a0753873,c871431c-c84c-4beb-b033-1cbd3106224d,1732b5b8-def0-45b2-afa4-980fa633e893,7a902e0c-213d-4836-af3c-508c446a4b09,3583951e-f5be-456c-a456-3b1ead3950ad,221fcab6-510e-483b-8848-ccf7bad2b63d,d01b2ad9-b4bc-406e-ad38-f0fd5d17fe41,8b74f4d6-cf77-4f85-98bf-2e301b860578,e2d1d433-23ed-4006-8ad7-dad72df75cb1,9927e668-4b70-42e4-9319-6e934ace0267,d8c1e5c3-735e-4336-8e84-6eb7de5e7264,94cf0023-d0f0-40b5-a963-03d2db233662,a13e1ea0-1448-434c-87cd-364c54be0a89,8a113bf6-0c77-4f31-9eda-0cef82658826,c4033c66-91ea-4199-87e5-e768f47f113c,a292a6a0-e4db-4fed-8ca1-ec1d2693ead8,89cab3f6-73b2-4a36-83d6-94a48228ba94,2bb964ef-29f6-4926-9d55-2ebd91d7f4f2,f0e0803c-4e9a-4dec-ad75-8f1b074915a9,34f2cfbb-b793-448a-9974-78a046c2503c,f1a445df-e2fc-4443-8369-33775e69f9a4,a8def1a0-a2b3-4433-9e79-dc9a6a559a7f,2ba1982d-c370-4ade-a59e-09c121ef0c88,6f380bbe-f3a8-410e-9145-9428db88f2af,e1674fb5-8cd4-4f90-b969-425b90d62461,dfd2fc59-aa21-4758-a8c5-344e72bc438b,b77bda14-8f58-4131-82db-e98d820bd03e,559a027e-f5fc-49e5-b44f-9e086f8a7e6d,b727d8e4-c801-412d-ad87-3882be030f15,2ba60526-d8c3-4033-b2fc-f10f507c4fc2,50d0f4db-5a19-45ce-805b-cf443f5b22d6,a237f7de-0abb-4878-97f1-3abd49f88c6e,4ab6edba-e20e-4c55-80f0-c1a2eb13630c,0cbb78d7-0cf4-4011-8ff6-7c6efe1548a8,3bc5e250-dc53-4881-a886-af79485a90f1,17b69725-0912-4ad1-bfe6-7ec098a1f8fc,346e2a17-d97b-42d0-aa2a-5f66966c18e2,f2b67c49-ec16-45f8-a6a5-810984f253fc,47b6a0e1-381c-45b5-b467-784ab7767d27,c2e69d91-8b53-4b9e-b067-3ec8d1ebc83f,48acc8a1-685d-43b2-94b9-3032fec4f380,f446158f-21cf-4a60-8361-af79c00d7ef7,bd101824-3dc1-460e-a712-f5a8dc8e9687,9e04618d-10b4-47c7-97f7-5e88c281f32e,2bbc0e5c-147e-4e02-9692-40aa68bdb971,3218a5df-4ecb-4d0c-8395-3d7eb6cbfde2,b5ee7a6c-ee74-4975-8cdb-03466e693b50,fff12636-ee14-46b8-9dff-b2389fc38d4e,2eb15f20-88af-4999-9e4e-b9ad6e929796,f528727b-77cd-40ea-ad22-a2399d4f34fb,62f99375-2a2e-4d14-a8e3-4963a7c14089,bea99efb-d175-455f-8173-4fa4ea9b1a82,d30269f1-391f-477b-aa46-6496a4ac8bab,4afdadb7-0e6e-42ad-9e88-73e01585ac65,a61cf6e0-7160-4f48-8d1b-3b81b74dd708,58de073f-0174-427b-a35c-d82cd7c3a367,8cf211ba-97cc-4099-a93f-d40c65bc1ae9,4fdae12d-4c79-4e8c-acf4-6dc11a32a751,c71a21cd-a889-45f0-a0f6-771676ae1899,c5a0ac4a-39dd-43b1-9db0-3e92d587491f,95a67e3f-3919-4e93-9d93-c2037ba47584,b20ad356-9020-4e24-b500-97720b0d8fb1,0bd481b6-c46d-4546-a2ed-e468d6966bfe,6223779b-81ed-429a-b61b-7462a3403a44,3bd02a90-fb89-4731-8bda-6376af66d63d,5ee64afc-4b9b-4053-bdf4-32950b6d358c,c05933cb-1737-411f-82ad-94c85b63108d,bd31eb96-e47f-4c70-8896-4570df2f14ae,f648cd26-d720-437a-befd-078ca73baad9,b3a4ad54-2bad-4d43-9827-26a9f5c2820e,fabfb8b3-a39c-43f8-a42a-30b18de3cea0,f382e85c-a7c6-4941-b7da-f937476aec76,31b534ae-00eb-4982-9f86-de2f480115df,de6bae5c-bd2d-4cdb-9c25-a8a03a3d9782,00bf7a6e-57a3-4883-8e85-fa303d0b3b69,bc809b7e-28ef-4e37-af7f-cbbe696222fb,8805bb65-306d-4505-a750-f93b7020f36b,fd61b760-a687-4c1d-b7e4-7d5d84bcb5a4,7f54845f-23eb-49c0-aa0c-b22279eec0a7,7b249254-44e5-42e3-8802-b7ca52d3e13c,05a13309-b56c-4491-8cb2-76b8efe9907f,2b2e7dec-153a-4bb5-b547-48a418d2e254,5bb77095-0ccd-4ff2-b3a8-d752cd6442f5,b9f5473d-c54e-493c-ab95-9e34c5718247,212f49c0-76b3-4b5b-9344-46d749fcaae3,af083ce0-cce6-46d5-8591-17f1290c7844,0bda4bdd-71a0-4638-a2ac-ee391da0b684,e9955d9b-c9c3-4449-9843-c10ca1275809,1d762200-a78d-4adc-9974-6f22e9072e4a,e07b1b72-6ad6-4e88-9a08-ad869402e010,21bd86e2-8880-4195-8220-69f8fc57c222,ed76193c-8697-4a72-a939-aac75a21fd04,9b3f5d66-f117-4a4b-8ad9-1e77442a7eb4,360ffbd7-aa80-431c-b6bf-1024202aabbe,93c9f682-26d7-4551-adbf-33443f65d214,f3f38987-288c-42da-8e09-e39a4b108061,1a42d88e-fa94-45c9-a124-5caef200bed3,1ae49561-b998-4f9e-a071-e3eeec9d1b0b,2603f7c8-39d9-43b3-8062-94adde32525f,fa2c9ba1-02e4-453b-ad8c-a460f54fccb1,0c6a8f1b-4f78-434c-92cf-58433aa3576a,96fadf60-bea4-4b7f-8632-b1e5106d9e12,7b568cbe-fb53-4627-98d0-26a31af9188f,3e5a05a2-5e76-4542-96d2-fea4ac935151,0e670fab-2c33-4a9f-922f-110726f3f6ef,cc8330cf-4cbf-472a-afdf-231e7dc20f57,9545f4b9-1b20-4ae4-a769-72fbbac6a64d,6629c9e9-fcd3-4156-8d02-65112e528202,24f8ad79-23b1-42b7-94fb-a02c93439210,9db6a096-85e6-4269-9248-6e801877a601,9d731937-b255-4822-a30b-0e8955c9dfb4,bf7cef8c-a9aa-4c8b-b271-ecaae5d2cd96,2c4c04b0-e913-4959-9eb0-768b8f804f28,8a65b7a2-fc6e-4463-b0de-7d6a1fcb9344,1b9dbef2-e7f1-4baf-87e5-71f06504c4ed,187aabcd-c102-41a4-91ae-e413b0fc47b2,f8db6014-0c27-4423-a26a-0537737dc6eb,e38c3417-80bb-4cb9-97b6-b13caadf8c26,bbfa1838-078d-4ec6-b010-07c025d43a59,7b3d9e9f-12a8-467d-9166-d247f7e23e77,3dc3cf1b-a35c-4ac5-885f-3c7a8b58ee95,fb7e5a6d-72e8-41d6-a05b-8a12b3d6eec1,7d23ef55-6418-4ac7-947b-7f212ebf4b01,b0dda9bb-4ec1-4894-a666-c8ea6497460f,965d7327-73e9-4603-af1d-291eb45c3b3e,3a90d9ee-4989-4cac-9c0d-2bb6a9235db1,12411f68-e723-458b-898e-8c6fdae62e2a,eea08580-17b9-470b-9a60-5efa6fe39e9c,11a42399-2600-4d23-84d1-9bdcf33a8905,31a2d52c-2aad-4423-ac54-fe8bd374d19c,4afc3ea7-5cf5-4dc4-b75f-6cefa00d06c1,592321ba-93f9-4253-a0ca-4aabeff6be11,05bf03e1-edb6-408b-a637-cd39d69c0b00,257dda2a-bbf8-4f98-a93a-010392b9732f,4f143328-46fb-449b-8edf-0f61f5b2148a,ad75164c-ff0f-4229-8728-a14bc71fba73,51f23bc5-828f-497e-89f1-bb2a4cbbdd0c,e2deedd1-587d-44b7-b789-38ad9fe9300e,6e2e4eb6-5b8a-47ca-b4da-5bae536928e2,36cc8030-cbba-4c93-b839-4c6ebf188317,230cdc2f-d37f-4c07-99c9-81eacbbfe65e,e59b5ee4-8ec8-453d-98be-ecc50f7dbf6c,81914a3d-6641-4b93-aea5-43df6055595d,6c846989-72c9-4a5e-a3c3-5ba789368a31,207dd101-d2b9-4f01-9831-fbe7a3d676de,9c252e14-14ba-4e4c-b3a9-c9af24250ca0,5e80d4d0-8ca8-44cb-8bac-cb59d33b262e,5002019b-803f-45e0-91fd-8a0e951ea7f5,2cf9bfbb-3e80-41e0-baa5-b17284350d17,0f0b5946-5f53-43ab-9721-462358c6f41d,76ccce10-8ddd-48a4-8e00-6401bf8c262a,986ad228-2e24-4071-ae85-44fa76f7d3e1,0da5c616-91fa-4a9a-abba-188c93fa334d,12b8342e-842e-4420-ab5b-35edf3af2ec6,52f6feb8-c9c4-43f7-94de-77d5810864a5,f35095b4-4f76-4130-8979-311cdb7442e1,278268cf-1e85-4d7c-9d58-9ce9b1ee178a,4a24c417-2605-4fe4-8466-07ba51aafad6,8a1e3402-893f-4b76-8617-1a6364d42790,569e2cbc-404b-4b0e-a86b-cc90f9e72c40,f32bbec2-2e78-4808-a46c-26a001441d8e,6ad901b4-482a-43e0-8fbc-b2b6d9623567,fb14ab3b-7bef-47c2-8137-33aa119f004f,e531aa5f-ecf3-4795-b8e6-9695c5ac3d18,41ba31bf-9dae-4bfc-930a-37c49cfa6a32,12b9b7e9-b3f6-4a86-8ec9-197587fb36ad,d063bbea-e0f9-4d22-ad2a-8adeb8ddb068,6ff2a3f4-68d3-473e-b34e-a13cab070d04,1dd21116-6ba8-4001-884a-9aa9cd0c2b6d,619ab06f-835c-4622-a93b-3cc188b5d9e5,0cc1894b-73ef-49f1-ba83-adbb432ad045,f9554867-e648-4245-8993-e80a8838f2cd,651518fe-2318-493f-8c38-2ae761601974,27e0d6d0-318d-4439-ba01-3336fcf4b0d5,03e5b026-773e-4936-8c43-361b7f2a50a9,6e13cce3-a6b0-45ec-abc1-fb2c47b4b9b1,6076ffdd-ae2a-42ea-aec0-2928098fb8c3,1b3b424c-66b6-4b17-946b-0df357443914,bc702d20-f9a0-4891-a35d-e334e8f13f91,7874cfda-16bd-47cc-a6ae-9ea5bb88630b,c33c6558-cf0f-470b-8884-524ee27bb17f,e5278bef-4762-4838-bbde-73ebf8f7df40,0094c5c6-398b-4401-8076-458ee98a4599,8d0fc11e-2a87-4acc-a588-d4bcdab5b384,ada1540b-c3a5-42f9-87d1-e1e5a4bf318d,83ccb68f-68d7-4ffe-b943-1240e3ef4ede,57a2a9d8-5cbb-42fa-8e96-ebdf8203b1d6,ee9dcbb5-31c2-41d9-a484-5bd218a38b9a,36053749-ad07-4fd9-9cdd-ca6a6f956804,4d248225-4ae5-4a8a-909c-0f722641fe39,9107db4d-e171-4170-81a5-23eeb9522acc,6a0511e0-b80d-405b-a79f-f27f014cee7b,31bd7cf1-d11b-4465-8b31-320b21de4418,8b1b280d-11f7-4228-b9a7-a625be77b1b2,e8580568-8d21-4b89-9f7f-5dc8569c655d,fe5df4fd-6374-4f84-9bd3-b51c54af6595,7084c8fb-0373-4026-aecd-56252a18a893,f3a37ebc-73e2-42e8-bd8b-12adf649b81b,150e2650-2161-4ce8-9f75-f48687eb7262,4f14daf2-45bc-490d-af7c-562ce01d1ec2,53c7d185-9548-45ba-b11a-a70b8b1b5f37,9b6ede75-8f49-4cb4-871f-a75f4da7dae7,8a203472-3280-4df7-a7cc-02ad48a59d3c,e3da90f9-d8e3-4cde-862f-b7eba56fc471,9a8ad654-1a57-4c5c-855d-1939d2e537bc,38b55314-0a9d-4a59-8b10-5689dce68434,8764dbcb-8653-4409-828d-68987435ca78,6cbabb38-d676-4e03-9d63-cb53c2146520,96a548cd-b0f4-4932-b802-215bedac0e8d,f23e80c9-c80f-4729-b386-2f9722a5c93c,1b7bf5c8-4c67-4177-99c0-7c04c5ea91ae,b6703d13-01bc-4be2-b1f2-4dfef3624156,fb6ad2c3-cf9a-49d0-b83d-2d72fcd68203,05de1aa0-e8ab-4c73-8aa5-826cd2a4015f,ee825857-b9b7-4011-b544-c8bf9b4551fb,3d6263eb-a3f7-48d9-911d-e1ea553346a9,4d50c72b-44c6-4533-8044-b7199050aeae,3b5676fb-9424-4206-a619-b5db857215ae,a908c09d-a9f3-464d-b314-004a1b9c581d,820bdfa8-4a33-456b-9681-09b9b0f221f4,7bd98d8c-59e6-4b61-b239-180e5d34a678,d769fade-260b-4946-9086-f92f3abb4e1d,30bca3a3-a2c1-4143-9ef1-841d180caeaa,13df612e-3419-4418-9c8c-418302908681,70fe579e-9d51-4e0c-8884-cb4966cc0c7a,b5dd1ac8-8806-49cd-82ea-edefa77fa4b9,20ee3318-7f7d-4bfe-b828-2caba6488279,f9ef4cbd-bede-4dd6-97f2-811b77fdefe0,6222ae8d-c67b-4055-b4f5-5740a942db8a,bc675045-cf96-4fe5-a15f-b5e6ac17021c,91761ca4-5efa-4850-b3d9-5938625b80ed,f30c4fd1-84d0-4e94-86ef-95a02486199e,b7fce2c2-83ad-4db9-8d1a-d30850cd1270,e352d1ac-dc80-4742-8672-773ce10960f1,5f8d6a38-a796-4257-9c22-7d1874213037,77282f36-8a0c-43a6-9749-7d513d20175e,7347f070-cce7-40fd-a27d-02a05c89b992,86f0d1ff-3e58-4cda-9781-9fe65ef04f49,888b409d-e9fa-462e-bfbb-8ccd6151d1bf,67fb0325-3166-4cdd-a9fa-c570600e0a4b,e4664dab-7bdd-4ebf-810d-de724000bff8,ed16cfd1-dd94-4c6f-88e1-4f1b49cbffd7,06db02cd-c062-46f6-8261-ce3427f14e73,16aa90d1-2d2b-4126-8679-d8d54ef9401d,cdbfa7ed-c4a8-4b62-b73c-51cdd5476e16,a8f3cad9-6f0b-430f-a04a-19b8bdcb11db,931a5816-551d-4a52-993e-d5f617bb258b,61c4670c-fb4f-4649-b191-c620a559958f,5eee2427-9fa0-4afa-86cf-d21a479171dd,ba67bc1c-0d8e-40ce-bb44-2ed7a338141d,3865457f-20be-4b02-ad4a-2a15909609dc,fe965de8-4366-42d8-a3ab-f9e391e1f19b,134b4ebc-c427-42bb-890c-0b0f1cb93d81,59236b9f-d521-4e7b-b1d3-1f3d86e774a3,d5d0abe0-849d-44a8-921e-4d7e01acdd78,b2fa2d93-63c6-4c78-8bcd-82c9b2404975,b77c67db-1928-49e2-b6b6-a6a5d6e148b8,d4a42645-6f2d-4935-bbea-cefac534c2f9,0a64490d-c541-455e-bce8-ca7c1e8930e3,c85681e5-5386-4aec-8ebb-4936db536ca0,190dbbb1-a0e6-4226-94ae-d212eb05210a,e496813e-147f-4edb-806a-1af96fec38db,a295866d-381c-45cf-abfe-c77eef455673,b23abf44-c38b-4286-a89e-bccf19f9eefa,e379e240-dcda-4e7a-bbe7-0710ae6059f3,3e82dfc7-ee37-4a48-840f-ae36d7ad7c54,0bd6e00d-e70f-4d8a-8372-81afae7a10f7,80818a00-a757-452a-b869-9beb13962be0,f02b659b-a9c2-4326-90de-978d26ddbf5c,41568a71-e445-491a-b5fa-9c95f3d046d5,afd3df29-aca6-4cfb-b387-02fdb6af7f9b,1bae96fe-8603-41e0-8381-c0c43b993a2c,913f696b-5832-4362-9483-2d95fee8eca1,3239f62e-0c7f-43f6-8172-067abe0a2181,13f1ba28-a034-46cb-98dc-1b75a17d87cf,bd32a7cd-1280-41a9-880d-e0647ac1217b,d7313dd5-3bc3-42a3-89d7-1b1c87c6027b,f02e520e-418a-49ca-88d8-e88cd8556fc6,1a933f79-b538-480e-a93e-3db74c2784bc,35ffbabd-df70-4795-8311-7decafb4dcc6,511caf60-ec9b-4bb1-8ced-1f00336306f2,221cf03e-550a-4304-a315-004c2b394815,36640d51-cf59-4ed9-8888-7446a60af0c1,19ea60de-ca2e-4f91-923b-9c7e901375ba,91ff6dfa-f279-41ba-b0e1-d0b9cb1e137e,ed14a97f-6482-4e7b-b202-091c847de889,1ed805f5-35d4-428a-a8c9-93766678fa1f,68e03699-fded-4a87-a1c9-2d3c3deb9fd6,9de7f2a7-c7b9-4d55-95be-2913ecf1cff1,9e76b344-96c1-4de5-b348-5212d3892be2,3c13fc42-bfe9-409a-bbb7-27f1b359bac7,6a3995ec-429e-4507-8ab8-76f0cb6a54c6,667c88e5-f5e6-443e-bc63-b90a2eb34e52,be788011-3660-4c9b-9673-52077af37044,58c46dea-1636-4a46-b38c-624ccb330d2d,b5f8dd6f-5683-4dab-b986-1d9262253bf4,2159ac7f-2fa2-40e1-b85e-3e6b51a7d178,622d2bd6-6fd9-4003-ac3c-3da19b97bd11,6c8660ca-338e-462b-9de9-60a18c1a7c2f,60305d3e-0ef2-4aaf-aac7-475d1bae9772,7eb19962-ba42-4050-8cef-af2d251336e4,a198ec46-6a98-4548-bb7c-b3635ff4dab6,9d80b9bd-f129-45cf-85bc-7f03312485f3,e552a579-6c2a-45e9-9401-dee7d4be42ab,6c92bf7c-3ff5-4a15-9d50-4aa02208b748,9eab9ba1-83de-446e-b58b-4628a06b3a07,56a5ac5e-90c5-4ba6-a0e6-f48889555e6b,44d0d444-c017-414b-9ccd-c17bf1b5307c,80da5216-72e7-45a4-a218-e7c1c5e0ccfc,2f8d3bd5-d1db-4de6-afbc-f042a0ca6935,cf758c75-e179-4f7f-aad3-94343be80e23,46f56ee1-6c71-4609-a7e6-e96085f8fabd,da750fac-4524-4611-a4b6-c065b19fa0aa,627798f6-3bb7-4683-a819-6e8bbf5917f1,c3567e0f-5a70-4829-a362-6e196cc4bef0,e7932b9d-2ad5-4618-b26b-20e413f7cf16,6281cbe4-4fa2-44a5-bd1a-fccfb360c0d0,3579a356-c552-4f1e-8ce7-4c14fb9906c3,67a93628-6679-4f1d-abf9-715dd9c860ee,d8de091f-e8c8-42f5-8c6c-3b6a3d1befcf,920c5a96-5564-4118-9aca-97f0f40f4713,6c999f74-4e8d-402d-924c-f5ba88a43734,4567e291-9dea-428b-9516-db5811ad196c,81a02c25-7c6a-49e3-8cfa-163ec1a90f82,7d3d4db6-b4e2-4794-8f4b-133a047974a9,cbab7cb0-ebda-461b-80e0-ed1e4cbdee18,ea4896f0-0653-4ad9-8c39-035e3155fa29,56d949bc-c9ec-4efc-ba2e-891f57e1082e,4bdc9b89-2752-4f44-8f6c-28973c19b3bd,6c8bbce9-3f28-4542-a8a8-d3262390fffb,3553f750-0506-4835-a57f-2d7ea64f48c6,351a6250-a4e6-4347-89ca-4ed9f4658217,c9072ed8-0494-47e8-83a4-dd5b5e73f459,436a872a-4d1c-48f5-9267-2f03634d74bb,759797f0-b94d-4433-a506-6be323cc3ae8,88bbfe6a-a788-48f5-a366-745f04ebfdec,2ec976b3-1d64-4998-80e2-a4efe18dfc90,4fbd0914-e4a3-4dfa-85ce-480d1a43a352,168e2b95-c940-48f3-a30e-c90e2c0b002a,4a95886e-8e50-4719-b5ae-2100aba85484,cb07351e-f772-4bba-9105-341cb35c639e,7e21281a-e302-4790-a525-38731df908cd,005ff175-a614-44ad-b4d6-8094458ecbf6,10a7a0d8-6f52-4217-a16d-99e42d2cb988,7267dc9f-11c1-43d0-b756-1027d8359b17,0f18f2e2-7156-4d81-842c-854fdaa7fb52,f3d47c37-da77-40a4-92c8-f475e78acb16,e749214b-ff25-4c07-b59a-bc120dde394c,6c07e402-440f-43df-afd3-47fee94b1f1d,aaac9bee-a1fa-4922-ab45-71de0e173a40,a2a909f9-1922-452c-9c08-197a25308918,54fd8c0c-fe28-4878-8be4-d6652928ffc0,cde3b08f-227c-4af9-8a61-4ba1040b4b51,e467caba-51bb-40a1-b58b-7659e98e3aaf,e9880fde-1176-4cc0-9d45-403628286bfc,383b3a9b-bc81-42b7-ad30-8587b976e757,0afca9b4-7b35-4254-9e5f-22e033c0c666,47991372-3295-4e61-85f7-6b8660c92628,de1abeec-9334-4fd5-a65a-b355e82da8b2,cf83a747-4373-4008-a0ea-aa56f60959ab,0c6f7bfd-98dd-4895-a3b1-39f92f11585d,24ed06f5-1425-469e-bec0-74f6fd728652,4dc99a60-f41b-46ce-8d5a-919b0954ee0b,e7d5684d-9208-4a2e-a567-f160d1dc2749,e5343207-c552-4c16-b817-8b7cd82ae80d,4b9e723a-af1b-49f3-878e-038e5a040a25,a7cc2732-57c8-49af-a26e-45fc43afa859,ecc44444-23eb-4c76-9fae-b669ebb2834b,fded101f-ff4d-4eb4-90ad-49b2ffddea06,3205251a-0e30-45b9-aa7c-190501ad24ce,d94896ea-7726-438c-809a-849067792b81,a1639689-d420-48f3-88c1-6567baf9c1ec,dd1695bd-dc90-49bf-b77f-891a5716889d,04e4d8ec-48e4-4bf0-83b8-6c83387f8738,cc8f26df-31d1-4a55-becb-6c9e1b703a8f,fa3dd6ef-876a-40b4-9f71-b8b63099bc83,83aa8029-0141-4f41-8906-606b86b1afe9,473ddbfa-27f6-446f-94de-782bc7f20d61,4e7b3c54-3e12-4e1d-8f4c-6323e6ad4a1b,d0f03d38-6d3b-430d-ab93-5761aefc3d90,8cc44384-1140-4c78-89d2-da57ad43b6f2,b44c06d0-69f2-4e36-a3b1-33a9b4dc9ba2,e0429a25-7428-4ae8-98be-7f082e9bbcde,7324a125-067c-45cb-b583-e0a3418b5b7d,c5323031-9291-405d-ae3b-936c5b4dfe86,4528d1d9-55ad-44c2-9041-1f9d61f7271f,557c5c31-5d1e-43e7-ac6b-dd362d56187c,2dafaf91-1466-43ec-badf-bf4cc72fe412,b658aacc-72b1-4690-85ec-e79032dc238c,8a7958c0-b322-4c53-80f5-6ae70c2b8968,b187da1a-b425-437c-a880-ae4276a80947,d5039f89-57f6-4e11-b736-8cf73bbfa7eb,0582bfae-0167-4ad3-b326-0b9b2b8ffe25,6aebbaf8-6749-427c-bc51-40ac51be290a,eaebd658-1b81-4f5e-b650-03f121a009f8,b218bfe4-5381-4322-8e6d-2757008fc531,5ab006d0-92f9-43cc-a501-25f2fe6c1196,95dea66b-eff5-4585-b847-4813e2ea1a63,3c1b7f9a-b0e1-4d61-b892-5496b2f51966,8a378d27-c267-4c43-b944-748f02834764,cbec302f-4bbe-4cc1-83ec-fb98c8b4fbfa,7e409d85-c594-4d74-94ae-3ed83b312bda,44e66303-44ec-4e9f-a245-68e3d6ea397a,4ec36c86-513f-4923-a591-a4b164566209,3daa89ea-f14c-4cf3-95bf-de71ce8ab327,72f447db-8088-490f-a844-07176b7aea1f,5e0b0697-1463-4cd3-ac5f-e07e9588be14,f8681c25-1604-4040-819b-78cac6899746,1c7e9835-2cd9-47f7-9ed9-9901e8a7e5c6,43c854fa-1537-4008-a733-f5e3f7578aae,5d73a46f-b725-4733-9932-11b9e8eb17ea,23f8982b-85c7-4028-b854-fd74bb8bf2fd,66305bad-760d-41d4-8675-33831d10a7eb,d46765e2-3173-44ff-bee8-12f35d9d6085,31860a10-d28e-4e33-8ba6-ad82e1146574,7135de9e-3028-4e4d-ba0b-f14a13a8f9e3,726f8238-b76e-416c-87d8-d2e6d395ad08,931ce0ec-eff4-4b19-9222-25d7e16f0e79,f3b7bfba-2114-4477-b955-57832b71ec87,d75ab251-b79e-4788-acfa-1c73bcda322a,7461467e-e8b3-4560-a0c3-01eb56592d35,2dd10bdf-c76e-448c-9adb-78eead645ba1,5bf4f19a-8875-4fd6-ab8f-ec546047d684,ce939094-91df-459a-8439-4c8b70e3b51a,fb690527-2183-4532-940c-7cc0811acec3,ed037823-861e-4fe4-8738-ec944393afe2,b447fe4e-e4c2-4b61-937d-3674aaa7e0fc,2023f669-e0cd-4bb2-98bb-786183a393cf,b7720c9f-355e-4a46-91ce-b54132fd685c,bc2436a3-8564-4638-8cbc-200bbff87bee,b39ddff6-b70b-4b11-b27b-3b30507e9720,1529be61-1722-41f9-8073-21492e6a3eb7,f3633d7b-5312-48e1-a07e-15c334da3c35,4dea2848-50e9-4cfe-a4f4-2e629a2cc114,387ba5c5-8ac8-4ea2-98b1-8aaf1bc44cb6,ed1471b8-68ec-420e-ab59-b3628dd0c5c3,222916fd-e64e-4539-9f36-b201ce0c3b1f,f046eb86-ccc8-46ee-968b-11a204f53002,269154d1-086a-4080-b1f8-879a03ecf62b,b7ae902d-ca7f-4427-a973-f188e6f95214,aac9b95f-fa64-4eb8-884d-08ea4dfd5420,c77efd65-4d2d-490a-985a-849868e833ae,a5cb52aa-4d79-4f75-bd33-5c11d81edd2f,85e1cbbc-d578-481e-8a6d-33845dc7f3d2,6f2a3a85-57a2-406d-b4fb-d41bb2dca412,95122f26-6508-4337-a93a-8c7a3f281864,0ba24e47-5122-457a-a7c8-099417d45c7d,462bdb55-a242-4bb8-a2b5-db9cdf6d2a16,15e81e5a-e5ca-4ec7-8ccd-8b03434a6495,06200e09-3968-41d5-89c6-f8dc0a4e9f9f,44c2b5df-139a-4e06-aff3-b75f6717d394,32db5408-c653-4150-b120-231e3a21bc5a,d2728632-31a6-4c95-883f-5506b62b6075,7ccadd05-67cb-410a-a9c2-a9fa58418389,c4b80e10-2ca8-46d8-984f-bbd15cc14344,2ba381d9-0e23-49a6-92ba-80de4a7d3f72,b45afdba-dd2a-46bc-b809-f4f121e877ee,cadf5d3c-9cb8-42c2-8593-a46a59920926,f184ae25-1077-41a1-b13f-987e49438dad,9aad0399-de95-403c-8ec5-25f4d7a9ec36,a6fcbb6d-4c64-4007-8e90-6458dc2e41c6,9971200f-828c-4a37-8214-8faefd98fb1d,38ed69dc-1c41-4037-961d-c5d944441861,20b25d34-7bd3-4295-8432-9e7fdecd9e29,a9356d13-0a60-4d41-bdf4-21771af673c5,a0bc4cae-f633-4ccc-b338-bc5d0bf43785,c7afbbd2-987e-4904-a1ea-b2faf9bed082,3106c768-5ba1-478c-8049-0cff47b32c44,f19f1692-1f4f-4bd9-8ab3-0c56e799a371,1d3e2a30-4592-4437-9726-504bd29d7676,8912374e-7397-4b1d-8f8a-540029705e78,355d2218-ca1a-4dbd-94d4-88eb2ed56025,f684ad09-59e6-4ebb-ab3a-f48f2a22d9c0,775c0a67-67e6-4e18-a2aa-a89e23132777,9b00048d-806a-4668-b055-506d66c84ad2,d5f371c1-92fa-495d-86c9-682b325559f1,7436c1bd-0aa8-408f-b9ff-a080d3ef83f7,404fed4c-23fe-4e72-adab-6149b23429d9,a673668c-eaa9-453c-9894-6566913d9e06,8df0df82-fd51-499f-8f45-1bfdf617e1d0,cec2ad71-164b-480e-ada8-f44584afb2e2,d3f286c4-5ee1-4c18-9ef2-b12c463ff05e,3539dd2a-4ada-46b5-800e-ce168ed7d2a5,68ca116d-1c24-4e64-83c6-7cb91d0a82fd,73bf27fe-0985-4d79-b2c9-34d3cf5f17bb,f66fb700-4df0-440f-b896-1749f67b96e1,49780085-6e47-4fc4-bb96-3a4399bee547,07b21161-d76d-4426-9ccf-763e614c0094,5b0191b9-d119-4980-9e85-186075488681,8ed19496-204d-4d76-9832-a24900bc621e,cecf2a75-8178-4734-a0d1-d6557d3cf955,8fbc5d2a-7bfe-4bd9-910b-2c36033ac04a,118aff81-fd3b-4b89-b7d3-650d7746841e,a5204c7e-ead2-47d8-bff3-e625951f8701,bb4c4059-e982-452b-80e4-4971d06c5864,dbbfb6ba-a4ad-45b9-ac1d-c70e0a6bb07f,e2034aec-b8f1-4f39-ac10-c0415eb35bbb,ebaa7a70-eb4c-4fc1-8eb8-65615b6b765a,862a288b-4efd-4899-af05-86b99a28b570,09c9c59b-b203-4350-90aa-387d51c8eb2d,ff085545-ddda-45ae-b081-3ad21bf7e1b3,f3fbabdd-542b-4aab-9c20-d0a4060228c6,99c267ca-20a7-4ef0-bd4a-48f5fdb32ca6,dcbd8920-1150-496b-b8d3-a62b621bb949,a561bae5-a66c-4a19-b5da-645cb4889d6d,93325007-ed91-4226-9b68-a9a80c7a954f,22da2dba-fbc9-4120-8c0e-a84dd1f29d20,8b7c5c12-ad9d-4b07-841b-72838e562dc7,2736e084-ae43-48a4-be38-87d2c4d4ef49,4144ab11-f8e4-4c7a-8f9f-9839b4e27966,ae359b6a-212e-44b7-b416-3999e0256391,8d9cd1bf-1054-48b5-8abb-1c8a0602bfac,4f59acc6-8822-4691-aa7c-bcadf73e7ff4,2e92dc5e-9b17-4891-8b89-a84c87d85f74,294c3c08-542c-4f79-b4b7-a0a5da69620a,b5dea55b-4c99-41b3-9e71-9225d7c95994,01bc1c1f-b7ac-44f4-8abd-330a6d6ce278,e6684cf5-851b-4057-9a9d-d9a3a6b43bbf,8c954550-5840-42c0-a9a9-cc47c24a5acf,87e44556-ac24-4e39-94d4-c5e464bd54c6,b174cbe9-ea26-434d-9eb3-89724144c3f8,081b98d8-4a72-480c-982f-72b37fc40a64,115b85fc-3523-4d5f-b099-4b44bf3d687b,2754866b-4bc5-4427-9a4e-2234eef863d4,bd9afbd0-8fa6-4b1b-899a-722cf0dabee8,82e40aee-5d91-4259-888e-c4a05fd5b5b4,4924c4dc-73bb-4c37-b51f-561a6b3c4dd8,d95e4873-ea5f-43b7-b8c9-e7226f9eb476,61c6fe3c-8e6f-4c73-8afc-3f8b67b9afda,28dadb95-83a6-46a3-a063-6d7149a52982,71817406-54d1-4d27-abec-82132d7e362a,16003d16-7aeb-4d1b-934d-157a37dfb4b4,2f8e1646-ce8b-4772-afec-e54a42825ce1,03e72222-bb04-46f7-8b33-2960fbca9446,e5233fc9-332a-48f6-acd4-01101ecc3483,f2674c93-c9cf-48f2-96a1-22e9883dba4a,a7349a1e-66f2-4529-add2-771fc1bf3712,26a64985-0c88-4e8a-bd9c-05eec47dbb67,79e52158-feb3-4fde-a914-aff9dfc40292,2179bd5a-a5f7-4797-8118-e922098dcea5,313de9c6-fd99-4b64-94cf-03d6c20e7294,acae27db-f36c-4c5d-8082-9359f1935796,cca7c3ba-6e51-474f-b709-b60d81d93ec8,853d53c9-5937-4daf-b3db-caf1981ec989,2050f667-e039-4c20-92d2-3fb4d036a42f,4ae67811-8a5c-48d4-8063-387434c12542,b3fc6522-e95b-49d3-80df-11026ae9c0e3,3443b7ce-0610-4498-adc2-33c531b71e27,be6ace9c-fcd2-4718-aba9-641439ee38b2,bfeb06bd-514c-4d6e-94bd-a1e4b3bb24b6,cb9cb9da-1fbe-4dd3-8e6e-e01b4b79bd46,a1797c54-b138-4e3b-ab14-288daf0710ff,f4db336a-daca-4700-9523-37fa25578837,c1ac16c3-ac72-46b3-9c5c-102d484f9920,bca37fc0-d313-46b5-9f86-f481115c2d6f,b0d0aa0b-0a6e-4c36-b41f-91bad3f02578,320e6e24-bc36-4541-85d3-978c8167dc78,4f52b042-5923-4718-b68e-641335d9c816,935d5ab4-98b9-42b6-997d-aa035588e9c9,81ea1d58-cec5-4e9a-8993-82c0f569116c,1ecac928-149c-4509-be3f-e7db7e6cac72,d55cb031-840c-48d4-a6df-7a81243f7629,db68bff3-8604-4f87-b73f-089d72a0fb8c,4a056bd9-74e9-429b-94c5-5a48bd9be5c0,c9b5799f-c401-44db-9516-b1ccaa5c5ad1,7099fa2f-3d13-4dd7-8753-1f7a03ea1541,5f9c9886-02c7-482c-bbf8-7f61ce06b478,e6f553ed-5cbb-4dd7-b9a3-36176cc69084,cf4cc3b5-0941-43e3-9c0d-d6535531e57a,561e71e5-5b23-4b91-9aff-95287e98a4f4,2c535b0a-aec3-4549-b1e3-f1656bec07ad,452157d5-8369-47fd-94f3-e1369848052b,10a98853-8530-4877-abcf-e1162cac405d,aad21516-d6f8-4375-891b-66731239758e,aecc0dc5-02c7-43c7-a37b-2d4f9ef0b775,3c63644d-52e7-4cf9-806f-d8cbf7ad8a0b,9e599b7d-f85b-411a-8b5c-49bcd26fb120,049ac777-9804-44a5-b597-77c58ea78f43,318a2309-86ea-4f1e-8dc7-b5d743b5e974,7851fbff-d92e-4cf6-98f0-167b9871b3ba,a08250ec-400e-4540-9f5e-abec2745db92,f4a8aa17-90de-4394-b6f0-a8d00a254dd8,dd126738-d4f0-4d6f-a729-8c9b9063f705,5e786397-3a2e-4f72-b38b-54ec1ec9e9b3,45a0b90a-2b35-42ab-b3dc-71e54ab6461e,d04b0f19-9f20-4f65-9ff2-54cf30b32c74,1af96d6b-7dd6-42d5-8057-77e1eb9f8eac,388dac49-948f-4944-8427-54e5ba33077a,3fd6f922-2a0f-4be1-be2a-0f5e54c77be6,e908d82e-3023-4649-9cd1-08160632136b,014c408b-b7f3-4660-9329-8eb376e02a80,b9624caf-6690-42f5-99c2-65324082c8a2,ddfc1ab7-ffb2-4f4c-b204-789ca456d6f4,38012d17-e439-41d7-976a-e8be810d5ea3,41a08e71-1b94-4427-9667-1c2323d559c6,149f3b94-1f05-4ba3-954a-647b6d010d5a,5ee84aaf-a9b8-446f-a714-a05d5dc2a886,f91b877a-6adc-412d-af9a-6fa0a1d8fea0,da2ca1bc-2e82-4497-a79a-c1d29a642540,c708033b-beb1-42da-bbf2-6be203b2a5ad,88545017-4c4d-46ec-b66b-c9614f78584c,4a41eed1-5f1d-42b5-82fc-9c48505bf678,1d311b68-448e-49b2-b951-4a9b0f73441c,a5dac469-5e49-4f93-8423-9dc44b8c0ddd,01d58399-b86a-41a8-a1da-b02d6f199b06,e0511b41-2f6e-49ab-b542-da1a957f5a9d,00bd2cb5-3c2a-411b-9895-aac9290eceb1,dee366d1-6b74-4b48-b4cb-309f491f3e59,f384a1d1-8c00-46be-b7cc-24dfde08c118,caef97a5-2ac2-4585-8de9-20e65cfad15d,1d3ece68-5f29-47e5-a5e1-14e83c0ffbcc,bd361ac6-ab6a-4b46-b8ff-55e140b96628,636a70aa-0845-4307-99d8-e34b30478836,9e4d4e00-fcca-4d72-9462-32762dd52932,36634473-7b36-4d64-9fa7-3a5cefeec673,6ec974be-6c6e-4c62-a77c-b5f5da99e077,1b3a1027-1a49-49b9-bcc6-e7b1f717fb17,cfce414e-49a5-4db6-829c-1e7a690965be,2dae1d27-dfab-4bdd-9bc2-0cb823dddb8d,e9ffc7cf-6330-4b88-a4fa-e3fb038b5ebf,be6f1ad5-9685-4c78-b5d2-ca67e68400f6,d974a842-502f-4851-88e9-7fa299da115c,5323c54e-e902-43af-bc1e-10e56853dae3,69a55b32-919d-443d-8d18-90b0dd36b0fa,15f53c39-ac57-4287-85bb-1727eef70c97,b4f10c65-2bc3-4306-8909-191dfae17a34,714985eb-cadd-4a0c-b79d-f69bae00658a,7ec20fd0-9589-41ff-80d0-1379b377258d,6aa25017-0045-49ad-8504-dc011883155d,98e1fac5-92dc-4d9d-83ae-9f1a07e2aa4f,10a4a11d-2b43-4705-b535-7257810c37ef,f0f56d68-575d-45c5-b03e-d766cc879e5d,3dc59c76-5b83-4bb4-baab-9febe4ad12e5,508f29d4-4afb-42fa-912e-b06808a1c484,901aec6a-99fb-453e-b976-111c4094f69f,a9f0536c-4e3f-4478-bb47-5386185015a2,d8c35680-8a42-4501-a87f-c7d592187996,e0545ccc-1885-4837-9ac2-bb4defe8effa,e624b33a-2cb5-45e1-b394-eb60c1b93c69,0ed0967e-03bc-47d1-8f5b-7ab0354f661a,6c2dda7c-da96-435a-9168-507efbbb10d0,0f3c2d3a-e554-4a7d-8b81-6ab9e6e666a0,c86ad5c9-81e7-466d-a0bc-41ca472e13d4,f4475536-429b-4b08-b846-fee83865f8ad,155a6ea3-b56e-4bbf-a0f0-553a35ad70bf,1d521a54-ab9a-4b37-aac6-a7d24f1ede99,0bbd9605-c2c9-4100-863b-3158dbd1cccb,1a62b904-cb0a-4c6b-adcf-f4e772be208f,47225407-3a27-45c9-947a-2b5a07d5c7ff,bd391c79-5e21-424b-a682-e72be71c7fd2,51764e71-df7a-43d1-8ba9-27b8c29f3e6d,50f3ad65-e4f6-427e-b3d5-1818ea7f690f,e71372ce-45b1-4e6b-b906-18ef9d095b06,96b82975-2593-4f17-baf4-f600bf258bc1,f471ab23-cbd4-4faa-9701-e02506ecd12b,efee244b-674a-4f69-b1bf-cee5876cb4cd,3821add0-bba3-48f5-9ae1-26a00c6cb966,17d4df1e-9277-40a1-8606-0b3a5c6492ce,98db454f-5119-4f2e-a09f-43705eeebdd3,8d6a8fa4-f21c-4ec3-a8fa-29558e079137,42086664-ac55-446c-9cbb-e4c58b47c1ba,f7f3b7f1-6a2b-4b1f-97fb-0c9673e17d9d,2fb8642d-f627-41bb-9bc1-93ecb08a7863,3af8f803-4045-403f-adf1-1df6d478aa97,be7a7580-a773-4336-bb1a-10e2b88227b6,07c912ef-6970-4bc1-85aa-5b41e8a7be3b,ce8551dc-5aeb-4591-860f-2d6ef1b624df,09d165ff-e35c-4dbc-9fff-158a85e54e03,b1681e4a-73e5-497e-a0ac-75f483eb7dc3,d4b21a06-93c8-4371-a07e-1e5c63037d7c,7bcdd428-2d13-4c72-89c3-79610a32bdcc,e1a96d6d-61bb-40ae-80e3-08d1bb1061e9,6de268ce-51d7-45e9-bd10-cc112a4b3079,a9c57fdf-ca96-432b-a722-11ea189fe063,c09e5812-20e6-474e-b4d7-21661d5f1225,daf5da4f-e8b7-487c-85bf-c3a3e51d600d,75ec36aa-5e63-404c-9c3b-fb10d4b69da7,8d93eee2-74dc-47ed-8522-bf50a102d095,5e6c45f4-bf74-405f-add0-75711244352e,2d136a95-59ef-457c-a8b7-36ec629a46e5,9921b505-11ee-4aa9-b787-6bd9910422a3,fad8fb53-8039-4116-8637-e25d255843a6,44b06fe6-fe20-4c28-91ae-6bf37d37f44f,0ce376d8-417a-411b-a6f0-eeaf434155e4,3ecc36f0-3df1-4f93-9738-dec06a765817,09819c97-898d-4502-9aa8-00180223610c,81153ec8-9eb9-41bf-b00a-a44235881710,09074855-19e7-4930-b570-f11b878b8dd7,ba617283-93ac-4e0c-b577-3813e8d69d34,e7354327-f478-440d-8fd1-89eb8e796632,d04bc487-dec9-4d62-8169-22e81cdb9cdc,2f7e83d9-e7f2-43a2-bd07-50e27f1be1d6,8e37bb35-ceb8-4d99-851a-30c0460aff5c,4b0313eb-2c49-4525-86eb-70eca10e02dd,10532aaa-bc29-4141-972f-8e37316165fc,4f454983-2d67-4797-a475-dda44e94e8d7,2e505b3a-88ba-4997-b002-600e9d31f82b,474af56c-aa5f-4db8-a122-e43a6ff133bc,a2dc263d-ef37-4370-bf9a-87dc2ee4dc05,10573852-43d3-4f03-95fe-d280578b15e4,5d3d1a29-0a55-4da0-b569-e68f3945559d,e9e8ec43-751b-4144-a611-d6714bc3ddee,2e116761-99c6-40d4-a814-01abd590374f,687976ca-bb6c-4fa3-9169-39ad57ad482a,06e185b2-6aa4-4b48-8220-b3049b3ad3c0,cc5d756f-7ad8-4352-8d2f-e6a31560d616,b79ea59f-ad2e-4548-b5a5-a9c330a7616e,2157241d-13a0-4d2a-a745-1c6c3bbee74b,8c7584e2-e2a9-4345-93ae-03acc5bae71f,8450819c-3970-4b07-81ee-46d54c8e200d,31acaf29-94be-40d0-b138-34b0b5de9a6b,5485fc1f-3fb2-4255-939e-96799a501538,4b9e983f-93fe-42f9-abcc-db88d239d3dc,6a3eda45-7c92-4cba-aad5-0669fb64b3e6,87454a12-20d4-4fb8-824c-c250742dac31,994e2899-316c-40a2-9764-814e38c649e5,458032b0-b1c7-4ed0-a475-61bafb209c61,6e1075e6-73c6-4699-aa93-86f44a767e7a,b3e5f71e-e6bf-4237-9be0-d81b2177f9b9,bb8bea88-cd4e-4627-86bd-57e8c769e805,64256455-c573-4c11-b574-fef612b463e5,441e3cd6-ae79-4e4c-a308-d9a1221c8d66,56f8bd3c-0b42-498d-99be-94cdedd4eeba,c7973dff-738e-4612-bb23-d3fcb0bb7994,f424fd7a-04b5-461b-b2db-0a0ccb0240fb,d1443a83-9857-436f-805f-bd560e924f99,c7b26037-7ff0-491d-b12d-55fd6de3e1c7,6d79d6bb-7f23-439a-b831-3080d9062601,14715b10-940b-4a1e-b7bf-043898b9adba,cc5025bd-e396-4bd5-ab70-8ecb8060caac,5c2c5037-fb4a-4509-9f5c-09e6f97007dd,dbfe74eb-8714-4aaa-86a8-8551a345507e,6e44b949-c54c-428c-a5a1-8ff5347b4153,cf1e63d3-21ec-4cc2-8289-449974a424d9,075893eb-506a-4596-bb0b-6d4a2a0b1c2c,ef60ff40-8c0a-4cd1-9c6e-6b3532499eab,5d16f574-9547-41df-8832-1da2bd141a97,1bf0197f-51f7-467c-b9eb-69dc4289270e,a9fee276-ab60-40d4-a08c-742dc20f0c6a,06811280-4987-4da1-81e7-a54e8953f652,b2110206-65ad-4f21-9f0f-05e47e727f9c,8cfbc91a-5d18-48b6-b56c-0ce5755af181,bd24b5af-4f8e-4da4-92d9-3ef643cdb837,a623b1d4-d431-4b1f-8186-da5fd7c07cc5,ed563724-03a1-4c72-a0cb-8ad3df1c2a46,e7e07d16-3eab-49f2-be2f-77370de62098,edb3771e-ff53-4c28-8d9f-0ce70b4a6e6f,e8ac5075-63d1-46e9-bc58-5fa379bb9e16,5f0ca5e0-b03a-43f6-882c-d53e5d0aecf4,b6398e65-0ad5-42f6-9e64-00020eae9056,38b16dbf-be56-4921-aeb2-ea9421417ebf,54d88f73-e744-4936-9947-74b0feffc9e7,0d33c759-11b9-4ca2-bd57-2ffbb757f0d2,558316eb-24a4-4d5e-bca4-6da3ddb4fabe,a3fe69e6-1059-4f84-acc5-5037c1547086,d88af4b8-f165-4dd1-be05-4cba3543e073,90134568-229b-4898-9759-6646a2f20bfd,f9a0cd7b-41bb-475d-80e4-7a84e4a779f0,c9efa550-8ae6-4771-b25d-25e5ca800030,0c9495db-7481-45a2-b1a8-58636b54567d,cc69e646-918a-4251-8783-fb45f390f0e5,da2c9a98-e96f-43a2-9555-c5a25c1f6453,53484a0a-e0dc-42b4-b250-a1e1227a0ef5,6211d903-f62f-43e1-a091-4baa0d7f3254,8edbcbe2-8ff5-4f3e-ae0f-f1471e5c66f4,cbd31747-422e-4647-897b-142a5668c9e6,52c783cc-7b69-4140-818a-61a44fa9f432,15b7982b-df8d-4c06-8f2b-18241186dc1c,0c568eb8-b7c9-4ae7-b7f1-e2150a2a66f7,e1a75adb-4723-45e9-a6e2-51599b3e3a94,de3c4d91-8629-40cd-8ac0-1ec0666fab75,ba2437d9-2b4d-4240-88d4-ab77e86d133c,e5609ddc-b93d-4173-a750-466760e53644,dd3cb368-9b0e-4d30-92ca-f2ff0578e966,2a7f6ae0-876a-4d97-a4b6-5ba63566b767,bcc9bd9b-2af0-4f48-bad6-65ab521dc1d7,7386ed17-e5e8-4f0a-802b-8dfe07a8ece1,8c90bbe7-e77d-4e28-930f-a96d9ab5a57e,69e342ef-d335-48c3-a3a2-233adb9a65a2,df8491aa-0178-4c7e-a749-0059309266ec,99f930b1-ba47-4270-ab10-b58513dd74d7,ca6c3f52-467b-42d8-9bf9-67582c6ace4a,96828fe0-e8f0-4183-b115-d622f85afa11,18a36271-25db-4bac-adc3-fdd24f3d3da0,78b6c989-82ba-4c89-baeb-900881d77ffc,51c77c42-e499-4dea-9465-e6b8b71c709c,b08e41ca-4331-42e6-8f00-d1887a0ae6e2,a09ab7b8-412e-4f4b-9809-5cff35495dcc,a21743f2-c4a3-47cf-bb1f-b672b3da4c16,cf764254-f7a9-4092-875e-13c9a538bc20,d0d01332-3d98-4db5-b6eb-5d04c40997cf,1ebbe3a5-0910-443f-bac6-e47f00fcfdd5,69d99727-568f-41ff-9857-012cf85c0a8c,6f311fd7-1156-4d76-81df-fcf4aa66e98e,4272379b-9baf-4164-a4a2-91184b03f6fa,9ca3205f-29d5-427a-9d7b-c345db4d03c2,8e27ebe5-63b3-408a-a7c0-226e92224d44,8b4340bf-7ce0-497a-a1d1-e30c5664d2ff,b20778f3-2a62-40ff-9c4a-053bf7dec559,b2ea0f79-cf0b-48b4-870c-074c0d1cea72,bae66a7b-07c0-4bcc-8620-867ac9b8d000,7d8dbdc6-40a3-45b3-a0be-af2d3c8d4e79,165ea8a3-de23-4cef-b194-233beddfef53,3b113089-0d74-4931-9498-63983cecb2a5,7cfb3ed0-5faa-4aff-9f30-c95323237c66,97509ddc-4fa6-4619-9589-16978f747f36,68f7a735-5c29-45c9-89a5-7957f61a1e23,b406a58a-0a4f-458e-8686-b248adabf871,d4779453-89ac-46fb-9a9b-e16d19fbce12,e8aaa483-d3f3-423b-afed-334d0ffe6719,7a703b84-dd48-4819-af2c-1b0d228afa36,16b7f900-7f2d-41b9-a34e-1d856f812286,de0d267f-f64d-4f83-948e-e58996d9c093,afb60a0f-1588-4ecd-bf55-9d6ce8bdd3c1,ababa038-9d2c-4e79-9294-de338d586a7b,518707be-d709-4470-a21a-ebb319e78a62,57f6acad-1905-40f1-bfae-4b3eb420ef15,13a10b1b-7e82-40c3-a2cb-ee85012fad08,20b28f6e-36d9-43f2-b407-f1062ff85647,7e32038b-7a62-4ac2-9d88-6dbdaf5e1fbe,5ed9b69f-a943-4f7d-8c8b-dc3126145745,796edfbf-dac1-4ac3-9641-06089bd721c8,c6f598af-c92a-4229-bf17-7af5d48e437b,2c35e532-b7c7-4a31-8f01-3aec92cad6d3,6ddecbb6-2b04-4470-9eda-cbeff24384e9,d4a90d20-9505-422b-af13-57967972bd4d,555c8ae5-82d5-4301-ba7a-ef1fd2367637,90851122-abdb-49b4-8792-fd538c5ad4de,5c55e643-82b8-4ccf-b9df-fcad564e9040,7348ee1d-bb19-40d9-ac8e-e349b5f250df,9eedc7df-196a-4f81-8b4d-24b48f52016a,04d2a802-dbd3-48c9-86de-f636a1acd509,07fecdcb-b54e-4144-8b89-e626cde4e9ab,504987c8-551e-45aa-a623-c8865a2c1fc4,9ddee713-9f8b-4b20-9ec3-9dda786382c1,f325321b-18b7-4a59-b798-3d2f14d840d6,662be3f0-d911-4f0f-b44f-b44efbdead6f,266f51bc-90e1-4771-a45d-88597fb82209,4904f10c-7645-4d95-bc38-9149d3db6b6a,81bb992d-5e76-4d71-b02c-1f4d9da01ed4,736f8d8e-16aa-4c1a-a510-eea2d4ad6c74,643a6e18-df14-462b-9c3f-49882c41c819,ad67bb11-1921-4af6-8357-0ac059861878,480dc587-22d1-4dd8-9805-7354cd572c63,0cd5d0ea-37cb-431d-89fb-31da0e0e05f3,6bf911a8-9814-4d6f-bd5d-1e32bacd9e8c,14c63dfc-d4cf-4e40-90a1-5a72a6490284,d8688795-e3e1-48e4-bb58-97a6dd60d920,25f65758-47a3-4c19-962c-92dfb4893689,df0d2075-9ed8-4295-b14e-e3c8e93c8ab7,dc7f56c3-e92d-46c8-ba9a-9330b5a9bb63,99cd2dce-224a-46e8-98ad-8b6051d06e2a,63d79348-35b6-4c92-84ef-498ed33dde9f,2ab7ba5c-6094-418e-b062-3124f504b2ae,ebd2c71a-25da-4880-a0f7-811c01d97a76,e5c646dd-9f08-4dd9-90fa-84020114798b,e8a0a1dc-ff89-4a20-a6d9-7cba1ad574a4,e62ee2bd-ab72-4a5a-93ac-b1c38d0ab556,234cd6b8-6f3e-4079-8605-64ca6da5f0d9,ce253fb5-497b-40a9-aa2c-787fe5874ca4,045c3574-c9e7-4564-b5cc-9bde4a2841e9,eef075cb-a066-47f3-a560-a14aaca15220,1ddb68fc-d6de-4a45-afb1-34abceada408,588bb9e8-84ee-4374-b82e-b5b3a1b54522,633eb98e-0d1c-41a4-a8f9-895bdc680b44,447943c1-a4ee-47c2-987d-e96819bbf12d,82948125-f949-464f-a10d-8e4e1c17ea2e,b3669c67-fc68-4c8a-822e-64661b481ff8,88389ac9-a9cc-4e48-bc9f-708a43d9a303,5956d3cb-83e6-4ead-855e-1f9eb9ab78aa,ab0be2c0-fed4-4d3b-aa0d-a577ff964f8b,ae12b098-676e-4400-ba09-a3ce01471292,7e51066e-571e-4849-84ff-cc49eea7e7e2,333e233d-d073-4707-a6b4-3ee83dbf9649,c9534bde-a431-444a-a0ee-30e862866ba9,0bd4c063-1229-43c9-aa53-5c09465d7b7e,0973ba28-f2ba-404e-be77-f0aa4ff4e50f,e74ee113-1709-46fd-8b93-e0a6653bf91e,35365822-fc67-463b-938d-128c77257720,48713744-879d-4ed2-a44d-f14fd3e85f4f,33ed3355-a10c-4962-bbab-261e86fd3bf7,a1837235-8486-4bfe-9846-5fb4727c4e7a,7094efa9-4978-4004-89fe-3de48eb832d6,a863ccfd-383b-4701-a62e-91a65567f88e,1310786f-96b8-476b-9807-4d476fa418ca,00549801-f8aa-4c1f-b73e-8387a34985a8,8296b21d-0a14-4231-87dc-cbcbb683be26,9175c30f-1e08-4fa8-ae5c-344f7a78c139,f69d4f04-1e2f-4d10-a95e-c4e27b732dc7,3c1cd2cf-b3c1-4949-8c7b-095c65c5d55b,eca9f9fb-61d6-4911-b576-35a873560767,60dfa16d-d3b2-4e50-9949-732331cdb385,e501ec53-5f20-4791-8cfc-9fb4ae2c0247,e1ccce76-37ba-4b72-a929-8cb249001632,19b7daa6-064b-4535-a7bf-06d0253354b7,dbcb980f-c447-452d-85b5-377b487e3964,1c3fd008-6750-408a-b7c4-84c13e6a3789,4b85742e-dbb6-426b-a7f6-e5cc233336a0,620740d5-29ed-440d-8534-f4eaa8fec643,4be33e44-3773-47f7-9321-1e67466f084d,e59af4b0-5b8f-4aae-a554-a3401915f7cc,a93d5b1a-b950-488b-8987-087ec9b1c528,18c22022-6e66-4d9c-98cd-38fb2044daa5,5098d7e8-bc6f-4680-b36d-73c25e21d87b,ec2c1dbb-1b67-4251-b176-d9722ecf8b02,8bdc4fe2-1c10-42d7-8791-f87b5e623f74,480f1d79-4c71-4cc8-a158-a8d3038f926b,68496db0-ac32-43e3-a6cf-b65f4fda658d,390f5898-460b-42f8-8f9b-df11cf9f5cb0,d1846073-f117-4344-9709-3a423fcb2fef,21ae38bc-1e10-4e2f-b2d8-1d6f439c48a4,8e3f1417-f036-414e-b3f5-3ff7b2964f72,d6d07488-f478-430d-bb6a-9a24c2c157c6,706d940d-6da2-47b8-b87b-58910698b268,ffbc636c-8bf4-4cc7-9037-81bfd49af1be,a0a0d2ee-558f-4343-b821-98efc7c68e91,2ce21110-b431-4bf0-a33b-7b4859c41d8a,70834cf9-b045-494d-87ec-a708db06889c,595ac0d1-fab1-4a04-b2a2-1d8753e42a53,8f728765-9169-496b-8b52-a6cc1ef2096c,9674ff8a-8352-4d15-a56b-76b32ba36393,22a7526d-84f1-42eb-9d9f-dc83097df812,d1bcfb04-0bda-4f99-a5b1-60fcb0d2a7d4,ec225744-2d7d-4e77-8d10-2d2cdcd2b8ba,b77e636e-c924-4cff-8a43-a31f00dafceb,5818ccc7-0d50-49a4-abb0-51c36f484068,9304b191-2f0a-4aeb-8f71-9f9fc1ff9c6c,0c558064-69ad-4b60-a498-d819012dc9dc,14a26ca2-e0e6-429d-9541-987caf54b802,ff9b3d6c-6929-4db7-b1a1-699ff31f85bf,51daaa54-d727-492b-bc69-b85232c1f5b2,51fbb766-57e7-47ad-9776-92743ad20caa,da06ee06-ad2f-4e10-a239-896ba67737fd,2500f0b6-a4b7-4c06-94b7-5fecf0c9ecdb,efe7fe06-7000-4f2e-a78e-907138666336,a41ab91c-fe21-40c7-bbc6-4e2224206d11,26e109ec-821e-4abb-be59-74cb68e55871,ff22b3b1-9b24-4fe3-813e-2715a4281c7c,d2b904e4-0543-4dc8-b91a-27e924fde449,7004dc22-fbbc-4ec2-8f61-a2366051d763,e76f40e2-85ee-4e56-aa9a-4dcc88ec1377,eac0cc03-2f97-464b-804c-3e5582d77b96,3098c960-d0a2-4206-a1f1-565829b83a1d,d0a49c98-f074-4777-b594-28e696b9800a,a713fbc7-8d55-4a4e-8a10-4286976e3a99,2c6908d2-0db5-4aa0-a27d-dc532e251c29,7e061070-4fce-4ee0-94bb-b26c7e188354,a19d5252-02ed-4fbf-8d8f-2bd6c52df689,9277e019-4e26-4e65-a012-968ed041c97d,fbd19b37-563f-4cd6-ab88-ffc729f0d12e,1ec89454-fe14-4c20-b5de-8903ca2e1b85,b9440c7f-c5c5-4a34-b15c-3e7895fcb7be,0fd89a29-02cd-4c0a-8c4d-5834fc9660fa,85ca7af0-b3d9-43a3-9a04-dd2b0dff6c9e,9d9e5ed7-7194-436f-b90c-584b620c2900,ea01a252-faf2-4e56-af27-d8ff8e4547d9,53aa2024-2031-4076-904f-2cd8e1b6503d,6a16eebc-9922-4e02-99c6-9c634bcb4c99,879a5488-c620-4db1-a170-4c03b1b534fe,86d07991-ecc1-4c81-aa15-49265d4a7c88,fb9c9d4c-58b0-42f8-9358-12aa1731530e,7bc6a505-372b-4377-9406-06796ad037d7,b49ec2b7-7f6e-44a4-8d74-a4fff109cddb,353c744c-a330-4762-9e70-a3bd7526b7af,782272bf-e029-410f-a2f5-c7ca96ca8c9c,aa357f37-da92-4d67-be2c-4ea9e1ce1210,03d531c5-c71a-4206-aaae-d49a809ca9ff,ee591ea0-5d48-4584-bde5-b451e7715722,e935ee0a-6cec-4f78-a286-94bc195792f1,40e7da73-0374-4eeb-b9e0-4530d1da1c4c,2466dd44-2939-4364-a777-bfcf1c1eed58,0bb2cd18-008a-4889-9099-c21bc7ad69c1,63b52960-3bc0-43ff-8e4d-c5143c67f6f5,ca0501f3-ce38-4b0d-9565-cf6f79027651,f4adc518-072c-4abc-8918-14717ad219ea,8a8f5b84-3e98-490a-ab1f-9ee67af3e343,6b3bc2b9-fd1e-4ef9-b595-2469c2bed404,de5e73d4-17de-433f-ab64-7f6b53c3bc60,6e9b385e-95f2-48ac-82a7-2cec3767ee3e,70284d97-455f-45f7-8f77-a2a95dc2b284,d0f9c636-08e4-409e-81a5-81bbdaffed7e,621df7cd-deed-4735-a092-06786365c116,f42108c7-dbc4-4196-91d0-4f3c18aac307,64f42d44-5a2e-45cc-99b2-649fffd41a87,c6365e74-2774-4c3b-90cf-1fd4dcdf0f37,fbce6d5c-e4ee-478d-9371-edc9504ecfc0,d05bf9a9-2f87-4a50-8baf-5f78c42de0bd,618db67e-96d5-4359-be60-c4ead9028cd0,c8a761e2-663d-43fa-bf53-eb68065ef654,8f4f7595-b1e5-4298-8e1b-81d03eb4dbfc,20545c38-e3c5-4947-b968-7b67da928e3e,b0eda772-d3f6-4206-89de-7ea414d26f22,0d67a953-2bc1-4b47-ac58-af82c23177c9,a2e2e1df-f9b6-459e-8044-dad1ce98ba28,f3617771-0ec4-41c2-a318-1ec167fae517,43648b66-a81d-4023-b3b5-f2270d17b657,f856767f-5200-45ab-890e-be07d1a209d0,4bfe4a32-050f-400e-b72a-394b1eb2c2dd,97691e33-664e-429a-bca2-3606d375f2bb,4100cc58-afca-4f62-852d-0c43aa276284,8d537ba7-81b7-4525-bb8c-df573e412bcf,5c6ea3d1-04d3-44a1-a2c9-0a9adb751167,dd3c680c-95ec-4772-a50a-4e552c6b90bc,32ccac6b-d684-4717-81f6-c0ef41c06dfd,7578103c-715b-462b-a630-404aeedb97a9,ba07143e-aa5d-4b1d-b927-108fbba98f24,daf9d13a-942a-436a-aeaa-824101a23bf2,225b50fa-52d3-49ce-b9c0-59bbec96c384,d324b4a8-4727-428b-bbfb-60b187360eb0,a548a2b4-1f41-4596-94f9-9b37faa2888f,c04c2d5c-bb8f-465c-9ba6-79e4623fd4a5,b582baf0-26f3-4fb5-90cb-112974001c4f,50cc2c5f-c8df-45fc-9712-6f86ad625e72,69c185a4-fd42-49a3-add7-b2829beb1d29,68ebd9aa-00c6-4585-915d-ec053907ed5b,9d7dd28e-46f2-48b8-a423-ef7bd01eef18,adbb1faa-6f27-4590-bf78-271cb1c0abc4,6b4923bf-9996-40f0-b21f-8cb8aff1fabf,efd440ac-27a8-4d1e-a5c8-19e4f876c7c1,8efc0fb1-aed5-4e68-914c-e08190b8026b,595e4517-5ae4-4cfd-95fc-19ee591e0fc1,7a045232-9639-4a5f-84ef-516393779111,00a1ab02-6e57-4702-b2a2-5c64f9c812d0,b4f180dc-3f48-42c6-93fe-1c8ee3d1db69,270768ba-b430-4703-98c8-83361ee1714c,e16d68f0-4097-4e06-aee0-f7b8b034e92a,42cbf791-f919-487e-b001-24fcd640645d,cd41f58a-ee95-4c78-84db-b90df85668ac,095bfa0b-93c4-40bc-ba65-bebb60adfc07,9036ece8-57d2-4115-a0cb-10954add4502,daff2af9-909b-4b15-a9bf-31a581c682ee,12557612-c539-420f-9205-562976672dec,7765c0ea-8049-47d1-ba4d-eefddeb3b4de,852c89bc-968d-427f-83f4-ed17036998cb,c2b87647-3581-4470-afb6-cce36461766c,d22ec87f-f419-462b-a7a0-4a11eacb0aa0,5107e089-b8ac-4032-afee-db777347a337,566479db-41a8-4430-a56a-4e6bf7fa5f79,8f6c82df-2e77-43de-b410-6855827c6cf2,0c7c8a27-f828-40c9-8f62-da2983bb9942,898829b8-26eb-4747-8eb8-7db91d188ae9,4bd358c7-2985-4d7b-a320-fe1cd5a56858,e9d62ea5-742d-4865-a683-a14ea7533cd1,71b6a01c-60f0-4cc3-a35d-e45e0ce41e9b,db6832e0-fd25-4279-bdb8-2947f80c3bbb,884ac47d-83f0-49b3-960d-45a446d9e906,b7106486-e826-447c-b0ca-62f61b69b680,fbe1a436-8722-42e3-8d5d-3e4d93de5bbf,aa4e2a4c-ec6e-4d05-b8de-2cd615387a34,005a1506-340a-428c-85f6-0f6bdd13e1b4,385c8d9b-623c-4d8c-9402-38e1157c1a78,ea68bd6d-1399-4dfa-93fe-5b4f69a837b1,ab28b538-baad-4dbc-ac8b-f52367ab2f61,4cb4f4ff-3eeb-46f1-8971-c494c8a55853,d8544f5c-fc76-463d-a30d-d09d0563f3dc,4b8793ca-c356-4f74-8bd5-6002d9906f53,c48d8d4c-b024-4c8e-b0ce-27c318e38ca4,d3e8f4f2-bdbd-4077-87e1-62d605627018,4e285192-1c2f-4c29-87e5-88d2649072b0,ad627022-adf6-432a-8f66-e5d3eddefe04,0f78fca7-625e-4acd-af5b-47b95d35ac10,c9566e74-4cee-43e6-b21a-65b74475fc60,20bd5c28-bedc-4d73-a13a-61e22438c848,f6dbf130-6a23-4113-801f-d7a9fd29d84e,86faac3f-1190-4827-be1a-4e33c528ab0b,502e547d-bec5-4377-a3da-3bf8f7cbc06f,f1a9edc7-cc32-437a-a842-79f4edc402c7,6c70a69c-5c6a-404d-89d4-05e0fab2dcff,fe458e6f-33ef-4f6d-a7f0-c8d52e935e9f,9963bf69-3ed0-4e37-bf35-7624a4f31b40,b24f6069-e37c-4904-b6d1-8f741e4285ed,e20e8371-5d3d-45c5-9f21-a3f8e388f770,52aeced4-4544-4084-9344-113a86087318,12a0f4af-45e0-448c-9c29-94f38f33b3d0,05428b6b-7179-4e45-b055-a35cf39ef41c,6a4f3907-6b14-4b7c-a9dc-64cd7bdcb562,e3116771-946a-49ea-afe6-5b53e3b75f77,000933fa-07da-4772-92df-17ed3469dbb5,b38dad64-b465-4f40-b7f5-fc05d6255a07,6aac7881-f50a-4c8c-b530-155aaa1383a4,1b859ea7-1e95-44f2-a84b-dfa703ab245c,4274f20c-6974-482f-a5bc-8b9a117950b9,49320e19-b114-4cc5-ad83-be52f6138aa6,b6ec9dad-c778-4705-8475-3c942118e82e,ceeac602-1849-445c-ab82-271d4988f391,bc192795-7934-4658-837a-b6ff328aec8d,89dd7d85-5038-45e2-89b2-024c680a1b32,2f235dee-1813-4095-8d4f-16b0db5354b2,c0672960-2443-4a58-85a6-57e981207e4b,b59c1850-b4e8-4739-aa57-7168f8568055,3cb96b6e-7396-43ca-82af-1eea01e0fa55,4f24f4d3-19ed-4aa3-8462-56d4f2b8c4f0,a36b5021-24bb-49e7-b5f6-c9b33398f386,2ddd3cee-739a-4945-90b7-50fa4d0d7d1d,79ca8c43-a454-43e1-a8bc-80c0a5b7a4f1,bc134316-4713-47dc-8f61-e58996dd2043,e030fc00-8028-4366-8e4d-713749df5d17,a0d86969-7999-4fab-8ff8-16b5e9c09870,f005302b-257e-42e3-8371-c9449ab38bfc,0eab31fb-350c-4c99-8621-000a0aedd132,fdb6e5d1-351f-428b-a3b3-2430c6866651,776ae441-b272-4908-8120-e5f27e0980e5,1bfac4b9-1de7-43fb-b49f-7827aae2e200,b95f9bcc-e22b-473d-b909-1827b0d5a7fe,501dc268-3963-4842-b683-2ad0d1b180d2,722b0ce6-6198-4fb4-8a97-bb580ba65979,b2711bbf-0dff-4192-8293-ea6af6a59c7b,ea0d78e2-433f-4e95-8a88-9af5d4676cb6,140ae995-5262-45c0-bae5-4d662b592844,4acd52fc-69b6-48df-9726-e2a4605b96e4,94cd08f4-64bc-40ab-bfda-d802e1eac7cf,e800c802-3efa-4f80-ae12-fa9e03cbdeb4,0364e218-151e-4c08-bcd8-df730291a044,fef07aa9-1733-480e-83d6-78e3dc1f9682,25817674-77d6-49e6-9d15-40aee01f178a,bacfac17-78b1-439a-bab4-1291a01246e5,a76c9588-ba7a-428f-98e0-60ce0762cea4,ae1ce1ab-6ec9-4d59-a8d0-495f331faeec,301bfe53-0085-490b-9867-82f1bab84e96,bfeefb97-9fcf-400b-bc1b-561b25b0f354,b8523d28-e6dc-4d1e-9de4-897d6dba9604,f704975e-0883-4900-b650-9b5146950292,32608998-cfaf-4ad9-b1eb-9b740c23744e,8ba271c0-bbb7-411b-85eb-ae89dd5fd2d7,bf26eb77-05db-4651-a488-79d9572def94,50fee94b-7625-47d5-bdb1-ee2d415cd20a,044b4181-6a80-46fb-876f-cd3581eb19e1,4c3bedec-7649-4349-9f31-94306722e411,67f80dd4-77e4-4cb1-bd9b-cf25ab573d8c,3e19e26a-d79c-44c1-ae0c-ee46ce294189,e29c5c0f-96f1-47be-b1e8-70287fcfbc31,f6b183cb-d508-4a0d-99e5-6d09f09433b6,f7f7648d-1b13-4b11-81aa-63a4e59e6d30,7bcadffa-82b6-46f1-8e6a-1d2f428a6a3d,3b467b2e-9cfa-4eb6-b080-fcfe1a5e56eb,98ac6d20-6a9e-456c-a02f-547054a2ccf2,caba6de4-7461-4e5a-b296-fd5e3d882e0a,2887e803-6f8d-4024-93de-16513479cb04,28f3e48b-a075-4126-b105-2ba2c9ee1f39,962772bc-6c22-4592-b1e2-90850b0f6dc0,f11906f1-f7da-4ac8-bda2-8c6d63f0bc3a,5a0b23b1-7a85-48f3-a504-757479ec8091,c7147dc2-6178-4ea1-854b-88fae9c66441,824080cc-3434-4fb9-9d15-dfdc7770fcf0,6210d7c3-2b09-4974-96f5-639e699d07d3,8994770a-976f-4e5f-b578-c04258a06d64,70bfcfe9-3f58-4822-9e52-c6f7129cb225,9c028ecd-fb85-4542-873f-ab0730fc25a0,66952799-971c-448f-9a36-3548694fcc4d,f1130089-a056-4dc7-8f33-54e5898c9f99,fa6e6af5-0408-44ba-aae4-0a0135aa45fe,4d486ad6-4d92-447a-8993-64ffbfd8687e,29ef91a9-570f-4971-b800-23c026d17c6c,09184b4f-5bc4-4c34-b893-443b2a4d3937,d12142af-853c-43d0-b1d9-e9a4acf80191,443c82c7-9f2b-4966-bd6d-61ac59a3cd47,db33d997-d870-421e-8a00-b39b27ca62f7,ed681828-3026-4567-8565-ef93f3f1c01e,e034eee6-c57d-47b5-9ad8-5a542bf70edf,83f149e7-51fe-4a8d-9df6-d174e1bfacd4,27b8d7e7-c0a6-4fdf-866b-41677e689f62,081c3f32-32c6-4ff7-bf48-2a10bc7a16ce,0b67701c-3ebc-41e4-9928-8c4a6415aeea,905c29f9-ffa4-4925-889a-579c86257664,f9c1072d-cfc2-400a-b3c0-2c7567e13f15,95dbca7e-7226-4d29-969c-7246e2cf821a,d69cb6d8-53dc-4274-ab89-33d381c85105,1727ab6e-502a-402e-bc61-a461d1d78b3f,0ebcb3ef-acd1-48fd-9dc9-fc87cb760e80,1e5d3e50-17cc-4f3d-9824-9e9fab6b181d,da499462-8aa0-493c-aa1a-81726ce4330c,6ba35bde-6f4c-40cc-a8f0-f60007c324ea,5c9d60ee-ebe6-475e-8939-89051cca86bf,8d8dad3d-b0c4-4c05-83e7-a90f43d3ca44,f910d81b-2ff3-44e2-a850-e008ad5a8294,16cf3afb-afd8-4ad7-843b-e3e219743b15,05374036-c4b4-4851-a404-7f500c777f49,9401b793-6ee4-4cef-8807-278681226e71,75e530cf-a100-4dbd-96a9-845364fd34ec,1bda469c-b27c-454c-b404-e2b9055fe3f3,ee3a9bc5-82d2-4dad-85f5-7d75ce1eacd9,a0f18185-64df-44d0-8af3-d526d3cc555c,c9fb1c6f-c887-467a-ac0f-8cf50c731db1,cb0d4ec9-1938-4a0f-a1e1-10b062459d5b,ae6c6f2e-98f9-4906-85d1-b4b670eeeb4f,efa76586-416d-48ea-b72b-93d4e805521d,aa692d27-dc70-4f10-b010-c04a63a931e6,72ecfcc0-7828-4f6e-ace3-b82100cd9e4f,2df4bc2d-db4a-4655-9508-edc05da46a51,c79dfa81-fbcf-44a9-abd4-44a71f012af1,18ee2bc6-e8e1-4d95-81d2-09d6efa118b3,f100c491-67f1-463f-b098-0a52176f6d62,bdfae84b-4ac3-451c-99fa-b31b37ed4e2a,5091303f-e4fe-4930-8fc1-f100d4b2e323,15209296-eff4-4396-9611-a5a7596c5c4b,cc5846d1-f2d5-4088-bac0-ed6248b33920,eb8196e4-e6e1-4329-8761-cfd1bde818f8,2fe49af0-85e2-4859-af42-a36ea5f6dd1b,be8f985c-353c-46da-adeb-27803a7397ff,fbc384ff-3e19-4eec-b93d-da86fb184577,d951dde6-fe0c-4dfa-ad95-385662063e56,352c7b5b-a0c4-4e18-9310-1dc1d1782c01,b70a3fe6-4196-4272-b316-dec418495518,5daedfc2-5c10-43aa-a227-32f14b1479af,a39c9e7f-5d66-4f56-8711-54461473046d,cc3c906f-7c13-468d-a504-a456a08f7993,cc18a8c9-72fd-4a8d-b11b-9c3181697b95,0427c36c-d59e-435e-8875-a5e0e7473ffd,590f7c67-a2cb-445b-96bf-d24324eda160,2d142623-4076-4162-abf5-10687c9d0db9,84acef68-2808-48ce-923a-7f9708a5c69e,98a63b57-4bf2-4278-a3d9-6311080be464,ec25b97a-6b40-452e-b438-e61838f32d50,91db456a-1008-4a08-8446-972e614cb18a,eec91e19-0a5b-4180-b464-4a3700f959d5,21077f64-f0eb-479f-9a2c-cf70fcd6ab67,4bb4ca2d-d879-4c37-9ec6-5f80659f8479,2c352b7e-bff7-4152-802c-6edb8b282fb5,a980cc8e-9af3-4f9b-9f58-e24fe16bad51,7882bbf7-8005-4970-9f39-73c7a31855bf,8aab9875-f656-4bb5-b43f-6bedb2f9d691,956856c0-f828-412d-8dd9-d0fb9bd3447b,117a5684-07ee-459c-b5cd-73fe5a4048e3,efb3fdeb-7b41-4b3a-b5a7-481583028a48,84c83b16-3af1-42cc-a5a2-c91e0a8e93d3,4051c0b3-94da-4099-a129-4e4a3a5e53f4,282f033c-1334-407b-a927-c6f287dc3bef,c6ad6f96-5f34-4f57-befa-93273f3f11a5,ea789ec4-13d9-4caf-a3c1-0350b897cbf9,a3948477-f569-4bcf-ba71-878f5b44a446,f34aa898-cbbd-433d-a2a5-4af9738c62d6,99fea41a-0473-4eea-a7da-31412bdb7622,00ddaae0-8396-4203-9dad-51011129364a,7bd91389-975e-4392-a572-594191e46aca,6f7f47f7-9c74-4c32-af5a-5feb5a58f301,1999a635-3edf-4924-aba1-5911ad157e62,e6200681-8b2d-4171-835a-54f76555a610,7cde3e31-18bc-4fd2-b0b6-54ca892db639,051b9ea1-bce9-4e66-832b-16a65fd4d9c8,f345096d-e1ad-42f2-a279-a00fafb2f4ba,03db22f8-f2a6-44f0-ae0e-9795563636e9,8bacbee0-9423-4a99-a395-485ecaf1ef77,726ba4a9-c7bb-43c9-af81-a4e277ad4aa4,e8650686-2112-4ecd-82a0-6788dec4194f,a17bf986-2030-4604-a482-449ca96325af,3ec94341-320f-4800-b992-0cfdfaf69ceb,9cd3c5ea-68b6-4dbb-9242-05d50e675c33,9e5dcbd5-8581-4f49-aeff-16c9280fa05f,539745d8-eec8-41d2-a821-8375d1e11091,5b7616ea-d618-4583-aa22-4a0039028688,fb0b9b72-4376-4df7-96c0-bec4578c0415,f093fd0f-649d-4f6b-915c-3af3c55e5db3,58a60b80-aaa4-4f09-a91e-05f8e5de3029,9408a904-adac-41a5-9794-bc401d77fd51,eed937ee-d2c7-4181-9dd2-b80f016de5f6,b648a963-f991-460a-b888-44e2ee8b7500,82fb673f-7e50-4255-93df-4b850e8cb36a,7038a1ca-e55b-44ef-9c96-40b8bf494e1a,89d54009-699d-4998-863c-88fac2e1f23e,1a07db3e-8144-4925-ac5d-3c467033b388,80ec8241-e3d2-4fac-aa72-4a68f430bd70,a1c21685-27e8-4b4e-9cbd-b5f25364b30c,d4f7a4e5-0c68-4e10-9128-51d285cab9d9,d47cdd9c-5bfe-400d-a2d7-e7869a02cfdf,ff51ae8c-bf7a-4222-8dea-612067670051,1917dba7-06bc-4731-9e3f-86eb6006b32e,f39be638-fb15-4c8a-a0f5-9a5793f45a80,e2211e05-199e-4140-a1d8-338ba6676985,75120c18-798e-4e4a-82e1-69aef2dc39d9,763efaec-8027-4e78-beaf-7e022da88462,2828b081-7354-4e43-84ee-58dfad25d7bf,5385511a-9c1a-4d2b-8d2c-252c34fd94f3,05f537cd-6eb9-49be-9109-64ec3a8a97a1,49fb0ee4-2c5c-4f0a-945f-e46de2199e43,66570679-0c14-4097-ab5c-7b75156958a1,866f123b-aef9-4fb1-8cce-5ce79a2aed7d,cc917092-7628-4ff0-b611-09fac6c6b268,8e353a96-31fd-4ddb-a915-08ded0eec90f,0f0acfc9-96e0-4be7-86f3-c1a8988c5ff6,85022722-aa7f-4437-819b-dfe487856d36,0388b5b7-0a36-4ce6-9ec1-bc5d4e8cf648,84d59bcc-048b-495c-acf9-e3a4d374980d,4a57bbe9-3de0-4ac8-a363-fe5ccced8896,e4f76ffb-2f40-4d71-a88c-ce2263b40e19,3282a41f-9a3b-480b-8615-0b0e5c13bc5c,68815098-3007-4e2e-9119-8401590e4e6b,932089ae-7d91-48b1-ac01-15ccac440c1d,bfde5996-a10d-440a-9e2e-153644a8c1cd,581dfaf8-337d-44a5-b854-272052308f33,77cba544-b3b2-4cd1-b69e-21ae63c15050,847d1054-55ef-409d-8bd7-6da634a6b684,b553b07f-48b2-4e8a-9850-be8c4f833363,429e5d17-c9e3-4296-85c9-78d445d8ffb9,e7154aa4-31b2-4b80-94e5-fc3f436bb34a,307b9b53-abcd-4231-81c6-0b58c9d234ff,7dac6ff1-baaf-40de-846e-3421d3369d07,497c2460-0047-4eb2-8cd4-0cab27aa22e7,ae04aeb4-d5d2-4f2e-8e68-0400d1fcdf1b,d512d9e3-1421-450e-bec3-badb5ef83989,6399d7b0-51a4-46af-b386-db27f2f1c7c1,be8e4c86-f87d-45fb-b393-b0150eb75799,cf991b94-88fc-4f4d-9bfd-551caec4dbbf,46c6e226-2fa4-4968-ab5f-48815bf80b45,9e9132ba-df46-43f5-a3f6-2a1bcac16662,8194134f-efdd-4eb7-9ce7-d26d9b89fd8b,f96c11f3-b570-49f7-a466-5a95b08e62cd,112e6f2c-89dc-4943-95b8-20c13ed8d115,114ee2db-41bf-46b1-85b2-ad60b50a5236,182499d6-9a08-4247-996e-9901d2bd185c,263cfe7d-24f3-40f3-bdaa-8589849cd76f,63d4e2ff-bc30-414d-a21a-0389c34ba562,ecfe414a-760c-411a-bb7a-5f271b68d643,61d274a5-2016-4c0d-aaa0-b81f26d12d92,9e1371d9-4317-4d22-b062-62eab93cd0ae,c6542e9c-86f2-4f11-9af7-fbaa8ad46a15,da4219c6-d5d2-4914-84c0-15e6e6066363,2450485d-9e1a-4a62-87df-553f4533a16a,b6b219b6-0741-478e-b393-d7731d4fa27e,4da24747-fb58-4830-86d0-3f90899f791a,aa4809a9-0d57-4ada-8a20-b37d903f5ade,5b5828ce-9ebf-4649-a658-888f19690dc0,d86ebdd6-6b8e-4e3c-9a94-aa8244b9e529,b81adb04-21dd-4712-a122-9f94e15dd972,bbaee1be-c026-4bda-8221-235c23082006,64dd2218-e479-4744-a394-abf359d11319,0a3225b9-38ca-45c4-844d-bd1173af3c07,7361522f-f5c3-47a9-846e-68bd22ff17dd,abb2443b-70c6-4bd5-ac47-acfd92673f2d,de63b24a-e948-4a91-831a-5448c316a0e3,01ceeb25-5a35-4d98-ad46-eab78ae5eb45,95f62f7d-43d7-4ae7-a512-8ae6aa42aade,c223e173-fb31-4a6f-b725-3d9a1e0dade0,3ff8341c-2101-469b-8d76-6cb50ebfc644,0241cda4-2f2b-49eb-a82e-9cbedf3c1c10,66e4a9be-c9e4-4eac-a80b-37b5db375f55,4b5eedee-0ae4-4f27-b98f-025c635c53bb,1c97e05e-490e-46b0-b018-667dd6b00ae5,18e57c9d-5755-4785-aed0-21de60399818,0624a67e-5876-46da-99c2-77a399dcebb0,625187ed-7524-4592-b654-e71a3b828368,42b5cfb5-986b-4e93-8d0d-4d3871812842,82159024-e494-453e-be76-1e5d5ce21647,799d85de-59b0-4186-ba34-e92d445bb6ca,99ee6e0e-266d-4025-9df2-ab8e5940ab80,7566fda1-ec11-428c-be32-03a2e00e3700,b72b4cc1-91ca-47aa-aa9c-2bb9be5236a6,7eb1e801-b39d-4290-8322-bcb885d37c43,1925630f-ba10-4f44-ad76-6a0ec5d2f3bf,f708489a-47e1-4177-906e-5a5d5d41dd87,5188fa33-7126-4afa-9a93-0f8803c349ba,7376ff76-a2b4-4e5a-9fe3-83e283f5bd0d,85b51d9b-cd3d-4187-b2f0-1f7c2bf3109c,bab7ecd5-688f-4a94-9c31-29115e6859d6,afec92d0-0135-4604-a247-16dec0dd8a86,3c56c994-5269-442c-8036-25892a6e58b6,eb65415d-a347-4f23-ba03-d33d51764a0b,0c9fcb5d-22ee-46d3-9166-6b0c4c3aeff6,b01ac391-c90e-427d-bf21-5b85651a8e8e,e6d22da2-a0a6-4470-8c41-c26dac6130ea,df77807f-23b2-4fe7-baf0-f254541dc46a,baf90dcf-11c8-4920-8903-adbc82611207,faefd02f-b062-4ead-b079-8ad11ff7e1e1,e5028a7f-5cdc-4aa8-bf8b-1eaf013b373c,7a43be71-5cb6-40c1-a5af-c9421bc942d8,f7b1aefc-d3cd-4b23-b7b1-d8022ccc4e6b,3521cc5c-1d0e-448b-b889-f24c0fd63482,919f1891-4094-4097-9a3c-db05ec06611a,7b92dede-39ae-4041-96a6-f67d20e64f36,80ab1cec-9894-4575-aa85-327bbfc26440,7c1931ab-b7b5-4130-9910-8cd5ca9a6051,9092637c-8a97-4315-8c64-5aafc1dc9acb,5f07d99f-ef37-4ec5-a6fa-641dfa7c0598,a021e293-924a-4683-814b-dd03dc0bb758,660822d8-90eb-4d2c-8f74-cf5d01274c31,347d0c9e-479d-43b8-8dd0-5d01cb5bce3f,65c3906a-b6e3-4d48-a308-340cafba8d13,e457f75e-6e5a-4e2e-b1e3-8b706527e622,693e277d-1584-466e-9cf8-c884f09f3582,c9857559-f4b8-447a-b738-ff768b574feb,e0a79632-9ebb-4c27-90a0-b58ea57a58e8,3289559c-8d96-4122-ae41-91a29c2bda60,6057b865-bb15-4505-89e5-34fde6a362f5,44dd19ea-abff-4a80-bf99-14a075b4c31f,e0ce07c8-137c-48e8-80b1-48b84fe233f6,89527971-7fb1-4353-80a7-19db9330a29d,94738b0e-4af2-4260-a43d-28473c960a34,b019a3f7-75e8-4731-aff9-0070ad140b5e,527ef87b-af4c-48db-973e-c1efa79c368b,40ce42fc-586e-4f15-9300-afd8c33ad37b,91ca904e-81fd-4ae7-8795-3db1986e0d68,71a8d6c3-d813-4e90-a83a-f96705e2dea2,0c5bf43e-7077-48a6-8e51-b155a6f929f6,7036b33c-0b18-4077-b887-8099600fb297,4a7d9027-d9da-47be-a4b2-31ee00765466,9b603131-8edd-4b21-b000-62d92fa58875,c3a80506-b9d5-40d7-9a13-0af97264b892,35de1d6c-82e8-4c5a-bb5a-d3e63b64348e,82ddf461-845a-4931-9daf-404b0cffcd57,fc49f36f-e292-494a-a6f0-46b0f8666c8f,3af0d221-de9a-4d02-a95d-f73c98ca0cde,1b825f1e-c9da-468c-9ecf-06ea2c46871a,24e5cf6e-c3d3-478b-a4ef-4360c794e63c,f3697421-9622-4bcc-817e-d49f36451bed,df598403-33e7-4116-875e-cd9f122b3479,50c8536c-738e-4128-be19-52858f348848,fb9b2ee8-a3e5-4191-8014-18d57cc44f97,e4cc42ea-700e-40ff-86ef-ce856013425a,f0e499e4-92cf-42ba-961b-d897f867526f,ba2815d2-16b1-41cd-a403-90dfbb783d22,89534ecc-5f84-4ea3-9a20-47869de9a2a5,d2db5f44-7869-4557-98ff-614e29799e01,4eb7ba6b-e738-4ac4-8c09-56b7f247f300,edf63565-adfe-46c8-b533-086c2c4ee72d,940d3a7c-92cb-44b8-88f3-4a02b2169d05,73b75357-233b-492a-b84c-964ccc74c53b,9cd2694b-4509-4451-98a7-f4e7fa467c5f,c0adaf5e-a28b-4214-bd31-41e4a56e6263,c476e42a-4fec-4cad-9dc3-7c66f29b35a4,281b7dff-68ae-48a9-8d78-3b58b9900839,a0855f49-5859-488e-ad33-57b80de54702,119df949-3585-4c64-a44e-9084c1bbcbce,460a2ebe-6743-4634-857e-dc5c3f54ae02,60e5b960-5211-433e-b7c7-1ded5e54b9ae,a351432e-d522-4b39-b10c-38aed1b9216b,a38015ef-d5aa-4a80-a22f-435171dc3bcc,7af51791-cc0c-4d86-9825-c28ad0c2d5b0,9c866c60-1c4f-4d0c-b617-fec82baa45a3,17a7ccf2-95f2-4892-b147-050d7132deb6,1facb6f4-d0a6-46c0-b3b0-bbde62292ebe,35754bd3-fd89-4b9b-9ed9-c9d2a0f1c170,de99d873-a85e-43d5-b110-1fc036802d73,caaf7435-c6f0-483e-a21b-c38f4ca952e9,e28ce398-7181-4d95-ada9-4adbb1b4d4d5,12aad3e7-19a2-42aa-8552-d961704d7914,6f370591-d0f8-4ffe-93d9-8b5abf7da8dc,a686174c-7535-4903-8f27-a59549443026,7a733be3-1f47-42ff-b4ea-0ea52655e316,5db17728-c402-4b3b-9109-cb82e8ab7aee,53ff968f-40e5-4f9b-a99a-052a406eed44,eeb45b4f-95f5-476a-8d6f-98b6a3a19046,333b63b9-6c28-4254-b201-243b14c526bf,81db9ea3-7742-43da-a184-843f40b9f823,84ad83cf-3c12-45b9-8335-fe56a363d76d,ff584697-6d04-4ae1-acb4-743977c57977,5b0dea7c-43e3-48b1-b3c4-e939c35658e0,b9495b37-e6d7-4dad-98d0-d3e0839e87ac,0ed81a5a-e4a3-465c-8906-62c2cc37d7ed,ad553122-e817-4aef-856f-6fe257873a26,a41f0ee2-27fc-4149-beae-f39244c9adce,18101fe4-c173-4292-bfbe-152a6337eae2,2360b389-0356-41c3-a627-dbdba2856b0e,826e9468-fcd3-4c04-822a-d9596e9e9582,6e4ed26f-edf7-4227-ac40-30c375a1a650,dbc060a0-49dd-407e-bf7e-9b3265a6945b,0a0c5ddf-5de9-4a77-a3c8-89b991553992,74b6a3df-ef0a-4d05-9163-db4939fd1106,48126337-b060-42e2-8924-bccee5af31a1,73d9959d-9ef7-44b2-b85f-7563b1d5e593,796cbdc3-4a5b-49d4-9545-82429d02b27b,44708cf9-a312-461e-924f-9034f9b39b25,0c290bc7-425a-469d-82d8-badeb7bb595b,e8159ddd-778f-43cf-892d-ff78f60d0b02,1fbb9c07-e628-484f-98ad-5e1093d7c145,f414ccf5-4c1a-4c0b-b291-aaa25a9a46c4,96439f08-ca06-435c-a771-6e40fa22857a,8abc0c12-d60f-4edb-9bc6-bd4f1c238c91,49fa7f18-56c5-4728-806e-8b64a2bdd9b6,d8ac7f95-1c54-4993-8e2b-d99c61d685bf,330f04b2-00c3-4efc-bedc-7e0f7f848c63,0565974c-af91-4d19-87ff-f4fb6a4468b2,15a56ab9-0996-4223-ab4d-510087f1e279,0c3a4ad5-78c6-4039-8ee5-1d60682b8908,6f1208e5-69c9-4ce1-9179-82de7ff0cf57,ef0c6093-adf9-431f-ac85-3e1a0a6c8666,da469ead-14da-4303-99df-37e635a72804,cb15246a-fffb-4685-b744-1dd94b63b4a8,780adf24-57ef-4cab-a849-4bca7c64e08a,6a92e566-c11a-47c3-ae69-de6fed4f2310,681e9b03-8d29-4699-bc7a-d1192bcc2404,79e3c2f3-51c3-48da-9dc3-d098ac4442de,8a6e64c0-02c7-4ff5-86b2-8fbd1b15b154,2e7eed6b-bd42-4336-95c0-afe8e7960ca9,5457c49c-cbb5-4d69-b276-aa223b2558fe,707014d9-6fcf-4489-aa17-c6197a4e221c,9c4febe9-3621-42f0-8b5e-85f67426cea8,bf669c35-bd77-41e2-a5ff-66d48092e031,0c8fc66f-2d21-49d9-a2ba-a0260500111b,63cae761-49f7-4ece-83f9-20c6f19a812d,874fe822-ae6e-4671-b555-b3912c461025,a2397c5d-94e0-4439-a649-d9a30af59a82,f8731a26-92a1-49dd-b391-68b9578e6d8b,bf9d76f1-5e91-48a4-bef8-3df6eb898734,452c94af-a7f8-4942-a49f-9afc46bf210a,b8432a45-fd99-4d92-ba4f-cb80f25e71e9,4591f025-8b0a-4fb9-9834-af8cb702611f,cc700816-0a7e-4c00-86d1-892a69822e00,319a2df3-3442-4acb-bd7a-9f79f7651d2c,6fe99c06-ebdc-48b9-bf6c-08828d344c9a,74d81060-f5df-4440-8856-608c1b988775,e1fe0721-4ffa-4969-8f65-ffe8a4a298b8,4fb222fc-04e9-49d1-b3b1-b01a84c8feb6,b2cca417-5fb8-46b8-8152-de86151212c4,d010a9d0-e4ee-4719-ab5f-e5880effa760,23ddbe94-9975-4192-bfec-a15af5f505f4,b9037227-962f-47b8-8bf9-6b0bee73c2e5,9887f970-4ed2-48b9-8434-363a353a3682,fad944c1-e553-4ee3-b6c0-bce8a72ea30f \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_jenkins_5000.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_jenkins_5000.bf.data new file mode 100644 index 000000000000..da6493da86b0 --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_jenkins_5000.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */  \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_murmur_5000.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_murmur_5000.bf.data new file mode 100644 index 000000000000..fab404c60fe6 --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_1000_000001_murmur_5000.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */  \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_200_000001_murmur_1000.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_200_000001_murmur_1000.bf.data new file mode 100644 index 000000000000..5b0558188aa5 --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/dynamic_200_000001_murmur_1000.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/////wAAABQBAAAWeAAAAMgAACPwAAAABf////8AAAAUAQAAFnj////////////////////////////////////////////////////////////////////////////////////7////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////f////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////f////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////8AAAAUAQAAFnjf////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////8AAAAUAQAAFnj/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////3////////////////////////////////////////////////////////////vnjnj////////+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////fw== \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_10000_000001_murmur.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_10000_000001_murmur.bf.data new file mode 100644 index 000000000000..9956cd037419 --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_10000_000001_murmur.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */  \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_1000_000001_murmur.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_1000_000001_murmur.bf.data new file mode 100644 index 000000000000..7ec1c6ec749b --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_1000_000001_murmur.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/////wAAABQBAABwVP///////////////////////////////////////////////////+///////////////////////////////////////////////////////////////////////////////////////////////////////////////+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////v/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////vfw8= \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_200_000001_murmur.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_200_000001_murmur.bf.data new file mode 100644 index 000000000000..0e6bd376b6da --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_200_000001_murmur.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/////wAAABQBAAAWeo newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_jenkins.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_jenkins.bf.data new file mode 100644 index 000000000000..9ab79a06af2c --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_jenkins.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */  \ No newline at end of file diff --git a/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_murmur.bf.data b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_murmur.bf.data new file mode 100644 index 000000000000..9a22fc3bdf1c --- /dev/null +++ b/hudi-common/src/test/resources/format/bloom-filter/hadoop/simple_5000_000001_murmur.bf.data @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */  \ No newline at end of file From ec91bbcfb44989400ad593c1603c5482955548f0 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Thu, 14 Sep 2023 14:25:31 -0400 Subject: [PATCH 005/112] [MINOR] Update cleaner docs (#9716) Co-authored-by: Jonathan Vexler <=> Co-authored-by: Y Ethan Guo --- .../apache/hudi/config/HoodieCleanConfig.java | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java index a129ff950903..a41141520234 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java @@ -35,6 +35,10 @@ import java.io.IOException; import java.util.Properties; +import static org.apache.hudi.common.model.HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS; +import static org.apache.hudi.common.model.HoodieCleaningPolicy.KEEP_LATEST_COMMITS; +import static org.apache.hudi.common.model.HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS; + /** * Clean related config. */ @@ -52,9 +56,9 @@ public class HoodieCleanConfig extends HoodieConfig { .key("hoodie.clean.automatic") .defaultValue("true") .markAdvanced() - .withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit," - + " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage" - + " growth is bounded."); + .withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit, " + + "to delete older file slices. It's recommended to enable this, to ensure metadata and data storage " + + "growth is bounded."); public static final ConfigProperty ASYNC_CLEAN = ConfigProperty .key("hoodie.clean.async") @@ -67,7 +71,7 @@ public class HoodieCleanConfig extends HoodieConfig { @Deprecated public static final ConfigProperty CLEANER_POLICY = ConfigProperty .key("hoodie.cleaner.policy") - .defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()) + .defaultValue(KEEP_LATEST_COMMITS.name()) .withDocumentation(HoodieCleaningPolicy.class) .markAdvanced() .withInferFunction(cfg -> { @@ -81,13 +85,13 @@ public class HoodieCleanConfig extends HoodieConfig { // "hoodie.cleaner.hours.retained" (inferred as KEEP_LATEST_BY_HOURS) // "hoodie.cleaner.fileversions.retained" (inferred as KEEP_LATEST_FILE_VERSIONS) if (isCommitsRetainedConfigured && !isHoursRetainedConfigured && !isFileVersionsRetainedConfigured) { - return Option.of(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()); + return Option.of(KEEP_LATEST_COMMITS.name()); } if (!isCommitsRetainedConfigured && isHoursRetainedConfigured && !isFileVersionsRetainedConfigured) { - return Option.of(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS.name()); + return Option.of(KEEP_LATEST_BY_HOURS.name()); } if (!isCommitsRetainedConfigured && !isHoursRetainedConfigured && isFileVersionsRetainedConfigured) { - return Option.of(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name()); + return Option.of(KEEP_LATEST_FILE_VERSIONS.name()); } return Option.empty(); }); @@ -95,22 +99,23 @@ public class HoodieCleanConfig extends HoodieConfig { public static final ConfigProperty CLEANER_COMMITS_RETAINED = ConfigProperty .key(CLEANER_COMMITS_RETAINED_KEY) .defaultValue("10") - .withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits " - + "(scheduled). This also directly translates into how much data retention the table supports for incremental queries."); + .withDocumentation("When " + KEEP_LATEST_COMMITS.name() + " cleaning policy is used, the number of commits to retain, without cleaning. " + + "This will be retained for num_of_commits * time_between_commits (scheduled). This also directly translates into how much " + + "data retention the table supports for incremental queries."); public static final ConfigProperty CLEANER_HOURS_RETAINED = ConfigProperty.key(CLEANER_HOURS_RETAINED_KEY) .defaultValue("24") .markAdvanced() - .withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as" - + "compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group," - + " corresponding to commits with commit times older than the configured number of hours to be retained are cleaned."); + .withDocumentation("When " + KEEP_LATEST_BY_HOURS.name() + " cleaning policy is used, the number of hours for which commits need to be retained. " + + "This config provides a more flexible option as compared to number of commits retained for cleaning service. Setting this property ensures " + + "all the files, but the latest in a file group, corresponding to commits with commit times older than the configured number of hours to be retained are cleaned."); public static final ConfigProperty CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty .key(CLEANER_FILE_VERSIONS_RETAINED_KEY) .defaultValue("3") .markAdvanced() - .withDocumentation("When " + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, " - + " the minimum number of file slices to retain in each file group, during cleaning."); + .withDocumentation("When " + KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, " + + "the minimum number of file slices to retain in each file group, during cleaning."); public static final ConfigProperty CLEAN_TRIGGER_STRATEGY = ConfigProperty .key("hoodie.clean.trigger.strategy") @@ -129,8 +134,8 @@ public class HoodieCleanConfig extends HoodieConfig { .defaultValue("true") .markAdvanced() .withDocumentation("When enabled, the plans for each cleaner service run is computed incrementally off the events " - + " in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full" - + " table for each planning (even with a metadata table)."); + + "in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full " + + "table for each planning (even with a metadata table)."); public static final ConfigProperty FAILED_WRITES_CLEANER_POLICY = ConfigProperty .key("hoodie.cleaner.policy.failed.writes") @@ -175,9 +180,9 @@ public class HoodieCleanConfig extends HoodieConfig { .defaultValue("false") .markAdvanced() .withDocumentation("When set to true, cleaner also deletes the bootstrap base file when it's skeleton base file is " - + " cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the" - + " table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap " - + " base files are also physically deleted, to comply with data privacy enforcement processes."); + + "cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the " + + "table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap " + + "base files are also physically deleted, to comply with data privacy enforcement processes."); /** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */ From 3998ef60dfbc204c109561cee3762f0f0bb6f5a8 Mon Sep 17 00:00:00 2001 From: Mulavar <978007503@qq.com> Date: Sun, 17 Sep 2023 12:59:25 +0800 Subject: [PATCH 006/112] [MINOR] Move hoodie hfile/orc reader/writer test cases from hudi-client-common to hudi-common (#9103) Co-authored-by: Y Ethan Guo --- .../io/storage/TestHoodieHFileReaderWriter.java | 15 ++++++--------- .../io/storage/TestHoodieOrcReaderWriter.java | 2 +- .../io/storage/TestHoodieReaderWriterBase.java | 0 .../src/test/resources/exampleEvolvedSchema.avsc | 0 .../exampleEvolvedSchemaChangeOrder.avsc | 0 .../exampleEvolvedSchemaColumnRequire.avsc | 0 .../exampleEvolvedSchemaColumnType.avsc | 0 .../exampleEvolvedSchemaDeleteColumn.avsc | 0 .../src/test/resources/exampleSchema.avsc | 0 .../resources/exampleSchemaWithMetaFields.avsc | 0 .../src/test/resources/exampleSchemaWithUDT.avsc | 0 ..._hbase_1_2_3_bootstrap_index_partitions.hfile | Bin .../hudi_0_10_hbase_1_2_3_complex.hfile | Bin .../resources/hudi_0_10_hbase_1_2_3_simple.hfile | Bin ..._hbase_2_4_9_bootstrap_index_partitions.hfile | Bin .../hudi_0_11_hbase_2_4_9_complex.hfile | Bin .../resources/hudi_0_11_hbase_2_4_9_simple.hfile | Bin ..._hbase_1_2_3_bootstrap_index_partitions.hfile | Bin .../resources/hudi_0_9_hbase_1_2_3_complex.hfile | Bin .../resources/hudi_0_9_hbase_1_2_3_simple.hfile | Bin 20 files changed, 7 insertions(+), 10 deletions(-) rename {hudi-client/hudi-client-common => hudi-common}/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java (97%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleEvolvedSchema.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleEvolvedSchemaChangeOrder.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleEvolvedSchemaColumnRequire.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleEvolvedSchemaColumnType.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleEvolvedSchemaDeleteColumn.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleSchema.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleSchemaWithMetaFields.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/exampleSchemaWithUDT.avsc (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile (100%) rename {hudi-client/hudi-client-common => hudi-common}/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile (100%) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java similarity index 97% rename from hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java rename to hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index af4de5b771ed..a7de5fe396b6 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -19,16 +19,16 @@ package org.apache.hudi.io.storage; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; +import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.config.HoodieIndexConfig; -import org.apache.hudi.config.HoodieWriteConfig; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; @@ -57,6 +57,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; @@ -95,20 +96,16 @@ protected Path getFilePath() { protected HoodieAvroHFileWriter createWriter( Schema avroSchema, boolean populateMetaFields) throws Exception { String instantTime = "000"; - HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() - .withPath(DUMMY_BASE_PATH) - .withIndexConfig(HoodieIndexConfig.newBuilder() - .bloomFilterNumEntries(1000).bloomFilterFPP(0.00001).build()) - .withPopulateMetaFields(populateMetaFields) - .build(); Configuration conf = new Configuration(); + Properties props = new Properties(); + props.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), Boolean.toString(populateMetaFields)); TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); Supplier partitionSupplier = Mockito.mock(Supplier.class); when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier); when(partitionSupplier.get()).thenReturn(10); return (HoodieAvroHFileWriter)HoodieFileWriterFactory.getFileWriter( - instantTime, getFilePath(), conf, writeConfig.getStorageConfig(), avroSchema, mockTaskContextSupplier, writeConfig.getRecordMerger().getRecordType()); + instantTime, getFilePath(), conf, HoodieStorageConfig.newBuilder().fromProperties(props).build(), avroSchema, mockTaskContextSupplier, HoodieRecord.HoodieRecordType.AVRO); } @Override diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java similarity index 100% rename from hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java rename to hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java index 438024d2f268..98614be25c3e 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java @@ -22,8 +22,8 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.bloom.BloomFilterTypeCode; -import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.avro.Schema; diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java similarity index 100% rename from hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java rename to hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterBase.java diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchema.avsc b/hudi-common/src/test/resources/exampleEvolvedSchema.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchema.avsc rename to hudi-common/src/test/resources/exampleEvolvedSchema.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaChangeOrder.avsc b/hudi-common/src/test/resources/exampleEvolvedSchemaChangeOrder.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaChangeOrder.avsc rename to hudi-common/src/test/resources/exampleEvolvedSchemaChangeOrder.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaColumnRequire.avsc b/hudi-common/src/test/resources/exampleEvolvedSchemaColumnRequire.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaColumnRequire.avsc rename to hudi-common/src/test/resources/exampleEvolvedSchemaColumnRequire.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaColumnType.avsc b/hudi-common/src/test/resources/exampleEvolvedSchemaColumnType.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaColumnType.avsc rename to hudi-common/src/test/resources/exampleEvolvedSchemaColumnType.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaDeleteColumn.avsc b/hudi-common/src/test/resources/exampleEvolvedSchemaDeleteColumn.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleEvolvedSchemaDeleteColumn.avsc rename to hudi-common/src/test/resources/exampleEvolvedSchemaDeleteColumn.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleSchema.avsc b/hudi-common/src/test/resources/exampleSchema.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleSchema.avsc rename to hudi-common/src/test/resources/exampleSchema.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleSchemaWithMetaFields.avsc b/hudi-common/src/test/resources/exampleSchemaWithMetaFields.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleSchemaWithMetaFields.avsc rename to hudi-common/src/test/resources/exampleSchemaWithMetaFields.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/exampleSchemaWithUDT.avsc b/hudi-common/src/test/resources/exampleSchemaWithUDT.avsc similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/exampleSchemaWithUDT.avsc rename to hudi-common/src/test/resources/exampleSchemaWithUDT.avsc diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile rename to hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile b/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile rename to hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile b/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile rename to hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile b/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile rename to hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile b/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile rename to hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile b/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile rename to hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile rename to hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile b/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile rename to hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile diff --git a/hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile b/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile similarity index 100% rename from hudi-client/hudi-client-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile rename to hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile From 82bd7658f10bd11c1361b74edc10e62f37581b2d Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Thu, 21 Sep 2023 12:31:32 -0700 Subject: [PATCH 007/112] [MINOR] Mark advanced configs and fix since version (#9757) --- .../java/org/apache/hudi/config/HoodieCompactionConfig.java | 2 +- .../main/java/org/apache/hudi/config/HoodieWriteConfig.java | 3 ++- .../org/apache/hudi/common/config/HoodieMetadataConfig.java | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java index 19e2678c8ae5..1fe86b52cbce 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java @@ -65,7 +65,7 @@ public class HoodieCompactionConfig extends HoodieConfig { .key("hoodie.log.compaction.enable") .defaultValue("false") .markAdvanced() - .sinceVersion("0.14") + .sinceVersion("0.14.0") .withDocumentation("By enabling log compaction through this config, log compaction will also get enabled for the metadata table."); public static final ConfigProperty INLINE_LOG_COMPACT = ConfigProperty diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 0cf1f287976c..be16c3e4cb9e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -562,7 +562,8 @@ public class HoodieWriteConfig extends HoodieConfig { public static final ConfigProperty NUM_RETRIES_ON_CONFLICT_FAILURES = ConfigProperty .key("hoodie.write.num.retries.on.conflict.failures") .defaultValue(0) - .sinceVersion("0.13.0") + .markAdvanced() + .sinceVersion("0.14.0") .withDocumentation("Maximum number of times to retry a batch on conflict failure."); public static final ConfigProperty WRITE_SCHEMA_OVERRIDE = ConfigProperty diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 71a38d0c2558..5fb897c67e99 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -25,6 +25,7 @@ import org.apache.hudi.exception.HoodieNotSupportedException; import javax.annotation.concurrent.Immutable; + import java.io.File; import java.io.FileReader; import java.io.IOException; @@ -91,7 +92,7 @@ public final class HoodieMetadataConfig extends HoodieConfig { .key(METADATA_PREFIX + ".log.compaction.enable") .defaultValue("false") .markAdvanced() - .sinceVersion("0.14") + .sinceVersion("0.14.0") .withDocumentation("This configs enables logcompaction for the metadata table."); // Log blocks threshold, after a file slice crosses this threshold log compact operation is scheduled. @@ -281,6 +282,7 @@ public final class HoodieMetadataConfig extends HoodieConfig { public static final ConfigProperty RECORD_INDEX_MAX_PARALLELISM = ConfigProperty .key(METADATA_PREFIX + ".max.init.parallelism") .defaultValue(100000) + .markAdvanced() .sinceVersion("0.14.0") .withDocumentation("Maximum parallelism to use when initializing Record Index."); @@ -309,6 +311,7 @@ public final class HoodieMetadataConfig extends HoodieConfig { public static final ConfigProperty MAX_LOG_FILE_SIZE_BYTES_PROP = ConfigProperty .key(METADATA_PREFIX + ".max.logfile.size") .defaultValue(2 * 1024 * 1024 * 1024L) // 2GB + .markAdvanced() .sinceVersion("0.14.0") .withDocumentation("Maximum size in bytes of a single log file. Larger log files can contain larger log blocks " + "thereby reducing the number of blocks to search for keys"); From 52c42f86a48a8afe22140dbff3c5351f8f02ac44 Mon Sep 17 00:00:00 2001 From: Prashant Wason Date: Thu, 28 Sep 2023 14:24:04 -0700 Subject: [PATCH 008/112] [HUDI-53] Update RFC-8 for Metadata based Record Index (#9775) --- rfc/README.md | 146 +++++++++---------- rfc/rfc-8/metadata_record_index.jpg | Bin 0 -> 42413 bytes rfc/rfc-8/rfc-8.md | 219 ++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 73 deletions(-) create mode 100644 rfc/rfc-8/metadata_record_index.jpg create mode 100644 rfc/rfc-8/rfc-8.md diff --git a/rfc/README.md b/rfc/README.md index 0c5475233de3..a43751f98517 100644 --- a/rfc/README.md +++ b/rfc/README.md @@ -34,77 +34,77 @@ The list of all RFCs can be found here. > Older RFC content is still [here](https://cwiki.apache.org/confluence/display/HUDI/RFC+Process). -| RFC Number | Title | Status | -|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| -| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | -| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `COMPLETED` | -| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | -| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | -| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | -| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | -| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | -| 8 | [Record level indexing mechanisms for Hudi datasets](https://cwiki.apache.org/confluence/display/HUDI/RFC-08++Record+level+indexing+mechanisms+for+Hudi+datasets) | `ONGOING` | -| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | -| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | -| 11 | [Refactor of the configuration framework of hudi project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | -| 12 | [Efficient Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | -| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | -| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | -| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | -| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | -| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | -| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | -| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | -| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `ONGOING` | -| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | +| RFC Number | Title | Status | +|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| +| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | +| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `COMPLETED` | +| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | +| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | +| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | +| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | +| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | +| 8 | [Metadata based Record Index](./rfc-8/rfc-8.md) | `COMPLETED` | +| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | +| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | +| 11 | [Refactor of the configuration framework of hudi project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | +| 12 | [Efficient Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | +| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | +| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | +| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | +| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | +| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | +| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | +| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | +| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `ONGOING` | +| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | | 22 | [Snapshot Isolation using Optimistic Concurrency Control for multi-writers](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+22+%3A+Snapshot+Isolation+using+Optimistic+Concurrency+Control+for+multi-writers) | `COMPLETED` | -| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection) | `ABANDONED` | -| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `COMPLETED` | -| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | -| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `COMPLETED` | -| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `COMPLETED` | -| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `COMPLETED` | -| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `COMPLETED` | -| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `ABANDONED` | -| 31 | [Hive integration Improvement](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment) | `ONGOING` | -| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi) | `ONGOING` | -| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution) | `COMPLETED` | -| 34 | [Hudi BigQuery Integration](./rfc-34/rfc-34.md) | `COMPLETED` | -| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` | -| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `ONGOING` | -| 37 | [Hudi Metadata based Bloom Index](rfc-37/rfc-37.md) | `ONGOING` | -| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `COMPLETED` | -| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `COMPLETED` | -| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `COMPLETED` | -| 41 | [Hudi Snowflake Integration](./rfc-41/rfc-41.md) | `IN PROGRESS` | -| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `ONGOING` | -| 43 | [Table Management Service](./rfc-43/rfc-43.md) | `IN PROGRESS` | -| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `COMPLETED` | -| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `COMPLETED` | -| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `ONGOING` | -| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `COMPLETED` | -| 48 | [LogCompaction for MOR tables](./rfc-48/rfc-48.md) | `ONGOING` | -| 49 | [Support sync with DataHub](./rfc-49/rfc-49.md) | `COMPLETED` | -| 50 | [Improve Timeline Server](./rfc-50/rfc-50.md) | `IN PROGRESS` | -| 51 | [Change Data Capture](./rfc-51/rfc-51.md) | `ONGOING` | -| 52 | [Introduce Secondary Index to Improve HUDI Query Performance](./rfc-52/rfc-52.md) | `ONGOING` | -| 53 | [Use Lock-Free Message Queue Improving Hoodie Writing Efficiency](./rfc-53/rfc-53.md) | `COMPLETED` | -| 54 | [New Table APIs and Streamline Hudi Configs](./rfc-54/rfc-54.md) | `UNDER REVIEW` | -| 55 | [Improve Hive/Meta sync class design and hierarchies](./rfc-55/rfc-55.md) | `COMPLETED` | -| 56 | [Early Conflict Detection For Multi-Writer](./rfc-56/rfc-56.md) | `COMPLETED` | -| 57 | [DeltaStreamer Protobuf Support](./rfc-57/rfc-57.md) | `COMPLETED` | -| 58 | [Integrate column stats index with all query engines](./rfc-58/rfc-58.md) | `UNDER REVIEW` | -| 59 | [Multiple event_time Fields Latest Verification in a Single Table](./rfc-59/rfc-59.md) | `UNDER REVIEW` | -| 60 | [Federated Storage Layer](./rfc-60/rfc-60.md) | `IN PROGRESS` | -| 61 | [Snapshot view management](./rfc-61/rfc-61.md) | `UNDER REVIEW` | -| 62 | [Diagnostic Reporter](./rfc-62/rfc-62.md) | `UNDER REVIEW` | -| 63 | [Index on Function and Logical Partitioning](./rfc-63/rfc-63.md) | `UNDER REVIEW` | -| 64 | [New Hudi Table Spec API for Query Integrations](./rfc-64/rfc-64.md) | `UNDER REVIEW` | -| 65 | [Partition TTL Management](./rfc-65/rfc-65.md) | `UNDER REVIEW` | -| 66 | [Lockless Multi-Writer Support](./rfc-66/rfc-66.md) | `UNDER REVIEW` | -| 67 | [Hudi Bundle Standards](./rfc-67/rfc-67.md) | `UNDER REVIEW` | -| 68 | [A More Effective HoodieMergeHandler for COW Table with Parquet](./rfc-68/rfc-68.md) | `UNDER REVIEW` | -| 69 | [Hudi 1.x](./rfc-69/rfc-69.md) | `UNDER REVIEW` | -| 70 | [Hudi Reverse Streamer](./rfc/rfc-70/rfc-70.md) | `UNDER REVIEW` | -| 71 | [Enhance OCC conflict detection](./rfc/rfc-71/rfc-71.md) | `UNDER REVIEW` | -| 72 | [Redesign Hudi-Spark Integration](./rfc/rfc-72/rfc-72.md) | `ONGOING` | \ No newline at end of file +| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection) | `ABANDONED` | +| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `COMPLETED` | +| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | +| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `COMPLETED` | +| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `COMPLETED` | +| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `COMPLETED` | +| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `COMPLETED` | +| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `ABANDONED` | +| 31 | [Hive integration Improvement](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment) | `ONGOING` | +| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi) | `ONGOING` | +| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution) | `COMPLETED` | +| 34 | [Hudi BigQuery Integration](./rfc-34/rfc-34.md) | `COMPLETED` | +| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` | +| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `ONGOING` | +| 37 | [Hudi Metadata based Bloom Index](rfc-37/rfc-37.md) | `ONGOING` | +| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `COMPLETED` | +| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `COMPLETED` | +| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `COMPLETED` | +| 41 | [Hudi Snowflake Integration](./rfc-41/rfc-41.md) | `IN PROGRESS` | +| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `ONGOING` | +| 43 | [Table Management Service](./rfc-43/rfc-43.md) | `IN PROGRESS` | +| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `COMPLETED` | +| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `COMPLETED` | +| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `ONGOING` | +| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `COMPLETED` | +| 48 | [LogCompaction for MOR tables](./rfc-48/rfc-48.md) | `ONGOING` | +| 49 | [Support sync with DataHub](./rfc-49/rfc-49.md) | `COMPLETED` | +| 50 | [Improve Timeline Server](./rfc-50/rfc-50.md) | `IN PROGRESS` | +| 51 | [Change Data Capture](./rfc-51/rfc-51.md) | `ONGOING` | +| 52 | [Introduce Secondary Index to Improve HUDI Query Performance](./rfc-52/rfc-52.md) | `ONGOING` | +| 53 | [Use Lock-Free Message Queue Improving Hoodie Writing Efficiency](./rfc-53/rfc-53.md) | `COMPLETED` | +| 54 | [New Table APIs and Streamline Hudi Configs](./rfc-54/rfc-54.md) | `UNDER REVIEW` | +| 55 | [Improve Hive/Meta sync class design and hierarchies](./rfc-55/rfc-55.md) | `COMPLETED` | +| 56 | [Early Conflict Detection For Multi-Writer](./rfc-56/rfc-56.md) | `COMPLETED` | +| 57 | [DeltaStreamer Protobuf Support](./rfc-57/rfc-57.md) | `COMPLETED` | +| 58 | [Integrate column stats index with all query engines](./rfc-58/rfc-58.md) | `UNDER REVIEW` | +| 59 | [Multiple event_time Fields Latest Verification in a Single Table](./rfc-59/rfc-59.md) | `UNDER REVIEW` | +| 60 | [Federated Storage Layer](./rfc-60/rfc-60.md) | `IN PROGRESS` | +| 61 | [Snapshot view management](./rfc-61/rfc-61.md) | `UNDER REVIEW` | +| 62 | [Diagnostic Reporter](./rfc-62/rfc-62.md) | `UNDER REVIEW` | +| 63 | [Index on Function and Logical Partitioning](./rfc-63/rfc-63.md) | `UNDER REVIEW` | +| 64 | [New Hudi Table Spec API for Query Integrations](./rfc-64/rfc-64.md) | `UNDER REVIEW` | +| 65 | [Partition TTL Management](./rfc-65/rfc-65.md) | `UNDER REVIEW` | +| 66 | [Lockless Multi-Writer Support](./rfc-66/rfc-66.md) | `UNDER REVIEW` | +| 67 | [Hudi Bundle Standards](./rfc-67/rfc-67.md) | `UNDER REVIEW` | +| 68 | [A More Effective HoodieMergeHandler for COW Table with Parquet](./rfc-68/rfc-68.md) | `UNDER REVIEW` | +| 69 | [Hudi 1.x](./rfc-69/rfc-69.md) | `UNDER REVIEW` | +| 70 | [Hudi Reverse Streamer](./rfc/rfc-70/rfc-70.md) | `UNDER REVIEW` | +| 71 | [Enhance OCC conflict detection](./rfc/rfc-71/rfc-71.md) | `UNDER REVIEW` | +| 72 | [Redesign Hudi-Spark Integration](./rfc/rfc-72/rfc-72.md) | `ONGOING` | diff --git a/rfc/rfc-8/metadata_record_index.jpg b/rfc/rfc-8/metadata_record_index.jpg new file mode 100644 index 0000000000000000000000000000000000000000..52083e81728f791b23ddf3ceb41fae717b042f74 GIT binary patch literal 42413 zcmeFZWmH^Uw0KqM|6>h;D0we@?E8HD|6WkpN*HA%1 zf_1&`>GR!tzWb;9j?>*|^!Rp-HTY4R&3yJ+bFKNz`Ai-b9yWj{%JNF`0Lmi(KtX{Cxrc{X%&JLPdLwj)94Vjl7}$3GfI71bT!DLPJAEMc(a;{2o9h zKqGw0E&Z5C-58z0iI^uaAqRs|rn;L%WAcoN*Tgvp6N{9LoPv`184K(47kvBzf_}-)d@ULv(cYOwG(KEUm03H!@?sX6O%qAr=))Vl9rp7 zUr<<7TvA$7TUX!E*woz8^R2h9e_(KEcxrlPc5Z%QacL92wY{^uw|{Vm_<4SDdG+i1 z_n*ITp&)Vo`}J?Y{s|WW64xVCR1hlqU${^nc_0Uf02S>i_hUk7b#!AVA_ks73}Tst zoa$~&MqZ6G5)W|-p9Yz z$A6wT>>mH%Up69MI`Ask&G5jTP44Tdhb46lPKvPww?I${xGxTnZu%?hr`mjWtp4w3}eb&<^3&L@ahM~R&+>iXfAQsJw_Zb~8Ka;; z6$K+9Cq$S=UQTVw$FpEy6ZYu5mom8^`LyVLqrN^X>?6*$Wj5_@fXE*GlqvsWW&-Kw3UUCuC9`28I_{Z(E2z5V(D5$H1iBQCzMi?~` zxKw6CH$oJJiE;6%9qF~Vr`N;`Q6=icy#o|!_ac4wqHZOP?TS`b4jT>}imUW>YhF7i zMH`3F++-Ok1IxL!9Jg}$tG11l%huVX*&?khZ)NVpuhoZkKz;OR4{&5`S^y}|S zn%=xA@#5fhj~>MALJv+D+JgaJXe2xCYrKfcFfo@rD_X9`$-Tze5hkL8)^2(7qt*t+ zlXc-OlG*iJ#WVNR(vC0QaBLw5f-4b1MfnR05b$RkAw{D?5?l_Q{+3DCq{DAuxHcyr ze1cV>I!f!*^xkEh{ODbKW8Vqk4IHg93}zWvZrP{}?$PIYvm+y47R!U@?CY(Do~LeT ze1NGd~&(^cSxYW|-)on%Dyail`#xb9o-*lPB3iZr|-Lh!d$aen{(Jsj^=qPy} zd%!~IkPtdLgom!CL?>AP7n-pnL$R~QFo(GT?YR@Xig5H{6Wvd=Ujxt`wj!H-;W79s zbv3PRvmCh%9d5BpCcl+*o5_v)#s8>1t|tl3xt{*!$K0AZFF$)y$r4}>-C_|Mf&=T{lFtVbhjXhWGnEtS9`zd;;IE*`4c#sN3vCDvmv7pdW_JDyg(a4`*@q|cLZHUC>bF|gCFM|6gWBl})Y@d+?B-_AJg$Hr z^-e+D55PPz%CV+4Y3G(C9@J1Vcdp*nTiAj^aV4?WYvYmBuY-3FK^%~Ix--*mH z*miFwAKNZMLY5AplrY07H9(yP{III9Rl5${RN+#s?P#An$!%;`HVo6Mw|kDl;ns=Kw1qB?=QLm_2YgUBwDd&0nY-yXB$;mGFV;0&IOGMB zY%X~eP82xeVePRL@3x4`!X<<2U|JU=Kc48xLSw48nBrshzduH~9QykOj^D~5RKXKs zZUs;;UHT_)V9Yzq!VFK;K6e4mIwyM*_xgGRy^@$#lnK-I;?8qxCr&Ml&5@Rc$Dp|W z01=xPLM)+&Yobw}8an1`h{0&AnuK_18flWP4n~AR9~Yjj7v~oj!BAQii|yrQKK1W1 zPc(gg_oI)AfYt)U(<0$H2p1?A93%1cNKd?@fG^sF)X-QvE}ACa{wUKaGH+`jwo=8=sro7;THGC)f{@21N)%Bxq`(jF%qDX_g&F+JId zGe;M6Yv)on1YhJYvEw-5^HtV&{e&B6qT%VRYAAyAp&gRo?Cbe&ek_?fd`KTUwF=#f z!NftYz(9?2>T^&-kt6^OYknJ@&UN8D{v(S z)aCwb4Tz-WUq}oLbsk*k#%(p@ zvZD~bq+|XfFly-6j2i`L9VT0t(sC^buf)(fU>N9aaVhC&40H0%*ClgJPgDhh&}q28 zp%}@bX)%=ssJ_Fi3nUhPpXYSwGz$3q+SIO|_knEm)gc6zIm?N@>w9xx$X>Yu=&{yf z-2w9Qj^2NwD9>84BmZDB#0pp_ zPJ*ecwfg<)_|#j=>a4~(-6De&rUJwi^fJH6T*qyy_!~_XRG-aX$#mOMPo(No_?eK* zSmL079)QIydW?ED$w&$MNscvqT!Y1O_q`GB{HbE9FPeet$jJ?ypBRf`Aa=tkDmIR{ z!U%kXrWRmoFVqJ>J;KFA^snryMEHX+*QVy+M}yH0$wKZd)D4)c*OYmc(6F9+21M4k zFJipFqMa|XbBq4xIeJ}zUL}C=-t;f&1Ei0w1)gc17^N1`u!6pE z)-6ZH(fMdk*M&EX3nbH}63bF8=E_KcTFvgCHRBW8-cxj3v*cOLk**_ztDyHYL5s<- z1#^Rp>bNiAKgJXaeC0`GNwGk*Mbv2yNo1op`M+cJcc`+#i}>b()Rymd9;4b_scLc(Ofu zf|SNQS7T!g9{?ux?>Xi%)S!5B%QTJ1Uc&5zi3H?q1#grMQSt< zu2yxkH1d2X034UldoqY*=5ywDI%ew)WYE}fT9OA-hqoxyfJKM#^{2X+tk zdz%(lnh|8ve^is|5;~BTqpGQ8sO5U0I7Fum{O&vHm_93lyFVoq^Ey)O64bf(C z`s2)v<&;=^73}pW)rG~JOda>$FtVaXKE-<}wI_bGDnpNo_D)bewk`@sv9yem)u`oB zU|azJl}{~)di2mfL(F!@!u+SX?ANR)ZB1F0U}GnxVdyjp+1vge(JYgdFh8%`7^}?JWHVU_lvwd-hNi;(n6n@p>zbLaU!b7GH)}idNmu($3>X z`A{B?vGdcFKT-S;Z*G-x+p>c^u0Q{pg;EwhDmy|(jZQh69tDU&;Iwgforpl!?AFY^o^vMCyY{K%2VfwelAGa&?zJrpn(_ebrk)5;nyKtp z4Fvzb{dlw6(X`asXh+pzNj>dTe08$W>XL3637i>`inXxx`&!&+&b^<6Ik4xEdU$wQ zE;A-PVzrtvvlGU>yp?z6&v=0=xnUUNLY)c`u&vMk_`pc*v&mDG2n^Nc zpK+-N2X8RS(pf2v0_Ot6=-$rX3c|Uf{6H;dD>f7IMmg+XQfk~6c|&pD zk||$58-KMLq74lYBhqZ2a?K*U<({kef6muG8Ia}?-csv{%IS!w$lj@(2Ost2a%giY zYD=M{9{efO@Y31@oB>9uI(do?+#LsY36Hy6fMtnF%jBbpccj%EeT%u}&CYRbjHoH&}mZP(Id?Fj&m(a^NQ znWY(Xkd6-h73U*IliIt_ZqGbj$3*GmHdn#-qi&Q_|4!3$i^eN>%-7Lp!{v+!(ulJ3 z3L$~>;VZQ{8Blqa+2*y|Q3TzJFLBj=;hZ16aihM|@A8w2g(bEyKuqLDYf?-X!PZp` zm03*jKjq}S&XrThJll{-FS1Zo$#T0yhfWL_$k{s^+klfDMU%$fYd;lgJk|!Yl z-x%b-USeszO@b=!UfVKL#P$ctMe@*^mA=3_QPcBmKLlSwa zq;FG5#f%{|t{y7~CJXnG%eQSY2#c_DopvZ~l|}2%9KZPzl!CGOCkW*mj_rN7N>i$Y0SU`t}{BVm_rV2VcDn_SOuR0vdxkv z)XFJjpQ94QD|l_*KR-x|&DmJh+MLZC>seQAN`pR??$}Jm)Z+9zbQ%P)6gnA5hIMZ1 zPIVrQ(cJ-RkCRD8jzqE|IqHQOra4?OIRJwgPd|iGx=S*K)ZSX%1vX6kv|ouIT6cYl zn$KF*WSwM>uA${Q1|28>tVW!0sKgHP;=EB+jvw!e1LJQ`G$6Rea@H`PchmH)p$I;h zi1n%tI_WuI!f&$&!0oJD!3SrS&7b_bIR3T4Y*hTK#*8y{SlUp-^%M2G6% zM%D{Hm7=18rZEe9$e7%!vzE8+(J6?hN;~d5RiHQuZ9NZ|`fFPwWT{WzdJN;(_o&y0 zhVbB?^lYubH}61@j50M`o4-ZVSmFWT@fke1l~e4AmB;;Xu=d;t%bJ#{NoOv(7)%kFt82Be!|h4Sdwpsl`PCekTfm4`*lN-nm;VyLR5NFmFV4X?eI*u_HgJrs5E1XAwXQt-fmGn z1v)(Fo`d&tV8SaC7i=iJQMPYpa7mIs+B$5wMEw>lw;n zx0Ic&Mjtf0!(}i|C^dl;4UA;yK(8{2Wo9Gte)`*xP4Dj*+hQfA0Ml#Dew4knmSNz9 zFz3^dEhUQ>O4)jn&ySF(W|I4F)u&T_b%}gBu&Q0t&2DaR{F)z;5KJmNLk*e=5Pk6i zTXe)uW{Ry{XgwL1EJKCdChho+`aJ%Aw53?!`EY)M+PH*?6?$T}eu3!$&=7A>VD;e~3n8 zkGvqwD2V=vBE;-09ygcWJlSSmeyE4~r+d;4Ru|RRoZihBZI2;uu*Lew)MHin(370v z#{q@bWWo$jRksEYMm84Ra*O00+6Vu$r;`26qpCeAe$Npf^y<5-JEkOS3aiX<+Z$U; zR+`A^SdHVIdG-CzBCZ|oL+;of03Z)}tWMU|)J>oCwI`PgOav*$lJ~H(RDvRb^WV4F zm8!Yc1S=~CjZ3k?t`C5Tchxw}YPCVD1(hFUIabPElh#G=o_5NOMpCnFj^EpkNU4tW z{+)XYYd$@?ixN{G@l`+iNUSRV!rNz|Y~f5n&?y}=jX0`eb8|-2Ctg9KYUp2l1gGpa z=K1doMVksH%gs{;l;a6@1~B7CIq_vcRR*gsllTAM_=@6GYr<4-P_^9;%&P{j+#+6i zt7G_krOz3!k**3VPwIE?FUmoR;}qhx zkh(HR_2e;(`Gr}~n$YJ;87(QHD|lf;ePc}{*jtAo2=%7AH&Dy_4E<3-HVy~4+mB+7 zEAyAeL^ zd)aE8?86sIQJqU)0ppj0a!qN%j0>v2(%A@(B6r2OY~ia4J%8rD6w+{f;Nqv+mz()1 zlrpOa!T*RQo2f+ia{Jt{%%f=0s1DcD(!p^;k@@2&VROuY{JU6;OA;p13B4RJlraRG zjExcvEn-N_<5_^%&+{%wgWg@tYV6niBW-~tGd&;DKjk5Q!>0Fqzp1ICEd!uO>I=6L za~o4;^-aywdX@-wy&)@)T#Rm+K{D%ZDn$^JDFs72Pb9F@rj%fO|H}9TZiZK>iyF;b| z)D3y!Tk*bL;cTH^lnvaN-e^oe8n}@ha;P`!{JM*1w6tq?$@J0=Ax8Or*m%z}?ZaKX z8qPQ5#@E!Bt{$OF%g7LdBl7zrq0pAl9z{hOa_=24Z^JHGhC+Il^v5kMr`+Ni$GVqo z6*{zgZkstG$cjP1qjH{Sr+MB0 z!Eb*^tLehYG&-j*#9z#j>+#i3=jqZy#0?6#OVl1qX&q(SvZbU$~$82)J=o&K0Y30-I-_HzG14ISHhbDeM=~CFm=^8q{S>KSnJ}f%& z=)0Qz*c(MqRd>Iwe-EYjK-lfm`~xxvq7~g`CJnw<$xL{?0Z!N2tHdaKUJ3#ADw40x zGCnnu+C57sRq=FGUkFv;eL}ReeUI?kaP+J&x&N~zf2JBy2ct5a#EYUbl<&0SVcu!w zHkrT(M~Sd_s?YcX6l^%J`*UAJY$@`P=RknR=sB9b-YQOZ?B@^2yi67@)aHI(XW}v< zS_!u^bZa_4B7zXpgOQdy+F0=@{i;mFBWS}~OpdnM&fM3s4+Aa~|EPu;{GK9Jp9K8o}xhKR`c=Ow>9S_b& zV_)~vKKD;DpeXqx2ptAUeCxU=o%&(YUUhb#VdqBh-MQ~k+4FuO$rz>Q2-Pd?f*sDd zyS%H=`*A$;pRXyXeAak7scIXe;g!}c{a?#vET$`ArwMP!`{+%ICd1TxV+^vfaPMGu z^>~x!cgfpye2Mq|8JPkP0N!^?;XVF91h0m#U>9YXwA+<;bMBKat*chvm+x47HZqgv zwQOM$!&iz-t+kC!iL1ARdOFG9Rql3}9DVx$O1~`20v%aXV+We{a<&Gcymv3+&PR3s zJ1hSed;S-D{;$ZMF?KWAT-W(ZC@rZn0w`{6q@;o1c-uq-e&_|Jn`j&%G#Bjs728&t zQraspyODx99n@_xNYM`kH0f|SDmZRAMB8}v7gT-umdxuGS8?+Qc1q9_Rst7ZX5&po z?p$W5_K_~7Cs?Pnpr!t>cUlgL@=Q~_ zt?_TU)@JN@Y?Is>?PK7ZcbK+Xm{f2Vrqa|DlamM`TxELG9ZArWj^mOZ?}hW1$l;xP z9y~@HP9w1=7pGD`ZGSxX;|8q#^@G148@U#JO7{>`06NU1sJC546+Pn(Ha1o3b$!8< zF2^1R>ah-h=*+?4PM<(8nAXtQLG3@>Aj~fr!#HI7UTKMlzYg2Db|amfg2W=yL?NGh zyHHo&cvZdqd6ROYD^i%}FA?*8iN8KM@&V{Ed!Hbv6k$emTfQJp9;D;TEk)a3(?2C< zb7;WbEWP70%~&tecIr>kw|VlmAChFP*1nUrz)0o(v|=YgAUFLCFb!~;PD-0FB7i2z z&DMSOC(^UdGMjyN!|tLZx{EoSqUrbmK&c6uOp#SKQDwI8aj`!75`ksVplRhP`HTWp z$HW2l8qY>d?D)QyFFRR(q{l<4Aqpk7eotL&B_eEd+cWxVUgO>!eWT|X6DWPz&pqaA z1uD(0_7V4l87c_R8Bf8@8Aoo=re9<1i8j92s<%Vd#z}DJm;8nV7j7%-jwK)&8DFdW zk6n;uDL$jcFH0yMC|`8g6n1gDG6UlDzA(qCtm&7Xn851xP~9ai;SzWe$j}xW>Y&jv zdGR$W2HeX=x%%Q|mj4$CGxvLjd}9U1#|^GOh$}<*;t1cdJ0eSw@R2C6d8RtCiTv`p zs6SQ!Cd)EXLtyxYx_Xp6@j1!Fg}E>~qJm8d0qYxxfKRxIW{{rh7t`9>W;}B~_|(7c z#*{21j4nh^n>=>S)=Q=*`Arg6=OAVB-C;|?c~-#xz~@(T$h6)+PN}qz!7&i zJrQH*ApKnBj4qb)TbCD2_E}Lxg-_WlRgtw`F^9m}H;4o&+T2EgSEanRC-~AK1~Z-D<7a);#T}!=Fi|wqB{d26#7$ z^)-TxYovsO_68r-LX*kUxj4oE3L2M8ww$S3^-3E(&cZVDg2MVj8(edd5og45$D@Wz zyMjOR_scK0#$z4%c;%vk@<-8g)fi-Ye5)B$}cl(4ghI9?>21(A=LPKll zktkP&!{zzgrwXkhuy8j~?C|$#*hkhT>2NY7t&d@MiTIKsdX*yASsSa*wt7S|VU`69 zHgl1jxtX)dO3jMQu`BlUmK9c@NvjIODTgNt)+B2g{>E?)3At4&#aBc;e9^Lm>Ar&vq+`Fq%QufH3N9v&8BhF-o_M1Q|jwlHDN-(otX4^bC{l zNZ*22&Vfm_P4QR@Oe#hV0GIs6uBwE#OIHb=aHeTH4!$ocJ+kJ!!}cson%wMV^-?BM z+w-<^fa8@PYMN>&#E^J~(^S{eb4BH`u3Fs*yJb9PH(SE7Up?Ab%oc~G#ste8UX-0( zz9Gl=UCew_M;V?U6jDS~9S7rY2fN1o6^ARLV{?*2u@d6t0JZzoNu;8b%@ipJ!Mx`m zv+*OYMW{xZ%vRvWTqWEk^9QrqowTpM+3D)~EsGPJkhYA>l&XI7zu-DBx<;eB){ZGi zF3hs$IEEQFd?bA3*iRrW4y9H@&l=w_@bV>6jNRQ+>6me)D!L(B4TYYbM4l|aT>jay z;tX=mK2MuJyGklP_*1Z3UAAnMrjp(GbE~de?kOOeru2-psNto=J7>>|o-cgd@1&WC z`V+q~e1fjZvOOoK51t`d3J@FF>_~EW++2BxSncDCF=cNM(F};=6nY(M?dbJ{Cg}9w zW2Mx8#3$zE`$@!qfuXh$%n>SCTdvHwU~FTEeKuC@^Q$E##Mr}i<8hi5$>)vtW?9CNx(p)l zgq`8MQdMJ3;L6}+H@O_U6h+sM0Ld4fa)1-rvt{|~R3+8+P5 zO{9bq!oBHjM0zMeHdi(RrhhBsFr{eE0mb6R#c8TG;Y}*l0 z@6aG=_Gebf-|aTFA0j#Uq=wD`S0U1FMW}|kwX+@DyKafO^f@1Y&HF^ptQ zTC2lJ3G$u2A*X;=B&Xq zv@u0`bC19{^#g7FR|v^)H#=G*f@i7L<^}Jt+f#VOU@jshSbpXPk%C0As^=AJkpSGz zhpt%q?3dBmjf#!^C20q@ki|lWi?GUoy}TgK23>!hYTyOc4I2u~YRUIu)A8w1B;r%Zg6n#fB{@rhq2w~%Qp#si$;ErH zZyEFLx?oJJsNGTe`BWWiHlC$cCqm-uW{mu?bRoE;pr0KctvraJAZ9h5|4yGUKrtXR z>NubJY@MgKEgLG=mn^z*K+_%FWZ&^?LX9L;J&L z`l@)>DL1`;VjIgc1W;V^1+s8qbrtvOYdWMR!S2q^QZ%7>y!!V~CQrpAH*r}MTcMfo zu|ZwZA ziN6)uoagq5xQ;97 zf9I#3-yS0xuiZ_B$A!!BBhr!%I)GU{?8@5J(I=sgD(e@&1Z|4$ptdHLyT=065r} zkI%R6o10k&*1hi7a*=bKjG)c%M&K*Rd)l#3+8tY9FQlQbO)bR(?*+}+d{=^l4eDJe z`(g_7wlWx&{>)K*^_Z&sCC6txbAL5mET-9c79}1xnqxDc<}RY&>YKZMu+uyoXJ(#_ z7a6(dU+;D&81q)1ECH`kY@9K4fH!FP2cfMYpJ(dRy3< zB6n-*a?HXKzRI`rY}_=q%qcOK%?z(wor9$|p4DXbs+U%)0Y?E{MAs16e|!Gle?!AS zW0*#J{dJ;aVN2#4wA8Yy{BbhVMoZ8 zg{Ny|efDbp{E&>jns2PNtqJlp97JJyC{)yqp!9t!H`SkP>hP?w*l^@Mz2`4!;)F+L z&W93aSb{jtvdC&YC0kfK!ZKV;xbP6aV*l!VMSfzylsP)Yq%~1{sU&nr29I~fG(n>e zPDfx(d75er*Eir>KCGDNb}Fj9-+f;YB;=PJ;P{O?_7lqDw4u({3z}JQMk)E0(*?=y zoa7U?-LstsAkyJtRYII_LX|vLn0iI^g`&DUqVD+zE@5HR8On>b=l)NithRG$HnPtu zYS?rYw|nk2vs!)Ey@ULW<658xQnUpsNP_YTS@L6ae0?7G&g*U2_gj6qieyg~F^8MD zvJ9iaW3yQPo2fy|I2+bWNx15x0CAqk&C59Q!xp@_0?q=v!sR2c@?D{vl#PP+n!u(n zOycg*mgGZ1wDoW12h!3j%MPsXVv21^TfA*Z6RoFHgElocGC*HK$?oxA)tk%MPmDscjBVLtT-@s|N%wU2ndN2*dSv-ZbkNsuVj*bQZw zDD2QL0vm43ko3K1FSWnlYgD5V5f@ci=lfQ_ZpyW@>Y$N~ppHiwuAd*Vz%3*6HZayD z5nnnIxUgZ1o$*mGU6CzX*{?omXt7&OTYUhmZKmzwc6Us%@(CA&nc{)Va?R`1Vx^7q zl5S&D4}a3S7BF+$PIg8VKr49hG?X_DxM;BfQ)ho(efLN+S-6`<&NKP=o`>$qEgfF& zl87>5jaR|uocuHJHDX*F%g@MpwTwk`#$j)=oJzi-rrPNGbu{6#18(*p|KzXec~8Lj zg{+{3Ih`ei7z1#jAgfq~mzjGJBx4-j)FxgE9sX(ef~>7pAJx>pmw%<^+05Ha0LKwe z#Mwm;8_q>&)zXmU0Jl})Jx=d;0SZET3Q|U6E$5kl7>}}?;CFbU zXRW6=T~~5SsI!xzDzN#~nY{2Z*$&NlB0eZ`|B}O@m($L2lEW2J+ti>G*x3#>Lu>Az zl_&2P9)|&Ln%Z1pmHcIcdm`UO-o4KE^nc%C9Z0?BzVpr}Tn)sfNOIgOoCtO*5tl=Ml;=w9rNu(ckm;S<&)0H6g_G@QPe=S4Z4HqjoO& zV+K5*S7P%Mi_q!!P(f48>uAuRBZX_r%`zIECAJKRW=CsdgVIX5b;Pcv2V?q+*x)27 z+6s#ZGwv@>gba|<;gHLL0(NJn>hgq^mY0L(!l;mxydw?-qn?31_N%72p5rp+JnmSn zbDdL^=C_H`6ym&@)*T?kHytRxY#l8zf`2^ODq>H6AuQ%hq-2eU6*Qy0yP8a48C@P` z^JELh7rkU-rhY3Et0R;S^%@6kOnu@~zLIYzAS<gtfZLb|+2*WOUn zqf2`YR#!D>-2JzYXC{|k*Z97{s5}u9ODR|VJlFCuQSq^4$m{pCzDMQRkMa`Xbe2!zUVNecMg3t!VW&5k~b+);yY>BUoA0P6u*#jn~6(ziL-)gaCo&BiT2HAj9-d|F@ z(Cz$Whw>F#{GT$nx2G6o$m*2nU2v)O%Kp;vn$Cf33CN9ZLYP7NlKtCK+_j>^#XWyJ z-YzkXeL1fO{!GXt>FYvtn5(85nz*R;bUhSj$?dXz=KW?g&*LRuJP|rm2pgOF*<3>D z7^nS*S^KV@RjK~x^9Kx1z8$0$+-1Nycr!$`qtrjTHqH{+-l0y$N^U3)tOba?V9P*; z>ecKW1|DzPrmE+x$wnLh{4R;lySuAzwsUjvwofHZ>J!7`sd=@T99B(D38XdoTHb@{ zLM4xByl=NE;MEnk;xy|~uB$#PJVT}qeArJ6sg)X;G8dMmsJX&#<)CSWwUp4FUF=qE zNwet-U(pkpmt>6NrbIrydyI~NRun#=tsiIVH$8)fY}aPa3**xDmF3>@li_T|UqNK- zN-XFBXf|;u2Hl)62XnY+lmUxYbILA!kz-<*QuQ*BSsdjLp(thV9G|T}rrc(++*OIC zK$g{2^^tpnpVx=2UvFQ5933$v%9a8I+o*gi1S7;$^KC^wcNz$Q-5Nj1BqYYs9#^SJ z*nc@m=mJHCB3q2W7Jn&M_2cfylZ`eDvz*hyBHSX(J2KZbTEe@Xe94m8HyKg#7X$0T zM0M{S(QL1Sdj!)e!;ddV1(AC5wuT1p_Zo}OaLT*m@CS^g2%%eg$Q)~w&_#c&=$-OC zjRa=C!3egZ0D?`Sn`YAX4FZT8viK{p{+Yf*a;oD!* zf6W-M>o;8jP3rZ><@cD`sZaaa4~Hn4M3**2=nk&r-~R3%a|HVFRP>BUZRcMO73v!* zkX-(XIK5b~PeYZoXpF8k;{+_Qny6~i&(qdafgwB7&G6&;lJ_kWBzckkwPg{e=2*kS zsuTcDaG!`#*xY#0yPmX|UO~IEk+Nlj{yg8dvEB_({78^AdNCi0j2-yhS!1|=^=W;C z8`$YQLmljyoMe}U)Z9Fj-jxw z=0;$XXlffTvvKK2jx4`r63_C7gezaUpS*aw{~aB;u_PI^ zA){qI%S>v6*$LVceHwwgPj5@q?*$v?o=82gRMB@GxqlgRSM z@5QGW5vX=?jPSr}b0C&devP3Op@m99jq`+io+VAp)5X^sLDY3_z2tutGF6e&us){K z=y$+-E(ii;^|YL@NymZb++cE+)>N8l!e6j9MFS!FA~8jU4;--`geu zJa%mgmC%YC>8Yn_29IrHp4Zbz&jA)1)CxP<@lTpRq{qMEerqF`$uaf#%jP1#x2J=f ziri+ytXRBR?4l`W-!zKY8U{JJxKrAE>zU%)f_M9WV56aVh09-cPV?2B_Fg$9e-d2V zemo|RtK|@uFk)h8U~*DF-Vh^YlS9n&o$Nh|#OY`!=eRPE<5r4n*_x}U^Ug3-c|(=) zZ1-wx7ix-JoAf+IHX-JsffO|IYy!CYeGMIW6PLobQYFAtui{PhG-W@t-{47#|&qG1#;6B5<3)s0$@i;5qG%Nx-A# z7+TProAaCDu1bnC#~gHT-YDxwP*bi| z!!a+ZKEurGFsj%IKKi&~Ia38EWjc$PlnrcPFPcst^K%isTR$KrBN#`kOW9{Or5Hv} zEkhZQ!W_^8PX9rW{}Xii|I*|AOON-Ts>h>93xX!NRi60ct?C@=`Sv~%tZ8Ysnxs+c zO=u~@a|z;JhUuJngjKj=Q|-;LMA|86v?u=A@4|>NI1?!JTzV9kGz2@c)%Rv|ZLAMs zrB`jGRwRLR0*){AfS#E)vds*W-#^?quzF4px^Vd>*tom8h9r6_two-tW-ql@BpExH zgPIfaf?h`I1n>r~1Abb5$aFKON<$37eJCNXJ`OT%L4@;BKO!$t{Nn@Q*;7s1RAJh4 z)xk02!v(WWGoG63IalxaXrRg%VW(B#crBGLyq$24r^Am{tFj1Nayt= z+vgQm^TCZcbq#4dv{K^6AR)DC=jv=Vo%EyuXDiS+ml&G2dF8_D!a>WMdQ-bXxuR}| z(m}{^q2!io0ckvofm3anDf$><5SIY((dSzSvrel%7b7FD*_KChO( zV>33z%#U&sgue?89z)qXuawUX5W9Z>y1YVb?pae#cbAq7bU&CkSb-%I_^{LCrG|fi zX!EAZ8zSKTPY##FIv7+qr$&^U$h_GtJoR?nG>C79TZ!#isI&GA+KEwmm#!t%f;Ue) zH?PuWRu4IROdLn6eRiF{#;!!$Q*=qjSY?6s1+TP0dXsfGMb!S8aE&A`BzOe~iOoQ=@P1U(!Q{klh`b*FOq{gQ z_~h5|RYH%1<#~eyLD5^$7VVUgz?P3>jxE7;pc~trr0f*TI3y2jAi|%p<1)MSmNFSI zLo;mNt*JxmY>y}{KekDd)Q_fKorZaee`o#@O;}#}Y=5ZB_*1+~EXEJRIAkrcA(IBd z@hjqz#sMPo;nAL=odlJ>wZUy(TNQiX|~nWHDt;nLnS3MiXx4!i^$fsQQDf0v$bVm+MCb zp9AXkSLMn2lPCMt_1m^r!3>7_wWJ(CcmR2d2}zB_nGpYpTNY<<-e_?}9%qs%^Urwg z9oWP$#zAPm73j;^d}!r~TMWL==id0`YkP4K#m@=!sm|P6gzjOSZ=^eb}{@-JI*V%Wn{%>ZeE>wfL-lHO}c6D z^-N+G@dIFPW2mwJ04zybIUJwu8ZmVzG>&eGB3ebEV{G=3K=%65`m)*ysnMdL^tlq( z-tWaNhqL2;mrNHOIwF3gRuiSPj?BoiHSH-5nvrcSFItbs80LY2n|90eaiv)hxK-3U zf@A%ANI}HA-t{#N1@sj)l%j(xFRhOP0;GuLrJh#PNdBJU2S98#?cbUO|A(6SYPj0o z=2%bPW$tMzBwSVA3t1s4sr=kl##{ASe%@7k&PaK`>b`1xcwqqNAuaPH zUy0AxRgJml4C+&?Z&tNP-tUW;Es;Q_a& zo_m(9CB@p5_Mt9cssnpk8AQ zJ>6dt`4kwLFKNdT%$sjh`nJZ5(C^Cc#DDqxenZmd zmn7G(vSKZ)98DnmJ<3u5X|pe+|H{iCw$;b3D05ogYAa1~@b}IKb24$|(AQZ$?))>5 zs8)&xz@lt5*In&vF0=@Fk zW2oQ&lJ>E5oTeOK^u^^<&1%qkqzao_oc0uZJ$XW+XcM6;0@+S8wzaEycVW zj7Y(nahgkv*TaoJ_48Q#VAhMl?4(9VrYAx|#T_*742+IMUehuwG-fUcM$SW69z_uCi4yFfXrgCy#&DRu&GzcV*`ORg^Q-kqY4$D?Eo!(wR1dnF0CYG;{J zY;|sh?OxWNf6RuJ3@Nss^q z39iB2U5em(pdhc#>3*-fPs=@hJLlcLJ@N+w#$b#Md)M4^tu^QO{pO%5>tH9c(y3}& zfyr%uQd1i_Fx1~2{$BAr8&+8g1U;)C$tTJU|Ol7}<40Prl?5@05F73(?nWoORp+ zNeiEQxcy{1jsV_vU${NN5aQlJmMX@$DRPSuun{QW3VkREQhGm@qoB066#M_4tphA< zdAwu%rYf&xMcc<~_aw^8Rpn_SZx7z*3LbiIV>H>T>;v%$yiByJ6?_aM&sclP^rxwX zsc!XxRp`@{Mu8~!7!lRC3SXfN8UW6FIr_`p>U~*Ouz>_N;NlMx0!EV$@1ZJiFU9rW z0Y2d;N2PU-?EiSMSHT*xT$5>k3Yw1Td{?xULf$!FzF;V%JJ~ldM~8H+nUq-jF~N`Z z(m6t6MRYV=WsWky3ZK3AD0(g<*KcU%rmw@0=gQqv@q1-^-C4jf7NT^`UJ7Qr2X7C% z*N`>wpp~77MB~FL3Te_mEE=Pq!MN;ifBv; zd_qi?ofV&u5gprfEJK%2c#*CJ13Nh_a~brQhvegPw9a;K1#%>QJloZpxk%%PaqdzA z_iilg?W^7l&9bBRq9i>Qmi8pSd}+&XFWoNY5$d)%;VVa0fcv^xDe^OjqFbS-+kMdS ziP^__vCEBt9oaF4{yO!GJDaMDd@ty@#%$sg!nyY3SjSgSaeyl-)J+ktw?q`oa?l()l$f*bLl1Why%ci5R6 zp1)#?y0)D&0#(v?P(RfF@D=MGf*rN`4~??#LvQlG@d}3=@c028s-=J?W;0zv#HGe)^-x&9-FS8DBy{#AQeyr|N_9eQwfj zGeD=_W$e8JUxh5~Vb`jBz1bcpoc#S12A$uKJhMzHO+#Gjt(Z2N_|`y4w8-UsU;Pz) zJnM$wod!wRIbk*5RFC;xz4EoB+7i?E>cP>c6l>a5o?-0<3XS#g}71lbZP z*7BV~43nK6W6Tl%K{j|SO?Ht8JSgnIHUetKhfm6;GR63xyr*29iIfChB!14uf0Faj z&%9HtJUDk-r5I#gd$~|58q03^@t&hywgam&zELh^C%+mxRIBE2qmToBX%Jv;L)B3Z znF+~Db6sEh_*BG#iMcd5mfz(g72xDC2UzL>dDOCOzC6u{ZTn-n#H&&_)gxir&Zqtb z4Kn~SEzL(sq^oYOrt!Vb809>jb(3O^K;%y5c|^8R&sXIU@yg0e*C2DwU~#g*8)r>Ui!(F(1s2!$#es<Su zz2qBkmfhl$p^K4snu&`P1ckd^Ws*q>O{zA20Dao!GO({g%$1pHzXG8%g8_6kky%8i zm=bhuBAlEK`zsSE$`*AQ)x=dMsGT>jc{^QX=u%Xb&5<(3(!j^yLJ_8-3jU_7PPrBO z;N>xpqO!4gr`4{pIWd6PNfh1!?Ars?SisX_5SN0%RudCCNK+bS!mCH~tSs@sqYox0 zW|*cY9RMGyK5C`I(qh-nb@ZgcC$VGj<9dWHc~}jQ(pWYB;d!#HNwob`Xk+rDnMjw< zD6uGAYHGH!zVa)ozeMfyuT@`6_TRGI*VkyfUV5pp9IN;ku*+V-FC}_t zcZmjF!B*u^rgBe`Y*nFo-?K5t%q>UqxfPMy>@2(FWJH$_y$JbIcF9s6dFDaZ!z+AV zffHCKGE>w>(ItACjaL%cad5g(<3+QnAEpxVCXVMTxhgMH#@<|M?05}Q9H1@y%=(IX z(fp`+wDKoocXx*_>Z$>$H-O;)BwYE<9GayGl>V*oE$>^BHWtaE!d?y)HIcQW8H7_; zli?R)ETagn<)+71iX#r(f;15UjyoVx_fl=5`thTp8wcYcJTcGi*8u|>&_?YV;L z9kRWk<*uoH1zOYzV_@y2HJYVBZ(Z@F3IZeKHWUXTXkGx4cPW9VOuC=-h$qGHN%Zfn zxrAs8AVsv!-UoSZh1S|s>p zhpVs4;R0)Ryf&Ej=d0Aw4Fs6Df!^jjGPE_M8#g#3zAi4d9&V+ct(~@@={xPj+u3R_ z6)?1rmUjvY6d?1-{wPwOp?a#r)!C+CmXY0UWv$^@(gtNGsh-P>H#o z)_%7K;&^oyShtM?-z?u9E58_(5Ds9FWNEUji953Emfnw?`Pm-2M4)fbW{(0xmFtCp8RVh3gBtgsGUH!M2uAL%m~jOsJ*IU?T*18itcb3R2xP{T8r`#ePN`ybNtxk|7Z^b7 zG|Fr|B2el_4OCG<-m4c&{Q5@x(ZFUSaSzh@3V;j@%mUws4vdh5E|6f-Sn-nJjY}e7 zdxrk$%M>GnMo6Fii_I-^Vb+aVqFrhW`+f3WIo$WRWVht-<&2P~z~HhAafL}9@Z}!m zDohWN>tyS~(pc>^(8*WH+1!~a9DIU_kj8sGW2aImBPT~<@h&E)+s(uSt%vQU=-TlY z2aTUqY&b(m`P!CQEm-GMbccsvM?1KWpFm-hiO2tS4tXB#7 z&F!eC7lAV|V?+g6WQn9T)o15GoMOiAwLpzvb6b3KBc!`eQEB+A0GF7g^pjeUkJZt` zdu+@ZVC6NicjA{9J5u<=HM48YjiH|VWYX+&26H_QyL2v9(RSE8RZuuPDy=U^4r_Yh z_VD&$mvy09icl33fB4$N$93q5VC4bincP6vJ^tA=@P%{w5=1ESQ4e)eC`R|%OCRkq zb+ktiis7|5B?Y7dJYH1GtdFBqE@TF6$ys3mgE@FnFx}%|r%kvnaZc5odp5#%C(5EB zi6QkkbCAF!+F7)Fv)Q&7DUGY3u+nY5H|7c@JV7?E80sUBd?r%^f<5+kV0w1^yPGTQ;G#8ZwlVVtDBu z_Jy;9_RU;kR+}}?_`;G8UlAO5eP#(e$@Dk)(t0OVaUgfM^NP(hg?E#V6#( z&Q;>G{n7V#DO%QCz|vG($f_v%(}U1l`;{+>ppla=JnvC)MnTLk9pPRrQRB+tbdP@{Bzr1ZGuKS+!oqPd_)1AiA+r{dVVmgUi?v?}Nn={?c5gB1_X(c@~5+B+_-R z0hd@)NFMydfEbswtPoCw*2+|kZLqmtUVx>lNiVgJqlZ@gOI|7UHiot5wa7$Ie6V

-6OajB9cYY>^!grOkQ-+`8eyuhA9bvuMcckSzMziX=8_5R!RtG5=z(GMlp*)iz3jA;i{BR2Ve{%|a`6nsQI>z;7F{}&Bq4d>oa@BPM zgWzC}RS8HhI_G@g>E{RV8E9=xVbl7sS;@mZD`AOpHBu4}vzLIfYy8Lcpa{22!+YZn zx{k?(&NCeu;qm9E&u>`IumE>r@D;=iiTa#`XWTdnh>Ye|>CD}+#2I5reegAv#j5`( zr2oHy`v0l#e>uot>#x)bL+G-9q-Ou#Hpt&Z<<=JVa6cn+F`Dm>pKT3UOBlFSw3es| z1~6x!jHRvbwj72aJj(IU?vw>V)lZs|fJ9k(W7oq7#f>epK$a1EZb7K5gluu^AtUN_ z`U{4ZAdHrzF2srH?1lDfvCa0s({Q0*nLPik*_*0nmzx@&r07SUTNU1?x;&I#A7L88 zzqDCw700RiigBBKV1Z+db;N!uhHCoEhPb$EhQ_TU*y-O<@NXgOwz|6+;X`igCyx5Vi9m!F4!>sO`8mqhKKn2p;=oZ^W0ij-m zqeic}rb#8BMVWBDdyz05od~?!H+c7IpsQXwo&ptsVmOeQ+nm0-%fE4C>QZ8C?J?-_ z5)FkB^n?+wyZ)}?7(|(^_WL?<{=x|L-z3ZaRyWW0X;S>{hDYSa3bnYkgM+0~b_8}0 zQeR(0$(;2V5Uf46>*^;3nyup?@dU8X4YV&mRdi<#1i)3FZ>3p|&P&yoF;KnPelLPD zSNn2P$^i1!lXHHMmPDJ%2sU%`j;yKZ(cw%-Vu67YuR39zWY_r=9m-XSpK{lKu(AF3 zoW!8ZL66W8-r?J9n*cxOo4I)^?RnXr=XAY6xrMd~=kHnZduUHKJT1`YK3zQbt|k5( zV6cI3F0NTo#{HDdQJF-e1xucY(EL~W4T|_gS{c5Bq zHoTr*Q-&t3DrcKuYl4*|VFRMbKWEr;z8Qb&ek0=w0EpZQWE-u%GU5P+b`IR7g>i%! zu3i3IigQ+$04tYj|6vdIA4`PzJzg9ghnh0^Tz$?0A;`uHLJ&y7-)l{h#M_k_5_k)E ztIWnAraZCmi=P*3HzOr84j70cLxTi_p!6_f^$xr4l%`Ki)`Xuu>kZ-Bjp%;=`uM|8 zqY1gAI$#MW5ctmOXbQ8U29s=62i9iTpS!Lu_0<{@UN3UWoY*5+^{06(D8Dwvl>ZeR z9Pslnlz*qMUwcY-vVYaxmZAN{So^)t0sPiEb+42+6Oa-t?!8T{5;SGD%EHO}0W0r6 zaZI+=f?c))_#u$Pd-Mmabio^dMAgt|L~w$gl&bgE2edPmWx;pifYl<>?TYj0d%=)G z?9pF6*I!fEIsTfN`X@*4_rAIR?)CmB4n3qnP`Iv^F?UGrN27-pi-Mi}oXZl9Oz&J? z5&Fa{FRKD%iwAkaYcO3oD2>J0m8df3UZ5vr-Nm?J#P3&}*Mhgvk`Tz-A^~+UWfDI2 zM=@xm+Bg7oQ+4Y!`|^as4)fZB zHXDwr*srP-vz83AikTrWyP>~z#{Ny`;@>0H|0~!0ZC!}(lb}_>k3!kNirO(K&!3B6 zp=%;vrJ`pu+wR2Aj2|=Bg?NZV0s!J*Pk^&>hxyiw-}veI*-Zsu?Dm`d%v_f))YGZyPcXJE7Cts^3EUV zFP=CqOnV`lg6S_DCP#`l_;!dAjsrYnnoyq`>A?Lw)DvY!Ej_d2=x00C~_m9`rKR@jK`sW{c??>MI zk@x_jmu$`1#`KL~T z_vP6ffRvdvrZ5?d&Uabe!*$)&oFGmuXvhgp9O)wnQ%5g9r$V@x-$tC@o%#meg|BU` zc-W;({P7}#qLd{<0fhG4d6lUr_Bx9yW2}1ig7Dt#baw6~T&$RJIuXwmr<94`b?MMo z+PqPy(bNY4C1pwF&)L`{x(vR4!EqOlU`Pr5tHqn%hEn~TackVJBu-;Hn;XN@no%YZ z)2Y~%K$aZXS(l*@xv{{ZTCqnF&^F(wDZ|}K`E4?DEGvsS^F6cG!t-AcCumxwRb(RJmW>b?O8a;m99I+}w@7q%Oa+VAlb zOo|vRzoT~<4?;GXaen^X>)Nqtr7j0LM^(paR7z+0d>m=tJf2-)tKj78lDNVv{AI$V zNwYvxqm)>jcI`x&o$Cpjs0l$?Z7-vyHo@lkYj9gpm!_CbW?f_r+C{4UyL`wy)->1G zZ^bzc8%3^3v#iBa2Ob7C z^@yO7y86nF=8U+dH_lRDiD%OBv8=x?f;bmnm3_1?!xy>R=$?(&5V=+c^Rag;)6<)? zy3CIjOE5L{a3LyliEpe5?06H;kBXA!))nhN=)x&|5akJZ#?|W>2b_ZjM+Y~?`~3WD zbVm23t2ysBUI}*8r8UCR-%K<5@Sh1jN3zPwAFL$T1ibUayIfUOdFOn^NKSU;0(haW zx5q?7ghDCGRH%+JJ#73;^88FErNRKT=$Xu=u-K+$%;}xk+q3RFvg_9paqKKkV~-6Q zy7;WOokkm}|IFs{%R(k;A86Koj@SxK@rC8nJ0I7MtdP_+*iNSuuka$a8x*LNd<^{b zA&s3irYl9V8scjDW(iU2jgQJ2MchnQyK6XW>T{MDJdMaO+1%Sa_yX{DBkldNf-C&wPzrXQaz30NnuXiZ`dGFu2Gf@x<=9a~ z^t5%QRQ{0cPSYsYK0+F*7ht1SIj!tO;$tyCU%B~W==iJGXY?2HUj8wweNCb;t+UJr z`s43LPopY?JW2JrvNd^geQ=g;xtG6`s+m~y?8MgNEa!5KG1sbt)q%nbDyxrm&2geL zqG?=yzMnEAbclX7M&zbz9LY#vzc=vCejR$>eQzwCSgZ4%05W?jxUx*5wpWYI{` z4e&`ctcZdM`W1Pqv>wS?y9uN{iC1y5w&x3d4&=-YpCSyS?53n_A>A5NM}ITrY2?-< zr+&qnSZ*LQV|W%309|-8g2`f(uPOZeC#N0r zm&-f`pzHP$Ir!2y+xvK=@sb_2@1#dC3;RM1sj+@`5Ca#ki#)$)>2&K}-A8?+Y5g@e z%$N%kKanrS2X&!5Cp_d^+XAZ0Wd7tT@)MSVEts5JL8umFP>sRi@!ZvcEjTWksz$c8C}(h8+{gnEWQ zAgwjHwXT8M)|j9&(*5A3G9J?9IHU%`#tU!yH{^%LIS z0El%R5p0e~60hrJZ9TS8( zKh;cdajt}8VI;J-G5_4xEAbYQ%n1ETK-QhcC7t>H%fe^}2|)`1>}yFoo_C*`AnT-N zKz15tJIuxU@HMB{n&AacCWYoKU9g60acRf;f=mhyWBjthb?RqdcIq&jEpz!;!5Dvd zAo`FxdNe%Ey{UBDd$*vX&=YcQm{d-*z+O(pC8@rq&{vbBcc-PzGGZy#w(wveu}E%Z z{??HXWlktP7E502c1)sI0yylt4D~17Y$QVEDRg_LBK443HBPbl4SZt7M?8ELm*i~_ zL4iw~1A4SOG#pxSvKT%if4rTNXkch%Z8G{WD4Ft6`e5o{FW*Ar3_EJKuV~JWG4%+7 zvaBz6evTSjC0%?+B%X4GNNU~fp~9YSylBh5rf0CygfGy3<=TOZac|ne_4R0({d1WQ z15c-?dr^;$rU1u5Gfg%~&J0YYV8O`vf)Bczmtc2F3Ur5~^=V z3%F20JSBho^~O*aPRQ>PLM?(?<94EsCc(oUah;UUX^@=veqXUU>2;`N?n`S>Y!liY zeCQlkEtGfI4b2vvgT@qX2L^BfvVSZ}qa-tnDUb8? zr=6bdZT$cu*Q+M2&tgfLF|ZVJegSdDXv=isyVBWg2j!{IY@avvR^|^n%OdV-)IY6M zLa!88Ttjl2jw?vEW|9`e5n7T>Z${)@mW8WB#+{kdT|Y6L{}e`d8soDZRVw*HX}rQt zu;**uFxS@&Bi0qA&AK$Hm%Y*XZ(jMml=nU*L0LV&p=i!pj*IAs4JiW6!FAPPyhesi z9b?^K#jr#taS#Aezl35W*fk5|TAhEUD<*%FTew4Zwj>#CrWmRx$30;f9EyYCkh)_u zj=b8c?8LqV1JR~@8g(knkSDK(L8$x5Y=p>C5$&a}#vxBz4$?AnR&1Ydts8btsy+5>eBMLHP@dCF-APq%<%$VsAEx*jh zALrF-Tgr^Xyfa$nUZjZ<5inV@OeNYu{38YGWZaWal{Mo6S5eqgIzFKAe&^14N~IRk zko*g3rIAx=pJmo}9+D+VHCJwHiO`Qs=q6?|tI?+&01M=Rj_XfhzK*wR!FhFQNr+mw zZtByqJ4|DAW7j+_lfpLy2Xne&1C4ox2G;HeDvsvPcd1|D?A-zOyYf+)EdFSt@mBtLt)H+wO?mS{C=^$=~AM}kOg zIQob~GZK(nXhXCp3GXp=vEnMHB7G2e`!mW_d&^a!biVVAP@oNj^&mNV;2`f@%dq+& z^8;HVos0s!o)d)$%r8D^ug}(2%eA6W(e1#}q!i5?!D>!(->K2eRr4jkcZNDz%^!6V zKZH(y(hwmxMSr)O`?r4gqr>};JG}pe6z50i{d3{==OOsNGW5a%mZ!OEA_P@b9W_P^ zlNZFjSTI+z{?+ z-GoWH+#sd{jwI<1;%u@`M0c{zH~PK-5_XW&uSCBAE);8kmo%q?SI3s$0Np16t&$}H zL!0FRi|IT!isi3<{7XNc(vMjBjc*)|I6hvFZJA<;(T;a)u7u7JsGU3cj +# RFC-8: Metadata based Record Index + +## Proposers +- @prashantwason + +## Approvers + + +## Status +JIRA: https://issues.apache.org/jira/browse/HUDI-53 + + +## Abstract +HUDI requires an [Index](https://hudi.apache.org/docs/indexing) during updates to locate the existing records by their +unique record keys. The HUDI Index is a mapping of the record-key to record's file path. Hudi supports several indexes +like: + 1. Bloom Index: Employs bloom filters built out of the record keys, optionally also pruning candidate files using record key ranges. + 2. Simple Index (default): Performs a lean join of the incoming update/delete records against keys extracted from the table on storage. + 3. HBase Index: Manages the index mapping in an external Apache HBase table. + +We are proposing a new Index called Record Index which will save the record key to file path location within the +[HUDI Metadata Table](https://hudi.apache.org/docs/metadata). Since the HUDI Metadata Table is internal to a HUDI Dataset, +the Record Index is updated and queried using the resources already available to the HUDI dataset. + + +## Justification + +Bloom and Simple Index are slow for large datasets as they have high costs involved in gathering the index data from various +data files at lookup time. Furthermore, these indexes do not save a one-to-one record-key to record file path mapping but +deduce the mapping via an optimized search at lookup time. A per file overhead required in these indexes means that datasets +with larger number of files or number of records will not work well with these indexes. + +The Hbase Index saves one to one mapping for each record key so is very fast and scaled with the dataset size. But Hbase +Index requires a separate HBase cluster to be maintained. HBase is operationally difficult to maintain and scale for throughput, +requires dedicated resources and expertise to maintain. + +The Record Index will provide the speed and scalability of HBase Index without all the limitation and overhead. Since +the HUDI Metadata Table is a HUDI Table, all future performance improvements in writes and queries will automatically +provide those improvements to Record Index performance. + +## Design +Record Index will save the record-key to file path mapping in a new partition within the HUDI Metadata Table. Metadata table +uses HBase HFile - the tree map file format to store and retrieve data. HFile is an indexed file format +and supports map like faster lookups by keys. Since, we will be storing mapping for every single record key, Record Index +lookups for large number of keys transform into direct lookups of keys from HUDI Metadata Table and should be able to +benefit greatly from the faster lookups in HFile. + + +### Metadata Table partitioning and schema: + +A new partition `record_index` will be added under the metadata table. The existing metadata table payload schema will +be extended and shared for this partition also. The type field will be used to detect the record_index payload record. +Here is the schema for the record_index payload record. +``` + { + "name": "recordIndexMetadata", + "doc": "Metadata Index that contains information about record keys and their location in the dataset", + "type": [ + "null", + { + "type": "record", + "name": "HoodieRecordIndexInfo", + "fields": [ + { + "name": "partition", + "type": "string", + "doc": "Partition which contains the record", + "avro.java.string": "String" + }, + { + "name": "fileIdHighBits", + "type": "long", + "doc": "fileId which contains the record (high 64 bits)" + }, + { + "name": "fileIdLowBits", + "type": "long", + "doc": "fileId which contains the record (low 64 bits)" + }, + { + "name": "fileIndex", + "type": "int", + "doc": "index of the file" + }, + { + "name": "instantTime", + "type": "long", + "doc": "Epoch time in millisecond at which record was added" + } + ] + } + ], + "default" : null + } +``` + +The key for the record index record would be the actual key from the record. The partition name is also saved as string. +HUDI base files names have a format which includes a UUID fileID, an integer file Index, a write token and a timestamp. +The record index payload only saves the fileID and file index information. The fileID is split into UUID and the integer file index. The UUID is encoded into two longs and the file index is saved +as an integer. The timestamp is encoded into epoch time in milliseconds. + +This schema format is chosen to minimize the data size of each mapping to ensure the smallest possible size of the +record index even for datasets with billions of records. + +Experiments have shown that with random UUID record keys and datestr partitions (YYYY/MM/DD), we can achieve an average +size of 50 to 55 bytes per mapping saved in the record index. The size might even be lower for keys which may compress better. + +Below picture gives a pictorial representation of record index partition in metadata table. +Record Index Partition + + +### Record Index initialization: + +Like any other HUDI Metadata Table index, the record index can be initialized inline (before the writer writes records to the dataset) +or via the Async Indexer. + +The initialization involves the following steps: +1. Get the list of all files in the dataset + 1. Since the `files` partition is a pre-requisite for all other partitions in Metadata Table, the list of all files can be taken from the Metadata Table itself and does not involve listing the entire dataset. +2. Read the record keys from all the files in the dataset + 1. Only the record key column needs to be read from the base files. + 2. This step scales with more Executors and more memory +3. Determine the number of fileGroups to use for the `record index` partition +4. Create record index records corresponding to each record key read +5. Insert the records into the Metadata Table partition `record index` + +We will add functionality to automatically estimate the number of fileGroups to use for the `record index` partition based +on the number of records in the dataset (available after Step 2 above). This should simplify rollout as the user does not +have to worry about the number of fileGroups for optimal performance. Configs will allow specifying the number of fileGroups +too. + + +### Metadata Index lookup: + +For the incoming upsert records, given their keys, tag their current location. The key lookup would require the following steps: + +1. Generate the list of keys to be looked up (extract HoodieKeys from the upsert records) +2. Lookup all the keys from the HUDI Metadata Table + 1. Keys are partitioned based on the hash as the HUDI Metadata Table mappings are saved in various fileGroups (count fixed at initialization time) with each fileGroup saving a portion of the key space + 2. Each partition of keys is looked up in parallel from the fileGroup using various Executors +3. Tag the location, where a mapping was found in HUDI Metadata Table, back to upsert records + +Given N fileGroups in the record index, an indexing lookup of M keys is reduced to N lookups of M/N keys in parallel. Hence, +for fastest lookup operation, the number of executors for the writer process should be >= N. + +This also means that lookup from record index can be scaled with growing data size by: +1. Increasing the number of fileGroups (N in the above example) +2. Using at least N or greater executors for the indexing process + +HDFS based experiments have shown than on average key lookups from HFile in HUDI Metadata Table complete in 1-2msec. +So for lookup of M keys we expect ballpark time of K + M / N * 2msec where K is the overhead of opening HFile (~100msec) +and merging the log files. Periodic compaction of Metadata Table keeps the value of K lower. + + +## Implementation +1. No changes to the HoodieIndex public interface. +2. A new index type will be added - RECORD_LEVEL + + +### Writer flow: +Let's walk through the writer flow to update the record index. + +Whenever a new commit is getting applied to metadata table, we do the following.
+1. Parse the WriteStatus to determine the record which have been inserted into the dataset + 1. Such records have new location (HoodieRecord::getNewLocation()) but no current location ((HoodieRecord::getCurrentLocation())) +2. Create new records for each record keys being added to the dataset +3. Commit all these records to metadata table. + +We need to ensure that WriteStatus tracks all written records keys for every commit. + + +### Reader flow: +When a new batch of write is ingested into Hudi, we need to tag the records with their +original file group location. Refer to Metadata Index lookup section for more details. + + +### Limitations: +1. The number of file groups are fixed at the time of initialization and there is no support for dynamically increasing or decreasing the number of file groups. +2. If the total number of records in the dataset grows by large factor, the number of file groups might need to be increased to maintain the same performance. + 1. This currently requires re-initialization of the record index. +3. Record Index is a global index and hence requires unique keys in the dataset + + +### Future Improvements: +1. Add support for non-global index +2. Add support for indexing only a window of days rather than the entire dataset. + 1. This will allow the record index to be used efficiently for datasets where dedupe is required on the last N days of data. +3. Add support for dynamically increasing or decreasing the number of file groups. + + +## Rollout/Adoption Plan +* Record Index will be available in 0.14.0 release +* Metadata Table scheme will be upgraded as part of release upgrade process +* Record Index will be disabled by default and can be enabled by setting the write configs + + +## Test Plan +* Functionality + * Tag location for existing keys + * Tag location for non-existing keys +* Performance + * Prove Metadata based indices are helping upsert use cases +* Upgrade From 69d0998182794fa555a73d52071ca84b5672e011 Mon Sep 17 00:00:00 2001 From: Prashant Wason Date: Thu, 28 Sep 2023 14:39:25 -0700 Subject: [PATCH 009/112] [MINOR] Update DOAP with 0.14.0 Release (#9803) --- doap_HUDI.rdf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doap_HUDI.rdf b/doap_HUDI.rdf index 259c776a7e76..9a5eb593a3fc 100644 --- a/doap_HUDI.rdf +++ b/doap_HUDI.rdf @@ -126,6 +126,11 @@ 2023-05-25 0.13.1 + + Apache Hudi 0.14.0 + 2023-09-28 + 0.14.0 + From 1911c27d6c40427a22122eaf2c61ffa06081337b Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Wed, 1 Nov 2023 12:15:35 -0700 Subject: [PATCH 010/112] [HUDI-7016] Fix bundling of RoaringBitmap dependency (#9963) This commit fixes the bundling of RoaringBitmap dependency in Hudi bundles by including it in the shade rules and shading the classes, to avoid dependency conflict with engine-provided jars, e.g., Spark. Before this fix, with Hudi Spark bundle, NoSuchMethodError exception is thrown by Spark 3.2. --- packaging/hudi-spark-bundle/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 5752703c7a97..361e83013202 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -91,6 +91,7 @@ org.jetbrains.kotlin:* org.rocksdb:rocksdbjni org.antlr:stringtemplate + org.roaringbitmap:RoaringBitmap com.fasterxml.jackson.datatype:jackson-datatype-jsr310 @@ -195,6 +196,10 @@ org.openjdk.jol. org.apache.hudi.org.openjdk.jol. + + org.roaringbitmap. + org.apache.hudi.org.roaringbitmap. + From 7acc41e7646021bfb70f07fa28a8700cdab4539c Mon Sep 17 00:00:00 2001 From: Prabhu Joseph Date: Mon, 26 Feb 2024 15:50:00 -0800 Subject: [PATCH 011/112] [HUDI-6993] Support Flink 1.18 (#9949) * Address build failures in older Flink Versions * Remove unnecessary dependency on flink-connector-hive * Fix Flink 1.18 Validate-bundles --------- Signed-off-by: Prabhu Joseph Co-authored-by: Prabhu Joseph Co-authored-by: root --- .github/workflows/bot.yml | 12 +- README.md | 7 +- azure-pipelines-20230430.yml | 7 +- hudi-flink-datasource/hudi-flink/pom.xml | 1 + .../hudi/table/catalog/HoodieHiveCatalog.java | 36 +- .../hudi/adapter/HiveCatalogConstants.java | 51 ++ .../hudi/adapter/HiveCatalogConstants.java | 52 ++ .../hudi/adapter/HiveCatalogConstants.java | 52 ++ .../hudi/adapter/HiveCatalogConstants.java | 52 ++ .../hudi/adapter/HiveCatalogConstants.java | 52 ++ .../hudi-flink1.18.x/pom.xml | 168 +++++ .../AbstractStreamOperatorAdapter.java | 27 + .../AbstractStreamOperatorFactoryAdapter.java | 33 + .../DataStreamScanProviderAdapter.java | 34 + .../DataStreamSinkProviderAdapter.java | 37 ++ .../hudi/adapter/HiveCatalogConstants.java | 49 ++ .../hudi/adapter/MailboxExecutorAdapter.java | 37 ++ .../hudi/adapter/MaskingOutputAdapter.java | 67 ++ .../adapter/OperatorCoordinatorAdapter.java | 50 ++ .../hudi/adapter/RateLimiterAdapter.java | 40 ++ .../adapter/SortCodeGeneratorAdapter.java | 33 + .../SupportsRowLevelDeleteAdapter.java | 42 ++ .../SupportsRowLevelUpdateAdapter.java | 45 ++ .../java/org/apache/hudi/adapter/Utils.java | 91 +++ .../format/cow/ParquetSplitReaderUtil.java | 579 ++++++++++++++++++ .../format/cow/vector/HeapArrayVector.java | 70 +++ .../cow/vector/HeapMapColumnVector.java | 79 +++ .../cow/vector/HeapRowColumnVector.java | 54 ++ .../cow/vector/ParquetDecimalVector.java | 54 ++ .../vector/reader/AbstractColumnReader.java | 325 ++++++++++ .../cow/vector/reader/ArrayColumnReader.java | 473 ++++++++++++++ .../reader/BaseVectorizedColumnReader.java | 313 ++++++++++ .../cow/vector/reader/EmptyColumnReader.java | 42 ++ .../reader/FixedLenBytesColumnReader.java | 84 +++ .../reader/Int64TimestampColumnReader.java | 119 ++++ .../cow/vector/reader/MapColumnReader.java | 76 +++ .../reader/ParquetColumnarRowSplitReader.java | 390 ++++++++++++ .../reader/ParquetDataColumnReader.java | 199 ++++++ .../ParquetDataColumnReaderFactory.java | 304 +++++++++ .../cow/vector/reader/RowColumnReader.java | 63 ++ .../cow/vector/reader/RunLengthDecoder.java | 304 +++++++++ .../apache/hudi/adapter/OutputAdapter.java | 32 + .../StateInitializationContextAdapter.java | 31 + .../StreamingRuntimeContextAdapter.java | 43 ++ .../hudi/adapter/TestStreamConfigs.java | 35 ++ .../apache/hudi/adapter/TestTableEnvs.java | 52 ++ hudi-flink-datasource/pom.xml | 1 + ...2.sh => build_flink1180hive313spark332.sh} | 6 +- ...0.sh => build_flink1180hive313spark340.sh} | 6 +- packaging/bundle-validation/ci_run.sh | 2 + pom.xml | 37 +- scripts/release/deploy_staging_jars.sh | 1 + scripts/release/validate_staged_bundles.sh | 2 +- 53 files changed, 4812 insertions(+), 39 deletions(-) create mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/pom.xml create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestStreamConfigs.java create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java rename packaging/bundle-validation/base/{build_flink1170hive313spark332.sh => build_flink1180hive313spark332.sh} (81%) rename packaging/bundle-validation/base/{build_flink1170hive313spark340.sh => build_flink1180hive313spark340.sh} (81%) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 35de0b9087ed..fd3cc67976a1 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -119,7 +119,7 @@ jobs: include: - scalaProfile: "scala-2.12" sparkProfile: "spark3.2" - flinkProfile: "flink1.17" + flinkProfile: "flink1.18" steps: - uses: actions/checkout@v3 @@ -210,6 +210,7 @@ jobs: - flinkProfile: "flink1.15" - flinkProfile: "flink1.16" - flinkProfile: "flink1.17" + - flinkProfile: "flink1.18" steps: - uses: actions/checkout@v3 - name: Set up JDK 8 @@ -234,7 +235,7 @@ jobs: env: SCALA_PROFILE: 'scala-2.12' FLINK_PROFILE: ${{ matrix.flinkProfile }} - if: ${{ endsWith(env.FLINK_PROFILE, '1.17') }} + if: ${{ endsWith(env.FLINK_PROFILE, '1.18') }} run: | mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS @@ -244,7 +245,7 @@ jobs: strategy: matrix: include: - - flinkProfile: 'flink1.17' + - flinkProfile: 'flink1.18' sparkProfile: 'spark3.4' sparkRuntime: 'spark3.4.0' @@ -272,9 +273,12 @@ jobs: strategy: matrix: include: - - flinkProfile: 'flink1.17' + - flinkProfile: 'flink1.18' sparkProfile: 'spark3.4' sparkRuntime: 'spark3.4.0' + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.3' + sparkRuntime: 'spark3.3.2' - flinkProfile: 'flink1.17' sparkProfile: 'spark3.3' sparkRuntime: 'spark3.3.2' diff --git a/README.md b/README.md index ff2b95ec5473..20016f689ad3 100644 --- a/README.md +++ b/README.md @@ -118,14 +118,15 @@ Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specifie ### Build with different Flink versions -The default Flink version supported is 1.17. The default Flink 1.17.x version, corresponding to `flink1.17` profile is 1.17.0. +The default Flink version supported is 1.18. The default Flink 1.18.x version, corresponding to `flink1.18` profile is 1.18.0. Flink is Scala-free since 1.15.x, there is no need to specify the Scala version for Flink 1.15.x and above versions. Refer to the table below for building with different Flink and Scala versions. | Maven build options | Expected Flink bundle jar name | Notes | |:---------------------------|:-------------------------------|:---------------------------------| -| (empty) | hudi-flink1.17-bundle | For Flink 1.17 (default options) | -| `-Dflink1.17` | hudi-flink1.17-bundle | For Flink 1.17 (same as default) | +| (empty) | hudi-flink1.18-bundle | For Flink 1.18 (default options) | +| `-Dflink1.18` | hudi-flink1.18-bundle | For Flink 1.18 (same as default) | +| `-Dflink1.17` | hudi-flink1.17-bundle | For Flink 1.17 | | `-Dflink1.16` | hudi-flink1.16-bundle | For Flink 1.16 | | `-Dflink1.15` | hudi-flink1.15-bundle | For Flink 1.15 | | `-Dflink1.14` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.12 | diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml index ee5c016693a5..85d185fbc2c5 100644 --- a/azure-pipelines-20230430.yml +++ b/azure-pipelines-20230430.yml @@ -14,7 +14,7 @@ # limitations under the License. # NOTE: -# This config file defines how Azure CI runs tests with Spark 2.4 and Flink 1.17 profiles. +# This config file defines how Azure CI runs tests with Spark 2.4 and Flink 1.18 profiles. # PRs will need to keep in sync with master's version to trigger the CI runs. trigger: @@ -37,6 +37,7 @@ parameters: - 'hudi-flink-datasource/hudi-flink1.15.x' - 'hudi-flink-datasource/hudi-flink1.16.x' - 'hudi-flink-datasource/hudi-flink1.17.x' + - 'hudi-flink-datasource/hudi-flink1.18.x' - name: job2Modules type: object default: @@ -69,6 +70,7 @@ parameters: - '!hudi-flink-datasource/hudi-flink1.15.x' - '!hudi-flink-datasource/hudi-flink1.16.x' - '!hudi-flink-datasource/hudi-flink1.17.x' + - '!hudi-flink-datasource/hudi-flink1.18.x' - '!hudi-spark-datasource' - '!hudi-spark-datasource/hudi-spark' - '!hudi-spark-datasource/hudi-spark3.2.x' @@ -92,9 +94,10 @@ parameters: - '!hudi-flink-datasource/hudi-flink1.15.x' - '!hudi-flink-datasource/hudi-flink1.16.x' - '!hudi-flink-datasource/hudi-flink1.17.x' + - '!hudi-flink-datasource/hudi-flink1.18.x' variables: - BUILD_PROFILES: '-Dscala-2.12 -Dspark3.2 -Dflink1.17' + BUILD_PROFILES: '-Dscala-2.12 -Dspark3.2 -Dflink1.18' PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp -B -V -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn' MVN_OPTS_INSTALL: '-Phudi-platform-service -DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS) -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5' MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)' diff --git a/hudi-flink-datasource/hudi-flink/pom.xml b/hudi-flink-datasource/hudi-flink/pom.xml index 5ba86552cd2e..9cdcfb426e14 100644 --- a/hudi-flink-datasource/hudi-flink/pom.xml +++ b/hudi-flink-datasource/hudi-flink/pom.xml @@ -181,6 +181,7 @@ org.apache.flink ${flink.connector.kafka.artifactId} + ${flink.connector.kafka.version} compile diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java index 23a7a1fcca71..5ea7a585a0d2 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java @@ -18,6 +18,7 @@ package org.apache.hudi.table.catalog; +import org.apache.hudi.adapter.HiveCatalogConstants.AlterHiveDatabaseOp; import org.apache.hudi.avro.AvroSchemaUtils; import org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.common.fs.FSUtils; @@ -47,9 +48,6 @@ import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.Configuration; -import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; -import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; -import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; import org.apache.flink.table.catalog.AbstractCatalog; import org.apache.flink.table.catalog.CatalogBaseTable; import org.apache.flink.table.catalog.CatalogDatabase; @@ -107,17 +105,20 @@ import java.util.List; import java.util.Map; -import static org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase.ALTER_DATABASE_OP; -import static org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; -import static org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; -import static org.apache.flink.table.factories.FactoryUtil.CONNECTOR; -import static org.apache.flink.util.Preconditions.checkArgument; -import static org.apache.flink.util.Preconditions.checkNotNull; -import static org.apache.flink.util.StringUtils.isNullOrWhitespaceOnly; +import static org.apache.hudi.adapter.HiveCatalogConstants.ALTER_DATABASE_OP; +import static org.apache.hudi.adapter.HiveCatalogConstants.DATABASE_LOCATION_URI; +import static org.apache.hudi.adapter.HiveCatalogConstants.DATABASE_OWNER_NAME; +import static org.apache.hudi.adapter.HiveCatalogConstants.DATABASE_OWNER_TYPE; +import static org.apache.hudi.adapter.HiveCatalogConstants.ROLE_OWNER; +import static org.apache.hudi.adapter.HiveCatalogConstants.USER_OWNER; import static org.apache.hudi.configuration.FlinkOptions.PATH; import static org.apache.hudi.table.catalog.TableOptionProperties.COMMENT; import static org.apache.hudi.table.catalog.TableOptionProperties.PK_CONSTRAINT_NAME; import static org.apache.hudi.table.catalog.TableOptionProperties.SPARK_SOURCE_PROVIDER; +import static org.apache.flink.table.factories.FactoryUtil.CONNECTOR; +import static org.apache.flink.util.Preconditions.checkArgument; +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.StringUtils.isNullOrWhitespaceOnly; /** * A catalog implementation for Hoodie based on MetaStore. @@ -219,7 +220,7 @@ public CatalogDatabase getDatabase(String databaseName) Map properties = new HashMap<>(hiveDatabase.getParameters()); - properties.put(SqlCreateHiveDatabase.DATABASE_LOCATION_URI, hiveDatabase.getLocationUri()); + properties.put(DATABASE_LOCATION_URI, hiveDatabase.getLocationUri()); return new CatalogDatabaseImpl(properties, hiveDatabase.getDescription()); } @@ -248,7 +249,7 @@ public void createDatabase( Map properties = database.getProperties(); - String dbLocationUri = properties.remove(SqlCreateHiveDatabase.DATABASE_LOCATION_URI); + String dbLocationUri = properties.remove(DATABASE_LOCATION_URI); if (dbLocationUri == null && this.catalogPath != null) { // infer default location uri dbLocationUri = new Path(this.catalogPath, databaseName).toString(); @@ -318,11 +319,10 @@ private static Database alterDatabase(Database hiveDB, CatalogDatabase newDataba String opStr = newParams.remove(ALTER_DATABASE_OP); if (opStr == null) { // by default is to alter db properties - opStr = SqlAlterHiveDatabase.AlterHiveDatabaseOp.CHANGE_PROPS.name(); + opStr = AlterHiveDatabaseOp.CHANGE_PROPS.name(); } - String newLocation = newParams.remove(SqlCreateHiveDatabase.DATABASE_LOCATION_URI); - SqlAlterHiveDatabase.AlterHiveDatabaseOp op = - SqlAlterHiveDatabase.AlterHiveDatabaseOp.valueOf(opStr); + String newLocation = newParams.remove(DATABASE_LOCATION_URI); + AlterHiveDatabaseOp op = AlterHiveDatabaseOp.valueOf(opStr); switch (op) { case CHANGE_PROPS: hiveDB.setParameters(newParams); @@ -335,10 +335,10 @@ private static Database alterDatabase(Database hiveDB, CatalogDatabase newDataba String ownerType = newParams.remove(DATABASE_OWNER_TYPE); hiveDB.setOwnerName(ownerName); switch (ownerType) { - case SqlAlterHiveDatabaseOwner.ROLE_OWNER: + case ROLE_OWNER: hiveDB.setOwnerType(PrincipalType.ROLE); break; - case SqlAlterHiveDatabaseOwner.USER_OWNER: + case USER_OWNER: hiveDB.setOwnerType(PrincipalType.USER); break; default: diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..94ed3b538879 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; +import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; + + public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..5d40e7ed1d87 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; +import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; + + public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..5d40e7ed1d87 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; +import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; + + public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..5d40e7ed1d87 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; +import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; + + public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..5d40e7ed1d87 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; +import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; +import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; + + public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/pom.xml b/hudi-flink-datasource/hudi-flink1.18.x/pom.xml new file mode 100644 index 000000000000..591d40b755e1 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/pom.xml @@ -0,0 +1,168 @@ + + + + + hudi-flink-datasource + org.apache.hudi + 0.15.0-SNAPSHOT + + 4.0.0 + + hudi-flink1.18.x + 0.15.0-SNAPSHOT + jar + + + ${project.parent.parent.basedir} + + + + + + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-slf4j-impl + + + org.slf4j + slf4j-api + + + + + org.apache.hudi + hudi-common + ${project.version} + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + provided + + + + + org.apache.flink + flink-connector-hive_2.12 + ${flink1.18.version} + provided + + + org.apache.flink + flink-table-api-java + ${flink1.18.version} + provided + + + org.apache.flink + flink-table-api-java-bridge + ${flink1.18.version} + provided + + + org.apache.flink + flink-shaded-guava + 30.1.1-jre-14.0 + provided + + + org.apache.flink + flink-core + ${flink1.18.version} + provided + + + org.apache.flink + flink-streaming-java + ${flink1.18.version} + provided + + + org.apache.flink + flink-table-runtime + ${flink1.18.version} + provided + + + org.apache.flink + flink-parquet + ${flink1.18.version} + provided + + + org.apache.flink + flink-json + ${flink1.18.version} + provided + + + org.apache.flink + flink-table-planner_2.12 + ${flink1.18.version} + provided + + + + + org.apache.flink + flink-runtime + ${flink1.18.version} + test + test-jar + + + org.apache.hudi + hudi-tests-common + ${project.version} + test + + + + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java new file mode 100644 index 000000000000..d4c6bc3a8f4d --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; + +/** + * Adapter clazz for {@code AbstractStreamOperator}. + */ +public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java new file mode 100644 index 000000000000..6dcfe71ccfd9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; +import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; + +/** + * Adapter clazz for {@link AbstractStreamOperatorFactory}. + */ +public abstract class AbstractStreamOperatorFactoryAdapter + extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { + + public MailboxExecutorAdapter getMailboxExecutorAdapter() { + return new MailboxExecutorAdapter(getMailboxExecutor()); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java new file mode 100644 index 000000000000..a6b5439ea1ff --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.connector.ProviderContext; +import org.apache.flink.table.connector.source.DataStreamScanProvider; +import org.apache.flink.table.data.RowData; + +/** + * Adapter clazz for {@code DataStreamScanProvider}. + */ +public interface DataStreamScanProviderAdapter extends DataStreamScanProvider { + default DataStream produceDataStream(ProviderContext providerContext, StreamExecutionEnvironment streamExecutionEnvironment) { + return produceDataStream(streamExecutionEnvironment); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java new file mode 100644 index 000000000000..349f60f30acf --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.table.connector.ProviderContext; +import org.apache.flink.table.connector.sink.DataStreamSinkProvider; +import org.apache.flink.table.data.RowData; + +/** + * Adapter clazz for {@code DataStreamSinkProvider}. + */ +public interface DataStreamSinkProviderAdapter extends DataStreamSinkProvider { + DataStreamSink consumeDataStream(DataStream dataStream); + + @Override + default DataStreamSink consumeDataStream(ProviderContext providerContext, DataStream dataStream) { + return consumeDataStream(dataStream); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java new file mode 100644 index 000000000000..7c1649301607 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.table.catalog.hive.util.Constants; + +/** + * Constants for Hive Catalog. + */ +public class HiveCatalogConstants { + + // ----------------------------------------------------------------------------------- + // Constants for ALTER DATABASE + // ----------------------------------------------------------------------------------- + public static final String ALTER_DATABASE_OP = Constants.ALTER_DATABASE_OP; + + public static final String DATABASE_LOCATION_URI = Constants.DATABASE_LOCATION_URI; + + public static final String DATABASE_OWNER_NAME = Constants.DATABASE_OWNER_NAME; + + public static final String DATABASE_OWNER_TYPE = Constants.DATABASE_OWNER_TYPE; + + public static final String ROLE_OWNER = Constants.ROLE_OWNER; + + public static final String USER_OWNER = Constants.USER_OWNER; + + /** Type of ALTER DATABASE operation. */ + public enum AlterHiveDatabaseOp { + CHANGE_PROPS, + CHANGE_LOCATION, + CHANGE_OWNER + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java new file mode 100644 index 000000000000..0c836f3db391 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.api.common.operators.MailboxExecutor; +import org.apache.flink.util.function.ThrowingRunnable; + +/** + * Adapter clazz for {@link MailboxExecutor}. + */ +public class MailboxExecutorAdapter { + private final MailboxExecutor executor; + + public MailboxExecutorAdapter(MailboxExecutor executor) { + this.executor = executor; + } + + public void execute(ThrowingRunnable command, String description) { + this.executor.execute(command, description); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java new file mode 100644 index 000000000000..e84da0d6ec30 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; +import org.apache.flink.util.OutputTag; + +/** Adapter class for {@code Output} to handle async compaction/clustering service thread safe issues */ +public class MaskingOutputAdapter implements Output> { + + private final Output> output; + + public MaskingOutputAdapter(Output> output) { + this.output = output; + } + + @Override + public void emitWatermark(Watermark watermark) { + // For thread safe, not to propagate the watermark + } + + @Override + public void emitLatencyMarker(LatencyMarker latencyMarker) { + // For thread safe, not to propagate latency marker + } + + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + // For thread safe, not to propagate watermark status + } + + @Override + public void collect(OutputTag outputTag, StreamRecord streamRecord) { + this.output.collect(outputTag, streamRecord); + } + + @Override + public void collect(StreamRecord outStreamRecord) { + this.output.collect(outStreamRecord); + } + + @Override + public void close() { + this.output.close(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java new file mode 100644 index 000000000000..9c37de17bd1f --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; +import org.apache.flink.runtime.operators.coordination.OperatorEvent; + +import javax.annotation.Nullable; + +/** + * Adapter clazz for {@code OperatorCoordinator}. + */ +public interface OperatorCoordinatorAdapter extends OperatorCoordinator { + void handleEventFromOperator(int i, OperatorEvent operatorEvent) throws Exception; + + @Override + default void handleEventFromOperator(int i, int attemptNumber, OperatorEvent operatorEvent) throws Exception { + handleEventFromOperator(i, operatorEvent); + } + + void subtaskReady(int i, SubtaskGateway subtaskGateway); + + @Override + default void executionAttemptReady(int i, int attemptNumber, SubtaskGateway subtaskGateway) { + subtaskReady(i, subtaskGateway); + } + + @Override + default void executionAttemptFailed(int i, int attemptNumber, Throwable throwable) { + subtaskReady(i, null); + } + + void subtaskFailed(int i, @Nullable Throwable throwable); +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java new file mode 100644 index 000000000000..865c0c81d4d9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; + +/** + * Bridge class for shaded guava clazz {@code RateLimiter}. + */ +public class RateLimiterAdapter { + private final RateLimiter rateLimiter; + + private RateLimiterAdapter(double permitsPerSecond) { + this.rateLimiter = RateLimiter.create(permitsPerSecond); + } + + public static RateLimiterAdapter create(double permitsPerSecond) { + return new RateLimiterAdapter(permitsPerSecond); + } + + public void acquire() { + this.rateLimiter.acquire(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java new file mode 100644 index 000000000000..e38a58a0ccfb --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.planner.codegen.sort.SortCodeGenerator; +import org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec; +import org.apache.flink.table.types.logical.RowType; + +/** + * Adapter clazz for {@code SortCodeGenerator}. + */ +public class SortCodeGeneratorAdapter extends SortCodeGenerator { + public SortCodeGeneratorAdapter(ReadableConfig tableConfig, RowType input, SortSpec sortSpec) { + super(tableConfig, Thread.currentThread().getContextClassLoader(), input, sortSpec); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java new file mode 100644 index 000000000000..de0019d41bd9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.table.connector.RowLevelModificationScanContext; +import org.apache.flink.table.connector.sink.abilities.SupportsRowLevelDelete; + +import javax.annotation.Nullable; + +/** + * Adapter clazz for {@link org.apache.flink.table.connector.sink.abilities.SupportsRowLevelDelete}. + */ +public interface SupportsRowLevelDeleteAdapter extends SupportsRowLevelDelete { + @Override + default RowLevelDeleteInfo applyRowLevelDelete(@Nullable RowLevelModificationScanContext context) { + return applyRowLevelDelete(); + } + + RowLevelDeleteInfoAdapter applyRowLevelDelete(); + + /** + * Adapter clazz for {@link SupportsRowLevelDelete.RowLevelDeleteInfo}. + */ + interface RowLevelDeleteInfoAdapter extends RowLevelDeleteInfo { + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java new file mode 100644 index 000000000000..17c785d48455 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.connector.RowLevelModificationScanContext; +import org.apache.flink.table.connector.sink.abilities.SupportsRowLevelUpdate; + +import javax.annotation.Nullable; + +import java.util.List; + +/** + * Adapter clazz for {@link org.apache.flink.table.connector.sink.abilities.SupportsRowLevelUpdate}. + */ +public interface SupportsRowLevelUpdateAdapter extends SupportsRowLevelUpdate { + @Override + default RowLevelUpdateInfo applyRowLevelUpdate(List updatedColumns, @Nullable RowLevelModificationScanContext context) { + return applyRowLevelUpdate(updatedColumns); + } + + RowLevelUpdateInfoAdapter applyRowLevelUpdate(List updatedColumns); + + /** + * Adapter clazz for {@link SupportsRowLevelUpdate.RowLevelUpdateInfo}. + */ + interface RowLevelUpdateInfoAdapter extends RowLevelUpdateInfo { + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java new file mode 100644 index 000000000000..659c65973674 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.runtime.io.disk.iomanager.IOManager; +import org.apache.flink.runtime.memory.MemoryManager; +import org.apache.flink.streaming.api.TimeCharacteristic; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.operators.StreamSourceContexts; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.runtime.tasks.StreamTask; +import org.apache.flink.table.api.config.ExecutionConfigOptions; +import org.apache.flink.table.catalog.ObjectIdentifier; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.runtime.generated.NormalizedKeyComputer; +import org.apache.flink.table.runtime.generated.RecordComparator; +import org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter; +import org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer; +import org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer; + +import java.util.Collections; + +/** + * Adapter utils. + */ +public class Utils { + public static SourceFunction.SourceContext getSourceContext( + TimeCharacteristic timeCharacteristic, + ProcessingTimeService processingTimeService, + StreamTask streamTask, + Output> output, + long watermarkInterval) { + return StreamSourceContexts.getSourceContext( + timeCharacteristic, + processingTimeService, + new Object(), // no actual locking needed + output, + watermarkInterval, + -1, + true); + } + + public static FactoryUtil.DefaultDynamicTableContext getTableContext( + ObjectIdentifier tablePath, + ResolvedCatalogTable catalogTable, + ReadableConfig conf) { + return new FactoryUtil.DefaultDynamicTableContext(tablePath, catalogTable, + Collections.emptyMap(), conf, Thread.currentThread().getContextClassLoader(), false); + } + + public static BinaryExternalSorter getBinaryExternalSorter( + final Object owner, + MemoryManager memoryManager, + long reservedMemorySize, + IOManager ioManager, + AbstractRowDataSerializer inputSerializer, + BinaryRowDataSerializer serializer, + NormalizedKeyComputer normalizedKeyComputer, + RecordComparator comparator, + Configuration conf) { + return new BinaryExternalSorter(owner, memoryManager, reservedMemorySize, + ioManager, inputSerializer, serializer, normalizedKeyComputer, comparator, + conf.get(ExecutionConfigOptions.TABLE_EXEC_SORT_MAX_NUM_FILE_HANDLES), + conf.get(ExecutionConfigOptions.TABLE_EXEC_SPILL_COMPRESSION_ENABLED), + (int) conf.get( + ExecutionConfigOptions.TABLE_EXEC_SPILL_COMPRESSION_BLOCK_SIZE).getBytes(), + conf.get(ExecutionConfigOptions.TABLE_EXEC_SORT_ASYNC_MERGE_ENABLED)); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java new file mode 100644 index 000000000000..9bf5390ee26c --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -0,0 +1,579 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow; + +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.table.format.cow.vector.HeapArrayVector; +import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; +import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; +import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; +import org.apache.hudi.table.format.cow.vector.reader.ArrayColumnReader; +import org.apache.hudi.table.format.cow.vector.reader.EmptyColumnReader; +import org.apache.hudi.table.format.cow.vector.reader.FixedLenBytesColumnReader; +import org.apache.hudi.table.format.cow.vector.reader.Int64TimestampColumnReader; +import org.apache.hudi.table.format.cow.vector.reader.MapColumnReader; +import org.apache.hudi.table.format.cow.vector.reader.ParquetColumnarRowSplitReader; +import org.apache.hudi.table.format.cow.vector.reader.RowColumnReader; + +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.vector.reader.BooleanColumnReader; +import org.apache.flink.formats.parquet.vector.reader.ByteColumnReader; +import org.apache.flink.formats.parquet.vector.reader.BytesColumnReader; +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.formats.parquet.vector.reader.DoubleColumnReader; +import org.apache.flink.formats.parquet.vector.reader.FloatColumnReader; +import org.apache.flink.formats.parquet.vector.reader.IntColumnReader; +import org.apache.flink.formats.parquet.vector.reader.LongColumnReader; +import org.apache.flink.formats.parquet.vector.reader.ShortColumnReader; +import org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch; +import org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapByteVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapFloatVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapIntVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapLongVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapShortVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapTimestampVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LocalZonedTimestampType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.VarBinaryType; +import org.apache.flink.util.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.parquet.ParquetRuntimeException; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.filter.UnboundRecordFilter; +import org.apache.parquet.filter2.predicate.FilterPredicate; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.InvalidSchemaException; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Date; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.flink.table.utils.DateTimeUtils.toInternal; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.parquet.Preconditions.checkArgument; + +/** + * Util for generating {@link ParquetColumnarRowSplitReader}. + * + *

NOTE: reference from Flink release 1.11.2 {@code ParquetSplitReaderUtil}, modify to support INT64 + * based TIMESTAMP_MILLIS as ConvertedType, should remove when Flink supports that. + */ +public class ParquetSplitReaderUtil { + + /** + * Util for generating partitioned {@link ParquetColumnarRowSplitReader}. + */ + public static ParquetColumnarRowSplitReader genPartColumnarRowReader( + boolean utcTimestamp, + boolean caseSensitive, + Configuration conf, + String[] fullFieldNames, + DataType[] fullFieldTypes, + Map partitionSpec, + int[] selectedFields, + int batchSize, + Path path, + long splitStart, + long splitLength, + FilterPredicate filterPredicate, + UnboundRecordFilter recordFilter) throws IOException { + List selNonPartNames = Arrays.stream(selectedFields) + .mapToObj(i -> fullFieldNames[i]) + .filter(n -> !partitionSpec.containsKey(n)) + .collect(Collectors.toList()); + + int[] selParquetFields = Arrays.stream(selectedFields) + .filter(i -> !partitionSpec.containsKey(fullFieldNames[i])) + .toArray(); + + ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> { + // create and initialize the row batch + ColumnVector[] vectors = new ColumnVector[selectedFields.length]; + for (int i = 0; i < vectors.length; i++) { + String name = fullFieldNames[selectedFields[i]]; + LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType(); + vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize); + } + return new VectorizedColumnBatch(vectors); + }; + + return new ParquetColumnarRowSplitReader( + utcTimestamp, + caseSensitive, + conf, + Arrays.stream(selParquetFields) + .mapToObj(i -> fullFieldTypes[i].getLogicalType()) + .toArray(LogicalType[]::new), + selNonPartNames.toArray(new String[0]), + gen, + batchSize, + new org.apache.hadoop.fs.Path(path.toUri()), + splitStart, + splitLength, + filterPredicate, + recordFilter); + } + + private static ColumnVector createVector( + ColumnVector[] readVectors, + List selNonPartNames, + String name, + LogicalType type, + Map partitionSpec, + int batchSize) { + if (partitionSpec.containsKey(name)) { + return createVectorFromConstant(type, partitionSpec.get(name), batchSize); + } + ColumnVector readVector = readVectors[selNonPartNames.indexOf(name)]; + if (readVector == null) { + // when the read vector is null, use a constant null vector instead + readVector = createVectorFromConstant(type, null, batchSize); + } + return readVector; + } + + private static ColumnVector createVectorFromConstant( + LogicalType type, + Object value, + int batchSize) { + switch (type.getTypeRoot()) { + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + HeapBytesVector bsv = new HeapBytesVector(batchSize); + if (value == null) { + bsv.fillWithNulls(); + } else { + bsv.fill(value instanceof byte[] + ? (byte[]) value + : getUTF8Bytes(value.toString())); + } + return bsv; + case BOOLEAN: + HeapBooleanVector bv = new HeapBooleanVector(batchSize); + if (value == null) { + bv.fillWithNulls(); + } else { + bv.fill((boolean) value); + } + return bv; + case TINYINT: + HeapByteVector byteVector = new HeapByteVector(batchSize); + if (value == null) { + byteVector.fillWithNulls(); + } else { + byteVector.fill(((Number) value).byteValue()); + } + return byteVector; + case SMALLINT: + HeapShortVector sv = new HeapShortVector(batchSize); + if (value == null) { + sv.fillWithNulls(); + } else { + sv.fill(((Number) value).shortValue()); + } + return sv; + case INTEGER: + HeapIntVector iv = new HeapIntVector(batchSize); + if (value == null) { + iv.fillWithNulls(); + } else { + iv.fill(((Number) value).intValue()); + } + return iv; + case BIGINT: + HeapLongVector lv = new HeapLongVector(batchSize); + if (value == null) { + lv.fillWithNulls(); + } else { + lv.fill(((Number) value).longValue()); + } + return lv; + case DECIMAL: + DecimalType decimalType = (DecimalType) type; + int precision = decimalType.getPrecision(); + int scale = decimalType.getScale(); + DecimalData decimal = value == null + ? null + : Preconditions.checkNotNull(DecimalData.fromBigDecimal((BigDecimal) value, precision, scale)); + ColumnVector internalVector = createVectorFromConstant( + new VarBinaryType(), + decimal == null ? null : decimal.toUnscaledBytes(), + batchSize); + return new ParquetDecimalVector(internalVector); + case FLOAT: + HeapFloatVector fv = new HeapFloatVector(batchSize); + if (value == null) { + fv.fillWithNulls(); + } else { + fv.fill(((Number) value).floatValue()); + } + return fv; + case DOUBLE: + HeapDoubleVector dv = new HeapDoubleVector(batchSize); + if (value == null) { + dv.fillWithNulls(); + } else { + dv.fill(((Number) value).doubleValue()); + } + return dv; + case DATE: + if (value instanceof LocalDate) { + value = Date.valueOf((LocalDate) value); + } + return createVectorFromConstant( + new IntType(), + value == null ? null : toInternal((Date) value), + batchSize); + case TIMESTAMP_WITHOUT_TIME_ZONE: + HeapTimestampVector tv = new HeapTimestampVector(batchSize); + if (value == null) { + tv.fillWithNulls(); + } else { + tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value)); + } + return tv; + case ARRAY: + HeapArrayVector arrayVector = new HeapArrayVector(batchSize); + if (value == null) { + arrayVector.fillWithNulls(); + return arrayVector; + } else { + throw new UnsupportedOperationException("Unsupported create array with default value."); + } + case MAP: + HeapMapColumnVector mapVector = new HeapMapColumnVector(batchSize, null, null); + if (value == null) { + mapVector.fillWithNulls(); + return mapVector; + } else { + throw new UnsupportedOperationException("Unsupported create map with default value."); + } + case ROW: + HeapRowColumnVector rowVector = new HeapRowColumnVector(batchSize); + if (value == null) { + rowVector.fillWithNulls(); + return rowVector; + } else { + throw new UnsupportedOperationException("Unsupported create row with default value."); + } + default: + throw new UnsupportedOperationException("Unsupported type: " + type); + } + } + + private static List filterDescriptors(int depth, Type type, List columns) throws ParquetRuntimeException { + List filtered = new ArrayList<>(); + for (ColumnDescriptor descriptor : columns) { + if (depth >= descriptor.getPath().length) { + throw new InvalidSchemaException("Expect depth " + depth + " for schema: " + descriptor); + } + if (type.getName().equals(descriptor.getPath()[depth])) { + filtered.add(descriptor); + } + } + ValidationUtils.checkState(filtered.size() > 0, "Corrupted Parquet schema"); + return filtered; + } + + public static ColumnReader createColumnReader( + boolean utcTimestamp, + LogicalType fieldType, + Type physicalType, + List descriptors, + PageReadStore pages) throws IOException { + return createColumnReader(utcTimestamp, fieldType, physicalType, descriptors, + pages, 0); + } + + private static ColumnReader createColumnReader( + boolean utcTimestamp, + LogicalType fieldType, + Type physicalType, + List columns, + PageReadStore pages, + int depth) throws IOException { + List descriptors = filterDescriptors(depth, physicalType, columns); + ColumnDescriptor descriptor = descriptors.get(0); + PageReader pageReader = pages.getPageReader(descriptor); + switch (fieldType.getTypeRoot()) { + case BOOLEAN: + return new BooleanColumnReader(descriptor, pageReader); + case TINYINT: + return new ByteColumnReader(descriptor, pageReader); + case DOUBLE: + return new DoubleColumnReader(descriptor, pageReader); + case FLOAT: + return new FloatColumnReader(descriptor, pageReader); + case INTEGER: + case DATE: + case TIME_WITHOUT_TIME_ZONE: + return new IntColumnReader(descriptor, pageReader); + case BIGINT: + return new LongColumnReader(descriptor, pageReader); + case SMALLINT: + return new ShortColumnReader(descriptor, pageReader); + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + return new BytesColumnReader(descriptor, pageReader); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { + case INT64: + int precision = fieldType instanceof TimestampType + ? ((TimestampType) fieldType).getPrecision() + : ((LocalZonedTimestampType) fieldType).getPrecision(); + return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, precision); + case INT96: + return new TimestampColumnReader(utcTimestamp, descriptor, pageReader); + default: + throw new AssertionError(); + } + case DECIMAL: + switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { + case INT32: + return new IntColumnReader(descriptor, pageReader); + case INT64: + return new LongColumnReader(descriptor, pageReader); + case BINARY: + return new BytesColumnReader(descriptor, pageReader); + case FIXED_LEN_BYTE_ARRAY: + return new FixedLenBytesColumnReader( + descriptor, pageReader); + default: + throw new AssertionError(); + } + case ARRAY: + return new ArrayColumnReader( + descriptor, + pageReader, + utcTimestamp, + descriptor.getPrimitiveType(), + fieldType); + case MAP: + MapType mapType = (MapType) fieldType; + ArrayColumnReader keyReader = + new ArrayColumnReader( + descriptor, + pageReader, + utcTimestamp, + descriptor.getPrimitiveType(), + new ArrayType(mapType.getKeyType())); + ArrayColumnReader valueReader = + new ArrayColumnReader( + descriptors.get(1), + pages.getPageReader(descriptors.get(1)), + utcTimestamp, + descriptors.get(1).getPrimitiveType(), + new ArrayType(mapType.getValueType())); + return new MapColumnReader(keyReader, valueReader, fieldType); + case ROW: + RowType rowType = (RowType) fieldType; + GroupType groupType = physicalType.asGroupType(); + List fieldReaders = new ArrayList<>(); + for (int i = 0; i < rowType.getFieldCount(); i++) { + // schema evolution: read the parquet file with a new extended field name. + int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType); + if (fieldIndex < 0) { + fieldReaders.add(new EmptyColumnReader()); + } else { + fieldReaders.add( + createColumnReader( + utcTimestamp, + rowType.getTypeAt(i), + groupType.getType(fieldIndex), + descriptors, + pages, + depth + 1)); + } + } + return new RowColumnReader(fieldReaders); + default: + throw new UnsupportedOperationException(fieldType + " is not supported now."); + } + } + + public static WritableColumnVector createWritableColumnVector( + int batchSize, + LogicalType fieldType, + Type physicalType, + List descriptors) { + return createWritableColumnVector(batchSize, fieldType, physicalType, descriptors, 0); + } + + private static WritableColumnVector createWritableColumnVector( + int batchSize, + LogicalType fieldType, + Type physicalType, + List columns, + int depth) { + List descriptors = filterDescriptors(depth, physicalType, columns); + PrimitiveType primitiveType = descriptors.get(0).getPrimitiveType(); + PrimitiveType.PrimitiveTypeName typeName = primitiveType.getPrimitiveTypeName(); + switch (fieldType.getTypeRoot()) { + case BOOLEAN: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, + "Unexpected type: %s", typeName); + return new HeapBooleanVector(batchSize); + case TINYINT: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.INT32, + "Unexpected type: %s", typeName); + return new HeapByteVector(batchSize); + case DOUBLE: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, + "Unexpected type: %s", typeName); + return new HeapDoubleVector(batchSize); + case FLOAT: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, + "Unexpected type: %s", typeName); + return new HeapFloatVector(batchSize); + case INTEGER: + case DATE: + case TIME_WITHOUT_TIME_ZONE: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.INT32, + "Unexpected type: %s", typeName); + return new HeapIntVector(batchSize); + case BIGINT: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.INT64, + "Unexpected type: %s", typeName); + return new HeapLongVector(batchSize); + case SMALLINT: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.INT32, + "Unexpected type: %s", typeName); + return new HeapShortVector(batchSize); + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + checkArgument( + typeName == PrimitiveType.PrimitiveTypeName.BINARY, + "Unexpected type: %s", typeName); + return new HeapBytesVector(batchSize); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, + "TIME_MICROS original type is not "); + return new HeapTimestampVector(batchSize); + case DECIMAL: + checkArgument( + (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY + || typeName == PrimitiveType.PrimitiveTypeName.BINARY) + && primitiveType.getOriginalType() == OriginalType.DECIMAL, + "Unexpected type: %s", typeName); + return new HeapBytesVector(batchSize); + case ARRAY: + ArrayType arrayType = (ArrayType) fieldType; + return new HeapArrayVector( + batchSize, + createWritableColumnVector( + batchSize, + arrayType.getElementType(), + physicalType, + descriptors, + depth)); + case MAP: + MapType mapType = (MapType) fieldType; + GroupType repeatedType = physicalType.asGroupType().getType(0).asGroupType(); + // the map column has three level paths. + return new HeapMapColumnVector( + batchSize, + createWritableColumnVector( + batchSize, + mapType.getKeyType(), + repeatedType.getType(0), + descriptors, + depth + 2), + createWritableColumnVector( + batchSize, + mapType.getValueType(), + repeatedType.getType(1), + descriptors, + depth + 2)); + case ROW: + RowType rowType = (RowType) fieldType; + GroupType groupType = physicalType.asGroupType(); + WritableColumnVector[] columnVectors = new WritableColumnVector[rowType.getFieldCount()]; + for (int i = 0; i < columnVectors.length; i++) { + // schema evolution: read the file with a new extended field name. + int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType); + if (fieldIndex < 0) { + columnVectors[i] = (WritableColumnVector) createVectorFromConstant(rowType.getTypeAt(i), null, batchSize); + } else { + columnVectors[i] = + createWritableColumnVector( + batchSize, + rowType.getTypeAt(i), + groupType.getType(fieldIndex), + descriptors, + depth + 1); + } + } + return new HeapRowColumnVector(batchSize, columnVectors); + default: + throw new UnsupportedOperationException(fieldType + " is not supported now."); + } + } + + /** + * Returns the field index with given physical row type {@code groupType} and field name {@code fieldName}. + * + * @return The physical field index or -1 if the field does not exist + */ + private static int getFieldIndexInPhysicalType(String fieldName, GroupType groupType) { + // get index from fileSchema type, else, return -1 + return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java new file mode 100644 index 000000000000..7db66d23d6fc --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.columnar.ColumnarArrayData; +import org.apache.flink.table.data.columnar.vector.ArrayColumnVector; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.heap.AbstractHeapVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; + +/** + * This class represents a nullable heap array column vector. + */ +public class HeapArrayVector extends AbstractHeapVector + implements WritableColumnVector, ArrayColumnVector { + + public long[] offsets; + public long[] lengths; + public ColumnVector child; + private int size; + + public HeapArrayVector(int len) { + super(len); + offsets = new long[len]; + lengths = new long[len]; + } + + public HeapArrayVector(int len, ColumnVector vector) { + super(len); + offsets = new long[len]; + lengths = new long[len]; + this.child = vector; + } + + public int getSize() { + return size; + } + + public void setSize(int size) { + this.size = size; + } + + public int getLen() { + return this.isNull.length; + } + + @Override + public ArrayData getArray(int i) { + long offset = offsets[i]; + long length = lengths[i]; + return new ColumnarArrayData(child, (int) offset, (int) length); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java new file mode 100644 index 000000000000..a37973716950 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.columnar.ColumnarMapData; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.MapColumnVector; +import org.apache.flink.table.data.columnar.vector.heap.AbstractHeapVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; + +/** + * This class represents a nullable heap map column vector. + */ +public class HeapMapColumnVector extends AbstractHeapVector + implements WritableColumnVector, MapColumnVector { + + private long[] offsets; + private long[] lengths; + private int size; + private ColumnVector keys; + private ColumnVector values; + + public HeapMapColumnVector(int len, ColumnVector keys, ColumnVector values) { + super(len); + size = 0; + offsets = new long[len]; + lengths = new long[len]; + this.keys = keys; + this.values = values; + } + + public void setOffsets(long[] offsets) { + this.offsets = offsets; + } + + public void setLengths(long[] lengths) { + this.lengths = lengths; + } + + public void setKeys(ColumnVector keys) { + this.keys = keys; + } + + public void setValues(ColumnVector values) { + this.values = values; + } + + public int getSize() { + return size; + } + + public void setSize(int size) { + this.size = size; + } + + @Override + public MapData getMap(int i) { + long offset = offsets[i]; + long length = lengths[i]; + return new ColumnarMapData(keys, values, (int) offset, (int) length); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java new file mode 100644 index 000000000000..ae194e4e6ab0 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.columnar.ColumnarRowData; +import org.apache.flink.table.data.columnar.vector.RowColumnVector; +import org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch; +import org.apache.flink.table.data.columnar.vector.heap.AbstractHeapVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; + +/** + * This class represents a nullable heap row column vector. + */ +public class HeapRowColumnVector extends AbstractHeapVector + implements WritableColumnVector, RowColumnVector { + + public WritableColumnVector[] vectors; + + public HeapRowColumnVector(int len, WritableColumnVector... vectors) { + super(len); + this.vectors = vectors; + } + + @Override + public ColumnarRowData getRow(int i) { + ColumnarRowData columnarRowData = new ColumnarRowData(new VectorizedColumnBatch(vectors)); + columnarRowData.setRowId(i); + return columnarRowData; + } + + @Override + public void reset() { + super.reset(); + for (WritableColumnVector vector : vectors) { + vector.reset(); + } + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java new file mode 100644 index 000000000000..98b5e6105089 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.columnar.vector.BytesColumnVector; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.DecimalColumnVector; + +/** + * Parquet write decimal as int32 and int64 and binary, this class wrap the real vector to + * provide {@link DecimalColumnVector} interface. + * + *

Reference Flink release 1.11.2 {@link org.apache.flink.formats.parquet.vector.ParquetDecimalVector} + * because it is not public. + */ +public class ParquetDecimalVector implements DecimalColumnVector { + + public final ColumnVector vector; + + public ParquetDecimalVector(ColumnVector vector) { + this.vector = vector; + } + + @Override + public DecimalData getDecimal(int i, int precision, int scale) { + return DecimalData.fromUnscaledBytes( + ((BytesColumnVector) vector).getBytes(i).getBytes(), + precision, + scale); + } + + @Override + public boolean isNullAt(int i) { + return vector.isNullAt(i); + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java new file mode 100644 index 000000000000..a8b733de636a --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.formats.parquet.vector.ParquetDictionary; +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableIntVector; +import org.apache.parquet.Preconditions; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.bytes.BytesInput; +import org.apache.parquet.bytes.BytesUtils; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.Dictionary; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.column.page.DataPage; +import org.apache.parquet.column.page.DataPageV1; +import org.apache.parquet.column.page.DataPageV2; +import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.io.ParquetDecodingException; +import org.apache.parquet.schema.PrimitiveType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; + +/** + * Abstract {@link ColumnReader}. + * See {@link org.apache.parquet.column.impl.ColumnReaderImpl}, + * part of the code is referred from Apache Spark and Apache Parquet. + * + *

Note: Reference Flink release 1.11.2 {@link org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader} + * because some of the package scope methods. + */ +public abstract class AbstractColumnReader + implements ColumnReader { + + private static final Logger LOG = LoggerFactory.getLogger(org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader.class); + + private final PageReader pageReader; + + /** + * The dictionary, if this column has dictionary encoding. + */ + protected final Dictionary dictionary; + + /** + * Maximum definition level for this column. + */ + protected final int maxDefLevel; + + protected final ColumnDescriptor descriptor; + + /** + * Total number of values read. + */ + private long valuesRead; + + /** + * value that indicates the end of the current page. That is, if valuesRead == + * endOfPageValueCount, we are at the end of the page. + */ + private long endOfPageValueCount; + + /** + * If true, the current page is dictionary encoded. + */ + private boolean isCurrentPageDictionaryEncoded; + + /** + * Total values in the current page. + */ + private int pageValueCount; + + /* + * Input streams: + * 1.Run length encoder to encode every data, so we have run length stream to get + * run length information. + * 2.Data maybe is real data, maybe is dictionary ids which need be decode to real + * data from Dictionary. + * + * Run length stream ------> Data stream + * | + * ------> Dictionary ids stream + */ + + /** + * Run length decoder for data and dictionary. + */ + protected RunLengthDecoder runLenDecoder; + + /** + * Data input stream. + */ + ByteBufferInputStream dataInputStream; + + /** + * Dictionary decoder to wrap dictionary ids input stream. + */ + private RunLengthDecoder dictionaryIdsDecoder; + + public AbstractColumnReader( + ColumnDescriptor descriptor, + PageReader pageReader) throws IOException { + this.descriptor = descriptor; + this.pageReader = pageReader; + this.maxDefLevel = descriptor.getMaxDefinitionLevel(); + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + try { + this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage); + this.isCurrentPageDictionaryEncoded = true; + } catch (IOException e) { + throw new IOException("could not decode the dictionary for " + descriptor, e); + } + } else { + this.dictionary = null; + this.isCurrentPageDictionaryEncoded = false; + } + /* + * Total number of values in this column (in this row group). + */ + long totalValueCount = pageReader.getTotalValueCount(); + if (totalValueCount == 0) { + throw new IOException("totalValueCount == 0"); + } + } + + protected void checkTypeName(PrimitiveType.PrimitiveTypeName expectedName) { + PrimitiveType.PrimitiveTypeName actualName = descriptor.getPrimitiveType().getPrimitiveTypeName(); + Preconditions.checkArgument( + actualName == expectedName, + "Expected type name: %s, actual type name: %s", + expectedName, + actualName); + } + + /** + * Reads `total` values from this columnReader into column. + */ + @Override + public final void readToVector(int readNumber, V vector) throws IOException { + int rowId = 0; + WritableIntVector dictionaryIds = null; + if (dictionary != null) { + dictionaryIds = vector.reserveDictionaryIds(readNumber); + } + while (readNumber > 0) { + // Compute the number of values we want to read in this page. + int leftInPage = (int) (endOfPageValueCount - valuesRead); + if (leftInPage == 0) { + DataPage page = pageReader.readPage(); + if (page instanceof DataPageV1) { + readPageV1((DataPageV1) page); + } else if (page instanceof DataPageV2) { + readPageV2((DataPageV2) page); + } else { + throw new RuntimeException("Unsupported page type: " + page.getClass()); + } + leftInPage = (int) (endOfPageValueCount - valuesRead); + } + int num = Math.min(readNumber, leftInPage); + if (isCurrentPageDictionaryEncoded) { + // Read and decode dictionary ids. + runLenDecoder.readDictionaryIds( + num, dictionaryIds, vector, rowId, maxDefLevel, this.dictionaryIdsDecoder); + + if (vector.hasDictionary() || (rowId == 0 && supportLazyDecode())) { + // Column vector supports lazy decoding of dictionary values so just set the dictionary. + // We can't do this if rowId != 0 AND the column doesn't have a dictionary (i.e. some + // non-dictionary encoded values have already been added). + vector.setDictionary(new ParquetDictionary(dictionary)); + } else { + readBatchFromDictionaryIds(rowId, num, vector, dictionaryIds); + } + } else { + if (vector.hasDictionary() && rowId != 0) { + // This batch already has dictionary encoded values but this new page is not. The batch + // does not support a mix of dictionary and not so we will decode the dictionary. + readBatchFromDictionaryIds(0, rowId, vector, vector.getDictionaryIds()); + } + vector.setDictionary(null); + readBatch(rowId, num, vector); + } + + valuesRead += num; + rowId += num; + readNumber -= num; + } + } + + private void readPageV1(DataPageV1 page) throws IOException { + this.pageValueCount = page.getValueCount(); + ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); + + // Initialize the decoders. + if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { + throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); + } + int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); + this.runLenDecoder = new RunLengthDecoder(bitWidth); + try { + BytesInput bytes = page.getBytes(); + ByteBufferInputStream in = bytes.toInputStream(); + rlReader.initFromPage(pageValueCount, in); + this.runLenDecoder.initFromStream(pageValueCount, in); + prepareNewPage(page.getValueEncoding(), in); + } catch (IOException e) { + throw new IOException("could not read page " + page + " in col " + descriptor, e); + } + } + + private void readPageV2(DataPageV2 page) throws IOException { + this.pageValueCount = page.getValueCount(); + + int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); + // do not read the length from the stream. v2 pages handle dividing the page bytes. + this.runLenDecoder = new RunLengthDecoder(bitWidth, false); + this.runLenDecoder.initFromStream( + this.pageValueCount, page.getDefinitionLevels().toInputStream()); + try { + prepareNewPage(page.getDataEncoding(), page.getData().toInputStream()); + } catch (IOException e) { + throw new IOException("could not read page " + page + " in col " + descriptor, e); + } + } + + private void prepareNewPage( + Encoding dataEncoding, + ByteBufferInputStream in) throws IOException { + this.endOfPageValueCount = valuesRead + pageValueCount; + if (dataEncoding.usesDictionary()) { + if (dictionary == null) { + throw new IOException("Could not read page in col " + + descriptor + + " as the dictionary was missing for encoding " + + dataEncoding); + } + @SuppressWarnings("deprecation") + Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression + if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { + throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); + } + this.dataInputStream = null; + this.dictionaryIdsDecoder = new RunLengthDecoder(); + try { + this.dictionaryIdsDecoder.initFromStream(pageValueCount, in); + } catch (IOException e) { + throw new IOException("could not read dictionary in col " + descriptor, e); + } + this.isCurrentPageDictionaryEncoded = true; + } else { + if (dataEncoding != Encoding.PLAIN) { + throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); + } + this.dictionaryIdsDecoder = null; + LOG.debug("init from page at offset {} for length {}", in.position(), in.available()); + this.dataInputStream = in.remainingStream(); + this.isCurrentPageDictionaryEncoded = false; + } + + afterReadPage(); + } + + final ByteBuffer readDataBuffer(int length) { + try { + return dataInputStream.slice(length).order(ByteOrder.LITTLE_ENDIAN); + } catch (IOException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes", e); + } + } + + /** + * After read a page, we may need some initialization. + */ + protected void afterReadPage() { + } + + /** + * Support lazy dictionary ids decode. See more in {@link ParquetDictionary}. + * If return false, we will decode all the data first. + */ + protected boolean supportLazyDecode() { + return true; + } + + /** + * Read batch from {@link #runLenDecoder} and {@link #dataInputStream}. + */ + protected abstract void readBatch(int rowId, int num, V column); + + /** + * Decode dictionary ids to data. + * From {@link #runLenDecoder} and {@link #dictionaryIdsDecoder}. + */ + protected abstract void readBatchFromDictionaryIds( + int rowId, + int num, + V column, + WritableIntVector dictionaryIds); +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java new file mode 100644 index 000000000000..6a8a01b74946 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java @@ -0,0 +1,473 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.hudi.table.format.cow.vector.HeapArrayVector; +import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch; +import org.apache.flink.table.data.columnar.vector.heap.HeapBooleanVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapByteVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapDoubleVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapFloatVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapIntVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapLongVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapShortVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapTimestampVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Type; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Array {@link ColumnReader}. + */ +public class ArrayColumnReader extends BaseVectorizedColumnReader { + + // The value read in last time + private Object lastValue; + + // flag to indicate if there is no data in parquet data page + private boolean eof = false; + + // flag to indicate if it's the first time to read parquet data page with this instance + boolean isFirstRow = true; + + public ArrayColumnReader( + ColumnDescriptor descriptor, + PageReader pageReader, + boolean isUtcTimestamp, + Type type, + LogicalType logicalType) + throws IOException { + super(descriptor, pageReader, isUtcTimestamp, type, logicalType); + } + + @Override + public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { + HeapArrayVector lcv = (HeapArrayVector) vector; + // before readBatch, initial the size of offsets & lengths as the default value, + // the actual size will be assigned in setChildrenInfo() after reading complete. + lcv.offsets = new long[VectorizedColumnBatch.DEFAULT_SIZE]; + lcv.lengths = new long[VectorizedColumnBatch.DEFAULT_SIZE]; + // Because the length of ListColumnVector.child can't be known now, + // the valueList will save all data for ListColumnVector temporary. + List valueList = new ArrayList<>(); + + LogicalType category = ((ArrayType) logicalType).getElementType(); + + // read the first row in parquet data page, this will be only happened once for this + // instance + if (isFirstRow) { + if (!fetchNextValue(category)) { + return; + } + isFirstRow = false; + } + + int index = collectDataFromParquetPage(readNumber, lcv, valueList, category); + + // Convert valueList to array for the ListColumnVector.child + fillColumnVector(category, lcv, valueList, index); + } + + /** + * Reads a single value from parquet page, puts it into lastValue. Returns a boolean indicating + * if there is more values to read (true). + * + * @param category + * @return boolean + * @throws IOException + */ + private boolean fetchNextValue(LogicalType category) throws IOException { + int left = readPageIfNeed(); + if (left > 0) { + // get the values of repetition and definitionLevel + readRepetitionAndDefinitionLevels(); + // read the data if it isn't null + if (definitionLevel == maxDefLevel) { + if (isCurrentPageDictionaryEncoded) { + lastValue = dataColumn.readValueDictionaryId(); + } else { + lastValue = readPrimitiveTypedRow(category); + } + } else { + lastValue = null; + } + return true; + } else { + eof = true; + return false; + } + } + + private int readPageIfNeed() throws IOException { + // Compute the number of values we want to read in this page. + int leftInPage = (int) (endOfPageValueCount - valuesRead); + if (leftInPage == 0) { + // no data left in current page, load data from new page + readPage(); + leftInPage = (int) (endOfPageValueCount - valuesRead); + } + return leftInPage; + } + + // Need to be in consistent with that VectorizedPrimitiveColumnReader#readBatchHelper + // TODO Reduce the duplicated code + private Object readPrimitiveTypedRow(LogicalType category) { + switch (category.getTypeRoot()) { + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + return dataColumn.readString(); + case BOOLEAN: + return dataColumn.readBoolean(); + case TIME_WITHOUT_TIME_ZONE: + case DATE: + case INTEGER: + return dataColumn.readInteger(); + case TINYINT: + return dataColumn.readTinyInt(); + case SMALLINT: + return dataColumn.readSmallInt(); + case BIGINT: + return dataColumn.readLong(); + case FLOAT: + return dataColumn.readFloat(); + case DOUBLE: + return dataColumn.readDouble(); + case DECIMAL: + switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { + case INT32: + return dataColumn.readInteger(); + case INT64: + return dataColumn.readLong(); + case BINARY: + case FIXED_LEN_BYTE_ARRAY: + return dataColumn.readString(); + default: + throw new AssertionError(); + } + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return dataColumn.readTimestamp(); + default: + throw new RuntimeException("Unsupported type in the list: " + type); + } + } + + private Object dictionaryDecodeValue(LogicalType category, Integer dictionaryValue) { + if (dictionaryValue == null) { + return null; + } + + switch (category.getTypeRoot()) { + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + return dictionary.readString(dictionaryValue); + case DATE: + case TIME_WITHOUT_TIME_ZONE: + case INTEGER: + return dictionary.readInteger(dictionaryValue); + case BOOLEAN: + return dictionary.readBoolean(dictionaryValue) ? 1 : 0; + case DOUBLE: + return dictionary.readDouble(dictionaryValue); + case FLOAT: + return dictionary.readFloat(dictionaryValue); + case TINYINT: + return dictionary.readTinyInt(dictionaryValue); + case SMALLINT: + return dictionary.readSmallInt(dictionaryValue); + case BIGINT: + return dictionary.readLong(dictionaryValue); + case DECIMAL: + switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { + case INT32: + return dictionary.readInteger(dictionaryValue); + case INT64: + return dictionary.readLong(dictionaryValue); + case FIXED_LEN_BYTE_ARRAY: + case BINARY: + return dictionary.readString(dictionaryValue); + default: + throw new AssertionError(); + } + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return dictionary.readTimestamp(dictionaryValue); + default: + throw new RuntimeException("Unsupported type in the list: " + type); + } + } + + /** + * Collects data from a parquet page and returns the final row index where it stopped. The + * returned index can be equal to or less than total. + * + * @param total maximum number of rows to collect + * @param lcv column vector to do initial setup in data collection time + * @param valueList collection of values that will be fed into the vector later + * @param category + * @return int + * @throws IOException + */ + private int collectDataFromParquetPage( + int total, HeapArrayVector lcv, List valueList, LogicalType category) + throws IOException { + int index = 0; + /* + * Here is a nested loop for collecting all values from a parquet page. + * A column of array type can be considered as a list of lists, so the two loops are as below: + * 1. The outer loop iterates on rows (index is a row index, so points to a row in the batch), e.g.: + * [0, 2, 3] <- index: 0 + * [NULL, 3, 4] <- index: 1 + * + * 2. The inner loop iterates on values within a row (sets all data from parquet data page + * for an element in ListColumnVector), so fetchNextValue returns values one-by-one: + * 0, 2, 3, NULL, 3, 4 + * + * As described below, the repetition level (repetitionLevel != 0) + * can be used to decide when we'll start to read values for the next list. + */ + while (!eof && index < total) { + // add element to ListColumnVector one by one + lcv.offsets[index] = valueList.size(); + /* + * Let's collect all values for a single list. + * Repetition level = 0 means that a new list started there in the parquet page, + * in that case, let's exit from the loop, and start to collect value for a new list. + */ + do { + /* + * Definition level = 0 when a NULL value was returned instead of a list + * (this is not the same as a NULL value in of a list). + */ + if (definitionLevel == 0) { + lcv.setNullAt(index); + } + valueList.add( + isCurrentPageDictionaryEncoded + ? dictionaryDecodeValue(category, (Integer) lastValue) + : lastValue); + } while (fetchNextValue(category) && (repetitionLevel != 0)); + + lcv.lengths[index] = valueList.size() - lcv.offsets[index]; + index++; + } + return index; + } + + /** + * The lengths & offsets will be initialized as default size (1024), it should be set to the + * actual size according to the element number. + */ + private void setChildrenInfo(HeapArrayVector lcv, int itemNum, int elementNum) { + lcv.setSize(itemNum); + long[] lcvLength = new long[elementNum]; + long[] lcvOffset = new long[elementNum]; + System.arraycopy(lcv.lengths, 0, lcvLength, 0, elementNum); + System.arraycopy(lcv.offsets, 0, lcvOffset, 0, elementNum); + lcv.lengths = lcvLength; + lcv.offsets = lcvOffset; + } + + private void fillColumnVector( + LogicalType category, HeapArrayVector lcv, List valueList, int elementNum) { + int total = valueList.size(); + setChildrenInfo(lcv, total, elementNum); + switch (category.getTypeRoot()) { + case CHAR: + case VARCHAR: + case BINARY: + case VARBINARY: + lcv.child = new HeapBytesVector(total); + ((HeapBytesVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + byte[] src = ((List) valueList).get(i); + if (src == null) { + ((HeapBytesVector) lcv.child).setNullAt(i); + } else { + ((HeapBytesVector) lcv.child).appendBytes(i, src, 0, src.length); + } + } + break; + case BOOLEAN: + lcv.child = new HeapBooleanVector(total); + ((HeapBooleanVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapBooleanVector) lcv.child).setNullAt(i); + } else { + ((HeapBooleanVector) lcv.child).vector[i] = + ((List) valueList).get(i); + } + } + break; + case TINYINT: + lcv.child = new HeapByteVector(total); + ((HeapByteVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapByteVector) lcv.child).setNullAt(i); + } else { + ((HeapByteVector) lcv.child).vector[i] = + (byte) ((List) valueList).get(i).intValue(); + } + } + break; + case SMALLINT: + lcv.child = new HeapShortVector(total); + ((HeapShortVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapShortVector) lcv.child).setNullAt(i); + } else { + ((HeapShortVector) lcv.child).vector[i] = + (short) ((List) valueList).get(i).intValue(); + } + } + break; + case INTEGER: + case DATE: + case TIME_WITHOUT_TIME_ZONE: + lcv.child = new HeapIntVector(total); + ((HeapIntVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapIntVector) lcv.child).setNullAt(i); + } else { + ((HeapIntVector) lcv.child).vector[i] = ((List) valueList).get(i); + } + } + break; + case FLOAT: + lcv.child = new HeapFloatVector(total); + ((HeapFloatVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapFloatVector) lcv.child).setNullAt(i); + } else { + ((HeapFloatVector) lcv.child).vector[i] = ((List) valueList).get(i); + } + } + break; + case BIGINT: + lcv.child = new HeapLongVector(total); + ((HeapLongVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapLongVector) lcv.child).setNullAt(i); + } else { + ((HeapLongVector) lcv.child).vector[i] = ((List) valueList).get(i); + } + } + break; + case DOUBLE: + lcv.child = new HeapDoubleVector(total); + ((HeapDoubleVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapDoubleVector) lcv.child).setNullAt(i); + } else { + ((HeapDoubleVector) lcv.child).vector[i] = + ((List) valueList).get(i); + } + } + break; + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + lcv.child = new HeapTimestampVector(total); + ((HeapTimestampVector) lcv.child).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapTimestampVector) lcv.child).setNullAt(i); + } else { + ((HeapTimestampVector) lcv.child) + .setTimestamp(i, ((List) valueList).get(i)); + } + } + break; + case DECIMAL: + PrimitiveType.PrimitiveTypeName primitiveTypeName = + descriptor.getPrimitiveType().getPrimitiveTypeName(); + switch (primitiveTypeName) { + case INT32: + lcv.child = new ParquetDecimalVector(new HeapIntVector(total)); + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) + .setNullAt(i); + } else { + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) + .vector[i] = + ((List) valueList).get(i); + } + } + break; + case INT64: + lcv.child = new ParquetDecimalVector(new HeapLongVector(total)); + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector).reset(); + for (int i = 0; i < valueList.size(); i++) { + if (valueList.get(i) == null) { + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) + .setNullAt(i); + } else { + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) + .vector[i] = + ((List) valueList).get(i); + } + } + break; + default: + lcv.child = new ParquetDecimalVector(new HeapBytesVector(total)); + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector).reset(); + for (int i = 0; i < valueList.size(); i++) { + byte[] src = ((List) valueList).get(i); + if (valueList.get(i) == null) { + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) + .setNullAt(i); + } else { + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) + .appendBytes(i, src, 0, src.length); + } + } + break; + } + break; + default: + throw new RuntimeException("Unsupported type in the list: " + type); + } + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java new file mode 100644 index 000000000000..fea6dc47af50 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.bytes.BytesInput; +import org.apache.parquet.bytes.BytesUtils; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.column.page.DataPage; +import org.apache.parquet.column.page.DataPageV1; +import org.apache.parquet.column.page.DataPageV2; +import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; +import org.apache.parquet.io.ParquetDecodingException; +import org.apache.parquet.schema.Type; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL; +import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; +import static org.apache.parquet.column.ValuesType.VALUES; + +/** + * Abstract {@link ColumnReader}. part of the code is referred from Apache Hive and Apache Parquet. + */ +public abstract class BaseVectorizedColumnReader implements ColumnReader { + + private static final Logger LOG = LoggerFactory.getLogger(BaseVectorizedColumnReader.class); + + protected boolean isUtcTimestamp; + + /** + * Total number of values read. + */ + protected long valuesRead; + + /** + * value that indicates the end of the current page. That is, if valuesRead == + * endOfPageValueCount, we are at the end of the page. + */ + protected long endOfPageValueCount; + + /** + * The dictionary, if this column has dictionary encoding. + */ + protected final ParquetDataColumnReader dictionary; + + /** + * If true, the current page is dictionary encoded. + */ + protected boolean isCurrentPageDictionaryEncoded; + + /** + * Maximum definition level for this column. + */ + protected final int maxDefLevel; + + protected int definitionLevel; + protected int repetitionLevel; + + /** + * Repetition/Definition/Value readers. + */ + protected IntIterator repetitionLevelColumn; + + protected IntIterator definitionLevelColumn; + protected ParquetDataColumnReader dataColumn; + + /** + * Total values in the current page. + */ + protected int pageValueCount; + + protected final PageReader pageReader; + protected final ColumnDescriptor descriptor; + protected final Type type; + protected final LogicalType logicalType; + + public BaseVectorizedColumnReader( + ColumnDescriptor descriptor, + PageReader pageReader, + boolean isUtcTimestamp, + Type parquetType, + LogicalType logicalType) + throws IOException { + this.descriptor = descriptor; + this.type = parquetType; + this.pageReader = pageReader; + this.maxDefLevel = descriptor.getMaxDefinitionLevel(); + this.isUtcTimestamp = isUtcTimestamp; + this.logicalType = logicalType; + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + try { + this.dictionary = + ParquetDataColumnReaderFactory.getDataColumnReaderByTypeOnDictionary( + parquetType.asPrimitiveType(), + dictionaryPage + .getEncoding() + .initDictionary(descriptor, dictionaryPage), + isUtcTimestamp); + this.isCurrentPageDictionaryEncoded = true; + } catch (IOException e) { + throw new IOException("could not decode the dictionary for " + descriptor, e); + } + } else { + this.dictionary = null; + this.isCurrentPageDictionaryEncoded = false; + } + } + + protected void readRepetitionAndDefinitionLevels() { + repetitionLevel = repetitionLevelColumn.nextInt(); + definitionLevel = definitionLevelColumn.nextInt(); + valuesRead++; + } + + protected void readPage() throws IOException { + DataPage page = pageReader.readPage(); + + if (page == null) { + return; + } + + page.accept( + new DataPage.Visitor() { + @Override + public Void visit(DataPageV1 dataPageV1) { + readPageV1(dataPageV1); + return null; + } + + @Override + public Void visit(DataPageV2 dataPageV2) { + readPageV2(dataPageV2); + return null; + } + }); + } + + private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) + throws IOException { + this.pageValueCount = valueCount; + this.endOfPageValueCount = valuesRead + pageValueCount; + if (dataEncoding.usesDictionary()) { + this.dataColumn = null; + if (dictionary == null) { + throw new IOException( + "could not read page in col " + + descriptor + + " as the dictionary was missing for encoding " + + dataEncoding); + } + dataColumn = + ParquetDataColumnReaderFactory.getDataColumnReaderByType( + type.asPrimitiveType(), + dataEncoding.getDictionaryBasedValuesReader( + descriptor, VALUES, dictionary.getDictionary()), + isUtcTimestamp); + this.isCurrentPageDictionaryEncoded = true; + } else { + dataColumn = + ParquetDataColumnReaderFactory.getDataColumnReaderByType( + type.asPrimitiveType(), + dataEncoding.getValuesReader(descriptor, VALUES), + isUtcTimestamp); + this.isCurrentPageDictionaryEncoded = false; + } + + try { + dataColumn.initFromPage(pageValueCount, in); + } catch (IOException e) { + throw new IOException("could not read page in col " + descriptor, e); + } + } + + private void readPageV1(DataPageV1 page) { + ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); + ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL); + this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); + this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); + try { + BytesInput bytes = page.getBytes(); + LOG.debug("page size " + bytes.size() + " bytes and " + pageValueCount + " records"); + ByteBufferInputStream in = bytes.toInputStream(); + LOG.debug("reading repetition levels at " + in.position()); + rlReader.initFromPage(pageValueCount, in); + LOG.debug("reading definition levels at " + in.position()); + dlReader.initFromPage(pageValueCount, in); + LOG.debug("reading data at " + in.position()); + initDataReader(page.getValueEncoding(), in, page.getValueCount()); + } catch (IOException e) { + throw new ParquetDecodingException( + "could not read page " + page + " in col " + descriptor, e); + } + } + + private void readPageV2(DataPageV2 page) { + this.pageValueCount = page.getValueCount(); + this.repetitionLevelColumn = + newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels()); + this.definitionLevelColumn = + newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels()); + try { + LOG.debug( + "page data size " + + page.getData().size() + + " bytes and " + + pageValueCount + + " records"); + initDataReader( + page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount()); + } catch (IOException e) { + throw new ParquetDecodingException( + "could not read page " + page + " in col " + descriptor, e); + } + } + + private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) { + try { + if (maxLevel == 0) { + return new NullIntIterator(); + } + return new RLEIntIterator( + new RunLengthBitPackingHybridDecoder( + BytesUtils.getWidthFromMaxInt(maxLevel), + new ByteArrayInputStream(bytes.toByteArray()))); + } catch (IOException e) { + throw new ParquetDecodingException( + "could not read levels in page for col " + descriptor, e); + } + } + + /** + * Utility classes to abstract over different way to read ints with different encodings. + */ + abstract static class IntIterator { + abstract int nextInt(); + } + + /** + * read ints from {@link ValuesReader}. + */ + protected static final class ValuesReaderIntIterator extends IntIterator { + ValuesReader delegate; + + public ValuesReaderIntIterator(ValuesReader delegate) { + this.delegate = delegate; + } + + @Override + int nextInt() { + return delegate.readInteger(); + } + } + + /** + * read ints from {@link RunLengthBitPackingHybridDecoder}. + */ + protected static final class RLEIntIterator extends IntIterator { + RunLengthBitPackingHybridDecoder delegate; + + public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) { + this.delegate = delegate; + } + + @Override + int nextInt() { + try { + return delegate.readInt(); + } catch (IOException e) { + throw new ParquetDecodingException(e); + } + } + } + + /** + * return zero. + */ + protected static final class NullIntIterator extends IntIterator { + @Override + int nextInt() { + return 0; + } + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java new file mode 100644 index 000000000000..6ea610bf2af2 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; + +import java.io.IOException; + +/** + * Empty {@link ColumnReader}. + *

+ * This reader is to handle parquet files that have not been updated to the latest Schema. + * When reading a parquet file with the latest schema, parquet file might not have the new field. + * The EmptyColumnReader is used to handle such scenarios. + */ +public class EmptyColumnReader implements ColumnReader { + + public EmptyColumnReader() {} + + @Override + public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { + vector.fillWithNulls(); + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java new file mode 100644 index 000000000000..be50e6c6239d --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.table.data.columnar.vector.writable.WritableBytesVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableIntVector; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.PrimitiveType; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Fixed length bytes {@code ColumnReader}, just for decimal. + * + *

Note: Reference Flink release 1.13.2 + * {@code org.apache.flink.formats.parquet.vector.reader.FixedLenBytesColumnReader} + * to always write as legacy decimal format. + */ +public class FixedLenBytesColumnReader + extends AbstractColumnReader { + + public FixedLenBytesColumnReader( + ColumnDescriptor descriptor, PageReader pageReader) throws IOException { + super(descriptor, pageReader); + checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY); + } + + @Override + protected void readBatch(int rowId, int num, V column) { + int bytesLen = descriptor.getPrimitiveType().getTypeLength(); + WritableBytesVector bytesVector = (WritableBytesVector) column; + for (int i = 0; i < num; i++) { + if (runLenDecoder.readInteger() == maxDefLevel) { + byte[] bytes = readDataBinary(bytesLen).getBytes(); + bytesVector.appendBytes(rowId + i, bytes, 0, bytes.length); + } else { + bytesVector.setNullAt(rowId + i); + } + } + } + + @Override + protected void readBatchFromDictionaryIds( + int rowId, int num, V column, WritableIntVector dictionaryIds) { + WritableBytesVector bytesVector = (WritableBytesVector) column; + for (int i = rowId; i < rowId + num; ++i) { + if (!bytesVector.isNullAt(i)) { + byte[] v = dictionary.decodeToBinary(dictionaryIds.getInt(i)).getBytes(); + bytesVector.appendBytes(i, v, 0, v.length); + } + } + } + + private Binary readDataBinary(int len) { + ByteBuffer buffer = readDataBuffer(len); + if (buffer.hasArray()) { + return Binary.fromConstantByteArray( + buffer.array(), buffer.arrayOffset() + buffer.position(), len); + } else { + byte[] bytes = new byte[len]; + buffer.get(bytes); + return Binary.fromConstantByteArray(bytes); + } + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java new file mode 100644 index 000000000000..b44273b57ca2 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.data.columnar.vector.writable.WritableIntVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableTimestampVector; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.page.PageReader; +import org.apache.parquet.schema.PrimitiveType; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.temporal.ChronoUnit; + +/** + * Timestamp {@link org.apache.flink.formats.parquet.vector.reader.ColumnReader} that supports INT64 8 bytes, + * TIMESTAMP_MILLIS is the deprecated ConvertedType counterpart of a TIMESTAMP logical type + * that is UTC normalized and has MILLIS precision. + * + *

See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp + * TIMESTAMP_MILLIS and TIMESTAMP_MICROS are the deprecated ConvertedType. + */ +public class Int64TimestampColumnReader extends AbstractColumnReader { + + private final boolean utcTimestamp; + + private final ChronoUnit chronoUnit; + + public Int64TimestampColumnReader( + boolean utcTimestamp, + ColumnDescriptor descriptor, + PageReader pageReader, + int precision) throws IOException { + super(descriptor, pageReader); + this.utcTimestamp = utcTimestamp; + if (precision <= 3) { + this.chronoUnit = ChronoUnit.MILLIS; + } else if (precision <= 6) { + this.chronoUnit = ChronoUnit.MICROS; + } else { + throw new IllegalArgumentException( + "Avro does not support TIMESTAMP type with precision: " + + precision + + ", it only support precisions <= 6."); + } + checkTypeName(PrimitiveType.PrimitiveTypeName.INT64); + } + + @Override + protected boolean supportLazyDecode() { + return false; + } + + @Override + protected void readBatch(int rowId, int num, WritableTimestampVector column) { + for (int i = 0; i < num; i++) { + if (runLenDecoder.readInteger() == maxDefLevel) { + ByteBuffer buffer = readDataBuffer(8); + column.setTimestamp(rowId + i, int64ToTimestamp(utcTimestamp, buffer.getLong(), chronoUnit)); + } else { + column.setNullAt(rowId + i); + } + } + } + + @Override + protected void readBatchFromDictionaryIds( + int rowId, + int num, + WritableTimestampVector column, + WritableIntVector dictionaryIds) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + column.setTimestamp(i, decodeInt64ToTimestamp( + utcTimestamp, dictionary, dictionaryIds.getInt(i), chronoUnit)); + } + } + } + + public static TimestampData decodeInt64ToTimestamp( + boolean utcTimestamp, + org.apache.parquet.column.Dictionary dictionary, + int id, + ChronoUnit unit) { + long value = dictionary.decodeToLong(id); + return int64ToTimestamp(utcTimestamp, value, unit); + } + + private static TimestampData int64ToTimestamp( + boolean utcTimestamp, + long interval, + ChronoUnit unit) { + final Instant instant = Instant.EPOCH.plus(interval, unit); + if (utcTimestamp) { + return TimestampData.fromInstant(instant); + } else { + // this applies the local timezone + return TimestampData.fromTimestamp(Timestamp.from(instant)); + } + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java new file mode 100644 index 000000000000..a6762d2e175c --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.hudi.table.format.cow.vector.HeapArrayVector; +import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; + +import java.io.IOException; + +/** + * Map {@link ColumnReader}. + */ +public class MapColumnReader implements ColumnReader { + + private final LogicalType logicalType; + private final ArrayColumnReader keyReader; + private final ArrayColumnReader valueReader; + + public MapColumnReader( + ArrayColumnReader keyReader, ArrayColumnReader valueReader, LogicalType logicalType) { + this.keyReader = keyReader; + this.valueReader = valueReader; + this.logicalType = logicalType; + } + + public void readBatch(int total, ColumnVector column) throws IOException { + HeapMapColumnVector mapColumnVector = (HeapMapColumnVector) column; + MapType mapType = (MapType) logicalType; + // initialize 2 ListColumnVector for keys and values + HeapArrayVector keyArrayColumnVector = new HeapArrayVector(total); + HeapArrayVector valueArrayColumnVector = new HeapArrayVector(total); + // read the keys and values + keyReader.readToVector(total, keyArrayColumnVector); + valueReader.readToVector(total, valueArrayColumnVector); + + // set the related attributes according to the keys and values + mapColumnVector.setKeys(keyArrayColumnVector.child); + mapColumnVector.setValues(valueArrayColumnVector.child); + mapColumnVector.setOffsets(keyArrayColumnVector.offsets); + mapColumnVector.setLengths(keyArrayColumnVector.lengths); + mapColumnVector.setSize(keyArrayColumnVector.getSize()); + for (int i = 0; i < keyArrayColumnVector.getLen(); i++) { + if (keyArrayColumnVector.isNullAt(i)) { + mapColumnVector.setNullAt(i); + } + } + } + + @Override + public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { + readBatch(readNumber, vector); + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java new file mode 100644 index 000000000000..65912cef671b --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.columnar.ColumnarRowData; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; +import org.apache.flink.util.FlinkRuntimeException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.ColumnDescriptor; +import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.filter.UnboundRecordFilter; +import org.apache.parquet.filter2.compat.FilterCompat; +import org.apache.parquet.filter2.predicate.FilterPredicate; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.Type; +import org.apache.parquet.schema.Types; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.stream.IntStream; + +import static org.apache.hudi.table.format.cow.ParquetSplitReaderUtil.createColumnReader; +import static org.apache.hudi.table.format.cow.ParquetSplitReaderUtil.createWritableColumnVector; +import static org.apache.parquet.filter2.compat.FilterCompat.get; +import static org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups; +import static org.apache.parquet.format.converter.ParquetMetadataConverter.range; +import static org.apache.parquet.hadoop.ParquetFileReader.readFooter; + +/** + * This reader is used to read a {@link VectorizedColumnBatch} from input split. + * + *

Note: Reference Flink release 1.11.2 + * {@code org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader} + * because it is package scope. + */ +public class ParquetColumnarRowSplitReader implements Closeable { + + private final boolean utcTimestamp; + + private final MessageType fileSchema; + + private final LogicalType[] requestedTypes; + + private final MessageType requestedSchema; + + /** + * The total number of rows this RecordReader will eventually read. The sum of the rows of all + * the row groups. + */ + private final long totalRowCount; + + private final WritableColumnVector[] writableVectors; + + private final VectorizedColumnBatch columnarBatch; + + private final ColumnarRowData row; + + private final int batchSize; + + private ParquetFileReader reader; + + /** + * For each request column, the reader to read this column. This is NULL if this column is + * missing from the file, in which case we populate the attribute with NULL. + */ + private ColumnReader[] columnReaders; + + /** + * The number of rows that have been returned. + */ + private long rowsReturned; + + /** + * The number of rows that have been reading, including the current in flight row group. + */ + private long totalCountLoadedSoFar; + + // the index of the next row to return + private int nextRow; + + // the number of rows in the current batch + private int rowsInBatch; + + public ParquetColumnarRowSplitReader( + boolean utcTimestamp, + boolean caseSensitive, + Configuration conf, + LogicalType[] selectedTypes, + String[] selectedFieldNames, + ColumnBatchGenerator generator, + int batchSize, + Path path, + long splitStart, + long splitLength, + FilterPredicate filterPredicate, + UnboundRecordFilter recordFilter) throws IOException { + this.utcTimestamp = utcTimestamp; + this.batchSize = batchSize; + // then we need to apply the predicate push down filter + ParquetMetadata footer = readFooter(conf, path, range(splitStart, splitStart + splitLength)); + MessageType fileSchema = footer.getFileMetaData().getSchema(); + FilterCompat.Filter filter = get(filterPredicate, recordFilter); + List blocks = filterRowGroups(filter, footer.getBlocks(), fileSchema); + + this.fileSchema = footer.getFileMetaData().getSchema(); + + Type[] types = clipParquetSchema(fileSchema, selectedFieldNames, caseSensitive); + int[] requestedIndices = IntStream.range(0, types.length).filter(i -> types[i] != null).toArray(); + Type[] readTypes = Arrays.stream(requestedIndices).mapToObj(i -> types[i]).toArray(Type[]::new); + + this.requestedTypes = Arrays.stream(requestedIndices).mapToObj(i -> selectedTypes[i]).toArray(LogicalType[]::new); + this.requestedSchema = Types.buildMessage().addFields(readTypes).named("flink-parquet"); + this.reader = new ParquetFileReader( + conf, footer.getFileMetaData(), path, blocks, requestedSchema.getColumns()); + + long totalRowCount = 0; + for (BlockMetaData block : blocks) { + totalRowCount += block.getRowCount(); + } + this.totalRowCount = totalRowCount; + this.nextRow = 0; + this.rowsInBatch = 0; + this.rowsReturned = 0; + + checkSchema(); + + this.writableVectors = createWritableVectors(); + ColumnVector[] columnVectors = patchedVector(selectedFieldNames.length, createReadableVectors(), requestedIndices); + this.columnarBatch = generator.generate(columnVectors); + this.row = new ColumnarRowData(columnarBatch); + } + + /** + * Patches the given vectors with nulls. + * The vector position that is not requested (or read from file) is patched as null. + * + * @param fields The total selected fields number + * @param vectors The readable vectors + * @param indices The requested indices from the selected fields + */ + private static ColumnVector[] patchedVector(int fields, ColumnVector[] vectors, int[] indices) { + ColumnVector[] patched = new ColumnVector[fields]; + for (int i = 0; i < indices.length; i++) { + patched[indices[i]] = vectors[i]; + } + return patched; + } + + /** + * Clips `parquetSchema` according to `fieldNames`. + */ + private static Type[] clipParquetSchema( + GroupType parquetSchema, String[] fieldNames, boolean caseSensitive) { + Type[] types = new Type[fieldNames.length]; + if (caseSensitive) { + for (int i = 0; i < fieldNames.length; ++i) { + String fieldName = fieldNames[i]; + types[i] = parquetSchema.containsField(fieldName) ? parquetSchema.getType(fieldName) : null; + } + } else { + Map caseInsensitiveFieldMap = new HashMap<>(); + for (Type type : parquetSchema.getFields()) { + caseInsensitiveFieldMap.compute(type.getName().toLowerCase(Locale.ROOT), + (key, previousType) -> { + if (previousType != null) { + throw new FlinkRuntimeException( + "Parquet with case insensitive mode should have no duplicate key: " + key); + } + return type; + }); + } + for (int i = 0; i < fieldNames.length; ++i) { + Type type = caseInsensitiveFieldMap.get(fieldNames[i].toLowerCase(Locale.ROOT)); + // TODO clip for array,map,row types. + types[i] = type; + } + } + + return types; + } + + private WritableColumnVector[] createWritableVectors() { + WritableColumnVector[] columns = new WritableColumnVector[requestedTypes.length]; + List types = requestedSchema.getFields(); + List descriptors = requestedSchema.getColumns(); + for (int i = 0; i < requestedTypes.length; i++) { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } + return columns; + } + + /** + * Create readable vectors from writable vectors. + * Especially for decimal, see {@link org.apache.flink.formats.parquet.vector.ParquetDecimalVector}. + */ + private ColumnVector[] createReadableVectors() { + ColumnVector[] vectors = new ColumnVector[writableVectors.length]; + for (int i = 0; i < writableVectors.length; i++) { + vectors[i] = requestedTypes[i].getTypeRoot() == LogicalTypeRoot.DECIMAL + ? new ParquetDecimalVector(writableVectors[i]) + : writableVectors[i]; + } + return vectors; + } + + private void checkSchema() throws IOException, UnsupportedOperationException { + /* + * Check that the requested schema is supported. + */ + for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { + String[] colPath = requestedSchema.getPaths().get(i); + if (fileSchema.containsPath(colPath)) { + ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); + if (!fd.equals(requestedSchema.getColumns().get(i))) { + throw new UnsupportedOperationException("Schema evolution not supported."); + } + } else { + if (requestedSchema.getColumns().get(i).getMaxDefinitionLevel() == 0) { + // Column is missing in data but the required data is non-nullable. This file is invalid. + throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(colPath)); + } + } + } + } + + /** + * Method used to check if the end of the input is reached. + * + * @return True if the end is reached, otherwise false. + * @throws IOException Thrown, if an I/O error occurred. + */ + public boolean reachedEnd() throws IOException { + return !ensureBatch(); + } + + public RowData nextRecord() { + // return the next row + row.setRowId(this.nextRow++); + return row; + } + + /** + * Checks if there is at least one row left in the batch to return. If no more row are + * available, it reads another batch of rows. + * + * @return Returns true if there is one more row to return, false otherwise. + * @throws IOException throw if an exception happens while reading a batch. + */ + private boolean ensureBatch() throws IOException { + if (nextRow >= rowsInBatch) { + // No more rows available in the Rows array. + nextRow = 0; + // Try to read the next batch if rows from the file. + return nextBatch(); + } + // there is at least one Row left in the Rows array. + return true; + } + + /** + * Advances to the next batch of rows. Returns false if there are no more. + */ + private boolean nextBatch() throws IOException { + for (WritableColumnVector v : writableVectors) { + v.reset(); + } + columnarBatch.setNumRows(0); + if (rowsReturned >= totalRowCount) { + return false; + } + if (rowsReturned == totalCountLoadedSoFar) { + readNextRowGroup(); + } + + int num = (int) Math.min(batchSize, totalCountLoadedSoFar - rowsReturned); + for (int i = 0; i < columnReaders.length; ++i) { + //noinspection unchecked + columnReaders[i].readToVector(num, writableVectors[i]); + } + rowsReturned += num; + columnarBatch.setNumRows(num); + rowsInBatch = num; + return true; + } + + private void readNextRowGroup() throws IOException { + PageReadStore pages = reader.readNextRowGroup(); + if (pages == null) { + throw new IOException("expecting more rows but reached last block. Read " + + rowsReturned + " out of " + totalRowCount); + } + List types = requestedSchema.getFields(); + List columns = requestedSchema.getColumns(); + columnReaders = new ColumnReader[types.size()]; + for (int i = 0; i < types.size(); ++i) { + columnReaders[i] = createColumnReader( + utcTimestamp, + requestedTypes[i], + types.get(i), + columns, + pages); + } + totalCountLoadedSoFar += pages.getRowCount(); + } + + /** + * Seek to a particular row number. + */ + public void seekToRow(long rowCount) throws IOException { + if (totalCountLoadedSoFar != 0) { + throw new UnsupportedOperationException("Only support seek at first."); + } + + List blockMetaData = reader.getRowGroups(); + + for (BlockMetaData metaData : blockMetaData) { + if (metaData.getRowCount() > rowCount) { + break; + } else { + reader.skipNextRowGroup(); + rowsReturned += metaData.getRowCount(); + totalCountLoadedSoFar += metaData.getRowCount(); + rowsInBatch = (int) metaData.getRowCount(); + nextRow = (int) metaData.getRowCount(); + rowCount -= metaData.getRowCount(); + } + } + for (int i = 0; i < rowCount; i++) { + boolean end = reachedEnd(); + if (end) { + throw new RuntimeException("Seek to many rows."); + } + nextRecord(); + } + } + + @Override + public void close() throws IOException { + if (reader != null) { + reader.close(); + reader = null; + } + } + + /** + * Interface to gen {@link VectorizedColumnBatch}. + */ + public interface ColumnBatchGenerator { + VectorizedColumnBatch generate(ColumnVector[] readVectors); + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java new file mode 100644 index 000000000000..e96cf22d29ef --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.table.data.TimestampData; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.Dictionary; + +import java.io.IOException; + +/** + * The interface to wrap the underlying Parquet dictionary and non dictionary encoded page reader. + */ +public interface ParquetDataColumnReader { + + /** + * Initialize the reader by page data. + * + * @param valueCount value count + * @param in page data + * @throws IOException + */ + void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException; + + /** + * @return the next Dictionary ID from the page + */ + int readValueDictionaryId(); + + /** + * @return the next Long from the page + */ + long readLong(); + + /** + * @return the next Integer from the page + */ + int readInteger(); + + /** + * @return the next SmallInt from the page + */ + int readSmallInt(); + + /** + * @return the next TinyInt from the page + */ + int readTinyInt(); + + /** + * @return the next Float from the page + */ + float readFloat(); + + /** + * @return the next Boolean from the page + */ + boolean readBoolean(); + + /** + * @return the next String from the page + */ + byte[] readString(); + + /** + * @return the next Varchar from the page + */ + byte[] readVarchar(); + + /** + * @return the next Char from the page + */ + byte[] readChar(); + + /** + * @return the next Bytes from the page + */ + byte[] readBytes(); + + /** + * @return the next Decimal from the page + */ + byte[] readDecimal(); + + /** + * @return the next Double from the page + */ + double readDouble(); + + /** + * @return the next TimestampData from the page + */ + TimestampData readTimestamp(); + + /** + * @return is data valid + */ + boolean isValid(); + + /** + * @return the underlying dictionary if current reader is dictionary encoded + */ + Dictionary getDictionary(); + + /** + * @param id in dictionary + * @return the Bytes from the dictionary by id + */ + byte[] readBytes(int id); + + /** + * @param id in dictionary + * @return the Float from the dictionary by id + */ + float readFloat(int id); + + /** + * @param id in dictionary + * @return the Double from the dictionary by id + */ + double readDouble(int id); + + /** + * @param id in dictionary + * @return the Integer from the dictionary by id + */ + int readInteger(int id); + + /** + * @param id in dictionary + * @return the Long from the dictionary by id + */ + long readLong(int id); + + /** + * @param id in dictionary + * @return the Small Int from the dictionary by id + */ + int readSmallInt(int id); + + /** + * @param id in dictionary + * @return the tiny int from the dictionary by id + */ + int readTinyInt(int id); + + /** + * @param id in dictionary + * @return the Boolean from the dictionary by id + */ + boolean readBoolean(int id); + + /** + * @param id in dictionary + * @return the Decimal from the dictionary by id + */ + byte[] readDecimal(int id); + + /** + * @param id in dictionary + * @return the TimestampData from the dictionary by id + */ + TimestampData readTimestamp(int id); + + /** + * @param id in dictionary + * @return the String from the dictionary by id + */ + byte[] readString(int id); + + /** + * @param id in dictionary + * @return the Varchar from the dictionary by id + */ + byte[] readVarchar(int id); + + /** + * @param id in dictionary + * @return the Char from the dictionary by id + */ + byte[] readChar(int id); +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java new file mode 100644 index 000000000000..861d5cb00bbe --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.table.data.TimestampData; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.column.Dictionary; +import org.apache.parquet.column.values.ValuesReader; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.PrimitiveType; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.sql.Timestamp; + +import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.JULIAN_EPOCH_OFFSET_DAYS; +import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.MILLIS_IN_DAY; +import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.NANOS_PER_MILLISECOND; +import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.NANOS_PER_SECOND; + +/** + * Parquet file has self-describing schema which may differ from the user required schema (e.g. + * schema evolution). This factory is used to retrieve user required typed data via corresponding + * reader which reads the underlying data. + */ +public final class ParquetDataColumnReaderFactory { + + private ParquetDataColumnReaderFactory() { + } + + /** + * default reader for {@link ParquetDataColumnReader}. + */ + public static class DefaultParquetDataColumnReader implements ParquetDataColumnReader { + protected ValuesReader valuesReader; + protected Dictionary dict; + + // After the data is read in the parquet type, isValid will be set to true if the data can + // be returned in the type defined in HMS. Otherwise isValid is set to false. + boolean isValid = true; + + public DefaultParquetDataColumnReader(ValuesReader valuesReader) { + this.valuesReader = valuesReader; + } + + public DefaultParquetDataColumnReader(Dictionary dict) { + this.dict = dict; + } + + @Override + public void initFromPage(int i, ByteBufferInputStream in) throws IOException { + valuesReader.initFromPage(i, in); + } + + @Override + public boolean readBoolean() { + return valuesReader.readBoolean(); + } + + @Override + public boolean readBoolean(int id) { + return dict.decodeToBoolean(id); + } + + @Override + public byte[] readString(int id) { + return dict.decodeToBinary(id).getBytesUnsafe(); + } + + @Override + public byte[] readString() { + return valuesReader.readBytes().getBytesUnsafe(); + } + + @Override + public byte[] readVarchar() { + // we need to enforce the size here even the types are the same + return valuesReader.readBytes().getBytesUnsafe(); + } + + @Override + public byte[] readVarchar(int id) { + return dict.decodeToBinary(id).getBytesUnsafe(); + } + + @Override + public byte[] readChar() { + return valuesReader.readBytes().getBytesUnsafe(); + } + + @Override + public byte[] readChar(int id) { + return dict.decodeToBinary(id).getBytesUnsafe(); + } + + @Override + public byte[] readBytes() { + return valuesReader.readBytes().getBytesUnsafe(); + } + + @Override + public byte[] readBytes(int id) { + return dict.decodeToBinary(id).getBytesUnsafe(); + } + + @Override + public byte[] readDecimal() { + return valuesReader.readBytes().getBytesUnsafe(); + } + + @Override + public byte[] readDecimal(int id) { + return dict.decodeToBinary(id).getBytesUnsafe(); + } + + @Override + public float readFloat() { + return valuesReader.readFloat(); + } + + @Override + public float readFloat(int id) { + return dict.decodeToFloat(id); + } + + @Override + public double readDouble() { + return valuesReader.readDouble(); + } + + @Override + public double readDouble(int id) { + return dict.decodeToDouble(id); + } + + @Override + public TimestampData readTimestamp() { + throw new RuntimeException("Unsupported operation"); + } + + @Override + public TimestampData readTimestamp(int id) { + throw new RuntimeException("Unsupported operation"); + } + + @Override + public int readInteger() { + return valuesReader.readInteger(); + } + + @Override + public int readInteger(int id) { + return dict.decodeToInt(id); + } + + @Override + public boolean isValid() { + return isValid; + } + + @Override + public long readLong(int id) { + return dict.decodeToLong(id); + } + + @Override + public long readLong() { + return valuesReader.readLong(); + } + + @Override + public int readSmallInt() { + return valuesReader.readInteger(); + } + + @Override + public int readSmallInt(int id) { + return dict.decodeToInt(id); + } + + @Override + public int readTinyInt() { + return valuesReader.readInteger(); + } + + @Override + public int readTinyInt(int id) { + return dict.decodeToInt(id); + } + + @Override + public int readValueDictionaryId() { + return valuesReader.readValueDictionaryId(); + } + + public void skip() { + valuesReader.skip(); + } + + @Override + public Dictionary getDictionary() { + return dict; + } + } + + /** + * The reader who reads from the underlying Timestamp value value. + */ + public static class TypesFromInt96PageReader extends DefaultParquetDataColumnReader { + private final boolean isUtcTimestamp; + + public TypesFromInt96PageReader(ValuesReader realReader, boolean isUtcTimestamp) { + super(realReader); + this.isUtcTimestamp = isUtcTimestamp; + } + + public TypesFromInt96PageReader(Dictionary dict, boolean isUtcTimestamp) { + super(dict); + this.isUtcTimestamp = isUtcTimestamp; + } + + private TimestampData convert(Binary binary) { + ByteBuffer buf = binary.toByteBuffer(); + buf.order(ByteOrder.LITTLE_ENDIAN); + long timeOfDayNanos = buf.getLong(); + int julianDay = buf.getInt(); + return int96ToTimestamp(isUtcTimestamp, timeOfDayNanos, julianDay); + } + + @Override + public TimestampData readTimestamp(int id) { + return convert(dict.decodeToBinary(id)); + } + + @Override + public TimestampData readTimestamp() { + return convert(valuesReader.readBytes()); + } + } + + private static ParquetDataColumnReader getDataColumnReaderByTypeHelper( + boolean isDictionary, + PrimitiveType parquetType, + Dictionary dictionary, + ValuesReader valuesReader, + boolean isUtcTimestamp) { + if (parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT96) { + return isDictionary + ? new TypesFromInt96PageReader(dictionary, isUtcTimestamp) + : new TypesFromInt96PageReader(valuesReader, isUtcTimestamp); + } else { + return isDictionary + ? new DefaultParquetDataColumnReader(dictionary) + : new DefaultParquetDataColumnReader(valuesReader); + } + } + + public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary( + PrimitiveType parquetType, Dictionary realReader, boolean isUtcTimestamp) { + return getDataColumnReaderByTypeHelper(true, parquetType, realReader, null, isUtcTimestamp); + } + + public static ParquetDataColumnReader getDataColumnReaderByType( + PrimitiveType parquetType, ValuesReader realReader, boolean isUtcTimestamp) { + return getDataColumnReaderByTypeHelper( + false, parquetType, null, realReader, isUtcTimestamp); + } + + private static TimestampData int96ToTimestamp( + boolean utcTimestamp, long nanosOfDay, int julianDay) { + long millisecond = julianDayToMillis(julianDay) + (nanosOfDay / NANOS_PER_MILLISECOND); + + if (utcTimestamp) { + int nanoOfMillisecond = (int) (nanosOfDay % NANOS_PER_MILLISECOND); + return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond); + } else { + Timestamp timestamp = new Timestamp(millisecond); + timestamp.setNanos((int) (nanosOfDay % NANOS_PER_SECOND)); + return TimestampData.fromTimestamp(timestamp); + } + } + + private static long julianDayToMillis(int julianDay) { + return (julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY; + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java new file mode 100644 index 000000000000..79b50487f13c --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; + +import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; + +import java.io.IOException; +import java.util.List; + +/** + * Row {@link ColumnReader}. + */ +public class RowColumnReader implements ColumnReader { + + private final List fieldReaders; + + public RowColumnReader(List fieldReaders) { + this.fieldReaders = fieldReaders; + } + + @Override + public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { + HeapRowColumnVector rowColumnVector = (HeapRowColumnVector) vector; + WritableColumnVector[] vectors = rowColumnVector.vectors; + // row vector null array + boolean[] isNulls = new boolean[readNumber]; + for (int i = 0; i < vectors.length; i++) { + fieldReaders.get(i).readToVector(readNumber, vectors[i]); + + for (int j = 0; j < readNumber; j++) { + if (i == 0) { + isNulls[j] = vectors[i].isNullAt(j); + } else { + isNulls[j] = isNulls[j] && vectors[i].isNullAt(j); + } + if (i == vectors.length - 1 && isNulls[j]) { + // rowColumnVector[j] is null only when all fields[j] of rowColumnVector[j] is + // null + rowColumnVector.setNullAt(j); + } + } + } + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java new file mode 100644 index 000000000000..4371ec30ae4c --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector.reader; + +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableIntVector; +import org.apache.parquet.Preconditions; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.bytes.BytesUtils; +import org.apache.parquet.column.values.bitpacking.BytePacker; +import org.apache.parquet.column.values.bitpacking.Packer; +import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; +import org.apache.parquet.io.ParquetDecodingException; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Run length decoder for data and dictionary ids. + * See https://github.com/apache/parquet-format/blob/master/Encodings.md + * See {@link RunLengthBitPackingHybridDecoder}. + * + *

Note: Reference Flink release 1.11.2 + * {@code org.apache.flink.formats.parquet.vector.reader.RunLengthDecoder} + * because it is package scope. + */ +final class RunLengthDecoder { + + /** + * If true, the bit width is fixed. This decoder is used in different places and this also + * controls if we need to read the bitwidth from the beginning of the data stream. + */ + private final boolean fixedWidth; + private final boolean readLength; + + // Encoded data. + private ByteBufferInputStream in; + + // bit/byte width of decoded data and utility to batch unpack them. + private int bitWidth; + private int bytesWidth; + private BytePacker packer; + + // Current decoding mode and values + MODE mode; + int currentCount; + int currentValue; + + // Buffer of decoded values if the values are PACKED. + int[] currentBuffer = new int[16]; + int currentBufferIdx = 0; + + RunLengthDecoder() { + this.fixedWidth = false; + this.readLength = false; + } + + RunLengthDecoder(int bitWidth) { + this.fixedWidth = true; + this.readLength = bitWidth != 0; + initWidthAndPacker(bitWidth); + } + + RunLengthDecoder(int bitWidth, boolean readLength) { + this.fixedWidth = true; + this.readLength = readLength; + initWidthAndPacker(bitWidth); + } + + /** + * Init from input stream. + */ + void initFromStream(int valueCount, ByteBufferInputStream in) throws IOException { + this.in = in; + if (fixedWidth) { + // initialize for repetition and definition levels + if (readLength) { + int length = readIntLittleEndian(); + this.in = in.sliceStream(length); + } + } else { + // initialize for values + if (in.available() > 0) { + initWidthAndPacker(in.read()); + } + } + if (bitWidth == 0) { + // 0 bit width, treat this as an RLE run of valueCount number of 0's. + this.mode = MODE.RLE; + this.currentCount = valueCount; + this.currentValue = 0; + } else { + this.currentCount = 0; + } + } + + /** + * Initializes the internal state for decoding ints of `bitWidth`. + */ + private void initWidthAndPacker(int bitWidth) { + Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32"); + this.bitWidth = bitWidth; + this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth); + this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth); + } + + int readInteger() { + if (this.currentCount == 0) { + this.readNextGroup(); + } + + this.currentCount--; + switch (mode) { + case RLE: + return this.currentValue; + case PACKED: + return this.currentBuffer[currentBufferIdx++]; + default: + throw new AssertionError(); + } + } + + /** + * Decoding for dictionary ids. The IDs are populated into `values` and the nullability is + * populated into `nulls`. + */ + void readDictionaryIds( + int total, + WritableIntVector values, + WritableColumnVector nulls, + int rowId, + int level, + RunLengthDecoder data) { + int left = total; + while (left > 0) { + if (this.currentCount == 0) { + this.readNextGroup(); + } + int n = Math.min(left, this.currentCount); + switch (mode) { + case RLE: + if (currentValue == level) { + data.readDictionaryIdData(n, values, rowId); + } else { + nulls.setNulls(rowId, n); + } + break; + case PACKED: + for (int i = 0; i < n; ++i) { + if (currentBuffer[currentBufferIdx++] == level) { + values.setInt(rowId + i, data.readInteger()); + } else { + nulls.setNullAt(rowId + i); + } + } + break; + default: + throw new AssertionError(); + } + rowId += n; + left -= n; + currentCount -= n; + } + } + + /** + * It is used to decode dictionary IDs. + */ + private void readDictionaryIdData(int total, WritableIntVector c, int rowId) { + int left = total; + while (left > 0) { + if (this.currentCount == 0) { + this.readNextGroup(); + } + int n = Math.min(left, this.currentCount); + switch (mode) { + case RLE: + c.setInts(rowId, n, currentValue); + break; + case PACKED: + c.setInts(rowId, n, currentBuffer, currentBufferIdx); + currentBufferIdx += n; + break; + default: + throw new AssertionError(); + } + rowId += n; + left -= n; + currentCount -= n; + } + } + + /** + * Reads the next varint encoded int. + */ + private int readUnsignedVarInt() throws IOException { + int value = 0; + int shift = 0; + int b; + do { + b = in.read(); + value |= (b & 0x7F) << shift; + shift += 7; + } while ((b & 0x80) != 0); + return value; + } + + /** + * Reads the next 4 byte little endian int. + */ + private int readIntLittleEndian() throws IOException { + int ch4 = in.read(); + int ch3 = in.read(); + int ch2 = in.read(); + int ch1 = in.read(); + return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4); + } + + /** + * Reads the next byteWidth little endian int. + */ + private int readIntLittleEndianPaddedOnBitWidth() throws IOException { + switch (bytesWidth) { + case 0: + return 0; + case 1: + return in.read(); + case 2: { + int ch2 = in.read(); + int ch1 = in.read(); + return (ch1 << 8) + ch2; + } + case 3: { + int ch3 = in.read(); + int ch2 = in.read(); + int ch1 = in.read(); + return (ch1 << 16) + (ch2 << 8) + ch3; + } + case 4: { + return readIntLittleEndian(); + } + default: + throw new RuntimeException("Unreachable"); + } + } + + /** + * Reads the next group. + */ + void readNextGroup() { + try { + int header = readUnsignedVarInt(); + this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; + switch (mode) { + case RLE: + this.currentCount = header >>> 1; + this.currentValue = readIntLittleEndianPaddedOnBitWidth(); + return; + case PACKED: + int numGroups = header >>> 1; + this.currentCount = numGroups * 8; + + if (this.currentBuffer.length < this.currentCount) { + this.currentBuffer = new int[this.currentCount]; + } + currentBufferIdx = 0; + int valueIndex = 0; + while (valueIndex < this.currentCount) { + // values are bit packed 8 at a time, so reading bitWidth will always work + ByteBuffer buffer = in.slice(bitWidth); + this.packer.unpack8Values(buffer, buffer.position(), this.currentBuffer, valueIndex); + valueIndex += 8; + } + return; + default: + throw new ParquetDecodingException("not a valid mode " + this.mode); + } + } catch (IOException e) { + throw new ParquetDecodingException("Failed to read from input stream", e); + } + } + + enum MODE { + RLE, + PACKED + } +} + diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java new file mode 100644 index 000000000000..c0d83e6096e3 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; + +/** + * Adapter clazz for {@link Output}. + */ +public interface OutputAdapter extends Output { + @Override + default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + // no operation + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java new file mode 100644 index 000000000000..c903ec2ed408 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.runtime.state.StateInitializationContext; + +import java.util.OptionalLong; + +/** + * Adapter clazz for {@link StateInitializationContext}. + */ +public interface StateInitializationContextAdapter extends StateInitializationContext { + default OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java new file mode 100644 index 000000000000..4461c28943d3 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.api.common.accumulators.Accumulator; +import org.apache.flink.metrics.groups.OperatorMetricGroup; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; +import org.apache.flink.runtime.execution.Environment; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; + +import java.util.Map; + +/** + * Adapter clazz for {@link StreamingRuntimeContext}. + */ +public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { + + public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, + Map> accumulators) { + super(operator, env, accumulators); + } + + @Override + public OperatorMetricGroup getMetricGroup() { + return UnregisteredMetricsGroup.createOperatorMetricGroup(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestStreamConfigs.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestStreamConfigs.java new file mode 100644 index 000000000000..a7a620b4ec13 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestStreamConfigs.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.streaming.api.graph.StreamConfig; + +/** + * StreamConfig for test goals. + */ +public class TestStreamConfigs { + + public static void setupNetworkInputs(StreamConfig streamConfig, TypeSerializer... inputSerializers) { + streamConfig.setupNetworkInputs(inputSerializers); + // Since Flink 1.16, need call serializeAllConfigs to serialize all object configs synchronously. + // See https://issues.apache.org/jira/browse/FLINK-26675. + streamConfig.serializeAllConfigs(); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java new file mode 100644 index 000000000000..e65437609a21 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.adapter; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.EnvironmentSettings; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; + +/** + * TableEnv for test goals. + */ +public class TestTableEnvs { + + public static TableEnvironment getBatchTableEnv() { + Configuration conf = new Configuration(); + // for batch upsert use cases: current suggestion is to disable these 2 options, + // from 1.14, flink runtime execution mode has switched from streaming + // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), + // current batch execution mode has these limitations: + // + // 1. the keyed stream default to always sort the inputs by key; + // 2. the batch state-backend requires the inputs sort by state key + // + // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, + // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, + // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode + // to keep the strategy before 1.14. + conf.setBoolean("execution.sorted-inputs.enabled", false); + conf.setBoolean("execution.batch-state-backend.enabled", false); + StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); + EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); + return StreamTableEnvironment.create(execEnv, settings); + } +} diff --git a/hudi-flink-datasource/pom.xml b/hudi-flink-datasource/pom.xml index e3f8c55b2868..e309092a2e97 100644 --- a/hudi-flink-datasource/pom.xml +++ b/hudi-flink-datasource/pom.xml @@ -38,6 +38,7 @@ hudi-flink1.15.x hudi-flink1.16.x hudi-flink1.17.x + hudi-flink1.18.x hudi-flink diff --git a/packaging/bundle-validation/base/build_flink1170hive313spark332.sh b/packaging/bundle-validation/base/build_flink1180hive313spark332.sh similarity index 81% rename from packaging/bundle-validation/base/build_flink1170hive313spark332.sh rename to packaging/bundle-validation/base/build_flink1180hive313spark332.sh index ae4858afcabb..dca096a8d9b8 100755 --- a/packaging/bundle-validation/base/build_flink1170hive313spark332.sh +++ b/packaging/bundle-validation/base/build_flink1180hive313spark332.sh @@ -19,9 +19,9 @@ docker build \ --build-arg HIVE_VERSION=3.1.3 \ - --build-arg FLINK_VERSION=1.17.0 \ + --build-arg FLINK_VERSION=1.18.0 \ --build-arg SPARK_VERSION=3.3.2 \ --build-arg SPARK_HADOOP_VERSION=3 \ --build-arg HADOOP_VERSION=3.3.5 \ - -t hudi-ci-bundle-validation-base:flink1170hive313spark332 . -docker image tag hudi-ci-bundle-validation-base:flink1170hive313spark332 apachehudi/hudi-ci-bundle-validation-base:flink1170hive313spark332 + -t hudi-ci-bundle-validation-base:flink1180hive313spark332 . +docker image tag hudi-ci-bundle-validation-base:flink1180hive313spark332 apachehudi/hudi-ci-bundle-validation-base:flink1180hive313spark332 diff --git a/packaging/bundle-validation/base/build_flink1170hive313spark340.sh b/packaging/bundle-validation/base/build_flink1180hive313spark340.sh similarity index 81% rename from packaging/bundle-validation/base/build_flink1170hive313spark340.sh rename to packaging/bundle-validation/base/build_flink1180hive313spark340.sh index e59ccea7766f..2ceb9a81c58c 100755 --- a/packaging/bundle-validation/base/build_flink1170hive313spark340.sh +++ b/packaging/bundle-validation/base/build_flink1180hive313spark340.sh @@ -19,9 +19,9 @@ docker build \ --build-arg HIVE_VERSION=3.1.3 \ - --build-arg FLINK_VERSION=1.17.0 \ + --build-arg FLINK_VERSION=1.18.0 \ --build-arg SPARK_VERSION=3.4.0 \ --build-arg SPARK_HADOOP_VERSION=3 \ --build-arg HADOOP_VERSION=3.3.5 \ - -t hudi-ci-bundle-validation-base:flink1170hive313spark340 . -docker image tag hudi-ci-bundle-validation-base:flink1170hive313spark340 apachehudi/hudi-ci-bundle-validation-base:flink1170hive313spark340 + -t hudi-ci-bundle-validation-base:flink1180hive313spark340 . +docker image tag hudi-ci-bundle-validation-base:flink1180hive313spark340 apachehudi/hudi-ci-bundle-validation-base:flink1180hive313spark340 diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index bfdf9a1f661b..505ee9c7c2d4 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -162,6 +162,8 @@ else HUDI_FLINK_BUNDLE_NAME=hudi-flink1.16-bundle elif [[ ${FLINK_PROFILE} == 'flink1.17' ]]; then HUDI_FLINK_BUNDLE_NAME=hudi-flink1.17-bundle + elif [[ ${FLINK_PROFILE} == 'flink1.18' ]]; then + HUDI_FLINK_BUNDLE_NAME=hudi-flink1.18-bundle fi echo "Downloading bundle jars from staging repo orgapachehudi-$STAGING_REPO_NUM ..." diff --git a/pom.xml b/pom.xml index fd59bd06959f..337f8f2391ea 100644 --- a/pom.xml +++ b/pom.xml @@ -136,17 +136,19 @@ 2.4.4 3.4.1 + 1.18.0 1.17.1 1.16.2 1.15.1 1.14.5 1.13.6 - ${flink1.17.version} - hudi-flink1.17.x - 1.17 + ${flink1.18.version} + hudi-flink1.18.x + 1.18 1.11.1 - 1.12.2 + 1.13.1 + 3.0.0-1.17 flink-runtime flink-table-runtime flink-table-planner_2.12 @@ -1066,7 +1068,7 @@ org.apache.flink ${flink.connector.kafka.artifactId} - ${flink.version} + ${flink.connector.kafka.version} provided @@ -2525,11 +2527,29 @@ + + flink1.18 + + 1.5.6 + 1.11.1 + 1.13.1 + + + + flink1.18 + + + flink1.17 + ${flink1.17.version} + hudi-flink1.17.x + 1.17 1.5.6 1.11.1 + 1.12.3 + ${flink1.17.version} @@ -2545,6 +2565,8 @@ 1.16 1.5.6 1.11.1 + 1.12.2 + ${flink1.16.version} @@ -2560,6 +2582,8 @@ 1.15 1.5.6 1.11.1 + 1.12.2 + ${flink1.15.version} @@ -2584,6 +2608,8 @@ flink-clients_${scala.binary.version} flink-connector-kafka_${scala.binary.version} flink-hadoop-compatibility_${scala.binary.version} + 1.11.1 + ${flink1.14.version} @@ -2609,6 +2635,7 @@ flink-clients_${scala.binary.version} flink-connector-kafka_${scala.binary.version} flink-hadoop-compatibility_${scala.binary.version} + ${flink1.13.version} true diff --git a/scripts/release/deploy_staging_jars.sh b/scripts/release/deploy_staging_jars.sh index 221c3ddfede7..146e3fbdfdea 100755 --- a/scripts/release/deploy_staging_jars.sh +++ b/scripts/release/deploy_staging_jars.sh @@ -84,6 +84,7 @@ declare -a ALL_VERSION_OPTS=( "-Dscala-2.12 -Dflink1.15 -Davro.version=1.10.0 -pl packaging/hudi-flink-bundle -am" "-Dscala-2.12 -Dflink1.16 -Davro.version=1.11.1 -pl packaging/hudi-flink-bundle -am" "-Dscala-2.12 -Dflink1.17 -Davro.version=1.11.1 -pl packaging/hudi-flink-bundle -am" +"-Dscala-2.12 -Dflink1.18 -Davro.version=1.11.1 -pl packaging/hudi-flink-bundle -am" ) printf -v joined "'%s'\n" "${ALL_VERSION_OPTS[@]}" diff --git a/scripts/release/validate_staged_bundles.sh b/scripts/release/validate_staged_bundles.sh index 19db3b2fb48d..866b8cee335b 100755 --- a/scripts/release/validate_staged_bundles.sh +++ b/scripts/release/validate_staged_bundles.sh @@ -33,7 +33,7 @@ declare -a extensions=("-javadoc.jar" "-javadoc.jar.asc" "-javadoc.jar.md5" "-ja ".pom.md5" ".pom.sha1") declare -a bundles=("hudi-aws-bundle" "hudi-cli-bundle_2.11" "hudi-cli-bundle_2.12" "hudi-datahub-sync-bundle" "hudi-flink1.13-bundle" "hudi-flink1.14-bundle" -"hudi-flink1.15-bundle" "hudi-flink1.16-bundle" "hudi-flink1.17-bundle" "hudi-gcp-bundle" "hudi-hadoop-mr-bundle" "hudi-hive-sync-bundle" "hudi-integ-test-bundle" +"hudi-flink1.15-bundle" "hudi-flink1.16-bundle" "hudi-flink1.17-bundle" "hudi-flink1.18-bundle" "hudi-gcp-bundle" "hudi-hadoop-mr-bundle" "hudi-hive-sync-bundle" "hudi-integ-test-bundle" "hudi-kafka-connect-bundle" "hudi-metaserver-server-bundle" "hudi-presto-bundle" "hudi-spark-bundle_2.11" "hudi-spark-bundle_2.12" "hudi-spark2.4-bundle_2.11" "hudi-spark2.4-bundle_2.12" "hudi-spark3-bundle_2.12" "hudi-spark3.0-bundle_2.12" "hudi-spark3.1-bundle_2.12" "hudi-spark3.2-bundle_2.12" "hudi-spark3.3-bundle_2.12" "hudi-spark3.4-bundle_2.12" "hudi-timeline-server-bundle" "hudi-trino-bundle" From 8fc4135fe5e089a6dc348b8b891be38d43a9d25c Mon Sep 17 00:00:00 2001 From: Danny Chan Date: Mon, 13 Nov 2023 14:49:05 +0800 Subject: [PATCH 012/112] [HUDI-7082] Add Flink 1.14 and Spark 3.13 docker image script (#10066) --- ...hive313spark313.sh => build_flink1146hive313spark313.sh} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename packaging/bundle-validation/base/{build_flink1136hive313spark313.sh => build_flink1146hive313spark313.sh} (80%) diff --git a/packaging/bundle-validation/base/build_flink1136hive313spark313.sh b/packaging/bundle-validation/base/build_flink1146hive313spark313.sh similarity index 80% rename from packaging/bundle-validation/base/build_flink1136hive313spark313.sh rename to packaging/bundle-validation/base/build_flink1146hive313spark313.sh index 721515e86746..ee5308ff8977 100755 --- a/packaging/bundle-validation/base/build_flink1136hive313spark313.sh +++ b/packaging/bundle-validation/base/build_flink1146hive313spark313.sh @@ -19,8 +19,8 @@ docker build \ --build-arg HIVE_VERSION=3.1.3 \ - --build-arg FLINK_VERSION=1.13.6 \ + --build-arg FLINK_VERSION=1.14.6 \ --build-arg SPARK_VERSION=3.1.3 \ --build-arg SPARK_HADOOP_VERSION=2.7 \ - -t hudi-ci-bundle-validation-base:flink1136hive313spark313 . -docker image tag hudi-ci-bundle-validation-base:flink1136hive313spark313 apachehudi/hudi-ci-bundle-validation-base:flink1136hive313spark313 + -t hudi-ci-bundle-validation-base:flink1146hive313spark313 . +docker image tag hudi-ci-bundle-validation-base:flink1146hive313spark313 apachehudi/hudi-ci-bundle-validation-base:flink1146hive313spark313 From c072007778540bd3da31c6fa5f8717546fafb629 Mon Sep 17 00:00:00 2001 From: Sagar Sumit Date: Tue, 14 Nov 2023 23:25:51 +0530 Subject: [PATCH 013/112] [HUDI-7016] Fix bundling of RoaringBitmap in hudi-utilities-bundle (#10083) --- packaging/hudi-utilities-bundle/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index c4d8f798ad6e..0f0e8f68e2ea 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -115,6 +115,7 @@ org.rocksdb:rocksdbjni org.antlr:stringtemplate org.apache.parquet:parquet-avro + org.roaringbitmap:RoaringBitmap com.fasterxml.jackson.datatype:jackson-datatype-jsr310 @@ -225,6 +226,10 @@ org.apache.httpcomponents. org.apache.hudi.aws.org.apache.httpcomponents. + + org.roaringbitmap. + org.apache.hudi.org.roaringbitmap. + From ae80cbd81758c3787c47e8dbcb60d3be3c2f66cf Mon Sep 17 00:00:00 2001 From: Shawn Chang <42792772+CTTY@users.noreply.github.com> Date: Mon, 26 Feb 2024 15:51:10 -0800 Subject: [PATCH 014/112] [HUDI-6806] Support Spark 3.5.0 (#9717) --------- Co-authored-by: Shawn Chang Co-authored-by: Y Ethan Guo --- .github/workflows/bot.yml | 13 + .../org/apache/hudi/HoodieSparkUtils.scala | 2 + .../org/apache/hudi/SparkAdapterSupport.scala | 4 +- .../org/apache/spark/sql/DataFrameUtil.scala | 6 +- .../sql/HoodieCatalystExpressionUtils.scala | 16 +- .../apache/spark/sql/HoodieSchemaUtils.scala | 9 + .../apache/spark/sql/HoodieUnsafeUtils.scala | 13 +- .../HoodieSparkPartitionedFileUtils.scala | 20 +- .../apache/spark/sql/hudi/SparkAdapter.scala | 5 +- .../apache/hudi/avro/TestHoodieAvroUtils.java | 4 +- .../hudi/common/util/TestClusteringUtils.java | 2 + .../dag/nodes/BaseValidateDatasetNode.java | 13 +- .../org/apache/hudi/HoodieBaseRelation.scala | 4 +- .../org/apache/hudi/HoodieFileIndex.scala | 9 +- .../datasources/HoodieInMemoryFileIndex.scala | 5 +- .../hudi/testutils/SparkDatasetTestUtils.java | 19 +- hudi-spark-datasource/hudi-spark/pom.xml | 30 + .../sql/hudi/analysis/HoodieAnalysis.scala | 19 +- .../command/CallProcedureHoodieCommand.scala | 6 +- .../command/CompactionHoodiePathCommand.scala | 5 +- .../CompactionHoodieTableCommand.scala | 5 +- .../CompactionShowHoodiePathCommand.scala | 5 +- .../CompactionShowHoodieTableCommand.scala | 5 +- .../InsertIntoHoodieTableCommand.scala | 10 +- ...tBulkInsertInternalPartitionerForRows.java | 0 .../TestHoodieDatasetBulkInsertHelper.java | 19 +- .../TestHoodieInternalRowParquetWriter.java | 0 .../row/TestHoodieRowCreateHandle.java | 14 +- .../testutils/KeyGeneratorTestUtilities.java | 20 +- .../apache/hudi/TestAvroConversionUtils.scala | 2 +- .../spark/sql/hudi/TestInsertTable.scala | 22 +- hudi-spark-datasource/hudi-spark2/pom.xml | 8 + .../HoodieSpark2CatalystExpressionUtils.scala | 7 +- .../spark/sql/HoodieSpark2SchemaUtils.scala | 6 + .../spark/sql/adapter/Spark2Adapter.scala | 7 +- .../HoodieSpark2PartitionedFileUtils.scala | 12 +- ...oodieBulkInsertInternalWriterTestBase.java | 0 .../hudi/spark3/internal/ReflectUtil.java | 8 +- .../spark/sql/adapter/BaseSpark3Adapter.scala | 6 +- hudi-spark-datasource/hudi-spark3.0.x/pom.xml | 15 + ...HoodieSpark30CatalystExpressionUtils.scala | 7 +- .../spark/sql/HoodieSpark30SchemaUtils.scala | 6 + .../HoodieSpark30PartitionedFileUtils.scala | 12 +- ...oodieBulkInsertInternalWriterTestBase.java | 174 + ...estHoodieBulkInsertDataInternalWriter.java | 0 ...estHoodieDataSourceInternalBatchWrite.java | 0 hudi-spark-datasource/hudi-spark3.1.x/pom.xml | 15 + ...HoodieSpark31CatalystExpressionUtils.scala | 8 +- .../spark/sql/HoodieSpark31SchemaUtils.scala | 6 + .../HoodieSpark31PartitionedFileUtils.scala | 12 +- ...oodieBulkInsertInternalWriterTestBase.java | 174 + ...estHoodieBulkInsertDataInternalWriter.java | 175 + ...estHoodieDataSourceInternalBatchWrite.java | 331 ++ hudi-spark-datasource/hudi-spark3.2.x/pom.xml | 8 +- ...HoodieSpark32CatalystExpressionUtils.scala | 7 +- .../spark/sql/HoodieSpark32SchemaUtils.scala | 6 + .../HoodieSpark32PartitionedFileUtils.scala | 12 +- .../parquet/Spark32DataSourceUtils.scala} | 2 +- ...Spark32LegacyHoodieParquetFileFormat.scala | 10 +- .../hudi/analysis/HoodieSpark32Analysis.scala | 66 + ...oodieBulkInsertInternalWriterTestBase.java | 174 + ...estHoodieBulkInsertDataInternalWriter.java | 175 + ...estHoodieDataSourceInternalBatchWrite.java | 331 ++ .../analysis/HoodieSpark32PlusAnalysis.scala | 28 - ...HoodieSpark33CatalystExpressionUtils.scala | 9 +- .../spark/sql/HoodieSpark33SchemaUtils.scala | 6 + .../HoodieSpark33PartitionedFileUtils.scala | 12 +- .../parquet/Spark33DataSourceUtils.scala | 77 + ...Spark33LegacyHoodieParquetFileFormat.scala | 10 +- .../hudi/analysis/HoodieSpark33Analysis.scala | 66 + ...oodieBulkInsertInternalWriterTestBase.java | 174 + .../hudi/spark3/internal/TestReflectUtil.java | 3 +- ...HoodieSpark34CatalystExpressionUtils.scala | 7 +- .../spark/sql/HoodieSpark34SchemaUtils.scala | 6 + .../HoodieSpark34PartitionedFileUtils.scala | 12 +- .../parquet/Spark34DataSourceUtils.scala | 77 + ...Spark34LegacyHoodieParquetFileFormat.scala | 10 +- .../hudi/analysis/HoodieSpark34Analysis.scala | 66 + ...oodieBulkInsertInternalWriterTestBase.java | 174 + .../hudi/spark3/internal/TestReflectUtil.java | 3 +- hudi-spark-datasource/hudi-spark3.5.x/pom.xml | 342 ++ .../src/main/antlr4/imports/SqlBase.g4 | 1940 ++++++++++ .../hudi/spark/sql/parser/HoodieSqlBase.g4 | 40 + ...pache.spark.sql.sources.DataSourceRegister | 19 + .../hudi/Spark35HoodieFileScanRDD.scala | 36 + .../spark/sql/HoodieSpark35CatalogUtils.scala | 30 + ...HoodieSpark35CatalystExpressionUtils.scala | 117 + .../sql/HoodieSpark35CatalystPlanUtils.scala | 83 + .../spark/sql/HoodieSpark35SchemaUtils.scala | 40 + .../spark/sql/adapter/Spark3_5Adapter.scala | 130 + .../spark/sql/avro/AvroDeserializer.scala | 495 +++ .../spark/sql/avro/AvroSerializer.scala | 450 +++ .../org/apache/spark/sql/avro/AvroUtils.scala | 228 ++ .../avro/HoodieSpark3_5AvroDeserializer.scala | 31 + .../avro/HoodieSpark3_5AvroSerializer.scala | 29 + .../HoodieSpark35PartitionedFileUtils.scala | 52 + .../Spark35NestedSchemaPruning.scala | 198 + .../parquet/Spark35DataSourceUtils.scala | 76 + ...Spark35LegacyHoodieParquetFileFormat.scala | 536 +++ .../Spark35ResolveHudiAlterTableCommand.scala | 71 + .../hudi/analysis/HoodieSpark35Analysis.scala | 66 + .../HoodieSpark3_5ExtendedSqlAstBuilder.scala | 3426 +++++++++++++++++ .../HoodieSpark3_5ExtendedSqlParser.scala | 201 + ...oodieBulkInsertInternalWriterTestBase.java | 174 + ...estHoodieBulkInsertDataInternalWriter.java | 174 + ...estHoodieDataSourceInternalBatchWrite.java | 330 ++ .../hudi/spark3/internal/TestReflectUtil.java | 11 +- .../base/build_flink1180hive313spark350.sh | 27 + packaging/bundle-validation/ci_run.sh | 10 + .../bundle-validation/run_docker_java17.sh | 10 + packaging/hudi-utilities-bundle/pom.xml | 6 + packaging/hudi-utilities-slim-bundle/pom.xml | 6 + pom.xml | 92 +- 113 files changed, 12101 insertions(+), 201 deletions(-) rename {hudi-client/hudi-spark-client => hudi-spark-datasource/hudi-spark-common}/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java (93%) rename {hudi-client/hudi-spark-client => hudi-spark-datasource/hudi-spark}/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitionerForRows.java (100%) rename {hudi-client/hudi-spark-client => hudi-spark-datasource/hudi-spark}/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java (100%) rename {hudi-client/hudi-spark-client => hudi-spark-datasource/hudi-spark}/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java (94%) rename hudi-spark-datasource/{hudi-spark-common => hudi-spark2}/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java (100%) create mode 100644 hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java rename hudi-spark-datasource/{hudi-spark3-common => hudi-spark3.0.x}/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java (100%) rename hudi-spark-datasource/{hudi-spark3-common => hudi-spark3.0.x}/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java (100%) create mode 100644 hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java create mode 100644 hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java create mode 100644 hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java rename hudi-spark-datasource/{hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusDataSourceUtils.scala => hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala} (98%) create mode 100644 hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32Analysis.scala create mode 100644 hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java create mode 100644 hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java create mode 100644 hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java create mode 100644 hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33DataSourceUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark33Analysis.scala create mode 100644 hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java create mode 100644 hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34DataSourceUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark34Analysis.scala create mode 100644 hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/pom.xml create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/hudi/Spark35HoodieFileScanRDD.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalogUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystExpressionUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35SchemaUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroDeserializer.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroSerializer.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark35PartitionedFileUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35DataSourceUtils.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/Spark35ResolveHudiAlterTableCommand.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark35Analysis.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java create mode 100644 hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java rename hudi-spark-datasource/{hudi-spark3-common => hudi-spark3.5.x}/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java (90%) create mode 100755 packaging/bundle-validation/base/build_flink1180hive313spark350.sh diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index fd3cc67976a1..daa315d95cd5 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -74,6 +74,10 @@ jobs: sparkProfile: "spark3.4" sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.5" + sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" + steps: - uses: actions/checkout@v3 - name: Set up JDK 8 @@ -156,6 +160,9 @@ jobs: - scalaProfile: "scala-2.12" sparkProfile: "spark3.4" sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" + - scalaProfile: "scala-2.12" + sparkProfile: "spark3.5" + sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" steps: - uses: actions/checkout@v3 @@ -245,6 +252,9 @@ jobs: strategy: matrix: include: + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.5' + sparkRuntime: 'spark3.5.0' - flinkProfile: 'flink1.18' sparkProfile: 'spark3.4' sparkRuntime: 'spark3.4.0' @@ -273,6 +283,9 @@ jobs: strategy: matrix: include: + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.5' + sparkRuntime: 'spark3.5.0' - flinkProfile: 'flink1.18' sparkProfile: 'spark3.4' sparkRuntime: 'spark3.4.0' diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index a0fe879b3dbe..527864fcf244 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -51,6 +51,7 @@ private[hudi] trait SparkVersionsSupport { def isSpark3_2: Boolean = getSparkVersion.startsWith("3.2") def isSpark3_3: Boolean = getSparkVersion.startsWith("3.3") def isSpark3_4: Boolean = getSparkVersion.startsWith("3.4") + def isSpark3_5: Boolean = getSparkVersion.startsWith("3.5") def gteqSpark3_0: Boolean = getSparkVersion >= "3.0" def gteqSpark3_1: Boolean = getSparkVersion >= "3.1" @@ -61,6 +62,7 @@ private[hudi] trait SparkVersionsSupport { def gteqSpark3_3: Boolean = getSparkVersion >= "3.3" def gteqSpark3_3_2: Boolean = getSparkVersion >= "3.3.2" def gteqSpark3_4: Boolean = getSparkVersion >= "3.4" + def gteqSpark3_5: Boolean = getSparkVersion >= "3.5" } object HoodieSparkUtils extends SparkAdapterSupport with SparkVersionsSupport with Logging { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala index 7e035a95ef5f..09229d74b205 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkAdapterSupport.scala @@ -33,7 +33,9 @@ trait SparkAdapterSupport { object SparkAdapterSupport { lazy val sparkAdapter: SparkAdapter = { - val adapterClass = if (HoodieSparkUtils.isSpark3_4) { + val adapterClass = if (HoodieSparkUtils.isSpark3_5) { + "org.apache.spark.sql.adapter.Spark3_5Adapter" + } else if (HoodieSparkUtils.isSpark3_4) { "org.apache.spark.sql.adapter.Spark3_4Adapter" } else if (HoodieSparkUtils.isSpark3_3) { "org.apache.spark.sql.adapter.Spark3_3Adapter" diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/DataFrameUtil.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/DataFrameUtil.scala index 290b118bd897..11ccc59388eb 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/DataFrameUtil.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/DataFrameUtil.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql +import org.apache.hudi.SparkAdapterSupport import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.LogicalRDD @@ -31,7 +32,8 @@ object DataFrameUtil { */ def createFromInternalRows(sparkSession: SparkSession, schema: StructType, rdd: RDD[InternalRow]): DataFrame = { - val logicalPlan = LogicalRDD(schema.toAttributes, rdd)(sparkSession) + val logicalPlan = LogicalRDD( + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes(schema), rdd)(sparkSession) Dataset.ofRows(sparkSession, logicalPlan) } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala index a83afd514f1c..df55a19db441 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala @@ -18,20 +18,22 @@ package org.apache.spark.sql import org.apache.hudi.SparkAdapterSupport -import org.apache.hudi.SparkAdapterSupport.sparkAdapter -import org.apache.hudi.common.util.ValidationUtils.checkState import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction} -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateMutableProjection, GenerateUnsafeProjection} -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeEq, AttributeReference, Cast, Expression, Like, Literal, MutableProjection, SubqueryExpression, UnsafeProjection} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, CreateStruct, Expression, GetStructField, Like, Literal, Projection, SubqueryExpression, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeEq, AttributeReference, AttributeSet, Cast, Expression, Like, Literal, SubqueryExpression, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{DataType, StructType} trait HoodieCatalystExpressionUtils { + /** + * SPARK-44531 Encoder inference moved elsewhere in Spark 3.5.0 + * Mainly used for unit tests + */ + def getEncoder(schema: StructType): ExpressionEncoder[Row] + /** * Returns a filter that its reference is a subset of `outputSet` and it contains the maximum * constraints from `condition`. This is used for predicate push-down @@ -269,7 +271,7 @@ object HoodieCatalystExpressionUtils extends SparkAdapterSupport { } private def generateUnsafeProjectionInternal(from: StructType, to: StructType): UnsafeProjection = { - val attrs = from.toAttributes + val attrs = sparkAdapter.getSchemaUtils.toAttributes(from) val attrsMap = attrs.map(attr => (attr.name, attr)).toMap val targetExprs = to.fields.map(f => attrsMap(f.name)) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieSchemaUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieSchemaUtils.scala index 2ee323ec3700..2ee489ada4d5 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieSchemaUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieSchemaUtils.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType + /** * Utils on schema, which have different implementation across Spark versions. */ @@ -34,4 +37,10 @@ trait HoodieSchemaUtils { def checkColumnNameDuplication(columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit + + /** + * SPARK-44353 StructType#toAttributes was removed in Spark 3.5.0 + * Use DataTypeUtils#toAttributes for Spark 3.5+ + */ + def toAttributes(struct: StructType): Seq[Attribute] } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieUnsafeUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieUnsafeUtils.scala index ee22f714c9c9..138815bc9c84 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieUnsafeUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieUnsafeUtils.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql -import org.apache.hudi.HoodieUnsafeRDD +import org.apache.hudi.{HoodieUnsafeRDD, SparkAdapterSupport} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} @@ -68,14 +68,15 @@ object HoodieUnsafeUtils { * Creates [[DataFrame]] from the in-memory [[Seq]] of [[Row]]s with provided [[schema]] * * NOTE: [[DataFrame]] is based on [[LocalRelation]], entailing that most computations with it - * will be executed by Spark locally + * will be executed by Spark locally * - * @param spark spark's session - * @param rows collection of rows to base [[DataFrame]] on + * @param spark spark's session + * @param rows collection of rows to base [[DataFrame]] on * @param schema target [[DataFrame]]'s schema */ def createDataFrameFromRows(spark: SparkSession, rows: Seq[Row], schema: StructType): DataFrame = - Dataset.ofRows(spark, LocalRelation.fromExternalRows(schema.toAttributes, rows)) + Dataset.ofRows(spark, LocalRelation.fromExternalRows( + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes(schema), rows)) /** * Creates [[DataFrame]] from the in-memory [[Seq]] of [[InternalRow]]s with provided [[schema]] @@ -88,7 +89,7 @@ object HoodieUnsafeUtils { * @param schema target [[DataFrame]]'s schema */ def createDataFrameFromInternalRows(spark: SparkSession, rows: Seq[InternalRow], schema: StructType): DataFrame = - Dataset.ofRows(spark, LocalRelation(schema.toAttributes, rows)) + Dataset.ofRows(spark, LocalRelation(SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes(schema), rows)) /** diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSparkPartitionedFileUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSparkPartitionedFileUtils.scala index 0e3b3f261d82..53d95f09394b 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSparkPartitionedFileUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSparkPartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] to adapt to type changes. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] to adapt to type changes. * Before Spark 3.4.0, * ``` * case class PartitionedFile( @@ -65,13 +65,23 @@ trait HoodieSparkPartitionedFileUtils extends Serializable { * Creates a new [[PartitionedFile]] instance. * * @param partitionValues value of partition columns to be prepended to each row. - * @param filePath URI of the file to read. - * @param start the beginning offset (in bytes) of the block. - * @param length number of bytes to read. + * @param filePath URI of the file to read. + * @param start the beginning offset (in bytes) of the block. + * @param length number of bytes to read. * @return a new [[PartitionedFile]] instance. */ def createPartitionedFile(partitionValues: InternalRow, filePath: Path, start: Long, length: Long): PartitionedFile + + /** + * SPARK-43039 FileIndex#PartitionDirectory refactored in Spark 3.5.0 + */ + def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] + + /** + * SPARK-43039 FileIndex#PartitionDirectory refactored in Spark 3.5.0 + */ + def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala index 1c6111afe47f..5691dd5c3805 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala @@ -19,14 +19,15 @@ package org.apache.spark.sql.hudi import org.apache.avro.Schema -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.spark.sql._ import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer} import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, InterpretedPredicate} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InterpretedPredicate} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index 517590a81e03..eb20081475ff 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -301,7 +301,7 @@ public void testRemoveFields() { // partitioned table test. String schemaStr = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ " + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," - + "{\"name\": \"non_pii_col\", \"type\": \"string\"}]},"; + + "{\"name\": \"non_pii_col\", \"type\": \"string\"}]}"; Schema expectedSchema = new Schema.Parser().parse(schemaStr); GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EXAMPLE_SCHEMA)); rec.put("_row_key", "key1"); @@ -324,7 +324,7 @@ public void testRemoveFields() { schemaStr = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ " + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," - + "{\"name\": \"pii_col\", \"type\": \"string\"}]},"; + + "{\"name\": \"pii_col\", \"type\": \"string\"}]}"; expectedSchema = new Schema.Parser().parse(schemaStr); rec1 = HoodieAvroUtils.removeFields(rec, Collections.singleton("")); assertEquals(expectedSchema, rec1.getSchema()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java index 4e76d25f41fc..28def8fddcfc 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java @@ -37,6 +37,7 @@ import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.IOException; @@ -107,6 +108,7 @@ public void testClusteringPlanMultipleInstants() throws Exception { // replacecommit.inflight doesn't have clustering plan. // Verify that getClusteringPlan fetches content from corresponding requested file. + @Disabled("Will fail due to avro issue AVRO-3789. This is fixed in avro 1.11.3") @Test public void testClusteringPlanInflight() throws Exception { String partitionPath1 = "partition1"; diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java index 8f86421c7724..892730c675b7 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java @@ -20,6 +20,7 @@ package org.apache.hudi.integ.testsuite.dag.nodes; import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.SparkAdapterSupport$; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -40,10 +41,7 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$; import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; -import org.apache.spark.sql.catalyst.encoders.RowEncoder; -import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; @@ -51,11 +49,8 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; -import java.util.stream.Collectors; import scala.Tuple2; -import scala.collection.JavaConversions; -import scala.collection.JavaConverters; import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY; @@ -244,10 +239,6 @@ private Dataset getInputDf(ExecutionContext context, SparkSession session, } private ExpressionEncoder getEncoder(StructType schema) { - List attributes = JavaConversions.asJavaCollection(schema.toAttributes()).stream() - .map(Attribute::toAttribute).collect(Collectors.toList()); - return RowEncoder.apply(schema) - .resolveAndBind(JavaConverters.asScalaBufferConverter(attributes).asScala().toSeq(), - SimpleAnalyzer$.MODULE$); + return SparkAdapterSupport$.MODULE$.sparkAdapter().getCatalystExpressionUtils().getEncoder(schema); } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala index 0098ee54c2bc..f97e18079250 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala @@ -67,7 +67,6 @@ import org.apache.spark.sql.{Row, SQLContext, SparkSession} import java.net.URI import scala.collection.JavaConverters._ -import scala.util.control.NonFatal import scala.util.{Failure, Success, Try} trait HoodieFileSplit {} @@ -424,7 +423,8 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, inMemoryFileIndex.listFiles(partitionFilters, dataFilters) } - val fsView = new HoodieTableFileSystemView(metaClient, timeline, partitionDirs.flatMap(_.files).toArray) + val fsView = new HoodieTableFileSystemView( + metaClient, timeline, sparkAdapter.getSparkPartitionedFileUtils.toFileStatuses(partitionDirs).toArray) fsView.getPartitionPaths.asScala.flatMap { partitionPath => val relativePath = getRelativePartitionPath(basePath, partitionPath) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index f60263b3344e..5416961872b2 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -164,9 +164,11 @@ case class HoodieFileIndex(spark: SparkSession, || (f.getBaseFile.isPresent && f.getBaseFile.get().getBootstrapBaseFile.isPresent)). foldLeft(Map[String, FileSlice]()) { (m, f) => m + (f.getFileId -> f) } if (c.nonEmpty) { - PartitionDirectory(new PartitionFileSliceMapping(InternalRow.fromSeq(partitionOpt.get.values), c), baseFileStatusesAndLogFileOnly) + sparkAdapter.getSparkPartitionedFileUtils.newPartitionDirectory( + new PartitionFileSliceMapping(InternalRow.fromSeq(partitionOpt.get.values), c), baseFileStatusesAndLogFileOnly) } else { - PartitionDirectory(InternalRow.fromSeq(partitionOpt.get.values), baseFileStatusesAndLogFileOnly) + sparkAdapter.getSparkPartitionedFileUtils.newPartitionDirectory( + InternalRow.fromSeq(partitionOpt.get.values), baseFileStatusesAndLogFileOnly) } } else { @@ -181,7 +183,8 @@ case class HoodieFileIndex(spark: SparkSession, baseFileStatusOpt.foreach(f => files.append(f)) files }) - PartitionDirectory(InternalRow.fromSeq(partitionOpt.get.values), allCandidateFiles) + sparkAdapter.getSparkPartitionedFileUtils.newPartitionDirectory( + InternalRow.fromSeq(partitionOpt.get.values), allCandidateFiles) } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/execution/datasources/HoodieInMemoryFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/execution/datasources/HoodieInMemoryFileIndex.scala index ad1e87f8ce04..e69364d67660 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/execution/datasources/HoodieInMemoryFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/execution/datasources/HoodieInMemoryFileIndex.scala @@ -49,7 +49,8 @@ class HoodieInMemoryFileIndex(sparkSession: SparkSession, */ override def listFiles(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { val selectedPartitions = if (partitionSpec().partitionColumns.isEmpty) { - PartitionDirectory(InternalRow.empty, allFiles().filter(f => isDataPath(f.getPath))) :: Nil + sparkAdapter.getSparkPartitionedFileUtils.newPartitionDirectory( + InternalRow.empty, allFiles().filter(f => isDataPath(f.getPath))) :: Nil } else { prunePartitions(partitionFilters, partitionSpec()).map { case PartitionPath(values, path) => @@ -62,7 +63,7 @@ class HoodieInMemoryFileIndex(sparkSession: SparkSession, // Directory does not exist, or has no children files Nil } - PartitionDirectory(values, files) + sparkAdapter.getSparkPartitionedFileUtils.newPartitionDirectory(values, files) } } logTrace("Selected files after partition pruning:\n\t" + selectedPartitions.mkString("\n\t")) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java similarity index 93% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java rename to hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java index 09e6bd699bce..a80aa1d09e6c 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/testutils/SparkDatasetTestUtils.java @@ -18,12 +18,13 @@ package org.apache.hudi.testutils; +import org.apache.hudi.SparkAdapterSupport$; +import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; -import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; @@ -32,10 +33,7 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$; import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; -import org.apache.spark.sql.catalyst.encoders.RowEncoder; -import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.catalyst.expressions.GenericRow; import org.apache.spark.sql.types.DataTypes; @@ -48,15 +46,14 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; -import java.util.stream.Collectors; - -import scala.collection.JavaConversions; -import scala.collection.JavaConverters; import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; /** * Dataset test utils. + * Note: This util class can be only used within `hudi-spark` modules because it + * relies on SparkAdapterSupport to get encoder for different versions of Spark. If used elsewhere this + * class won't be initialized properly amd could cause ClassNotFoundException or NoClassDefFoundError */ public class SparkDatasetTestUtils { @@ -95,11 +92,7 @@ public class SparkDatasetTestUtils { * @return the encoder thus generated. */ private static ExpressionEncoder getEncoder(StructType schema) { - List attributes = JavaConversions.asJavaCollection(schema.toAttributes()).stream() - .map(Attribute::toAttribute).collect(Collectors.toList()); - return RowEncoder.apply(schema) - .resolveAndBind(JavaConverters.asScalaBufferConverter(attributes).asScala().toSeq(), - SimpleAnalyzer$.MODULE$); + return SparkAdapterSupport$.MODULE$.sparkAdapter().getCatalystExpressionUtils().getEncoder(schema); } /** diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml index 87311926be12..5072f445db68 100644 --- a/hudi-spark-datasource/hudi-spark/pom.xml +++ b/hudi-spark-datasource/hudi-spark/pom.xml @@ -245,6 +245,12 @@ org.apache.parquet parquet-avro + + org.apache.parquet + parquet-hadoop-bundle + ${parquet.version} + provided + @@ -335,6 +341,10 @@ org.pentaho * + + org.apache.parquet + * + @@ -350,6 +360,10 @@ javax.servlet.jsp * + + org.apache.parquet + * + @@ -365,6 +379,10 @@ javax.servlet.jsp * + + org.apache.parquet + * + @@ -376,6 +394,10 @@ org.eclipse.jetty.orbit javax.servlet + + org.apache.parquet + * + @@ -420,6 +442,14 @@ test-jar test + + org.apache.hudi + hudi-spark-common_${scala.binary.version} + ${project.version} + tests + test-jar + test + org.apache.hudi hudi-common diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala index 24820c1c0320..70790af41386 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala @@ -77,7 +77,16 @@ object HoodieAnalysis extends SparkAdapterSupport { } } else { rules += adaptIngestionTargetLogicalRelations - val dataSourceV2ToV1FallbackClass = "org.apache.spark.sql.hudi.analysis.HoodieDataSourceV2ToV1Fallback" + val dataSourceV2ToV1FallbackClass = if (HoodieSparkUtils.isSpark3_5) + "org.apache.spark.sql.hudi.analysis.HoodieSpark35DataSourceV2ToV1Fallback" + else if (HoodieSparkUtils.isSpark3_4) + "org.apache.spark.sql.hudi.analysis.HoodieSpark34DataSourceV2ToV1Fallback" + else if (HoodieSparkUtils.isSpark3_3) + "org.apache.spark.sql.hudi.analysis.HoodieSpark33DataSourceV2ToV1Fallback" + else { + // Spark 3.2.x + "org.apache.spark.sql.hudi.analysis.HoodieSpark32DataSourceV2ToV1Fallback" + } val dataSourceV2ToV1Fallback: RuleBuilder = session => instantiateKlass(dataSourceV2ToV1FallbackClass, session) @@ -95,7 +104,9 @@ object HoodieAnalysis extends SparkAdapterSupport { if (HoodieSparkUtils.isSpark3) { val resolveAlterTableCommandsClass = - if (HoodieSparkUtils.gteqSpark3_4) { + if (HoodieSparkUtils.gteqSpark3_5) { + "org.apache.spark.sql.hudi.Spark35ResolveHudiAlterTableCommand" + } else if (HoodieSparkUtils.gteqSpark3_4) { "org.apache.spark.sql.hudi.Spark34ResolveHudiAlterTableCommand" } else if (HoodieSparkUtils.gteqSpark3_3) { "org.apache.spark.sql.hudi.Spark33ResolveHudiAlterTableCommand" @@ -149,7 +160,9 @@ object HoodieAnalysis extends SparkAdapterSupport { if (HoodieSparkUtils.gteqSpark3_0) { val nestedSchemaPruningClass = - if (HoodieSparkUtils.gteqSpark3_4) { + if (HoodieSparkUtils.gteqSpark3_5) { + "org.apache.spark.sql.execution.datasources.Spark35NestedSchemaPruning" + } else if (HoodieSparkUtils.gteqSpark3_4) { "org.apache.spark.sql.execution.datasources.Spark34NestedSchemaPruning" } else if (HoodieSparkUtils.gteqSpark3_3) { "org.apache.spark.sql.execution.datasources.Spark33NestedSchemaPruning" diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala index f63f4115e919..f18509696193 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala @@ -17,17 +17,17 @@ package org.apache.spark.sql.hudi.command +import org.apache.hudi.SparkAdapterSupport import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.hudi.command.procedures.{Procedure, ProcedureArgs} import org.apache.spark.sql.{Row, SparkSession} -import scala.collection.Seq - case class CallProcedureHoodieCommand( procedure: Procedure, args: ProcedureArgs) extends HoodieLeafRunnableCommand { - override def output: Seq[Attribute] = procedure.outputType.toAttributes + override def output: Seq[Attribute] = + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes(procedure.outputType) override def run(sparkSession: SparkSession): Seq[Row] = { procedure.call(args) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala index 57aff092b742..5bb62524a2bc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hudi.command +import org.apache.hudi.SparkAdapterSupport import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.spark.sql.catalyst.expressions.Attribute @@ -48,5 +49,7 @@ case class CompactionHoodiePathCommand(path: String, RunCompactionProcedure.builder.get().build.call(procedureArgs) } - override val output: Seq[Attribute] = RunCompactionProcedure.builder.get().build.outputType.toAttributes + override val output: Seq[Attribute] = + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes( + RunCompactionProcedure.builder.get().build.outputType) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala index adaaeae9e55c..426d6f27720b 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hudi.command +import org.apache.hudi.SparkAdapterSupport import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation.CompactionOperation @@ -35,5 +36,7 @@ case class CompactionHoodieTableCommand(table: CatalogTable, CompactionHoodiePathCommand(basePath, operation, instantTimestamp).run(sparkSession) } - override val output: Seq[Attribute] = RunCompactionProcedure.builder.get().build.outputType.toAttributes + override val output: Seq[Attribute] = + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes( + RunCompactionProcedure.builder.get().build.outputType) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala index 95a4ecf7800e..a61bea7aa848 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hudi.command +import org.apache.hudi.SparkAdapterSupport import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.spark.sql.catalyst.expressions.Attribute @@ -40,5 +41,7 @@ case class CompactionShowHoodiePathCommand(path: String, limit: Int) ShowCompactionProcedure.builder.get().build.call(procedureArgs) } - override val output: Seq[Attribute] = ShowCompactionProcedure.builder.get().build.outputType.toAttributes + override val output: Seq[Attribute] = + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes( + ShowCompactionProcedure.builder.get().build.outputType) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala index afd15d5153db..070e93912aba 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hudi.command +import org.apache.hudi.SparkAdapterSupport import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.getTableLocation @@ -32,5 +33,7 @@ case class CompactionShowHoodieTableCommand(table: CatalogTable, limit: Int) CompactionShowHoodiePathCommand(basePath, limit).run(sparkSession) } - override val output: Seq[Attribute] = ShowCompactionProcedure.builder.get().build.outputType.toAttributes + override val output: Seq[Attribute] = + SparkAdapterSupport.sparkAdapter.getSchemaUtils.toAttributes( + ShowCompactionProcedure.builder.get().build.outputType) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala index 3f3d4e10ea9e..5a7aec53b63c 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala @@ -156,11 +156,15 @@ object InsertIntoHoodieTableCommand extends Logging with ProvidesHoodieConfig wi conf: SQLConf): LogicalPlan = { val planUtils = sparkAdapter.getCatalystPlanUtils try { - planUtils.resolveOutputColumns(catalogTable.catalogTableName, expectedSchema.toAttributes, query, byName = true, conf) + planUtils.resolveOutputColumns( + catalogTable.catalogTableName, sparkAdapter.getSchemaUtils.toAttributes(expectedSchema), query, byName = true, conf) } catch { // NOTE: In case matching by name didn't match the query output, we will attempt positional matching - case ae: AnalysisException if ae.getMessage().startsWith("Cannot write incompatible data to table") => - planUtils.resolveOutputColumns(catalogTable.catalogTableName, expectedSchema.toAttributes, query, byName = false, conf) + // SPARK-42309 Error message changed in Spark 3.5.0 so we need to match two strings here + case ae: AnalysisException if (ae.getMessage().startsWith("[INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA] Cannot write incompatible data for the table") + || ae.getMessage().startsWith("Cannot write incompatible data to table")) => + planUtils.resolveOutputColumns( + catalogTable.catalogTableName, sparkAdapter.getSchemaUtils.toAttributes(expectedSchema), query, byName = false, conf) } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitionerForRows.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitionerForRows.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitionerForRows.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/execution/bulkinsert/TestBulkInsertInternalPartitionerForRows.java diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java index 1c21c9a52530..50ec641c182f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java @@ -17,10 +17,10 @@ package org.apache.hudi.functional; -import org.apache.avro.Schema; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.HoodieDatasetBulkInsertHelper; +import org.apache.hudi.SparkAdapterSupport$; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.FileIOUtils; @@ -33,34 +33,31 @@ import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.testutils.DataSourceTestUtils; import org.apache.hudi.testutils.HoodieSparkClientTestBase; + +import org.apache.avro.Schema; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$; import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; -import org.apache.spark.sql.catalyst.encoders.RowEncoder; -import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.types.StructType; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; -import scala.Tuple2; -import scala.collection.JavaConversions; -import scala.collection.JavaConverters; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; +import scala.Tuple2; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -349,10 +346,6 @@ public void testNoPropsSet() { } private ExpressionEncoder getEncoder(StructType schema) { - List attributes = JavaConversions.asJavaCollection(schema.toAttributes()).stream() - .map(Attribute::toAttribute).collect(Collectors.toList()); - return RowEncoder.apply(schema) - .resolveAndBind(JavaConverters.asScalaBufferConverter(attributes).asScala().toSeq(), - SimpleAnalyzer$.MODULE$); + return SparkAdapterSupport$.MODULE$.sparkAdapter().getCatalystExpressionUtils().getEncoder(schema); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java similarity index 94% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java index a88f4dcf9e89..86aa6cff7a3d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java @@ -45,6 +45,8 @@ import java.util.Random; import java.util.UUID; +import static org.apache.hudi.common.testutils.HoodieTestUtils.getJavaVersion; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -166,7 +168,17 @@ public void testGlobalFailure() throws Exception { fileNames.add(handle.getFileName()); // verify write status assertNotNull(writeStatus.getGlobalError()); - assertTrue(writeStatus.getGlobalError().getMessage().contains("java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String")); + + String expectedError = getJavaVersion() == 11 || getJavaVersion() == 17 + ? "class java.lang.String cannot be cast to class org.apache.spark.unsafe.types.UTF8String" + : "java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String"; + + try { + assertTrue(writeStatus.getGlobalError().getMessage().contains(expectedError)); + } catch (Throwable e) { + fail("Expected error to contain: " + expectedError + ", the actual error message: " + writeStatus.getGlobalError().getMessage()); + } + assertEquals(writeStatus.getFileId(), fileId); assertEquals(writeStatus.getPartitionPath(), partitionPath); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java index e1f8f9f6105e..d704e833ba08 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java @@ -18,27 +18,23 @@ package org.apache.hudi.testutils; +import org.apache.hudi.AvroConversionUtils; +import org.apache.hudi.SparkAdapterSupport$; + import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; -import org.apache.hudi.AvroConversionUtils; import org.apache.spark.package$; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer$; import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; -import org.apache.spark.sql.catalyst.encoders.RowEncoder; -import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; import org.apache.spark.sql.types.StructType; -import scala.Function1; -import scala.collection.JavaConversions; -import scala.collection.JavaConverters; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.util.List; -import java.util.stream.Collectors; + +import scala.Function1; public class KeyGeneratorTestUtilities { @@ -101,11 +97,7 @@ public static InternalRow getInternalRow(Row row) { } private static ExpressionEncoder getEncoder(StructType schema) { - List attributes = JavaConversions.asJavaCollection(schema.toAttributes()).stream() - .map(Attribute::toAttribute).collect(Collectors.toList()); - return RowEncoder.apply(schema) - .resolveAndBind(JavaConverters.asScalaBufferConverter(attributes).asScala().toSeq(), - SimpleAnalyzer$.MODULE$); + return SparkAdapterSupport$.MODULE$.sparkAdapter().getCatalystExpressionUtils().getEncoder(schema); } public static InternalRow getInternalRow(Row row, ExpressionEncoder encoder) throws ClassNotFoundException, InvocationTargetException, IllegalAccessException, NoSuchMethodException { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala index 592f9e2bfc46..5cd6ac3954ee 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionUtils.scala @@ -387,7 +387,7 @@ class TestAvroConversionUtils extends FunSuite with Matchers { } } ] - }} + } """ val expectedAvroSchema = new Schema.Parser().parse(expectedSchemaStr) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index 9d14064f3987..16215fe485c7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -635,6 +635,10 @@ class TestInsertTable extends HoodieSparkSqlTestBase { test("Test insert for uppercase table name") { withRecordType()(withTempDir{ tmp => val tableName = s"H_$generateTableName" + if (HoodieSparkUtils.gteqSpark3_5) { + // [SPARK-44284] Spark 3.5+ requires conf below to be case sensitive + spark.sql(s"set spark.sql.caseSensitive=true") + } spark.sql( s""" @@ -655,7 +659,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase { .setBasePath(tmp.getCanonicalPath) .setConf(spark.sessionState.newHadoopConf()) .build() - assertResult(metaClient.getTableConfig.getTableName)(tableName) + assertResult(tableName)(metaClient.getTableConfig.getTableName) }) } @@ -673,7 +677,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { | tblproperties (primaryKey = 'id') | partitioned by (dt) """.stripMargin) - val tooManyDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_4) { + val tooManyDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { + s""" + |[INSERT_COLUMN_ARITY_MISMATCH.TOO_MANY_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is too many data columns: + |Table columns: `id`, `name`, `price`. + |Data columns: `1`, `a1`, `10`, `2021-06-20`. + |""".stripMargin + } else if (HoodieSparkUtils.gteqSpark3_4) { """ |too many data columns: |Table columns: 'id', 'name', 'price'. @@ -689,7 +699,13 @@ class TestInsertTable extends HoodieSparkSqlTestBase { checkExceptionContain(s"insert into $tableName partition(dt = '2021-06-20') select 1, 'a1', 10, '2021-06-20'")( tooManyDataColumnsErrorMsg) - val notEnoughDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_4) { + val notEnoughDataColumnsErrorMsg = if (HoodieSparkUtils.gteqSpark3_5) { + s""" + |[INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS] Cannot write to `spark_catalog`.`default`.`$tableName`, the reason is not enough data columns: + |Table columns: `id`, `name`, `price`, `dt`. + |Data columns: `1`, `a1`, `10`. + |""".stripMargin + } else if (HoodieSparkUtils.gteqSpark3_4) { """ |not enough data columns: |Table columns: 'id', 'name', 'price', 'dt'. diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index 636713ef269f..57c849026c67 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -197,6 +197,14 @@ true + + org.apache.spark + spark-core_${scala.binary.version} + ${spark2.version} + provided + true + + org.apache.hudi diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala index ea5841ecdf43..337773db162a 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2CatalystExpressionUtils.scala @@ -18,11 +18,16 @@ package org.apache.spark.sql import HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{Add, And, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Like, Log, Log10, Log1p, Log2, Lower, Multiply, Or, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark2CatalystExpressionUtils extends HoodieCatalystExpressionUtils { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + // NOTE: This method has been borrowed from Spark 3.1 override def extractPredicatesWithinOutputSet(condition: Expression, outputSet: AttributeSet): Option[Expression] = condition match { diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2SchemaUtils.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2SchemaUtils.scala index e2c1dc4a2444..beee0d293dfd 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/HoodieSpark2SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark2SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, colType, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala index ec275a1d3fdc..00e4d0c1ca91 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.adapter import org.apache.avro.Schema +import org.apache.hadoop.fs.FileStatus import org.apache.hadoop.fs.Path import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.common.table.HoodieTableMetaClient @@ -26,8 +27,8 @@ import org.apache.hudi.{AvroConversionUtils, DefaultSource, Spark2HoodieFileScan import org.apache.spark.sql._ import org.apache.spark.sql.avro._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, InterpretedPredicate} +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InterpretedPredicate} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{Command, DeleteFromTable, Join, LogicalPlan} @@ -91,7 +92,7 @@ class Spark2Adapter extends SparkAdapter { override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters override def createSparkRowSerDe(schema: StructType): SparkRowSerDe = { - val encoder = RowEncoder(schema).resolveAndBind() + val encoder = getCatalystExpressionUtils.getEncoder(schema) new Spark2RowSerDe(encoder) } diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark2PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark2PartitionedFileUtils.scala index 66c4722f6619..99b0a58bb25a 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark2PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark2PartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 2.4. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 2.4. */ object HoodieSpark2PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -40,4 +40,12 @@ object HoodieSpark2PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, filePath.toUri.toString, start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java similarity index 100% rename from hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java rename to hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java b/hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java index d7a9a1f12241..ad83720b0213 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java @@ -33,9 +33,13 @@ public class ReflectUtil { public static InsertIntoStatement createInsertInto(LogicalPlan table, Map> partition, Seq userSpecifiedCols, - LogicalPlan query, boolean overwrite, boolean ifPartitionNotExists) { + LogicalPlan query, boolean overwrite, boolean ifPartitionNotExists, boolean byName) { try { - if (HoodieSparkUtils.isSpark3_0()) { + if (HoodieSparkUtils.gteqSpark3_5()) { + Constructor constructor = InsertIntoStatement.class.getConstructor( + LogicalPlan.class, Map.class, Seq.class, LogicalPlan.class, boolean.class, boolean.class, boolean.class); + return constructor.newInstance(table, partition, userSpecifiedCols, query, overwrite, ifPartitionNotExists, byName); + } else if (HoodieSparkUtils.isSpark3_0()) { Constructor constructor = InsertIntoStatement.class.getConstructor( LogicalPlan.class, Map.class, LogicalPlan.class, boolean.class, boolean.class); return constructor.newInstance(table, partition, query, overwrite, ifPartitionNotExists); diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala index b2a9a529511e..01e435b4f8d2 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/BaseSpark3Adapter.scala @@ -26,15 +26,14 @@ import org.apache.hudi.spark3.internal.ReflectUtil import org.apache.hudi.{AvroConversionUtils, DefaultSource, HoodieSparkUtils, Spark3RowSerDe} import org.apache.spark.internal.Logging import org.apache.spark.sql.avro.{HoodieAvroSchemaConverters, HoodieSparkAvroSchemaConverters} -import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, InterpretedPredicate, Predicate} import org.apache.spark.sql.catalyst.util.DateFormatter import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.hudi.SparkAdapter import org.apache.spark.sql.sources.{BaseRelation, Filter} -import org.apache.spark.sql.{HoodieSpark3CatalogUtils, SQLContext, SparkSession} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.{ColumnVector, ColumnarBatch} +import org.apache.spark.sql.{HoodieSpark3CatalogUtils, SQLContext, SparkSession} import org.apache.spark.storage.StorageLevel import java.time.ZoneId @@ -57,8 +56,7 @@ abstract class BaseSpark3Adapter extends SparkAdapter with Logging { def getCatalogUtils: HoodieSpark3CatalogUtils override def createSparkRowSerDe(schema: StructType): SparkRowSerDe = { - val encoder = RowEncoder(schema).resolveAndBind() - new Spark3RowSerDe(encoder) + new Spark3RowSerDe(getCatalystExpressionUtils.getEncoder(schema)) } override def getAvroSchemaConverters: HoodieAvroSchemaConverters = HoodieSparkAvroSchemaConverters diff --git a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml index 2035653a141a..8418ac2f0e53 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.0.x/pom.xml @@ -157,6 +157,14 @@ true + + org.apache.spark + spark-core_${scala.binary.version} + ${spark30.version} + provided + true + + com.fasterxml.jackson.core jackson-databind @@ -263,6 +271,13 @@ + + + + org.apache.parquet + parquet-avro + test + diff --git a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30CatalystExpressionUtils.scala index ef3e8fdb6d16..c4708be813b4 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30CatalystExpressionUtils.scala @@ -19,11 +19,16 @@ package org.apache.spark.sql import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{AnsiCast, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark30CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + override def matchCast(expr: Expression): Option[(Expression, DataType, Option[String])] = expr match { case Cast(child, dataType, timeZoneId) => Some((child, dataType, timeZoneId)) diff --git a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30SchemaUtils.scala index 10775e11a4bb..f66fd837c7e8 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark30SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark30SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, colType, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark30PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark30PartitionedFileUtils.scala index 0abc17db05b4..5282e110c1fc 100644 --- a/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark30PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.0.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark30PartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 3.0. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.0. */ object HoodieSpark30PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -40,4 +40,12 @@ object HoodieSpark30PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, filePath.toUri.toString, start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java b/hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java similarity index 100% rename from hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java rename to hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java diff --git a/hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java b/hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java similarity index 100% rename from hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java rename to hudi-spark-datasource/hudi-spark3.0.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml index 42c7ff0dcaf1..0c0609d45106 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml @@ -157,6 +157,14 @@ true + + org.apache.spark + spark-core_${scala.binary.version} + ${spark31.version} + provided + true + + com.fasterxml.jackson.core jackson-databind @@ -263,6 +271,13 @@ + + + + org.apache.parquet + parquet-avro + test + diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystExpressionUtils.scala index 33e338d3afe8..3d32b206fd14 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31CatalystExpressionUtils.scala @@ -19,12 +19,16 @@ package org.apache.spark.sql import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{Add, AnsiCast, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.types.DataType - +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark31CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils with PredicateHelper { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + override def normalizeExprs(exprs: Seq[Expression], attributes: Seq[Attribute]): Seq[Expression] = DataSourceStrategy.normalizeExprs(exprs, attributes) diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31SchemaUtils.scala index c4753067f51e..49388f557913 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/HoodieSpark31SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark31SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, colType, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark31PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark31PartitionedFileUtils.scala index 5a359234631d..3be432691f8f 100644 --- a/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark31PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark31PartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 3.1. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.1. */ object HoodieSpark31PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -40,4 +40,12 @@ object HoodieSpark31PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, filePath.toUri.toString, start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java new file mode 100644 index 000000000000..206d4931b15e --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getInternalRowWithError; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Unit tests {@link HoodieBulkInsertDataInternalWriter}. + */ +public class TestHoodieBulkInsertDataInternalWriter extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream configParams() { + Object[][] data = new Object[][] { + {true, true}, + {true, false}, + {false, true}, + {false, false} + }; + return Stream.of(data).map(Arguments::of); + } + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("configParams") + public void testDataInternalWriter(boolean sorted, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, populateMetaFields, sorted); + + int size = 10 + RANDOM.nextInt(1000); + // write N rows to partition1, N rows to partition2 and N rows to partition3 ... Each batch should create a new RowCreateHandle and a new file + int batches = 3; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), batches, size, sorted, fileAbsPaths, fileNames, false); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(totalInputRows, result, instantTime, fileNames, populateMetaFields); + } + } + + + /** + * Issue some corrupted or wrong schematized InternalRow after few valid InternalRows so that global error is thrown. write batch 1 of valid records write batch2 of invalid records which is expected + * to throw Global Error. Verify global error is set appropriately and only first batch of records are written to disk. + */ + @Test + public void testGlobalFailure() throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(true); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0]; + + String instantTime = "001"; + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, true, false); + + int size = 10 + RANDOM.nextInt(100); + int totalFailures = 5; + // Generate first batch of valid rows + Dataset inputRows = getRandomRows(sqlContext, size / 2, partitionPath, false); + List internalRows = toInternalRows(inputRows, ENCODER); + + // generate some failures rows + for (int i = 0; i < totalFailures; i++) { + internalRows.add(getInternalRowWithError(partitionPath)); + } + + // generate 2nd batch of valid rows + Dataset inputRows2 = getRandomRows(sqlContext, size / 2, partitionPath, false); + internalRows.addAll(toInternalRows(inputRows2, ENCODER)); + + // issue writes + try { + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + fail("Should have failed"); + } catch (Throwable e) { + // expected + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), 1, size / 2, fileAbsPaths, fileNames); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(inputRows, result, instantTime, fileNames, true); + } + + private void writeRows(Dataset inputRows, HoodieBulkInsertDataInternalWriter writer) + throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java new file mode 100644 index 000000000000..31d606de4a1e --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.1.x/src/test/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.testutils.HoodieClientTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.write.DataWriter; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests {@link HoodieDataSourceInternalBatchWrite}. + */ +public class TestHoodieDataSourceInternalBatchWrite extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testDataSourceWriter(boolean populateMetaFields) throws Exception { + testDataSourceWriterInternal(Collections.emptyMap(), Collections.emptyMap(), populateMetaFields); + } + + private void testDataSourceWriterInternal(Map extraMetadata, Map expectedExtraMetadata, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime = "001"; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, extraMetadata, populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + String[] partitionPaths = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(1000); + int batches = 5; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // verify extra metadata + Option commitMetadataOption = HoodieClientTestUtils.getCommitMetadataForLatestInstant(metaClient); + assertTrue(commitMetadataOption.isPresent()); + Map actualExtraMetadata = new HashMap<>(); + commitMetadataOption.get().getExtraMetadata().entrySet().stream().filter(entry -> + !entry.getKey().equals(HoodieCommitMetadata.SCHEMA_KEY)).forEach(entry -> actualExtraMetadata.put(entry.getKey(), entry.getValue())); + assertEquals(actualExtraMetadata, expectedExtraMetadata); + } + + @Test + public void testDataSourceWriterExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put(commitExtraMetaPrefix + "a", "valA"); + extraMeta.put(commitExtraMetaPrefix + "b", "valB"); + extraMeta.put("commit_extra_c", "valC"); // should not be part of commit extra metadata + + Map expectedMetadata = new HashMap<>(); + expectedMetadata.putAll(extraMeta); + expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key()); + expectedMetadata.remove("commit_extra_c"); + + testDataSourceWriterInternal(extraMeta, expectedMetadata, true); + } + + @Test + public void testDataSourceWriterEmptyExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put("keyA", "valA"); + extraMeta.put("keyB", "valB"); + extraMeta.put("commit_extra_c", "valC"); + // none of the keys has commit metadata key prefix. + testDataSourceWriterInternal(extraMeta, Collections.emptyMap(), true); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testMultipleDataSourceWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10 + RANDOM.nextInt(1000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + // Large writes are not required to be executed w/ regular CI jobs. Takes lot of running time. + @Disabled + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testLargeWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 3; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10000 + RANDOM.nextInt(10000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, + populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + /** + * Tests that DataSourceWriter.abort() will abort the written records of interest write and commit batch1 write and abort batch2 Read of entire dataset should show only records from batch1. + * commit batch1 + * abort batch2 + * verify only records from batch1 is available to read + */ + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testAbort(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime0 = "00" + 0; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime0, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + List partitionPaths = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS); + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(100); + int batches = 1; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // 2nd batch. abort in the end + String instantTime1 = "00" + 1; + dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime1, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(1, RANDOM.nextLong()); + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + } + + commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.abort(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + // only rows from first batch should be present + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + } + + private void writeRows(Dataset inputRows, DataWriter writer) throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml index 70dbc0d47757..0078178422ec 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml +++ b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml @@ -196,12 +196,6 @@ ${spark32.version} provided true - - - * - * - - @@ -315,6 +309,8 @@ test-jar test + + org.apache.parquet parquet-avro diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystExpressionUtils.scala index 9cd85ca8a53e..1eaa99ac77f6 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32CatalystExpressionUtils.scala @@ -18,12 +18,17 @@ package org.apache.spark.sql import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{Add, AnsiCast, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark32CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils with PredicateHelper { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + override def normalizeExprs(exprs: Seq[Expression], attributes: Seq[Attribute]): Seq[Expression] = DataSourceStrategy.normalizeExprs(exprs, attributes) diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32SchemaUtils.scala index 03931067d6e5..b5127fe328f7 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/HoodieSpark32SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark32SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, colType, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark32PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark32PartitionedFileUtils.scala index a5e4c04a1709..a9fac5d45ef7 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark32PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark32PartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 3.2. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.2. */ object HoodieSpark32PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -40,4 +40,12 @@ object HoodieSpark32PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, filePath.toUri.toString, start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusDataSourceUtils.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala similarity index 98% rename from hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusDataSourceUtils.scala rename to hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala index 5c3f5a976c25..6d1c76380f21 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32PlusDataSourceUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32DataSourceUtils.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.util.Utils -object Spark32PlusDataSourceUtils { +object Spark32DataSourceUtils { /** * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32LegacyHoodieParquetFileFormat.scala index c88c35b5eeb4..6099e4ac25ac 100644 --- a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark32LegacyHoodieParquetFileFormat.scala @@ -185,7 +185,7 @@ class Spark32LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark32DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) createParquetFilters( parquetSchema, pushDownDate, @@ -285,9 +285,9 @@ class Spark32LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark32DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark32DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createVectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, @@ -347,9 +347,9 @@ class Spark32LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu int96RebaseSpec) } else { val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark32DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark32DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createParquetReadSupport( convertTz, /* enableVectorizedReader = */ false, diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32Analysis.scala b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32Analysis.scala new file mode 100644 index 000000000000..f139e8beb7fb --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32Analysis.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.analysis + +import org.apache.hudi.DefaultSource + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.hudi.ProvidesHoodieConfig +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.{SQLContext, SparkSession} + +/** + * NOTE: PLEASE READ CAREFULLY + * + * Since Hudi relations don't currently implement DS V2 Read API, we have to fallback to V1 here. + * Such fallback will have considerable performance impact, therefore it's only performed in cases + * where V2 API have to be used. Currently only such use-case is using of Schema Evolution feature + * + * Check out HUDI-4178 for more details + */ +case class HoodieSpark32DataSourceV2ToV1Fallback(sparkSession: SparkSession) extends Rule[LogicalPlan] + with ProvidesHoodieConfig { + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + // The only place we're avoiding fallback is in [[AlterTableCommand]]s since + // current implementation relies on DSv2 features + case _: AlterTableCommand => plan + + // NOTE: Unfortunately, [[InsertIntoStatement]] is implemented in a way that doesn't expose + // target relation as a child (even though there's no good reason for that) + case iis@InsertIntoStatement(rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _), _, _, _, _, _) => + iis.copy(table = convertToV1(rv2, v2Table)) + + case _ => + plan.resolveOperatorsDown { + case rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _) => convertToV1(rv2, v2Table) + } + } + + private def convertToV1(rv2: DataSourceV2Relation, v2Table: HoodieInternalV2Table) = { + val output = rv2.output + val catalogTable = v2Table.catalogTable.map(_ => v2Table.v1Table) + val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), + buildHoodieConfig(v2Table.hoodieCatalogTable), v2Table.hoodieCatalogTable.tableSchema) + + LogicalRelation(relation, output, catalogTable, isStreaming = false) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java new file mode 100644 index 000000000000..206d4931b15e --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getInternalRowWithError; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Unit tests {@link HoodieBulkInsertDataInternalWriter}. + */ +public class TestHoodieBulkInsertDataInternalWriter extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream configParams() { + Object[][] data = new Object[][] { + {true, true}, + {true, false}, + {false, true}, + {false, false} + }; + return Stream.of(data).map(Arguments::of); + } + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("configParams") + public void testDataInternalWriter(boolean sorted, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, populateMetaFields, sorted); + + int size = 10 + RANDOM.nextInt(1000); + // write N rows to partition1, N rows to partition2 and N rows to partition3 ... Each batch should create a new RowCreateHandle and a new file + int batches = 3; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), batches, size, sorted, fileAbsPaths, fileNames, false); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(totalInputRows, result, instantTime, fileNames, populateMetaFields); + } + } + + + /** + * Issue some corrupted or wrong schematized InternalRow after few valid InternalRows so that global error is thrown. write batch 1 of valid records write batch2 of invalid records which is expected + * to throw Global Error. Verify global error is set appropriately and only first batch of records are written to disk. + */ + @Test + public void testGlobalFailure() throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(true); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0]; + + String instantTime = "001"; + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, true, false); + + int size = 10 + RANDOM.nextInt(100); + int totalFailures = 5; + // Generate first batch of valid rows + Dataset inputRows = getRandomRows(sqlContext, size / 2, partitionPath, false); + List internalRows = toInternalRows(inputRows, ENCODER); + + // generate some failures rows + for (int i = 0; i < totalFailures; i++) { + internalRows.add(getInternalRowWithError(partitionPath)); + } + + // generate 2nd batch of valid rows + Dataset inputRows2 = getRandomRows(sqlContext, size / 2, partitionPath, false); + internalRows.addAll(toInternalRows(inputRows2, ENCODER)); + + // issue writes + try { + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + fail("Should have failed"); + } catch (Throwable e) { + // expected + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), 1, size / 2, fileAbsPaths, fileNames); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(inputRows, result, instantTime, fileNames, true); + } + + private void writeRows(Dataset inputRows, HoodieBulkInsertDataInternalWriter writer) + throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java new file mode 100644 index 000000000000..31d606de4a1e --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.2.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java @@ -0,0 +1,331 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.testutils.HoodieClientTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.write.DataWriter; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests {@link HoodieDataSourceInternalBatchWrite}. + */ +public class TestHoodieDataSourceInternalBatchWrite extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testDataSourceWriter(boolean populateMetaFields) throws Exception { + testDataSourceWriterInternal(Collections.emptyMap(), Collections.emptyMap(), populateMetaFields); + } + + private void testDataSourceWriterInternal(Map extraMetadata, Map expectedExtraMetadata, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime = "001"; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, extraMetadata, populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + String[] partitionPaths = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(1000); + int batches = 5; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // verify extra metadata + Option commitMetadataOption = HoodieClientTestUtils.getCommitMetadataForLatestInstant(metaClient); + assertTrue(commitMetadataOption.isPresent()); + Map actualExtraMetadata = new HashMap<>(); + commitMetadataOption.get().getExtraMetadata().entrySet().stream().filter(entry -> + !entry.getKey().equals(HoodieCommitMetadata.SCHEMA_KEY)).forEach(entry -> actualExtraMetadata.put(entry.getKey(), entry.getValue())); + assertEquals(actualExtraMetadata, expectedExtraMetadata); + } + + @Test + public void testDataSourceWriterExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put(commitExtraMetaPrefix + "a", "valA"); + extraMeta.put(commitExtraMetaPrefix + "b", "valB"); + extraMeta.put("commit_extra_c", "valC"); // should not be part of commit extra metadata + + Map expectedMetadata = new HashMap<>(); + expectedMetadata.putAll(extraMeta); + expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key()); + expectedMetadata.remove("commit_extra_c"); + + testDataSourceWriterInternal(extraMeta, expectedMetadata, true); + } + + @Test + public void testDataSourceWriterEmptyExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put("keyA", "valA"); + extraMeta.put("keyB", "valB"); + extraMeta.put("commit_extra_c", "valC"); + // none of the keys has commit metadata key prefix. + testDataSourceWriterInternal(extraMeta, Collections.emptyMap(), true); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testMultipleDataSourceWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10 + RANDOM.nextInt(1000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + // Large writes are not required to be executed w/ regular CI jobs. Takes lot of running time. + @Disabled + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testLargeWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 3; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10000 + RANDOM.nextInt(10000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, + populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + /** + * Tests that DataSourceWriter.abort() will abort the written records of interest write and commit batch1 write and abort batch2 Read of entire dataset should show only records from batch1. + * commit batch1 + * abort batch2 + * verify only records from batch1 is available to read + */ + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testAbort(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime0 = "00" + 0; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime0, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + List partitionPaths = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS); + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(100); + int batches = 1; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // 2nd batch. abort in the end + String instantTime1 = "00" + 1; + dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime1, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.emptyMap(), populateMetaFields, false); + writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(1, RANDOM.nextLong()); + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + } + + commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.abort(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + // only rows from first batch should be present + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + } + + private void writeRows(Dataset inputRows, DataWriter writer) throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala index d64bc94301a1..d603f2c13d6f 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala @@ -47,34 +47,6 @@ import org.apache.spark.sql.{AnalysisException, SQLContext, SparkSession} * * Check out HUDI-4178 for more details */ -case class HoodieDataSourceV2ToV1Fallback(sparkSession: SparkSession) extends Rule[LogicalPlan] - with ProvidesHoodieConfig { - - override def apply(plan: LogicalPlan): LogicalPlan = plan match { - // The only place we're avoiding fallback is in [[AlterTableCommand]]s since - // current implementation relies on DSv2 features - case _: AlterTableCommand => plan - - // NOTE: Unfortunately, [[InsertIntoStatement]] is implemented in a way that doesn't expose - // target relation as a child (even though there's no good reason for that) - case iis@InsertIntoStatement(rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _), _, _, _, _, _) => - iis.copy(table = convertToV1(rv2, v2Table)) - - case _ => - plan.resolveOperatorsDown { - case rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _) => convertToV1(rv2, v2Table) - } - } - - private def convertToV1(rv2: DataSourceV2Relation, v2Table: HoodieInternalV2Table) = { - val output = rv2.output - val catalogTable = v2Table.catalogTable.map(_ => v2Table.v1Table) - val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), - buildHoodieConfig(v2Table.hoodieCatalogTable), v2Table.hoodieCatalogTable.tableSchema) - - LogicalRelation(relation, output, catalogTable, isStreaming = false) - } -} /** * Rule for resolve hoodie's extended syntax or rewrite some logical plan. diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystExpressionUtils.scala index 3ba5ed3d9991..29c2ac57da01 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystExpressionUtils.scala @@ -17,13 +17,18 @@ package org.apache.spark.sql -import HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{Add, AnsiCast, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark33CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils with PredicateHelper { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + override def normalizeExprs(exprs: Seq[Expression], attributes: Seq[Attribute]): Seq[Expression] = DataSourceStrategy.normalizeExprs(exprs, attributes) diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33SchemaUtils.scala index 37563a61ca64..f31dadd0c317 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark33SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, colType, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark33PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark33PartitionedFileUtils.scala index 39e9c8efe347..220825a6875d 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark33PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark33PartitionedFileUtils.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 3.3. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.3. */ object HoodieSpark33PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -40,4 +40,12 @@ object HoodieSpark33PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, filePath.toUri.toString, start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33DataSourceUtils.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33DataSourceUtils.scala new file mode 100644 index 000000000000..2aa85660eb51 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33DataSourceUtils.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.spark.sql.SPARK_VERSION_METADATA_KEY +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.util.Utils + +object Spark33DataSourceUtils { + + /** + * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime + * compatibility against Spark 3.2.0 + */ + // scalastyle:off + def int96RebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 3.0 and earlier follow the legacy hybrid calendar and we need to + // rebase the INT96 timestamp values. + // Files written by Spark 3.1 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.1.0" || lookupFileMeta("org.apache.spark.legacyINT96") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + + /** + * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime + * compatibility against Spark 3.2.0 + */ + // scalastyle:off + def datetimeRebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 2.4 and earlier follow the legacy hybrid calendar and we need to + // rebase the datetime values. + // Files written by Spark 3.0 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.0.0" || lookupFileMeta("org.apache.spark.legacyDateTime") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + +} diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala index de6cbff90ca5..3b53b753b69d 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala @@ -187,7 +187,7 @@ class Spark33LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark33DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) createParquetFilters( parquetSchema, pushDownDate, @@ -287,9 +287,9 @@ class Spark33LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark33DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark33DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createVectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, @@ -349,9 +349,9 @@ class Spark33LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu int96RebaseSpec) } else { val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark33DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark33DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createParquetReadSupport( convertTz, /* enableVectorizedReader = */ false, diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark33Analysis.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark33Analysis.scala new file mode 100644 index 000000000000..3273d23e7c89 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark33Analysis.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.analysis + +import org.apache.hudi.DefaultSource + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.hudi.ProvidesHoodieConfig +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.{SQLContext, SparkSession} + +/** + * NOTE: PLEASE READ CAREFULLY + * + * Since Hudi relations don't currently implement DS V2 Read API, we have to fallback to V1 here. + * Such fallback will have considerable performance impact, therefore it's only performed in cases + * where V2 API have to be used. Currently only such use-case is using of Schema Evolution feature + * + * Check out HUDI-4178 for more details + */ +case class HoodieSpark33DataSourceV2ToV1Fallback(sparkSession: SparkSession) extends Rule[LogicalPlan] + with ProvidesHoodieConfig { + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + // The only place we're avoiding fallback is in [[AlterTableCommand]]s since + // current implementation relies on DSv2 features + case _: AlterTableCommand => plan + + // NOTE: Unfortunately, [[InsertIntoStatement]] is implemented in a way that doesn't expose + // target relation as a child (even though there's no good reason for that) + case iis@InsertIntoStatement(rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _), _, _, _, _, _) => + iis.copy(table = convertToV1(rv2, v2Table)) + + case _ => + plan.resolveOperatorsDown { + case rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _) => convertToV1(rv2, v2Table) + } + } + + private def convertToV1(rv2: DataSourceV2Relation, v2Table: HoodieInternalV2Table) = { + val output = rv2.output + val catalogTable = v2Table.catalogTable.map(_ => v2Table.v1Table) + val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), + buildHoodieConfig(v2Table.hoodieCatalogTable), v2Table.hoodieCatalogTable.tableSchema) + + LogicalRelation(relation, output, catalogTable, isStreaming = false) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java b/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java index 0d1867047847..0763a22f032c 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java @@ -45,7 +45,8 @@ public void testDataSourceWriterExtraCommitMetadata() throws Exception { scala.collection.immutable.List.empty(), statement.query(), statement.overwrite(), - statement.ifPartitionNotExists()); + statement.ifPartitionNotExists(), + false); Assertions.assertTrue( ((UnresolvedRelation)newStatment.table()).multipartIdentifier().contains("test_reflect_util")); diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystExpressionUtils.scala index e93228a47ee5..c36ca1ed55b4 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystExpressionUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystExpressionUtils.scala @@ -18,12 +18,17 @@ package org.apache.spark.sql import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{Add, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, EvalMode, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.{DataType, StructType} object HoodieSpark34CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils with PredicateHelper { + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder.apply(schema).resolveAndBind() + } + override def normalizeExprs(exprs: Seq[Expression], attributes: Seq[Attribute]): Seq[Expression] = { DataSourceStrategy.normalizeExprs(exprs, attributes) } diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34SchemaUtils.scala index d597544d2631..d6cf4a3fad07 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34SchemaUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34SchemaUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.SchemaUtils /** @@ -30,4 +32,8 @@ object HoodieSpark34SchemaUtils extends HoodieSchemaUtils { caseSensitiveAnalysis: Boolean): Unit = { SchemaUtils.checkColumnNameDuplication(columnNames, caseSensitiveAnalysis) } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + struct.toAttributes + } } diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark34PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark34PartitionedFileUtils.scala index 249d7e59051d..cfbf22246c5f 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark34PartitionedFileUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark34PartitionedFileUtils.scala @@ -19,12 +19,12 @@ package org.apache.spark.sql.execution.datasources -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.paths.SparkPath import org.apache.spark.sql.catalyst.InternalRow /** - * Utils on Spark [[PartitionedFile]] for Spark 3.4. + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.4. */ object HoodieSpark34PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { @@ -41,4 +41,12 @@ object HoodieSpark34PartitionedFileUtils extends HoodieSparkPartitionedFileUtils length: Long): PartitionedFile = { PartitionedFile(partitionValues, SparkPath.fromPath(filePath), start, length) } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses) + } } diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34DataSourceUtils.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34DataSourceUtils.scala new file mode 100644 index 000000000000..d404bc8c24b5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34DataSourceUtils.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.spark.sql.SPARK_VERSION_METADATA_KEY +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy +import org.apache.spark.util.Utils + +object Spark34DataSourceUtils { + + /** + * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime + * compatibility against Spark 3.2.0 + */ + // scalastyle:off + def int96RebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 3.0 and earlier follow the legacy hybrid calendar and we need to + // rebase the INT96 timestamp values. + // Files written by Spark 3.1 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.1.0" || lookupFileMeta("org.apache.spark.legacyINT96") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + + /** + * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime + * compatibility against Spark 3.2.0 + */ + // scalastyle:off + def datetimeRebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 2.4 and earlier follow the legacy hybrid calendar and we need to + // rebase the datetime values. + // Files written by Spark 3.0 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.0.0" || lookupFileMeta("org.apache.spark.legacyDateTime") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + +} diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala index 6de8ded06ec0..cd76ce6f3b2e 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala @@ -203,7 +203,7 @@ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark34DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) createParquetFilters( parquetSchema, pushDownDate, @@ -303,9 +303,9 @@ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu } else { // Spark 3.2.0 val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark34DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark34DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createVectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, @@ -365,9 +365,9 @@ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu int96RebaseSpec) } else { val datetimeRebaseMode = - Spark32PlusDataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + Spark34DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) val int96RebaseMode = - Spark32PlusDataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + Spark34DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) createParquetReadSupport( convertTz, /* enableVectorizedReader = */ false, diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark34Analysis.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark34Analysis.scala new file mode 100644 index 000000000000..9194a667a890 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark34Analysis.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.analysis + +import org.apache.hudi.DefaultSource + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.hudi.ProvidesHoodieConfig +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.{SQLContext, SparkSession} + +/** + * NOTE: PLEASE READ CAREFULLY + * + * Since Hudi relations don't currently implement DS V2 Read API, we have to fallback to V1 here. + * Such fallback will have considerable performance impact, therefore it's only performed in cases + * where V2 API have to be used. Currently only such use-case is using of Schema Evolution feature + * + * Check out HUDI-4178 for more details + */ +case class HoodieSpark34DataSourceV2ToV1Fallback(sparkSession: SparkSession) extends Rule[LogicalPlan] + with ProvidesHoodieConfig { + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + // The only place we're avoiding fallback is in [[AlterTableCommand]]s since + // current implementation relies on DSv2 features + case _: AlterTableCommand => plan + + // NOTE: Unfortunately, [[InsertIntoStatement]] is implemented in a way that doesn't expose + // target relation as a child (even though there's no good reason for that) + case iis@InsertIntoStatement(rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _), _, _, _, _, _) => + iis.copy(table = convertToV1(rv2, v2Table)) + + case _ => + plan.resolveOperatorsDown { + case rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _) => convertToV1(rv2, v2Table) + } + } + + private def convertToV1(rv2: DataSourceV2Relation, v2Table: HoodieInternalV2Table) = { + val output = rv2.output + val catalogTable = v2Table.catalogTable.map(_ => v2Table.v1Table) + val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), + buildHoodieConfig(v2Table.hoodieCatalogTable), v2Table.hoodieCatalogTable.tableSchema) + + LogicalRelation(relation, output, catalogTable, isStreaming = false) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java b/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java index 0d1867047847..0763a22f032c 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java @@ -45,7 +45,8 @@ public void testDataSourceWriterExtraCommitMetadata() throws Exception { scala.collection.immutable.List.empty(), statement.query(), statement.overwrite(), - statement.ifPartitionNotExists()); + statement.ifPartitionNotExists(), + false); Assertions.assertTrue( ((UnresolvedRelation)newStatment.table()).multipartIdentifier().contains("test_reflect_util")); diff --git a/hudi-spark-datasource/hudi-spark3.5.x/pom.xml b/hudi-spark-datasource/hudi-spark3.5.x/pom.xml new file mode 100644 index 000000000000..a39cc993f2dd --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/pom.xml @@ -0,0 +1,342 @@ + + + + + hudi-spark-datasource + org.apache.hudi + 0.15.0-SNAPSHOT + + 4.0.0 + + hudi-spark3.5.x_2.12 + 0.15.0-SNAPSHOT + + hudi-spark3.5.x_2.12 + jar + + + ${project.parent.parent.basedir} + + + + + + src/main/resources + + + + + + net.alchim31.maven + scala-maven-plugin + ${scala-maven-plugin.version} + + + -nobootcp + + false + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + prepare-package + + copy-dependencies + + + ${project.build.directory}/lib + true + true + true + + + + + + net.alchim31.maven + scala-maven-plugin + + + -nobootcp + -target:jvm-1.8 + + + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + + compile + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + test-compile + + + + false + + + + org.apache.maven.plugins + maven-surefire-plugin + + ${skip.hudi-spark3.unit.tests} + + + + org.apache.rat + apache-rat-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + org.jacoco + jacoco-maven-plugin + + + org.antlr + antlr4-maven-plugin + ${antlr.version} + + + + antlr4 + + + + + true + true + ../hudi-spark3.5.x/src/main/antlr4 + ../hudi-spark3.5.x/src/main/antlr4/imports + + + + + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark35.version} + provided + true + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark35.version} + provided + true + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark35.version} + provided + true + + + * + * + + + + + + com.fasterxml.jackson.core + jackson-databind + ${fasterxml.spark3.version} + + + com.fasterxml.jackson.core + jackson-annotations + ${fasterxml.spark3.version} + + + com.fasterxml.jackson.core + jackson-core + ${fasterxml.spark3.version} + + + + org.apache.hudi + hudi-spark-client + ${project.version} + + + + org.apache.hudi + hudi-spark-common_${scala.binary.version} + ${project.version} + + + + org.json4s + json4s-jackson_${scala.binary.version} + 3.7.0-M11 + + + com.fasterxml.jackson.core + * + + + + + + + org.apache.hudi + hudi-spark3-common + ${project.version} + + + + + org.apache.hudi + hudi-spark3.2plus-common + ${project.version} + + + + + org.apache.hudi + hudi-tests-common + ${project.version} + test + + + + org.apache.hudi + hudi-client-common + ${project.version} + tests + test-jar + test + + + + org.apache.hudi + hudi-spark-client + ${project.version} + tests + test-jar + test + + + + org.apache.hudi + hudi-common + ${project.version} + tests + test-jar + test + + + + org.apache.hudi + hudi-spark-common_${scala.binary.version} + ${project.version} + tests + test-jar + test + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark3.version} + tests + test + + + + org.apache.parquet + parquet-avro + test + + + + org.apache.hadoop + hadoop-hdfs + tests + test + + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + + + + + diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 new file mode 100644 index 000000000000..d7f87b4e5aa5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 @@ -0,0 +1,1940 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar. + */ + +// The parser file is forked from spark 3.2.0's SqlBase.g4. +grammar SqlBase; + +@parser::members { + /** + * When false, INTERSECT is given the greater precedence over the other set + * operations (UNION, EXCEPT and MINUS) as per the SQL standard. + */ + public boolean legacy_setops_precedence_enabled = false; + + /** + * When false, a literal with an exponent would be converted into + * double type rather than decimal type. + */ + public boolean legacy_exponent_literal_as_decimal_enabled = false; + + /** + * When true, the behavior of keywords follows ANSI SQL standard. + */ + public boolean SQL_standard_keyword_behavior = false; +} + +@lexer::members { + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. + */ + public boolean isValidDecimal() { + int nextChar = _input.LA(1); + if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_') { + return false; + } else { + return true; + } + } + + /** + * This method will be called when we see '/*' and try to match it as a bracketed comment. + * If the next character is '+', it should be parsed as hint later, and we cannot match + * it as a bracketed comment. + * + * Returns true if the next character is '+'. + */ + public boolean isHint() { + int nextChar = _input.LA(1); + if (nextChar == '+') { + return true; + } else { + return false; + } + } +} + +singleStatement + : statement ';'* EOF + ; + +singleExpression + : namedExpression EOF + ; + +singleTableIdentifier + : tableIdentifier EOF + ; + +singleMultipartIdentifier + : multipartIdentifier EOF + ; + +singleFunctionIdentifier + : functionIdentifier EOF + ; + +singleDataType + : dataType EOF + ; + +singleTableSchema + : colTypeList EOF + ; + +statement + : query #statementDefault + | ctes? dmlStatementNoWith #dmlStatement + | USE NAMESPACE? multipartIdentifier #use + | CREATE namespace (IF NOT EXISTS)? multipartIdentifier + (commentSpec | + locationSpec | + (WITH (DBPROPERTIES | PROPERTIES) tablePropertyList))* #createNamespace + | ALTER namespace multipartIdentifier + SET (DBPROPERTIES | PROPERTIES) tablePropertyList #setNamespaceProperties + | ALTER namespace multipartIdentifier + SET locationSpec #setNamespaceLocation + | DROP namespace (IF EXISTS)? multipartIdentifier + (RESTRICT | CASCADE)? #dropNamespace + | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)? + (LIKE? pattern=STRING)? #showNamespaces + | createTableHeader ('(' colTypeList ')')? tableProvider? + createTableClauses + (AS? query)? #createTable + | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier + LIKE source=tableIdentifier + (tableProvider | + rowFormat | + createFileFormat | + locationSpec | + (TBLPROPERTIES tableProps=tablePropertyList))* #createTableLike + | replaceTableHeader ('(' colTypeList ')')? tableProvider? + createTableClauses + (AS? query)? #replaceTable + | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS + (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze + | ANALYZE TABLES ((FROM | IN) multipartIdentifier)? COMPUTE STATISTICS + (identifier)? #analyzeTables + | ALTER TABLE multipartIdentifier + ADD (COLUMN | COLUMNS) + columns=qualifiedColTypeWithPositionList #addTableColumns + | ALTER TABLE multipartIdentifier + ADD (COLUMN | COLUMNS) + '(' columns=qualifiedColTypeWithPositionList ')' #addTableColumns + | ALTER TABLE table=multipartIdentifier + RENAME COLUMN + from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn + | ALTER TABLE multipartIdentifier + DROP (COLUMN | COLUMNS) + '(' columns=multipartIdentifierList ')' #dropTableColumns + | ALTER TABLE multipartIdentifier + DROP (COLUMN | COLUMNS) columns=multipartIdentifierList #dropTableColumns + | ALTER (TABLE | VIEW) from=multipartIdentifier + RENAME TO to=multipartIdentifier #renameTable + | ALTER (TABLE | VIEW) multipartIdentifier + SET TBLPROPERTIES tablePropertyList #setTableProperties + | ALTER (TABLE | VIEW) multipartIdentifier + UNSET TBLPROPERTIES (IF EXISTS)? tablePropertyList #unsetTableProperties + | ALTER TABLE table=multipartIdentifier + (ALTER | CHANGE) COLUMN? column=multipartIdentifier + alterColumnAction? #alterTableAlterColumn + | ALTER TABLE table=multipartIdentifier partitionSpec? + CHANGE COLUMN? + colName=multipartIdentifier colType colPosition? #hiveChangeColumn + | ALTER TABLE table=multipartIdentifier partitionSpec? + REPLACE COLUMNS + '(' columns=qualifiedColTypeWithPositionList ')' #hiveReplaceColumns + | ALTER TABLE multipartIdentifier (partitionSpec)? + SET SERDE STRING (WITH SERDEPROPERTIES tablePropertyList)? #setTableSerDe + | ALTER TABLE multipartIdentifier (partitionSpec)? + SET SERDEPROPERTIES tablePropertyList #setTableSerDe + | ALTER (TABLE | VIEW) multipartIdentifier ADD (IF NOT EXISTS)? + partitionSpecLocation+ #addTablePartition + | ALTER TABLE multipartIdentifier + from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition + | ALTER (TABLE | VIEW) multipartIdentifier + DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions + | ALTER TABLE multipartIdentifier + (partitionSpec)? SET locationSpec #setTableLocation + | ALTER TABLE multipartIdentifier RECOVER PARTITIONS #recoverPartitions + | DROP TABLE (IF EXISTS)? multipartIdentifier PURGE? #dropTable + | DROP VIEW (IF EXISTS)? multipartIdentifier #dropView + | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? + VIEW (IF NOT EXISTS)? multipartIdentifier + identifierCommentList? + (commentSpec | + (PARTITIONED ON identifierList) | + (TBLPROPERTIES tablePropertyList))* + AS query #createView + | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW + tableIdentifier ('(' colTypeList ')')? tableProvider + (OPTIONS tablePropertyList)? #createTempViewUsing + | ALTER VIEW multipartIdentifier AS? query #alterViewQuery + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? + multipartIdentifier AS className=STRING + (USING resource (',' resource)*)? #createFunction + | DROP TEMPORARY? FUNCTION (IF EXISTS)? multipartIdentifier #dropFunction + | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? + statement #explain + | SHOW TABLES ((FROM | IN) multipartIdentifier)? + (LIKE? pattern=STRING)? #showTables + | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? + LIKE pattern=STRING partitionSpec? #showTableExtended + | SHOW TBLPROPERTIES table=multipartIdentifier + ('(' key=tablePropertyKey ')')? #showTblProperties + | SHOW COLUMNS (FROM | IN) table=multipartIdentifier + ((FROM | IN) ns=multipartIdentifier)? #showColumns + | SHOW VIEWS ((FROM | IN) multipartIdentifier)? + (LIKE? pattern=STRING)? #showViews + | SHOW PARTITIONS multipartIdentifier partitionSpec? #showPartitions + | SHOW identifier? FUNCTIONS + (LIKE? (multipartIdentifier | pattern=STRING))? #showFunctions + | SHOW CREATE TABLE multipartIdentifier (AS SERDE)? #showCreateTable + | SHOW CURRENT NAMESPACE #showCurrentNamespace + | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction + | (DESC | DESCRIBE) namespace EXTENDED? + multipartIdentifier #describeNamespace + | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? + multipartIdentifier partitionSpec? describeColName? #describeRelation + | (DESC | DESCRIBE) QUERY? query #describeQuery + | COMMENT ON namespace multipartIdentifier IS + comment=(STRING | NULL) #commentNamespace + | COMMENT ON TABLE multipartIdentifier IS comment=(STRING | NULL) #commentTable + | REFRESH TABLE multipartIdentifier #refreshTable + | REFRESH FUNCTION multipartIdentifier #refreshFunction + | REFRESH (STRING | .*?) #refreshResource + | CACHE LAZY? TABLE multipartIdentifier + (OPTIONS options=tablePropertyList)? (AS? query)? #cacheTable + | UNCACHE TABLE (IF EXISTS)? multipartIdentifier #uncacheTable + | CLEAR CACHE #clearCache + | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE + multipartIdentifier partitionSpec? #loadData + | TRUNCATE TABLE multipartIdentifier partitionSpec? #truncateTable + | MSCK REPAIR TABLE multipartIdentifier + (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable + | op=(ADD | LIST) identifier .*? #manageResource + | SET ROLE .*? #failNativeCommand + | SET TIME ZONE interval #setTimeZone + | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone + | SET TIME ZONE .*? #setTimeZone + | SET configKey EQ configValue #setQuotedConfiguration + | SET configKey (EQ .*?)? #setQuotedConfiguration + | SET .*? EQ configValue #setQuotedConfiguration + | SET .*? #setConfiguration + | RESET configKey #resetQuotedConfiguration + | RESET .*? #resetConfiguration + | unsupportedHiveNativeCommands .*? #failNativeCommand + ; + +configKey + : quotedIdentifier + ; + +configValue + : quotedIdentifier + ; + +unsupportedHiveNativeCommands + : kw1=CREATE kw2=ROLE + | kw1=DROP kw2=ROLE + | kw1=GRANT kw2=ROLE? + | kw1=REVOKE kw2=ROLE? + | kw1=SHOW kw2=GRANT + | kw1=SHOW kw2=ROLE kw3=GRANT? + | kw1=SHOW kw2=PRINCIPALS + | kw1=SHOW kw2=ROLES + | kw1=SHOW kw2=CURRENT kw3=ROLES + | kw1=EXPORT kw2=TABLE + | kw1=IMPORT kw2=TABLE + | kw1=SHOW kw2=COMPACTIONS + | kw1=SHOW kw2=CREATE kw3=TABLE + | kw1=SHOW kw2=TRANSACTIONS + | kw1=SHOW kw2=INDEXES + | kw1=SHOW kw2=LOCKS + | kw1=CREATE kw2=INDEX + | kw1=DROP kw2=INDEX + | kw1=ALTER kw2=INDEX + | kw1=LOCK kw2=TABLE + | kw1=LOCK kw2=DATABASE + | kw1=UNLOCK kw2=TABLE + | kw1=UNLOCK kw2=DATABASE + | kw1=CREATE kw2=TEMPORARY kw3=MACRO + | kw1=DROP kw2=TEMPORARY kw3=MACRO + | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED + | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY + | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED + | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY + | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED + | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES + | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION + | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION + | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION + | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION + | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH + | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT + | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE + | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT + | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS + | kw1=START kw2=TRANSACTION + | kw1=COMMIT + | kw1=ROLLBACK + | kw1=DFS + ; + +createTableHeader + : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? multipartIdentifier + ; + +replaceTableHeader + : (CREATE OR)? REPLACE TABLE multipartIdentifier + ; + +bucketSpec + : CLUSTERED BY identifierList + (SORTED BY orderedIdentifierList)? + INTO INTEGER_VALUE BUCKETS + ; + +skewSpec + : SKEWED BY identifierList + ON (constantList | nestedConstantList) + (STORED AS DIRECTORIES)? + ; + +locationSpec + : LOCATION STRING + ; + +commentSpec + : COMMENT STRING + ; + +query + : ctes? queryTerm queryOrganization + ; + +insertInto + : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable + | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable + | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir + | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? #insertOverwriteDir + ; + +partitionSpecLocation + : partitionSpec locationSpec? + ; + +partitionSpec + : PARTITION '(' partitionVal (',' partitionVal)* ')' + ; + +partitionVal + : identifier (EQ constant)? + ; + +namespace + : NAMESPACE + | DATABASE + | SCHEMA + ; + +describeFuncName + : qualifiedName + | STRING + | comparisonOperator + | arithmeticOperator + | predicateOperator + ; + +describeColName + : nameParts+=identifier ('.' nameParts+=identifier)* + ; + +ctes + : WITH namedQuery (',' namedQuery)* + ; + +namedQuery + : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? '(' query ')' + ; + +tableProvider + : USING multipartIdentifier + ; + +createTableClauses + :((OPTIONS options=tablePropertyList) | + (PARTITIONED BY partitioning=partitionFieldList) | + skewSpec | + bucketSpec | + rowFormat | + createFileFormat | + locationSpec | + commentSpec | + (TBLPROPERTIES tableProps=tablePropertyList))* + ; + +tablePropertyList + : '(' tableProperty (',' tableProperty)* ')' + ; + +tableProperty + : key=tablePropertyKey (EQ? value=tablePropertyValue)? + ; + +tablePropertyKey + : identifier ('.' identifier)* + | STRING + ; + +tablePropertyValue + : INTEGER_VALUE + | DECIMAL_VALUE + | booleanValue + | STRING + ; + +constantList + : '(' constant (',' constant)* ')' + ; + +nestedConstantList + : '(' constantList (',' constantList)* ')' + ; + +createFileFormat + : STORED AS fileFormat + | STORED BY storageHandler + ; + +fileFormat + : INPUTFORMAT inFmt=STRING OUTPUTFORMAT outFmt=STRING #tableFileFormat + | identifier #genericFileFormat + ; + +storageHandler + : STRING (WITH SERDEPROPERTIES tablePropertyList)? + ; + +resource + : identifier STRING + ; + +dmlStatementNoWith + : insertInto queryTerm queryOrganization #singleInsertQuery + | fromClause multiInsertQueryBody+ #multiInsertQuery + | DELETE FROM multipartIdentifier tableAlias whereClause? #deleteFromTable + | UPDATE multipartIdentifier tableAlias setClause whereClause? #updateTable + | MERGE INTO target=multipartIdentifier targetAlias=tableAlias + USING (source=multipartIdentifier | + '(' sourceQuery=query')') sourceAlias=tableAlias + ON mergeCondition=booleanExpression + matchedClause* + notMatchedClause* #mergeIntoTable + ; + +queryOrganization + : (ORDER BY order+=sortItem (',' order+=sortItem)*)? + (CLUSTER BY clusterBy+=expression (',' clusterBy+=expression)*)? + (DISTRIBUTE BY distributeBy+=expression (',' distributeBy+=expression)*)? + (SORT BY sort+=sortItem (',' sort+=sortItem)*)? + windowClause? + (LIMIT (ALL | limit=expression))? + ; + +multiInsertQueryBody + : insertInto fromStatementBody + ; + +queryTerm + : queryPrimary #queryTermDefault + | left=queryTerm {legacy_setops_precedence_enabled}? + operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation + | left=queryTerm {!legacy_setops_precedence_enabled}? + operator=INTERSECT setQuantifier? right=queryTerm #setOperation + | left=queryTerm {!legacy_setops_precedence_enabled}? + operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation + ; + +queryPrimary + : querySpecification #queryPrimaryDefault + | fromStatement #fromStmt + | TABLE multipartIdentifier #table + | inlineTable #inlineTableDefault1 + | '(' query ')' #subquery + ; + +sortItem + : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))? + ; + +fromStatement + : fromClause fromStatementBody+ + ; + +fromStatementBody + : transformClause + whereClause? + queryOrganization + | selectClause + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? + queryOrganization + ; + +querySpecification + : transformClause + fromClause? + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? #transformQuerySpecification + | selectClause + fromClause? + lateralView* + whereClause? + aggregationClause? + havingClause? + windowClause? #regularQuerySpecification + ; + +transformClause + : (SELECT kind=TRANSFORM '(' setQuantifier? expressionSeq ')' + | kind=MAP setQuantifier? expressionSeq + | kind=REDUCE setQuantifier? expressionSeq) + inRowFormat=rowFormat? + (RECORDWRITER recordWriter=STRING)? + USING script=STRING + (AS (identifierSeq | colTypeList | ('(' (identifierSeq | colTypeList) ')')))? + outRowFormat=rowFormat? + (RECORDREADER recordReader=STRING)? + ; + +selectClause + : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq + ; + +setClause + : SET assignmentList + ; + +matchedClause + : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction + ; +notMatchedClause + : WHEN NOT MATCHED (AND notMatchedCond=booleanExpression)? THEN notMatchedAction + ; + +matchedAction + : DELETE + | UPDATE SET ASTERISK + | UPDATE SET assignmentList + ; + +notMatchedAction + : INSERT ASTERISK + | INSERT '(' columns=multipartIdentifierList ')' + VALUES '(' expression (',' expression)* ')' + ; + +assignmentList + : assignment (',' assignment)* + ; + +assignment + : key=multipartIdentifier EQ value=expression + ; + +whereClause + : WHERE booleanExpression + ; + +havingClause + : HAVING booleanExpression + ; + +hint + : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/' + ; + +hintStatement + : hintName=identifier + | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')' + ; + +fromClause + : FROM relation (',' relation)* lateralView* pivotClause? + ; + +temporalClause + : FOR? (SYSTEM_TIME | TIMESTAMP) AS OF timestamp=valueExpression + | FOR? (SYSTEM_VERSION | VERSION) AS OF version=(INTEGER_VALUE | STRING) + ; + +aggregationClause + : GROUP BY groupingExpressionsWithGroupingAnalytics+=groupByClause + (',' groupingExpressionsWithGroupingAnalytics+=groupByClause)* + | GROUP BY groupingExpressions+=expression (',' groupingExpressions+=expression)* ( + WITH kind=ROLLUP + | WITH kind=CUBE + | kind=GROUPING SETS '(' groupingSet (',' groupingSet)* ')')? + ; + +groupByClause + : groupingAnalytics + | expression + ; + +groupingAnalytics + : (ROLLUP | CUBE) '(' groupingSet (',' groupingSet)* ')' + | GROUPING SETS '(' groupingElement (',' groupingElement)* ')' + ; + +groupingElement + : groupingAnalytics + | groupingSet + ; + +groupingSet + : '(' (expression (',' expression)*)? ')' + | expression + ; + +pivotClause + : PIVOT '(' aggregates=namedExpressionSeq FOR pivotColumn IN '(' pivotValues+=pivotValue (',' pivotValues+=pivotValue)* ')' ')' + ; + +pivotColumn + : identifiers+=identifier + | '(' identifiers+=identifier (',' identifiers+=identifier)* ')' + ; + +pivotValue + : expression (AS? identifier)? + ; + +lateralView + : LATERAL VIEW (OUTER)? qualifiedName '(' (expression (',' expression)*)? ')' tblName=identifier (AS? colName+=identifier (',' colName+=identifier)*)? + ; + +setQuantifier + : DISTINCT + | ALL + ; + +relation + : LATERAL? relationPrimary joinRelation* + ; + +joinRelation + : (joinType) JOIN LATERAL? right=relationPrimary joinCriteria? + | NATURAL joinType JOIN LATERAL? right=relationPrimary + ; + +joinType + : INNER? + | CROSS + | LEFT OUTER? + | LEFT? SEMI + | RIGHT OUTER? + | FULL OUTER? + | LEFT? ANTI + ; + +joinCriteria + : ON booleanExpression + | USING identifierList + ; + +sample + : TABLESAMPLE '(' sampleMethod? ')' + ; + +sampleMethod + : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT #sampleByPercentile + | expression ROWS #sampleByRows + | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE + (ON (identifier | qualifiedName '(' ')'))? #sampleByBucket + | bytes=expression #sampleByBytes + ; + +identifierList + : '(' identifierSeq ')' + ; + +identifierSeq + : ident+=errorCapturingIdentifier (',' ident+=errorCapturingIdentifier)* + ; + +orderedIdentifierList + : '(' orderedIdentifier (',' orderedIdentifier)* ')' + ; + +orderedIdentifier + : ident=errorCapturingIdentifier ordering=(ASC | DESC)? + ; + +identifierCommentList + : '(' identifierComment (',' identifierComment)* ')' + ; + +identifierComment + : identifier commentSpec? + ; + +relationPrimary + : multipartIdentifier temporalClause? + sample? tableAlias #tableName + | '(' query ')' sample? tableAlias #aliasedQuery + | '(' relation ')' sample? tableAlias #aliasedRelation + | inlineTable #inlineTableDefault2 + | functionTable #tableValuedFunction + ; + +inlineTable + : VALUES expression (',' expression)* tableAlias + ; + +functionTable + : funcName=functionName '(' (expression (',' expression)*)? ')' tableAlias + ; + +tableAlias + : (AS? strictIdentifier identifierList?)? + ; + +rowFormat + : ROW FORMAT SERDE name=STRING (WITH SERDEPROPERTIES props=tablePropertyList)? #rowFormatSerde + | ROW FORMAT DELIMITED + (FIELDS TERMINATED BY fieldsTerminatedBy=STRING (ESCAPED BY escapedBy=STRING)?)? + (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=STRING)? + (MAP KEYS TERMINATED BY keysTerminatedBy=STRING)? + (LINES TERMINATED BY linesSeparatedBy=STRING)? + (NULL DEFINED AS nullDefinedAs=STRING)? #rowFormatDelimited + ; + +multipartIdentifierList + : multipartIdentifier (',' multipartIdentifier)* + ; + +multipartIdentifier + : parts+=errorCapturingIdentifier ('.' parts+=errorCapturingIdentifier)* + ; + +tableIdentifier + : (db=errorCapturingIdentifier '.')? table=errorCapturingIdentifier + ; + +functionIdentifier + : (db=errorCapturingIdentifier '.')? function=errorCapturingIdentifier + ; + +multipartIdentifierPropertyList + : multipartIdentifierProperty (COMMA multipartIdentifierProperty)* + ; + +multipartIdentifierProperty + : multipartIdentifier (OPTIONS options=propertyList)? + ; + +propertyList + : LEFT_PAREN property (COMMA property)* RIGHT_PAREN + ; + +property + : key=propertyKey (EQ? value=propertyValue)? + ; + +propertyKey + : identifier (DOT identifier)* + | STRING + ; + +propertyValue + : INTEGER_VALUE + | DECIMAL_VALUE + | booleanValue + | STRING + ; + +namedExpression + : expression (AS? (name=errorCapturingIdentifier | identifierList))? + ; + +namedExpressionSeq + : namedExpression (',' namedExpression)* + ; + +partitionFieldList + : '(' fields+=partitionField (',' fields+=partitionField)* ')' + ; + +partitionField + : transform #partitionTransform + | colType #partitionColumn + ; + +transform + : qualifiedName #identityTransform + | transformName=identifier + '(' argument+=transformArgument (',' argument+=transformArgument)* ')' #applyTransform + ; + +transformArgument + : qualifiedName + | constant + ; + +expression + : booleanExpression + ; + +expressionSeq + : expression (',' expression)* + ; + +booleanExpression + : NOT booleanExpression #logicalNot + | EXISTS '(' query ')' #exists + | valueExpression predicate? #predicated + | left=booleanExpression operator=AND right=booleanExpression #logicalBinary + | left=booleanExpression operator=OR right=booleanExpression #logicalBinary + ; + +predicate + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN '(' expression (',' expression)* ')' + | NOT? kind=IN '(' query ')' + | NOT? kind=RLIKE pattern=valueExpression + | NOT? kind=LIKE quantifier=(ANY | SOME | ALL) ('('')' | '(' expression (',' expression)* ')') + | NOT? kind=LIKE pattern=valueExpression (ESCAPE escapeChar=STRING)? + | IS NOT? kind=NULL + | IS NOT? kind=(TRUE | FALSE | UNKNOWN) + | IS NOT? kind=DISTINCT FROM right=valueExpression + ; + +valueExpression + : primaryExpression #valueExpressionDefault + | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary + | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary + | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary + | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary + | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary + | left=valueExpression comparisonOperator right=valueExpression #comparison + ; + +primaryExpression + : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike + | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase + | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase + | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast + | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct + | FIRST '(' expression (IGNORE NULLS)? ')' #first + | LAST '(' expression (IGNORE NULLS)? ')' #last + | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position + | constant #constantDefault + | ASTERISK #star + | qualifiedName '.' ASTERISK #star + | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor + | '(' query ')' #subqueryExpression + | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' + (FILTER '(' WHERE where=booleanExpression ')')? + (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall + | identifier '->' expression #lambda + | '(' identifier (',' identifier)+ ')' '->' expression #lambda + | value=primaryExpression '[' index=valueExpression ']' #subscript + | identifier #columnReference + | base=primaryExpression '.' fieldName=identifier #dereference + | '(' expression ')' #parenthesizedExpression + | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract + | (SUBSTR | SUBSTRING) '(' str=valueExpression (FROM | ',') pos=valueExpression + ((FOR | ',') len=valueExpression)? ')' #substring + | TRIM '(' trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? + FROM srcStr=valueExpression ')' #trim + | OVERLAY '(' input=valueExpression PLACING replace=valueExpression + FROM position=valueExpression (FOR length=valueExpression)? ')' #overlay + ; + +constant + : NULL #nullLiteral + | interval #intervalLiteral + | identifier STRING #typeConstructor + | number #numericLiteral + | booleanValue #booleanLiteral + | STRING+ #stringLiteral + ; + +comparisonOperator + : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ + ; + +arithmeticOperator + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT + ; + +predicateOperator + : OR | AND | IN | NOT + ; + +booleanValue + : TRUE | FALSE + ; + +interval + : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? + ; + +errorCapturingMultiUnitsInterval + : body=multiUnitsInterval unitToUnitInterval? + ; + +multiUnitsInterval + : (intervalValue unit+=identifier)+ + ; + +errorCapturingUnitToUnitInterval + : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)? + ; + +unitToUnitInterval + : value=intervalValue from=identifier TO to=identifier + ; + +intervalValue + : (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE | STRING) + ; + +colPosition + : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier + ; + +dataType + : complex=ARRAY '<' dataType '>' #complexDataType + | complex=MAP '<' dataType ',' dataType '>' #complexDataType + | complex=STRUCT ('<' complexColTypeList? '>' | NEQ) #complexDataType + | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType + | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) + (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType + | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + ; + +qualifiedColTypeWithPositionList + : qualifiedColTypeWithPosition (',' qualifiedColTypeWithPosition)* + ; + +qualifiedColTypeWithPosition + : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? + ; + +colTypeList + : colType (',' colType)* + ; + +colType + : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? + ; + +complexColTypeList + : complexColType (',' complexColType)* + ; + +complexColType + : identifier ':'? dataType (NOT NULL)? commentSpec? + ; + +whenClause + : WHEN condition=expression THEN result=expression + ; + +windowClause + : WINDOW namedWindow (',' namedWindow)* + ; + +namedWindow + : name=errorCapturingIdentifier AS windowSpec + ; + +windowSpec + : name=errorCapturingIdentifier #windowRef + | '('name=errorCapturingIdentifier')' #windowRef + | '(' + ( CLUSTER BY partition+=expression (',' partition+=expression)* + | ((PARTITION | DISTRIBUTE) BY partition+=expression (',' partition+=expression)*)? + ((ORDER | SORT) BY sortItem (',' sortItem)*)?) + windowFrame? + ')' #windowDef + ; + +windowFrame + : frameType=RANGE start=frameBound + | frameType=ROWS start=frameBound + | frameType=RANGE BETWEEN start=frameBound AND end=frameBound + | frameType=ROWS BETWEEN start=frameBound AND end=frameBound + ; + +frameBound + : UNBOUNDED boundType=(PRECEDING | FOLLOWING) + | boundType=CURRENT ROW + | expression boundType=(PRECEDING | FOLLOWING) + ; + +qualifiedNameList + : qualifiedName (',' qualifiedName)* + ; + +functionName + : qualifiedName + | FILTER + | LEFT + | RIGHT + ; + +qualifiedName + : identifier ('.' identifier)* + ; + +// this rule is used for explicitly capturing wrong identifiers such as test-table, which should actually be `test-table` +// replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise +// valid expressions such as "a-b" can be recognized as an identifier +errorCapturingIdentifier + : identifier errorCapturingIdentifierExtra + ; + +// extra left-factoring grammar +errorCapturingIdentifierExtra + : (MINUS identifier)+ #errorIdent + | #realIdent + ; + +identifier + : strictIdentifier + | {!SQL_standard_keyword_behavior}? strictNonReserved + ; + +strictIdentifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier + | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier + ; + +quotedIdentifier + : BACKQUOTED_IDENTIFIER + ; + +number + : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral + | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral + | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral + | MINUS? INTEGER_VALUE #integerLiteral + | MINUS? BIGINT_LITERAL #bigIntLiteral + | MINUS? SMALLINT_LITERAL #smallIntLiteral + | MINUS? TINYINT_LITERAL #tinyIntLiteral + | MINUS? DOUBLE_LITERAL #doubleLiteral + | MINUS? FLOAT_LITERAL #floatLiteral + | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral + ; + +alterColumnAction + : TYPE dataType + | commentSpec + | colPosition + | setOrDrop=(SET | DROP) NOT NULL + ; + +// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. +// - Reserved keywords: +// Keywords that are reserved and can't be used as identifiers for table, view, column, +// function, alias, etc. +// - Non-reserved keywords: +// Keywords that have a special meaning only in particular contexts and can be used as +// identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN +// can be used as identifiers in other places. +// You can find the full keywords list by searching "Start of the keywords list" in this file. +// The non-reserved keywords are listed below. Keywords not in this list are reserved keywords. +ansiNonReserved +//--ANSI-NON-RESERVED-START + : ADD + | AFTER + | ALTER + | ANALYZE + | ANTI + | ARCHIVE + | ARRAY + | ASC + | AT + | BETWEEN + | BUCKET + | BUCKETS + | BY + | CACHE + | CASCADE + | CHANGE + | CLEAR + | CLUSTER + | CLUSTERED + | CODEGEN + | COLLECTION + | COLUMNS + | COMMENT + | COMMIT + | COMPACT + | COMPACTIONS + | COMPUTE + | CONCATENATE + | COST + | CUBE + | CURRENT + | DATA + | DATABASE + | DATABASES + | DAY + | DBPROPERTIES + | DEFINED + | DELETE + | DELIMITED + | DESC + | DESCRIBE + | DFS + | DIRECTORIES + | DIRECTORY + | DISTRIBUTE + | DIV + | DROP + | ESCAPED + | EXCHANGE + | EXISTS + | EXPLAIN + | EXPORT + | EXTENDED + | EXTERNAL + | EXTRACT + | FIELDS + | FILEFORMAT + | FIRST + | FOLLOWING + | FORMAT + | FORMATTED + | FUNCTION + | FUNCTIONS + | GLOBAL + | GROUPING + | HOUR + | IF + | IGNORE + | IMPORT + | INDEX + | INDEXES + | INPATH + | INPUTFORMAT + | INSERT + | INTERVAL + | ITEMS + | KEYS + | LAST + | LAZY + | LIKE + | LIMIT + | LINES + | LIST + | LOAD + | LOCAL + | LOCATION + | LOCK + | LOCKS + | LOGICAL + | MACRO + | MAP + | MATCHED + | MERGE + | MINUTE + | MONTH + | MSCK + | NAMESPACE + | NAMESPACES + | NO + | NULLS + | OF + | OPTION + | OPTIONS + | OUT + | OUTPUTFORMAT + | OVER + | OVERLAY + | OVERWRITE + | PARTITION + | PARTITIONED + | PARTITIONS + | PERCENTLIT + | PIVOT + | PLACING + | POSITION + | PRECEDING + | PRINCIPALS + | PROPERTIES + | PURGE + | QUERY + | RANGE + | RECORDREADER + | RECORDWRITER + | RECOVER + | REDUCE + | REFRESH + | RENAME + | REPAIR + | REPLACE + | RESET + | RESPECT + | RESTRICT + | REVOKE + | RLIKE + | ROLE + | ROLES + | ROLLBACK + | ROLLUP + | ROW + | ROWS + | SCHEMA + | SECOND + | SEMI + | SEPARATED + | SERDE + | SERDEPROPERTIES + | SET + | SETMINUS + | SETS + | SHOW + | SKEWED + | SORT + | SORTED + | START + | STATISTICS + | STORED + | STRATIFY + | STRUCT + | SUBSTR + | SUBSTRING + | SYNC + | TABLES + | TABLESAMPLE + | TBLPROPERTIES + | TEMPORARY + | TERMINATED + | TOUCH + | TRANSACTION + | TRANSACTIONS + | TRANSFORM + | TRIM + | TRUE + | TRUNCATE + | TRY_CAST + | TYPE + | UNARCHIVE + | UNBOUNDED + | UNCACHE + | UNLOCK + | UNSET + | UPDATE + | USE + | VALUES + | VIEW + | VIEWS + | WINDOW + | YEAR + | ZONE +//--ANSI-NON-RESERVED-END + ; + +// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL. +// - Non-reserved keywords: +// Same definition as the one when `SQL_standard_keyword_behavior=true`. +// - Strict-non-reserved keywords: +// A strict version of non-reserved keywords, which can not be used as table alias. +// You can find the full keywords list by searching "Start of the keywords list" in this file. +// The strict-non-reserved keywords are listed in `strictNonReserved`. +// The non-reserved keywords are listed in `nonReserved`. +// These 2 together contain all the keywords. +strictNonReserved + : ANTI + | CROSS + | EXCEPT + | FULL + | INNER + | INTERSECT + | JOIN + | LATERAL + | LEFT + | NATURAL + | ON + | RIGHT + | SEMI + | SETMINUS + | UNION + | USING + ; + +nonReserved +//--DEFAULT-NON-RESERVED-START + : ADD + | AFTER + | ALL + | ALTER + | ANALYZE + | AND + | ANY + | ARCHIVE + | ARRAY + | AS + | ASC + | AT + | AUTHORIZATION + | BETWEEN + | BOTH + | BUCKET + | BUCKETS + | BY + | CACHE + | CASCADE + | CASE + | CAST + | CHANGE + | CHECK + | CLEAR + | CLUSTER + | CLUSTERED + | CODEGEN + | COLLATE + | COLLECTION + | COLUMN + | COLUMNS + | COMMENT + | COMMIT + | COMPACT + | COMPACTIONS + | COMPUTE + | CONCATENATE + | CONSTRAINT + | COST + | CREATE + | CUBE + | CURRENT + | CURRENT_DATE + | CURRENT_TIME + | CURRENT_TIMESTAMP + | CURRENT_USER + | DATA + | DATABASE + | DATABASES + | DAY + | DBPROPERTIES + | DEFINED + | DELETE + | DELIMITED + | DESC + | DESCRIBE + | DFS + | DIRECTORIES + | DIRECTORY + | DISTINCT + | DISTRIBUTE + | DIV + | DROP + | ELSE + | END + | ESCAPE + | ESCAPED + | EXCHANGE + | EXISTS + | EXPLAIN + | EXPORT + | EXTENDED + | EXTERNAL + | EXTRACT + | FALSE + | FETCH + | FILTER + | FIELDS + | FILEFORMAT + | FIRST + | FOLLOWING + | FOR + | FOREIGN + | FORMAT + | FORMATTED + | FROM + | FUNCTION + | FUNCTIONS + | GLOBAL + | GRANT + | GROUP + | GROUPING + | HAVING + | HOUR + | IF + | IGNORE + | IMPORT + | IN + | INDEX + | INDEXES + | INPATH + | INPUTFORMAT + | INSERT + | INTERVAL + | INTO + | IS + | ITEMS + | KEYS + | LAST + | LAZY + | LEADING + | LIKE + | LIMIT + | LINES + | LIST + | LOAD + | LOCAL + | LOCATION + | LOCK + | LOCKS + | LOGICAL + | MACRO + | MAP + | MATCHED + | MERGE + | MINUTE + | MONTH + | MSCK + | NAMESPACE + | NAMESPACES + | NO + | NOT + | NULL + | NULLS + | OF + | ONLY + | OPTION + | OPTIONS + | OR + | ORDER + | OUT + | OUTER + | OUTPUTFORMAT + | OVER + | OVERLAPS + | OVERLAY + | OVERWRITE + | PARTITION + | PARTITIONED + | PARTITIONS + | PERCENTLIT + | PIVOT + | PLACING + | POSITION + | PRECEDING + | PRIMARY + | PRINCIPALS + | PROPERTIES + | PURGE + | QUERY + | RANGE + | RECORDREADER + | RECORDWRITER + | RECOVER + | REDUCE + | REFERENCES + | REFRESH + | RENAME + | REPAIR + | REPLACE + | RESET + | RESPECT + | RESTRICT + | REVOKE + | RLIKE + | ROLE + | ROLES + | ROLLBACK + | ROLLUP + | ROW + | ROWS + | SCHEMA + | SECOND + | SELECT + | SEPARATED + | SERDE + | SERDEPROPERTIES + | SESSION_USER + | SET + | SETS + | SHOW + | SKEWED + | SOME + | SORT + | SORTED + | START + | STATISTICS + | STORED + | STRATIFY + | STRUCT + | SUBSTR + | SUBSTRING + | SYNC + | TABLE + | TABLES + | TABLESAMPLE + | TBLPROPERTIES + | TEMPORARY + | TERMINATED + | THEN + | TIME + | TO + | TOUCH + | TRAILING + | TRANSACTION + | TRANSACTIONS + | TRANSFORM + | TRIM + | TRUE + | TRUNCATE + | TRY_CAST + | TYPE + | UNARCHIVE + | UNBOUNDED + | UNCACHE + | UNIQUE + | UNKNOWN + | UNLOCK + | UNSET + | UPDATE + | USE + | USER + | VALUES + | VIEW + | VIEWS + | WHEN + | WHERE + | WINDOW + | WITH + | YEAR + | ZONE + | SYSTEM_VERSION + | VERSION + | SYSTEM_TIME + | TIMESTAMP +//--DEFAULT-NON-RESERVED-END + ; + +// NOTE: If you add a new token in the list below, you should update the list of keywords +// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`. + +//============================ +// Start of the keywords list +//============================ +//--SPARK-KEYWORD-LIST-START +ADD: 'ADD'; +AFTER: 'AFTER'; +ALL: 'ALL'; +ALTER: 'ALTER'; +ANALYZE: 'ANALYZE'; +AND: 'AND'; +ANTI: 'ANTI'; +ANY: 'ANY'; +ARCHIVE: 'ARCHIVE'; +ARRAY: 'ARRAY'; +AS: 'AS'; +ASC: 'ASC'; +AT: 'AT'; +AUTHORIZATION: 'AUTHORIZATION'; +BETWEEN: 'BETWEEN'; +BOTH: 'BOTH'; +BUCKET: 'BUCKET'; +BUCKETS: 'BUCKETS'; +BY: 'BY'; +CACHE: 'CACHE'; +CASCADE: 'CASCADE'; +CASE: 'CASE'; +CAST: 'CAST'; +CHANGE: 'CHANGE'; +CHECK: 'CHECK'; +CLEAR: 'CLEAR'; +CLUSTER: 'CLUSTER'; +CLUSTERED: 'CLUSTERED'; +CODEGEN: 'CODEGEN'; +COLLATE: 'COLLATE'; +COLLECTION: 'COLLECTION'; +COLUMN: 'COLUMN'; +COLUMNS: 'COLUMNS'; +COMMENT: 'COMMENT'; +COMMIT: 'COMMIT'; +COMPACT: 'COMPACT'; +COMPACTIONS: 'COMPACTIONS'; +COMPUTE: 'COMPUTE'; +CONCATENATE: 'CONCATENATE'; +CONSTRAINT: 'CONSTRAINT'; +COST: 'COST'; +CREATE: 'CREATE'; +CROSS: 'CROSS'; +CUBE: 'CUBE'; +CURRENT: 'CURRENT'; +CURRENT_DATE: 'CURRENT_DATE'; +CURRENT_TIME: 'CURRENT_TIME'; +CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +CURRENT_USER: 'CURRENT_USER'; +DAY: 'DAY'; +DATA: 'DATA'; +DATABASE: 'DATABASE'; +DATABASES: 'DATABASES' | 'SCHEMAS'; +DBPROPERTIES: 'DBPROPERTIES'; +DEFINED: 'DEFINED'; +DELETE: 'DELETE'; +DELIMITED: 'DELIMITED'; +DESC: 'DESC'; +DESCRIBE: 'DESCRIBE'; +DFS: 'DFS'; +DIRECTORIES: 'DIRECTORIES'; +DIRECTORY: 'DIRECTORY'; +DISTINCT: 'DISTINCT'; +DISTRIBUTE: 'DISTRIBUTE'; +DIV: 'DIV'; +DROP: 'DROP'; +ELSE: 'ELSE'; +END: 'END'; +ESCAPE: 'ESCAPE'; +ESCAPED: 'ESCAPED'; +EXCEPT: 'EXCEPT'; +EXCHANGE: 'EXCHANGE'; +EXISTS: 'EXISTS'; +EXPLAIN: 'EXPLAIN'; +EXPORT: 'EXPORT'; +EXTENDED: 'EXTENDED'; +EXTERNAL: 'EXTERNAL'; +EXTRACT: 'EXTRACT'; +FALSE: 'FALSE'; +FETCH: 'FETCH'; +FIELDS: 'FIELDS'; +FILTER: 'FILTER'; +FILEFORMAT: 'FILEFORMAT'; +FIRST: 'FIRST'; +FOLLOWING: 'FOLLOWING'; +FOR: 'FOR'; +FOREIGN: 'FOREIGN'; +FORMAT: 'FORMAT'; +FORMATTED: 'FORMATTED'; +FROM: 'FROM'; +FULL: 'FULL'; +FUNCTION: 'FUNCTION'; +FUNCTIONS: 'FUNCTIONS'; +GLOBAL: 'GLOBAL'; +GRANT: 'GRANT'; +GROUP: 'GROUP'; +GROUPING: 'GROUPING'; +HAVING: 'HAVING'; +HOUR: 'HOUR'; +IF: 'IF'; +IGNORE: 'IGNORE'; +IMPORT: 'IMPORT'; +IN: 'IN'; +INDEX: 'INDEX'; +INDEXES: 'INDEXES'; +INNER: 'INNER'; +INPATH: 'INPATH'; +INPUTFORMAT: 'INPUTFORMAT'; +INSERT: 'INSERT'; +INTERSECT: 'INTERSECT'; +INTERVAL: 'INTERVAL'; +INTO: 'INTO'; +IS: 'IS'; +ITEMS: 'ITEMS'; +JOIN: 'JOIN'; +KEYS: 'KEYS'; +LAST: 'LAST'; +LATERAL: 'LATERAL'; +LAZY: 'LAZY'; +LEADING: 'LEADING'; +LEFT: 'LEFT'; +LIKE: 'LIKE'; +LIMIT: 'LIMIT'; +LINES: 'LINES'; +LIST: 'LIST'; +LOAD: 'LOAD'; +LOCAL: 'LOCAL'; +LOCATION: 'LOCATION'; +LOCK: 'LOCK'; +LOCKS: 'LOCKS'; +LOGICAL: 'LOGICAL'; +MACRO: 'MACRO'; +MAP: 'MAP'; +MATCHED: 'MATCHED'; +MERGE: 'MERGE'; +MINUTE: 'MINUTE'; +MONTH: 'MONTH'; +MSCK: 'MSCK'; +NAMESPACE: 'NAMESPACE'; +NAMESPACES: 'NAMESPACES'; +NATURAL: 'NATURAL'; +NO: 'NO'; +NOT: 'NOT' | '!'; +NULL: 'NULL'; +NULLS: 'NULLS'; +OF: 'OF'; +ON: 'ON'; +ONLY: 'ONLY'; +OPTION: 'OPTION'; +OPTIONS: 'OPTIONS'; +OR: 'OR'; +ORDER: 'ORDER'; +OUT: 'OUT'; +OUTER: 'OUTER'; +OUTPUTFORMAT: 'OUTPUTFORMAT'; +OVER: 'OVER'; +OVERLAPS: 'OVERLAPS'; +OVERLAY: 'OVERLAY'; +OVERWRITE: 'OVERWRITE'; +PARTITION: 'PARTITION'; +PARTITIONED: 'PARTITIONED'; +PARTITIONS: 'PARTITIONS'; +PERCENTLIT: 'PERCENT'; +PIVOT: 'PIVOT'; +PLACING: 'PLACING'; +POSITION: 'POSITION'; +PRECEDING: 'PRECEDING'; +PRIMARY: 'PRIMARY'; +PRINCIPALS: 'PRINCIPALS'; +PROPERTIES: 'PROPERTIES'; +PURGE: 'PURGE'; +QUERY: 'QUERY'; +RANGE: 'RANGE'; +RECORDREADER: 'RECORDREADER'; +RECORDWRITER: 'RECORDWRITER'; +RECOVER: 'RECOVER'; +REDUCE: 'REDUCE'; +REFERENCES: 'REFERENCES'; +REFRESH: 'REFRESH'; +RENAME: 'RENAME'; +REPAIR: 'REPAIR'; +REPLACE: 'REPLACE'; +RESET: 'RESET'; +RESPECT: 'RESPECT'; +RESTRICT: 'RESTRICT'; +REVOKE: 'REVOKE'; +RIGHT: 'RIGHT'; +RLIKE: 'RLIKE' | 'REGEXP'; +ROLE: 'ROLE'; +ROLES: 'ROLES'; +ROLLBACK: 'ROLLBACK'; +ROLLUP: 'ROLLUP'; +ROW: 'ROW'; +ROWS: 'ROWS'; +SECOND: 'SECOND'; +SCHEMA: 'SCHEMA'; +SELECT: 'SELECT'; +SEMI: 'SEMI'; +SEPARATED: 'SEPARATED'; +SERDE: 'SERDE'; +SERDEPROPERTIES: 'SERDEPROPERTIES'; +SESSION_USER: 'SESSION_USER'; +SET: 'SET'; +SETMINUS: 'MINUS'; +SETS: 'SETS'; +SHOW: 'SHOW'; +SKEWED: 'SKEWED'; +SOME: 'SOME'; +SORT: 'SORT'; +SORTED: 'SORTED'; +START: 'START'; +STATISTICS: 'STATISTICS'; +STORED: 'STORED'; +STRATIFY: 'STRATIFY'; +STRUCT: 'STRUCT'; +SUBSTR: 'SUBSTR'; +SUBSTRING: 'SUBSTRING'; +SYNC: 'SYNC'; +TABLE: 'TABLE'; +TABLES: 'TABLES'; +TABLESAMPLE: 'TABLESAMPLE'; +TBLPROPERTIES: 'TBLPROPERTIES'; +TEMPORARY: 'TEMPORARY' | 'TEMP'; +TERMINATED: 'TERMINATED'; +THEN: 'THEN'; +TIME: 'TIME'; +TO: 'TO'; +TOUCH: 'TOUCH'; +TRAILING: 'TRAILING'; +TRANSACTION: 'TRANSACTION'; +TRANSACTIONS: 'TRANSACTIONS'; +TRANSFORM: 'TRANSFORM'; +TRIM: 'TRIM'; +TRUE: 'TRUE'; +TRUNCATE: 'TRUNCATE'; +TRY_CAST: 'TRY_CAST'; +TYPE: 'TYPE'; +UNARCHIVE: 'UNARCHIVE'; +UNBOUNDED: 'UNBOUNDED'; +UNCACHE: 'UNCACHE'; +UNION: 'UNION'; +UNIQUE: 'UNIQUE'; +UNKNOWN: 'UNKNOWN'; +UNLOCK: 'UNLOCK'; +UNSET: 'UNSET'; +UPDATE: 'UPDATE'; +USE: 'USE'; +USER: 'USER'; +USING: 'USING'; +VALUES: 'VALUES'; +VIEW: 'VIEW'; +VIEWS: 'VIEWS'; +WHEN: 'WHEN'; +WHERE: 'WHERE'; +WINDOW: 'WINDOW'; +WITH: 'WITH'; +YEAR: 'YEAR'; +ZONE: 'ZONE'; + +SYSTEM_VERSION: 'SYSTEM_VERSION'; +VERSION: 'VERSION'; +SYSTEM_TIME: 'SYSTEM_TIME'; +TIMESTAMP: 'TIMESTAMP'; +//--SPARK-KEYWORD-LIST-END +//============================ +// End of the keywords list +//============================ +LEFT_PAREN: '('; +RIGHT_PAREN: ')'; +COMMA: ','; +DOT: '.'; + +EQ : '=' | '=='; +NSEQ: '<=>'; +NEQ : '<>'; +NEQJ: '!='; +LT : '<'; +LTE : '<=' | '!>'; +GT : '>'; +GTE : '>=' | '!<'; + +PLUS: '+'; +MINUS: '-'; +ASTERISK: '*'; +SLASH: '/'; +PERCENT: '%'; +TILDE: '~'; +AMPERSAND: '&'; +PIPE: '|'; +CONCAT_PIPE: '||'; +HAT: '^'; + +STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) )* '"' + ; + +BIGINT_LITERAL + : DIGIT+ 'L' + ; + +SMALLINT_LITERAL + : DIGIT+ 'S' + ; + +TINYINT_LITERAL + : DIGIT+ 'Y' + ; + +INTEGER_VALUE + : DIGIT+ + ; + +EXPONENT_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? + ; + +DECIMAL_VALUE + : DECIMAL_DIGITS {isValidDecimal()}? + ; + +FLOAT_LITERAL + : DIGIT+ EXPONENT? 'F' + | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? + ; + +DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + ; + +BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? + ; + +IDENTIFIER + : (LETTER | DIGIT | '_')+ + ; + +BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' + ; + +fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + +fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; + +fragment DIGIT + : [0-9] + ; + +fragment LETTER + : [A-Z] + ; + +SIMPLE_COMMENT + : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) + ; + +BRACKETED_COMMENT + : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + ; + +WS + : [ \r\n\t]+ -> channel(HIDDEN) + ; + +// Catch-all for anything we can't recognize. +// We use this to be able to ignore and recover all the text +// when splitting statements with DelimiterLexer +UNRECOGNIZED + : . + ; diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 new file mode 100644 index 000000000000..ddbecfefc760 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlBase.g4 @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +grammar HoodieSqlBase; + +import SqlBase; + +singleStatement + : statement EOF + ; + +statement + : query #queryStatement + | ctes? dmlStatementNoWith #dmlStatement + | createTableHeader ('(' colTypeList ')')? tableProvider? + createTableClauses + (AS? query)? #createTable + | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? + tableIdentifier (USING indexType=identifier)? + LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN + (OPTIONS indexOptions=propertyList)? #createIndex + | DROP INDEX (IF EXISTS)? identifier ON TABLE? tableIdentifier #dropIndex + | SHOW INDEXES (FROM | IN) TABLE? tableIdentifier #showIndexes + | REFRESH INDEX identifier ON TABLE? tableIdentifier #refreshIndex + | .*? #passThrough + ; diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/hudi-spark-datasource/hudi-spark3.5.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 000000000000..c8dd99a95c27 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1,19 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +org.apache.hudi.Spark32PlusDefaultSource \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/hudi/Spark35HoodieFileScanRDD.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/hudi/Spark35HoodieFileScanRDD.scala new file mode 100644 index 000000000000..9ab3c04605d5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/hudi/Spark35HoodieFileScanRDD.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile} +import org.apache.spark.sql.types.StructType + +class Spark35HoodieFileScanRDD(@transient private val sparkSession: SparkSession, + read: PartitionedFile => Iterator[InternalRow], + @transient filePartitions: Seq[FilePartition], + readDataSchema: StructType, + metadataColumns: Seq[AttributeReference] = Seq.empty) + extends FileScanRDD(sparkSession, read, filePartitions, readDataSchema, metadataColumns) + with HoodieUnsafeRDD { + + override final def collect(): Array[InternalRow] = super[HoodieUnsafeRDD].collect() +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalogUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalogUtils.scala new file mode 100644 index 000000000000..b97f94e7de07 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalogUtils.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.connector.expressions.{BucketTransform, NamedReference, Transform} + +object HoodieSpark35CatalogUtils extends HoodieSpark3CatalogUtils { + + override def unapplyBucketTransform(t: Transform): Option[(Int, Seq[NamedReference], Seq[NamedReference])] = + t match { + case BucketTransform(numBuckets, refs, sortedRefs) => Some(numBuckets, refs, sortedRefs) + case _ => None + } + +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystExpressionUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystExpressionUtils.scala new file mode 100644 index 000000000000..ae4803dc8b91 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystExpressionUtils.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.HoodieSparkTypeUtils.isCastPreservingOrdering +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Add, Attribute, AttributeReference, AttributeSet, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, EvalMode, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, PredicateHelper, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.types.{DataType, StructType} + +object HoodieSpark35CatalystExpressionUtils extends HoodieSpark3CatalystExpressionUtils with PredicateHelper { + + override def getEncoder(schema: StructType): ExpressionEncoder[Row] = { + ExpressionEncoder.apply(schema).resolveAndBind() + } + + override def normalizeExprs(exprs: Seq[Expression], attributes: Seq[Attribute]): Seq[Expression] = { + DataSourceStrategy.normalizeExprs(exprs, attributes) + } + + override def extractPredicatesWithinOutputSet(condition: Expression, outputSet: AttributeSet): Option[Expression] = { + super[PredicateHelper].extractPredicatesWithinOutputSet(condition, outputSet) + } + + override def matchCast(expr: Expression): Option[(Expression, DataType, Option[String])] = { + expr match { + case Cast(child, dataType, timeZoneId, _) => Some((child, dataType, timeZoneId)) + case _ => None + } + } + + override def tryMatchAttributeOrderingPreservingTransformation(expr: Expression): Option[AttributeReference] = { + expr match { + case OrderPreservingTransformation(attrRef) => Some(attrRef) + case _ => None + } + } + + def canUpCast(fromType: DataType, toType: DataType): Boolean = + Cast.canUpCast(fromType, toType) + + override def unapplyCastExpression(expr: Expression): Option[(Expression, DataType, Option[String], Boolean)] = + expr match { + case Cast(castedExpr, dataType, timeZoneId, ansiEnabled) => + Some((castedExpr, dataType, timeZoneId, if (ansiEnabled == EvalMode.ANSI) true else false)) + case _ => None + } + + private object OrderPreservingTransformation { + def unapply(expr: Expression): Option[AttributeReference] = { + expr match { + // Date/Time Expressions + case DateFormatClass(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case DateAdd(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateSub(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case DateDiff(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + case FromUnixTime(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case FromUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ParseToDate(OrderPreservingTransformation(attrRef), _, _, _) => Some(attrRef) + case ParseToTimestamp(OrderPreservingTransformation(attrRef), _, _, _, _) => Some(attrRef) + case ToUnixTimestamp(OrderPreservingTransformation(attrRef), _, _, _) => Some(attrRef) + case ToUTCTimestamp(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // String Expressions + case Lower(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Upper(OrderPreservingTransformation(attrRef)) => Some(attrRef) + // Left API change: Improve RuntimeReplaceable + // https://issues.apache.org/jira/browse/SPARK-38240 + case org.apache.spark.sql.catalyst.expressions.Left(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // Math Expressions + // Binary + case Add(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Add(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Multiply(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case Multiply(_, OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case Divide(OrderPreservingTransformation(attrRef), _, _) => Some(attrRef) + case BitwiseOr(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case BitwiseOr(_, OrderPreservingTransformation(attrRef)) => Some(attrRef) + // Unary + case Exp(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Expm1(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log10(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log1p(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case Log2(OrderPreservingTransformation(attrRef)) => Some(attrRef) + case ShiftLeft(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + case ShiftRight(OrderPreservingTransformation(attrRef), _) => Some(attrRef) + + // Other + case cast @ Cast(OrderPreservingTransformation(attrRef), _, _, _) + if isCastPreservingOrdering(cast.child.dataType, cast.dataType) => Some(attrRef) + + // Identity transformation + case attrRef: AttributeReference => Some(attrRef) + // No match + case _ => None + } + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala new file mode 100644 index 000000000000..1b4b86c4e421 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.hudi.SparkHoodieTableFileIndex + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{AnalysisErrorAt, ResolvedTable} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression, ProjectionOverSchema} +import org.apache.spark.sql.catalyst.planning.ScanOperation +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MergeIntoTable, Project} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.execution.command.RepairTableCommand +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.parquet.NewHoodieParquetFileFormat +import org.apache.spark.sql.types.StructType + +object HoodieSpark35CatalystPlanUtils extends HoodieSpark3CatalystPlanUtils { + + def unapplyResolvedTable(plan: LogicalPlan): Option[(TableCatalog, Identifier, Table)] = + plan match { + case ResolvedTable(catalog, identifier, table, _) => Some((catalog, identifier, table)) + case _ => None + } + + override def unapplyMergeIntoTable(plan: LogicalPlan): Option[(LogicalPlan, LogicalPlan, Expression)] = { + plan match { + case MergeIntoTable(targetTable, sourceTable, mergeCondition, _, _, _) => + Some((targetTable, sourceTable, mergeCondition)) + case _ => None + } + } + + override def applyNewHoodieParquetFileFormatProjection(plan: LogicalPlan): LogicalPlan = { + plan match { + case s@ScanOperation(_, _, _, + l@LogicalRelation(fs: HadoopFsRelation, _, _, _)) if fs.fileFormat.isInstanceOf[NewHoodieParquetFileFormat] && !fs.fileFormat.asInstanceOf[NewHoodieParquetFileFormat].isProjected => + fs.fileFormat.asInstanceOf[NewHoodieParquetFileFormat].isProjected = true + Project(l.resolve(fs.location.asInstanceOf[SparkHoodieTableFileIndex].schema, fs.sparkSession.sessionState.analyzer.resolver), s) + case _ => plan + } + } + + override def projectOverSchema(schema: StructType, output: AttributeSet): ProjectionOverSchema = + ProjectionOverSchema(schema, output) + + override def isRepairTable(plan: LogicalPlan): Boolean = { + plan.isInstanceOf[RepairTableCommand] + } + + override def getRepairTableChildren(plan: LogicalPlan): Option[(TableIdentifier, Boolean, Boolean, String)] = { + plan match { + case rtc: RepairTableCommand => + Some((rtc.tableName, rtc.enableAddPartitions, rtc.enableDropPartitions, rtc.cmd)) + case _ => + None + } + } + + override def failAnalysisForMIT(a: Attribute, cols: String): Unit = { + a.failAnalysis( + errorClass = "_LEGACY_ERROR_TEMP_2309", + messageParameters = Map( + "sqlExpr" -> a.sql, + "cols" -> cols)) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35SchemaUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35SchemaUtils.scala new file mode 100644 index 000000000000..8c657d91fb03 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35SchemaUtils.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.SchemaUtils + +/** + * Utils on schema for Spark 3.4+. + */ +object HoodieSpark35SchemaUtils extends HoodieSchemaUtils { + override def checkColumnNameDuplication(columnNames: Seq[String], + colType: String, + caseSensitiveAnalysis: Boolean): Unit = { + SchemaUtils.checkColumnNameDuplication(columnNames, caseSensitiveAnalysis) + } + + override def toAttributes(struct: StructType): Seq[Attribute] = { + DataTypeUtils.toAttributes(struct) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala new file mode 100644 index 000000000000..12beba9ba322 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.adapter + +import org.apache.avro.Schema +import org.apache.hadoop.fs.FileStatus +import org.apache.hudi.Spark35HoodieFileScanRDD +import org.apache.spark.sql.avro._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.METADATA_COL_ATTR_KEY +import org.apache.spark.sql.connector.catalog.V2TableWithV1Fallback +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, Spark35LegacyHoodieParquetFileFormat} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources._ +import org.apache.spark.sql.hudi.analysis.TableValuedFunctions +import org.apache.spark.sql.parser.{HoodieExtendedParserInterface, HoodieSpark3_5ExtendedSqlParser} +import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder, StructType} +import org.apache.spark.sql.vectorized.ColumnarBatchRow +import org.apache.spark.sql._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.storage.StorageLevel._ + +/** + * Implementation of [[SparkAdapter]] for Spark 3.5.x branch + */ +class Spark3_5Adapter extends BaseSpark3Adapter { + + override def resolveHoodieTable(plan: LogicalPlan): Option[CatalogTable] = { + super.resolveHoodieTable(plan).orElse { + EliminateSubqueryAliases(plan) match { + // First, we need to weed out unresolved plans + case plan if !plan.resolved => None + // NOTE: When resolving Hudi table we allow [[Filter]]s and [[Project]]s be applied + // on top of it + case PhysicalOperation(_, _, DataSourceV2Relation(v2: V2TableWithV1Fallback, _, _, _, _)) if isHoodieTable(v2.v1Table) => + Some(v2.v1Table) + case _ => None + } + } + } + + override def isColumnarBatchRow(r: InternalRow): Boolean = r.isInstanceOf[ColumnarBatchRow] + + def createCatalystMetadataForMetaField: Metadata = + new MetadataBuilder() + .putBoolean(METADATA_COL_ATTR_KEY, value = true) + .build() + + override def getCatalogUtils: HoodieSpark3CatalogUtils = HoodieSpark35CatalogUtils + + override def getCatalystExpressionUtils: HoodieCatalystExpressionUtils = HoodieSpark35CatalystExpressionUtils + + override def getCatalystPlanUtils: HoodieCatalystPlansUtils = HoodieSpark35CatalystPlanUtils + + override def getSchemaUtils: HoodieSchemaUtils = HoodieSpark35SchemaUtils + + override def getSparkPartitionedFileUtils: HoodieSparkPartitionedFileUtils = HoodieSpark35PartitionedFileUtils + + override def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializer = + new HoodieSpark3_5AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializer = + new HoodieSpark3_5AvroDeserializer(rootAvroType, rootCatalystType) + + override def createExtendedSparkParser(spark: SparkSession, delegate: ParserInterface): HoodieExtendedParserInterface = + new HoodieSpark3_5ExtendedSqlParser(spark, delegate) + + override def createLegacyHoodieParquetFileFormat(appendPartitionValues: Boolean): Option[ParquetFileFormat] = { + Some(new Spark35LegacyHoodieParquetFileFormat(appendPartitionValues)) + } + + override def createHoodieFileScanRDD(sparkSession: SparkSession, + readFunction: PartitionedFile => Iterator[InternalRow], + filePartitions: Seq[FilePartition], + readDataSchema: StructType, + metadataColumns: Seq[AttributeReference] = Seq.empty): FileScanRDD = { + new Spark35HoodieFileScanRDD(sparkSession, readFunction, filePartitions, readDataSchema, metadataColumns) + } + + override def extractDeleteCondition(deleteFromTable: Command): Expression = { + deleteFromTable.asInstanceOf[DeleteFromTable].condition + } + + override def injectTableFunctions(extensions: SparkSessionExtensions): Unit = { + TableValuedFunctions.funcs.foreach(extensions.injectTableFunction) + } + + /** + * Converts instance of [[StorageLevel]] to a corresponding string + */ + override def convertStorageLevelToString(level: StorageLevel): String = level match { + case NONE => "NONE" + case DISK_ONLY => "DISK_ONLY" + case DISK_ONLY_2 => "DISK_ONLY_2" + case DISK_ONLY_3 => "DISK_ONLY_3" + case MEMORY_ONLY => "MEMORY_ONLY" + case MEMORY_ONLY_2 => "MEMORY_ONLY_2" + case MEMORY_ONLY_SER => "MEMORY_ONLY_SER" + case MEMORY_ONLY_SER_2 => "MEMORY_ONLY_SER_2" + case MEMORY_AND_DISK => "MEMORY_AND_DISK" + case MEMORY_AND_DISK_2 => "MEMORY_AND_DISK_2" + case MEMORY_AND_DISK_SER => "MEMORY_AND_DISK_SER" + case MEMORY_AND_DISK_SER_2 => "MEMORY_AND_DISK_SER_2" + case OFF_HEAP => "OFF_HEAP" + case _ => throw new IllegalArgumentException(s"Invalid StorageLevel: $level") + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala new file mode 100644 index 000000000000..583e2da0e65a --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -0,0 +1,495 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import java.math.BigDecimal +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ +import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{LocalTimestampMicros, LocalTimestampMillis, TimestampMicros, TimestampMillis} +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic._ +import org.apache.avro.util.Utf8 +import org.apache.spark.sql.avro.AvroDeserializer.{RebaseSpec, createDateRebaseFuncInRead, createTimestampRebaseFuncInRead} +import org.apache.spark.sql.avro.AvroUtils.{AvroMatchedField, toFieldStr} +import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, StructFilters} +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData, RebaseDateTime} +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_DAY +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.LegacyBehaviorPolicy +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +import java.util.TimeZone + +/** + * A deserializer to deserialize data in avro format to data in catalyst format. + * + * NOTE: This code is borrowed from Spark 3.3.0 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroDeserializer(rootAvroType: Schema, + rootCatalystType: DataType, + positionalFieldMatch: Boolean, + datetimeRebaseSpec: RebaseSpec, + filters: StructFilters) { + + def this(rootAvroType: Schema, + rootCatalystType: DataType, + datetimeRebaseMode: String) = { + this( + rootAvroType, + rootCatalystType, + positionalFieldMatch = false, + RebaseSpec(LegacyBehaviorPolicy.withName(datetimeRebaseMode)), + new NoopFilters) + } + + private lazy val decimalConversions = new DecimalConversion() + + private val dateRebaseFunc = createDateRebaseFuncInRead(datetimeRebaseSpec.mode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInRead(datetimeRebaseSpec, "Avro") + + private val converter: Any => Option[Any] = try { + rootCatalystType match { + // A shortcut for empty schema. + case st: StructType if st.isEmpty => + (_: Any) => Some(InternalRow.empty) + + case st: StructType => + val resultRow = new SpecificInternalRow(st.map(_.dataType)) + val fieldUpdater = new RowUpdater(resultRow) + val applyFilters = filters.skipRow(resultRow, _) + val writer = getRecordWriter(rootAvroType, st, Nil, Nil, applyFilters) + (data: Any) => { + val record = data.asInstanceOf[GenericRecord] + val skipRow = writer(fieldUpdater, record) + if (skipRow) None else Some(resultRow) + } + + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val fieldUpdater = new RowUpdater(tmpRow) + val writer = newWriter(rootAvroType, rootCatalystType, Nil, Nil) + (data: Any) => { + writer(fieldUpdater, 0, data) + Some(tmpRow.get(0, rootCatalystType)) + } + } + } catch { + case ise: IncompatibleSchemaException => throw new IncompatibleSchemaException( + s"Cannot convert Avro type $rootAvroType to SQL type ${rootCatalystType.sql}.", ise) + } + + def deserialize(data: Any): Option[Any] = converter(data) + + /** + * Creates a writer to write avro values to Catalyst values at the given ordinal with the given + * updater. + */ + private def newWriter(avroType: Schema, + catalystType: DataType, + avroPath: Seq[String], + catalystPath: Seq[String]): (CatalystDataUpdater, Int, Any) => Unit = { + val errorPrefix = s"Cannot convert Avro ${toFieldStr(avroPath)} to " + + s"SQL ${toFieldStr(catalystPath)} because " + val incompatibleMsg = errorPrefix + + s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})" + + (avroType.getType, catalystType) match { + case (NULL, NullType) => (updater, ordinal, _) => + updater.setNullAt(ordinal) + + // TODO: we can avoid boxing if future version of avro provide primitive accessors. + case (BOOLEAN, BooleanType) => (updater, ordinal, value) => + updater.setBoolean(ordinal, value.asInstanceOf[Boolean]) + + case (INT, IntegerType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (INT, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, dateRebaseFunc(value.asInstanceOf[Int])) + + case (LONG, LongType) => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + + case (LONG, TimestampType) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), the value is processed as timestamp type with millisecond precision. + case null | _: TimestampMillis => (updater, ordinal, value) => + val millis = value.asInstanceOf[Long] + val micros = DateTimeUtils.millisToMicros(millis) + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case _: TimestampMicros => (updater, ordinal, value) => + val micros = value.asInstanceOf[Long] + updater.setLong(ordinal, timestampRebaseFunc(micros)) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"Avro logical type $other cannot be converted to SQL type ${TimestampType.sql}.") + } + + case (LONG, TimestampNTZType) => avroType.getLogicalType match { + // To keep consistent with TimestampType, if the Avro type is Long and it is not + // logical type (the `null` case), the value is processed as TimestampNTZ + // with millisecond precision. + case null | _: LocalTimestampMillis => (updater, ordinal, value) => + val millis = value.asInstanceOf[Long] + val micros = DateTimeUtils.millisToMicros(millis) + updater.setLong(ordinal, micros) + case _: LocalTimestampMicros => (updater, ordinal, value) => + val micros = value.asInstanceOf[Long] + updater.setLong(ordinal, micros) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"Avro logical type $other cannot be converted to SQL type ${TimestampNTZType.sql}.") + } + + // Before we upgrade Avro to 1.8 for logical type support, spark-avro converts Long to Date. + // For backward compatibility, we still keep this conversion. + case (LONG, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, (value.asInstanceOf[Long] / MILLIS_PER_DAY).toInt) + + case (FLOAT, FloatType) => (updater, ordinal, value) => + updater.setFloat(ordinal, value.asInstanceOf[Float]) + + case (DOUBLE, DoubleType) => (updater, ordinal, value) => + updater.setDouble(ordinal, value.asInstanceOf[Double]) + + case (STRING, StringType) => (updater, ordinal, value) => + val str = value match { + case s: String => UTF8String.fromString(s) + case s: Utf8 => + val bytes = new Array[Byte](s.getByteLength) + System.arraycopy(s.getBytes, 0, bytes, 0, s.getByteLength) + UTF8String.fromBytes(bytes) + case s: GenericData.EnumSymbol => UTF8String.fromString(s.toString) + } + updater.set(ordinal, str) + + case (ENUM, StringType) => (updater, ordinal, value) => + updater.set(ordinal, UTF8String.fromString(value.toString)) + + case (FIXED, BinaryType) => (updater, ordinal, value) => + updater.set(ordinal, value.asInstanceOf[GenericFixed].bytes().clone()) + + case (BYTES, BinaryType) => (updater, ordinal, value) => + val bytes = value match { + case b: ByteBuffer => + val bytes = new Array[Byte](b.remaining) + b.get(bytes) + // Do not forget to reset the position + b.rewind() + bytes + case b: Array[Byte] => b + case other => + throw new RuntimeException(errorPrefix + s"$other is not a valid avro binary.") + } + updater.set(ordinal, bytes) + + case (FIXED, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (BYTES, _: DecimalType) => (updater, ordinal, value) => + val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] + val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) + + case (RECORD, st: StructType) => + // Avro datasource doesn't accept filters with nested attributes. See SPARK-32328. + // We can always return `false` from `applyFilters` for nested records. + val writeRecord = + getRecordWriter(avroType, st, avroPath, catalystPath, applyFilters = _ => false) + (updater, ordinal, value) => + val row = new SpecificInternalRow(st) + writeRecord(new RowUpdater(row), value.asInstanceOf[GenericRecord]) + updater.set(ordinal, row) + + case (ARRAY, ArrayType(elementType, containsNull)) => + val avroElementPath = avroPath :+ "element" + val elementWriter = newWriter(avroType.getElementType, elementType, + avroElementPath, catalystPath :+ "element") + (updater, ordinal, value) => + val collection = value.asInstanceOf[java.util.Collection[Any]] + val result = createArrayData(elementType, collection.size()) + val elementUpdater = new ArrayDataUpdater(result) + + var i = 0 + val iter = collection.iterator() + while (iter.hasNext) { + val element = iter.next() + if (element == null) { + if (!containsNull) { + throw new RuntimeException( + s"Array value at path ${toFieldStr(avroElementPath)} is not allowed to be null") + } else { + elementUpdater.setNullAt(i) + } + } else { + elementWriter(elementUpdater, i, element) + } + i += 1 + } + + updater.set(ordinal, result) + + case (MAP, MapType(keyType, valueType, valueContainsNull)) if keyType == StringType => + val keyWriter = newWriter(SchemaBuilder.builder().stringType(), StringType, + avroPath :+ "key", catalystPath :+ "key") + val valueWriter = newWriter(avroType.getValueType, valueType, + avroPath :+ "value", catalystPath :+ "value") + (updater, ordinal, value) => + val map = value.asInstanceOf[java.util.Map[AnyRef, AnyRef]] + val keyArray = createArrayData(keyType, map.size()) + val keyUpdater = new ArrayDataUpdater(keyArray) + val valueArray = createArrayData(valueType, map.size()) + val valueUpdater = new ArrayDataUpdater(valueArray) + val iter = map.entrySet().iterator() + var i = 0 + while (iter.hasNext) { + val entry = iter.next() + assert(entry.getKey != null) + keyWriter(keyUpdater, i, entry.getKey) + if (entry.getValue == null) { + if (!valueContainsNull) { + throw new RuntimeException( + s"Map value at path ${toFieldStr(avroPath :+ "value")} is not allowed to be null") + } else { + valueUpdater.setNullAt(i) + } + } else { + valueWriter(valueUpdater, i, entry.getValue) + } + i += 1 + } + + // The Avro map will never have null or duplicated map keys, it's safe to create a + // ArrayBasedMapData directly here. + updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) + + case (UNION, _) => + val allTypes = avroType.getTypes.asScala + val nonNullTypes = allTypes.filter(_.getType != NULL) + val nonNullAvroType = Schema.createUnion(nonNullTypes.asJava) + if (nonNullTypes.nonEmpty) { + if (nonNullTypes.length == 1) { + newWriter(nonNullTypes.head, catalystType, avroPath, catalystPath) + } else { + nonNullTypes.map(_.getType).toSeq match { + case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => + (updater, ordinal, value) => value match { + case null => updater.setNullAt(ordinal) + case l: java.lang.Long => updater.setLong(ordinal, l) + case i: java.lang.Integer => updater.setLong(ordinal, i.longValue()) + } + + case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && catalystType == DoubleType => + (updater, ordinal, value) => value match { + case null => updater.setNullAt(ordinal) + case d: java.lang.Double => updater.setDouble(ordinal, d) + case f: java.lang.Float => updater.setDouble(ordinal, f.doubleValue()) + } + + case _ => + catalystType match { + case st: StructType if st.length == nonNullTypes.size => + val fieldWriters = nonNullTypes.zip(st.fields).map { + case (schema, field) => + newWriter(schema, field.dataType, avroPath, catalystPath :+ field.name) + }.toArray + (updater, ordinal, value) => { + val row = new SpecificInternalRow(st) + val fieldUpdater = new RowUpdater(row) + val i = GenericData.get().resolveUnion(nonNullAvroType, value) + fieldWriters(i)(fieldUpdater, i, value) + updater.set(ordinal, row) + } + + case _ => throw new IncompatibleSchemaException(incompatibleMsg) + } + } + } + } else { + (updater, ordinal, _) => updater.setNullAt(ordinal) + } + + case (INT, _: YearMonthIntervalType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (LONG, _: DayTimeIntervalType) => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + + case _ => throw new IncompatibleSchemaException(incompatibleMsg) + } + } + + // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? + private def createDecimal(decimal: BigDecimal, precision: Int, scale: Int): Decimal = { + if (precision <= Decimal.MAX_LONG_DIGITS) { + // Constructs a `Decimal` with an unscaled `Long` value if possible. + Decimal(decimal.unscaledValue().longValue(), precision, scale) + } else { + // Otherwise, resorts to an unscaled `BigInteger` instead. + Decimal(decimal, precision, scale) + } + } + + private def getRecordWriter( + avroType: Schema, + catalystType: StructType, + avroPath: Seq[String], + catalystPath: Seq[String], + applyFilters: Int => Boolean): (CatalystDataUpdater, GenericRecord) => Boolean = { + + val avroSchemaHelper = new AvroUtils.AvroSchemaHelper( + avroType, catalystType, avroPath, catalystPath, positionalFieldMatch) + + avroSchemaHelper.validateNoExtraCatalystFields(ignoreNullable = true) + // no need to validateNoExtraAvroFields since extra Avro fields are ignored + + val (validFieldIndexes, fieldWriters) = avroSchemaHelper.matchedFields.map { + case AvroMatchedField(catalystField, ordinal, avroField) => + val baseWriter = newWriter(avroField.schema(), catalystField.dataType, + avroPath :+ avroField.name, catalystPath :+ catalystField.name) + val fieldWriter = (fieldUpdater: CatalystDataUpdater, value: Any) => { + if (value == null) { + fieldUpdater.setNullAt(ordinal) + } else { + baseWriter(fieldUpdater, ordinal, value) + } + } + (avroField.pos(), fieldWriter) + }.toArray.unzip + + (fieldUpdater, record) => { + var i = 0 + var skipRow = false + while (i < validFieldIndexes.length && !skipRow) { + fieldWriters(i)(fieldUpdater, record.get(validFieldIndexes(i))) + skipRow = applyFilters(i) + i += 1 + } + skipRow + } + } + + private def createArrayData(elementType: DataType, length: Int): ArrayData = elementType match { + case BooleanType => UnsafeArrayData.fromPrimitiveArray(new Array[Boolean](length)) + case ByteType => UnsafeArrayData.fromPrimitiveArray(new Array[Byte](length)) + case ShortType => UnsafeArrayData.fromPrimitiveArray(new Array[Short](length)) + case IntegerType => UnsafeArrayData.fromPrimitiveArray(new Array[Int](length)) + case LongType => UnsafeArrayData.fromPrimitiveArray(new Array[Long](length)) + case FloatType => UnsafeArrayData.fromPrimitiveArray(new Array[Float](length)) + case DoubleType => UnsafeArrayData.fromPrimitiveArray(new Array[Double](length)) + case _ => new GenericArrayData(new Array[Any](length)) + } + + /** + * A base interface for updating values inside catalyst data structure like `InternalRow` and + * `ArrayData`. + */ + sealed trait CatalystDataUpdater { + def set(ordinal: Int, value: Any): Unit + + def setNullAt(ordinal: Int): Unit = set(ordinal, null) + def setBoolean(ordinal: Int, value: Boolean): Unit = set(ordinal, value) + def setByte(ordinal: Int, value: Byte): Unit = set(ordinal, value) + def setShort(ordinal: Int, value: Short): Unit = set(ordinal, value) + def setInt(ordinal: Int, value: Int): Unit = set(ordinal, value) + def setLong(ordinal: Int, value: Long): Unit = set(ordinal, value) + def setDouble(ordinal: Int, value: Double): Unit = set(ordinal, value) + def setFloat(ordinal: Int, value: Float): Unit = set(ordinal, value) + def setDecimal(ordinal: Int, value: Decimal): Unit = set(ordinal, value) + } + + final class RowUpdater(row: InternalRow) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = row.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = row.setNullAt(ordinal) + override def setBoolean(ordinal: Int, value: Boolean): Unit = row.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = row.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = row.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = row.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = row.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = row.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = row.setFloat(ordinal, value) + override def setDecimal(ordinal: Int, value: Decimal): Unit = + row.setDecimal(ordinal, value, value.precision) + } + + final class ArrayDataUpdater(array: ArrayData) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = array.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = array.setNullAt(ordinal) + override def setBoolean(ordinal: Int, value: Boolean): Unit = array.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = array.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = array.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = array.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = array.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = array.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = array.setFloat(ordinal, value) + override def setDecimal(ordinal: Int, value: Decimal): Unit = array.update(ordinal, value) + } +} + +object AvroDeserializer { + + // NOTE: Following methods have been renamed in Spark 3.2.1 [1] making [[AvroDeserializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. + // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.2.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.2.0 as well as + // w/ Spark >= 3.2.1 + // + // [1] https://github.com/apache/spark/pull/34978 + + // Specification of rebase operation including `mode` and the time zone in which it is performed + case class RebaseSpec(mode: LegacyBehaviorPolicy.Value, originTimeZone: Option[String] = None) { + // Use the default JVM time zone for backward compatibility + def timeZone: String = originTimeZone.getOrElse(TimeZone.getDefault.getID) + } + + def createDateRebaseFuncInRead(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchJulianDay) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseJulianToGregorianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + } + + def createTimestampRebaseFuncInRead(rebaseSpec: RebaseSpec, + format: String): Long => Long = rebaseSpec.mode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchJulianTs) { + throw DataSourceUtils.newRebaseExceptionInRead(format) + } + micros + case LegacyBehaviorPolicy.LEGACY => micros: Long => + RebaseDateTime.rebaseJulianToGregorianMicros(TimeZone.getTimeZone(rebaseSpec.timeZone), micros) + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala new file mode 100644 index 000000000000..a2ed346a97e1 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{LocalTimestampMicros, LocalTimestampMillis, TimestampMicros, TimestampMillis} +import org.apache.avro.{LogicalTypes, Schema} +import org.apache.avro.Schema.Type +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed, Record} +import org.apache.avro.util.Utf8 +import org.apache.spark.internal.Logging +import org.apache.spark.sql.avro.AvroSerializer.{createDateRebaseFuncInWrite, createTimestampRebaseFuncInWrite} +import org.apache.spark.sql.avro.AvroUtils.{AvroMatchedField, toFieldStr} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, SpecificInternalRow} +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, RebaseDateTime} +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.types._ + +import java.nio.ByteBuffer +import java.util.TimeZone +import scala.collection.JavaConverters._ + +/** + * A serializer to serialize data in catalyst format to data in avro format. + * + * NOTE: This code is borrowed from Spark 3.3.0 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * NOTE: THIS IMPLEMENTATION HAS BEEN MODIFIED FROM ITS ORIGINAL VERSION WITH THE MODIFICATION + * BEING EXPLICITLY ANNOTATED INLINE. PLEASE MAKE SURE TO UNDERSTAND PROPERLY ALL THE + * MODIFICATIONS. + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] class AvroSerializer(rootCatalystType: DataType, + rootAvroType: Schema, + nullable: Boolean, + positionalFieldMatch: Boolean, + datetimeRebaseMode: LegacyBehaviorPolicy.Value) extends Logging { + + def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) = { + this(rootCatalystType, rootAvroType, nullable, positionalFieldMatch = false, + LegacyBehaviorPolicy.withName(SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_WRITE, + LegacyBehaviorPolicy.CORRECTED.toString))) + } + + def serialize(catalystData: Any): Any = { + converter.apply(catalystData) + } + + private val dateRebaseFunc = createDateRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val timestampRebaseFunc = createTimestampRebaseFuncInWrite( + datetimeRebaseMode, "Avro") + + private val converter: Any => Any = { + val actualAvroType = resolveNullableType(rootAvroType, nullable) + val baseConverter = try { + rootCatalystType match { + case st: StructType => + newStructConverter(st, actualAvroType, Nil, Nil).asInstanceOf[Any => Any] + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val converter = newConverter(rootCatalystType, actualAvroType, Nil, Nil) + (data: Any) => + tmpRow.update(0, data) + converter.apply(tmpRow, 0) + } + } catch { + case ise: IncompatibleSchemaException => throw new IncompatibleSchemaException( + s"Cannot convert SQL type ${rootCatalystType.sql} to Avro type $rootAvroType.", ise) + } + if (nullable) { + (data: Any) => + if (data == null) { + null + } else { + baseConverter.apply(data) + } + } else { + baseConverter + } + } + + private type Converter = (SpecializedGetters, Int) => Any + + private lazy val decimalConversions = new DecimalConversion() + + private def newConverter(catalystType: DataType, + avroType: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): Converter = { + val errorPrefix = s"Cannot convert SQL ${toFieldStr(catalystPath)} " + + s"to Avro ${toFieldStr(avroPath)} because " + (catalystType, avroType.getType) match { + case (NullType, NULL) => + (getter, ordinal) => null + case (BooleanType, BOOLEAN) => + (getter, ordinal) => getter.getBoolean(ordinal) + case (ByteType, INT) => + (getter, ordinal) => getter.getByte(ordinal).toInt + case (ShortType, INT) => + (getter, ordinal) => getter.getShort(ordinal).toInt + case (IntegerType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + case (LongType, LONG) => + (getter, ordinal) => getter.getLong(ordinal) + case (FloatType, FLOAT) => + (getter, ordinal) => getter.getFloat(ordinal) + case (DoubleType, DOUBLE) => + (getter, ordinal) => getter.getDouble(ordinal) + case (d: DecimalType, FIXED) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toFixed(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (d: DecimalType, BYTES) + if avroType.getLogicalType == LogicalTypes.decimal(d.precision, d.scale) => + (getter, ordinal) => + val decimal = getter.getDecimal(ordinal, d.precision, d.scale) + decimalConversions.toBytes(decimal.toJavaBigDecimal, avroType, + LogicalTypes.decimal(d.precision, d.scale)) + + case (StringType, ENUM) => + val enumSymbols: Set[String] = avroType.getEnumSymbols.asScala.toSet + (getter, ordinal) => + val data = getter.getUTF8String(ordinal).toString + if (!enumSymbols.contains(data)) { + throw new IncompatibleSchemaException(errorPrefix + + s""""$data" cannot be written since it's not defined in enum """ + + enumSymbols.mkString("\"", "\", \"", "\"")) + } + new EnumSymbol(avroType, data) + + case (StringType, STRING) => + (getter, ordinal) => new Utf8(getter.getUTF8String(ordinal).getBytes) + + case (BinaryType, FIXED) => + val size = avroType.getFixedSize + (getter, ordinal) => + val data: Array[Byte] = getter.getBinary(ordinal) + if (data.length != size) { + def len2str(len: Int): String = s"$len ${if (len > 1) "bytes" else "byte"}" + + throw new IncompatibleSchemaException(errorPrefix + len2str(data.length) + + " of binary data cannot be written into FIXED type with size of " + len2str(size)) + } + new Fixed(avroType, data) + + case (BinaryType, BYTES) => + (getter, ordinal) => ByteBuffer.wrap(getter.getBinary(ordinal)) + + case (DateType, INT) => + (getter, ordinal) => dateRebaseFunc(getter.getInt(ordinal)) + + case (TimestampType, LONG) => avroType.getLogicalType match { + // For backward compatibility, if the Avro type is Long and it is not logical type + // (the `null` case), output the timestamp value as with millisecond precision. + case null | _: TimestampMillis => (getter, ordinal) => + DateTimeUtils.microsToMillis(timestampRebaseFunc(getter.getLong(ordinal))) + case _: TimestampMicros => (getter, ordinal) => + timestampRebaseFunc(getter.getLong(ordinal)) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"SQL type ${TimestampType.sql} cannot be converted to Avro logical type $other") + } + + case (TimestampNTZType, LONG) => avroType.getLogicalType match { + // To keep consistent with TimestampType, if the Avro type is Long and it is not + // logical type (the `null` case), output the TimestampNTZ as long value + // in millisecond precision. + case null | _: LocalTimestampMillis => (getter, ordinal) => + DateTimeUtils.microsToMillis(getter.getLong(ordinal)) + case _: LocalTimestampMicros => (getter, ordinal) => + getter.getLong(ordinal) + case other => throw new IncompatibleSchemaException(errorPrefix + + s"SQL type ${TimestampNTZType.sql} cannot be converted to Avro logical type $other") + } + + case (ArrayType(et, containsNull), ARRAY) => + val elementConverter = newConverter( + et, resolveNullableType(avroType.getElementType, containsNull), + catalystPath :+ "element", avroPath :+ "element") + (getter, ordinal) => { + val arrayData = getter.getArray(ordinal) + val len = arrayData.numElements() + val result = new Array[Any](len) + var i = 0 + while (i < len) { + if (containsNull && arrayData.isNullAt(i)) { + result(i) = null + } else { + result(i) = elementConverter(arrayData, i) + } + i += 1 + } + // avro writer is expecting a Java Collection, so we convert it into + // `ArrayList` backed by the specified array without data copying. + java.util.Arrays.asList(result: _*) + } + + case (st: StructType, RECORD) => + val structConverter = newStructConverter(st, avroType, catalystPath, avroPath) + val numFields = st.length + (getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields)) + + //////////////////////////////////////////////////////////////////////////////////////////// + // Following section is amended to the original (Spark's) implementation + // >>> BEGINS + //////////////////////////////////////////////////////////////////////////////////////////// + + case (st: StructType, UNION) => + val unionConverter = newUnionConverter(st, avroType, catalystPath, avroPath) + val numFields = st.length + (getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields)) + + //////////////////////////////////////////////////////////////////////////////////////////// + // <<< ENDS + //////////////////////////////////////////////////////////////////////////////////////////// + + case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType => + val valueConverter = newConverter( + vt, resolveNullableType(avroType.getValueType, valueContainsNull), + catalystPath :+ "value", avroPath :+ "value") + (getter, ordinal) => + val mapData = getter.getMap(ordinal) + val len = mapData.numElements() + val result = new java.util.HashMap[String, Any](len) + val keyArray = mapData.keyArray() + val valueArray = mapData.valueArray() + var i = 0 + while (i < len) { + val key = keyArray.getUTF8String(i).toString + if (valueContainsNull && valueArray.isNullAt(i)) { + result.put(key, null) + } else { + result.put(key, valueConverter(valueArray, i)) + } + i += 1 + } + result + + case (_: YearMonthIntervalType, INT) => + (getter, ordinal) => getter.getInt(ordinal) + + case (_: DayTimeIntervalType, LONG) => + (getter, ordinal) => getter.getLong(ordinal) + + case _ => + throw new IncompatibleSchemaException(errorPrefix + + s"schema is incompatible (sqlType = ${catalystType.sql}, avroType = $avroType)") + } + } + + private def newStructConverter(catalystStruct: StructType, + avroStruct: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): InternalRow => Record = { + + val avroSchemaHelper = new AvroUtils.AvroSchemaHelper( + avroStruct, catalystStruct, avroPath, catalystPath, positionalFieldMatch) + + avroSchemaHelper.validateNoExtraCatalystFields(ignoreNullable = false) + avroSchemaHelper.validateNoExtraRequiredAvroFields() + + val (avroIndices, fieldConverters) = avroSchemaHelper.matchedFields.map { + case AvroMatchedField(catalystField, _, avroField) => + val converter = newConverter(catalystField.dataType, + resolveNullableType(avroField.schema(), catalystField.nullable), + catalystPath :+ catalystField.name, avroPath :+ avroField.name) + (avroField.pos(), converter) + }.toArray.unzip + + val numFields = catalystStruct.length + row: InternalRow => + val result = new Record(avroStruct) + var i = 0 + while (i < numFields) { + if (row.isNullAt(i)) { + result.put(avroIndices(i), null) + } else { + result.put(avroIndices(i), fieldConverters(i).apply(row, i)) + } + i += 1 + } + result + } + + //////////////////////////////////////////////////////////////////////////////////////////// + // Following section is amended to the original (Spark's) implementation + // >>> BEGINS + //////////////////////////////////////////////////////////////////////////////////////////// + + private def newUnionConverter(catalystStruct: StructType, + avroUnion: Schema, + catalystPath: Seq[String], + avroPath: Seq[String]): InternalRow => Any = { + if (avroUnion.getType != UNION || !canMapUnion(catalystStruct, avroUnion)) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " + + s"Avro type $avroUnion.") + } + val nullable = avroUnion.getTypes.size() > 0 && avroUnion.getTypes.get(0).getType == Type.NULL + val avroInnerTypes = if (nullable) { + avroUnion.getTypes.asScala.tail + } else { + avroUnion.getTypes.asScala + } + val fieldConverters = catalystStruct.zip(avroInnerTypes).map { + case (f1, f2) => newConverter(f1.dataType, f2, catalystPath, avroPath) + } + val numFields = catalystStruct.length + (row: InternalRow) => + var i = 0 + var result: Any = null + while (i < numFields) { + if (!row.isNullAt(i)) { + if (result != null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has more than one optional values set") + } + result = fieldConverters(i).apply(row, i) + } + i += 1 + } + if (!nullable && result == null) { + throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " + + s"Avro union $avroUnion. Record has no values set, while should have exactly one") + } + result + } + + private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = { + (avroStruct.getTypes.size() > 0 && + avroStruct.getTypes.get(0).getType == Type.NULL && + avroStruct.getTypes.size() - 1 == catalystStruct.length) || avroStruct.getTypes.size() == catalystStruct.length + } + + //////////////////////////////////////////////////////////////////////////////////////////// + // <<< ENDS + //////////////////////////////////////////////////////////////////////////////////////////// + + + /** + * Resolve a possibly nullable Avro Type. + * + * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another + * non-null type. This method will check the nullability of the input Avro type and return the + * non-null type within when it is nullable. Otherwise it will return the input Avro type + * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an + * unsupported nullable type. + * + * It will also log a warning message if the nullability for Avro and catalyst types are + * different. + */ + private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { + val (avroNullable, resolvedAvroType) = resolveAvroType(avroType) + warnNullabilityDifference(avroNullable, nullable) + resolvedAvroType + } + + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is the possibly resolved type. + */ + private def resolveAvroType(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { + val fields = avroType.getTypes.asScala + val actualType = fields.filter(_.getType != Type.NULL) + if (fields.length == 2 && actualType.length == 1) { + (true, actualType.head) + } else { + // This is just a normal union, not used to designate nullability + (false, avroType) + } + } else { + (false, avroType) + } + } + + /** + * log a warning message if the nullability for Avro and catalyst types are different. + */ + private def warnNullabilityDifference(avroNullable: Boolean, catalystNullable: Boolean): Unit = { + if (avroNullable && !catalystNullable) { + logWarning("Writing Avro files with nullable Avro schema and non-nullable catalyst schema.") + } + if (!avroNullable && catalystNullable) { + logWarning("Writing Avro files with non-nullable Avro schema and nullable catalyst " + + "schema will throw runtime exception if there is a record with null value.") + } + } +} + +object AvroSerializer { + + // NOTE: Following methods have been renamed in Spark 3.2.1 [1] making [[AvroSerializer]] implementation + // (which relies on it) be only compatible with the exact same version of [[DataSourceUtils]]. + // To make sure this implementation is compatible w/ all Spark versions w/in Spark 3.2.x branch, + // we're preemptively cloned those methods to make sure Hudi is compatible w/ Spark 3.2.0 as well as + // w/ Spark >= 3.2.1 + // + // [1] https://github.com/apache/spark/pull/34978 + + def createDateRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Int => Int = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => days: Int => + if (days < RebaseDateTime.lastSwitchGregorianDay) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + days + case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianDays + case LegacyBehaviorPolicy.CORRECTED => identity[Int] + } + + def createTimestampRebaseFuncInWrite(rebaseMode: LegacyBehaviorPolicy.Value, + format: String): Long => Long = rebaseMode match { + case LegacyBehaviorPolicy.EXCEPTION => micros: Long => + if (micros < RebaseDateTime.lastSwitchGregorianTs) { + throw DataSourceUtils.newRebaseExceptionInWrite(format) + } + micros + case LegacyBehaviorPolicy.LEGACY => + val timeZone = SQLConf.get.sessionLocalTimeZone + RebaseDateTime.rebaseGregorianToJulianMicros(TimeZone.getTimeZone(timeZone), _) + case LegacyBehaviorPolicy.CORRECTED => identity[Long] + } + +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala new file mode 100644 index 000000000000..b9845c491dc0 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import java.util.Locale + +import scala.collection.JavaConverters._ + +import org.apache.avro.Schema +import org.apache.avro.file. FileReader +import org.apache.avro.generic.GenericRecord + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * NOTE: This code is borrowed from Spark 3.3.0 + * This code is borrowed, so that we can better control compatibility w/in Spark minor + * branches (3.2.x, 3.1.x, etc) + * + * PLEASE REFRAIN MAKING ANY CHANGES TO THIS CODE UNLESS ABSOLUTELY NECESSARY + */ +private[sql] object AvroUtils extends Logging { + + def supportsDataType(dataType: DataType): Boolean = dataType match { + case _: AtomicType => true + + case st: StructType => st.forall { f => supportsDataType(f.dataType) } + + case ArrayType(elementType, _) => supportsDataType(elementType) + + case MapType(keyType, valueType, _) => + supportsDataType(keyType) && supportsDataType(valueType) + + case udt: UserDefinedType[_] => supportsDataType(udt.sqlType) + + case _: NullType => true + + case _ => false + } + + // The trait provides iterator-like interface for reading records from an Avro file, + // deserializing and returning them as internal rows. + trait RowReader { + protected val fileReader: FileReader[GenericRecord] + protected val deserializer: AvroDeserializer + protected val stopPosition: Long + + private[this] var completed = false + private[this] var currentRow: Option[InternalRow] = None + + def hasNextRow: Boolean = { + while (!completed && currentRow.isEmpty) { + val r = fileReader.hasNext && !fileReader.pastSync(stopPosition) + if (!r) { + fileReader.close() + completed = true + currentRow = None + } else { + val record = fileReader.next() + // the row must be deserialized in hasNextRow, because AvroDeserializer#deserialize + // potentially filters rows + currentRow = deserializer.deserialize(record).asInstanceOf[Option[InternalRow]] + } + } + currentRow.isDefined + } + + def nextRow: InternalRow = { + if (currentRow.isEmpty) { + hasNextRow + } + val returnRow = currentRow + currentRow = None // free up hasNextRow to consume more Avro records, if not exhausted + returnRow.getOrElse { + throw new NoSuchElementException("next on empty iterator") + } + } + } + + /** Wrapper for a pair of matched fields, one Catalyst and one corresponding Avro field. */ + private[sql] case class AvroMatchedField( + catalystField: StructField, + catalystPosition: Int, + avroField: Schema.Field) + + /** + * Helper class to perform field lookup/matching on Avro schemas. + * + * This will match `avroSchema` against `catalystSchema`, attempting to find a matching field in + * the Avro schema for each field in the Catalyst schema and vice-versa, respecting settings for + * case sensitivity. The match results can be accessed using the getter methods. + * + * @param avroSchema The schema in which to search for fields. Must be of type RECORD. + * @param catalystSchema The Catalyst schema to use for matching. + * @param avroPath The seq of parent field names leading to `avroSchema`. + * @param catalystPath The seq of parent field names leading to `catalystSchema`. + * @param positionalFieldMatch If true, perform field matching in a positional fashion + * (structural comparison between schemas, ignoring names); + * otherwise, perform field matching using field names. + */ + class AvroSchemaHelper( + avroSchema: Schema, + catalystSchema: StructType, + avroPath: Seq[String], + catalystPath: Seq[String], + positionalFieldMatch: Boolean) { + if (avroSchema.getType != Schema.Type.RECORD) { + throw new IncompatibleSchemaException( + s"Attempting to treat ${avroSchema.getName} as a RECORD, but it was: ${avroSchema.getType}") + } + + private[this] val avroFieldArray = avroSchema.getFields.asScala.toArray + private[this] val fieldMap = avroSchema.getFields.asScala + .groupBy(_.name.toLowerCase(Locale.ROOT)) + .mapValues(_.toSeq) // toSeq needed for scala 2.13 + + /** The fields which have matching equivalents in both Avro and Catalyst schemas. */ + val matchedFields: Seq[AvroMatchedField] = catalystSchema.zipWithIndex.flatMap { + case (sqlField, sqlPos) => + getAvroField(sqlField.name, sqlPos).map(AvroMatchedField(sqlField, sqlPos, _)) + } + + /** + * Validate that there are no Catalyst fields which don't have a matching Avro field, throwing + * [[IncompatibleSchemaException]] if such extra fields are found. If `ignoreNullable` is false, + * consider nullable Catalyst fields to be eligible to be an extra field; otherwise, + * ignore nullable Catalyst fields when checking for extras. + */ + def validateNoExtraCatalystFields(ignoreNullable: Boolean): Unit = + catalystSchema.zipWithIndex.foreach { case (sqlField, sqlPos) => + if (getAvroField(sqlField.name, sqlPos).isEmpty && + (!ignoreNullable || !sqlField.nullable)) { + if (positionalFieldMatch) { + throw new IncompatibleSchemaException("Cannot find field at position " + + s"$sqlPos of ${toFieldStr(avroPath)} from Avro schema (using positional matching)") + } else { + throw new IncompatibleSchemaException( + s"Cannot find ${toFieldStr(catalystPath :+ sqlField.name)} in Avro schema") + } + } + } + + /** + * Validate that there are no Avro fields which don't have a matching Catalyst field, throwing + * [[IncompatibleSchemaException]] if such extra fields are found. Only required (non-nullable) + * fields are checked; nullable fields are ignored. + */ + def validateNoExtraRequiredAvroFields(): Unit = { + val extraFields = avroFieldArray.toSet -- matchedFields.map(_.avroField) + extraFields.filterNot(isNullable).foreach { extraField => + if (positionalFieldMatch) { + throw new IncompatibleSchemaException(s"Found field '${extraField.name()}' at position " + + s"${extraField.pos()} of ${toFieldStr(avroPath)} from Avro schema but there is no " + + s"match in the SQL schema at ${toFieldStr(catalystPath)} (using positional matching)") + } else { + throw new IncompatibleSchemaException( + s"Found ${toFieldStr(avroPath :+ extraField.name())} in Avro schema but there is no " + + "match in the SQL schema") + } + } + } + + /** + * Extract a single field from the contained avro schema which has the desired field name, + * performing the matching with proper case sensitivity according to SQLConf.resolver. + * + * @param name The name of the field to search for. + * @return `Some(match)` if a matching Avro field is found, otherwise `None`. + */ + private[avro] def getFieldByName(name: String): Option[Schema.Field] = { + + // get candidates, ignoring case of field name + val candidates = fieldMap.getOrElse(name.toLowerCase(Locale.ROOT), Seq.empty) + + // search candidates, taking into account case sensitivity settings + candidates.filter(f => SQLConf.get.resolver(f.name(), name)) match { + case Seq(avroField) => Some(avroField) + case Seq() => None + case matches => throw new IncompatibleSchemaException(s"Searching for '$name' in Avro " + + s"schema at ${toFieldStr(avroPath)} gave ${matches.size} matches. Candidates: " + + matches.map(_.name()).mkString("[", ", ", "]") + ) + } + } + + /** Get the Avro field corresponding to the provided Catalyst field name/position, if any. */ + def getAvroField(fieldName: String, catalystPos: Int): Option[Schema.Field] = { + if (positionalFieldMatch) { + avroFieldArray.lift(catalystPos) + } else { + getFieldByName(fieldName) + } + } + } + + /** + * Convert a sequence of hierarchical field names (like `Seq(foo, bar)`) into a human-readable + * string representing the field, like "field 'foo.bar'". If `names` is empty, the string + * "top-level record" is returned. + */ + private[avro] def toFieldStr(names: Seq[String]): String = names match { + case Seq() => "top-level record" + case n => s"field '${n.mkString(".")}'" + } + + /** Return true iff `avroField` is nullable, i.e. `UNION` type and has `NULL` as an option. */ + private[avro] def isNullable(avroField: Schema.Field): Boolean = + avroField.schema().getType == Schema.Type.UNION && + avroField.schema().getTypes.asScala.exists(_.getType == Schema.Type.NULL) +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroDeserializer.scala new file mode 100644 index 000000000000..c99b1a499f69 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroDeserializer.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.types.DataType + +class HoodieSpark3_5AvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) + extends HoodieAvroDeserializer { + + private val avroDeserializer = new AvroDeserializer(rootAvroType, rootCatalystType, + SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_READ, LegacyBehaviorPolicy.CORRECTED.toString)) + + def deserialize(data: Any): Option[Any] = avroDeserializer.deserialize(data) +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroSerializer.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroSerializer.scala new file mode 100644 index 000000000000..639f16cb3c96 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_5AvroSerializer.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +class HoodieSpark3_5AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) + extends HoodieAvroSerializer { + + val avroSerializer = new AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def serialize(catalystData: Any): Any = avroSerializer.serialize(catalystData) +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark35PartitionedFileUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark35PartitionedFileUtils.scala new file mode 100644 index 000000000000..611ccf7c0b1a --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/HoodieSpark35PartitionedFileUtils.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.spark.paths.SparkPath +import org.apache.spark.sql.catalyst.InternalRow + +/** + * Utils on Spark [[PartitionedFile]] and [[PartitionDirectory]] for Spark 3.5. + */ +object HoodieSpark35PartitionedFileUtils extends HoodieSparkPartitionedFileUtils { + override def getPathFromPartitionedFile(partitionedFile: PartitionedFile): Path = { + partitionedFile.filePath.toPath + } + + override def getStringPathFromPartitionedFile(partitionedFile: PartitionedFile): String = { + partitionedFile.filePath.toString + } + + override def createPartitionedFile(partitionValues: InternalRow, + filePath: Path, + start: Long, + length: Long): PartitionedFile = { + PartitionedFile(partitionValues, SparkPath.fromPath(filePath), start, length) + } + + override def toFileStatuses(partitionDirs: Seq[PartitionDirectory]): Seq[FileStatus] = { + partitionDirs.flatMap(_.files).map(_.fileStatus) + } + + override def newPartitionDirectory(internalRow: InternalRow, statuses: Seq[FileStatus]): PartitionDirectory = { + PartitionDirectory(internalRow, statuses.toArray) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala new file mode 100644 index 000000000000..966ade0db79c --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/Spark35NestedSchemaPruning.scala @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.hudi.{HoodieBaseRelation, SparkAdapterSupport} +import org.apache.spark.sql.HoodieSpark3CatalystPlanUtils +import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression, NamedExpression, ProjectionOverSchema} +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} +import org.apache.spark.sql.util.SchemaUtils.restoreOriginalOutputNames + +/** + * Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation. + * By "physical column", we mean a column as defined in the data source format like Parquet format + * or ORC format. For example, in Spark SQL, a root-level Parquet column corresponds to a SQL + * column, and a nested Parquet column corresponds to a [[StructField]]. + * + * NOTE: This class is borrowed from Spark 3.2.1, with modifications adapting it to handle [[HoodieBaseRelation]], + * instead of [[HadoopFsRelation]] + */ +class Spark35NestedSchemaPruning extends Rule[LogicalPlan] { + import org.apache.spark.sql.catalyst.expressions.SchemaPruning._ + + override def apply(plan: LogicalPlan): LogicalPlan = + if (conf.nestedSchemaPruningEnabled) { + apply0(plan) + } else { + plan + } + + private def apply0(plan: LogicalPlan): LogicalPlan = + plan transformDown { + case op @ PhysicalOperation(projects, filters, + // NOTE: This is modified to accommodate for Hudi's custom relations, given that original + // [[NestedSchemaPruning]] rule is tightly coupled w/ [[HadoopFsRelation]] + // TODO generalize to any file-based relation + l @ LogicalRelation(relation: HoodieBaseRelation, _, _, _)) + if relation.canPruneRelationSchema => + + prunePhysicalColumns(l.output, projects, filters, relation.dataSchema, + prunedDataSchema => { + val prunedRelation = + relation.updatePrunedDataSchema(prunedSchema = prunedDataSchema) + buildPrunedRelation(l, prunedRelation) + }).getOrElse(op) + } + + /** + * This method returns optional logical plan. `None` is returned if no nested field is required or + * all nested fields are required. + */ + private def prunePhysicalColumns(output: Seq[AttributeReference], + projects: Seq[NamedExpression], + filters: Seq[Expression], + dataSchema: StructType, + outputRelationBuilder: StructType => LogicalRelation): Option[LogicalPlan] = { + val (normalizedProjects, normalizedFilters) = + normalizeAttributeRefNames(output, projects, filters) + val requestedRootFields = identifyRootFields(normalizedProjects, normalizedFilters) + + // If requestedRootFields includes a nested field, continue. Otherwise, + // return op + if (requestedRootFields.exists { root: RootField => !root.derivedFromAtt }) { + val prunedDataSchema = pruneSchema(dataSchema, requestedRootFields) + + // If the data schema is different from the pruned data schema, continue. Otherwise, + // return op. We effect this comparison by counting the number of "leaf" fields in + // each schemata, assuming the fields in prunedDataSchema are a subset of the fields + // in dataSchema. + if (countLeaves(dataSchema) > countLeaves(prunedDataSchema)) { + val planUtils = SparkAdapterSupport.sparkAdapter.getCatalystPlanUtils.asInstanceOf[HoodieSpark3CatalystPlanUtils] + + val prunedRelation = outputRelationBuilder(prunedDataSchema) + val projectionOverSchema = planUtils.projectOverSchema(prunedDataSchema, AttributeSet(output)) + + Some(buildNewProjection(projects, normalizedProjects, normalizedFilters, + prunedRelation, projectionOverSchema)) + } else { + None + } + } else { + None + } + } + + /** + * Normalizes the names of the attribute references in the given projects and filters to reflect + * the names in the given logical relation. This makes it possible to compare attributes and + * fields by name. Returns a tuple with the normalized projects and filters, respectively. + */ + private def normalizeAttributeRefNames(output: Seq[AttributeReference], + projects: Seq[NamedExpression], + filters: Seq[Expression]): (Seq[NamedExpression], Seq[Expression]) = { + val normalizedAttNameMap = output.map(att => (att.exprId, att.name)).toMap + val normalizedProjects = projects.map(_.transform { + case att: AttributeReference if normalizedAttNameMap.contains(att.exprId) => + att.withName(normalizedAttNameMap(att.exprId)) + }).map { case expr: NamedExpression => expr } + val normalizedFilters = filters.map(_.transform { + case att: AttributeReference if normalizedAttNameMap.contains(att.exprId) => + att.withName(normalizedAttNameMap(att.exprId)) + }) + (normalizedProjects, normalizedFilters) + } + + /** + * Builds the new output [[Project]] Spark SQL operator that has the `leafNode`. + */ + private def buildNewProjection(projects: Seq[NamedExpression], + normalizedProjects: Seq[NamedExpression], + filters: Seq[Expression], + prunedRelation: LogicalRelation, + projectionOverSchema: ProjectionOverSchema): Project = { + // Construct a new target for our projection by rewriting and + // including the original filters where available + val projectionChild = + if (filters.nonEmpty) { + val projectedFilters = filters.map(_.transformDown { + case projectionOverSchema(expr) => expr + }) + val newFilterCondition = projectedFilters.reduce(And) + Filter(newFilterCondition, prunedRelation) + } else { + prunedRelation + } + + // Construct the new projections of our Project by + // rewriting the original projections + val newProjects = normalizedProjects.map(_.transformDown { + case projectionOverSchema(expr) => expr + }).map { case expr: NamedExpression => expr } + + if (log.isDebugEnabled) { + logDebug(s"New projects:\n${newProjects.map(_.treeString).mkString("\n")}") + } + + Project(restoreOriginalOutputNames(newProjects, projects.map(_.name)), projectionChild) + } + + /** + * Builds a pruned logical relation from the output of the output relation and the schema of the + * pruned base relation. + */ + private def buildPrunedRelation(outputRelation: LogicalRelation, + prunedBaseRelation: BaseRelation): LogicalRelation = { + val prunedOutput = getPrunedOutput(outputRelation.output, prunedBaseRelation.schema) + outputRelation.copy(relation = prunedBaseRelation, output = prunedOutput) + } + + // Prune the given output to make it consistent with `requiredSchema`. + private def getPrunedOutput(output: Seq[AttributeReference], + requiredSchema: StructType): Seq[AttributeReference] = { + // We need to replace the expression ids of the pruned relation output attributes + // with the expression ids of the original relation output attributes so that + // references to the original relation's output are not broken + val outputIdMap = output.map(att => (att.name, att.exprId)).toMap + DataTypeUtils.toAttributes(requiredSchema) + .map { + case att if outputIdMap.contains(att.name) => + att.withExprId(outputIdMap(att.name)) + case att => att + } + } + + /** + * Counts the "leaf" fields of the given dataType. Informally, this is the + * number of fields of non-complex data type in the tree representation of + * [[DataType]]. + */ + private def countLeaves(dataType: DataType): Int = { + dataType match { + case array: ArrayType => countLeaves(array.elementType) + case map: MapType => countLeaves(map.keyType) + countLeaves(map.valueType) + case struct: StructType => + struct.map(field => countLeaves(field.dataType)).sum + case _ => 1 + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35DataSourceUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35DataSourceUtils.scala new file mode 100644 index 000000000000..4e08f975eefb --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35DataSourceUtils.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.spark.sql.SPARK_VERSION_METADATA_KEY +import org.apache.spark.sql.internal.{SQLConf, LegacyBehaviorPolicy} +import org.apache.spark.util.Utils + +object Spark35DataSourceUtils { + + /** + * NOTE: This method was copied from [[Spark32PlusDataSourceUtils]], and is required to maintain runtime + * compatibility against Spark 3.5.0 + */ + // scalastyle:off + def int96RebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 3.0 and earlier follow the legacy hybrid calendar and we need to + // rebase the INT96 timestamp values. + // Files written by Spark 3.1 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.1.0" || lookupFileMeta("org.apache.spark.legacyINT96") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + + /** + * NOTE: This method was copied from Spark 3.2.0, and is required to maintain runtime + * compatibility against Spark 3.2.0 + */ + // scalastyle:off + def datetimeRebaseMode(lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + return LegacyBehaviorPolicy.CORRECTED + } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 2.4 and earlier follow the legacy hybrid calendar and we need to + // rebase the datetime values. + // Files written by Spark 3.0 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. + if (version < "3.0.0" || lookupFileMeta("org.apache.spark.legacyDateTime") != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) + } + // scalastyle:on + +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala new file mode 100644 index 000000000000..dd70aa08b856 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala @@ -0,0 +1,536 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapred.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hudi.HoodieSparkUtils +import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.common.util.InternalSchemaCache +import org.apache.hudi.common.util.StringUtils.isNullOrEmpty +import org.apache.hudi.common.util.collection.Pair +import org.apache.hudi.internal.schema.InternalSchema +import org.apache.hudi.internal.schema.action.InternalSchemaMerger +import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} +import org.apache.parquet.filter2.compat.FilterCompat +import org.apache.parquet.filter2.predicate.FilterApi +import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS +import org.apache.parquet.hadoop.{ParquetInputFormat, ParquetRecordReader} +import org.apache.spark.TaskContext +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.expressions.{Cast, JoinedRow} +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.datasources.parquet.Spark35LegacyHoodieParquetFileFormat._ +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, PartitionedFile, RecordReaderIterator} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources._ +import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} +import org.apache.spark.util.SerializableConfiguration +/** + * This class is an extension of [[ParquetFileFormat]] overriding Spark-specific behavior + * that's not possible to customize in any other way + * + * NOTE: This is a version of [[AvroDeserializer]] impl from Spark 3.2.1 w/ w/ the following changes applied to it: + *

    + *
  1. Avoiding appending partition values to the rows read from the data file
  2. + *
  3. Schema on-read
  4. + *
+ */ +class Spark35LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValues: Boolean) extends ParquetFileFormat { + + override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { + val conf = sparkSession.sessionState.conf + conf.parquetVectorizedReaderEnabled && schema.forall(_.dataType.isInstanceOf[AtomicType]) + } + + def supportsColumnar(sparkSession: SparkSession, schema: StructType): Boolean = { + val conf = sparkSession.sessionState.conf + // Only output columnar if there is WSCG to read it. + val requiredWholeStageCodegenSettings = + conf.wholeStageEnabled && !WholeStageCodegenExec.isTooManyFields(conf, schema) + requiredWholeStageCodegenSettings && + supportBatch(sparkSession, schema) + } + + override def buildReaderWithPartitionValues(sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + hadoopConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[ParquetReadSupport].getName) + hadoopConf.set( + ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, + requiredSchema.json) + hadoopConf.set( + ParquetWriteSupport.SPARK_ROW_SCHEMA, + requiredSchema.json) + hadoopConf.set( + SQLConf.SESSION_LOCAL_TIMEZONE.key, + sparkSession.sessionState.conf.sessionLocalTimeZone) + hadoopConf.setBoolean( + SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key, + sparkSession.sessionState.conf.nestedSchemaPruningEnabled) + hadoopConf.setBoolean( + SQLConf.CASE_SENSITIVE.key, + sparkSession.sessionState.conf.caseSensitiveAnalysis) + + ParquetWriteSupport.setSchema(requiredSchema, hadoopConf) + + // Sets flags for `ParquetToSparkSchemaConverter` + hadoopConf.setBoolean( + SQLConf.PARQUET_BINARY_AS_STRING.key, + sparkSession.sessionState.conf.isParquetBinaryAsString) + hadoopConf.setBoolean( + SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, + sparkSession.sessionState.conf.isParquetINT96AsTimestamp) + // Using string value of this conf to preserve compatibility across spark versions. + hadoopConf.setBoolean( + SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key, + sparkSession.sessionState.conf.getConfString( + SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key, + SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.defaultValueString).toBoolean + ) + hadoopConf.setBoolean(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.key, sparkSession.sessionState.conf.parquetInferTimestampNTZEnabled) + hadoopConf.setBoolean(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key, sparkSession.sessionState.conf.legacyParquetNanosAsLong) + val internalSchemaStr = hadoopConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + // For Spark DataSource v1, there's no Physical Plan projection/schema pruning w/in Spark itself, + // therefore it's safe to do schema projection here + if (!isNullOrEmpty(internalSchemaStr)) { + val prunedInternalSchemaStr = + pruneInternalSchema(internalSchemaStr, requiredSchema) + hadoopConf.set(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA, prunedInternalSchemaStr) + } + + val broadcastedHadoopConf = + sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + + // TODO: if you move this into the closure it reverts to the default values. + // If true, enable using the custom RecordReader for parquet. This only works for + // a subset of the types (no complex types). + val resultSchema = StructType(partitionSchema.fields ++ requiredSchema.fields) + val sqlConf = sparkSession.sessionState.conf + val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled + val enableVectorizedReader: Boolean = + sqlConf.parquetVectorizedReaderEnabled && + resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled + val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion + val capacity = sqlConf.parquetVectorizedReaderBatchSize + val enableParquetFilterPushDown: Boolean = sqlConf.parquetFilterPushDown + val pushDownDate = sqlConf.parquetFilterPushDownDate + val pushDownTimestamp = sqlConf.parquetFilterPushDownTimestamp + val pushDownDecimal = sqlConf.parquetFilterPushDownDecimal + val pushDownStringStartWith = sqlConf.parquetFilterPushDownStringPredicate + val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold + val isCaseSensitive = sqlConf.caseSensitiveAnalysis + val parquetOptions = new ParquetOptions(options, sparkSession.sessionState.conf) + val datetimeRebaseModeInRead = parquetOptions.datetimeRebaseModeInRead + val int96RebaseModeInRead = parquetOptions.int96RebaseModeInRead + val timeZoneId = Option(sqlConf.sessionLocalTimeZone) + // Should always be set by FileSourceScanExec creating this. + // Check conf before checking option, to allow working around an issue by changing conf. + val returningBatch = sparkSession.sessionState.conf.parquetVectorizedReaderEnabled && + supportsColumnar(sparkSession, resultSchema).toString.equals("true") + + + (file: PartitionedFile) => { + assert(!shouldAppendPartitionValues || file.partitionValues.numFields == partitionSchema.size) + + val filePath = file.filePath.toPath + val split = new FileSplit(filePath, file.start, file.length, Array.empty[String]) + + val sharedConf = broadcastedHadoopConf.value.value + + // Fetch internal schema + val internalSchemaStr = sharedConf.get(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA) + // Internal schema has to be pruned at this point + val querySchemaOption = SerDeHelper.fromJson(internalSchemaStr) + + var shouldUseInternalSchema = !isNullOrEmpty(internalSchemaStr) && querySchemaOption.isPresent + + val tablePath = sharedConf.get(SparkInternalSchemaConverter.HOODIE_TABLE_PATH) + val fileSchema = if (shouldUseInternalSchema) { + val commitInstantTime = FSUtils.getCommitTime(filePath.getName).toLong; + val validCommits = sharedConf.get(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST) + InternalSchemaCache.getInternalSchemaByVersionId(commitInstantTime, tablePath, sharedConf, if (validCommits == null) "" else validCommits) + } else { + null + } + + lazy val footerFileMetaData = + ParquetFooterReader.readFooter(sharedConf, filePath, SKIP_ROW_GROUPS).getFileMetaData + // Try to push down filters when filter push-down is enabled. + val pushed = if (enableParquetFilterPushDown) { + val parquetSchema = footerFileMetaData.getSchema + val parquetFilters = if (HoodieSparkUtils.gteqSpark3_2_1) { + // NOTE: Below code could only be compiled against >= Spark 3.2.1, + // and unfortunately won't compile against Spark 3.2.0 + // However this code is runtime-compatible w/ both Spark 3.2.0 and >= Spark 3.2.1 + val datetimeRebaseSpec = + DataSourceUtils.datetimeRebaseSpec(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + new ParquetFilters( + parquetSchema, + pushDownDate, + pushDownTimestamp, + pushDownDecimal, + pushDownStringStartWith, + pushDownInFilterThreshold, + isCaseSensitive, + datetimeRebaseSpec) + } else { + // Spark 3.2.0 + val datetimeRebaseMode = + Spark35DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + createParquetFilters( + parquetSchema, + pushDownDate, + pushDownTimestamp, + pushDownDecimal, + pushDownStringStartWith, + pushDownInFilterThreshold, + isCaseSensitive, + datetimeRebaseMode) + } + filters.map(rebuildFilterFromParquet(_, fileSchema, querySchemaOption.orElse(null))) + // Collects all converted Parquet filter predicates. Notice that not all predicates can be + // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` + // is used here. + .flatMap(parquetFilters.createFilter) + .reduceOption(FilterApi.and) + } else { + None + } + + // PARQUET_INT96_TIMESTAMP_CONVERSION says to apply timezone conversions to int96 timestamps' + // *only* if the file was created by something other than "parquet-mr", so check the actual + // writer here for this file. We have to do this per-file, as each file in the table may + // have different writers. + // Define isCreatedByParquetMr as function to avoid unnecessary parquet footer reads. + def isCreatedByParquetMr: Boolean = + footerFileMetaData.getCreatedBy().startsWith("parquet-mr") + + val convertTz = + if (timestampConversion && !isCreatedByParquetMr) { + Some(DateTimeUtils.getZoneId(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + } else { + None + } + + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + + // Clone new conf + val hadoopAttemptConf = new Configuration(broadcastedHadoopConf.value.value) + val typeChangeInfos: java.util.Map[Integer, Pair[DataType, DataType]] = if (shouldUseInternalSchema) { + val mergedInternalSchema = new InternalSchemaMerger(fileSchema, querySchemaOption.get(), true, true).mergeSchema() + val mergedSchema = SparkInternalSchemaConverter.constructSparkSchemaFromInternalSchema(mergedInternalSchema) + + hadoopAttemptConf.set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, mergedSchema.json) + + SparkInternalSchemaConverter.collectTypeChangedCols(querySchemaOption.get(), mergedInternalSchema) + } else { + val (implicitTypeChangeInfo, sparkRequestSchema) = HoodieParquetFileFormatHelper.buildImplicitSchemaChangeInfo(hadoopAttemptConf, footerFileMetaData, requiredSchema) + if (!implicitTypeChangeInfo.isEmpty) { + shouldUseInternalSchema = true + hadoopAttemptConf.set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA, sparkRequestSchema.json) + } + implicitTypeChangeInfo + } + + val hadoopAttemptContext = + new TaskAttemptContextImpl(hadoopAttemptConf, attemptId) + + // Try to push down filters when filter push-down is enabled. + // Notice: This push-down is RowGroups level, not individual records. + if (pushed.isDefined) { + ParquetInputFormat.setFilterPredicate(hadoopAttemptContext.getConfiguration, pushed.get) + } + val taskContext = Option(TaskContext.get()) + if (enableVectorizedReader) { + val vectorizedReader = + if (shouldUseInternalSchema) { + val int96RebaseSpec = + DataSourceUtils.int96RebaseSpec(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + val datetimeRebaseSpec = + DataSourceUtils.datetimeRebaseSpec(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + new Spark32PlusHoodieVectorizedParquetRecordReader( + convertTz.orNull, + datetimeRebaseSpec.mode.toString, + datetimeRebaseSpec.timeZone, + int96RebaseSpec.mode.toString, + int96RebaseSpec.timeZone, + enableOffHeapColumnVector && taskContext.isDefined, + capacity, + typeChangeInfos) + } else if (HoodieSparkUtils.gteqSpark3_2_1) { + // NOTE: Below code could only be compiled against >= Spark 3.2.1, + // and unfortunately won't compile against Spark 3.2.0 + // However this code is runtime-compatible w/ both Spark 3.2.0 and >= Spark 3.2.1 + val int96RebaseSpec = + DataSourceUtils.int96RebaseSpec(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + val datetimeRebaseSpec = + DataSourceUtils.datetimeRebaseSpec(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + new VectorizedParquetRecordReader( + convertTz.orNull, + datetimeRebaseSpec.mode.toString, + datetimeRebaseSpec.timeZone, + int96RebaseSpec.mode.toString, + int96RebaseSpec.timeZone, + enableOffHeapColumnVector && taskContext.isDefined, + capacity) + } else { + // Spark 3.2.0 + val datetimeRebaseMode = + Spark35DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + val int96RebaseMode = + Spark35DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + createVectorizedParquetRecordReader( + convertTz.orNull, + datetimeRebaseMode.toString, + int96RebaseMode.toString, + enableOffHeapColumnVector && taskContext.isDefined, + capacity) + } + + // SPARK-37089: We cannot register a task completion listener to close this iterator here + // because downstream exec nodes have already registered their listeners. Since listeners + // are executed in reverse order of registration, a listener registered here would close the + // iterator while downstream exec nodes are still running. When off-heap column vectors are + // enabled, this can cause a use-after-free bug leading to a segfault. + // + // Instead, we use FileScanRDD's task completion listener to close this iterator. + val iter = new RecordReaderIterator(vectorizedReader) + try { + vectorizedReader.initialize(split, hadoopAttemptContext) + + // NOTE: We're making appending of the partitioned values to the rows read from the + // data file configurable + if (shouldAppendPartitionValues) { + logDebug(s"Appending $partitionSchema ${file.partitionValues}") + vectorizedReader.initBatch(partitionSchema, file.partitionValues) + } else { + vectorizedReader.initBatch(StructType(Nil), InternalRow.empty) + } + + if (returningBatch) { + vectorizedReader.enableReturningBatches() + } + + // UnsafeRowParquetRecordReader appends the columns internally to avoid another copy. + iter.asInstanceOf[Iterator[InternalRow]] + } catch { + case e: Throwable => + // SPARK-23457: In case there is an exception in initialization, close the iterator to + // avoid leaking resources. + iter.close() + throw e + } + } else { + logDebug(s"Falling back to parquet-mr") + val readSupport = if (HoodieSparkUtils.gteqSpark3_2_1) { + // ParquetRecordReader returns InternalRow + // NOTE: Below code could only be compiled against >= Spark 3.2.1, + // and unfortunately won't compile against Spark 3.2.0 + // However this code is runtime-compatible w/ both Spark 3.2.0 and >= Spark 3.2.1 + val int96RebaseSpec = + DataSourceUtils.int96RebaseSpec(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + val datetimeRebaseSpec = + DataSourceUtils.datetimeRebaseSpec(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + new ParquetReadSupport( + convertTz, + enableVectorizedReader = false, + datetimeRebaseSpec, + int96RebaseSpec) + } else { + val datetimeRebaseMode = + Spark35DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead) + val int96RebaseMode = + Spark35DataSourceUtils.int96RebaseMode(footerFileMetaData.getKeyValueMetaData.get, int96RebaseModeInRead) + createParquetReadSupport( + convertTz, + /* enableVectorizedReader = */ false, + datetimeRebaseMode, + int96RebaseMode) + } + + val reader = if (pushed.isDefined && enableRecordFilter) { + val parquetFilter = FilterCompat.get(pushed.get, null) + new ParquetRecordReader[InternalRow](readSupport, parquetFilter) + } else { + new ParquetRecordReader[InternalRow](readSupport) + } + val iter = new RecordReaderIterator[InternalRow](reader) + try { + reader.initialize(split, hadoopAttemptContext) + + val fullSchema = DataTypeUtils.toAttributes(requiredSchema) ++ DataTypeUtils.toAttributes(partitionSchema) + val unsafeProjection = if (typeChangeInfos.isEmpty) { + GenerateUnsafeProjection.generate(fullSchema, fullSchema) + } else { + // find type changed. + val newSchema = new StructType(requiredSchema.fields.zipWithIndex.map { case (f, i) => + if (typeChangeInfos.containsKey(i)) { + StructField(f.name, typeChangeInfos.get(i).getRight, f.nullable, f.metadata) + } else f + }) + val newFullSchema = DataTypeUtils.toAttributes(newSchema) ++ DataTypeUtils.toAttributes(partitionSchema) + val castSchema = newFullSchema.zipWithIndex.map { case (attr, i) => + if (typeChangeInfos.containsKey(i)) { + val srcType = typeChangeInfos.get(i).getRight + val dstType = typeChangeInfos.get(i).getLeft + val needTimeZone = Cast.needsTimeZone(srcType, dstType) + Cast(attr, dstType, if (needTimeZone) timeZoneId else None) + } else attr + } + GenerateUnsafeProjection.generate(castSchema, newFullSchema) + } + + // NOTE: We're making appending of the partitioned values to the rows read from the + // data file configurable + if (!shouldAppendPartitionValues || partitionSchema.length == 0) { + // There is no partition columns + iter.map(unsafeProjection) + } else { + val joinedRow = new JoinedRow() + iter.map(d => unsafeProjection(joinedRow(d, file.partitionValues))) + } + } catch { + case e: Throwable => + // SPARK-23457: In case there is an exception in initialization, close the iterator to + // avoid leaking resources. + iter.close() + throw e + } + } + } + } +} + +object Spark35LegacyHoodieParquetFileFormat { + + /** + * NOTE: This method is specific to Spark 3.2.0 + */ + private def createParquetFilters(args: Any*): ParquetFilters = { + // NOTE: ParquetFilters ctor args contain Scala enum, therefore we can't look it + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[ParquetFilters].getConstructors.maxBy(_.getParameterCount) + ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) + .asInstanceOf[ParquetFilters] + } + + /** + * NOTE: This method is specific to Spark 3.2.0 + */ + private def createParquetReadSupport(args: Any*): ParquetReadSupport = { + // NOTE: ParquetReadSupport ctor args contain Scala enum, therefore we can't look it + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[ParquetReadSupport].getConstructors.maxBy(_.getParameterCount) + ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) + .asInstanceOf[ParquetReadSupport] + } + + /** + * NOTE: This method is specific to Spark 3.2.0 + */ + private def createVectorizedParquetRecordReader(args: Any*): VectorizedParquetRecordReader = { + // NOTE: ParquetReadSupport ctor args contain Scala enum, therefore we can't look it + // up by arg types, and have to instead rely on the number of args based on individual class; + // the ctor order is not guaranteed + val ctor = classOf[VectorizedParquetRecordReader].getConstructors.maxBy(_.getParameterCount) + ctor.newInstance(args.map(_.asInstanceOf[AnyRef]): _*) + .asInstanceOf[VectorizedParquetRecordReader] + } + + def pruneInternalSchema(internalSchemaStr: String, requiredSchema: StructType): String = { + val querySchemaOption = SerDeHelper.fromJson(internalSchemaStr) + if (querySchemaOption.isPresent && requiredSchema.nonEmpty) { + val prunedSchema = SparkInternalSchemaConverter.convertAndPruneStructTypeToInternalSchema(requiredSchema, querySchemaOption.get()) + SerDeHelper.toJson(prunedSchema) + } else { + internalSchemaStr + } + } + + private def rebuildFilterFromParquet(oldFilter: Filter, fileSchema: InternalSchema, querySchema: InternalSchema): Filter = { + if (fileSchema == null || querySchema == null) { + oldFilter + } else { + oldFilter match { + case eq: EqualTo => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eq.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eq.copy(attribute = newAttribute) + case eqs: EqualNullSafe => + val newAttribute = InternalSchemaUtils.reBuildFilterName(eqs.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else eqs.copy(attribute = newAttribute) + case gt: GreaterThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gt.copy(attribute = newAttribute) + case gtr: GreaterThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(gtr.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else gtr.copy(attribute = newAttribute) + case lt: LessThan => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lt.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lt.copy(attribute = newAttribute) + case lte: LessThanOrEqual => + val newAttribute = InternalSchemaUtils.reBuildFilterName(lte.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else lte.copy(attribute = newAttribute) + case i: In => + val newAttribute = InternalSchemaUtils.reBuildFilterName(i.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else i.copy(attribute = newAttribute) + case isn: IsNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isn.copy(attribute = newAttribute) + case isnn: IsNotNull => + val newAttribute = InternalSchemaUtils.reBuildFilterName(isnn.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else isnn.copy(attribute = newAttribute) + case And(left, right) => + And(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Or(left, right) => + Or(rebuildFilterFromParquet(left, fileSchema, querySchema), rebuildFilterFromParquet(right, fileSchema, querySchema)) + case Not(child) => + Not(rebuildFilterFromParquet(child, fileSchema, querySchema)) + case ssw: StringStartsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ssw.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ssw.copy(attribute = newAttribute) + case ses: StringEndsWith => + val newAttribute = InternalSchemaUtils.reBuildFilterName(ses.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else ses.copy(attribute = newAttribute) + case sc: StringContains => + val newAttribute = InternalSchemaUtils.reBuildFilterName(sc.attribute, fileSchema, querySchema) + if (newAttribute.isEmpty) AlwaysTrue else sc.copy(attribute = newAttribute) + case AlwaysTrue => + AlwaysTrue + case AlwaysFalse => + AlwaysFalse + case _ => + AlwaysTrue + } + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/Spark35ResolveHudiAlterTableCommand.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/Spark35ResolveHudiAlterTableCommand.scala new file mode 100644 index 000000000000..160804f62b37 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/Spark35ResolveHudiAlterTableCommand.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi + +import org.apache.hudi.common.config.HoodieCommonConfig +import org.apache.hudi.internal.schema.action.TableChange.ColumnChangeID +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.ResolvedTable +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.hudi.command.{AlterTableCommand => HudiAlterTableCommand} + +/** + * Rule to mostly resolve, normalize and rewrite column names based on case sensitivity. + * for alter table column commands. + */ +class Spark35ResolveHudiAlterTableCommand(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = { + if (schemaEvolutionEnabled) { + plan.resolveOperatorsUp { + case set@SetTableProperties(ResolvedHoodieV2TablePlan(t), _) if set.resolved => + HudiAlterTableCommand(t.v1Table, set.changes, ColumnChangeID.PROPERTY_CHANGE) + case unSet@UnsetTableProperties(ResolvedHoodieV2TablePlan(t), _, _) if unSet.resolved => + HudiAlterTableCommand(t.v1Table, unSet.changes, ColumnChangeID.PROPERTY_CHANGE) + case drop@DropColumns(ResolvedHoodieV2TablePlan(t), _, _) if drop.resolved => + HudiAlterTableCommand(t.v1Table, drop.changes, ColumnChangeID.DELETE) + case add@AddColumns(ResolvedHoodieV2TablePlan(t), _) if add.resolved => + HudiAlterTableCommand(t.v1Table, add.changes, ColumnChangeID.ADD) + case renameColumn@RenameColumn(ResolvedHoodieV2TablePlan(t), _, _) if renameColumn.resolved => + HudiAlterTableCommand(t.v1Table, renameColumn.changes, ColumnChangeID.UPDATE) + case alter@AlterColumn(ResolvedHoodieV2TablePlan(t), _, _, _, _, _, _) if alter.resolved => + HudiAlterTableCommand(t.v1Table, alter.changes, ColumnChangeID.UPDATE) + case replace@ReplaceColumns(ResolvedHoodieV2TablePlan(t), _) if replace.resolved => + HudiAlterTableCommand(t.v1Table, replace.changes, ColumnChangeID.REPLACE) + } + } else { + plan + } + } + + private def schemaEvolutionEnabled: Boolean = + sparkSession.sessionState.conf.getConfString(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key, + HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.defaultValue.toString).toBoolean + + object ResolvedHoodieV2TablePlan { + def unapply(plan: LogicalPlan): Option[HoodieInternalV2Table] = { + plan match { + case ResolvedTable(_, _, v2Table: HoodieInternalV2Table, _) => Some(v2Table) + case _ => None + } + } + } +} + diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark35Analysis.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark35Analysis.scala new file mode 100644 index 000000000000..f137c9dea6c3 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark35Analysis.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.analysis + +import org.apache.hudi.DefaultSource + +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.hudi.ProvidesHoodieConfig +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table +import org.apache.spark.sql.{SQLContext, SparkSession} + +/** + * NOTE: PLEASE READ CAREFULLY + * + * Since Hudi relations don't currently implement DS V2 Read API, we have to fallback to V1 here. + * Such fallback will have considerable performance impact, therefore it's only performed in cases + * where V2 API have to be used. Currently only such use-case is using of Schema Evolution feature + * + * Check out HUDI-4178 for more details + */ +case class HoodieSpark35DataSourceV2ToV1Fallback(sparkSession: SparkSession) extends Rule[LogicalPlan] + with ProvidesHoodieConfig { + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + // The only place we're avoiding fallback is in [[AlterTableCommand]]s since + // current implementation relies on DSv2 features + case _: AlterTableCommand => plan + + // NOTE: Unfortunately, [[InsertIntoStatement]] is implemented in a way that doesn't expose + // target relation as a child (even though there's no good reason for that) + case iis@InsertIntoStatement(rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _), _, _, _, _, _, _) => + iis.copy(table = convertToV1(rv2, v2Table)) + + case _ => + plan.resolveOperatorsDown { + case rv2@DataSourceV2Relation(v2Table: HoodieInternalV2Table, _, _, _, _) => convertToV1(rv2, v2Table) + } + } + + private def convertToV1(rv2: DataSourceV2Relation, v2Table: HoodieInternalV2Table) = { + val output = rv2.output + val catalogTable = v2Table.catalogTable.map(_ => v2Table.v1Table) + val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), + buildHoodieConfig(v2Table.hoodieCatalogTable), v2Table.hoodieCatalogTable.tableSchema) + + LogicalRelation(relation, output, catalogTable, isStreaming = false) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala new file mode 100644 index 000000000000..c2f3accf874b --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala @@ -0,0 +1,3426 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.parser + +import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} +import org.antlr.v4.runtime.{ParserRuleContext, Token} +import org.apache.hudi.spark.sql.parser.HoodieSqlBaseParser._ +import org.apache.hudi.spark.sql.parser.{HoodieSqlBaseBaseVisitor, HoodieSqlBaseParser} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} +import org.apache.spark.sql.catalyst.parser.ParserUtils.{checkDuplicateClauses, checkDuplicateKeys, entry, escapedIdentifier, operationNotAllowed, source, string, stringWithoutUnescape, validate, withOrigin} +import org.apache.spark.sql.catalyst.parser.{EnhancedLogicalPlan, ParseException, ParserInterface} +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils, truncatedString} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.BucketSpecHelper +import org.apache.spark.sql.connector.catalog.TableCatalog +import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition +import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform, Expression => V2Expression} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils.isTesting +import org.apache.spark.util.random.RandomSampler + +import java.util.Locale +import java.util.concurrent.TimeUnit +import javax.xml.bind.DatatypeConverter +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +/** + * The AstBuilder for HoodieSqlParser to parser the AST tree to Logical Plan. + * Here we only do the parser for the extended sql syntax. e.g MergeInto. For + * other sql syntax we use the delegate sql parser which is the SparkSqlParser. + */ +class HoodieSpark3_5ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterface) + extends HoodieSqlBaseBaseVisitor[AnyRef] with Logging { + + protected def typedVisit[T](ctx: ParseTree): T = { + ctx.accept(this).asInstanceOf[T] + } + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + override def visitChildren(node: RuleNode): AnyRef = { + if (node.getChildCount == 1) { + node.getChild(0).accept(this) + } else { + null + } + } + + /** + * Create an aliased table reference. This is typically used in FROM clauses. + */ + override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { + val tableId = visitMultipartIdentifier(ctx.multipartIdentifier()) + val relation = UnresolvedRelation(tableId) + val table = mayApplyAliasPlan( + ctx.tableAlias, relation.optionalMap(ctx.temporalClause)(withTimeTravel)) + table.optionalMap(ctx.sample)(withSample) + } + + private def withTimeTravel( + ctx: TemporalClauseContext, plan: LogicalPlan): LogicalPlan = withOrigin(ctx) { + val v = ctx.version + val version = if (ctx.INTEGER_VALUE != null) { + Some(v.getText) + } else { + Option(v).map(string) + } + + val timestamp = Option(ctx.timestamp).map(expression) + if (timestamp.exists(_.references.nonEmpty)) { + throw new ParseException( + "timestamp expression cannot refer to any columns", ctx.timestamp) + } + if (timestamp.exists(e => SubqueryExpression.hasSubquery(e))) { + throw new ParseException( + "timestamp expression cannot contain subqueries", ctx.timestamp) + } + + TimeTravelRelation(plan, timestamp, version) + } + + // ============== The following code is fork from org.apache.spark.sql.catalyst.parser.AstBuilder + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { + visit(ctx.statement).asInstanceOf[LogicalPlan] + } + + override def visitSingleExpression(ctx: SingleExpressionContext): Expression = withOrigin(ctx) { + visitNamedExpression(ctx.namedExpression) + } + + override def visitSingleTableIdentifier( + ctx: SingleTableIdentifierContext): TableIdentifier = withOrigin(ctx) { + visitTableIdentifier(ctx.tableIdentifier) + } + + override def visitSingleFunctionIdentifier( + ctx: SingleFunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { + visitFunctionIdentifier(ctx.functionIdentifier) + } + + override def visitSingleMultipartIdentifier( + ctx: SingleMultipartIdentifierContext): Seq[String] = withOrigin(ctx) { + visitMultipartIdentifier(ctx.multipartIdentifier) + } + + override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { + typedVisit[DataType](ctx.dataType) + } + + override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = { + val schema = StructType(visitColTypeList(ctx.colTypeList)) + withOrigin(ctx)(schema) + } + + /* ******************************************************************************************** + * Plan parsing + * ******************************************************************************************** */ + protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree) + + /** + * Create a top-level plan with Common Table Expressions. + */ + override def visitQuery(ctx: QueryContext): LogicalPlan = withOrigin(ctx) { + val query = plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses) + + // Apply CTEs + query.optionalMap(ctx.ctes)(withCTE) + } + + override def visitDmlStatement(ctx: DmlStatementContext): AnyRef = withOrigin(ctx) { + val dmlStmt = plan(ctx.dmlStatementNoWith) + // Apply CTEs + dmlStmt.optionalMap(ctx.ctes)(withCTE) + } + + private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = { + val ctes = ctx.namedQuery.asScala.map { nCtx => + val namedQuery = visitNamedQuery(nCtx) + (namedQuery.alias, namedQuery) + } + // Check for duplicate names. + val duplicates = ctes.groupBy(_._1).filter(_._2.size > 1).keys + if (duplicates.nonEmpty) { + throw new ParseException(s"CTE definition can't have duplicate names: ${duplicates.mkString("'", "', '", "'")}.", ctx) + } + UnresolvedWith(plan, ctes.toSeq) + } + + /** + * Create a logical query plan for a hive-style FROM statement body. + */ + private def withFromStatementBody( + ctx: FromStatementBodyContext, plan: LogicalPlan): LogicalPlan = withOrigin(ctx) { + // two cases for transforms and selects + if (ctx.transformClause != null) { + withTransformQuerySpecification( + ctx, + ctx.transformClause, + ctx.lateralView, + ctx.whereClause, + ctx.aggregationClause, + ctx.havingClause, + ctx.windowClause, + plan + ) + } else { + withSelectQuerySpecification( + ctx, + ctx.selectClause, + ctx.lateralView, + ctx.whereClause, + ctx.aggregationClause, + ctx.havingClause, + ctx.windowClause, + plan + ) + } + } + + override def visitFromStatement(ctx: FromStatementContext): LogicalPlan = withOrigin(ctx) { + val from = visitFromClause(ctx.fromClause) + val selects = ctx.fromStatementBody.asScala.map { body => + withFromStatementBody(body, from). + // Add organization statements. + optionalMap(body.queryOrganization)(withQueryResultClauses) + } + // If there are multiple SELECT just UNION them together into one query. + if (selects.length == 1) { + selects.head + } else { + Union(selects.toSeq) + } + } + + /** + * Create a named logical plan. + * + * This is only used for Common Table Expressions. + */ + override def visitNamedQuery(ctx: NamedQueryContext): SubqueryAlias = withOrigin(ctx) { + val subQuery: LogicalPlan = plan(ctx.query).optionalMap(ctx.columnAliases)( + (columnAliases, plan) => + UnresolvedSubqueryColumnAliases(visitIdentifierList(columnAliases), plan) + ) + SubqueryAlias(ctx.name.getText, subQuery) + } + + /** + * Create a logical plan which allows for multiple inserts using one 'from' statement. These + * queries have the following SQL form: + * {{{ + * [WITH cte...]? + * FROM src + * [INSERT INTO tbl1 SELECT *]+ + * }}} + * For example: + * {{{ + * FROM db.tbl1 A + * INSERT INTO dbo.tbl1 SELECT * WHERE A.value = 10 LIMIT 5 + * INSERT INTO dbo.tbl2 SELECT * WHERE A.value = 12 + * }}} + * This (Hive) feature cannot be combined with set-operators. + */ + override def visitMultiInsertQuery(ctx: MultiInsertQueryContext): LogicalPlan = withOrigin(ctx) { + val from = visitFromClause(ctx.fromClause) + + // Build the insert clauses. + val inserts = ctx.multiInsertQueryBody.asScala.map { body => + withInsertInto(body.insertInto, + withFromStatementBody(body.fromStatementBody, from). + optionalMap(body.fromStatementBody.queryOrganization)(withQueryResultClauses)) + } + + // If there are multiple INSERTS just UNION them together into one query. + if (inserts.length == 1) { + inserts.head + } else { + Union(inserts.toSeq) + } + } + + /** + * Create a logical plan for a regular (single-insert) query. + */ + override def visitSingleInsertQuery( + ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) { + withInsertInto( + ctx.insertInto(), + plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)) + } + + /** + * Parameters used for writing query to a table: + * (UnresolvedRelation, tableColumnList, partitionKeys, ifPartitionNotExists). + */ + type InsertTableParams = (UnresolvedRelation, Seq[String], Map[String, Option[String]], Boolean) + + /** + * Parameters used for writing query to a directory: (isLocal, CatalogStorageFormat, provider). + */ + type InsertDirParams = (Boolean, CatalogStorageFormat, Option[String]) + + /** + * Add an + * {{{ + * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? [identifierList] + * INSERT INTO [TABLE] tableIdentifier [partitionSpec] [identifierList] + * INSERT OVERWRITE [LOCAL] DIRECTORY STRING [rowFormat] [createFileFormat] + * INSERT OVERWRITE [LOCAL] DIRECTORY [STRING] tableProvider [OPTIONS tablePropertyList] + * }}} + * operation to logical plan + */ + private def withInsertInto( + ctx: InsertIntoContext, + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + ctx match { + case table: InsertIntoTableContext => + val (relation, cols, partition, ifPartitionNotExists) = visitInsertIntoTable(table) + InsertIntoStatement( + relation, + partition, + cols, + query, + overwrite = false, + ifPartitionNotExists) + case table: InsertOverwriteTableContext => + val (relation, cols, partition, ifPartitionNotExists) = visitInsertOverwriteTable(table) + InsertIntoStatement( + relation, + partition, + cols, + query, + overwrite = true, + ifPartitionNotExists) + case dir: InsertOverwriteDirContext => + val (isLocal, storage, provider) = visitInsertOverwriteDir(dir) + InsertIntoDir(isLocal, storage, provider, query, overwrite = true) + case hiveDir: InsertOverwriteHiveDirContext => + val (isLocal, storage, provider) = visitInsertOverwriteHiveDir(hiveDir) + InsertIntoDir(isLocal, storage, provider, query, overwrite = true) + case _ => + throw new ParseException("Invalid InsertIntoContext", ctx) + } + } + + /** + * Add an INSERT INTO TABLE operation to the logical plan. + */ + override def visitInsertIntoTable( + ctx: InsertIntoTableContext): InsertTableParams = withOrigin(ctx) { + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) + val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) + + if (ctx.EXISTS != null) { + operationNotAllowed("INSERT INTO ... IF NOT EXISTS", ctx) + } + + (createUnresolvedRelation(ctx.multipartIdentifier), cols, partitionKeys, false) + } + + /** + * Add an INSERT OVERWRITE TABLE operation to the logical plan. + */ + override def visitInsertOverwriteTable( + ctx: InsertOverwriteTableContext): InsertTableParams = withOrigin(ctx) { + assert(ctx.OVERWRITE() != null) + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) + val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) + + val dynamicPartitionKeys: Map[String, Option[String]] = partitionKeys.filter(_._2.isEmpty) + if (ctx.EXISTS != null && dynamicPartitionKeys.nonEmpty) { + operationNotAllowed("IF NOT EXISTS with dynamic partitions: " + + dynamicPartitionKeys.keys.mkString(", "), ctx) + } + + (createUnresolvedRelation(ctx.multipartIdentifier), cols, partitionKeys, ctx.EXISTS() != null) + } + + /** + * Write to a directory, returning a [[InsertIntoDir]] logical plan. + */ + override def visitInsertOverwriteDir( + ctx: InsertOverwriteDirContext): InsertDirParams = withOrigin(ctx) { + throw new ParseException("INSERT OVERWRITE DIRECTORY is not supported", ctx) + } + + /** + * Write to a directory, returning a [[InsertIntoDir]] logical plan. + */ + override def visitInsertOverwriteHiveDir( + ctx: InsertOverwriteHiveDirContext): InsertDirParams = withOrigin(ctx) { + throw new ParseException("INSERT OVERWRITE DIRECTORY is not supported", ctx) + } + + private def getTableAliasWithoutColumnAlias( + ctx: TableAliasContext, op: String): Option[String] = { + if (ctx == null) { + None + } else { + val ident = ctx.strictIdentifier() + if (ctx.identifierList() != null) { + throw new ParseException(s"Columns aliases are not allowed in $op.", ctx.identifierList()) + } + if (ident != null) Some(ident.getText) else None + } + } + + override def visitDeleteFromTable( + ctx: DeleteFromTableContext): LogicalPlan = withOrigin(ctx) { + val table = createUnresolvedRelation(ctx.multipartIdentifier()) + val tableAlias = getTableAliasWithoutColumnAlias(ctx.tableAlias(), "DELETE") + val aliasedTable = tableAlias.map(SubqueryAlias(_, table)).getOrElse(table) + val predicate = if (ctx.whereClause() != null) { + Some(expression(ctx.whereClause().booleanExpression())) + } else { + None + } + DeleteFromTable(aliasedTable, predicate.get) + } + + override def visitUpdateTable(ctx: UpdateTableContext): LogicalPlan = withOrigin(ctx) { + val table = createUnresolvedRelation(ctx.multipartIdentifier()) + val tableAlias = getTableAliasWithoutColumnAlias(ctx.tableAlias(), "UPDATE") + val aliasedTable = tableAlias.map(SubqueryAlias(_, table)).getOrElse(table) + val assignments = withAssignments(ctx.setClause().assignmentList()) + val predicate = if (ctx.whereClause() != null) { + Some(expression(ctx.whereClause().booleanExpression())) + } else { + None + } + + UpdateTable(aliasedTable, assignments, predicate) + } + + private def withAssignments(assignCtx: AssignmentListContext): Seq[Assignment] = + withOrigin(assignCtx) { + assignCtx.assignment().asScala.map { assign => + Assignment(UnresolvedAttribute(visitMultipartIdentifier(assign.key)), + expression(assign.value)) + }.toSeq + } + + override def visitMergeIntoTable(ctx: MergeIntoTableContext): LogicalPlan = withOrigin(ctx) { + val targetTable = createUnresolvedRelation(ctx.target) + val targetTableAlias = getTableAliasWithoutColumnAlias(ctx.targetAlias, "MERGE") + val aliasedTarget = targetTableAlias.map(SubqueryAlias(_, targetTable)).getOrElse(targetTable) + + val sourceTableOrQuery = if (ctx.source != null) { + createUnresolvedRelation(ctx.source) + } else if (ctx.sourceQuery != null) { + visitQuery(ctx.sourceQuery) + } else { + throw new ParseException("Empty source for merge: you should specify a source" + + " table/subquery in merge.", ctx.source) + } + val sourceTableAlias = getTableAliasWithoutColumnAlias(ctx.sourceAlias, "MERGE") + val aliasedSource = + sourceTableAlias.map(SubqueryAlias(_, sourceTableOrQuery)).getOrElse(sourceTableOrQuery) + + val mergeCondition = expression(ctx.mergeCondition) + + val matchedActions = ctx.matchedClause().asScala.map { + clause => { + if (clause.matchedAction().DELETE() != null) { + DeleteAction(Option(clause.matchedCond).map(expression)) + } else if (clause.matchedAction().UPDATE() != null) { + val condition = Option(clause.matchedCond).map(expression) + if (clause.matchedAction().ASTERISK() != null) { + UpdateStarAction(condition) + } else { + UpdateAction(condition, withAssignments(clause.matchedAction().assignmentList())) + } + } else { + // It should not be here. + throw new ParseException(s"Unrecognized matched action: ${clause.matchedAction().getText}", + clause.matchedAction()) + } + } + } + val notMatchedActions = ctx.notMatchedClause().asScala.map { + clause => { + if (clause.notMatchedAction().INSERT() != null) { + val condition = Option(clause.notMatchedCond).map(expression) + if (clause.notMatchedAction().ASTERISK() != null) { + InsertStarAction(condition) + } else { + val columns = clause.notMatchedAction().columns.multipartIdentifier() + .asScala.map(attr => UnresolvedAttribute(visitMultipartIdentifier(attr))) + val values = clause.notMatchedAction().expression().asScala.map(expression) + if (columns.size != values.size) { + throw new ParseException("The number of inserted values cannot match the fields.", + clause.notMatchedAction()) + } + InsertAction(condition, columns.zip(values).map(kv => Assignment(kv._1, kv._2)).toSeq) + } + } else { + // It should not be here. + throw new ParseException(s"Unrecognized not matched action: ${clause.notMatchedAction().getText}", + clause.notMatchedAction()) + } + } + } + if (matchedActions.isEmpty && notMatchedActions.isEmpty) { + throw new ParseException("There must be at least one WHEN clause in a MERGE statement", ctx) + } + // children being empty means that the condition is not set + val matchedActionSize = matchedActions.length + if (matchedActionSize >= 2 && !matchedActions.init.forall(_.condition.nonEmpty)) { + throw new ParseException("When there are more than one MATCHED clauses in a MERGE " + + "statement, only the last MATCHED clause can omit the condition.", ctx) + } + val notMatchedActionSize = notMatchedActions.length + if (notMatchedActionSize >= 2 && !notMatchedActions.init.forall(_.condition.nonEmpty)) { + throw new ParseException("When there are more than one NOT MATCHED clauses in a MERGE " + + "statement, only the last NOT MATCHED clause can omit the condition.", ctx) + } + + MergeIntoTable( + aliasedTarget, + aliasedSource, + mergeCondition, + matchedActions.toSeq, + notMatchedActions.toSeq, + Seq.empty) + } + + /** + * Create a partition specification map. + */ + override def visitPartitionSpec( + ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) { + val legacyNullAsString = + conf.getConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL) + val parts = ctx.partitionVal.asScala.map { pVal => + val name = pVal.identifier.getText + val value = Option(pVal.constant).map(v => visitStringConstant(v, legacyNullAsString)) + name -> value + } + // Before calling `toMap`, we check duplicated keys to avoid silently ignore partition values + // in partition spec like PARTITION(a='1', b='2', a='3'). The real semantical check for + // partition columns will be done in analyzer. + if (conf.caseSensitiveAnalysis) { + checkDuplicateKeys(parts.toSeq, ctx) + } else { + checkDuplicateKeys(parts.map(kv => kv._1.toLowerCase(Locale.ROOT) -> kv._2).toSeq, ctx) + } + parts.toMap + } + + /** + * Create a partition specification map without optional values. + */ + protected def visitNonOptionalPartitionSpec( + ctx: PartitionSpecContext): Map[String, String] = withOrigin(ctx) { + visitPartitionSpec(ctx).map { + case (key, None) => throw new ParseException(s"Found an empty partition key '$key'.", ctx) + case (key, Some(value)) => key -> value + } + } + + /** + * Convert a constant of any type into a string. This is typically used in DDL commands, and its + * main purpose is to prevent slight differences due to back to back conversions i.e.: + * String -> Literal -> String. + */ + protected def visitStringConstant( + ctx: ConstantContext, + legacyNullAsString: Boolean): String = withOrigin(ctx) { + expression(ctx) match { + case Literal(null, _) if !legacyNullAsString => null + case l@Literal(null, _) => l.toString + case l: Literal => + // TODO For v2 commands, we will cast the string back to its actual value, + // which is a waste and can be improved in the future. + Cast(l, StringType, Some(conf.sessionLocalTimeZone)).eval().toString + case other => + throw new IllegalArgumentException(s"Only literals are allowed in the " + + s"partition spec, but got ${other.sql}") + } + } + + /** + * Add ORDER BY/SORT BY/CLUSTER BY/DISTRIBUTE BY/LIMIT/WINDOWS clauses to the logical plan. These + * clauses determine the shape (ordering/partitioning/rows) of the query result. + */ + private def withQueryResultClauses( + ctx: QueryOrganizationContext, + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + import ctx._ + + // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY clause. + val withOrder = if ( + !order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) { + // ORDER BY ... + Sort(order.asScala.map(visitSortItem).toSeq, global = true, query) + } else if (order.isEmpty && !sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) { + // SORT BY ... + Sort(sort.asScala.map(visitSortItem).toSeq, global = false, query) + } else if (order.isEmpty && sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) { + // DISTRIBUTE BY ... + withRepartitionByExpression(ctx, expressionList(distributeBy), query) + } else if (order.isEmpty && !sort.isEmpty && !distributeBy.isEmpty && clusterBy.isEmpty) { + // SORT BY ... DISTRIBUTE BY ... + Sort( + sort.asScala.map(visitSortItem).toSeq, + global = false, + withRepartitionByExpression(ctx, expressionList(distributeBy), query)) + } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && !clusterBy.isEmpty) { + // CLUSTER BY ... + val expressions = expressionList(clusterBy) + Sort( + expressions.map(SortOrder(_, Ascending)), + global = false, + withRepartitionByExpression(ctx, expressions, query)) + } else if (order.isEmpty && sort.isEmpty && distributeBy.isEmpty && clusterBy.isEmpty) { + // [EMPTY] + query + } else { + throw new ParseException( + "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported", ctx) + } + + // WINDOWS + val withWindow = withOrder.optionalMap(windowClause)(withWindowClause) + + // LIMIT + // - LIMIT ALL is the same as omitting the LIMIT clause + withWindow.optional(limit) { + Limit(typedVisit(limit), withWindow) + } + } + + /** + * Create a clause for DISTRIBUTE BY. + */ + protected def withRepartitionByExpression( + ctx: QueryOrganizationContext, + expressions: Seq[Expression], + query: LogicalPlan): LogicalPlan = { + RepartitionByExpression(expressions, query, None) + } + + override def visitTransformQuerySpecification( + ctx: TransformQuerySpecificationContext): LogicalPlan = withOrigin(ctx) { + val from = OneRowRelation().optional(ctx.fromClause) { + visitFromClause(ctx.fromClause) + } + withTransformQuerySpecification( + ctx, + ctx.transformClause, + ctx.lateralView, + ctx.whereClause, + ctx.aggregationClause, + ctx.havingClause, + ctx.windowClause, + from + ) + } + + override def visitRegularQuerySpecification( + ctx: RegularQuerySpecificationContext): LogicalPlan = withOrigin(ctx) { + val from = OneRowRelation().optional(ctx.fromClause) { + visitFromClause(ctx.fromClause) + } + withSelectQuerySpecification( + ctx, + ctx.selectClause, + ctx.lateralView, + ctx.whereClause, + ctx.aggregationClause, + ctx.havingClause, + ctx.windowClause, + from + ) + } + + override def visitNamedExpressionSeq( + ctx: NamedExpressionSeqContext): Seq[Expression] = { + Option(ctx).toSeq + .flatMap(_.namedExpression.asScala) + .map(typedVisit[Expression]) + } + + override def visitExpressionSeq(ctx: ExpressionSeqContext): Seq[Expression] = { + Option(ctx).toSeq + .flatMap(_.expression.asScala) + .map(typedVisit[Expression]) + } + + /** + * Create a logical plan using a having clause. + */ + private def withHavingClause( + ctx: HavingClauseContext, plan: LogicalPlan): LogicalPlan = { + // Note that we add a cast to non-predicate expressions. If the expression itself is + // already boolean, the optimizer will get rid of the unnecessary cast. + val predicate = expression(ctx.booleanExpression) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + UnresolvedHaving(predicate, plan) + } + + /** + * Create a logical plan using a where clause. + */ + private def withWhereClause(ctx: WhereClauseContext, plan: LogicalPlan): LogicalPlan = { + Filter(expression(ctx.booleanExpression), plan) + } + + /** + * Add a hive-style transform (SELECT TRANSFORM/MAP/REDUCE) query specification to a logical plan. + */ + private def withTransformQuerySpecification( + ctx: ParserRuleContext, + transformClause: TransformClauseContext, + lateralView: java.util.List[LateralViewContext], + whereClause: WhereClauseContext, + aggregationClause: AggregationClauseContext, + havingClause: HavingClauseContext, + windowClause: WindowClauseContext, + relation: LogicalPlan): LogicalPlan = withOrigin(ctx) { + if (transformClause.setQuantifier != null) { + throw new ParseException("TRANSFORM does not support DISTINCT/ALL in inputs", transformClause.setQuantifier) + } + // Create the attributes. + val (attributes, schemaLess) = if (transformClause.colTypeList != null) { + // Typed return columns. + (DataTypeUtils.toAttributes(createSchema(transformClause.colTypeList)), false) + } else if (transformClause.identifierSeq != null) { + // Untyped return columns. + val attrs = visitIdentifierSeq(transformClause.identifierSeq).map { name => + AttributeReference(name, StringType, nullable = true)() + } + (attrs, false) + } else { + (Seq(AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), true) + } + + val plan = visitCommonSelectQueryClausePlan( + relation, + visitExpressionSeq(transformClause.expressionSeq), + lateralView, + whereClause, + aggregationClause, + havingClause, + windowClause, + isDistinct = false) + + ScriptTransformation( + string(transformClause.script), + attributes, + plan, + withScriptIOSchema( + ctx, + transformClause.inRowFormat, + transformClause.recordWriter, + transformClause.outRowFormat, + transformClause.recordReader, + schemaLess + ) + ) + } + + /** + * Add a regular (SELECT) query specification to a logical plan. The query specification + * is the core of the logical plan, this is where sourcing (FROM clause), projection (SELECT), + * aggregation (GROUP BY ... HAVING ...) and filtering (WHERE) takes place. + * + * Note that query hints are ignored (both by the parser and the builder). + */ + private def withSelectQuerySpecification( + ctx: ParserRuleContext, + selectClause: SelectClauseContext, + lateralView: java.util.List[LateralViewContext], + whereClause: WhereClauseContext, + aggregationClause: AggregationClauseContext, + havingClause: HavingClauseContext, + windowClause: WindowClauseContext, + relation: LogicalPlan): LogicalPlan = withOrigin(ctx) { + val isDistinct = selectClause.setQuantifier() != null && + selectClause.setQuantifier().DISTINCT() != null + + val plan = visitCommonSelectQueryClausePlan( + relation, + visitNamedExpressionSeq(selectClause.namedExpressionSeq), + lateralView, + whereClause, + aggregationClause, + havingClause, + windowClause, + isDistinct) + + // Hint + selectClause.hints.asScala.foldRight(plan)(withHints) + } + + def visitCommonSelectQueryClausePlan( + relation: LogicalPlan, + expressions: Seq[Expression], + lateralView: java.util.List[LateralViewContext], + whereClause: WhereClauseContext, + aggregationClause: AggregationClauseContext, + havingClause: HavingClauseContext, + windowClause: WindowClauseContext, + isDistinct: Boolean): LogicalPlan = { + // Add lateral views. + val withLateralView = lateralView.asScala.foldLeft(relation)(withGenerate) + + // Add where. + val withFilter = withLateralView.optionalMap(whereClause)(withWhereClause) + + // Add aggregation or a project. + val namedExpressions = expressions.map { + case e: NamedExpression => e + case e: Expression => UnresolvedAlias(e) + } + + def createProject() = if (namedExpressions.nonEmpty) { + Project(namedExpressions, withFilter) + } else { + withFilter + } + + val withProject = if (aggregationClause == null && havingClause != null) { + if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) { + // If the legacy conf is set, treat HAVING without GROUP BY as WHERE. + val predicate = expression(havingClause.booleanExpression) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + Filter(predicate, createProject()) + } else { + // According to SQL standard, HAVING without GROUP BY means global aggregate. + withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter)) + } + } else if (aggregationClause != null) { + val aggregate = withAggregationClause(aggregationClause, namedExpressions, withFilter) + aggregate.optionalMap(havingClause)(withHavingClause) + } else { + // When hitting this branch, `having` must be null. + createProject() + } + + // Distinct + val withDistinct = if (isDistinct) { + Distinct(withProject) + } else { + withProject + } + + // Window + val withWindow = withDistinct.optionalMap(windowClause)(withWindowClause) + + withWindow + } + + // Script Transform's input/output format. + type ScriptIOFormat = + (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) + + protected def getRowFormatDelimited(ctx: RowFormatDelimitedContext): ScriptIOFormat = { + // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema + // expects a seq of pairs in which the old parsers' token names are used as keys. + // Transforming the result of visitRowFormatDelimited would be quite a bit messier than + // retrieving the key value pairs ourselves. + val entries = entry("TOK_TABLEROWFORMATFIELD", ctx.fieldsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATCOLLITEMS", ctx.collectionItemsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATMAPKEYS", ctx.keysTerminatedBy) ++ + entry("TOK_TABLEROWFORMATNULL", ctx.nullDefinedAs) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "TOK_TABLEROWFORMATLINES" -> value + } + + (entries, None, Seq.empty, None) + } + + /** + * Create a [[ScriptInputOutputSchema]]. + */ + protected def withScriptIOSchema( + ctx: ParserRuleContext, + inRowFormat: RowFormatContext, + recordWriter: Token, + outRowFormat: RowFormatContext, + recordReader: Token, + schemaLess: Boolean): ScriptInputOutputSchema = { + + def format(fmt: RowFormatContext): ScriptIOFormat = fmt match { + case c: RowFormatDelimitedContext => + getRowFormatDelimited(c) + + case c: RowFormatSerdeContext => + throw new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) + + // SPARK-32106: When there is no definition about format, we return empty result + // to use a built-in default Serde in SparkScriptTransformationExec. + case null => + (Nil, None, Seq.empty, None) + } + + val (inFormat, inSerdeClass, inSerdeProps, reader) = format(inRowFormat) + + val (outFormat, outSerdeClass, outSerdeProps, writer) = format(outRowFormat) + + ScriptInputOutputSchema( + inFormat, outFormat, + inSerdeClass, outSerdeClass, + inSerdeProps, outSerdeProps, + reader, writer, + schemaLess) + } + + /** + * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma + * separated) relations here, these get converted into a single plan by condition-less inner join. + */ + override def visitFromClause(ctx: FromClauseContext): LogicalPlan = withOrigin(ctx) { + val from = ctx.relation.asScala.foldLeft(null: LogicalPlan) { (left, relation) => + val right = plan(relation.relationPrimary) + val join = right.optionalMap(left) { (left, right) => + if (relation.LATERAL != null) { + if (!relation.relationPrimary.isInstanceOf[AliasedQueryContext]) { + throw new ParseException(s"LATERAL can only be used with subquery", relation.relationPrimary) + } + LateralJoin(left, LateralSubquery(right), Inner, None) + } else { + Join(left, right, Inner, None, JoinHint.NONE) + } + } + withJoinRelations(join, relation) + } + if (ctx.pivotClause() != null) { + if (!ctx.lateralView.isEmpty) { + throw new ParseException("LATERAL cannot be used together with PIVOT in FROM clause", ctx) + } + withPivot(ctx.pivotClause, from) + } else { + ctx.lateralView.asScala.foldLeft(from)(withGenerate) + } + } + + /** + * Connect two queries by a Set operator. + * + * Supported Set operators are: + * - UNION [ DISTINCT | ALL ] + * - EXCEPT [ DISTINCT | ALL ] + * - MINUS [ DISTINCT | ALL ] + * - INTERSECT [DISTINCT | ALL] + */ + override def visitSetOperation(ctx: SetOperationContext): LogicalPlan = withOrigin(ctx) { + val left = plan(ctx.left) + val right = plan(ctx.right) + val all = Option(ctx.setQuantifier()).exists(_.ALL != null) + ctx.operator.getType match { + case HoodieSqlBaseParser.UNION if all => + Union(left, right) + case HoodieSqlBaseParser.UNION => + Distinct(Union(left, right)) + case HoodieSqlBaseParser.INTERSECT if all => + Intersect(left, right, isAll = true) + case HoodieSqlBaseParser.INTERSECT => + Intersect(left, right, isAll = false) + case HoodieSqlBaseParser.EXCEPT if all => + Except(left, right, isAll = true) + case HoodieSqlBaseParser.EXCEPT => + Except(left, right, isAll = false) + case HoodieSqlBaseParser.SETMINUS if all => + Except(left, right, isAll = true) + case HoodieSqlBaseParser.SETMINUS => + Except(left, right, isAll = false) + } + } + + /** + * Add a [[WithWindowDefinition]] operator to a logical plan. + */ + private def withWindowClause( + ctx: WindowClauseContext, + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + // Collect all window specifications defined in the WINDOW clause. + val baseWindowTuples = ctx.namedWindow.asScala.map { + wCtx => + (wCtx.name.getText, typedVisit[WindowSpec](wCtx.windowSpec)) + } + baseWindowTuples.groupBy(_._1).foreach { kv => + if (kv._2.size > 1) { + throw new ParseException(s"The definition of window '${kv._1}' is repetitive", ctx) + } + } + val baseWindowMap = baseWindowTuples.toMap + + // Handle cases like + // window w1 as (partition by p_mfgr order by p_name + // range between 2 preceding and 2 following), + // w2 as w1 + val windowMapView = baseWindowMap.mapValues { + case WindowSpecReference(name) => + baseWindowMap.get(name) match { + case Some(spec: WindowSpecDefinition) => + spec + case Some(ref) => + throw new ParseException(s"Window reference '$name' is not a window specification", ctx) + case None => + throw new ParseException(s"Cannot resolve window reference '$name'", ctx) + } + case spec: WindowSpecDefinition => spec + } + + // Note that mapValues creates a view instead of materialized map. We force materialization by + // mapping over identity. + WithWindowDefinition(windowMapView.map(identity).toMap, query) + } + + /** + * Add an [[Aggregate]] to a logical plan. + */ + private def withAggregationClause( + ctx: AggregationClauseContext, + selectExpressions: Seq[NamedExpression], + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + if (ctx.groupingExpressionsWithGroupingAnalytics.isEmpty) { + val groupByExpressions = expressionList(ctx.groupingExpressions) + if (ctx.GROUPING != null) { + // GROUP BY ... GROUPING SETS (...) + // `groupByExpressions` can be non-empty for Hive compatibility. It may add extra grouping + // expressions that do not exist in GROUPING SETS (...), and the value is always null. + // For example, `SELECT a, b, c FROM ... GROUP BY a, b, c GROUPING SETS (a, b)`, the output + // of column `c` is always null. + val groupingSets = + ctx.groupingSet.asScala.map(_.expression.asScala.map(e => expression(e)).toSeq) + Aggregate(Seq(GroupingSets(groupingSets.toSeq, groupByExpressions)), + selectExpressions, query) + } else { + // GROUP BY .... (WITH CUBE | WITH ROLLUP)? + val mappedGroupByExpressions = if (ctx.CUBE != null) { + Seq(Cube(groupByExpressions.map(Seq(_)))) + } else if (ctx.ROLLUP != null) { + Seq(Rollup(groupByExpressions.map(Seq(_)))) + } else { + groupByExpressions + } + Aggregate(mappedGroupByExpressions, selectExpressions, query) + } + } else { + val groupByExpressions = + ctx.groupingExpressionsWithGroupingAnalytics.asScala + .map(groupByExpr => { + val groupingAnalytics = groupByExpr.groupingAnalytics + if (groupingAnalytics != null) { + visitGroupingAnalytics(groupingAnalytics) + } else { + expression(groupByExpr.expression) + } + }) + Aggregate(groupByExpressions.toSeq, selectExpressions, query) + } + } + + override def visitGroupingAnalytics( + groupingAnalytics: GroupingAnalyticsContext): BaseGroupingSets = { + val groupingSets = groupingAnalytics.groupingSet.asScala + .map(_.expression.asScala.map(e => expression(e)).toSeq) + if (groupingAnalytics.CUBE != null) { + // CUBE(A, B, (A, B), ()) is not supported. + if (groupingSets.exists(_.isEmpty)) { + throw new ParseException(s"Empty set in CUBE grouping sets is not supported.", groupingAnalytics) + } + Cube(groupingSets.toSeq) + } else if (groupingAnalytics.ROLLUP != null) { + // ROLLUP(A, B, (A, B), ()) is not supported. + if (groupingSets.exists(_.isEmpty)) { + throw new ParseException(s"Empty set in ROLLUP grouping sets is not supported.", groupingAnalytics) + } + Rollup(groupingSets.toSeq) + } else { + assert(groupingAnalytics.GROUPING != null && groupingAnalytics.SETS != null) + val groupingSets = groupingAnalytics.groupingElement.asScala.flatMap { expr => + val groupingAnalytics = expr.groupingAnalytics() + if (groupingAnalytics != null) { + visitGroupingAnalytics(groupingAnalytics).selectedGroupByExprs + } else { + Seq(expr.groupingSet().expression().asScala.map(e => expression(e)).toSeq) + } + } + GroupingSets(groupingSets.toSeq) + } + } + + /** + * Add [[UnresolvedHint]]s to a logical plan. + */ + private def withHints( + ctx: HintContext, + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + var plan = query + ctx.hintStatements.asScala.reverse.foreach { stmt => + plan = UnresolvedHint(stmt.hintName.getText, + stmt.parameters.asScala.map(expression).toSeq, plan) + } + plan + } + + /** + * Add a [[Pivot]] to a logical plan. + */ + private def withPivot( + ctx: PivotClauseContext, + query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + val aggregates = Option(ctx.aggregates).toSeq + .flatMap(_.namedExpression.asScala) + .map(typedVisit[Expression]) + val pivotColumn = if (ctx.pivotColumn.identifiers.size == 1) { + UnresolvedAttribute.quoted(ctx.pivotColumn.identifier.getText) + } else { + CreateStruct( + ctx.pivotColumn.identifiers.asScala.map( + identifier => UnresolvedAttribute.quoted(identifier.getText)).toSeq) + } + val pivotValues = ctx.pivotValues.asScala.map(visitPivotValue) + Pivot(None, pivotColumn, pivotValues.toSeq, aggregates, query) + } + + /** + * Create a Pivot column value with or without an alias. + */ + override def visitPivotValue(ctx: PivotValueContext): Expression = withOrigin(ctx) { + val e = expression(ctx.expression) + if (ctx.identifier != null) { + Alias(e, ctx.identifier.getText)() + } else { + e + } + } + + /** + * Add a [[Generate]] (Lateral View) to a logical plan. + */ + private def withGenerate( + query: LogicalPlan, + ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) { + val expressions = expressionList(ctx.expression) + Generate( + UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions), + unrequiredChildIndex = Nil, + outer = ctx.OUTER != null, + // scalastyle:off caselocale + Some(ctx.tblName.getText.toLowerCase), + // scalastyle:on caselocale + ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.quoted).toSeq, + query) + } + + /** + * Create a single relation referenced in a FROM clause. This method is used when a part of the + * join condition is nested, for example: + * {{{ + * select * from t1 join (t2 cross join t3) on col1 = col2 + * }}} + */ + override def visitRelation(ctx: RelationContext): LogicalPlan = withOrigin(ctx) { + withJoinRelations(plan(ctx.relationPrimary), ctx) + } + + /** + * Join one more [[LogicalPlan]]s to the current logical plan. + */ + private def withJoinRelations(base: LogicalPlan, ctx: RelationContext): LogicalPlan = { + ctx.joinRelation.asScala.foldLeft(base) { (left, join) => + withOrigin(join) { + val baseJoinType = join.joinType match { + case null => Inner + case jt if jt.CROSS != null => Cross + case jt if jt.FULL != null => FullOuter + case jt if jt.SEMI != null => LeftSemi + case jt if jt.ANTI != null => LeftAnti + case jt if jt.LEFT != null => LeftOuter + case jt if jt.RIGHT != null => RightOuter + case _ => Inner + } + + if (join.LATERAL != null && !join.right.isInstanceOf[AliasedQueryContext]) { + throw new ParseException(s"LATERAL can only be used with subquery", join.right) + } + + // Resolve the join type and join condition + val (joinType, condition) = Option(join.joinCriteria) match { + case Some(c) if c.USING != null => + if (join.LATERAL != null) { + throw new ParseException("LATERAL join with USING join is not supported", ctx) + } + (UsingJoin(baseJoinType, visitIdentifierList(c.identifierList)), None) + case Some(c) if c.booleanExpression != null => + (baseJoinType, Option(expression(c.booleanExpression))) + case Some(c) => + throw new ParseException(s"Unimplemented joinCriteria: $c", ctx) + case None if join.NATURAL != null => + if (join.LATERAL != null) { + throw new ParseException("LATERAL join with NATURAL join is not supported", ctx) + } + if (baseJoinType == Cross) { + throw new ParseException("NATURAL CROSS JOIN is not supported", ctx) + } + (NaturalJoin(baseJoinType), None) + case None => + (baseJoinType, None) + } + if (join.LATERAL != null) { + if (!Seq(Inner, Cross, LeftOuter).contains(joinType)) { + throw new ParseException(s"Unsupported LATERAL join type ${joinType.toString}", ctx) + } + LateralJoin(left, LateralSubquery(plan(join.right)), joinType, condition) + } else { + Join(left, plan(join.right), joinType, condition, JoinHint.NONE) + } + } + } + } + + /** + * Add a [[Sample]] to a logical plan. + * + * This currently supports the following sampling methods: + * - TABLESAMPLE(x ROWS): Sample the table down to the given number of rows. + * - TABLESAMPLE(x PERCENT): Sample the table down to the given percentage. Note that percentages + * are defined as a number between 0 and 100. + * - TABLESAMPLE(BUCKET x OUT OF y): Sample the table down to a 'x' divided by 'y' fraction. + */ + private def withSample(ctx: SampleContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) { + // Create a sampled plan if we need one. + def sample(fraction: Double): Sample = { + // The range of fraction accepted by Sample is [0, 1]. Because Hive's block sampling + // function takes X PERCENT as the input and the range of X is [0, 100], we need to + // adjust the fraction. + val eps = RandomSampler.roundingEpsilon + validate(fraction >= 0.0 - eps && fraction <= 1.0 + eps, + s"Sampling fraction ($fraction) must be on interval [0, 1]", + ctx) + Sample(0.0, fraction, withReplacement = false, (math.random * 1000).toInt, query) + } + + if (ctx.sampleMethod() == null) { + throw new ParseException("TABLESAMPLE does not accept empty inputs.", ctx) + } + + ctx.sampleMethod() match { + case ctx: SampleByRowsContext => + Limit(expression(ctx.expression), query) + + case ctx: SampleByPercentileContext => + val fraction = ctx.percentage.getText.toDouble + val sign = if (ctx.negativeSign == null) 1 else -1 + sample(sign * fraction / 100.0d) + + case ctx: SampleByBytesContext => + val bytesStr = ctx.bytes.getText + if (bytesStr.matches("[0-9]+[bBkKmMgG]")) { + throw new ParseException(s"TABLESAMPLE(byteLengthLiteral) is not supported", ctx) + } else { + throw new ParseException(s"$bytesStr is not a valid byte length literal, " + + "expected syntax: DIGIT+ ('B' | 'K' | 'M' | 'G')", ctx) + } + + case ctx: SampleByBucketContext if ctx.ON() != null => + if (ctx.identifier != null) { + throw new ParseException(s"TABLESAMPLE(BUCKET x OUT OF y ON colname) is not supported", ctx) + } else { + throw new ParseException(s"TABLESAMPLE(BUCKET x OUT OF y ON function) is not supported", ctx) + } + + case ctx: SampleByBucketContext => + sample(ctx.numerator.getText.toDouble / ctx.denominator.getText.toDouble) + } + } + + /** + * Create a logical plan for a sub-query. + */ + override def visitSubquery(ctx: SubqueryContext): LogicalPlan = withOrigin(ctx) { + plan(ctx.query) + } + + /** + * Create an un-aliased table reference. This is typically used for top-level table references, + * for example: + * {{{ + * INSERT INTO db.tbl2 + * TABLE db.tbl1 + * }}} + */ + override def visitTable(ctx: TableContext): LogicalPlan = withOrigin(ctx) { + UnresolvedRelation(visitMultipartIdentifier(ctx.multipartIdentifier)) + } + + /** + * Create a table-valued function call with arguments, e.g. range(1000) + */ + override def visitTableValuedFunction(ctx: TableValuedFunctionContext) + : LogicalPlan = withOrigin(ctx) { + val func = ctx.functionTable + val aliases = if (func.tableAlias.identifierList != null) { + visitIdentifierList(func.tableAlias.identifierList) + } else { + Seq.empty + } + val name = getFunctionIdentifier(func.functionName) + if (name.database.nonEmpty) { + operationNotAllowed(s"table valued function cannot specify database name: $name", ctx) + } + + val tvf = UnresolvedTableValuedFunction(name, func.expression.asScala.map(expression).toSeq) + + val tvfAliases = if (aliases.nonEmpty) UnresolvedTVFAliases(name, tvf, aliases) else tvf + + tvfAliases.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan) + } + + /** + * Create an inline table (a virtual table in Hive parlance). + */ + override def visitInlineTable(ctx: InlineTableContext): LogicalPlan = withOrigin(ctx) { + // Get the backing expressions. + val rows = ctx.expression.asScala.map { e => + expression(e) match { + // inline table comes in two styles: + // style 1: values (1), (2), (3) -- multiple columns are supported + // style 2: values 1, 2, 3 -- only a single column is supported here + case struct: CreateNamedStruct => struct.valExprs // style 1 + case child => Seq(child) // style 2 + } + } + + val aliases = if (ctx.tableAlias.identifierList != null) { + visitIdentifierList(ctx.tableAlias.identifierList) + } else { + Seq.tabulate(rows.head.size)(i => s"col${i + 1}") + } + + val table = UnresolvedInlineTable(aliases, rows.toSeq) + table.optionalMap(ctx.tableAlias.strictIdentifier)(aliasPlan) + } + + /** + * Create an alias (SubqueryAlias) for a join relation. This is practically the same as + * visitAliasedQuery and visitNamedExpression, ANTLR4 however requires us to use 3 different + * hooks. We could add alias names for output columns, for example: + * {{{ + * SELECT a, b, c, d FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d) + * }}} + */ + override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) { + val relation = plan(ctx.relation).optionalMap(ctx.sample)(withSample) + mayApplyAliasPlan(ctx.tableAlias, relation) + } + + /** + * Create an alias (SubqueryAlias) for a sub-query. This is practically the same as + * visitAliasedRelation and visitNamedExpression, ANTLR4 however requires us to use 3 different + * hooks. We could add alias names for output columns, for example: + * {{{ + * SELECT col1, col2 FROM testData AS t(col1, col2) + * }}} + */ + override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) { + val relation = plan(ctx.query).optionalMap(ctx.sample)(withSample) + if (ctx.tableAlias.strictIdentifier == null) { + // For un-aliased subqueries, use a default alias name that is not likely to conflict with + // normal subquery names, so that parent operators can only access the columns in subquery by + // unqualified names. Users can still use this special qualifier to access columns if they + // know it, but that's not recommended. + SubqueryAlias("__auto_generated_subquery_name", relation) + } else { + mayApplyAliasPlan(ctx.tableAlias, relation) + } + } + + /** + * Create an alias ([[SubqueryAlias]]) for a [[LogicalPlan]]. + */ + private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = { + SubqueryAlias(alias.getText, plan) + } + + /** + * If aliases specified in a FROM clause, create a subquery alias ([[SubqueryAlias]]) and + * column aliases for a [[LogicalPlan]]. + */ + private def mayApplyAliasPlan(tableAlias: TableAliasContext, plan: LogicalPlan): LogicalPlan = { + if (tableAlias.strictIdentifier != null) { + val alias = tableAlias.strictIdentifier.getText + if (tableAlias.identifierList != null) { + val columnNames = visitIdentifierList(tableAlias.identifierList) + SubqueryAlias(alias, UnresolvedSubqueryColumnAliases(columnNames, plan)) + } else { + SubqueryAlias(alias, plan) + } + } else { + plan + } + } + + /** + * Create a Sequence of Strings for a parenthesis enclosed alias list. + */ + override def visitIdentifierList(ctx: IdentifierListContext): Seq[String] = withOrigin(ctx) { + visitIdentifierSeq(ctx.identifierSeq) + } + + /** + * Create a Sequence of Strings for an identifier list. + */ + override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) { + ctx.ident.asScala.map(_.getText).toSeq + } + + /* ******************************************************************************************** + * Table Identifier parsing + * ******************************************************************************************** */ + + /** + * Create a [[TableIdentifier]] from a 'tableName' or 'databaseName'.'tableName' pattern. + */ + override def visitTableIdentifier( + ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { + TableIdentifier(ctx.table.getText, Option(ctx.db).map(_.getText)) + } + + /** + * Create a [[FunctionIdentifier]] from a 'functionName' or 'databaseName'.'functionName' pattern. + */ + override def visitFunctionIdentifier( + ctx: FunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { + FunctionIdentifier(ctx.function.getText, Option(ctx.db).map(_.getText)) + } + + /** + * Create a multi-part identifier. + */ + override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = + withOrigin(ctx) { + ctx.parts.asScala.map(_.getText).toSeq + } + + /* ******************************************************************************************** + * Expression parsing + * ******************************************************************************************** */ + + /** + * Create an expression from the given context. This method just passes the context on to the + * visitor and only takes care of typing (We assume that the visitor returns an Expression here). + */ + protected def expression(ctx: ParserRuleContext): Expression = typedVisit(ctx) + + /** + * Create sequence of expressions from the given sequence of contexts. + */ + private def expressionList(trees: java.util.List[ExpressionContext]): Seq[Expression] = { + trees.asScala.map(expression).toSeq + } + + /** + * Create a star (i.e. all) expression; this selects all elements (in the specified object). + * Both un-targeted (global) and targeted aliases are supported. + */ + override def visitStar(ctx: StarContext): Expression = withOrigin(ctx) { + UnresolvedStar(Option(ctx.qualifiedName()).map(_.identifier.asScala.map(_.getText).toSeq)) + } + + /** + * Create an aliased expression if an alias is specified. Both single and multi-aliases are + * supported. + */ + override def visitNamedExpression(ctx: NamedExpressionContext): Expression = withOrigin(ctx) { + val e = expression(ctx.expression) + if (ctx.name != null) { + Alias(e, ctx.name.getText)() + } else if (ctx.identifierList != null) { + MultiAlias(e, visitIdentifierList(ctx.identifierList)) + } else { + e + } + } + + /** + * Combine a number of boolean expressions into a balanced expression tree. These expressions are + * either combined by a logical [[And]] or a logical [[Or]]. + * + * A balanced binary tree is created because regular left recursive trees cause considerable + * performance degradations and can cause stack overflows. + */ + override def visitLogicalBinary(ctx: LogicalBinaryContext): Expression = withOrigin(ctx) { + val expressionType = ctx.operator.getType + val expressionCombiner = expressionType match { + case HoodieSqlBaseParser.AND => And.apply _ + case HoodieSqlBaseParser.OR => Or.apply _ + } + + // Collect all similar left hand contexts. + val contexts = ArrayBuffer(ctx.right) + var current = ctx.left + + def collectContexts: Boolean = current match { + case lbc: LogicalBinaryContext if lbc.operator.getType == expressionType => + contexts += lbc.right + current = lbc.left + true + case _ => + contexts += current + false + } + + while (collectContexts) { + // No body - all updates take place in the collectContexts. + } + + // Reverse the contexts to have them in the same sequence as in the SQL statement & turn them + // into expressions. + val expressions = contexts.reverseMap(expression) + + // Create a balanced tree. + def reduceToExpressionTree(low: Int, high: Int): Expression = high - low match { + case 0 => + expressions(low) + case 1 => + expressionCombiner(expressions(low), expressions(high)) + case x => + val mid = low + x / 2 + expressionCombiner( + reduceToExpressionTree(low, mid), + reduceToExpressionTree(mid + 1, high)) + } + + reduceToExpressionTree(0, expressions.size - 1) + } + + /** + * Invert a boolean expression. + */ + override def visitLogicalNot(ctx: LogicalNotContext): Expression = withOrigin(ctx) { + Not(expression(ctx.booleanExpression())) + } + + /** + * Create a filtering correlated sub-query (EXISTS). + */ + override def visitExists(ctx: ExistsContext): Expression = { + Exists(plan(ctx.query)) + } + + /** + * Create a comparison expression. This compares two expressions. The following comparison + * operators are supported: + * - Equal: '=' or '==' + * - Null-safe Equal: '<=>' + * - Not Equal: '<>' or '!=' + * - Less than: '<' + * - Less then or Equal: '<=' + * - Greater than: '>' + * - Greater then or Equal: '>=' + */ + override def visitComparison(ctx: ComparisonContext): Expression = withOrigin(ctx) { + val left = expression(ctx.left) + val right = expression(ctx.right) + val operator = ctx.comparisonOperator().getChild(0).asInstanceOf[TerminalNode] + operator.getSymbol.getType match { + case HoodieSqlBaseParser.EQ => + EqualTo(left, right) + case HoodieSqlBaseParser.NSEQ => + EqualNullSafe(left, right) + case HoodieSqlBaseParser.NEQ | HoodieSqlBaseParser.NEQJ => + Not(EqualTo(left, right)) + case HoodieSqlBaseParser.LT => + LessThan(left, right) + case HoodieSqlBaseParser.LTE => + LessThanOrEqual(left, right) + case HoodieSqlBaseParser.GT => + GreaterThan(left, right) + case HoodieSqlBaseParser.GTE => + GreaterThanOrEqual(left, right) + } + } + + /** + * Create a predicated expression. A predicated expression is a normal expression with a + * predicate attached to it, for example: + * {{{ + * a + 1 IS NULL + * }}} + */ + override def visitPredicated(ctx: PredicatedContext): Expression = withOrigin(ctx) { + val e = expression(ctx.valueExpression) + if (ctx.predicate != null) { + withPredicate(e, ctx.predicate) + } else { + e + } + } + + /** + * Add a predicate to the given expression. Supported expressions are: + * - (NOT) BETWEEN + * - (NOT) IN + * - (NOT) LIKE (ANY | SOME | ALL) + * - (NOT) RLIKE + * - IS (NOT) NULL. + * - IS (NOT) (TRUE | FALSE | UNKNOWN) + * - IS (NOT) DISTINCT FROM + */ + private def withPredicate(e: Expression, ctx: PredicateContext): Expression = withOrigin(ctx) { + // Invert a predicate if it has a valid NOT clause. + def invertIfNotDefined(e: Expression): Expression = ctx.NOT match { + case null => e + case not => Not(e) + } + + def getValueExpressions(e: Expression): Seq[Expression] = e match { + case c: CreateNamedStruct => c.valExprs + case other => Seq(other) + } + + // Create the predicate. + ctx.kind.getType match { + case HoodieSqlBaseParser.BETWEEN => + // BETWEEN is translated to lower <= e && e <= upper + invertIfNotDefined(And( + GreaterThanOrEqual(e, expression(ctx.lower)), + LessThanOrEqual(e, expression(ctx.upper)))) + case HoodieSqlBaseParser.IN if ctx.query != null => + invertIfNotDefined(InSubquery(getValueExpressions(e), ListQuery(plan(ctx.query)))) + case HoodieSqlBaseParser.IN => + invertIfNotDefined(In(e, ctx.expression.asScala.map(expression).toSeq)) + case HoodieSqlBaseParser.LIKE => + Option(ctx.quantifier).map(_.getType) match { + case Some(HoodieSqlBaseParser.ANY) | Some(HoodieSqlBaseParser.SOME) => + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAny or NotLikeAny instead. + val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAny(e, patterns) + case _ => NotLikeAny(e, patterns) + } + } else { + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(Or) + } + case Some(HoodieSqlBaseParser.ALL) => + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAll or NotLikeAll instead. + val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAll(e, patterns) + case _ => NotLikeAll(e, patterns) + } + } else { + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(And) + } + case _ => + val escapeChar = Option(ctx.escapeChar).map(string).map { str => + if (str.length != 1) { + throw new ParseException("Invalid escape string. Escape string must contain only one character.", ctx) + } + str.charAt(0) + }.getOrElse('\\') + invertIfNotDefined(Like(e, expression(ctx.pattern), escapeChar)) + } + case HoodieSqlBaseParser.RLIKE => + invertIfNotDefined(RLike(e, expression(ctx.pattern))) + case HoodieSqlBaseParser.NULL if ctx.NOT != null => + IsNotNull(e) + case HoodieSqlBaseParser.NULL => + IsNull(e) + case HoodieSqlBaseParser.TRUE => ctx.NOT match { + case null => EqualNullSafe(e, Literal(true)) + case _ => Not(EqualNullSafe(e, Literal(true))) + } + case HoodieSqlBaseParser.FALSE => ctx.NOT match { + case null => EqualNullSafe(e, Literal(false)) + case _ => Not(EqualNullSafe(e, Literal(false))) + } + case HoodieSqlBaseParser.UNKNOWN => ctx.NOT match { + case null => IsUnknown(e) + case _ => IsNotUnknown(e) + } + case HoodieSqlBaseParser.DISTINCT if ctx.NOT != null => + EqualNullSafe(e, expression(ctx.right)) + case HoodieSqlBaseParser.DISTINCT => + Not(EqualNullSafe(e, expression(ctx.right))) + } + } + + /** + * Create a binary arithmetic expression. The following arithmetic operators are supported: + * - Multiplication: '*' + * - Division: '/' + * - Hive Long Division: 'DIV' + * - Modulo: '%' + * - Addition: '+' + * - Subtraction: '-' + * - Binary AND: '&' + * - Binary XOR + * - Binary OR: '|' + */ + override def visitArithmeticBinary(ctx: ArithmeticBinaryContext): Expression = withOrigin(ctx) { + val left = expression(ctx.left) + val right = expression(ctx.right) + ctx.operator.getType match { + case HoodieSqlBaseParser.ASTERISK => + Multiply(left, right) + case HoodieSqlBaseParser.SLASH => + Divide(left, right) + case HoodieSqlBaseParser.PERCENT => + Remainder(left, right) + case HoodieSqlBaseParser.DIV => + IntegralDivide(left, right) + case HoodieSqlBaseParser.PLUS => + Add(left, right) + case HoodieSqlBaseParser.MINUS => + Subtract(left, right) + case HoodieSqlBaseParser.CONCAT_PIPE => + Concat(left :: right :: Nil) + case HoodieSqlBaseParser.AMPERSAND => + BitwiseAnd(left, right) + case HoodieSqlBaseParser.HAT => + BitwiseXor(left, right) + case HoodieSqlBaseParser.PIPE => + BitwiseOr(left, right) + } + } + + /** + * Create a unary arithmetic expression. The following arithmetic operators are supported: + * - Plus: '+' + * - Minus: '-' + * - Bitwise Not: '~' + */ + override def visitArithmeticUnary(ctx: ArithmeticUnaryContext): Expression = withOrigin(ctx) { + val value = expression(ctx.valueExpression) + ctx.operator.getType match { + case HoodieSqlBaseParser.PLUS => + UnaryPositive(value) + case HoodieSqlBaseParser.MINUS => + UnaryMinus(value) + case HoodieSqlBaseParser.TILDE => + BitwiseNot(value) + } + } + + override def visitCurrentLike(ctx: CurrentLikeContext): Expression = withOrigin(ctx) { + if (conf.ansiEnabled) { + ctx.name.getType match { + case HoodieSqlBaseParser.CURRENT_DATE => + CurrentDate() + case HoodieSqlBaseParser.CURRENT_TIMESTAMP => + CurrentTimestamp() + case HoodieSqlBaseParser.CURRENT_USER => + CurrentUser() + } + } else { + // If the parser is not in ansi mode, we should return `UnresolvedAttribute`, in case there + // are columns named `CURRENT_DATE` or `CURRENT_TIMESTAMP`. + UnresolvedAttribute.quoted(ctx.name.getText) + } + } + + /** + * Create a [[Cast]] expression. + */ + override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { + val rawDataType = typedVisit[DataType](ctx.dataType()) + val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) + val cast = ctx.name.getType match { + case HoodieSqlBaseParser.CAST => + Cast(expression(ctx.expression), dataType) + + case HoodieSqlBaseParser.TRY_CAST => + Cast(expression(ctx.expression), dataType, evalMode = EvalMode.TRY) + } + cast.setTagValue(Cast.USER_SPECIFIED_CAST, true) + cast + } + + /** + * Create a [[CreateStruct]] expression. + */ + override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) { + CreateStruct.create(ctx.argument.asScala.map(expression).toSeq) + } + + /** + * Create a [[First]] expression. + */ + override def visitFirst(ctx: FirstContext): Expression = withOrigin(ctx) { + val ignoreNullsExpr = ctx.IGNORE != null + First(expression(ctx.expression), ignoreNullsExpr).toAggregateExpression() + } + + /** + * Create a [[Last]] expression. + */ + override def visitLast(ctx: LastContext): Expression = withOrigin(ctx) { + val ignoreNullsExpr = ctx.IGNORE != null + Last(expression(ctx.expression), ignoreNullsExpr).toAggregateExpression() + } + + /** + * Create a Position expression. + */ + override def visitPosition(ctx: PositionContext): Expression = withOrigin(ctx) { + new StringLocate(expression(ctx.substr), expression(ctx.str)) + } + + /** + * Create a Extract expression. + */ + override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) { + val arguments = Seq(Literal(ctx.field.getText), expression(ctx.source)) + UnresolvedFunction("extract", arguments, isDistinct = false) + } + + /** + * Create a Substring/Substr expression. + */ + override def visitSubstring(ctx: SubstringContext): Expression = withOrigin(ctx) { + if (ctx.len != null) { + Substring(expression(ctx.str), expression(ctx.pos), expression(ctx.len)) + } else { + new Substring(expression(ctx.str), expression(ctx.pos)) + } + } + + /** + * Create a Trim expression. + */ + override def visitTrim(ctx: TrimContext): Expression = withOrigin(ctx) { + val srcStr = expression(ctx.srcStr) + val trimStr = Option(ctx.trimStr).map(expression) + Option(ctx.trimOption).map(_.getType).getOrElse(HoodieSqlBaseParser.BOTH) match { + case HoodieSqlBaseParser.BOTH => + StringTrim(srcStr, trimStr) + case HoodieSqlBaseParser.LEADING => + StringTrimLeft(srcStr, trimStr) + case HoodieSqlBaseParser.TRAILING => + StringTrimRight(srcStr, trimStr) + case other => + throw new ParseException("Function trim doesn't support with " + + s"type $other. Please use BOTH, LEADING or TRAILING as trim type", ctx) + } + } + + /** + * Create a Overlay expression. + */ + override def visitOverlay(ctx: OverlayContext): Expression = withOrigin(ctx) { + val input = expression(ctx.input) + val replace = expression(ctx.replace) + val position = expression(ctx.position) + val lengthOpt = Option(ctx.length).map(expression) + lengthOpt match { + case Some(length) => Overlay(input, replace, position, length) + case None => new Overlay(input, replace, position) + } + } + + /** + * Create a (windowed) Function expression. + */ + override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) { + // Create the function call. + val name = ctx.functionName.getText + val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null) + // Call `toSeq`, otherwise `ctx.argument.asScala.map(expression)` is `Buffer` in Scala 2.13 + val arguments = ctx.argument.asScala.map(expression).toSeq match { + case Seq(UnresolvedStar(None)) + if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct => + // Transform COUNT(*) into COUNT(1). + Seq(Literal(1)) + case expressions => + expressions + } + val filter = Option(ctx.where).map(expression(_)) + val ignoreNulls = + Option(ctx.nullsOption).map(_.getType == HoodieSqlBaseParser.IGNORE).getOrElse(false) + val function = UnresolvedFunction( + getFunctionMultiparts(ctx.functionName), arguments, isDistinct, filter, ignoreNulls) + + // Check if the function is evaluated in a windowed context. + ctx.windowSpec match { + case spec: WindowRefContext => + UnresolvedWindowExpression(function, visitWindowRef(spec)) + case spec: WindowDefContext => + WindowExpression(function, visitWindowDef(spec)) + case _ => function + } + } + + /** + * Create a function database (optional) and name pair. + */ + protected def visitFunctionName(ctx: QualifiedNameContext): FunctionIdentifier = { + visitFunctionName(ctx, ctx.identifier().asScala.map(_.getText).toSeq) + } + + /** + * Create a function database (optional) and name pair. + */ + private def visitFunctionName(ctx: ParserRuleContext, texts: Seq[String]): FunctionIdentifier = { + texts match { + case Seq(db, fn) => FunctionIdentifier(fn, Option(db)) + case Seq(fn) => FunctionIdentifier(fn, None) + case other => + throw new ParseException(s"Unsupported function name '${texts.mkString(".")}'", ctx) + } + } + + /** + * Get a function identifier consist by database (optional) and name. + */ + protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier = { + if (ctx.qualifiedName != null) { + visitFunctionName(ctx.qualifiedName) + } else { + FunctionIdentifier(ctx.getText, None) + } + } + + protected def getFunctionMultiparts(ctx: FunctionNameContext): Seq[String] = { + if (ctx.qualifiedName != null) { + ctx.qualifiedName().identifier().asScala.map(_.getText).toSeq + } else { + Seq(ctx.getText) + } + } + + /** + * Create an [[LambdaFunction]]. + */ + override def visitLambda(ctx: LambdaContext): Expression = withOrigin(ctx) { + val arguments = ctx.identifier().asScala.map { name => + UnresolvedNamedLambdaVariable(UnresolvedAttribute.quoted(name.getText).nameParts) + } + val function = expression(ctx.expression).transformUp { + case a: UnresolvedAttribute => UnresolvedNamedLambdaVariable(a.nameParts) + } + LambdaFunction(function, arguments.toSeq) + } + + /** + * Create a reference to a window frame, i.e. [[WindowSpecReference]]. + */ + override def visitWindowRef(ctx: WindowRefContext): WindowSpecReference = withOrigin(ctx) { + WindowSpecReference(ctx.name.getText) + } + + /** + * Create a window definition, i.e. [[WindowSpecDefinition]]. + */ + override def visitWindowDef(ctx: WindowDefContext): WindowSpecDefinition = withOrigin(ctx) { + // CLUSTER BY ... | PARTITION BY ... ORDER BY ... + val partition = ctx.partition.asScala.map(expression) + val order = ctx.sortItem.asScala.map(visitSortItem) + + // RANGE/ROWS BETWEEN ... + val frameSpecOption = Option(ctx.windowFrame).map { frame => + val frameType = frame.frameType.getType match { + case HoodieSqlBaseParser.RANGE => RangeFrame + case HoodieSqlBaseParser.ROWS => RowFrame + } + + SpecifiedWindowFrame( + frameType, + visitFrameBound(frame.start), + Option(frame.end).map(visitFrameBound).getOrElse(CurrentRow)) + } + + WindowSpecDefinition( + partition.toSeq, + order.toSeq, + frameSpecOption.getOrElse(UnspecifiedFrame)) + } + + /** + * Create or resolve a frame boundary expressions. + */ + override def visitFrameBound(ctx: FrameBoundContext): Expression = withOrigin(ctx) { + def value: Expression = { + val e = expression(ctx.expression) + validate(e.resolved && e.foldable, "Frame bound value must be a literal.", ctx) + e + } + + ctx.boundType.getType match { + case HoodieSqlBaseParser.PRECEDING if ctx.UNBOUNDED != null => + UnboundedPreceding + case HoodieSqlBaseParser.PRECEDING => + UnaryMinus(value) + case HoodieSqlBaseParser.CURRENT => + CurrentRow + case HoodieSqlBaseParser.FOLLOWING if ctx.UNBOUNDED != null => + UnboundedFollowing + case HoodieSqlBaseParser.FOLLOWING => + value + } + } + + /** + * Create a [[CreateStruct]] expression. + */ + override def visitRowConstructor(ctx: RowConstructorContext): Expression = withOrigin(ctx) { + CreateStruct(ctx.namedExpression().asScala.map(expression).toSeq) + } + + /** + * Create a [[ScalarSubquery]] expression. + */ + override def visitSubqueryExpression( + ctx: SubqueryExpressionContext): Expression = withOrigin(ctx) { + ScalarSubquery(plan(ctx.query)) + } + + /** + * Create a value based [[CaseWhen]] expression. This has the following SQL form: + * {{{ + * CASE [expression] + * WHEN [value] THEN [expression] + * ... + * ELSE [expression] + * END + * }}} + */ + override def visitSimpleCase(ctx: SimpleCaseContext): Expression = withOrigin(ctx) { + val e = expression(ctx.value) + val branches = ctx.whenClause.asScala.map { wCtx => + (EqualTo(e, expression(wCtx.condition)), expression(wCtx.result)) + } + CaseWhen(branches.toSeq, Option(ctx.elseExpression).map(expression)) + } + + /** + * Create a condition based [[CaseWhen]] expression. This has the following SQL syntax: + * {{{ + * CASE + * WHEN [predicate] THEN [expression] + * ... + * ELSE [expression] + * END + * }}} + * + * @param ctx the parse tree + * */ + override def visitSearchedCase(ctx: SearchedCaseContext): Expression = withOrigin(ctx) { + val branches = ctx.whenClause.asScala.map { wCtx => + (expression(wCtx.condition), expression(wCtx.result)) + } + CaseWhen(branches.toSeq, Option(ctx.elseExpression).map(expression)) + } + + /** + * Currently only regex in expressions of SELECT statements are supported; in other + * places, e.g., where `(a)?+.+` = 2, regex are not meaningful. + */ + private def canApplyRegex(ctx: ParserRuleContext): Boolean = withOrigin(ctx) { + var parent = ctx.getParent + var rtn = false + while (parent != null) { + if (parent.isInstanceOf[NamedExpressionContext]) { + rtn = true + } + parent = parent.getParent + } + rtn + } + + /** + * Create a dereference expression. The return type depends on the type of the parent. + * If the parent is an [[UnresolvedAttribute]], it can be a [[UnresolvedAttribute]] or + * a [[UnresolvedRegex]] for regex quoted in ``; if the parent is some other expression, + * it can be [[UnresolvedExtractValue]]. + */ + override def visitDereference(ctx: DereferenceContext): Expression = withOrigin(ctx) { + val attr = ctx.fieldName.getText + expression(ctx.base) match { + case unresolved_attr@UnresolvedAttribute(nameParts) => + ctx.fieldName.getStart.getText match { + case escapedIdentifier(columnNameRegex) + if conf.supportQuotedRegexColumnName && canApplyRegex(ctx) => + UnresolvedRegex(columnNameRegex, Some(unresolved_attr.name), + conf.caseSensitiveAnalysis) + case _ => + UnresolvedAttribute(nameParts :+ attr) + } + case e => + UnresolvedExtractValue(e, Literal(attr)) + } + } + + /** + * Create an [[UnresolvedAttribute]] expression or a [[UnresolvedRegex]] if it is a regex + * quoted in `` + */ + override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) { + ctx.getStart.getText match { + case escapedIdentifier(columnNameRegex) + if conf.supportQuotedRegexColumnName && canApplyRegex(ctx) => + UnresolvedRegex(columnNameRegex, None, conf.caseSensitiveAnalysis) + case _ => + UnresolvedAttribute.quoted(ctx.getText) + } + + } + + /** + * Create an [[UnresolvedExtractValue]] expression, this is used for subscript access to an array. + */ + override def visitSubscript(ctx: SubscriptContext): Expression = withOrigin(ctx) { + UnresolvedExtractValue(expression(ctx.value), expression(ctx.index)) + } + + /** + * Create an expression for an expression between parentheses. This is need because the ANTLR + * visitor cannot automatically convert the nested context into an expression. + */ + override def visitParenthesizedExpression( + ctx: ParenthesizedExpressionContext): Expression = withOrigin(ctx) { + expression(ctx.expression) + } + + /** + * Create a [[SortOrder]] expression. + */ + override def visitSortItem(ctx: SortItemContext): SortOrder = withOrigin(ctx) { + val direction = if (ctx.DESC != null) { + Descending + } else { + Ascending + } + val nullOrdering = if (ctx.FIRST != null) { + NullsFirst + } else if (ctx.LAST != null) { + NullsLast + } else { + direction.defaultNullOrdering + } + SortOrder(expression(ctx.expression), direction, nullOrdering, Seq.empty) + } + + /** + * Create a typed Literal expression. A typed literal has the following SQL syntax: + * {{{ + * [TYPE] '[VALUE]' + * }}} + * Currently Date, Timestamp, Interval and Binary typed literals are supported. + */ + override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) { + val value = string(ctx.STRING) + val valueType = ctx.identifier.getText.toUpperCase(Locale.ROOT) + + def toLiteral[T](f: UTF8String => Option[T], t: DataType): Literal = { + f(UTF8String.fromString(value)).map(Literal(_, t)).getOrElse { + throw new ParseException(s"Cannot parse the $valueType value: $value", ctx) + } + } + + def constructTimestampLTZLiteral(value: String): Literal = { + val zoneId = getZoneId(conf.sessionLocalTimeZone) + val specialTs = convertSpecialTimestamp(value, zoneId).map(Literal(_, TimestampType)) + specialTs.getOrElse(toLiteral(stringToTimestamp(_, zoneId), TimestampType)) + } + + try { + valueType match { + case "DATE" => + val zoneId = getZoneId(conf.sessionLocalTimeZone) + val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) + specialDate.getOrElse(toLiteral(stringToDate, DateType)) + // SPARK-36227: Remove TimestampNTZ type support in Spark 3.2 with minimal code changes. + case "TIMESTAMP_NTZ" if isTesting => + convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) + .map(Literal(_, TimestampNTZType)) + .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) + case "TIMESTAMP_LTZ" if isTesting => + constructTimestampLTZLiteral(value) + case "TIMESTAMP" => + SQLConf.get.timestampType match { + case TimestampNTZType => + convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) + .map(Literal(_, TimestampNTZType)) + .getOrElse { + val containsTimeZonePart = + DateTimeUtils.parseTimestampString(UTF8String.fromString(value))._2.isDefined + // If the input string contains time zone part, return a timestamp with local time + // zone literal. + if (containsTimeZonePart) { + constructTimestampLTZLiteral(value) + } else { + toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType) + } + } + + case TimestampType => + constructTimestampLTZLiteral(value) + } + + case "INTERVAL" => + val interval = try { + IntervalUtils.stringToInterval(UTF8String.fromString(value)) + } catch { + case e: IllegalArgumentException => + val ex = new ParseException(s"Cannot parse the INTERVAL value: $value", ctx) + ex.setStackTrace(e.getStackTrace) + throw ex + } + if (!conf.legacyIntervalEnabled) { + val units = value + .split("\\s") + .map(_.toLowerCase(Locale.ROOT).stripSuffix("s")) + .filter(s => s != "interval" && s.matches("[a-z]+")) + constructMultiUnitsIntervalLiteral(ctx, interval, units) + } else { + Literal(interval, CalendarIntervalType) + } + case "X" => + val padding = if (value.length % 2 != 0) "0" else "" + Literal(DatatypeConverter.parseHexBinary(padding + value)) + case other => + throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx) + } + } catch { + case e: IllegalArgumentException => + val message = Option(e.getMessage).getOrElse(s"Exception parsing $valueType") + throw new ParseException(message, ctx) + } + } + + /** + * Create a NULL literal expression. + */ + override def visitNullLiteral(ctx: NullLiteralContext): Literal = withOrigin(ctx) { + Literal(null) + } + + /** + * Create a Boolean literal expression. + */ + override def visitBooleanLiteral(ctx: BooleanLiteralContext): Literal = withOrigin(ctx) { + if (ctx.getText.toBoolean) { + Literal.TrueLiteral + } else { + Literal.FalseLiteral + } + } + + /** + * Create an integral literal expression. The code selects the most narrow integral type + * possible, either a BigDecimal, a Long or an Integer is returned. + */ + override def visitIntegerLiteral(ctx: IntegerLiteralContext): Literal = withOrigin(ctx) { + BigDecimal(ctx.getText) match { + case v if v.isValidInt => + Literal(v.intValue) + case v if v.isValidLong => + Literal(v.longValue) + case v => Literal(v.underlying()) + } + } + + /** + * Create a decimal literal for a regular decimal number. + */ + override def visitDecimalLiteral(ctx: DecimalLiteralContext): Literal = withOrigin(ctx) { + Literal(BigDecimal(ctx.getText).underlying()) + } + + /** + * Create a decimal literal for a regular decimal number or a scientific decimal number. + */ + override def visitLegacyDecimalLiteral( + ctx: LegacyDecimalLiteralContext): Literal = withOrigin(ctx) { + Literal(BigDecimal(ctx.getText).underlying()) + } + + /** + * Create a double literal for number with an exponent, e.g. 1E-30 + */ + override def visitExponentLiteral(ctx: ExponentLiteralContext): Literal = { + numericLiteral(ctx, ctx.getText, /* exponent values don't have a suffix */ + Double.MinValue, Double.MaxValue, DoubleType.simpleString)(_.toDouble) + } + + /** Create a numeric literal expression. */ + private def numericLiteral( + ctx: NumberContext, + rawStrippedQualifier: String, + minValue: BigDecimal, + maxValue: BigDecimal, + typeName: String)(converter: String => Any): Literal = withOrigin(ctx) { + try { + val rawBigDecimal = BigDecimal(rawStrippedQualifier) + if (rawBigDecimal < minValue || rawBigDecimal > maxValue) { + throw new ParseException(s"Numeric literal $rawStrippedQualifier does not " + + s"fit in range [$minValue, $maxValue] for type $typeName", ctx) + } + Literal(converter(rawStrippedQualifier)) + } catch { + case e: NumberFormatException => + throw new ParseException(e.getMessage, ctx) + } + } + + /** + * Create a Byte Literal expression. + */ + override def visitTinyIntLiteral(ctx: TinyIntLiteralContext): Literal = { + val rawStrippedQualifier = ctx.getText.substring(0, ctx.getText.length - 1) + numericLiteral(ctx, rawStrippedQualifier, + Byte.MinValue, Byte.MaxValue, ByteType.simpleString)(_.toByte) + } + + /** + * Create a Short Literal expression. + */ + override def visitSmallIntLiteral(ctx: SmallIntLiteralContext): Literal = { + val rawStrippedQualifier = ctx.getText.substring(0, ctx.getText.length - 1) + numericLiteral(ctx, rawStrippedQualifier, + Short.MinValue, Short.MaxValue, ShortType.simpleString)(_.toShort) + } + + /** + * Create a Long Literal expression. + */ + override def visitBigIntLiteral(ctx: BigIntLiteralContext): Literal = { + val rawStrippedQualifier = ctx.getText.substring(0, ctx.getText.length - 1) + numericLiteral(ctx, rawStrippedQualifier, + Long.MinValue, Long.MaxValue, LongType.simpleString)(_.toLong) + } + + /** + * Create a Float Literal expression. + */ + override def visitFloatLiteral(ctx: FloatLiteralContext): Literal = { + val rawStrippedQualifier = ctx.getText.substring(0, ctx.getText.length - 1) + numericLiteral(ctx, rawStrippedQualifier, + Float.MinValue, Float.MaxValue, FloatType.simpleString)(_.toFloat) + } + + /** + * Create a Double Literal expression. + */ + override def visitDoubleLiteral(ctx: DoubleLiteralContext): Literal = { + val rawStrippedQualifier = ctx.getText.substring(0, ctx.getText.length - 1) + numericLiteral(ctx, rawStrippedQualifier, + Double.MinValue, Double.MaxValue, DoubleType.simpleString)(_.toDouble) + } + + /** + * Create a BigDecimal Literal expression. + */ + override def visitBigDecimalLiteral(ctx: BigDecimalLiteralContext): Literal = { + val raw = ctx.getText.substring(0, ctx.getText.length - 2) + try { + Literal(BigDecimal(raw).underlying()) + } catch { + case e: AnalysisException => + throw new ParseException(e.message, ctx) + } + } + + /** + * Create a String literal expression. + */ + override def visitStringLiteral(ctx: StringLiteralContext): Literal = withOrigin(ctx) { + Literal(createString(ctx)) + } + + /** + * Create a String from a string literal context. This supports multiple consecutive string + * literals, these are concatenated, for example this expression "'hello' 'world'" will be + * converted into "helloworld". + * + * Special characters can be escaped by using Hive/C-style escaping. + */ + private def createString(ctx: StringLiteralContext): String = { + if (conf.escapedStringLiterals) { + ctx.STRING().asScala.map(x => stringWithoutUnescape(x.getSymbol)).mkString + } else { + ctx.STRING().asScala.map(string).mkString + } + } + + /** + * Create an [[UnresolvedRelation]] from a multi-part identifier context. + */ + private def createUnresolvedRelation( + ctx: MultipartIdentifierContext): UnresolvedRelation = withOrigin(ctx) { + UnresolvedRelation(visitMultipartIdentifier(ctx)) + } + + /** + * Construct an [[Literal]] from [[CalendarInterval]] and + * units represented as a [[Seq]] of [[String]]. + */ + private def constructMultiUnitsIntervalLiteral( + ctx: ParserRuleContext, + calendarInterval: CalendarInterval, + units: Seq[String]): Literal = { + var yearMonthFields = Set.empty[Byte] + var dayTimeFields = Set.empty[Byte] + for (unit <- units) { + if (YearMonthIntervalType.stringToField.contains(unit)) { + yearMonthFields += YearMonthIntervalType.stringToField(unit) + } else if (DayTimeIntervalType.stringToField.contains(unit)) { + dayTimeFields += DayTimeIntervalType.stringToField(unit) + } else if (unit == "week") { + dayTimeFields += DayTimeIntervalType.DAY + } else { + assert(unit == "millisecond" || unit == "microsecond") + dayTimeFields += DayTimeIntervalType.SECOND + } + } + if (yearMonthFields.nonEmpty) { + if (dayTimeFields.nonEmpty) { + val literalStr = source(ctx) + throw new ParseException(s"Cannot mix year-month and day-time fields: $literalStr", ctx) + } + Literal( + calendarInterval.months, + YearMonthIntervalType(yearMonthFields.min, yearMonthFields.max) + ) + } else { + Literal( + IntervalUtils.getDuration(calendarInterval, TimeUnit.MICROSECONDS), + DayTimeIntervalType(dayTimeFields.min, dayTimeFields.max)) + } + } + + /** + * Create a [[CalendarInterval]] or ANSI interval literal expression. + * Two syntaxes are supported: + * - multiple unit value pairs, for instance: interval 2 months 2 days. + * - from-to unit, for instance: interval '1-2' year to month. + */ + override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) { + val calendarInterval = parseIntervalLiteral(ctx) + if (ctx.errorCapturingUnitToUnitInterval != null && !conf.legacyIntervalEnabled) { + // Check the `to` unit to distinguish year-month and day-time intervals because + // `CalendarInterval` doesn't have enough info. For instance, new CalendarInterval(0, 0, 0) + // can be derived from INTERVAL '0-0' YEAR TO MONTH as well as from + // INTERVAL '0 00:00:00' DAY TO SECOND. + val fromUnit = + ctx.errorCapturingUnitToUnitInterval.body.from.getText.toLowerCase(Locale.ROOT) + val toUnit = ctx.errorCapturingUnitToUnitInterval.body.to.getText.toLowerCase(Locale.ROOT) + if (toUnit == "month") { + assert(calendarInterval.days == 0 && calendarInterval.microseconds == 0) + val start = YearMonthIntervalType.stringToField(fromUnit) + Literal(calendarInterval.months, YearMonthIntervalType(start, YearMonthIntervalType.MONTH)) + } else { + assert(calendarInterval.months == 0) + val micros = IntervalUtils.getDuration(calendarInterval, TimeUnit.MICROSECONDS) + val start = DayTimeIntervalType.stringToField(fromUnit) + val end = DayTimeIntervalType.stringToField(toUnit) + Literal(micros, DayTimeIntervalType(start, end)) + } + } else if (ctx.errorCapturingMultiUnitsInterval != null && !conf.legacyIntervalEnabled) { + val units = + ctx.errorCapturingMultiUnitsInterval.body.unit.asScala.map( + _.getText.toLowerCase(Locale.ROOT).stripSuffix("s")).toSeq + constructMultiUnitsIntervalLiteral(ctx, calendarInterval, units) + } else { + Literal(calendarInterval, CalendarIntervalType) + } + } + + /** + * Create a [[CalendarInterval]] object + */ + protected def parseIntervalLiteral(ctx: IntervalContext): CalendarInterval = withOrigin(ctx) { + if (ctx.errorCapturingMultiUnitsInterval != null) { + val innerCtx = ctx.errorCapturingMultiUnitsInterval + if (innerCtx.unitToUnitInterval != null) { + throw new ParseException("Can only have a single from-to unit in the interval literal syntax", innerCtx.unitToUnitInterval) + } + visitMultiUnitsInterval(innerCtx.multiUnitsInterval) + } else if (ctx.errorCapturingUnitToUnitInterval != null) { + val innerCtx = ctx.errorCapturingUnitToUnitInterval + if (innerCtx.error1 != null || innerCtx.error2 != null) { + val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2 + throw new ParseException("Can only have a single from-to unit in the interval literal syntax", errorCtx) + } + visitUnitToUnitInterval(innerCtx.body) + } else { + throw new ParseException("at least one time unit should be given for interval literal", ctx) + } + } + + /** + * Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS. + */ + override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val units = ctx.unit.asScala + val values = ctx.intervalValue().asScala + try { + assert(units.length == values.length) + val kvs = units.indices.map { i => + val u = units(i).getText + val v = if (values(i).STRING() != null) { + val value = string(values(i).STRING()) + // SPARK-32840: For invalid cases, e.g. INTERVAL '1 day 2' hour, + // INTERVAL 'interval 1' day, we need to check ahead before they are concatenated with + // units and become valid ones, e.g. '1 day 2 hour'. + // Ideally, we only ensure the value parts don't contain any units here. + if (value.exists(Character.isLetter)) { + throw new ParseException("Can only use numbers in the interval value part for" + + s" multiple unit value pairs interval form, but got invalid value: $value", ctx) + } + if (values(i).MINUS() == null) { + value + } else { + value.startsWith("-") match { + case true => value.replaceFirst("-", "") + case false => s"-$value" + } + } + } else { + values(i).getText + } + UTF8String.fromString(" " + v + " " + u) + } + IntervalUtils.stringToInterval(UTF8String.concat(kvs: _*)) + } catch { + case i: IllegalArgumentException => + val e = new ParseException(i.getMessage, ctx) + e.setStackTrace(i.getStackTrace) + throw e + } + } + } + + /** + * Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH. + */ + override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { + withOrigin(ctx) { + val value = Option(ctx.intervalValue.STRING).map(string).map { interval => + if (ctx.intervalValue().MINUS() == null) { + interval + } else { + interval.startsWith("-") match { + case true => interval.replaceFirst("-", "") + case false => s"-$interval" + } + } + }.getOrElse { + throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue) + } + try { + val from = ctx.from.getText.toLowerCase(Locale.ROOT) + val to = ctx.to.getText.toLowerCase(Locale.ROOT) + (from, to) match { + case ("year", "month") => + IntervalUtils.fromYearMonthString(value) + case ("day", "hour") | ("day", "minute") | ("day", "second") | ("hour", "minute") | + ("hour", "second") | ("minute", "second") => + IntervalUtils.fromDayTimeString(value, + DayTimeIntervalType.stringToField(from), DayTimeIntervalType.stringToField(to)) + case _ => + throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx) + } + } catch { + // Handle Exceptions thrown by CalendarInterval + case e: IllegalArgumentException => + val pe = new ParseException(e.getMessage, ctx) + pe.setStackTrace(e.getStackTrace) + throw pe + } + } + } + + /* ******************************************************************************************** + * DataType parsing + * ******************************************************************************************** */ + + /** + * Resolve/create a primitive type. + */ + override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { + val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + (dataType, ctx.INTEGER_VALUE().asScala.toList) match { + case ("boolean", Nil) => BooleanType + case ("tinyint" | "byte", Nil) => ByteType + case ("smallint" | "short", Nil) => ShortType + case ("int" | "integer", Nil) => IntegerType + case ("bigint" | "long", Nil) => LongType + case ("float" | "real", Nil) => FloatType + case ("double", Nil) => DoubleType + case ("date", Nil) => DateType + case ("timestamp", Nil) => SQLConf.get.timestampType + // SPARK-36227: Remove TimestampNTZ type support in Spark 3.2 with minimal code changes. + case ("timestamp_ntz", Nil) if isTesting => TimestampNTZType + case ("timestamp_ltz", Nil) if isTesting => TimestampType + case ("string", Nil) => StringType + case ("character" | "char", length :: Nil) => CharType(length.getText.toInt) + case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) + case ("binary", Nil) => BinaryType + case ("decimal" | "dec" | "numeric", Nil) => DecimalType.USER_DEFAULT + case ("decimal" | "dec" | "numeric", precision :: Nil) => + DecimalType(precision.getText.toInt, 0) + case ("decimal" | "dec" | "numeric", precision :: scale :: Nil) => + DecimalType(precision.getText.toInt, scale.getText.toInt) + case ("void", Nil) => NullType + case ("interval", Nil) => CalendarIntervalType + case (dt, params) => + val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt + throw new ParseException(s"DataType $dtStr is not supported.", ctx) + } + } + + override def visitYearMonthIntervalDataType(ctx: YearMonthIntervalDataTypeContext): DataType = { + val startStr = ctx.from.getText.toLowerCase(Locale.ROOT) + val start = YearMonthIntervalType.stringToField(startStr) + if (ctx.to != null) { + val endStr = ctx.to.getText.toLowerCase(Locale.ROOT) + val end = YearMonthIntervalType.stringToField(endStr) + if (end <= start) { + throw new ParseException(s"Intervals FROM $startStr TO $endStr are not supported.", ctx) + } + YearMonthIntervalType(start, end) + } else { + YearMonthIntervalType(start) + } + } + + override def visitDayTimeIntervalDataType(ctx: DayTimeIntervalDataTypeContext): DataType = { + val startStr = ctx.from.getText.toLowerCase(Locale.ROOT) + val start = DayTimeIntervalType.stringToField(startStr) + if (ctx.to != null) { + val endStr = ctx.to.getText.toLowerCase(Locale.ROOT) + val end = DayTimeIntervalType.stringToField(endStr) + if (end <= start) { + throw new ParseException(s"Intervals FROM $startStr TO $endStr are not supported.", ctx) + } + DayTimeIntervalType(start, end) + } else { + DayTimeIntervalType(start) + } + } + + /** + * Create a complex DataType. Arrays, Maps and Structures are supported. + */ + override def visitComplexDataType(ctx: ComplexDataTypeContext): DataType = withOrigin(ctx) { + ctx.complex.getType match { + case HoodieSqlBaseParser.ARRAY => + ArrayType(typedVisit(ctx.dataType(0))) + case HoodieSqlBaseParser.MAP => + MapType(typedVisit(ctx.dataType(0)), typedVisit(ctx.dataType(1))) + case HoodieSqlBaseParser.STRUCT => + StructType(Option(ctx.complexColTypeList).toSeq.flatMap(visitComplexColTypeList)) + } + } + + /** + * Create top level table schema. + */ + protected def createSchema(ctx: ColTypeListContext): StructType = { + StructType(Option(ctx).toSeq.flatMap(visitColTypeList)) + } + + /** + * Create a [[StructType]] from a number of column definitions. + */ + override def visitColTypeList(ctx: ColTypeListContext): Seq[StructField] = withOrigin(ctx) { + ctx.colType().asScala.map(visitColType).toSeq + } + + /** + * Create a top level [[StructField]] from a column definition. + */ + override def visitColType(ctx: ColTypeContext): StructField = withOrigin(ctx) { + import ctx._ + + val builder = new MetadataBuilder + // Add comment to metadata + Option(commentSpec()).map(visitCommentSpec).foreach { + builder.putString("comment", _) + } + + StructField( + name = colName.getText, + dataType = typedVisit[DataType](ctx.dataType), + nullable = NULL == null, + metadata = builder.build()) + } + + /** + * Create a [[StructType]] from a sequence of [[StructField]]s. + */ + protected def createStructType(ctx: ComplexColTypeListContext): StructType = { + StructType(Option(ctx).toSeq.flatMap(visitComplexColTypeList)) + } + + /** + * Create a [[StructType]] from a number of column definitions. + */ + override def visitComplexColTypeList( + ctx: ComplexColTypeListContext): Seq[StructField] = withOrigin(ctx) { + ctx.complexColType().asScala.map(visitComplexColType).toSeq + } + + /** + * Create a [[StructField]] from a column definition. + */ + override def visitComplexColType(ctx: ComplexColTypeContext): StructField = withOrigin(ctx) { + import ctx._ + val structField = StructField( + name = identifier.getText, + dataType = typedVisit(dataType()), + nullable = NULL == null) + Option(commentSpec).map(visitCommentSpec).map(structField.withComment).getOrElse(structField) + } + + /** + * Create a location string. + */ + override def visitLocationSpec(ctx: LocationSpecContext): String = withOrigin(ctx) { + string(ctx.STRING) + } + + /** + * Create an optional location string. + */ + protected def visitLocationSpecList(ctx: java.util.List[LocationSpecContext]): Option[String] = { + ctx.asScala.headOption.map(visitLocationSpec) + } + + /** + * Create a comment string. + */ + override def visitCommentSpec(ctx: CommentSpecContext): String = withOrigin(ctx) { + string(ctx.STRING) + } + + /** + * Create an optional comment string. + */ + protected def visitCommentSpecList(ctx: java.util.List[CommentSpecContext]): Option[String] = { + ctx.asScala.headOption.map(visitCommentSpec) + } + + /** + * Create a [[BucketSpec]]. + */ + override def visitBucketSpec(ctx: BucketSpecContext): BucketSpec = withOrigin(ctx) { + BucketSpec( + ctx.INTEGER_VALUE.getText.toInt, + visitIdentifierList(ctx.identifierList), + Option(ctx.orderedIdentifierList) + .toSeq + .flatMap(_.orderedIdentifier.asScala) + .map { orderedIdCtx => + Option(orderedIdCtx.ordering).map(_.getText).foreach { dir => + if (dir.toLowerCase(Locale.ROOT) != "asc") { + operationNotAllowed(s"Column ordering must be ASC, was '$dir'", ctx) + } + } + + orderedIdCtx.ident.getText + }) + } + + /** + * Convert a table property list into a key-value map. + * This should be called through [[visitPropertyKeyValues]] or [[visitPropertyKeys]]. + */ + override def visitTablePropertyList( + ctx: TablePropertyListContext): Map[String, String] = withOrigin(ctx) { + val properties = ctx.tableProperty.asScala.map { property => + val key = visitTablePropertyKey(property.key) + val value = visitTablePropertyValue(property.value) + key -> value + } + // Check for duplicate property names. + checkDuplicateKeys(properties.toSeq, ctx) + properties.toMap + } + + /** + * Parse a key-value map from a [[TablePropertyListContext]], assuming all values are specified. + */ + def visitPropertyKeyValues(ctx: TablePropertyListContext): Map[String, String] = { + val props = visitTablePropertyList(ctx) + val badKeys = props.collect { case (key, null) => key } + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values must be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props + } + + /** + * Parse a list of keys from a [[TablePropertyListContext]], assuming no values are specified. + */ + def visitPropertyKeys(ctx: TablePropertyListContext): Seq[String] = { + val props = visitTablePropertyList(ctx) + val badKeys = props.filter { case (_, v) => v != null }.keys + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values should not be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props.keys.toSeq + } + + /** + * A table property key can either be String or a collection of dot separated elements. This + * function extracts the property key based on whether its a string literal or a table property + * identifier. + */ + override def visitTablePropertyKey(key: TablePropertyKeyContext): String = { + if (key.STRING != null) { + string(key.STRING) + } else { + key.getText + } + } + + /** + * A table property value can be String, Integer, Boolean or Decimal. This function extracts + * the property value based on whether its a string, integer, boolean or decimal literal. + */ + override def visitTablePropertyValue(value: TablePropertyValueContext): String = { + if (value == null) { + null + } else if (value.STRING != null) { + string(value.STRING) + } else if (value.booleanValue != null) { + value.getText.toLowerCase(Locale.ROOT) + } else { + value.getText + } + } + + /** + * Type to keep track of a table header: (identifier, isTemporary, ifNotExists, isExternal). + */ + type TableHeader = (Seq[String], Boolean, Boolean, Boolean) + + /** + * Type to keep track of table clauses: + * - partition transforms + * - partition columns + * - bucketSpec + * - properties + * - options + * - location + * - comment + * - serde + * + * Note: Partition transforms are based on existing table schema definition. It can be simple + * column names, or functions like `year(date_col)`. Partition columns are column names with data + * types like `i INT`, which should be appended to the existing table schema. + */ + type TableClauses = ( + Seq[Transform], Seq[StructField], Option[BucketSpec], Map[String, String], + Map[String, String], Option[String], Option[String], Option[SerdeInfo]) + + /** + * Validate a create table statement and return the [[TableIdentifier]]. + */ + override def visitCreateTableHeader( + ctx: CreateTableHeaderContext): TableHeader = withOrigin(ctx) { + val temporary = ctx.TEMPORARY != null + val ifNotExists = ctx.EXISTS != null + if (temporary && ifNotExists) { + operationNotAllowed("CREATE TEMPORARY TABLE ... IF NOT EXISTS", ctx) + } + val multipartIdentifier = ctx.multipartIdentifier.parts.asScala.map(_.getText).toSeq + (multipartIdentifier, temporary, ifNotExists, ctx.EXTERNAL != null) + } + + /** + * Validate a replace table statement and return the [[TableIdentifier]]. + */ + override def visitReplaceTableHeader( + ctx: ReplaceTableHeaderContext): TableHeader = withOrigin(ctx) { + val multipartIdentifier = ctx.multipartIdentifier.parts.asScala.map(_.getText).toSeq + (multipartIdentifier, false, false, false) + } + + /** + * Parse a qualified name to a multipart name. + */ + override def visitQualifiedName(ctx: QualifiedNameContext): Seq[String] = withOrigin(ctx) { + ctx.identifier.asScala.map(_.getText).toSeq + } + + /** + * Parse a list of transforms or columns. + */ + override def visitPartitionFieldList( + ctx: PartitionFieldListContext): (Seq[Transform], Seq[StructField]) = withOrigin(ctx) { + val (transforms, columns) = ctx.fields.asScala.map { + case transform: PartitionTransformContext => + (Some(visitPartitionTransform(transform)), None) + case field: PartitionColumnContext => + (None, Some(visitColType(field.colType))) + }.unzip + + (transforms.flatten.toSeq, columns.flatten.toSeq) + } + + override def visitPartitionTransform( + ctx: PartitionTransformContext): Transform = withOrigin(ctx) { + def getFieldReference( + ctx: ApplyTransformContext, + arg: V2Expression): FieldReference = { + lazy val name: String = ctx.identifier.getText + arg match { + case ref: FieldReference => + ref + case nonRef => + throw new ParseException(s"Expected a column reference for transform $name: $nonRef.describe", ctx) + } + } + + def getSingleFieldReference( + ctx: ApplyTransformContext, + arguments: Seq[V2Expression]): FieldReference = { + lazy val name: String = ctx.identifier.getText + if (arguments.size > 1) { + throw new ParseException(s"Too many arguments for transform $name", ctx) + } else if (arguments.isEmpty) { + throw + + new ParseException(s"Not enough arguments for transform $name", ctx) + } else { + getFieldReference(ctx, arguments.head) + } + } + + ctx.transform match { + case identityCtx: IdentityTransformContext => + IdentityTransform(FieldReference(typedVisit[Seq[String]](identityCtx.qualifiedName))) + + case applyCtx: ApplyTransformContext => + val arguments = applyCtx.argument.asScala.map(visitTransformArgument).toSeq + + applyCtx.identifier.getText match { + case "bucket" => + val numBuckets: Int = arguments.head match { + case LiteralValue(shortValue, ShortType) => + shortValue.asInstanceOf[Short].toInt + case LiteralValue(intValue, IntegerType) => + intValue.asInstanceOf[Int] + case LiteralValue(longValue, LongType) => + longValue.asInstanceOf[Long].toInt + case lit => + throw new ParseException(s"Invalid number of buckets: ${lit.describe}", applyCtx) + } + + val fields = arguments.tail.map(arg => getFieldReference(applyCtx, arg)) + + BucketTransform(LiteralValue(numBuckets, IntegerType), fields) + + case "years" => + YearsTransform(getSingleFieldReference(applyCtx, arguments)) + + case "months" => + MonthsTransform(getSingleFieldReference(applyCtx, arguments)) + + case "days" => + DaysTransform(getSingleFieldReference(applyCtx, arguments)) + + case "hours" => + HoursTransform(getSingleFieldReference(applyCtx, arguments)) + + case name => + ApplyTransform(name, arguments) + } + } + } + + /** + * Parse an argument to a transform. An argument may be a field reference (qualified name) or + * a value literal. + */ + override def visitTransformArgument(ctx: TransformArgumentContext): V2Expression = { + withOrigin(ctx) { + val reference = Option(ctx.qualifiedName) + .map(typedVisit[Seq[String]]) + .map(FieldReference(_)) + val literal = Option(ctx.constant) + .map(typedVisit[Literal]) + .map(lit => LiteralValue(lit.value, lit.dataType)) + reference.orElse(literal) + .getOrElse(throw new ParseException("Invalid transform argument", ctx)) + } + } + + def cleanTableProperties( + ctx: ParserRuleContext, properties: Map[String, String]): Map[String, String] = { + import TableCatalog._ + val legacyOn = conf.getConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED) + properties.filter { + case (PROP_PROVIDER, _) if !legacyOn => + throw new ParseException(s"$PROP_PROVIDER is a reserved table property, please use the USING clause to specify it.", ctx) + case (PROP_PROVIDER, _) => false + case (PROP_LOCATION, _) if !legacyOn => + throw new ParseException(s"$PROP_LOCATION is a reserved table property, please use the LOCATION clause to specify it.", ctx) + case (PROP_LOCATION, _) => false + case (PROP_OWNER, _) if !legacyOn => + throw new ParseException(s"$PROP_OWNER is a reserved table property, it will be set to the current user.", ctx) + case (PROP_OWNER, _) => false + case _ => true + } + } + + def cleanTableOptions( + ctx: ParserRuleContext, + options: Map[String, String], + location: Option[String]): (Map[String, String], Option[String]) = { + var path = location + val filtered = cleanTableProperties(ctx, options).filter { + case (k, v) if k.equalsIgnoreCase("path") && path.nonEmpty => + throw new ParseException(s"Duplicated table paths found: '${path.get}' and '$v'. LOCATION" + + s" and the case insensitive key 'path' in OPTIONS are all used to indicate the custom" + + s" table path, you can only specify one of them.", ctx) + case (k, v) if k.equalsIgnoreCase("path") => + path = Some(v) + false + case _ => true + } + (filtered, path) + } + + /** + * Create a [[SerdeInfo]] for creating tables. + * + * Format: STORED AS (name | INPUTFORMAT input_format OUTPUTFORMAT output_format) + */ + override def visitCreateFileFormat(ctx: CreateFileFormatContext): SerdeInfo = withOrigin(ctx) { + (ctx.fileFormat, ctx.storageHandler) match { + // Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format + case (c: TableFileFormatContext, null) => + SerdeInfo(formatClasses = Some(FormatClasses(string(c.inFmt), string(c.outFmt)))) + // Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO + case (c: GenericFileFormatContext, null) => + SerdeInfo(storedAs = Some(c.identifier.getText)) + case (null, storageHandler) => + operationNotAllowed("STORED BY", ctx) + case _ => + throw new ParseException("Expected either STORED AS or STORED BY, not both", ctx) + } + } + + /** + * Create a [[SerdeInfo]] used for creating tables. + * + * Example format: + * {{{ + * SERDE serde_name [WITH SERDEPROPERTIES (k1=v1, k2=v2, ...)] + * }}} + * + * OR + * + * {{{ + * DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] + * [COLLECTION ITEMS TERMINATED BY char] + * [MAP KEYS TERMINATED BY char] + * [LINES TERMINATED BY char] + * [NULL DEFINED AS char] + * }}} + */ + def visitRowFormat(ctx: RowFormatContext): SerdeInfo = withOrigin(ctx) { + ctx match { + case serde: RowFormatSerdeContext => visitRowFormatSerde(serde) + case delimited: RowFormatDelimitedContext => visitRowFormatDelimited(delimited) + } + } + + /** + * Create SERDE row format name and properties pair. + */ + override def visitRowFormatSerde(ctx: RowFormatSerdeContext): SerdeInfo = withOrigin(ctx) { + import ctx._ + SerdeInfo( + serde = Some(string(name)), + serdeProperties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) + } + + /** + * Create a delimited row format properties object. + */ + override def visitRowFormatDelimited( + ctx: RowFormatDelimitedContext): SerdeInfo = withOrigin(ctx) { + // Collect the entries if any. + def entry(key: String, value: Token): Seq[(String, String)] = { + Option(value).toSeq.map(x => key -> string(x)) + } + + // TODO we need proper support for the NULL format. + val entries = + entry("field.delim", ctx.fieldsTerminatedBy) ++ + entry("serialization.format", ctx.fieldsTerminatedBy) ++ + entry("escape.delim", ctx.escapedBy) ++ + // The following typo is inherited from Hive... + entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++ + entry("mapkey.delim", ctx.keysTerminatedBy) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "line.delim" -> value + } + SerdeInfo(serdeProperties = entries.toMap) + } + + /** + * Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT + * and STORED AS. + * + * The following are allowed. Anything else is not: + * ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] + * ROW FORMAT DELIMITED ... STORED AS TEXTFILE + * ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... + */ + protected def validateRowFormatFileFormat( + rowFormatCtx: RowFormatContext, + createFileFormatCtx: CreateFileFormatContext, + parentCtx: ParserRuleContext): Unit = { + if (!(rowFormatCtx == null || createFileFormatCtx == null)) { + (rowFormatCtx, createFileFormatCtx.fileFormat) match { + case (_, ffTable: TableFileFormatContext) => // OK + case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case ("sequencefile" | "textfile" | "rcfile") => // OK + case fmt => + operationNotAllowed( + s"ROW FORMAT SERDE is incompatible with format '$fmt', which also specifies a serde", + parentCtx) + } + case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case "textfile" => // OK + case fmt => operationNotAllowed( + s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx) + } + case _ => + // should never happen + def str(ctx: ParserRuleContext): String = { + (0 until ctx.getChildCount).map { i => ctx.getChild(i).getText }.mkString(" ") + } + + operationNotAllowed( + s"Unexpected combination of ${str(rowFormatCtx)} and ${str(createFileFormatCtx)}", + parentCtx) + } + } + } + + protected def validateRowFormatFileFormat( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { + validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) + } + } + + override def visitCreateTableClauses(ctx: CreateTableClausesContext): TableClauses = { + checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) + checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx) + checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) + checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) + checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) + checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) + checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) + checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) + + if (ctx.skewSpec.size > 0) { + operationNotAllowed("CREATE TABLE ... SKEWED BY", ctx) + } + + val (partTransforms, partCols) = + Option(ctx.partitioning).map(visitPartitionFieldList).getOrElse((Nil, Nil)) + val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) + val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) + val cleanedProperties = cleanTableProperties(ctx, properties) + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + val location = visitLocationSpecList(ctx.locationSpec()) + val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) + val comment = visitCommentSpecList(ctx.commentSpec()) + val serdeInfo = + getSerdeInfo(ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) + (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, + serdeInfo) + } + + protected def getSerdeInfo( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + ctx: ParserRuleContext): Option[SerdeInfo] = { + validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) + val rowFormatSerdeInfo = rowFormatCtx.map(visitRowFormat) + val fileFormatSerdeInfo = createFileFormatCtx.map(visitCreateFileFormat) + (fileFormatSerdeInfo ++ rowFormatSerdeInfo).reduceLeftOption((l, r) => l.merge(r)) + } + + private def partitionExpressions( + partTransforms: Seq[Transform], + partCols: Seq[StructField], + ctx: ParserRuleContext): Seq[Transform] = { + if (partTransforms.nonEmpty) { + if (partCols.nonEmpty) { + val references = partTransforms.map(_.describe()).mkString(", ") + val columns = partCols + .map(field => s"${field.name} ${field.dataType.simpleString}") + .mkString(", ") + operationNotAllowed( + s"""PARTITION BY: Cannot mix partition expressions and partition columns: + |Expressions: $references + |Columns: $columns""".stripMargin, ctx) + + } + partTransforms + } else { + // columns were added to create the schema. convert to column references + partCols.map { column => + IdentityTransform(FieldReference(Seq(column.name))) + } + } + } + + /** + * Create a table, returning a [[CreateTable]] or [[CreateTableAsSelect]] logical plan. + * + * Expected format: + * {{{ + * CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name + * [USING table_provider] + * create_table_clauses + * [[AS] select_statement]; + * + * create_table_clauses (order insensitive): + * [PARTITIONED BY (partition_fields)] + * [OPTIONS table_property_list] + * [ROW FORMAT row_format] + * [STORED AS file_format] + * [CLUSTERED BY (col_name, col_name, ...) + * [SORTED BY (col_name [ASC|DESC], ...)] + * INTO num_buckets BUCKETS + * ] + * [LOCATION path] + * [COMMENT table_comment] + * [TBLPROPERTIES (property_name=property_value, ...)] + * + * partition_fields: + * col_name, transform(col_name), transform(constant, col_name), ... | + * col_name data_type [NOT NULL] [COMMENT col_comment], ... + * }}} + */ + override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { + val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) + + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) + val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = + visitCreateTableClauses(ctx.createTableClauses()) + + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) + } + + if (temp) { + val asSelect = if (ctx.query == null) "" else " AS ..." + operationNotAllowed( + s"CREATE TEMPORARY TABLE ...$asSelect, use CREATE TEMPORARY VIEW instead", ctx) + } + + // partition transforms for BucketSpec was moved inside parser + // https://issues.apache.org/jira/browse/SPARK-37923 + val partitioning = + partitionExpressions(partTransforms, partCols, ctx) ++ bucketSpec.map(_.asTransform) + val tableSpec = TableSpec(properties, provider, options, location, comment, + serdeInfo, external) + + Option(ctx.query).map(plan) match { + case Some(_) if columns.nonEmpty => + operationNotAllowed( + "Schema may not be specified in a Create Table As Select (CTAS) statement", + ctx) + + case Some(_) if partCols.nonEmpty => + // non-reference partition columns are not allowed because schema can't be specified + operationNotAllowed( + "Partition column types may not be specified in Create Table As Select (CTAS)", + ctx) + + // CreateTable / CreateTableAsSelect was migrated to v2 in Spark 3.3.0 + // https://issues.apache.org/jira/browse/SPARK-36850 + case Some(query) => + CreateTableAsSelect( + UnresolvedIdentifier(table), + partitioning, query, tableSpec, Map.empty, ifNotExists) + + case _ => + // Note: table schema includes both the table columns list and the partition columns + // with data type. + val schema = StructType(columns ++ partCols) + CreateTable( + UnresolvedIdentifier(table), + schema, partitioning, tableSpec, ignoreIfExists = ifNotExists) + } + } + + /** + * Parse new column info from ADD COLUMN into a QualifiedColType. + */ + override def visitQualifiedColTypeWithPosition( + ctx: QualifiedColTypeWithPositionContext): QualifiedColType = withOrigin(ctx) { + val name = typedVisit[Seq[String]](ctx.name) + QualifiedColType( + path = if (name.length > 1) Some(UnresolvedFieldName(name.init)) else None, + colName = name.last, + dataType = typedVisit[DataType](ctx.dataType), + nullable = ctx.NULL == null, + comment = Option(ctx.commentSpec()).map(visitCommentSpec), + position = Option(ctx.colPosition).map(pos => + UnresolvedFieldPosition(typedVisit[ColumnPosition](pos))), + default = Option(null)) + } + + /** + * Convert a property list into a key-value map. + * This should be called through [[visitPropertyKeyValues]] or [[visitPropertyKeys]]. + */ + override def visitPropertyList(ctx: PropertyListContext): Map[String, String] = withOrigin(ctx) { + val properties = ctx.property.asScala.map { property => + val key = visitPropertyKey(property.key) + val value = visitPropertyValue(property.value) + key -> value + } + // Check for duplicate property names. + checkDuplicateKeys(properties.toSeq, ctx) + properties.toMap + } + + /** + * Parse a key-value map from a [[PropertyListContext]], assuming all values are specified. + */ + def visitPropertyKeyValues(ctx: PropertyListContext): Map[String, String] = { + val props = visitPropertyList(ctx) + val badKeys = props.collect { case (key, null) => key } + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values must be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props + } + + /** + * Parse a list of keys from a [[PropertyListContext]], assuming no values are specified. + */ + def visitPropertyKeys(ctx: PropertyListContext): Seq[String] = { + val props = visitPropertyList(ctx) + val badKeys = props.filter { case (_, v) => v != null }.keys + if (badKeys.nonEmpty) { + operationNotAllowed( + s"Values should not be specified for key(s): ${badKeys.mkString("[", ",", "]")}", ctx) + } + props.keys.toSeq + } + + /** + * A property key can either be String or a collection of dot separated elements. This + * function extracts the property key based on whether its a string literal or a property + * identifier. + */ + override def visitPropertyKey(key: PropertyKeyContext): String = { + if (key.STRING != null) { + string(key.STRING) + } else { + key.getText + } + } + + /** + * A property value can be String, Integer, Boolean or Decimal. This function extracts + * the property value based on whether its a string, integer, boolean or decimal literal. + */ + override def visitPropertyValue(value: PropertyValueContext): String = { + if (value == null) { + null + } else if (value.STRING != null) { + string(value.STRING) + } else if (value.booleanValue != null) { + value.getText.toLowerCase(Locale.ROOT) + } else { + value.getText + } + } +} + +/** + * A container for holding named common table expressions (CTEs) and a query plan. + * This operator will be removed during analysis and the relations will be substituted into child. + * + * @param child The final query of this CTE. + * @param cteRelations A sequence of pair (alias, the CTE definition) that this CTE defined + * Each CTE can see the base tables and the previously defined CTEs only. + */ +case class With(child: LogicalPlan, cteRelations: Seq[(String, SubqueryAlias)]) extends UnaryNode { + override def output: Seq[Attribute] = child.output + + override def simpleString(maxFields: Int): String = { + val cteAliases = truncatedString(cteRelations.map(_._1), "[", ", ", "]", maxFields) + s"CTE $cteAliases" + } + + override def innerChildren: Seq[LogicalPlan] = cteRelations.map(_._2) + + def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = this +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala new file mode 100644 index 000000000000..bbde7bea5538 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.parser + +import org.antlr.v4.runtime._ +import org.antlr.v4.runtime.atn.PredictionMode +import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException} +import org.antlr.v4.runtime.tree.TerminalNodeImpl +import org.apache.hudi.spark.sql.parser.HoodieSqlBaseParser.{NonReservedContext, QuotedIdentifierContext} +import org.apache.hudi.spark.sql.parser.{HoodieSqlBaseBaseListener, HoodieSqlBaseLexer, HoodieSqlBaseParser} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.trees.Origin +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.internal.VariableSubstitution +import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, SparkSession} + +import java.util.Locale + +class HoodieSpark3_5ExtendedSqlParser(session: SparkSession, delegate: ParserInterface) + extends HoodieExtendedParserInterface with Logging { + + private lazy val conf = session.sqlContext.conf + private lazy val builder = new HoodieSpark3_5ExtendedSqlAstBuilder(conf, delegate) + private val substitutor = new VariableSubstitution + + override def parsePlan(sqlText: String): LogicalPlan = { + val substitutionSql = substitutor.substitute(sqlText) + if (isHoodieCommand(substitutionSql)) { + parse(substitutionSql) { parser => + builder.visit(parser.singleStatement()) match { + case plan: LogicalPlan => plan + case _ => delegate.parsePlan(sqlText) + } + } + } else { + delegate.parsePlan(substitutionSql) + } + } + + override def parseQuery(sqlText: String): LogicalPlan = delegate.parseQuery(sqlText) + + override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText) + + override def parseTableIdentifier(sqlText: String): TableIdentifier = + delegate.parseTableIdentifier(sqlText) + + override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = + delegate.parseFunctionIdentifier(sqlText) + + override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText) + + override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) + + protected def parse[T](command: String)(toResult: HoodieSqlBaseParser => T): T = { + logDebug(s"Parsing command: $command") + + val lexer = new HoodieSqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command))) + lexer.removeErrorListeners() + lexer.addErrorListener(ParseErrorListener) + + val tokenStream = new CommonTokenStream(lexer) + val parser = new HoodieSqlBaseParser(tokenStream) + parser.addParseListener(PostProcessor) + parser.removeErrorListeners() + parser.addErrorListener(ParseErrorListener) + // parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced + parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled + parser.SQL_standard_keyword_behavior = conf.ansiEnabled + + try { + try { + // first, try parsing with potentially faster SLL mode + parser.getInterpreter.setPredictionMode(PredictionMode.SLL) + toResult(parser) + } + catch { + case e: ParseCancellationException => + // if we fail, parse with LL mode + tokenStream.seek(0) // rewind input stream + parser.reset() + + // Try Again. + parser.getInterpreter.setPredictionMode(PredictionMode.LL) + toResult(parser) + } + } + catch { + case e: ParseException if e.command.isDefined => + throw e + case e: ParseException => + throw e.withCommand(command) + case e: AnalysisException => + val position = Origin(e.line, e.startPosition) + throw new ParseException(Option(command), e.message, position, position) + } + } + + override def parseMultipartIdentifier(sqlText: String): Seq[String] = { + delegate.parseMultipartIdentifier(sqlText) + } + + private def isHoodieCommand(sqlText: String): Boolean = { + val normalized = sqlText.toLowerCase(Locale.ROOT).trim().replaceAll("\\s+", " ") + normalized.contains("system_time as of") || + normalized.contains("timestamp as of") || + normalized.contains("system_version as of") || + normalized.contains("version as of") || + normalized.contains("create index") || + normalized.contains("drop index") || + normalized.contains("show indexes") || + normalized.contains("refresh index") + } +} + +/** + * Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`. + */ +class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream { + override def consume(): Unit = wrapped.consume + override def getSourceName(): String = wrapped.getSourceName + override def index(): Int = wrapped.index + override def mark(): Int = wrapped.mark + override def release(marker: Int): Unit = wrapped.release(marker) + override def seek(where: Int): Unit = wrapped.seek(where) + override def size(): Int = wrapped.size + + override def getText(interval: Interval): String = { + // ANTLR 4.7's CodePointCharStream implementations have bugs when + // getText() is called with an empty stream, or intervals where + // the start > end. See + // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix + // that is not yet in a released ANTLR artifact. + if (size() > 0 && (interval.b - interval.a >= 0)) { + wrapped.getText(interval) + } else { + "" + } + } + // scalastyle:off + override def LA(i: Int): Int = { + // scalastyle:on + val la = wrapped.LA(i) + if (la == 0 || la == IntStream.EOF) la + else Character.toUpperCase(la) + } +} + +/** + * Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`. + */ +case object PostProcessor extends HoodieSqlBaseBaseListener { + + /** Remove the back ticks from an Identifier. */ + override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = { + replaceTokenByIdentifier(ctx, 1) { token => + // Remove the double back ticks in the string. + token.setText(token.getText.replace("``", "`")) + token + } + } + + /** Treat non-reserved keywords as Identifiers. */ + override def exitNonReserved(ctx: NonReservedContext): Unit = { + replaceTokenByIdentifier(ctx, 0)(identity) + } + + private def replaceTokenByIdentifier( + ctx: ParserRuleContext, + stripMargins: Int)( + f: CommonToken => CommonToken = identity): Unit = { + val parent = ctx.getParent + parent.removeLastChild() + val token = ctx.getChild(0).getPayload.asInstanceOf[Token] + val newToken = new CommonToken( + new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream), + HoodieSqlBaseParser.IDENTIFIER, + token.getChannel, + token.getStartIndex + stripMargins, + token.getStopIndex - stripMargins) + parent.addChild(new TerminalNodeImpl(f(newToken))) + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java new file mode 100644 index 000000000000..d4b0b0e764ed --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.testutils.HoodieSparkClientTestHarness; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for TestHoodieBulkInsertDataInternalWriter. + */ +public class HoodieBulkInsertInternalWriterTestBase extends HoodieSparkClientTestHarness { + + protected static final Random RANDOM = new Random(); + + @BeforeEach + public void setUp() throws Exception { + initSparkContexts(); + initPath(); + initFileSystem(); + initTestDataGenerator(); + initMetaClient(); + initTimelineService(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { + return getWriteConfig(populateMetaFields, DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().defaultValue()); + } + + protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields, String hiveStylePartitioningValue) { + Properties properties = new Properties(); + if (!populateMetaFields) { + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), "false"); + } + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING().key(), hiveStylePartitioningValue); + return SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).withProperties(properties).build(); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, + Option> fileAbsPaths, Option> fileNames) { + assertWriteStatuses(writeStatuses, batches, size, false, fileAbsPaths, fileNames, false); + } + + protected void assertWriteStatuses(List writeStatuses, int batches, int size, boolean areRecordsSorted, + Option> fileAbsPaths, Option> fileNames, boolean isHiveStylePartitioning) { + if (areRecordsSorted) { + assertEquals(batches, writeStatuses.size()); + } else { + assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size()); + } + + Map sizeMap = new HashMap<>(); + if (!areRecordsSorted) { + // no of records are written per batch. Every 4th batch goes into same writeStatus. So, populating the size expected + // per write status + for (int i = 0; i < batches; i++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3]; + if (!sizeMap.containsKey(partitionPath)) { + sizeMap.put(partitionPath, 0L); + } + sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size); + } + } + + int counter = 0; + for (WriteStatus writeStatus : writeStatuses) { + // verify write status + String actualPartitionPathFormat = isHiveStylePartitioning ? SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME + "=%s" : "%s"; + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStatus.getPartitionPath()); + if (areRecordsSorted) { + assertEquals(writeStatus.getTotalRecords(), size); + } else { + assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3])); + } + assertNull(writeStatus.getGlobalError()); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertEquals(writeStatus.getTotalErrorRecords(), 0); + assertFalse(writeStatus.hasErrors()); + assertNotNull(writeStatus.getFileId()); + String fileId = writeStatus.getFileId(); + if (fileAbsPaths.isPresent()) { + fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath()); + } + if (fileNames.isPresent()) { + fileNames.get().add(writeStatus.getStat().getPath() + .substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1)); + } + HoodieWriteStat writeStat = writeStatus.getStat(); + if (areRecordsSorted) { + assertEquals(size, writeStat.getNumInserts()); + assertEquals(size, writeStat.getNumWrites()); + } else { + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts()); + assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites()); + } + assertEquals(fileId, writeStat.getFileId()); + assertEquals(String.format(actualPartitionPathFormat, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3]), writeStat.getPartitionPath()); + assertEquals(0, writeStat.getNumDeletes()); + assertEquals(0, writeStat.getNumUpdateWrites()); + assertEquals(0, writeStat.getTotalWriteErrors()); + } + } + + protected void assertOutput(Dataset expectedRows, Dataset actualRows, String instantTime, Option> fileNames, + boolean populateMetaColumns) { + if (populateMetaColumns) { + // verify 3 meta fields that are filled in within create handle + actualRows.collectAsList().forEach(entry -> { + assertEquals(entry.get(HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.ordinal()).toString(), instantTime); + assertFalse(entry.isNullAt(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal())); + if (fileNames.isPresent()) { + assertTrue(fileNames.get().contains(entry.get(HoodieMetadataField.FILENAME_METADATA_FIELD.ordinal()))); + } + assertFalse(entry.isNullAt(HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.ordinal())); + }); + + // after trimming 2 of the meta fields, rest of the fields should match + Dataset trimmedExpected = expectedRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + Dataset trimmedActual = actualRows.drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.FILENAME_METADATA_FIELD); + assertEquals(0, trimmedActual.except(trimmedExpected).count()); + } else { // operation = BULK_INSERT_APPEND_ONLY + // all meta columns are untouched + assertEquals(0, expectedRows.except(actualRows).count()); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java new file mode 100644 index 000000000000..96b06937504f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieBulkInsertDataInternalWriter.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getInternalRowWithError; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Unit tests {@link HoodieBulkInsertDataInternalWriter}. + */ +public class TestHoodieBulkInsertDataInternalWriter extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream configParams() { + Object[][] data = new Object[][] { + {true, true}, + {true, false}, + {false, true}, + {false, false} + }; + return Stream.of(data).map(Arguments::of); + } + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("configParams") + public void testDataInternalWriter(boolean sorted, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, populateMetaFields, sorted); + + int size = 10 + RANDOM.nextInt(1000); + // write N rows to partition1, N rows to partition2 and N rows to partition3 ... Each batch should create a new RowCreateHandle and a new file + int batches = 3; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), batches, size, sorted, fileAbsPaths, fileNames, false); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(totalInputRows, result, instantTime, fileNames, populateMetaFields); + } + } + + + /** + * Issue some corrupted or wrong schematized InternalRow after few valid InternalRows so that global error is thrown. write batch 1 of valid records write batch2 of invalid records which is expected + * to throw Global Error. Verify global error is set appropriately and only first batch of records are written to disk. + */ + @Test + public void testGlobalFailure() throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(true); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0]; + + String instantTime = "001"; + HoodieBulkInsertDataInternalWriter writer = new HoodieBulkInsertDataInternalWriter(table, cfg, instantTime, RANDOM.nextInt(100000), + RANDOM.nextLong(), STRUCT_TYPE, true, false); + + int size = 10 + RANDOM.nextInt(100); + int totalFailures = 5; + // Generate first batch of valid rows + Dataset inputRows = getRandomRows(sqlContext, size / 2, partitionPath, false); + List internalRows = toInternalRows(inputRows, ENCODER); + + // generate some failures rows + for (int i = 0; i < totalFailures; i++) { + internalRows.add(getInternalRowWithError(partitionPath)); + } + + // generate 2nd batch of valid rows + Dataset inputRows2 = getRandomRows(sqlContext, size / 2, partitionPath, false); + internalRows.addAll(toInternalRows(inputRows2, ENCODER)); + + // issue writes + try { + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + fail("Should have failed"); + } catch (Throwable e) { + // expected + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + + Option> fileAbsPaths = Option.of(new ArrayList<>()); + Option> fileNames = Option.of(new ArrayList<>()); + // verify write statuses + assertWriteStatuses(commitMetadata.getWriteStatuses(), 1, size / 2, fileAbsPaths, fileNames); + + // verify rows + Dataset result = sqlContext.read().parquet(fileAbsPaths.get().toArray(new String[0])); + assertOutput(inputRows, result, instantTime, fileNames, true); + } + + private void writeRows(Dataset inputRows, HoodieBulkInsertDataInternalWriter writer) + throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java new file mode 100644 index 000000000000..176b67bbe98f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java @@ -0,0 +1,330 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.spark3.internal; + +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.internal.HoodieBulkInsertInternalWriterTestBase; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.testutils.HoodieClientTestUtils; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.write.DataWriter; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.apache.hudi.testutils.SparkDatasetTestUtils.ENCODER; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.STRUCT_TYPE; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.getRandomRows; +import static org.apache.hudi.testutils.SparkDatasetTestUtils.toInternalRows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests {@link HoodieDataSourceInternalBatchWrite}. + */ +public class TestHoodieDataSourceInternalBatchWrite extends + HoodieBulkInsertInternalWriterTestBase { + + private static Stream bulkInsertTypeParams() { + Object[][] data = new Object[][] { + {true}, + {false} + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testDataSourceWriter(boolean populateMetaFields) throws Exception { + testDataSourceWriterInternal(Collections.EMPTY_MAP, Collections.EMPTY_MAP, populateMetaFields); + } + + private void testDataSourceWriterInternal(Map extraMetadata, Map expectedExtraMetadata, boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime = "001"; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, extraMetadata, populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + String[] partitionPaths = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS; + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(1000); + int batches = 5; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // verify extra metadata + Option commitMetadataOption = HoodieClientTestUtils.getCommitMetadataForLatestInstant(metaClient); + assertTrue(commitMetadataOption.isPresent()); + Map actualExtraMetadata = new HashMap<>(); + commitMetadataOption.get().getExtraMetadata().entrySet().stream().filter(entry -> + !entry.getKey().equals(HoodieCommitMetadata.SCHEMA_KEY)).forEach(entry -> actualExtraMetadata.put(entry.getKey(), entry.getValue())); + assertEquals(actualExtraMetadata, expectedExtraMetadata); + } + + @Test + public void testDataSourceWriterExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put(commitExtraMetaPrefix + "a", "valA"); + extraMeta.put(commitExtraMetaPrefix + "b", "valB"); + extraMeta.put("commit_extra_c", "valC"); // should not be part of commit extra metadata + + Map expectedMetadata = new HashMap<>(); + expectedMetadata.putAll(extraMeta); + expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key()); + expectedMetadata.remove("commit_extra_c"); + + testDataSourceWriterInternal(extraMeta, expectedMetadata, true); + } + + @Test + public void testDataSourceWriterEmptyExtraCommitMetadata() throws Exception { + String commitExtraMetaPrefix = "commit_extra_meta_"; + Map extraMeta = new HashMap<>(); + extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix); + extraMeta.put("keyA", "valA"); + extraMeta.put("keyB", "valB"); + extraMeta.put("commit_extra_c", "valC"); + // none of the keys has commit metadata key prefix. + testDataSourceWriterInternal(extraMeta, Collections.EMPTY_MAP, true); + } + + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testMultipleDataSourceWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 2; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.EMPTY_MAP, populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10 + RANDOM.nextInt(1000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + // Large writes are not required to be executed w/ regular CI jobs. Takes lot of running time. + @Disabled + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testLargeWrites(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + int partitionCounter = 0; + + // execute N rounds + for (int i = 0; i < 3; i++) { + String instantTime = "00" + i; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.EMPTY_MAP, populateMetaFields, false); + List commitMessages = new ArrayList<>(); + Dataset totalInputRows = null; + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(partitionCounter++, RANDOM.nextLong()); + + int size = 10000 + RANDOM.nextInt(10000); + int batches = 3; // one batch per partition + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages.add(commitMetadata); + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + + Dataset result = HoodieClientTestUtils.readCommit(basePath, sqlContext, metaClient.getCommitTimeline(), instantTime, + populateMetaFields); + + // verify output + assertOutput(totalInputRows, result, instantTime, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + } + } + + /** + * Tests that DataSourceWriter.abort() will abort the written records of interest write and commit batch1 write and abort batch2 Read of entire dataset should show only records from batch1. + * commit batch1 + * abort batch2 + * verify only records from batch1 is available to read + */ + @ParameterizedTest + @MethodSource("bulkInsertTypeParams") + public void testAbort(boolean populateMetaFields) throws Exception { + // init config and table + HoodieWriteConfig cfg = getWriteConfig(populateMetaFields); + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + String instantTime0 = "00" + 0; + // init writer + HoodieDataSourceInternalBatchWrite dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime0, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.EMPTY_MAP, populateMetaFields, false); + DataWriter writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(0, RANDOM.nextLong()); + + List partitionPaths = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS); + List partitionPathsAbs = new ArrayList<>(); + for (String partitionPath : partitionPaths) { + partitionPathsAbs.add(basePath + "/" + partitionPath + "/*"); + } + + int size = 10 + RANDOM.nextInt(100); + int batches = 1; + Dataset totalInputRows = null; + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + if (totalInputRows == null) { + totalInputRows = inputRows; + } else { + totalInputRows = totalInputRows.union(inputRows); + } + } + + HoodieWriterCommitMessage commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + List commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.commit(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + Dataset result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + assertWriteStatuses(commitMessages.get(0).getWriteStatuses(), batches, size, Option.empty(), Option.empty()); + + // 2nd batch. abort in the end + String instantTime1 = "00" + 1; + dataSourceInternalBatchWrite = + new HoodieDataSourceInternalBatchWrite(instantTime1, cfg, STRUCT_TYPE, sqlContext.sparkSession(), hadoopConf, Collections.EMPTY_MAP, populateMetaFields, false); + writer = dataSourceInternalBatchWrite.createBatchWriterFactory(null).createWriter(1, RANDOM.nextLong()); + + for (int j = 0; j < batches; j++) { + String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[j % 3]; + Dataset inputRows = getRandomRows(sqlContext, size, partitionPath, false); + writeRows(inputRows, writer); + } + + commitMetadata = (HoodieWriterCommitMessage) writer.commit(); + commitMessages = new ArrayList<>(); + commitMessages.add(commitMetadata); + // commit 1st batch + dataSourceInternalBatchWrite.abort(commitMessages.toArray(new HoodieWriterCommitMessage[0])); + metaClient.reloadActiveTimeline(); + result = HoodieClientTestUtils.read(jsc, basePath, sqlContext, metaClient.getFs(), partitionPathsAbs.toArray(new String[0])); + // verify rows + // only rows from first batch should be present + assertOutput(totalInputRows, result, instantTime0, Option.empty(), populateMetaFields); + } + + private void writeRows(Dataset inputRows, DataWriter writer) throws Exception { + List internalRows = toInternalRows(inputRows, ENCODER); + // issue writes + for (InternalRow internalRow : internalRows) { + writer.write(internalRow); + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java similarity index 90% rename from hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java rename to hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java index 075e4242cb00..5a08e54f5e17 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java @@ -23,14 +23,10 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation; import org.apache.spark.sql.catalyst.plans.logical.InsertIntoStatement; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Collections; - -import static scala.collection.JavaConverters.asScalaBuffer; - - /** * Unit tests {@link ReflectUtil}. */ @@ -46,10 +42,11 @@ public void testDataSourceWriterExtraCommitMetadata() throws Exception { InsertIntoStatement newStatment = ReflectUtil.createInsertInto( statement.table(), statement.partitionSpec(), - asScalaBuffer(Collections.emptyList()).toSeq(), + scala.collection.immutable.List.empty(), statement.query(), statement.overwrite(), - statement.ifPartitionNotExists()); + statement.ifPartitionNotExists(), + statement.byName()); Assertions.assertTrue( ((UnresolvedRelation)newStatment.table()).multipartIdentifier().contains("test_reflect_util")); diff --git a/packaging/bundle-validation/base/build_flink1180hive313spark350.sh b/packaging/bundle-validation/base/build_flink1180hive313spark350.sh new file mode 100755 index 000000000000..dca3acdc5bc5 --- /dev/null +++ b/packaging/bundle-validation/base/build_flink1180hive313spark350.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +docker build \ + --build-arg HIVE_VERSION=3.1.3 \ + --build-arg FLINK_VERSION=1.18.0 \ + --build-arg SPARK_VERSION=3.5.0 \ + --build-arg SPARK_HADOOP_VERSION=3 \ + --build-arg HADOOP_VERSION=3.3.5 \ + -t hudi-ci-bundle-validation-base:flink1180hive313spark350 . +docker image tag hudi-ci-bundle-validation-base:flink1180hive313spark350 apachehudi/hudi-ci-bundle-validation-base:flink1180hive313spark350 diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index 505ee9c7c2d4..59fc5d9df397 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -104,6 +104,16 @@ elif [[ ${SPARK_RUNTIME} == 'spark3.4.0' ]]; then CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 IMAGE_TAG=flink1170hive313spark340 +elif [[ ${SPARK_RUNTIME} == 'spark3.5.0' ]]; then + HADOOP_VERSION=3.3.5 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.18.0 + SPARK_VERSION=3.5.0 + SPARK_HADOOP_VERSION=3 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1180hive313spark350 fi # Copy bundle jars to temp dir for mounting diff --git a/packaging/bundle-validation/run_docker_java17.sh b/packaging/bundle-validation/run_docker_java17.sh index 879b56367e0c..d9f50cc90768 100755 --- a/packaging/bundle-validation/run_docker_java17.sh +++ b/packaging/bundle-validation/run_docker_java17.sh @@ -93,6 +93,16 @@ elif [[ ${SPARK_RUNTIME} == 'spark3.4.0' ]]; then CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 IMAGE_TAG=flink1170hive313spark340 +elif [[ ${SPARK_RUNTIME} == 'spark3.5.0' ]]; then + HADOOP_VERSION=3.3.5 + HIVE_VERSION=3.1.3 + DERBY_VERSION=10.14.1.0 + FLINK_VERSION=1.18.0 + SPARK_VERSION=3.5.0 + SPARK_HADOOP_VERSION=3 + CONFLUENT_VERSION=5.5.12 + KAFKA_CONNECT_HDFS_VERSION=10.1.13 + IMAGE_TAG=flink1180hive313spark350 fi # build docker image diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 0f0e8f68e2ea..0d01bace432e 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -123,6 +123,8 @@ com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve com.github.ben-manes.caffeine:caffeine + + com.google.protobuf:protobuf-java com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} io.confluent:kafka-avro-serializer @@ -226,6 +228,10 @@ org.apache.httpcomponents. org.apache.hudi.aws.org.apache.httpcomponents. + + com.google.protobuf. + org.apache.hudi.com.google.protobuf. + org.roaringbitmap. org.apache.hudi.org.roaringbitmap. diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml index e70e94cbaf51..3fce33ae6efd 100644 --- a/packaging/hudi-utilities-slim-bundle/pom.xml +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -109,6 +109,8 @@ com.github.davidmoten:guava-mini com.github.davidmoten:hilbert-curve + + com.google.protobuf:protobuf-java com.twitter:bijection-avro_${scala.binary.version} com.twitter:bijection-core_${scala.binary.version} io.confluent:kafka-avro-serializer @@ -189,6 +191,10 @@ org.openjdk.jol. org.apache.hudi.org.openjdk.jol. + + com.google.protobuf. + org.apache.hudi.com.google.protobuf. + diff --git a/pom.xml b/pom.xml index 337f8f2391ea..da214b0ceb26 100644 --- a/pom.xml +++ b/pom.xml @@ -82,7 +82,7 @@ 3.2.0 2.22.2 2.22.2 - 3.2.4 + 3.4.0 3.1.1 3.8.0 2.4 @@ -165,6 +165,7 @@ 3.2.3 3.3.1 3.4.1 + 3.5.0 hudi-spark3.2.x hudi-spark3-common hudi-spark3.2plus-common ${scalatest.spark3.version} ${kafka.spark3.version} + 2.8.1 - 1.12.3 - 1.8.3 - 1.11.1 + 1.13.1 + 1.9.1 + 1.11.2 4.9.3 - 2.14.2 + 2.15.2 ${fasterxml.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} - ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + ${pulsar.spark.scala12.version} - 2.19.0 - 2.0.6 + 2.20.0 + 2.0.7 true true - hudi-spark-datasource/hudi-spark3.4.x + hudi-spark-datasource/hudi-spark3.5.x hudi-spark-datasource/hudi-spark3-common hudi-spark-datasource/hudi-spark3.2plus-common @@ -2298,6 +2301,11 @@ ${slf4j.version} test + + ${hive.groupid} + hive-storage-api + ${hive.storage.version} + @@ -2527,6 +2535,66 @@ + + spark3.5 + + ${spark35.version} + ${spark3.version} + 3.5 + 2.12.18 + ${scala12.version} + 2.12 + hudi-spark3.5.x + + hudi-spark3-common + hudi-spark3.2plus-common + ${scalatest.spark3.version} + ${kafka.spark3.version} + 2.8.1 + + 1.13.1 + 1.9.1 + 1.11.2 + 4.9.3 + 2.15.2 + ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + ${fasterxml.spark3.version} + ${pulsar.spark.scala12.version} + 2.20.0 + 2.0.7 + true + true + + + hudi-spark-datasource/hudi-spark3.5.x + hudi-spark-datasource/hudi-spark3-common + hudi-spark-datasource/hudi-spark3.2plus-common + + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + test + + + ${hive.groupid} + hive-storage-api + ${hive.storage.version} + + + + + spark3.5 + + + + flink1.18 From 1605c2832c606cebf0904b3746f2e21c57989c85 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Fri, 17 Nov 2023 11:20:57 -0800 Subject: [PATCH 015/112] [HUDI-7113] Update release scripts and docs for Spark 3.5 support (#10123) --- README.md | 9 +++++---- scripts/release/deploy_staging_jars.sh | 8 ++++++-- scripts/release/validate_staged_bundles.sh | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 20016f689ad3..6645f55b49b0 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,8 @@ git clone https://github.com/apache/hudi.git && cd hudi mvn clean package -DskipTests # Start command -spark-3.2.3-bin-hadoop3.2/bin/spark-shell \ - --jars `ls packaging/hudi-spark-bundle/target/hudi-spark3.2-bundle_2.12-*.*.*-SNAPSHOT.jar` \ +spark-3.5.0-bin-hadoop3/bin/spark-shell \ + --jars `ls packaging/hudi-spark-bundle/target/hudi-spark3.5-bundle_2.12-*.*.*-SNAPSHOT.jar` \ --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \ --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \ --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \ @@ -85,7 +85,7 @@ mvn clean javadoc:aggregate -Pjavadocs ### Build with different Spark versions The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, corresponding to `spark3` profile is -3.4.0. The default Scala version is 2.12. Refer to the table below for building with different Spark and Scala versions. +3.5.0. The default Scala version is 2.12. Refer to the table below for building with different Spark and Scala versions. | Maven build options | Expected Spark bundle jar name | Notes | |:--------------------------|:---------------------------------------------|:-------------------------------------------------| @@ -96,9 +96,10 @@ The default Spark 2.x version supported is 2.4.4. The default Spark 3.x version, | `-Dspark3.2` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 (same as default) | | `-Dspark3.3` | hudi-spark3.3-bundle_2.12 | For Spark 3.3.x and Scala 2.12 | | `-Dspark3.4` | hudi-spark3.4-bundle_2.12 | For Spark 3.4.x and Scala 2.12 | +| `-Dspark3.5` | hudi-spark3.5-bundle_2.12 | For Spark 3.5.x and Scala 2.12 | | `-Dspark2 -Dscala-2.11` | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 | | `-Dspark2 -Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 | -| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.4.x and Scala 2.12 | +| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.5.x and Scala 2.12 | For example, ``` diff --git a/scripts/release/deploy_staging_jars.sh b/scripts/release/deploy_staging_jars.sh index 146e3fbdfdea..d36b3bb814da 100755 --- a/scripts/release/deploy_staging_jars.sh +++ b/scripts/release/deploy_staging_jars.sh @@ -66,9 +66,13 @@ declare -a ALL_VERSION_OPTS=( "-Dscala-2.12 -Dspark3.3 -pl hudi-spark-datasource/hudi-spark3.3.x,packaging/hudi-spark-bundle -am" # For Spark 3.4, Scala 2.12: # hudi-spark3.4.x_2.12 -# hudi-cli-bundle_2.12 # hudi-spark3.4-bundle_2.12 -"-Dscala-2.12 -Dspark3.4 -pl hudi-spark-datasource/hudi-spark3.4.x,packaging/hudi-spark-bundle,packaging/hudi-cli-bundle -am" +"-Dscala-2.12 -Dspark3.4 -pl hudi-spark-datasource/hudi-spark3.4.x,packaging/hudi-spark-bundle -am" +# For Spark 3.5, Scala 2.12: +# hudi-spark3.5.x_2.12 +# hudi-cli-bundle_2.12 +# hudi-spark3.5-bundle_2.12 +"-Dscala-2.12 -Dspark3.5 -pl hudi-spark-datasource/hudi-spark3.5.x,packaging/hudi-spark-bundle,packaging/hudi-cli-bundle -am" # For Spark 3.1, Scala 2.12: # All other modules and bundles using avro 1.8 "-Dscala-2.12 -Dspark3.1" diff --git a/scripts/release/validate_staged_bundles.sh b/scripts/release/validate_staged_bundles.sh index 866b8cee335b..579dc2410d38 100755 --- a/scripts/release/validate_staged_bundles.sh +++ b/scripts/release/validate_staged_bundles.sh @@ -36,8 +36,8 @@ declare -a bundles=("hudi-aws-bundle" "hudi-cli-bundle_2.11" "hudi-cli-bundle_2. "hudi-flink1.15-bundle" "hudi-flink1.16-bundle" "hudi-flink1.17-bundle" "hudi-flink1.18-bundle" "hudi-gcp-bundle" "hudi-hadoop-mr-bundle" "hudi-hive-sync-bundle" "hudi-integ-test-bundle" "hudi-kafka-connect-bundle" "hudi-metaserver-server-bundle" "hudi-presto-bundle" "hudi-spark-bundle_2.11" "hudi-spark-bundle_2.12" "hudi-spark2.4-bundle_2.11" "hudi-spark2.4-bundle_2.12" "hudi-spark3-bundle_2.12" "hudi-spark3.0-bundle_2.12" "hudi-spark3.1-bundle_2.12" -"hudi-spark3.2-bundle_2.12" "hudi-spark3.3-bundle_2.12" "hudi-spark3.4-bundle_2.12" "hudi-timeline-server-bundle" "hudi-trino-bundle" -"hudi-utilities-bundle_2.11" "hudi-utilities-bundle_2.12" "hudi-utilities-slim-bundle_2.11" +"hudi-spark3.2-bundle_2.12" "hudi-spark3.3-bundle_2.12" "hudi-spark3.4-bundle_2.12" "hudi-spark3.5-bundle_2.12" "hudi-timeline-server-bundle" +"hudi-trino-bundle" "hudi-utilities-bundle_2.11" "hudi-utilities-bundle_2.12" "hudi-utilities-slim-bundle_2.11" "hudi-utilities-slim-bundle_2.12") NOW=$(date +%s) From 149ca9a2e337c3dfc08118c5979e7807820bfdf9 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Sun, 19 Nov 2023 09:35:54 +0800 Subject: [PATCH 016/112] [HUDI-7072] Remove support for Flink 1.13 (#10052) --- .github/workflows/bot.yml | 11 +- README.md | 5 - azure-pipelines-20230430.yml | 3 - .../RowDataToHoodieFunctionWithRateLimit.java | 10 +- .../hudi/source/StreamReadOperator.java | 41 +- .../hudi/sink/utils/CollectorOutput.java | 10 +- .../utils/MockStateInitializationContext.java | 12 +- .../utils/MockStreamingRuntimeContext.java | 11 +- .../hudi/table/ITTestHoodieDataSource.java | 2 +- .../catalog/TestHoodieCatalogFactory.java | 2 +- .../org/apache/hudi/utils}/TestTableEnvs.java | 2 +- .../hudi-flink1.13.x/pom.xml | 144 ----- .../AbstractStreamOperatorAdapter.java | 35 -- .../AbstractStreamOperatorFactoryAdapter.java | 50 -- .../DataStreamScanProviderAdapter.java | 27 - .../DataStreamSinkProviderAdapter.java | 27 - .../hudi/adapter/HiveCatalogConstants.java | 51 -- .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/MaskingOutputAdapter.java | 61 -- .../adapter/OperatorCoordinatorAdapter.java | 27 - .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../adapter/SortCodeGeneratorAdapter.java | 33 - .../SupportsRowLevelDeleteAdapter.java | 33 - .../SupportsRowLevelUpdateAdapter.java | 37 -- .../java/org/apache/hudi/adapter/Utils.java | 83 --- .../hudi/table/data/ColumnarArrayData.java | 270 -------- .../hudi/table/data/ColumnarMapData.java | 73 --- .../hudi/table/data/ColumnarRowData.java | 231 ------- .../table/data/vector/MapColumnVector.java | 29 - .../table/data/vector/RowColumnVector.java | 30 - .../data/vector/VectorizedColumnBatch.java | 148 ----- .../format/cow/ParquetSplitReaderUtil.java | 579 ------------------ .../format/cow/vector/HeapArrayVector.java | 71 --- .../cow/vector/HeapMapColumnVector.java | 80 --- .../cow/vector/HeapRowColumnVector.java | 55 -- .../cow/vector/ParquetDecimalVector.java | 54 -- .../vector/reader/AbstractColumnReader.java | 325 ---------- .../cow/vector/reader/ArrayColumnReader.java | 473 -------------- .../reader/BaseVectorizedColumnReader.java | 313 ---------- .../cow/vector/reader/EmptyColumnReader.java | 41 -- .../reader/FixedLenBytesColumnReader.java | 84 --- .../reader/Int64TimestampColumnReader.java | 119 ---- .../cow/vector/reader/MapColumnReader.java | 76 --- .../reader/ParquetColumnarRowSplitReader.java | 390 ------------ .../reader/ParquetDataColumnReader.java | 199 ------ .../ParquetDataColumnReaderFactory.java | 304 --------- .../cow/vector/reader/RowColumnReader.java | 63 -- .../cow/vector/reader/RunLengthDecoder.java | 304 --------- .../apache/hudi/adapter/OutputAdapter.java | 27 - .../StateInitializationContextAdapter.java | 26 - .../StreamingRuntimeContextAdapter.java | 43 -- .../apache/hudi/adapter/TestTableEnvs.java | 34 - .../AbstractStreamOperatorAdapter.java | 27 - .../AbstractStreamOperatorFactoryAdapter.java | 33 - .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../java/org/apache/hudi/adapter/Utils.java | 23 - .../apache/hudi/adapter/OutputAdapter.java | 32 - .../StateInitializationContextAdapter.java | 32 - .../StreamingRuntimeContextAdapter.java | 43 -- .../AbstractStreamOperatorAdapter.java | 27 - .../AbstractStreamOperatorFactoryAdapter.java | 33 - .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../java/org/apache/hudi/adapter/Utils.java | 23 - .../apache/hudi/adapter/OutputAdapter.java | 32 - .../StateInitializationContextAdapter.java | 31 - .../StreamingRuntimeContextAdapter.java | 43 -- .../apache/hudi/adapter/TestTableEnvs.java | 52 -- .../AbstractStreamOperatorAdapter.java | 27 - .../AbstractStreamOperatorFactoryAdapter.java | 33 - .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../java/org/apache/hudi/adapter/Utils.java | 23 - .../apache/hudi/adapter/OutputAdapter.java | 32 - .../StateInitializationContextAdapter.java | 31 - .../StreamingRuntimeContextAdapter.java | 43 -- .../apache/hudi/adapter/TestTableEnvs.java | 52 -- .../AbstractStreamOperatorAdapter.java | 27 - .../AbstractStreamOperatorFactoryAdapter.java | 33 - .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../java/org/apache/hudi/adapter/Utils.java | 23 - .../apache/hudi/adapter/OutputAdapter.java | 32 - .../StateInitializationContextAdapter.java | 31 - .../StreamingRuntimeContextAdapter.java | 43 -- .../apache/hudi/adapter/TestTableEnvs.java | 52 -- .../AbstractStreamOperatorAdapter.java | 27 - .../AbstractStreamOperatorFactoryAdapter.java | 33 - .../hudi/adapter/MailboxExecutorAdapter.java | 37 -- .../hudi/adapter/RateLimiterAdapter.java | 40 -- .../java/org/apache/hudi/adapter/Utils.java | 25 +- .../apache/hudi/adapter/OutputAdapter.java | 32 - .../StateInitializationContextAdapter.java | 31 - .../StreamingRuntimeContextAdapter.java | 43 -- .../apache/hudi/adapter/TestTableEnvs.java | 52 -- hudi-flink-datasource/pom.xml | 1 - packaging/bundle-validation/README.md | 8 +- packaging/bundle-validation/ci_run.sh | 12 +- .../bundle-validation/run_docker_java17.sh | 8 +- pom.xml | 28 - scripts/release/deploy_staging_jars.sh | 1 - scripts/release/validate_staged_bundles.sh | 2 +- 103 files changed, 82 insertions(+), 6754 deletions(-) rename hudi-flink-datasource/{hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter => hudi-flink/src/test/java/org/apache/hudi/utils}/TestTableEnvs.java (98%) delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/pom.xml delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarArrayData.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarMapData.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarRowData.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/MapColumnVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/RowColumnVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/VectorizedColumnBatch.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java delete mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index daa315d95cd5..a52b706fe22b 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -212,7 +212,6 @@ jobs: strategy: matrix: include: - - flinkProfile: "flink1.13" - flinkProfile: "flink1.14" - flinkProfile: "flink1.15" - flinkProfile: "flink1.16" @@ -304,13 +303,13 @@ jobs: - flinkProfile: 'flink1.14' sparkProfile: 'spark3.2' sparkRuntime: 'spark3.2.3' - - flinkProfile: 'flink1.13' + - flinkProfile: 'flink1.14' sparkProfile: 'spark3.1' sparkRuntime: 'spark3.1.3' - flinkProfile: 'flink1.14' sparkProfile: 'spark3.0' sparkRuntime: 'spark3.0.2' - - flinkProfile: 'flink1.13' + - flinkProfile: 'flink1.14' sparkProfile: 'spark2.4' sparkRuntime: 'spark2.4.8' steps: @@ -378,13 +377,13 @@ jobs: - flinkProfile: 'flink1.14' sparkProfile: 'spark3.2' sparkRuntime: 'spark3.2.3' - - flinkProfile: 'flink1.13' + - flinkProfile: 'flink1.14' sparkProfile: 'spark3.1' sparkRuntime: 'spark3.1.3' - - flinkProfile: 'flink1.13' + - flinkProfile: 'flink1.14' sparkProfile: 'spark' sparkRuntime: 'spark2.4.8' - - flinkProfile: 'flink1.13' + - flinkProfile: 'flink1.14' sparkProfile: 'spark2.4' sparkRuntime: 'spark2.4.8' steps: diff --git a/README.md b/README.md index 6645f55b49b0..e57f5581ee26 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,6 @@ Refer to the table below for building with different Flink and Scala versions. | `-Dflink1.15` | hudi-flink1.15-bundle | For Flink 1.15 | | `-Dflink1.14` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.12 | | `-Dflink1.14 -Dscala-2.11` | hudi-flink1.14-bundle | For Flink 1.14 and Scala 2.11 | -| `-Dflink1.13` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.12 | -| `-Dflink1.13 -Dscala-2.11` | hudi-flink1.13-bundle | For Flink 1.13 and Scala 2.11 | For example, ``` @@ -142,9 +140,6 @@ mvn clean package -DskipTests -Dflink1.15 # Build against Flink 1.14.x and Scala 2.11 mvn clean package -DskipTests -Dflink1.14 -Dscala-2.11 - -# Build against Flink 1.13.x and Scala 2.12 -mvn clean package -DskipTests -Dflink1.13 ``` ## Running Tests diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml index 85d185fbc2c5..21c6d932ef9c 100644 --- a/azure-pipelines-20230430.yml +++ b/azure-pipelines-20230430.yml @@ -32,7 +32,6 @@ parameters: - 'hudi-common' - 'hudi-flink-datasource' - 'hudi-flink-datasource/hudi-flink' - - 'hudi-flink-datasource/hudi-flink1.13.x' - 'hudi-flink-datasource/hudi-flink1.14.x' - 'hudi-flink-datasource/hudi-flink1.15.x' - 'hudi-flink-datasource/hudi-flink1.16.x' @@ -65,7 +64,6 @@ parameters: - '!hudi-examples/hudi-examples-spark' - '!hudi-flink-datasource' - '!hudi-flink-datasource/hudi-flink' - - '!hudi-flink-datasource/hudi-flink1.13.x' - '!hudi-flink-datasource/hudi-flink1.14.x' - '!hudi-flink-datasource/hudi-flink1.15.x' - '!hudi-flink-datasource/hudi-flink1.16.x' @@ -89,7 +87,6 @@ parameters: - '!hudi-examples/hudi-examples-spark' - '!hudi-flink-datasource' - '!hudi-flink-datasource/hudi-flink' - - '!hudi-flink-datasource/hudi-flink1.13.x' - '!hudi-flink-datasource/hudi-flink1.14.x' - '!hudi-flink-datasource/hudi-flink1.15.x' - '!hudi-flink-datasource/hudi-flink1.16.x' diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java index fc9c2177e7c0..4a1962bf9b48 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java @@ -18,14 +18,16 @@ package org.apache.hudi.sink.transform; -import org.apache.hudi.adapter.RateLimiterAdapter; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.RateLimiter; import org.apache.hudi.configuration.FlinkOptions; import org.apache.flink.configuration.Configuration; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; +import java.util.concurrent.TimeUnit; + /** * Function that transforms RowData to a HoodieRecord with RateLimit. */ @@ -39,7 +41,7 @@ public class RowDataToHoodieFunctionWithRateLimit +public class StreamReadOperator extends AbstractStreamOperator implements OneInputStreamOperator { private static final Logger LOG = LoggerFactory.getLogger(StreamReadOperator.class); @@ -70,7 +73,7 @@ public class StreamReadOperator extends AbstractStreamOperatorAdapter // It's the same thread that runs this operator and checkpoint actions. Use this executor to schedule only // splits for subsequent reading, so that a new checkpoint could be triggered without blocking a long time // for exhausting all scheduled split reading tasks. - private final MailboxExecutorAdapter executor; + private final MailboxExecutor executor; private MergeOnReadInputFormat format; @@ -89,7 +92,7 @@ public class StreamReadOperator extends AbstractStreamOperatorAdapter private transient FlinkStreamReadMetrics readMetrics; private StreamReadOperator(MergeOnReadInputFormat format, ProcessingTimeService timeService, - MailboxExecutorAdapter mailboxExecutor) { + MailboxExecutor mailboxExecutor) { this.format = Preconditions.checkNotNull(format, "The InputFormat should not be null."); this.processingTimeService = timeService; this.executor = Preconditions.checkNotNull(mailboxExecutor, "The mailboxExecutor should not be null."); @@ -119,10 +122,9 @@ public void initializeState(StateInitializationContext context) throws Exception } } - this.sourceContext = Utils.getSourceContext( + this.sourceContext = getSourceContext( getOperatorConfig().getTimeCharacteristic(), getProcessingTimeService(), - getContainingTask(), output, getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval()); @@ -247,8 +249,8 @@ private enum SplitState { IDLE, RUNNING } - private static class OperatorFactory extends AbstractStreamOperatorFactoryAdapter - implements OneInputStreamOperatorFactory { + private static class OperatorFactory extends AbstractStreamOperatorFactory + implements OneInputStreamOperatorFactory, YieldingOperatorFactory { private final MergeOnReadInputFormat format; @@ -259,7 +261,7 @@ private OperatorFactory(MergeOnReadInputFormat format) { @SuppressWarnings("unchecked") @Override public > O createStreamOperator(StreamOperatorParameters parameters) { - StreamReadOperator operator = new StreamReadOperator(format, processingTimeService, getMailboxExecutorAdapter()); + StreamReadOperator operator = new StreamReadOperator(format, processingTimeService, getMailboxExecutor()); operator.setup(parameters.getContainingTask(), parameters.getStreamConfig(), parameters.getOutput()); return (O) operator; } @@ -269,4 +271,19 @@ public Class getStreamOperatorClass(ClassLoader classL return StreamReadOperator.class; } } + + private static SourceFunction.SourceContext getSourceContext( + TimeCharacteristic timeCharacteristic, + ProcessingTimeService processingTimeService, + Output> output, + long watermarkInterval) { + return StreamSourceContexts.getSourceContext( + timeCharacteristic, + processingTimeService, + new Object(), // no actual locking needed + output, + watermarkInterval, + -1, + true); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java index b18cfac51b44..9df912f12995 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java @@ -18,12 +18,11 @@ package org.apache.hudi.sink.utils; -import org.apache.hudi.adapter.OutputAdapter; - import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.OutputTag; import java.util.ArrayList; @@ -32,7 +31,7 @@ /** * Collecting {@link Output} for {@link StreamRecord}. */ -public class CollectorOutput implements OutputAdapter> { +public class CollectorOutput implements Output> { private final List records; @@ -68,4 +67,9 @@ public void collect(OutputTag outputTag, StreamRecord record) { public void close() { this.records.clear(); } + + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + // no operation + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java index e218f29df6fe..23f87b15c65f 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java @@ -17,17 +17,18 @@ package org.apache.hudi.sink.utils; -import org.apache.hudi.adapter.StateInitializationContextAdapter; - import org.apache.flink.api.common.state.KeyedStateStore; import org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider; +import org.apache.flink.runtime.state.StateInitializationContext; import org.apache.flink.runtime.state.StatePartitionStreamProvider; +import java.util.OptionalLong; + /** * A {@link FunctionInitializationContext} for testing purpose. */ -public class MockStateInitializationContext implements StateInitializationContextAdapter { +public class MockStateInitializationContext implements StateInitializationContext { private final MockOperatorStateStore operatorStateStore; @@ -59,4 +60,9 @@ public Iterable getRawOperatorStateInputs() { public Iterable getRawKeyedStateInputs() { return null; } + + @Override + public OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java index 888e349bdd90..e7be9b92d136 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java @@ -17,10 +17,10 @@ package org.apache.hudi.sink.utils; -import org.apache.hudi.adapter.StreamingRuntimeContextAdapter; - import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.state.KeyedStateStore; +import org.apache.flink.metrics.groups.OperatorMetricGroup; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.memory.MemoryManager; import org.apache.flink.runtime.operators.testutils.MockEnvironment; @@ -37,7 +37,7 @@ * *

NOTE: Adapted from Apache Flink, the MockStreamOperator is modified to support MapState. */ -public class MockStreamingRuntimeContext extends StreamingRuntimeContextAdapter { +public class MockStreamingRuntimeContext extends StreamingRuntimeContext { private final boolean isCheckpointingEnabled; @@ -128,4 +128,9 @@ public KeyedStateStore getKeyedStateStore() { return mockOperatorStateStore; } } + + @Override + public OperatorMetricGroup getMetricGroup() { + return UnregisteredMetricsGroup.createOperatorMetricGroup(); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java index 40fb28619de4..111bb42e73e3 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java @@ -18,7 +18,6 @@ package org.apache.hudi.table; -import org.apache.hudi.adapter.TestTableEnvs; import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.cdc.HoodieCDCSupplementalLoggingMode; @@ -32,6 +31,7 @@ import org.apache.hudi.utils.TestConfigurations; import org.apache.hudi.utils.TestData; import org.apache.hudi.utils.TestSQL; +import org.apache.hudi.utils.TestTableEnvs; import org.apache.hudi.utils.TestUtils; import org.apache.hudi.utils.factory.CollectSinkTableFactory; diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalogFactory.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalogFactory.java index 6e7ee2e8f84b..5ee8aac90f80 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalogFactory.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieCatalogFactory.java @@ -18,7 +18,7 @@ package org.apache.hudi.table.catalog; -import org.apache.hudi.adapter.TestTableEnvs; +import org.apache.hudi.utils.TestTableEnvs; import org.apache.flink.configuration.Configuration; import org.apache.flink.table.api.TableEnvironment; diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestTableEnvs.java similarity index 98% rename from hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java rename to hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestTableEnvs.java index e65437609a21..fdec322fc9ac 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestTableEnvs.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hudi.adapter; +package org.apache.hudi.utils; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; diff --git a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml b/hudi-flink-datasource/hudi-flink1.13.x/pom.xml deleted file mode 100644 index 3dd876dd20af..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/pom.xml +++ /dev/null @@ -1,144 +0,0 @@ - - - - - hudi-flink-datasource - org.apache.hudi - 0.15.0-SNAPSHOT - - 4.0.0 - - hudi-flink1.13.x - 0.15.0-SNAPSHOT - jar - - - ${project.parent.parent.basedir} - - - - - - org.apache.logging.log4j - log4j-1.2-api - - - org.apache.logging.log4j - log4j-slf4j-impl - - - org.slf4j - slf4j-api - - - - - org.apache.hudi - hudi-common - ${project.version} - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - provided - - - - - org.apache.flink - flink-table-runtime-blink_${scala.binary.version} - ${flink1.13.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.binary.version} - ${flink1.13.version} - provided - - - org.apache.flink - flink-core - ${flink1.13.version} - provided - - - org.apache.flink - flink-parquet_${scala.binary.version} - ${flink1.13.version} - provided - - - org.apache.flink - flink-json - ${flink1.13.version} - provided - - - org.apache.flink - flink-table-planner-blink_${scala.binary.version} - ${flink1.13.version} - provided - - - - - org.apache.flink - flink-runtime_${scala.binary.version} - ${flink1.13.version} - test - test-jar - - - org.apache.hudi - hudi-tests-common - ${project.version} - test - - - - - - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - test-compile - - - - false - - - - org.apache.rat - apache-rat-plugin - - - - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index 51c53f368fb9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { - @Override - public void close() throws Exception { - super.dispose(); - } - - public void finish() throws Exception { - super.close(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 0ea0968f1758..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.MailboxExecutor; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - private transient MailboxExecutor mailboxExecutor; - - @Override - public void setMailboxExecutor(MailboxExecutor mailboxExecutor) { - this.mailboxExecutor = mailboxExecutor; - } - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } - - /** - * Provides the mailbox executor iff this factory implements {@link YieldingOperatorFactory}. - */ - protected MailboxExecutor getMailboxExecutor() { - return checkNotNull( - mailboxExecutor, "Factory does not implement %s", YieldingOperatorFactory.class); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java deleted file mode 100644 index 867395c43f19..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamScanProviderAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.table.connector.source.DataStreamScanProvider; - -/** - * Adapter clazz for {@code DataStreamScanProvider}. - */ -public interface DataStreamScanProviderAdapter extends DataStreamScanProvider { -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java deleted file mode 100644 index e8eaa3c62d44..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/DataStreamSinkProviderAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.table.connector.sink.DataStreamSinkProvider; - -/** - * Adapter clazz for {@code DataStreamSinkProvider}. - */ -public interface DataStreamSinkProviderAdapter extends DataStreamSinkProvider { -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java deleted file mode 100644 index 94ed3b538879..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/HiveCatalogConstants.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabase; -import org.apache.flink.sql.parser.hive.ddl.SqlAlterHiveDatabaseOwner; -import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveDatabase; - -/** - * Constants for Hive Catalog. - */ -public class HiveCatalogConstants { - - // ----------------------------------------------------------------------------------- - // Constants for ALTER DATABASE - // ----------------------------------------------------------------------------------- - public static final String ALTER_DATABASE_OP = SqlAlterHiveDatabase.ALTER_DATABASE_OP; - - public static final String DATABASE_LOCATION_URI = SqlCreateHiveDatabase.DATABASE_LOCATION_URI; - - public static final String DATABASE_OWNER_NAME = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_NAME; - - public static final String DATABASE_OWNER_TYPE = SqlAlterHiveDatabaseOwner.DATABASE_OWNER_TYPE; - - public static final String ROLE_OWNER = SqlAlterHiveDatabaseOwner.ROLE_OWNER; - - public static final String USER_OWNER = SqlAlterHiveDatabaseOwner.USER_OWNER; - - /** Type of ALTER DATABASE operation. */ - public enum AlterHiveDatabaseOp { - CHANGE_PROPS, - CHANGE_LOCATION, - CHANGE_OWNER - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 9ae3ca6912f6..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java deleted file mode 100644 index ea0ba0419214..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/MaskingOutputAdapter.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.util.OutputTag; - -/** Adapter class for {@code Output} to handle async compaction/clustering service thread safe issues */ -public class MaskingOutputAdapter implements Output> { - - private final Output> output; - - public MaskingOutputAdapter(Output> output) { - this.output = output; - } - - @Override - public void emitWatermark(Watermark watermark) { - // For thread safe, not to propagate the watermark - } - - @Override - public void emitLatencyMarker(LatencyMarker latencyMarker) { - // For thread safe, not to propagate latency marker - } - - @Override - public void collect(OutputTag outputTag, StreamRecord streamRecord) { - this.output.collect(outputTag, streamRecord); - } - - @Override - public void collect(StreamRecord outStreamRecord) { - this.output.collect(outStreamRecord); - } - - @Override - public void close() { - this.output.close(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java deleted file mode 100644 index 887833c90e16..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/OperatorCoordinatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.operators.coordination.OperatorCoordinator; - -/** - * Adapter clazz for {@code OperatorCoordinator}. - */ -public interface OperatorCoordinatorAdapter extends OperatorCoordinator { -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 6d058de89bc5..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java deleted file mode 100644 index a3ee8e6eed17..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SortCodeGeneratorAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.table.api.TableConfig; -import org.apache.flink.table.planner.codegen.sort.SortCodeGenerator; -import org.apache.flink.table.planner.plan.nodes.exec.spec.SortSpec; -import org.apache.flink.table.types.logical.RowType; - -/** - * Adapter clazz for {@code SortCodeGenerator}. - */ -public class SortCodeGeneratorAdapter extends SortCodeGenerator { - public SortCodeGeneratorAdapter(TableConfig conf, RowType input, SortSpec sortSpec) { - super(conf, input, sortSpec); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java deleted file mode 100644 index cd5c4eb891b0..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelDeleteAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -/** - * Adapter clazz for {@code org.apache.flink.table.connector.sink.abilities.SupportsRowLevelDelete}. - */ -public interface SupportsRowLevelDeleteAdapter { - - RowLevelDeleteInfoAdapter applyRowLevelDelete(); - - /** - * Adapter clazz for {@code SupportsRowLevelDelete.RowLevelDeleteInfo}. - */ - interface RowLevelDeleteInfoAdapter { - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java deleted file mode 100644 index 6a62763ec5b7..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/SupportsRowLevelUpdateAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.table.catalog.Column; - -import java.util.List; - -/** - * Adapter clazz for {@code org.apache.flink.table.connector.sink.abilities.SupportsRowLevelUpdate}. - */ -public interface SupportsRowLevelUpdateAdapter { - - RowLevelUpdateInfoAdapter applyRowLevelUpdate(List updatedColumns); - - /** - * Adapter clazz for {@code SupportsRowLevelUpdate.RowLevelUpdateInfo}. - */ - interface RowLevelUpdateInfoAdapter { - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java deleted file mode 100644 index 521fd50c8d8a..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.runtime.io.disk.iomanager.IOManager; -import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; -import org.apache.flink.table.catalog.ObjectIdentifier; -import org.apache.flink.table.catalog.ResolvedCatalogTable; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.factories.FactoryUtil; -import org.apache.flink.table.runtime.generated.NormalizedKeyComputer; -import org.apache.flink.table.runtime.generated.RecordComparator; -import org.apache.flink.table.runtime.operators.sort.BinaryExternalSorter; -import org.apache.flink.table.runtime.typeutils.AbstractRowDataSerializer; -import org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer; - -/** - * Adapter utils. - */ -public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - streamTask.getStreamStatusMaintainer(), - output, - watermarkInterval, - -1); - } - - public static FactoryUtil.DefaultDynamicTableContext getTableContext( - ObjectIdentifier tablePath, - ResolvedCatalogTable catalogTable, - ReadableConfig conf) { - return new FactoryUtil.DefaultDynamicTableContext(tablePath, catalogTable, - conf, Thread.currentThread().getContextClassLoader(), false); - } - - public static BinaryExternalSorter getBinaryExternalSorter( - final Object owner, - MemoryManager memoryManager, - long reservedMemorySize, - IOManager ioManager, - AbstractRowDataSerializer inputSerializer, - BinaryRowDataSerializer serializer, - NormalizedKeyComputer normalizedKeyComputer, - RecordComparator comparator, - Configuration conf) { - return new BinaryExternalSorter(owner, memoryManager, reservedMemorySize, - ioManager, inputSerializer, serializer, normalizedKeyComputer, comparator, conf); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarArrayData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarArrayData.java deleted file mode 100644 index 20c63d26f749..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarArrayData.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data; - -import org.apache.hudi.table.data.vector.MapColumnVector; -import org.apache.hudi.table.data.vector.RowColumnVector; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RawValueData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.binary.TypedSetters; -import org.apache.flink.table.data.vector.ArrayColumnVector; -import org.apache.flink.table.data.vector.BooleanColumnVector; -import org.apache.flink.table.data.vector.ByteColumnVector; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.DecimalColumnVector; -import org.apache.flink.table.data.vector.DoubleColumnVector; -import org.apache.flink.table.data.vector.FloatColumnVector; -import org.apache.flink.table.data.vector.IntColumnVector; -import org.apache.flink.table.data.vector.LongColumnVector; -import org.apache.flink.table.data.vector.ShortColumnVector; -import org.apache.flink.table.data.vector.TimestampColumnVector; - -import java.util.Arrays; - -/** - * Columnar array to support access to vector column data. - * - *

References {@code org.apache.flink.table.data.ColumnarArrayData} to include FLINK-15390. - */ -public final class ColumnarArrayData implements ArrayData, TypedSetters { - - private final ColumnVector data; - private final int offset; - private final int numElements; - - public ColumnarArrayData(ColumnVector data, int offset, int numElements) { - this.data = data; - this.offset = offset; - this.numElements = numElements; - } - - @Override - public int size() { - return numElements; - } - - @Override - public boolean isNullAt(int pos) { - return data.isNullAt(offset + pos); - } - - @Override - public void setNullAt(int pos) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean getBoolean(int pos) { - return ((BooleanColumnVector) data).getBoolean(offset + pos); - } - - @Override - public byte getByte(int pos) { - return ((ByteColumnVector) data).getByte(offset + pos); - } - - @Override - public short getShort(int pos) { - return ((ShortColumnVector) data).getShort(offset + pos); - } - - @Override - public int getInt(int pos) { - return ((IntColumnVector) data).getInt(offset + pos); - } - - @Override - public long getLong(int pos) { - return ((LongColumnVector) data).getLong(offset + pos); - } - - @Override - public float getFloat(int pos) { - return ((FloatColumnVector) data).getFloat(offset + pos); - } - - @Override - public double getDouble(int pos) { - return ((DoubleColumnVector) data).getDouble(offset + pos); - } - - @Override - public StringData getString(int pos) { - BytesColumnVector.Bytes byteArray = getByteArray(pos); - return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len); - } - - @Override - public DecimalData getDecimal(int pos, int precision, int scale) { - return ((DecimalColumnVector) data).getDecimal(offset + pos, precision, scale); - } - - @Override - public TimestampData getTimestamp(int pos, int precision) { - return ((TimestampColumnVector) data).getTimestamp(offset + pos, precision); - } - - @Override - public RawValueData getRawValue(int pos) { - throw new UnsupportedOperationException("RawValueData is not supported."); - } - - @Override - public byte[] getBinary(int pos) { - BytesColumnVector.Bytes byteArray = getByteArray(pos); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - return Arrays.copyOfRange(byteArray.data, byteArray.offset, byteArray.len); - } - } - - @Override - public ArrayData getArray(int pos) { - return ((ArrayColumnVector) data).getArray(offset + pos); - } - - @Override - public MapData getMap(int pos) { - return ((MapColumnVector) data).getMap(offset + pos); - } - - @Override - public RowData getRow(int pos, int numFields) { - return ((RowColumnVector) data).getRow(offset + pos); - } - - @Override - public void setBoolean(int pos, boolean value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setByte(int pos, byte value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setShort(int pos, short value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setInt(int pos, int value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setLong(int pos, long value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setFloat(int pos, float value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDouble(int pos, double value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDecimal(int pos, DecimalData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setTimestamp(int pos, TimestampData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean[] toBooleanArray() { - boolean[] res = new boolean[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getBoolean(i); - } - return res; - } - - @Override - public byte[] toByteArray() { - byte[] res = new byte[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getByte(i); - } - return res; - } - - @Override - public short[] toShortArray() { - short[] res = new short[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getShort(i); - } - return res; - } - - @Override - public int[] toIntArray() { - int[] res = new int[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getInt(i); - } - return res; - } - - @Override - public long[] toLongArray() { - long[] res = new long[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getLong(i); - } - return res; - } - - @Override - public float[] toFloatArray() { - float[] res = new float[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getFloat(i); - } - return res; - } - - @Override - public double[] toDoubleArray() { - double[] res = new double[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getDouble(i); - } - return res; - } - - private BytesColumnVector.Bytes getByteArray(int pos) { - return ((BytesColumnVector) data).getBytes(offset + pos); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarMapData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarMapData.java deleted file mode 100644 index bba462f404b3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarMapData.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.vector.ColumnVector; - -/** - * Columnar map to support access to vector column data. - * - *

Referenced from flink 1.14.0 {@code org.apache.flink.table.data.ColumnarMapData}. - */ -public final class ColumnarMapData implements MapData { - - private final ColumnVector keyColumnVector; - private final ColumnVector valueColumnVector; - private final int offset; - private final int numElements; - - public ColumnarMapData( - ColumnVector keyColumnVector, - ColumnVector valueColumnVector, - int offset, - int numElements) { - this.keyColumnVector = keyColumnVector; - this.valueColumnVector = valueColumnVector; - this.offset = offset; - this.numElements = numElements; - } - - @Override - public int size() { - return numElements; - } - - @Override - public ArrayData keyArray() { - return new ColumnarArrayData(keyColumnVector, offset, numElements); - } - - @Override - public ArrayData valueArray() { - return new ColumnarArrayData(valueColumnVector, offset, numElements); - } - - @Override - public boolean equals(Object o) { - throw new UnsupportedOperationException( - "ColumnarMapData do not support equals, please compare fields one by one!"); - } - - @Override - public int hashCode() { - throw new UnsupportedOperationException( - "ColumnarMapData do not support hashCode, please hash fields one by one!"); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarRowData.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarRowData.java deleted file mode 100644 index 9a95035b2703..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/ColumnarRowData.java +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data; - -import org.apache.hudi.table.data.vector.VectorizedColumnBatch; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RawValueData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.binary.TypedSetters; -import org.apache.flink.table.data.vector.BytesColumnVector.Bytes; -import org.apache.flink.types.RowKind; - -/** - * Columnar row to support access to vector column data. - * It is a row view in {@link VectorizedColumnBatch}. - * - *

References {@code org.apache.flink.table.data.ColumnarRowData} to include FLINK-15390. - */ -public final class ColumnarRowData implements RowData, TypedSetters { - - private RowKind rowKind = RowKind.INSERT; - private VectorizedColumnBatch vectorizedColumnBatch; - private int rowId; - - public ColumnarRowData() { - } - - public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch) { - this(vectorizedColumnBatch, 0); - } - - public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch, int rowId) { - this.vectorizedColumnBatch = vectorizedColumnBatch; - this.rowId = rowId; - } - - public void setVectorizedColumnBatch(VectorizedColumnBatch vectorizedColumnBatch) { - this.vectorizedColumnBatch = vectorizedColumnBatch; - this.rowId = 0; - } - - public void setRowId(int rowId) { - this.rowId = rowId; - } - - @Override - public RowKind getRowKind() { - return rowKind; - } - - @Override - public void setRowKind(RowKind kind) { - this.rowKind = kind; - } - - @Override - public int getArity() { - return vectorizedColumnBatch.getArity(); - } - - @Override - public boolean isNullAt(int pos) { - return vectorizedColumnBatch.isNullAt(rowId, pos); - } - - @Override - public boolean getBoolean(int pos) { - return vectorizedColumnBatch.getBoolean(rowId, pos); - } - - @Override - public byte getByte(int pos) { - return vectorizedColumnBatch.getByte(rowId, pos); - } - - @Override - public short getShort(int pos) { - return vectorizedColumnBatch.getShort(rowId, pos); - } - - @Override - public int getInt(int pos) { - return vectorizedColumnBatch.getInt(rowId, pos); - } - - @Override - public long getLong(int pos) { - return vectorizedColumnBatch.getLong(rowId, pos); - } - - @Override - public float getFloat(int pos) { - return vectorizedColumnBatch.getFloat(rowId, pos); - } - - @Override - public double getDouble(int pos) { - return vectorizedColumnBatch.getDouble(rowId, pos); - } - - @Override - public StringData getString(int pos) { - Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos); - return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len); - } - - @Override - public DecimalData getDecimal(int pos, int precision, int scale) { - return vectorizedColumnBatch.getDecimal(rowId, pos, precision, scale); - } - - @Override - public TimestampData getTimestamp(int pos, int precision) { - return vectorizedColumnBatch.getTimestamp(rowId, pos, precision); - } - - @Override - public RawValueData getRawValue(int pos) { - throw new UnsupportedOperationException("RawValueData is not supported."); - } - - @Override - public byte[] getBinary(int pos) { - Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - byte[] ret = new byte[byteArray.len]; - System.arraycopy(byteArray.data, byteArray.offset, ret, 0, byteArray.len); - return ret; - } - } - - @Override - public RowData getRow(int pos, int numFields) { - return vectorizedColumnBatch.getRow(rowId, pos); - } - - @Override - public ArrayData getArray(int pos) { - return vectorizedColumnBatch.getArray(rowId, pos); - } - - @Override - public MapData getMap(int pos) { - return vectorizedColumnBatch.getMap(rowId, pos); - } - - @Override - public void setNullAt(int pos) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setBoolean(int pos, boolean value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setByte(int pos, byte value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setShort(int pos, short value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setInt(int pos, int value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setLong(int pos, long value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setFloat(int pos, float value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDouble(int pos, double value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDecimal(int pos, DecimalData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setTimestamp(int pos, TimestampData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean equals(Object o) { - throw new UnsupportedOperationException( - "ColumnarRowData do not support equals, please compare fields one by one!"); - } - - @Override - public int hashCode() { - throw new UnsupportedOperationException( - "ColumnarRowData do not support hashCode, please hash fields one by one!"); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/MapColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/MapColumnVector.java deleted file mode 100644 index 6bdf8782f4d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/MapColumnVector.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data.vector; - -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.vector.ColumnVector; - -/** - * Map column vector. - */ -public interface MapColumnVector extends ColumnVector { - MapData getMap(int i); -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/RowColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/RowColumnVector.java deleted file mode 100644 index bd0e9bbe7de7..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/RowColumnVector.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data.vector; - -import org.apache.hudi.table.data.ColumnarRowData; - -import org.apache.flink.table.data.vector.ColumnVector; - -/** - * Row column vector. - */ -public interface RowColumnVector extends ColumnVector { - ColumnarRowData getRow(int i); -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/VectorizedColumnBatch.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/VectorizedColumnBatch.java deleted file mode 100644 index bccaec8fdcad..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/data/vector/VectorizedColumnBatch.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.data.vector; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.vector.ArrayColumnVector; -import org.apache.flink.table.data.vector.BooleanColumnVector; -import org.apache.flink.table.data.vector.ByteColumnVector; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.table.data.vector.BytesColumnVector.Bytes; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.DecimalColumnVector; -import org.apache.flink.table.data.vector.DoubleColumnVector; -import org.apache.flink.table.data.vector.FloatColumnVector; -import org.apache.flink.table.data.vector.IntColumnVector; -import org.apache.flink.table.data.vector.LongColumnVector; -import org.apache.flink.table.data.vector.ShortColumnVector; -import org.apache.flink.table.data.vector.TimestampColumnVector; - -import java.io.Serializable; -import java.nio.charset.StandardCharsets; - -/** - * A VectorizedColumnBatch is a set of rows, organized with each column as a vector. It is the unit - * of query execution, organized to minimize the cost per row. - * - *

{@code VectorizedColumnBatch}s are influenced by Apache Hive VectorizedRowBatch. - * - *

References {@code org.apache.flink.table.data.vector.VectorizedColumnBatch} to include FLINK-15390. - */ -public class VectorizedColumnBatch implements Serializable { - private static final long serialVersionUID = 8180323238728166155L; - - /** - * This number is carefully chosen to minimize overhead and typically allows one - * VectorizedColumnBatch to fit in cache. - */ - public static final int DEFAULT_SIZE = 2048; - - private int numRows; - public final ColumnVector[] columns; - - public VectorizedColumnBatch(ColumnVector[] vectors) { - this.columns = vectors; - } - - public void setNumRows(int numRows) { - this.numRows = numRows; - } - - public int getNumRows() { - return numRows; - } - - public int getArity() { - return columns.length; - } - - public boolean isNullAt(int rowId, int colId) { - return columns[colId].isNullAt(rowId); - } - - public boolean getBoolean(int rowId, int colId) { - return ((BooleanColumnVector) columns[colId]).getBoolean(rowId); - } - - public byte getByte(int rowId, int colId) { - return ((ByteColumnVector) columns[colId]).getByte(rowId); - } - - public short getShort(int rowId, int colId) { - return ((ShortColumnVector) columns[colId]).getShort(rowId); - } - - public int getInt(int rowId, int colId) { - return ((IntColumnVector) columns[colId]).getInt(rowId); - } - - public long getLong(int rowId, int colId) { - return ((LongColumnVector) columns[colId]).getLong(rowId); - } - - public float getFloat(int rowId, int colId) { - return ((FloatColumnVector) columns[colId]).getFloat(rowId); - } - - public double getDouble(int rowId, int colId) { - return ((DoubleColumnVector) columns[colId]).getDouble(rowId); - } - - public Bytes getByteArray(int rowId, int colId) { - return ((BytesColumnVector) columns[colId]).getBytes(rowId); - } - - private byte[] getBytes(int rowId, int colId) { - Bytes byteArray = getByteArray(rowId, colId); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - return byteArray.getBytes(); - } - } - - public String getString(int rowId, int colId) { - Bytes byteArray = getByteArray(rowId, colId); - return new String(byteArray.data, byteArray.offset, byteArray.len, StandardCharsets.UTF_8); - } - - public DecimalData getDecimal(int rowId, int colId, int precision, int scale) { - return ((DecimalColumnVector) (columns[colId])).getDecimal(rowId, precision, scale); - } - - public TimestampData getTimestamp(int rowId, int colId, int precision) { - return ((TimestampColumnVector) (columns[colId])).getTimestamp(rowId, precision); - } - - public ArrayData getArray(int rowId, int colId) { - return ((ArrayColumnVector) columns[colId]).getArray(rowId); - } - - public RowData getRow(int rowId, int colId) { - return ((RowColumnVector) columns[colId]).getRow(rowId); - } - - public MapData getMap(int rowId, int colId) { - return ((MapColumnVector) columns[colId]).getMap(rowId); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java deleted file mode 100644 index ac9ca59d574d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow; - -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.table.data.vector.VectorizedColumnBatch; -import org.apache.hudi.table.format.cow.vector.HeapArrayVector; -import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; -import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; -import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; -import org.apache.hudi.table.format.cow.vector.reader.ArrayColumnReader; -import org.apache.hudi.table.format.cow.vector.reader.EmptyColumnReader; -import org.apache.hudi.table.format.cow.vector.reader.FixedLenBytesColumnReader; -import org.apache.hudi.table.format.cow.vector.reader.Int64TimestampColumnReader; -import org.apache.hudi.table.format.cow.vector.reader.MapColumnReader; -import org.apache.hudi.table.format.cow.vector.reader.ParquetColumnarRowSplitReader; -import org.apache.hudi.table.format.cow.vector.reader.RowColumnReader; - -import org.apache.flink.core.fs.Path; -import org.apache.flink.formats.parquet.vector.reader.BooleanColumnReader; -import org.apache.flink.formats.parquet.vector.reader.ByteColumnReader; -import org.apache.flink.formats.parquet.vector.reader.BytesColumnReader; -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.formats.parquet.vector.reader.DoubleColumnReader; -import org.apache.flink.formats.parquet.vector.reader.FloatColumnReader; -import org.apache.flink.formats.parquet.vector.reader.IntColumnReader; -import org.apache.flink.formats.parquet.vector.reader.LongColumnReader; -import org.apache.flink.formats.parquet.vector.reader.ShortColumnReader; -import org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.heap.HeapBooleanVector; -import org.apache.flink.table.data.vector.heap.HeapByteVector; -import org.apache.flink.table.data.vector.heap.HeapBytesVector; -import org.apache.flink.table.data.vector.heap.HeapDoubleVector; -import org.apache.flink.table.data.vector.heap.HeapFloatVector; -import org.apache.flink.table.data.vector.heap.HeapIntVector; -import org.apache.flink.table.data.vector.heap.HeapLongVector; -import org.apache.flink.table.data.vector.heap.HeapShortVector; -import org.apache.flink.table.data.vector.heap.HeapTimestampVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.types.DataType; -import org.apache.flink.table.types.logical.ArrayType; -import org.apache.flink.table.types.logical.DecimalType; -import org.apache.flink.table.types.logical.IntType; -import org.apache.flink.table.types.logical.LocalZonedTimestampType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.MapType; -import org.apache.flink.table.types.logical.RowType; -import org.apache.flink.table.types.logical.TimestampType; -import org.apache.flink.table.types.logical.VarBinaryType; -import org.apache.flink.util.Preconditions; -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.ParquetRuntimeException; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReadStore; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.filter.UnboundRecordFilter; -import org.apache.parquet.filter2.predicate.FilterPredicate; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.InvalidSchemaException; -import org.apache.parquet.schema.OriginalType; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.Type; - -import java.io.IOException; -import java.math.BigDecimal; -import java.sql.Date; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.apache.flink.table.runtime.functions.SqlDateTimeUtils.dateToInternal; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.parquet.Preconditions.checkArgument; - -/** - * Util for generating {@link ParquetColumnarRowSplitReader}. - * - *

NOTE: reference from Flink release 1.11.2 {@code ParquetSplitReaderUtil}, modify to support INT64 - * based TIMESTAMP_MILLIS as ConvertedType, should remove when Flink supports that. - */ -public class ParquetSplitReaderUtil { - - /** - * Util for generating partitioned {@link ParquetColumnarRowSplitReader}. - */ - public static ParquetColumnarRowSplitReader genPartColumnarRowReader( - boolean utcTimestamp, - boolean caseSensitive, - Configuration conf, - String[] fullFieldNames, - DataType[] fullFieldTypes, - Map partitionSpec, - int[] selectedFields, - int batchSize, - Path path, - long splitStart, - long splitLength, - FilterPredicate filterPredicate, - UnboundRecordFilter recordFilter) throws IOException { - List selNonPartNames = Arrays.stream(selectedFields) - .mapToObj(i -> fullFieldNames[i]) - .filter(n -> !partitionSpec.containsKey(n)) - .collect(Collectors.toList()); - - int[] selParquetFields = Arrays.stream(selectedFields) - .filter(i -> !partitionSpec.containsKey(fullFieldNames[i])) - .toArray(); - - ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> { - // create and initialize the row batch - ColumnVector[] vectors = new ColumnVector[selectedFields.length]; - for (int i = 0; i < vectors.length; i++) { - String name = fullFieldNames[selectedFields[i]]; - LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType(); - vectors[i] = createVector(readVectors, selNonPartNames, name, type, partitionSpec, batchSize); - } - return new VectorizedColumnBatch(vectors); - }; - - return new ParquetColumnarRowSplitReader( - utcTimestamp, - caseSensitive, - conf, - Arrays.stream(selParquetFields) - .mapToObj(i -> fullFieldTypes[i].getLogicalType()) - .toArray(LogicalType[]::new), - selNonPartNames.toArray(new String[0]), - gen, - batchSize, - new org.apache.hadoop.fs.Path(path.toUri()), - splitStart, - splitLength, - filterPredicate, - recordFilter); - } - - private static ColumnVector createVector( - ColumnVector[] readVectors, - List selNonPartNames, - String name, - LogicalType type, - Map partitionSpec, - int batchSize) { - if (partitionSpec.containsKey(name)) { - return createVectorFromConstant(type, partitionSpec.get(name), batchSize); - } - ColumnVector readVector = readVectors[selNonPartNames.indexOf(name)]; - if (readVector == null) { - // when the read vector is null, use a constant null vector instead - readVector = createVectorFromConstant(type, null, batchSize); - } - return readVector; - } - - private static ColumnVector createVectorFromConstant( - LogicalType type, - Object value, - int batchSize) { - switch (type.getTypeRoot()) { - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - HeapBytesVector bsv = new HeapBytesVector(batchSize); - if (value == null) { - bsv.fillWithNulls(); - } else { - bsv.fill(value instanceof byte[] - ? (byte[]) value - : getUTF8Bytes(value.toString())); - } - return bsv; - case BOOLEAN: - HeapBooleanVector bv = new HeapBooleanVector(batchSize); - if (value == null) { - bv.fillWithNulls(); - } else { - bv.fill((boolean) value); - } - return bv; - case TINYINT: - HeapByteVector byteVector = new HeapByteVector(batchSize); - if (value == null) { - byteVector.fillWithNulls(); - } else { - byteVector.fill(((Number) value).byteValue()); - } - return byteVector; - case SMALLINT: - HeapShortVector sv = new HeapShortVector(batchSize); - if (value == null) { - sv.fillWithNulls(); - } else { - sv.fill(((Number) value).shortValue()); - } - return sv; - case INTEGER: - HeapIntVector iv = new HeapIntVector(batchSize); - if (value == null) { - iv.fillWithNulls(); - } else { - iv.fill(((Number) value).intValue()); - } - return iv; - case BIGINT: - HeapLongVector lv = new HeapLongVector(batchSize); - if (value == null) { - lv.fillWithNulls(); - } else { - lv.fill(((Number) value).longValue()); - } - return lv; - case DECIMAL: - DecimalType decimalType = (DecimalType) type; - int precision = decimalType.getPrecision(); - int scale = decimalType.getScale(); - DecimalData decimal = value == null - ? null - : Preconditions.checkNotNull(DecimalData.fromBigDecimal((BigDecimal) value, precision, scale)); - ColumnVector internalVector = createVectorFromConstant( - new VarBinaryType(), - decimal == null ? null : decimal.toUnscaledBytes(), - batchSize); - return new ParquetDecimalVector(internalVector); - case FLOAT: - HeapFloatVector fv = new HeapFloatVector(batchSize); - if (value == null) { - fv.fillWithNulls(); - } else { - fv.fill(((Number) value).floatValue()); - } - return fv; - case DOUBLE: - HeapDoubleVector dv = new HeapDoubleVector(batchSize); - if (value == null) { - dv.fillWithNulls(); - } else { - dv.fill(((Number) value).doubleValue()); - } - return dv; - case DATE: - if (value instanceof LocalDate) { - value = Date.valueOf((LocalDate) value); - } - return createVectorFromConstant( - new IntType(), - value == null ? null : dateToInternal((Date) value), - batchSize); - case TIMESTAMP_WITHOUT_TIME_ZONE: - HeapTimestampVector tv = new HeapTimestampVector(batchSize); - if (value == null) { - tv.fillWithNulls(); - } else { - tv.fill(TimestampData.fromLocalDateTime((LocalDateTime) value)); - } - return tv; - case ARRAY: - HeapArrayVector arrayVector = new HeapArrayVector(batchSize); - if (value == null) { - arrayVector.fillWithNulls(); - return arrayVector; - } else { - throw new UnsupportedOperationException("Unsupported create array with default value."); - } - case MAP: - HeapMapColumnVector mapVector = new HeapMapColumnVector(batchSize, null, null); - if (value == null) { - mapVector.fillWithNulls(); - return mapVector; - } else { - throw new UnsupportedOperationException("Unsupported create map with default value."); - } - case ROW: - HeapRowColumnVector rowVector = new HeapRowColumnVector(batchSize); - if (value == null) { - rowVector.fillWithNulls(); - return rowVector; - } else { - throw new UnsupportedOperationException("Unsupported create row with default value."); - } - default: - throw new UnsupportedOperationException("Unsupported type: " + type); - } - } - - private static List filterDescriptors(int depth, Type type, List columns) throws ParquetRuntimeException { - List filtered = new ArrayList<>(); - for (ColumnDescriptor descriptor : columns) { - if (depth >= descriptor.getPath().length) { - throw new InvalidSchemaException("Expect depth " + depth + " for schema: " + descriptor); - } - if (type.getName().equals(descriptor.getPath()[depth])) { - filtered.add(descriptor); - } - } - ValidationUtils.checkState(filtered.size() > 0, "Corrupted Parquet schema"); - return filtered; - } - - public static ColumnReader createColumnReader( - boolean utcTimestamp, - LogicalType fieldType, - Type physicalType, - List descriptors, - PageReadStore pages) throws IOException { - return createColumnReader(utcTimestamp, fieldType, physicalType, descriptors, - pages, 0); - } - - private static ColumnReader createColumnReader( - boolean utcTimestamp, - LogicalType fieldType, - Type physicalType, - List columns, - PageReadStore pages, - int depth) throws IOException { - List descriptors = filterDescriptors(depth, physicalType, columns); - ColumnDescriptor descriptor = descriptors.get(0); - PageReader pageReader = pages.getPageReader(descriptor); - switch (fieldType.getTypeRoot()) { - case BOOLEAN: - return new BooleanColumnReader(descriptor, pageReader); - case TINYINT: - return new ByteColumnReader(descriptor, pageReader); - case DOUBLE: - return new DoubleColumnReader(descriptor, pageReader); - case FLOAT: - return new FloatColumnReader(descriptor, pageReader); - case INTEGER: - case DATE: - case TIME_WITHOUT_TIME_ZONE: - return new IntColumnReader(descriptor, pageReader); - case BIGINT: - return new LongColumnReader(descriptor, pageReader); - case SMALLINT: - return new ShortColumnReader(descriptor, pageReader); - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - return new BytesColumnReader(descriptor, pageReader); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { - case INT64: - int precision = fieldType instanceof TimestampType - ? ((TimestampType) fieldType).getPrecision() - : ((LocalZonedTimestampType) fieldType).getPrecision(); - return new Int64TimestampColumnReader(utcTimestamp, descriptor, pageReader, precision); - case INT96: - return new TimestampColumnReader(utcTimestamp, descriptor, pageReader); - default: - throw new AssertionError(); - } - case DECIMAL: - switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { - case INT32: - return new IntColumnReader(descriptor, pageReader); - case INT64: - return new LongColumnReader(descriptor, pageReader); - case BINARY: - return new BytesColumnReader(descriptor, pageReader); - case FIXED_LEN_BYTE_ARRAY: - return new FixedLenBytesColumnReader( - descriptor, pageReader); - default: - throw new AssertionError(); - } - case ARRAY: - return new ArrayColumnReader( - descriptor, - pageReader, - utcTimestamp, - descriptor.getPrimitiveType(), - fieldType); - case MAP: - MapType mapType = (MapType) fieldType; - ArrayColumnReader keyReader = - new ArrayColumnReader( - descriptor, - pageReader, - utcTimestamp, - descriptor.getPrimitiveType(), - new ArrayType(mapType.getKeyType())); - ArrayColumnReader valueReader = - new ArrayColumnReader( - descriptors.get(1), - pages.getPageReader(descriptors.get(1)), - utcTimestamp, - descriptors.get(1).getPrimitiveType(), - new ArrayType(mapType.getValueType())); - return new MapColumnReader(keyReader, valueReader, fieldType); - case ROW: - RowType rowType = (RowType) fieldType; - GroupType groupType = physicalType.asGroupType(); - List fieldReaders = new ArrayList<>(); - for (int i = 0; i < rowType.getFieldCount(); i++) { - // schema evolution: read the parquet file with a new extended field name. - int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType); - if (fieldIndex < 0) { - fieldReaders.add(new EmptyColumnReader()); - } else { - fieldReaders.add( - createColumnReader( - utcTimestamp, - rowType.getTypeAt(i), - groupType.getType(fieldIndex), - descriptors, - pages, - depth + 1)); - } - } - return new RowColumnReader(fieldReaders); - default: - throw new UnsupportedOperationException(fieldType + " is not supported now."); - } - } - - public static WritableColumnVector createWritableColumnVector( - int batchSize, - LogicalType fieldType, - Type physicalType, - List descriptors) { - return createWritableColumnVector(batchSize, fieldType, physicalType, descriptors, 0); - } - - private static WritableColumnVector createWritableColumnVector( - int batchSize, - LogicalType fieldType, - Type physicalType, - List columns, - int depth) { - List descriptors = filterDescriptors(depth, physicalType, columns); - PrimitiveType primitiveType = descriptors.get(0).getPrimitiveType(); - PrimitiveType.PrimitiveTypeName typeName = primitiveType.getPrimitiveTypeName(); - switch (fieldType.getTypeRoot()) { - case BOOLEAN: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); - return new HeapBooleanVector(batchSize); - case TINYINT: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); - return new HeapByteVector(batchSize); - case DOUBLE: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); - return new HeapDoubleVector(batchSize); - case FLOAT: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); - return new HeapFloatVector(batchSize); - case INTEGER: - case DATE: - case TIME_WITHOUT_TIME_ZONE: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); - return new HeapIntVector(batchSize); - case BIGINT: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); - return new HeapLongVector(batchSize); - case SMALLINT: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); - return new HeapShortVector(batchSize); - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); - return new HeapBytesVector(batchSize); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); - return new HeapTimestampVector(batchSize); - case DECIMAL: - checkArgument( - (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY - || typeName == PrimitiveType.PrimitiveTypeName.BINARY) - && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); - return new HeapBytesVector(batchSize); - case ARRAY: - ArrayType arrayType = (ArrayType) fieldType; - return new HeapArrayVector( - batchSize, - createWritableColumnVector( - batchSize, - arrayType.getElementType(), - physicalType, - descriptors, - depth)); - case MAP: - MapType mapType = (MapType) fieldType; - GroupType repeatedType = physicalType.asGroupType().getType(0).asGroupType(); - // the map column has three level paths. - return new HeapMapColumnVector( - batchSize, - createWritableColumnVector( - batchSize, - mapType.getKeyType(), - repeatedType.getType(0), - descriptors, - depth + 2), - createWritableColumnVector( - batchSize, - mapType.getValueType(), - repeatedType.getType(1), - descriptors, - depth + 2)); - case ROW: - RowType rowType = (RowType) fieldType; - GroupType groupType = physicalType.asGroupType(); - WritableColumnVector[] columnVectors = new WritableColumnVector[rowType.getFieldCount()]; - for (int i = 0; i < columnVectors.length; i++) { - // schema evolution: read the file with a new extended field name. - int fieldIndex = getFieldIndexInPhysicalType(rowType.getFields().get(i).getName(), groupType); - if (fieldIndex < 0) { - columnVectors[i] = (WritableColumnVector) createVectorFromConstant(rowType.getTypeAt(i), null, batchSize); - } else { - columnVectors[i] = - createWritableColumnVector( - batchSize, - rowType.getTypeAt(i), - groupType.getType(fieldIndex), - descriptors, - depth + 1); - } - } - return new HeapRowColumnVector(batchSize, columnVectors); - default: - throw new UnsupportedOperationException(fieldType + " is not supported now."); - } - } - - /** - * Returns the field index with given physical row type {@code groupType} and field name {@code fieldName}. - * - * @return The physical field index or -1 if the field does not exist - */ - private static int getFieldIndexInPhysicalType(String fieldName, GroupType groupType) { - // get index from fileSchema type, else, return -1 - return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java deleted file mode 100644 index 6d31d26b8d97..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.hudi.table.data.ColumnarArrayData; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.vector.ArrayColumnVector; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.heap.AbstractHeapVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; - -/** - * This class represents a nullable heap array column vector. - */ -public class HeapArrayVector extends AbstractHeapVector - implements WritableColumnVector, ArrayColumnVector { - - public long[] offsets; - public long[] lengths; - public ColumnVector child; - private int size; - - public HeapArrayVector(int len) { - super(len); - offsets = new long[len]; - lengths = new long[len]; - } - - public HeapArrayVector(int len, ColumnVector vector) { - super(len); - offsets = new long[len]; - lengths = new long[len]; - this.child = vector; - } - - public int getSize() { - return size; - } - - public void setSize(int size) { - this.size = size; - } - - public int getLen() { - return this.isNull.length; - } - - @Override - public ArrayData getArray(int i) { - long offset = offsets[i]; - long length = lengths[i]; - return new ColumnarArrayData(child, (int) offset, (int) length); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java deleted file mode 100644 index cf39fc981624..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.hudi.table.data.ColumnarMapData; -import org.apache.hudi.table.data.vector.MapColumnVector; - -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.heap.AbstractHeapVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; - -/** - * This class represents a nullable heap map column vector. - */ -public class HeapMapColumnVector extends AbstractHeapVector - implements WritableColumnVector, MapColumnVector { - - private long[] offsets; - private long[] lengths; - private int size; - private ColumnVector keys; - private ColumnVector values; - - public HeapMapColumnVector(int len, ColumnVector keys, ColumnVector values) { - super(len); - size = 0; - offsets = new long[len]; - lengths = new long[len]; - this.keys = keys; - this.values = values; - } - - public void setOffsets(long[] offsets) { - this.offsets = offsets; - } - - public void setLengths(long[] lengths) { - this.lengths = lengths; - } - - public void setKeys(ColumnVector keys) { - this.keys = keys; - } - - public void setValues(ColumnVector values) { - this.values = values; - } - - public int getSize() { - return size; - } - - public void setSize(int size) { - this.size = size; - } - - @Override - public MapData getMap(int i) { - long offset = offsets[i]; - long length = lengths[i]; - return new ColumnarMapData(keys, values, (int) offset, (int) length); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java deleted file mode 100644 index 03da9205d313..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.hudi.table.data.ColumnarRowData; -import org.apache.hudi.table.data.vector.RowColumnVector; -import org.apache.hudi.table.data.vector.VectorizedColumnBatch; - -import org.apache.flink.table.data.vector.heap.AbstractHeapVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; - -/** - * This class represents a nullable heap row column vector. - */ -public class HeapRowColumnVector extends AbstractHeapVector - implements WritableColumnVector, RowColumnVector { - - public WritableColumnVector[] vectors; - - public HeapRowColumnVector(int len, WritableColumnVector... vectors) { - super(len); - this.vectors = vectors; - } - - @Override - public ColumnarRowData getRow(int i) { - ColumnarRowData columnarRowData = new ColumnarRowData(new VectorizedColumnBatch(vectors)); - columnarRowData.setRowId(i); - return columnarRowData; - } - - @Override - public void reset() { - super.reset(); - for (WritableColumnVector vector : vectors) { - vector.reset(); - } - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java deleted file mode 100644 index a2f6d5b0cd74..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.DecimalColumnVector; - -/** - * Parquet write decimal as int32 and int64 and binary, this class wrap the real vector to - * provide {@link DecimalColumnVector} interface. - * - *

Reference Flink release 1.11.2 {@link org.apache.flink.formats.parquet.vector.ParquetDecimalVector} - * because it is not public. - */ -public class ParquetDecimalVector implements DecimalColumnVector { - - public final ColumnVector vector; - - public ParquetDecimalVector(ColumnVector vector) { - this.vector = vector; - } - - @Override - public DecimalData getDecimal(int i, int precision, int scale) { - return DecimalData.fromUnscaledBytes( - ((BytesColumnVector) vector).getBytes(i).getBytes(), - precision, - scale); - } - - @Override - public boolean isNullAt(int i) { - return vector.isNullAt(i); - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java deleted file mode 100644 index 07416a371715..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/AbstractColumnReader.java +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.formats.parquet.vector.ParquetDictionary; -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.data.vector.writable.WritableIntVector; -import org.apache.parquet.Preconditions; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.bytes.BytesInput; -import org.apache.parquet.bytes.BytesUtils; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.Dictionary; -import org.apache.parquet.column.Encoding; -import org.apache.parquet.column.page.DataPage; -import org.apache.parquet.column.page.DataPageV1; -import org.apache.parquet.column.page.DataPageV2; -import org.apache.parquet.column.page.DictionaryPage; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.io.ParquetDecodingException; -import org.apache.parquet.schema.PrimitiveType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; - -/** - * Abstract {@link ColumnReader}. - * See {@link org.apache.parquet.column.impl.ColumnReaderImpl}, - * part of the code is referred from Apache Spark and Apache Parquet. - * - *

Note: Reference Flink release 1.11.2 {@link org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader} - * because some of the package scope methods. - */ -public abstract class AbstractColumnReader - implements ColumnReader { - - private static final Logger LOG = LoggerFactory.getLogger(org.apache.flink.formats.parquet.vector.reader.AbstractColumnReader.class); - - private final PageReader pageReader; - - /** - * The dictionary, if this column has dictionary encoding. - */ - protected final Dictionary dictionary; - - /** - * Maximum definition level for this column. - */ - protected final int maxDefLevel; - - protected final ColumnDescriptor descriptor; - - /** - * Total number of values read. - */ - private long valuesRead; - - /** - * value that indicates the end of the current page. That is, if valuesRead == - * endOfPageValueCount, we are at the end of the page. - */ - private long endOfPageValueCount; - - /** - * If true, the current page is dictionary encoded. - */ - private boolean isCurrentPageDictionaryEncoded; - - /** - * Total values in the current page. - */ - private int pageValueCount; - - /* - * Input streams: - * 1.Run length encoder to encode every data, so we have run length stream to get - * run length information. - * 2.Data maybe is real data, maybe is dictionary ids which need be decode to real - * data from Dictionary. - * - * Run length stream ------> Data stream - * | - * ------> Dictionary ids stream - */ - - /** - * Run length decoder for data and dictionary. - */ - protected RunLengthDecoder runLenDecoder; - - /** - * Data input stream. - */ - ByteBufferInputStream dataInputStream; - - /** - * Dictionary decoder to wrap dictionary ids input stream. - */ - private RunLengthDecoder dictionaryIdsDecoder; - - public AbstractColumnReader( - ColumnDescriptor descriptor, - PageReader pageReader) throws IOException { - this.descriptor = descriptor; - this.pageReader = pageReader; - this.maxDefLevel = descriptor.getMaxDefinitionLevel(); - - DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); - if (dictionaryPage != null) { - try { - this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage); - this.isCurrentPageDictionaryEncoded = true; - } catch (IOException e) { - throw new IOException("could not decode the dictionary for " + descriptor, e); - } - } else { - this.dictionary = null; - this.isCurrentPageDictionaryEncoded = false; - } - /* - * Total number of values in this column (in this row group). - */ - long totalValueCount = pageReader.getTotalValueCount(); - if (totalValueCount == 0) { - throw new IOException("totalValueCount == 0"); - } - } - - protected void checkTypeName(PrimitiveType.PrimitiveTypeName expectedName) { - PrimitiveType.PrimitiveTypeName actualName = descriptor.getPrimitiveType().getPrimitiveTypeName(); - Preconditions.checkArgument( - actualName == expectedName, - "Expected type name: %s, actual type name: %s", - expectedName, - actualName); - } - - /** - * Reads `total` values from this columnReader into column. - */ - @Override - public final void readToVector(int readNumber, V vector) throws IOException { - int rowId = 0; - WritableIntVector dictionaryIds = null; - if (dictionary != null) { - dictionaryIds = vector.reserveDictionaryIds(readNumber); - } - while (readNumber > 0) { - // Compute the number of values we want to read in this page. - int leftInPage = (int) (endOfPageValueCount - valuesRead); - if (leftInPage == 0) { - DataPage page = pageReader.readPage(); - if (page instanceof DataPageV1) { - readPageV1((DataPageV1) page); - } else if (page instanceof DataPageV2) { - readPageV2((DataPageV2) page); - } else { - throw new RuntimeException("Unsupported page type: " + page.getClass()); - } - leftInPage = (int) (endOfPageValueCount - valuesRead); - } - int num = Math.min(readNumber, leftInPage); - if (isCurrentPageDictionaryEncoded) { - // Read and decode dictionary ids. - runLenDecoder.readDictionaryIds( - num, dictionaryIds, vector, rowId, maxDefLevel, this.dictionaryIdsDecoder); - - if (vector.hasDictionary() || (rowId == 0 && supportLazyDecode())) { - // Column vector supports lazy decoding of dictionary values so just set the dictionary. - // We can't do this if rowId != 0 AND the column doesn't have a dictionary (i.e. some - // non-dictionary encoded values have already been added). - vector.setDictionary(new ParquetDictionary(dictionary)); - } else { - readBatchFromDictionaryIds(rowId, num, vector, dictionaryIds); - } - } else { - if (vector.hasDictionary() && rowId != 0) { - // This batch already has dictionary encoded values but this new page is not. The batch - // does not support a mix of dictionary and not so we will decode the dictionary. - readBatchFromDictionaryIds(0, rowId, vector, vector.getDictionaryIds()); - } - vector.setDictionary(null); - readBatch(rowId, num, vector); - } - - valuesRead += num; - rowId += num; - readNumber -= num; - } - } - - private void readPageV1(DataPageV1 page) throws IOException { - this.pageValueCount = page.getValueCount(); - ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); - - // Initialize the decoders. - if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { - throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); - } - int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); - this.runLenDecoder = new RunLengthDecoder(bitWidth); - try { - BytesInput bytes = page.getBytes(); - ByteBufferInputStream in = bytes.toInputStream(); - rlReader.initFromPage(pageValueCount, in); - this.runLenDecoder.initFromStream(pageValueCount, in); - prepareNewPage(page.getValueEncoding(), in); - } catch (IOException e) { - throw new IOException("could not read page " + page + " in col " + descriptor, e); - } - } - - private void readPageV2(DataPageV2 page) throws IOException { - this.pageValueCount = page.getValueCount(); - - int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); - // do not read the length from the stream. v2 pages handle dividing the page bytes. - this.runLenDecoder = new RunLengthDecoder(bitWidth, false); - this.runLenDecoder.initFromStream( - this.pageValueCount, page.getDefinitionLevels().toInputStream()); - try { - prepareNewPage(page.getDataEncoding(), page.getData().toInputStream()); - } catch (IOException e) { - throw new IOException("could not read page " + page + " in col " + descriptor, e); - } - } - - private void prepareNewPage( - Encoding dataEncoding, - ByteBufferInputStream in) throws IOException { - this.endOfPageValueCount = valuesRead + pageValueCount; - if (dataEncoding.usesDictionary()) { - if (dictionary == null) { - throw new IOException("Could not read page in col " - + descriptor - + " as the dictionary was missing for encoding " - + dataEncoding); - } - @SuppressWarnings("deprecation") - Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression - if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { - throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); - } - this.dataInputStream = null; - this.dictionaryIdsDecoder = new RunLengthDecoder(); - try { - this.dictionaryIdsDecoder.initFromStream(pageValueCount, in); - } catch (IOException e) { - throw new IOException("could not read dictionary in col " + descriptor, e); - } - this.isCurrentPageDictionaryEncoded = true; - } else { - if (dataEncoding != Encoding.PLAIN) { - throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); - } - this.dictionaryIdsDecoder = null; - LOG.debug("init from page at offset {} for length {}", in.position(), in.available()); - this.dataInputStream = in.remainingStream(); - this.isCurrentPageDictionaryEncoded = false; - } - - afterReadPage(); - } - - final ByteBuffer readDataBuffer(int length) { - try { - return dataInputStream.slice(length).order(ByteOrder.LITTLE_ENDIAN); - } catch (IOException e) { - throw new ParquetDecodingException("Failed to read " + length + " bytes", e); - } - } - - /** - * After read a page, we may need some initialization. - */ - protected void afterReadPage() { - } - - /** - * Support lazy dictionary ids decode. See more in {@link ParquetDictionary}. - * If return false, we will decode all the data first. - */ - protected boolean supportLazyDecode() { - return true; - } - - /** - * Read batch from {@link #runLenDecoder} and {@link #dataInputStream}. - */ - protected abstract void readBatch(int rowId, int num, V column); - - /** - * Decode dictionary ids to data. - * From {@link #runLenDecoder} and {@link #dictionaryIdsDecoder}. - */ - protected abstract void readBatchFromDictionaryIds( - int rowId, - int num, - V column, - WritableIntVector dictionaryIds); -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java deleted file mode 100644 index 67dbb7490260..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java +++ /dev/null @@ -1,473 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.hudi.table.data.vector.VectorizedColumnBatch; -import org.apache.hudi.table.format.cow.vector.HeapArrayVector; -import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.vector.heap.HeapBooleanVector; -import org.apache.flink.table.data.vector.heap.HeapByteVector; -import org.apache.flink.table.data.vector.heap.HeapBytesVector; -import org.apache.flink.table.data.vector.heap.HeapDoubleVector; -import org.apache.flink.table.data.vector.heap.HeapFloatVector; -import org.apache.flink.table.data.vector.heap.HeapIntVector; -import org.apache.flink.table.data.vector.heap.HeapLongVector; -import org.apache.flink.table.data.vector.heap.HeapShortVector; -import org.apache.flink.table.data.vector.heap.HeapTimestampVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.types.logical.ArrayType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.Type; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Array {@link ColumnReader}. - */ -public class ArrayColumnReader extends BaseVectorizedColumnReader { - - // The value read in last time - private Object lastValue; - - // flag to indicate if there is no data in parquet data page - private boolean eof = false; - - // flag to indicate if it's the first time to read parquet data page with this instance - boolean isFirstRow = true; - - public ArrayColumnReader( - ColumnDescriptor descriptor, - PageReader pageReader, - boolean isUtcTimestamp, - Type type, - LogicalType logicalType) - throws IOException { - super(descriptor, pageReader, isUtcTimestamp, type, logicalType); - } - - @Override - public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { - HeapArrayVector lcv = (HeapArrayVector) vector; - // before readBatch, initial the size of offsets & lengths as the default value, - // the actual size will be assigned in setChildrenInfo() after reading complete. - lcv.offsets = new long[VectorizedColumnBatch.DEFAULT_SIZE]; - lcv.lengths = new long[VectorizedColumnBatch.DEFAULT_SIZE]; - // Because the length of ListColumnVector.child can't be known now, - // the valueList will save all data for ListColumnVector temporary. - List valueList = new ArrayList<>(); - - LogicalType category = ((ArrayType) logicalType).getElementType(); - - // read the first row in parquet data page, this will be only happened once for this - // instance - if (isFirstRow) { - if (!fetchNextValue(category)) { - return; - } - isFirstRow = false; - } - - int index = collectDataFromParquetPage(readNumber, lcv, valueList, category); - - // Convert valueList to array for the ListColumnVector.child - fillColumnVector(category, lcv, valueList, index); - } - - /** - * Reads a single value from parquet page, puts it into lastValue. Returns a boolean indicating - * if there is more values to read (true). - * - * @param category - * @return boolean - * @throws IOException - */ - private boolean fetchNextValue(LogicalType category) throws IOException { - int left = readPageIfNeed(); - if (left > 0) { - // get the values of repetition and definitionLevel - readRepetitionAndDefinitionLevels(); - // read the data if it isn't null - if (definitionLevel == maxDefLevel) { - if (isCurrentPageDictionaryEncoded) { - lastValue = dataColumn.readValueDictionaryId(); - } else { - lastValue = readPrimitiveTypedRow(category); - } - } else { - lastValue = null; - } - return true; - } else { - eof = true; - return false; - } - } - - private int readPageIfNeed() throws IOException { - // Compute the number of values we want to read in this page. - int leftInPage = (int) (endOfPageValueCount - valuesRead); - if (leftInPage == 0) { - // no data left in current page, load data from new page - readPage(); - leftInPage = (int) (endOfPageValueCount - valuesRead); - } - return leftInPage; - } - - // Need to be in consistent with that VectorizedPrimitiveColumnReader#readBatchHelper - // TODO Reduce the duplicated code - private Object readPrimitiveTypedRow(LogicalType category) { - switch (category.getTypeRoot()) { - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - return dataColumn.readString(); - case BOOLEAN: - return dataColumn.readBoolean(); - case TIME_WITHOUT_TIME_ZONE: - case DATE: - case INTEGER: - return dataColumn.readInteger(); - case TINYINT: - return dataColumn.readTinyInt(); - case SMALLINT: - return dataColumn.readSmallInt(); - case BIGINT: - return dataColumn.readLong(); - case FLOAT: - return dataColumn.readFloat(); - case DOUBLE: - return dataColumn.readDouble(); - case DECIMAL: - switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { - case INT32: - return dataColumn.readInteger(); - case INT64: - return dataColumn.readLong(); - case BINARY: - case FIXED_LEN_BYTE_ARRAY: - return dataColumn.readString(); - default: - throw new AssertionError(); - } - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return dataColumn.readTimestamp(); - default: - throw new RuntimeException("Unsupported type in the list: " + type); - } - } - - private Object dictionaryDecodeValue(LogicalType category, Integer dictionaryValue) { - if (dictionaryValue == null) { - return null; - } - - switch (category.getTypeRoot()) { - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - return dictionary.readString(dictionaryValue); - case DATE: - case TIME_WITHOUT_TIME_ZONE: - case INTEGER: - return dictionary.readInteger(dictionaryValue); - case BOOLEAN: - return dictionary.readBoolean(dictionaryValue) ? 1 : 0; - case DOUBLE: - return dictionary.readDouble(dictionaryValue); - case FLOAT: - return dictionary.readFloat(dictionaryValue); - case TINYINT: - return dictionary.readTinyInt(dictionaryValue); - case SMALLINT: - return dictionary.readSmallInt(dictionaryValue); - case BIGINT: - return dictionary.readLong(dictionaryValue); - case DECIMAL: - switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) { - case INT32: - return dictionary.readInteger(dictionaryValue); - case INT64: - return dictionary.readLong(dictionaryValue); - case FIXED_LEN_BYTE_ARRAY: - case BINARY: - return dictionary.readString(dictionaryValue); - default: - throw new AssertionError(); - } - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return dictionary.readTimestamp(dictionaryValue); - default: - throw new RuntimeException("Unsupported type in the list: " + type); - } - } - - /** - * Collects data from a parquet page and returns the final row index where it stopped. The - * returned index can be equal to or less than total. - * - * @param total maximum number of rows to collect - * @param lcv column vector to do initial setup in data collection time - * @param valueList collection of values that will be fed into the vector later - * @param category - * @return int - * @throws IOException - */ - private int collectDataFromParquetPage( - int total, HeapArrayVector lcv, List valueList, LogicalType category) - throws IOException { - int index = 0; - /* - * Here is a nested loop for collecting all values from a parquet page. - * A column of array type can be considered as a list of lists, so the two loops are as below: - * 1. The outer loop iterates on rows (index is a row index, so points to a row in the batch), e.g.: - * [0, 2, 3] <- index: 0 - * [NULL, 3, 4] <- index: 1 - * - * 2. The inner loop iterates on values within a row (sets all data from parquet data page - * for an element in ListColumnVector), so fetchNextValue returns values one-by-one: - * 0, 2, 3, NULL, 3, 4 - * - * As described below, the repetition level (repetitionLevel != 0) - * can be used to decide when we'll start to read values for the next list. - */ - while (!eof && index < total) { - // add element to ListColumnVector one by one - lcv.offsets[index] = valueList.size(); - /* - * Let's collect all values for a single list. - * Repetition level = 0 means that a new list started there in the parquet page, - * in that case, let's exit from the loop, and start to collect value for a new list. - */ - do { - /* - * Definition level = 0 when a NULL value was returned instead of a list - * (this is not the same as a NULL value in of a list). - */ - if (definitionLevel == 0) { - lcv.setNullAt(index); - } - valueList.add( - isCurrentPageDictionaryEncoded - ? dictionaryDecodeValue(category, (Integer) lastValue) - : lastValue); - } while (fetchNextValue(category) && (repetitionLevel != 0)); - - lcv.lengths[index] = valueList.size() - lcv.offsets[index]; - index++; - } - return index; - } - - /** - * The lengths & offsets will be initialized as default size (1024), it should be set to the - * actual size according to the element number. - */ - private void setChildrenInfo(HeapArrayVector lcv, int itemNum, int elementNum) { - lcv.setSize(itemNum); - long[] lcvLength = new long[elementNum]; - long[] lcvOffset = new long[elementNum]; - System.arraycopy(lcv.lengths, 0, lcvLength, 0, elementNum); - System.arraycopy(lcv.offsets, 0, lcvOffset, 0, elementNum); - lcv.lengths = lcvLength; - lcv.offsets = lcvOffset; - } - - private void fillColumnVector( - LogicalType category, HeapArrayVector lcv, List valueList, int elementNum) { - int total = valueList.size(); - setChildrenInfo(lcv, total, elementNum); - switch (category.getTypeRoot()) { - case CHAR: - case VARCHAR: - case BINARY: - case VARBINARY: - lcv.child = new HeapBytesVector(total); - ((HeapBytesVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - byte[] src = ((List) valueList).get(i); - if (src == null) { - ((HeapBytesVector) lcv.child).setNullAt(i); - } else { - ((HeapBytesVector) lcv.child).appendBytes(i, src, 0, src.length); - } - } - break; - case BOOLEAN: - lcv.child = new HeapBooleanVector(total); - ((HeapBooleanVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapBooleanVector) lcv.child).setNullAt(i); - } else { - ((HeapBooleanVector) lcv.child).vector[i] = - ((List) valueList).get(i); - } - } - break; - case TINYINT: - lcv.child = new HeapByteVector(total); - ((HeapByteVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapByteVector) lcv.child).setNullAt(i); - } else { - ((HeapByteVector) lcv.child).vector[i] = - (byte) ((List) valueList).get(i).intValue(); - } - } - break; - case SMALLINT: - lcv.child = new HeapShortVector(total); - ((HeapShortVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapShortVector) lcv.child).setNullAt(i); - } else { - ((HeapShortVector) lcv.child).vector[i] = - (short) ((List) valueList).get(i).intValue(); - } - } - break; - case INTEGER: - case DATE: - case TIME_WITHOUT_TIME_ZONE: - lcv.child = new HeapIntVector(total); - ((HeapIntVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapIntVector) lcv.child).setNullAt(i); - } else { - ((HeapIntVector) lcv.child).vector[i] = ((List) valueList).get(i); - } - } - break; - case FLOAT: - lcv.child = new HeapFloatVector(total); - ((HeapFloatVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapFloatVector) lcv.child).setNullAt(i); - } else { - ((HeapFloatVector) lcv.child).vector[i] = ((List) valueList).get(i); - } - } - break; - case BIGINT: - lcv.child = new HeapLongVector(total); - ((HeapLongVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapLongVector) lcv.child).setNullAt(i); - } else { - ((HeapLongVector) lcv.child).vector[i] = ((List) valueList).get(i); - } - } - break; - case DOUBLE: - lcv.child = new HeapDoubleVector(total); - ((HeapDoubleVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapDoubleVector) lcv.child).setNullAt(i); - } else { - ((HeapDoubleVector) lcv.child).vector[i] = - ((List) valueList).get(i); - } - } - break; - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - lcv.child = new HeapTimestampVector(total); - ((HeapTimestampVector) lcv.child).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapTimestampVector) lcv.child).setNullAt(i); - } else { - ((HeapTimestampVector) lcv.child) - .setTimestamp(i, ((List) valueList).get(i)); - } - } - break; - case DECIMAL: - PrimitiveType.PrimitiveTypeName primitiveTypeName = - descriptor.getPrimitiveType().getPrimitiveTypeName(); - switch (primitiveTypeName) { - case INT32: - lcv.child = new ParquetDecimalVector(new HeapIntVector(total)); - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) - .setNullAt(i); - } else { - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) - .vector[i] = - ((List) valueList).get(i); - } - } - break; - case INT64: - lcv.child = new ParquetDecimalVector(new HeapLongVector(total)); - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector).reset(); - for (int i = 0; i < valueList.size(); i++) { - if (valueList.get(i) == null) { - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) - .setNullAt(i); - } else { - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) - .vector[i] = - ((List) valueList).get(i); - } - } - break; - default: - lcv.child = new ParquetDecimalVector(new HeapBytesVector(total)); - ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector).reset(); - for (int i = 0; i < valueList.size(); i++) { - byte[] src = ((List) valueList).get(i); - if (valueList.get(i) == null) { - ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) - .setNullAt(i); - } else { - ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) - .appendBytes(i, src, 0, src.length); - } - } - break; - } - break; - default: - throw new RuntimeException("Unsupported type in the list: " + type); - } - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java deleted file mode 100644 index 073c704c4b24..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/BaseVectorizedColumnReader.java +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.bytes.BytesInput; -import org.apache.parquet.bytes.BytesUtils; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.Encoding; -import org.apache.parquet.column.page.DataPage; -import org.apache.parquet.column.page.DataPageV1; -import org.apache.parquet.column.page.DataPageV2; -import org.apache.parquet.column.page.DictionaryPage; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; -import org.apache.parquet.io.ParquetDecodingException; -import org.apache.parquet.schema.Type; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayInputStream; -import java.io.IOException; - -import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL; -import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; -import static org.apache.parquet.column.ValuesType.VALUES; - -/** - * Abstract {@link ColumnReader}. part of the code is referred from Apache Hive and Apache Parquet. - */ -public abstract class BaseVectorizedColumnReader implements ColumnReader { - - private static final Logger LOG = LoggerFactory.getLogger(BaseVectorizedColumnReader.class); - - protected boolean isUtcTimestamp; - - /** - * Total number of values read. - */ - protected long valuesRead; - - /** - * value that indicates the end of the current page. That is, if valuesRead == - * endOfPageValueCount, we are at the end of the page. - */ - protected long endOfPageValueCount; - - /** - * The dictionary, if this column has dictionary encoding. - */ - protected final ParquetDataColumnReader dictionary; - - /** - * If true, the current page is dictionary encoded. - */ - protected boolean isCurrentPageDictionaryEncoded; - - /** - * Maximum definition level for this column. - */ - protected final int maxDefLevel; - - protected int definitionLevel; - protected int repetitionLevel; - - /** - * Repetition/Definition/Value readers. - */ - protected IntIterator repetitionLevelColumn; - - protected IntIterator definitionLevelColumn; - protected ParquetDataColumnReader dataColumn; - - /** - * Total values in the current page. - */ - protected int pageValueCount; - - protected final PageReader pageReader; - protected final ColumnDescriptor descriptor; - protected final Type type; - protected final LogicalType logicalType; - - public BaseVectorizedColumnReader( - ColumnDescriptor descriptor, - PageReader pageReader, - boolean isUtcTimestamp, - Type parquetType, - LogicalType logicalType) - throws IOException { - this.descriptor = descriptor; - this.type = parquetType; - this.pageReader = pageReader; - this.maxDefLevel = descriptor.getMaxDefinitionLevel(); - this.isUtcTimestamp = isUtcTimestamp; - this.logicalType = logicalType; - - DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); - if (dictionaryPage != null) { - try { - this.dictionary = - ParquetDataColumnReaderFactory.getDataColumnReaderByTypeOnDictionary( - parquetType.asPrimitiveType(), - dictionaryPage - .getEncoding() - .initDictionary(descriptor, dictionaryPage), - isUtcTimestamp); - this.isCurrentPageDictionaryEncoded = true; - } catch (IOException e) { - throw new IOException("could not decode the dictionary for " + descriptor, e); - } - } else { - this.dictionary = null; - this.isCurrentPageDictionaryEncoded = false; - } - } - - protected void readRepetitionAndDefinitionLevels() { - repetitionLevel = repetitionLevelColumn.nextInt(); - definitionLevel = definitionLevelColumn.nextInt(); - valuesRead++; - } - - protected void readPage() throws IOException { - DataPage page = pageReader.readPage(); - - if (page == null) { - return; - } - - page.accept( - new DataPage.Visitor() { - @Override - public Void visit(DataPageV1 dataPageV1) { - readPageV1(dataPageV1); - return null; - } - - @Override - public Void visit(DataPageV2 dataPageV2) { - readPageV2(dataPageV2); - return null; - } - }); - } - - private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) - throws IOException { - this.pageValueCount = valueCount; - this.endOfPageValueCount = valuesRead + pageValueCount; - if (dataEncoding.usesDictionary()) { - this.dataColumn = null; - if (dictionary == null) { - throw new IOException( - "could not read page in col " - + descriptor - + " as the dictionary was missing for encoding " - + dataEncoding); - } - dataColumn = - ParquetDataColumnReaderFactory.getDataColumnReaderByType( - type.asPrimitiveType(), - dataEncoding.getDictionaryBasedValuesReader( - descriptor, VALUES, dictionary.getDictionary()), - isUtcTimestamp); - this.isCurrentPageDictionaryEncoded = true; - } else { - dataColumn = - ParquetDataColumnReaderFactory.getDataColumnReaderByType( - type.asPrimitiveType(), - dataEncoding.getValuesReader(descriptor, VALUES), - isUtcTimestamp); - this.isCurrentPageDictionaryEncoded = false; - } - - try { - dataColumn.initFromPage(pageValueCount, in); - } catch (IOException e) { - throw new IOException("could not read page in col " + descriptor, e); - } - } - - private void readPageV1(DataPageV1 page) { - ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); - ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL); - this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); - this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); - try { - BytesInput bytes = page.getBytes(); - LOG.debug("page size " + bytes.size() + " bytes and " + pageValueCount + " records"); - ByteBufferInputStream in = bytes.toInputStream(); - LOG.debug("reading repetition levels at " + in.position()); - rlReader.initFromPage(pageValueCount, in); - LOG.debug("reading definition levels at " + in.position()); - dlReader.initFromPage(pageValueCount, in); - LOG.debug("reading data at " + in.position()); - initDataReader(page.getValueEncoding(), in, page.getValueCount()); - } catch (IOException e) { - throw new ParquetDecodingException( - "could not read page " + page + " in col " + descriptor, e); - } - } - - private void readPageV2(DataPageV2 page) { - this.pageValueCount = page.getValueCount(); - this.repetitionLevelColumn = - newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels()); - this.definitionLevelColumn = - newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels()); - try { - LOG.debug( - "page data size " - + page.getData().size() - + " bytes and " - + pageValueCount - + " records"); - initDataReader( - page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount()); - } catch (IOException e) { - throw new ParquetDecodingException( - "could not read page " + page + " in col " + descriptor, e); - } - } - - private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) { - try { - if (maxLevel == 0) { - return new NullIntIterator(); - } - return new RLEIntIterator( - new RunLengthBitPackingHybridDecoder( - BytesUtils.getWidthFromMaxInt(maxLevel), - new ByteArrayInputStream(bytes.toByteArray()))); - } catch (IOException e) { - throw new ParquetDecodingException( - "could not read levels in page for col " + descriptor, e); - } - } - - /** - * Utility classes to abstract over different way to read ints with different encodings. - */ - abstract static class IntIterator { - abstract int nextInt(); - } - - /** - * read ints from {@link ValuesReader}. - */ - protected static final class ValuesReaderIntIterator extends IntIterator { - ValuesReader delegate; - - public ValuesReaderIntIterator(ValuesReader delegate) { - this.delegate = delegate; - } - - @Override - int nextInt() { - return delegate.readInteger(); - } - } - - /** - * read ints from {@link RunLengthBitPackingHybridDecoder}. - */ - protected static final class RLEIntIterator extends IntIterator { - RunLengthBitPackingHybridDecoder delegate; - - public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) { - this.delegate = delegate; - } - - @Override - int nextInt() { - try { - return delegate.readInt(); - } catch (IOException e) { - throw new ParquetDecodingException(e); - } - } - } - - /** - * return zero. - */ - protected static final class NullIntIterator extends IntIterator { - @Override - int nextInt() { - return 0; - } - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java deleted file mode 100644 index 8be29289bbab..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/EmptyColumnReader.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; - -import java.io.IOException; - -/** - * Empty {@link ColumnReader}. - *

- * This reader is to handle parquet files that have not been updated to the latest Schema. - * When reading a parquet file with the latest schema, parquet file might not have the new field. - * The EmptyColumnReader is used to handle such scenarios. - */ -public class EmptyColumnReader implements ColumnReader { - - public EmptyColumnReader() {} - - @Override - public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { - vector.fillWithNulls(); - } -} \ No newline at end of file diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java deleted file mode 100644 index 6ebe5f1e6fbf..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/FixedLenBytesColumnReader.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.table.data.vector.writable.WritableBytesVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.data.vector.writable.WritableIntVector; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.io.api.Binary; -import org.apache.parquet.schema.PrimitiveType; - -import java.io.IOException; -import java.nio.ByteBuffer; - -/** - * Fixed length bytes {@code ColumnReader}, just for decimal. - * - *

Note: Reference Flink release 1.13.2 - * {@code org.apache.flink.formats.parquet.vector.reader.FixedLenBytesColumnReader} - * to always write as legacy decimal format. - */ -public class FixedLenBytesColumnReader - extends AbstractColumnReader { - - public FixedLenBytesColumnReader( - ColumnDescriptor descriptor, PageReader pageReader) throws IOException { - super(descriptor, pageReader); - checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY); - } - - @Override - protected void readBatch(int rowId, int num, V column) { - int bytesLen = descriptor.getPrimitiveType().getTypeLength(); - WritableBytesVector bytesVector = (WritableBytesVector) column; - for (int i = 0; i < num; i++) { - if (runLenDecoder.readInteger() == maxDefLevel) { - byte[] bytes = readDataBinary(bytesLen).getBytes(); - bytesVector.appendBytes(rowId + i, bytes, 0, bytes.length); - } else { - bytesVector.setNullAt(rowId + i); - } - } - } - - @Override - protected void readBatchFromDictionaryIds( - int rowId, int num, V column, WritableIntVector dictionaryIds) { - WritableBytesVector bytesVector = (WritableBytesVector) column; - for (int i = rowId; i < rowId + num; ++i) { - if (!bytesVector.isNullAt(i)) { - byte[] v = dictionary.decodeToBinary(dictionaryIds.getInt(i)).getBytes(); - bytesVector.appendBytes(i, v, 0, v.length); - } - } - } - - private Binary readDataBinary(int len) { - ByteBuffer buffer = readDataBuffer(len); - if (buffer.hasArray()) { - return Binary.fromConstantByteArray( - buffer.array(), buffer.arrayOffset() + buffer.position(), len); - } else { - byte[] bytes = new byte[len]; - buffer.get(bytes); - return Binary.fromConstantByteArray(bytes); - } - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java deleted file mode 100644 index 70638a9c4320..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/Int64TimestampColumnReader.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.vector.writable.WritableIntVector; -import org.apache.flink.table.data.vector.writable.WritableTimestampVector; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReader; -import org.apache.parquet.schema.PrimitiveType; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.sql.Timestamp; -import java.time.Instant; -import java.time.temporal.ChronoUnit; - -/** - * Timestamp {@link org.apache.flink.formats.parquet.vector.reader.ColumnReader} that supports INT64 8 bytes, - * TIMESTAMP_MILLIS is the deprecated ConvertedType counterpart of a TIMESTAMP logical type - * that is UTC normalized and has MILLIS precision. - * - *

See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp - * TIMESTAMP_MILLIS and TIMESTAMP_MICROS are the deprecated ConvertedType. - */ -public class Int64TimestampColumnReader extends AbstractColumnReader { - - private final boolean utcTimestamp; - - private final ChronoUnit chronoUnit; - - public Int64TimestampColumnReader( - boolean utcTimestamp, - ColumnDescriptor descriptor, - PageReader pageReader, - int precision) throws IOException { - super(descriptor, pageReader); - this.utcTimestamp = utcTimestamp; - if (precision <= 3) { - this.chronoUnit = ChronoUnit.MILLIS; - } else if (precision <= 6) { - this.chronoUnit = ChronoUnit.MICROS; - } else { - throw new IllegalArgumentException( - "Avro does not support TIMESTAMP type with precision: " - + precision - + ", it only support precisions <= 6."); - } - checkTypeName(PrimitiveType.PrimitiveTypeName.INT64); - } - - @Override - protected boolean supportLazyDecode() { - return false; - } - - @Override - protected void readBatch(int rowId, int num, WritableTimestampVector column) { - for (int i = 0; i < num; i++) { - if (runLenDecoder.readInteger() == maxDefLevel) { - ByteBuffer buffer = readDataBuffer(8); - column.setTimestamp(rowId + i, int64ToTimestamp(utcTimestamp, buffer.getLong(), chronoUnit)); - } else { - column.setNullAt(rowId + i); - } - } - } - - @Override - protected void readBatchFromDictionaryIds( - int rowId, - int num, - WritableTimestampVector column, - WritableIntVector dictionaryIds) { - for (int i = rowId; i < rowId + num; ++i) { - if (!column.isNullAt(i)) { - column.setTimestamp(i, decodeInt64ToTimestamp( - utcTimestamp, dictionary, dictionaryIds.getInt(i), chronoUnit)); - } - } - } - - public static TimestampData decodeInt64ToTimestamp( - boolean utcTimestamp, - org.apache.parquet.column.Dictionary dictionary, - int id, - ChronoUnit unit) { - long value = dictionary.decodeToLong(id); - return int64ToTimestamp(utcTimestamp, value, unit); - } - - private static TimestampData int64ToTimestamp( - boolean utcTimestamp, - long interval, - ChronoUnit unit) { - final Instant instant = Instant.EPOCH.plus(interval, unit); - if (utcTimestamp) { - return TimestampData.fromInstant(instant); - } else { - // this applies the local timezone - return TimestampData.fromTimestamp(Timestamp.from(instant)); - } - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java deleted file mode 100644 index 015a867c4f22..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/MapColumnReader.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.hudi.table.format.cow.vector.HeapArrayVector; -import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.MapType; - -import java.io.IOException; - -/** - * Map {@link ColumnReader}. - */ -public class MapColumnReader implements ColumnReader { - - private final LogicalType logicalType; - private final ArrayColumnReader keyReader; - private final ArrayColumnReader valueReader; - - public MapColumnReader( - ArrayColumnReader keyReader, ArrayColumnReader valueReader, LogicalType logicalType) { - this.keyReader = keyReader; - this.valueReader = valueReader; - this.logicalType = logicalType; - } - - public void readBatch(int total, ColumnVector column) throws IOException { - HeapMapColumnVector mapColumnVector = (HeapMapColumnVector) column; - MapType mapType = (MapType) logicalType; - // initialize 2 ListColumnVector for keys and values - HeapArrayVector keyArrayColumnVector = new HeapArrayVector(total); - HeapArrayVector valueArrayColumnVector = new HeapArrayVector(total); - // read the keys and values - keyReader.readToVector(total, keyArrayColumnVector); - valueReader.readToVector(total, valueArrayColumnVector); - - // set the related attributes according to the keys and values - mapColumnVector.setKeys(keyArrayColumnVector.child); - mapColumnVector.setValues(valueArrayColumnVector.child); - mapColumnVector.setOffsets(keyArrayColumnVector.offsets); - mapColumnVector.setLengths(keyArrayColumnVector.lengths); - mapColumnVector.setSize(keyArrayColumnVector.getSize()); - for (int i = 0; i < keyArrayColumnVector.getLen(); i++) { - if (keyArrayColumnVector.isNullAt(i)) { - mapColumnVector.setNullAt(i); - } - } - } - - @Override - public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { - readBatch(readNumber, vector); - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java deleted file mode 100644 index 9436305d2955..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.hudi.table.data.ColumnarRowData; -import org.apache.hudi.table.data.vector.VectorizedColumnBatch; -import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.LogicalTypeRoot; -import org.apache.flink.util.FlinkRuntimeException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.column.page.PageReadStore; -import org.apache.parquet.filter.UnboundRecordFilter; -import org.apache.parquet.filter2.compat.FilterCompat; -import org.apache.parquet.filter2.predicate.FilterPredicate; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.hadoop.metadata.BlockMetaData; -import org.apache.parquet.hadoop.metadata.ParquetMetadata; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.Type; -import org.apache.parquet.schema.Types; - -import java.io.Closeable; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.stream.IntStream; - -import static org.apache.hudi.table.format.cow.ParquetSplitReaderUtil.createColumnReader; -import static org.apache.hudi.table.format.cow.ParquetSplitReaderUtil.createWritableColumnVector; -import static org.apache.parquet.filter2.compat.FilterCompat.get; -import static org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups; -import static org.apache.parquet.format.converter.ParquetMetadataConverter.range; -import static org.apache.parquet.hadoop.ParquetFileReader.readFooter; - -/** - * This reader is used to read a {@link VectorizedColumnBatch} from input split. - * - *

Note: Reference Flink release 1.11.2 - * {@code org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader} - * because it is package scope. - */ -public class ParquetColumnarRowSplitReader implements Closeable { - - private final boolean utcTimestamp; - - private final MessageType fileSchema; - - private final LogicalType[] requestedTypes; - - private final MessageType requestedSchema; - - /** - * The total number of rows this RecordReader will eventually read. The sum of the rows of all - * the row groups. - */ - private final long totalRowCount; - - private final WritableColumnVector[] writableVectors; - - private final VectorizedColumnBatch columnarBatch; - - private final ColumnarRowData row; - - private final int batchSize; - - private ParquetFileReader reader; - - /** - * For each request column, the reader to read this column. This is NULL if this column is - * missing from the file, in which case we populate the attribute with NULL. - */ - private ColumnReader[] columnReaders; - - /** - * The number of rows that have been returned. - */ - private long rowsReturned; - - /** - * The number of rows that have been reading, including the current in flight row group. - */ - private long totalCountLoadedSoFar; - - // the index of the next row to return - private int nextRow; - - // the number of rows in the current batch - private int rowsInBatch; - - public ParquetColumnarRowSplitReader( - boolean utcTimestamp, - boolean caseSensitive, - Configuration conf, - LogicalType[] selectedTypes, - String[] selectedFieldNames, - ColumnBatchGenerator generator, - int batchSize, - Path path, - long splitStart, - long splitLength, - FilterPredicate filterPredicate, - UnboundRecordFilter recordFilter) throws IOException { - this.utcTimestamp = utcTimestamp; - this.batchSize = batchSize; - // then we need to apply the predicate push down filter - ParquetMetadata footer = readFooter(conf, path, range(splitStart, splitStart + splitLength)); - MessageType fileSchema = footer.getFileMetaData().getSchema(); - FilterCompat.Filter filter = get(filterPredicate, recordFilter); - List blocks = filterRowGroups(filter, footer.getBlocks(), fileSchema); - - this.fileSchema = footer.getFileMetaData().getSchema(); - - Type[] types = clipParquetSchema(fileSchema, selectedFieldNames, caseSensitive); - int[] requestedIndices = IntStream.range(0, types.length).filter(i -> types[i] != null).toArray(); - Type[] readTypes = Arrays.stream(requestedIndices).mapToObj(i -> types[i]).toArray(Type[]::new); - - this.requestedTypes = Arrays.stream(requestedIndices).mapToObj(i -> selectedTypes[i]).toArray(LogicalType[]::new); - this.requestedSchema = Types.buildMessage().addFields(readTypes).named("flink-parquet"); - this.reader = new ParquetFileReader( - conf, footer.getFileMetaData(), path, blocks, requestedSchema.getColumns()); - - long totalRowCount = 0; - for (BlockMetaData block : blocks) { - totalRowCount += block.getRowCount(); - } - this.totalRowCount = totalRowCount; - this.nextRow = 0; - this.rowsInBatch = 0; - this.rowsReturned = 0; - - checkSchema(); - - this.writableVectors = createWritableVectors(); - ColumnVector[] columnVectors = patchedVector(selectedFieldNames.length, createReadableVectors(), requestedIndices); - this.columnarBatch = generator.generate(columnVectors); - this.row = new ColumnarRowData(columnarBatch); - } - - /** - * Patches the given vectors with nulls. - * The vector position that is not requested (or read from file) is patched as null. - * - * @param fields The total selected fields number - * @param vectors The readable vectors - * @param indices The requested indices from the selected fields - */ - private static ColumnVector[] patchedVector(int fields, ColumnVector[] vectors, int[] indices) { - ColumnVector[] patched = new ColumnVector[fields]; - for (int i = 0; i < indices.length; i++) { - patched[indices[i]] = vectors[i]; - } - return patched; - } - - /** - * Clips `parquetSchema` according to `fieldNames`. - */ - private static Type[] clipParquetSchema( - GroupType parquetSchema, String[] fieldNames, boolean caseSensitive) { - Type[] types = new Type[fieldNames.length]; - if (caseSensitive) { - for (int i = 0; i < fieldNames.length; ++i) { - String fieldName = fieldNames[i]; - types[i] = parquetSchema.containsField(fieldName) ? parquetSchema.getType(fieldName) : null; - } - } else { - Map caseInsensitiveFieldMap = new HashMap<>(); - for (Type type : parquetSchema.getFields()) { - caseInsensitiveFieldMap.compute(type.getName().toLowerCase(Locale.ROOT), - (key, previousType) -> { - if (previousType != null) { - throw new FlinkRuntimeException( - "Parquet with case insensitive mode should have no duplicate key: " + key); - } - return type; - }); - } - for (int i = 0; i < fieldNames.length; ++i) { - Type type = caseInsensitiveFieldMap.get(fieldNames[i].toLowerCase(Locale.ROOT)); - // TODO clip for array,map,row types. - types[i] = type; - } - } - - return types; - } - - private WritableColumnVector[] createWritableVectors() { - WritableColumnVector[] columns = new WritableColumnVector[requestedTypes.length]; - List types = requestedSchema.getFields(); - List descriptors = requestedSchema.getColumns(); - for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); - } - return columns; - } - - /** - * Create readable vectors from writable vectors. - * Especially for decimal, see {@link org.apache.flink.formats.parquet.vector.ParquetDecimalVector}. - */ - private ColumnVector[] createReadableVectors() { - ColumnVector[] vectors = new ColumnVector[writableVectors.length]; - for (int i = 0; i < writableVectors.length; i++) { - vectors[i] = requestedTypes[i].getTypeRoot() == LogicalTypeRoot.DECIMAL - ? new ParquetDecimalVector(writableVectors[i]) - : writableVectors[i]; - } - return vectors; - } - - private void checkSchema() throws IOException, UnsupportedOperationException { - /* - * Check that the requested schema is supported. - */ - for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { - String[] colPath = requestedSchema.getPaths().get(i); - if (fileSchema.containsPath(colPath)) { - ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); - if (!fd.equals(requestedSchema.getColumns().get(i))) { - throw new UnsupportedOperationException("Schema evolution not supported."); - } - } else { - if (requestedSchema.getColumns().get(i).getMaxDefinitionLevel() == 0) { - // Column is missing in data but the required data is non-nullable. This file is invalid. - throw new IOException("Required column is missing in data file. Col: " + Arrays.toString(colPath)); - } - } - } - } - - /** - * Method used to check if the end of the input is reached. - * - * @return True if the end is reached, otherwise false. - * @throws IOException Thrown, if an I/O error occurred. - */ - public boolean reachedEnd() throws IOException { - return !ensureBatch(); - } - - public RowData nextRecord() { - // return the next row - row.setRowId(this.nextRow++); - return row; - } - - /** - * Checks if there is at least one row left in the batch to return. If no more row are - * available, it reads another batch of rows. - * - * @return Returns true if there is one more row to return, false otherwise. - * @throws IOException throw if an exception happens while reading a batch. - */ - private boolean ensureBatch() throws IOException { - if (nextRow >= rowsInBatch) { - // No more rows available in the Rows array. - nextRow = 0; - // Try to read the next batch if rows from the file. - return nextBatch(); - } - // there is at least one Row left in the Rows array. - return true; - } - - /** - * Advances to the next batch of rows. Returns false if there are no more. - */ - private boolean nextBatch() throws IOException { - for (WritableColumnVector v : writableVectors) { - v.reset(); - } - columnarBatch.setNumRows(0); - if (rowsReturned >= totalRowCount) { - return false; - } - if (rowsReturned == totalCountLoadedSoFar) { - readNextRowGroup(); - } - - int num = (int) Math.min(batchSize, totalCountLoadedSoFar - rowsReturned); - for (int i = 0; i < columnReaders.length; ++i) { - //noinspection unchecked - columnReaders[i].readToVector(num, writableVectors[i]); - } - rowsReturned += num; - columnarBatch.setNumRows(num); - rowsInBatch = num; - return true; - } - - private void readNextRowGroup() throws IOException { - PageReadStore pages = reader.readNextRowGroup(); - if (pages == null) { - throw new IOException("expecting more rows but reached last block. Read " - + rowsReturned + " out of " + totalRowCount); - } - List types = requestedSchema.getFields(); - List columns = requestedSchema.getColumns(); - columnReaders = new ColumnReader[types.size()]; - for (int i = 0; i < types.size(); ++i) { - columnReaders[i] = createColumnReader( - utcTimestamp, - requestedTypes[i], - types.get(i), - columns, - pages); - } - totalCountLoadedSoFar += pages.getRowCount(); - } - - /** - * Seek to a particular row number. - */ - public void seekToRow(long rowCount) throws IOException { - if (totalCountLoadedSoFar != 0) { - throw new UnsupportedOperationException("Only support seek at first."); - } - - List blockMetaData = reader.getRowGroups(); - - for (BlockMetaData metaData : blockMetaData) { - if (metaData.getRowCount() > rowCount) { - break; - } else { - reader.skipNextRowGroup(); - rowsReturned += metaData.getRowCount(); - totalCountLoadedSoFar += metaData.getRowCount(); - rowsInBatch = (int) metaData.getRowCount(); - nextRow = (int) metaData.getRowCount(); - rowCount -= metaData.getRowCount(); - } - } - for (int i = 0; i < rowCount; i++) { - boolean end = reachedEnd(); - if (end) { - throw new RuntimeException("Seek to many rows."); - } - nextRecord(); - } - } - - @Override - public void close() throws IOException { - if (reader != null) { - reader.close(); - reader = null; - } - } - - /** - * Interface to gen {@link VectorizedColumnBatch}. - */ - public interface ColumnBatchGenerator { - VectorizedColumnBatch generate(ColumnVector[] readVectors); - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java deleted file mode 100644 index e96cf22d29ef..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReader.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.table.data.TimestampData; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.column.Dictionary; - -import java.io.IOException; - -/** - * The interface to wrap the underlying Parquet dictionary and non dictionary encoded page reader. - */ -public interface ParquetDataColumnReader { - - /** - * Initialize the reader by page data. - * - * @param valueCount value count - * @param in page data - * @throws IOException - */ - void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException; - - /** - * @return the next Dictionary ID from the page - */ - int readValueDictionaryId(); - - /** - * @return the next Long from the page - */ - long readLong(); - - /** - * @return the next Integer from the page - */ - int readInteger(); - - /** - * @return the next SmallInt from the page - */ - int readSmallInt(); - - /** - * @return the next TinyInt from the page - */ - int readTinyInt(); - - /** - * @return the next Float from the page - */ - float readFloat(); - - /** - * @return the next Boolean from the page - */ - boolean readBoolean(); - - /** - * @return the next String from the page - */ - byte[] readString(); - - /** - * @return the next Varchar from the page - */ - byte[] readVarchar(); - - /** - * @return the next Char from the page - */ - byte[] readChar(); - - /** - * @return the next Bytes from the page - */ - byte[] readBytes(); - - /** - * @return the next Decimal from the page - */ - byte[] readDecimal(); - - /** - * @return the next Double from the page - */ - double readDouble(); - - /** - * @return the next TimestampData from the page - */ - TimestampData readTimestamp(); - - /** - * @return is data valid - */ - boolean isValid(); - - /** - * @return the underlying dictionary if current reader is dictionary encoded - */ - Dictionary getDictionary(); - - /** - * @param id in dictionary - * @return the Bytes from the dictionary by id - */ - byte[] readBytes(int id); - - /** - * @param id in dictionary - * @return the Float from the dictionary by id - */ - float readFloat(int id); - - /** - * @param id in dictionary - * @return the Double from the dictionary by id - */ - double readDouble(int id); - - /** - * @param id in dictionary - * @return the Integer from the dictionary by id - */ - int readInteger(int id); - - /** - * @param id in dictionary - * @return the Long from the dictionary by id - */ - long readLong(int id); - - /** - * @param id in dictionary - * @return the Small Int from the dictionary by id - */ - int readSmallInt(int id); - - /** - * @param id in dictionary - * @return the tiny int from the dictionary by id - */ - int readTinyInt(int id); - - /** - * @param id in dictionary - * @return the Boolean from the dictionary by id - */ - boolean readBoolean(int id); - - /** - * @param id in dictionary - * @return the Decimal from the dictionary by id - */ - byte[] readDecimal(int id); - - /** - * @param id in dictionary - * @return the TimestampData from the dictionary by id - */ - TimestampData readTimestamp(int id); - - /** - * @param id in dictionary - * @return the String from the dictionary by id - */ - byte[] readString(int id); - - /** - * @param id in dictionary - * @return the Varchar from the dictionary by id - */ - byte[] readVarchar(int id); - - /** - * @param id in dictionary - * @return the Char from the dictionary by id - */ - byte[] readChar(int id); -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java deleted file mode 100644 index 861d5cb00bbe..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetDataColumnReaderFactory.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.table.data.TimestampData; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.column.Dictionary; -import org.apache.parquet.column.values.ValuesReader; -import org.apache.parquet.io.api.Binary; -import org.apache.parquet.schema.PrimitiveType; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.sql.Timestamp; - -import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.JULIAN_EPOCH_OFFSET_DAYS; -import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.MILLIS_IN_DAY; -import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.NANOS_PER_MILLISECOND; -import static org.apache.flink.formats.parquet.vector.reader.TimestampColumnReader.NANOS_PER_SECOND; - -/** - * Parquet file has self-describing schema which may differ from the user required schema (e.g. - * schema evolution). This factory is used to retrieve user required typed data via corresponding - * reader which reads the underlying data. - */ -public final class ParquetDataColumnReaderFactory { - - private ParquetDataColumnReaderFactory() { - } - - /** - * default reader for {@link ParquetDataColumnReader}. - */ - public static class DefaultParquetDataColumnReader implements ParquetDataColumnReader { - protected ValuesReader valuesReader; - protected Dictionary dict; - - // After the data is read in the parquet type, isValid will be set to true if the data can - // be returned in the type defined in HMS. Otherwise isValid is set to false. - boolean isValid = true; - - public DefaultParquetDataColumnReader(ValuesReader valuesReader) { - this.valuesReader = valuesReader; - } - - public DefaultParquetDataColumnReader(Dictionary dict) { - this.dict = dict; - } - - @Override - public void initFromPage(int i, ByteBufferInputStream in) throws IOException { - valuesReader.initFromPage(i, in); - } - - @Override - public boolean readBoolean() { - return valuesReader.readBoolean(); - } - - @Override - public boolean readBoolean(int id) { - return dict.decodeToBoolean(id); - } - - @Override - public byte[] readString(int id) { - return dict.decodeToBinary(id).getBytesUnsafe(); - } - - @Override - public byte[] readString() { - return valuesReader.readBytes().getBytesUnsafe(); - } - - @Override - public byte[] readVarchar() { - // we need to enforce the size here even the types are the same - return valuesReader.readBytes().getBytesUnsafe(); - } - - @Override - public byte[] readVarchar(int id) { - return dict.decodeToBinary(id).getBytesUnsafe(); - } - - @Override - public byte[] readChar() { - return valuesReader.readBytes().getBytesUnsafe(); - } - - @Override - public byte[] readChar(int id) { - return dict.decodeToBinary(id).getBytesUnsafe(); - } - - @Override - public byte[] readBytes() { - return valuesReader.readBytes().getBytesUnsafe(); - } - - @Override - public byte[] readBytes(int id) { - return dict.decodeToBinary(id).getBytesUnsafe(); - } - - @Override - public byte[] readDecimal() { - return valuesReader.readBytes().getBytesUnsafe(); - } - - @Override - public byte[] readDecimal(int id) { - return dict.decodeToBinary(id).getBytesUnsafe(); - } - - @Override - public float readFloat() { - return valuesReader.readFloat(); - } - - @Override - public float readFloat(int id) { - return dict.decodeToFloat(id); - } - - @Override - public double readDouble() { - return valuesReader.readDouble(); - } - - @Override - public double readDouble(int id) { - return dict.decodeToDouble(id); - } - - @Override - public TimestampData readTimestamp() { - throw new RuntimeException("Unsupported operation"); - } - - @Override - public TimestampData readTimestamp(int id) { - throw new RuntimeException("Unsupported operation"); - } - - @Override - public int readInteger() { - return valuesReader.readInteger(); - } - - @Override - public int readInteger(int id) { - return dict.decodeToInt(id); - } - - @Override - public boolean isValid() { - return isValid; - } - - @Override - public long readLong(int id) { - return dict.decodeToLong(id); - } - - @Override - public long readLong() { - return valuesReader.readLong(); - } - - @Override - public int readSmallInt() { - return valuesReader.readInteger(); - } - - @Override - public int readSmallInt(int id) { - return dict.decodeToInt(id); - } - - @Override - public int readTinyInt() { - return valuesReader.readInteger(); - } - - @Override - public int readTinyInt(int id) { - return dict.decodeToInt(id); - } - - @Override - public int readValueDictionaryId() { - return valuesReader.readValueDictionaryId(); - } - - public void skip() { - valuesReader.skip(); - } - - @Override - public Dictionary getDictionary() { - return dict; - } - } - - /** - * The reader who reads from the underlying Timestamp value value. - */ - public static class TypesFromInt96PageReader extends DefaultParquetDataColumnReader { - private final boolean isUtcTimestamp; - - public TypesFromInt96PageReader(ValuesReader realReader, boolean isUtcTimestamp) { - super(realReader); - this.isUtcTimestamp = isUtcTimestamp; - } - - public TypesFromInt96PageReader(Dictionary dict, boolean isUtcTimestamp) { - super(dict); - this.isUtcTimestamp = isUtcTimestamp; - } - - private TimestampData convert(Binary binary) { - ByteBuffer buf = binary.toByteBuffer(); - buf.order(ByteOrder.LITTLE_ENDIAN); - long timeOfDayNanos = buf.getLong(); - int julianDay = buf.getInt(); - return int96ToTimestamp(isUtcTimestamp, timeOfDayNanos, julianDay); - } - - @Override - public TimestampData readTimestamp(int id) { - return convert(dict.decodeToBinary(id)); - } - - @Override - public TimestampData readTimestamp() { - return convert(valuesReader.readBytes()); - } - } - - private static ParquetDataColumnReader getDataColumnReaderByTypeHelper( - boolean isDictionary, - PrimitiveType parquetType, - Dictionary dictionary, - ValuesReader valuesReader, - boolean isUtcTimestamp) { - if (parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT96) { - return isDictionary - ? new TypesFromInt96PageReader(dictionary, isUtcTimestamp) - : new TypesFromInt96PageReader(valuesReader, isUtcTimestamp); - } else { - return isDictionary - ? new DefaultParquetDataColumnReader(dictionary) - : new DefaultParquetDataColumnReader(valuesReader); - } - } - - public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary( - PrimitiveType parquetType, Dictionary realReader, boolean isUtcTimestamp) { - return getDataColumnReaderByTypeHelper(true, parquetType, realReader, null, isUtcTimestamp); - } - - public static ParquetDataColumnReader getDataColumnReaderByType( - PrimitiveType parquetType, ValuesReader realReader, boolean isUtcTimestamp) { - return getDataColumnReaderByTypeHelper( - false, parquetType, null, realReader, isUtcTimestamp); - } - - private static TimestampData int96ToTimestamp( - boolean utcTimestamp, long nanosOfDay, int julianDay) { - long millisecond = julianDayToMillis(julianDay) + (nanosOfDay / NANOS_PER_MILLISECOND); - - if (utcTimestamp) { - int nanoOfMillisecond = (int) (nanosOfDay % NANOS_PER_MILLISECOND); - return TimestampData.fromEpochMillis(millisecond, nanoOfMillisecond); - } else { - Timestamp timestamp = new Timestamp(millisecond); - timestamp.setNanos((int) (nanosOfDay % NANOS_PER_SECOND)); - return TimestampData.fromTimestamp(timestamp); - } - } - - private static long julianDayToMillis(int julianDay) { - return (julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY; - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java deleted file mode 100644 index 524c00f402d4..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RowColumnReader.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; - -import org.apache.flink.formats.parquet.vector.reader.ColumnReader; -import org.apache.flink.table.data.vector.writable.WritableColumnVector; - -import java.io.IOException; -import java.util.List; - -/** - * Row {@link ColumnReader}. - */ -public class RowColumnReader implements ColumnReader { - - private final List fieldReaders; - - public RowColumnReader(List fieldReaders) { - this.fieldReaders = fieldReaders; - } - - @Override - public void readToVector(int readNumber, WritableColumnVector vector) throws IOException { - HeapRowColumnVector rowColumnVector = (HeapRowColumnVector) vector; - WritableColumnVector[] vectors = rowColumnVector.vectors; - // row vector null array - boolean[] isNulls = new boolean[readNumber]; - for (int i = 0; i < vectors.length; i++) { - fieldReaders.get(i).readToVector(readNumber, vectors[i]); - - for (int j = 0; j < readNumber; j++) { - if (i == 0) { - isNulls[j] = vectors[i].isNullAt(j); - } else { - isNulls[j] = isNulls[j] && vectors[i].isNullAt(j); - } - if (i == vectors.length - 1 && isNulls[j]) { - // rowColumnVector[j] is null only when all fields[j] of rowColumnVector[j] is - // null - rowColumnVector.setNullAt(j); - } - } - } - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java b/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java deleted file mode 100644 index 3266f835e4d1..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/RunLengthDecoder.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector.reader; - -import org.apache.flink.table.data.vector.writable.WritableColumnVector; -import org.apache.flink.table.data.vector.writable.WritableIntVector; -import org.apache.parquet.Preconditions; -import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.bytes.BytesUtils; -import org.apache.parquet.column.values.bitpacking.BytePacker; -import org.apache.parquet.column.values.bitpacking.Packer; -import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder; -import org.apache.parquet.io.ParquetDecodingException; - -import java.io.IOException; -import java.nio.ByteBuffer; - -/** - * Run length decoder for data and dictionary ids. - * See https://github.com/apache/parquet-format/blob/master/Encodings.md - * See {@link RunLengthBitPackingHybridDecoder}. - * - *

Note: Reference Flink release 1.11.2 - * {@code org.apache.flink.formats.parquet.vector.reader.RunLengthDecoder} - * because it is package scope. - */ -final class RunLengthDecoder { - - /** - * If true, the bit width is fixed. This decoder is used in different places and this also - * controls if we need to read the bitwidth from the beginning of the data stream. - */ - private final boolean fixedWidth; - private final boolean readLength; - - // Encoded data. - private ByteBufferInputStream in; - - // bit/byte width of decoded data and utility to batch unpack them. - private int bitWidth; - private int bytesWidth; - private BytePacker packer; - - // Current decoding mode and values - MODE mode; - int currentCount; - int currentValue; - - // Buffer of decoded values if the values are PACKED. - int[] currentBuffer = new int[16]; - int currentBufferIdx = 0; - - RunLengthDecoder() { - this.fixedWidth = false; - this.readLength = false; - } - - RunLengthDecoder(int bitWidth) { - this.fixedWidth = true; - this.readLength = bitWidth != 0; - initWidthAndPacker(bitWidth); - } - - RunLengthDecoder(int bitWidth, boolean readLength) { - this.fixedWidth = true; - this.readLength = readLength; - initWidthAndPacker(bitWidth); - } - - /** - * Init from input stream. - */ - void initFromStream(int valueCount, ByteBufferInputStream in) throws IOException { - this.in = in; - if (fixedWidth) { - // initialize for repetition and definition levels - if (readLength) { - int length = readIntLittleEndian(); - this.in = in.sliceStream(length); - } - } else { - // initialize for values - if (in.available() > 0) { - initWidthAndPacker(in.read()); - } - } - if (bitWidth == 0) { - // 0 bit width, treat this as an RLE run of valueCount number of 0's. - this.mode = MODE.RLE; - this.currentCount = valueCount; - this.currentValue = 0; - } else { - this.currentCount = 0; - } - } - - /** - * Initializes the internal state for decoding ints of `bitWidth`. - */ - private void initWidthAndPacker(int bitWidth) { - Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32"); - this.bitWidth = bitWidth; - this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth); - this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth); - } - - int readInteger() { - if (this.currentCount == 0) { - this.readNextGroup(); - } - - this.currentCount--; - switch (mode) { - case RLE: - return this.currentValue; - case PACKED: - return this.currentBuffer[currentBufferIdx++]; - default: - throw new AssertionError(); - } - } - - /** - * Decoding for dictionary ids. The IDs are populated into `values` and the nullability is - * populated into `nulls`. - */ - void readDictionaryIds( - int total, - WritableIntVector values, - WritableColumnVector nulls, - int rowId, - int level, - RunLengthDecoder data) { - int left = total; - while (left > 0) { - if (this.currentCount == 0) { - this.readNextGroup(); - } - int n = Math.min(left, this.currentCount); - switch (mode) { - case RLE: - if (currentValue == level) { - data.readDictionaryIdData(n, values, rowId); - } else { - nulls.setNulls(rowId, n); - } - break; - case PACKED: - for (int i = 0; i < n; ++i) { - if (currentBuffer[currentBufferIdx++] == level) { - values.setInt(rowId + i, data.readInteger()); - } else { - nulls.setNullAt(rowId + i); - } - } - break; - default: - throw new AssertionError(); - } - rowId += n; - left -= n; - currentCount -= n; - } - } - - /** - * It is used to decode dictionary IDs. - */ - private void readDictionaryIdData(int total, WritableIntVector c, int rowId) { - int left = total; - while (left > 0) { - if (this.currentCount == 0) { - this.readNextGroup(); - } - int n = Math.min(left, this.currentCount); - switch (mode) { - case RLE: - c.setInts(rowId, n, currentValue); - break; - case PACKED: - c.setInts(rowId, n, currentBuffer, currentBufferIdx); - currentBufferIdx += n; - break; - default: - throw new AssertionError(); - } - rowId += n; - left -= n; - currentCount -= n; - } - } - - /** - * Reads the next varint encoded int. - */ - private int readUnsignedVarInt() throws IOException { - int value = 0; - int shift = 0; - int b; - do { - b = in.read(); - value |= (b & 0x7F) << shift; - shift += 7; - } while ((b & 0x80) != 0); - return value; - } - - /** - * Reads the next 4 byte little endian int. - */ - private int readIntLittleEndian() throws IOException { - int ch4 = in.read(); - int ch3 = in.read(); - int ch2 = in.read(); - int ch1 = in.read(); - return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4); - } - - /** - * Reads the next byteWidth little endian int. - */ - private int readIntLittleEndianPaddedOnBitWidth() throws IOException { - switch (bytesWidth) { - case 0: - return 0; - case 1: - return in.read(); - case 2: { - int ch2 = in.read(); - int ch1 = in.read(); - return (ch1 << 8) + ch2; - } - case 3: { - int ch3 = in.read(); - int ch2 = in.read(); - int ch1 = in.read(); - return (ch1 << 16) + (ch2 << 8) + ch3; - } - case 4: { - return readIntLittleEndian(); - } - default: - throw new RuntimeException("Unreachable"); - } - } - - /** - * Reads the next group. - */ - void readNextGroup() { - try { - int header = readUnsignedVarInt(); - this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; - switch (mode) { - case RLE: - this.currentCount = header >>> 1; - this.currentValue = readIntLittleEndianPaddedOnBitWidth(); - return; - case PACKED: - int numGroups = header >>> 1; - this.currentCount = numGroups * 8; - - if (this.currentBuffer.length < this.currentCount) { - this.currentBuffer = new int[this.currentCount]; - } - currentBufferIdx = 0; - int valueIndex = 0; - while (valueIndex < this.currentCount) { - // values are bit packed 8 at a time, so reading bitWidth will always work - ByteBuffer buffer = in.slice(bitWidth); - this.packer.unpack8Values(buffer, buffer.position(), this.currentBuffer, valueIndex); - valueIndex += 8; - } - return; - default: - throw new ParquetDecodingException("not a valid mode " + this.mode); - } - } catch (IOException e) { - throw new ParquetDecodingException("Failed to read from input stream", e); - } - } - - enum MODE { - RLE, - PACKED - } -} - diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index 18686b811c40..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index 8563d2422b64..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 176783e8108c..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public MetricGroup getMetricGroup() { - return new UnregisteredMetricsGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java deleted file mode 100644 index e3088356709f..000000000000 --- a/hudi-flink-datasource/hudi-flink1.13.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.apache.hudi.adapter; - -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; -import org.apache.flink.table.api.internal.TableEnvironmentImpl; - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * TableEnv for test goals. - */ -public class TestTableEnvs { - - public static TableEnvironment getBatchTableEnv() { - EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); - return TableEnvironmentImpl.create(settings); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index d4c6bc3a8f4d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 6dcfe71ccfd9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 0c836f3db391..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 865c0c81d4d9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java index b5c83936b02c..9fd25f163147 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -22,13 +22,6 @@ import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.data.RowData; @@ -43,22 +36,6 @@ * Adapter utils. */ public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - output, - watermarkInterval, - -1, - true); - } - public static FactoryUtil.DefaultDynamicTableContext getTableContext( ObjectIdentifier tablePath, ResolvedCatalogTable catalogTable, diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index c0d83e6096e3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { - @Override - default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - // no operation - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index 1f76ad692f33..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -import java.util.OptionalLong; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { - @Override - default OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 4461c28943d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index d4c6bc3a8f4d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 6dcfe71ccfd9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 0c836f3db391..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 865c0c81d4d9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/Utils.java index 7c8366dd381b..89ae23f6b649 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -22,13 +22,6 @@ import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.data.RowData; @@ -45,22 +38,6 @@ * Adapter utils. */ public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - output, - watermarkInterval, - -1, - true); - } - public static FactoryUtil.DefaultDynamicTableContext getTableContext( ObjectIdentifier tablePath, ResolvedCatalogTable catalogTable, diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index c0d83e6096e3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { - @Override - default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - // no operation - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index c903ec2ed408..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -import java.util.OptionalLong; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { - default OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 4461c28943d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java deleted file mode 100644 index e65437609a21..000000000000 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; - -/** - * TableEnv for test goals. - */ -public class TestTableEnvs { - - public static TableEnvironment getBatchTableEnv() { - Configuration conf = new Configuration(); - // for batch upsert use cases: current suggestion is to disable these 2 options, - // from 1.14, flink runtime execution mode has switched from streaming - // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), - // current batch execution mode has these limitations: - // - // 1. the keyed stream default to always sort the inputs by key; - // 2. the batch state-backend requires the inputs sort by state key - // - // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, - // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, - // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode - // to keep the strategy before 1.14. - conf.setBoolean("execution.sorted-inputs.enabled", false); - conf.setBoolean("execution.batch-state-backend.enabled", false); - StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); - EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); - return StreamTableEnvironment.create(execEnv, settings); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index d4c6bc3a8f4d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 6dcfe71ccfd9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 0c836f3db391..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 865c0c81d4d9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/Utils.java index 1112b7c7f69e..c418dc3d19db 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -22,13 +22,6 @@ import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; import org.apache.flink.table.data.RowData; @@ -45,22 +38,6 @@ * Adapter utils. */ public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - output, - watermarkInterval, - -1, - true); - } - public static FactoryUtil.DefaultDynamicTableContext getTableContext( ObjectIdentifier tablePath, ResolvedCatalogTable catalogTable, diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index c0d83e6096e3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { - @Override - default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - // no operation - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index c903ec2ed408..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -import java.util.OptionalLong; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { - default OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 4461c28943d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java deleted file mode 100644 index e65437609a21..000000000000 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; - -/** - * TableEnv for test goals. - */ -public class TestTableEnvs { - - public static TableEnvironment getBatchTableEnv() { - Configuration conf = new Configuration(); - // for batch upsert use cases: current suggestion is to disable these 2 options, - // from 1.14, flink runtime execution mode has switched from streaming - // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), - // current batch execution mode has these limitations: - // - // 1. the keyed stream default to always sort the inputs by key; - // 2. the batch state-backend requires the inputs sort by state key - // - // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, - // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, - // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode - // to keep the strategy before 1.14. - conf.setBoolean("execution.sorted-inputs.enabled", false); - conf.setBoolean("execution.batch-state-backend.enabled", false); - StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); - EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); - return StreamTableEnvironment.create(execEnv, settings); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index d4c6bc3a8f4d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 6dcfe71ccfd9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 0c836f3db391..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 865c0c81d4d9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/Utils.java index 659c65973674..a0c7b36420b9 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -22,13 +22,6 @@ import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; @@ -46,22 +39,6 @@ * Adapter utils. */ public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - output, - watermarkInterval, - -1, - true); - } - public static FactoryUtil.DefaultDynamicTableContext getTableContext( ObjectIdentifier tablePath, ResolvedCatalogTable catalogTable, diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index c0d83e6096e3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { - @Override - default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - // no operation - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index c903ec2ed408..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -import java.util.OptionalLong; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { - default OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 4461c28943d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java deleted file mode 100644 index e65437609a21..000000000000 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; - -/** - * TableEnv for test goals. - */ -public class TestTableEnvs { - - public static TableEnvironment getBatchTableEnv() { - Configuration conf = new Configuration(); - // for batch upsert use cases: current suggestion is to disable these 2 options, - // from 1.14, flink runtime execution mode has switched from streaming - // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), - // current batch execution mode has these limitations: - // - // 1. the keyed stream default to always sort the inputs by key; - // 2. the batch state-backend requires the inputs sort by state key - // - // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, - // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, - // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode - // to keep the strategy before 1.14. - conf.setBoolean("execution.sorted-inputs.enabled", false); - conf.setBoolean("execution.batch-state-backend.enabled", false); - StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); - EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); - return StreamTableEnvironment.create(execEnv, settings); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java deleted file mode 100644 index d4c6bc3a8f4d..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorAdapter.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; - -/** - * Adapter clazz for {@code AbstractStreamOperator}. - */ -public abstract class AbstractStreamOperatorAdapter extends AbstractStreamOperator { -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java deleted file mode 100644 index 6dcfe71ccfd9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/AbstractStreamOperatorFactoryAdapter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.YieldingOperatorFactory; - -/** - * Adapter clazz for {@link AbstractStreamOperatorFactory}. - */ -public abstract class AbstractStreamOperatorFactoryAdapter - extends AbstractStreamOperatorFactory implements YieldingOperatorFactory { - - public MailboxExecutorAdapter getMailboxExecutorAdapter() { - return new MailboxExecutorAdapter(getMailboxExecutor()); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java deleted file mode 100644 index 0c836f3db391..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/MailboxExecutorAdapter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.operators.MailboxExecutor; -import org.apache.flink.util.function.ThrowingRunnable; - -/** - * Adapter clazz for {@link MailboxExecutor}. - */ -public class MailboxExecutorAdapter { - private final MailboxExecutor executor; - - public MailboxExecutorAdapter(MailboxExecutor executor) { - this.executor = executor; - } - - public void execute(ThrowingRunnable command, String description) { - this.executor.execute(command, description); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java deleted file mode 100644 index 865c0c81d4d9..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/RateLimiterAdapter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; - -/** - * Bridge class for shaded guava clazz {@code RateLimiter}. - */ -public class RateLimiterAdapter { - private final RateLimiter rateLimiter; - - private RateLimiterAdapter(double permitsPerSecond) { - this.rateLimiter = RateLimiter.create(permitsPerSecond); - } - - public static RateLimiterAdapter create(double permitsPerSecond) { - return new RateLimiterAdapter(permitsPerSecond); - } - - public void acquire() { - this.rateLimiter.acquire(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java index 659c65973674..fe0351af4310 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/adapter/Utils.java @@ -22,13 +22,6 @@ import org.apache.flink.configuration.ReadableConfig; import org.apache.flink.runtime.io.disk.iomanager.IOManager; import org.apache.flink.runtime.memory.MemoryManager; -import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.StreamSourceContexts; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; -import org.apache.flink.streaming.runtime.tasks.StreamTask; import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.catalog.ResolvedCatalogTable; @@ -46,22 +39,6 @@ * Adapter utils. */ public class Utils { - public static SourceFunction.SourceContext getSourceContext( - TimeCharacteristic timeCharacteristic, - ProcessingTimeService processingTimeService, - StreamTask streamTask, - Output> output, - long watermarkInterval) { - return StreamSourceContexts.getSourceContext( - timeCharacteristic, - processingTimeService, - new Object(), // no actual locking needed - output, - watermarkInterval, - -1, - true); - } - public static FactoryUtil.DefaultDynamicTableContext getTableContext( ObjectIdentifier tablePath, ResolvedCatalogTable catalogTable, @@ -70,7 +47,7 @@ public static FactoryUtil.DefaultDynamicTableContext getTableContext( Collections.emptyMap(), conf, Thread.currentThread().getContextClassLoader(), false); } - public static BinaryExternalSorter getBinaryExternalSorter( + public static BinaryExternalSorter getBinaryExternalSorter( final Object owner, MemoryManager memoryManager, long reservedMemorySize, diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java deleted file mode 100644 index c0d83e6096e3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/OutputAdapter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; - -/** - * Adapter clazz for {@link Output}. - */ -public interface OutputAdapter extends Output { - @Override - default void emitWatermarkStatus(WatermarkStatus watermarkStatus) { - // no operation - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java deleted file mode 100644 index c903ec2ed408..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StateInitializationContextAdapter.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.runtime.state.StateInitializationContext; - -import java.util.OptionalLong; - -/** - * Adapter clazz for {@link StateInitializationContext}. - */ -public interface StateInitializationContextAdapter extends StateInitializationContext { - default OptionalLong getRestoredCheckpointId() { - return OptionalLong.empty(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java deleted file mode 100644 index 4461c28943d3..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/StreamingRuntimeContextAdapter.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.api.common.accumulators.Accumulator; -import org.apache.flink.metrics.groups.OperatorMetricGroup; -import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; - -import java.util.Map; - -/** - * Adapter clazz for {@link StreamingRuntimeContext}. - */ -public class StreamingRuntimeContextAdapter extends StreamingRuntimeContext { - - public StreamingRuntimeContextAdapter(AbstractStreamOperator operator, Environment env, - Map> accumulators) { - super(operator, env, accumulators); - } - - @Override - public OperatorMetricGroup getMetricGroup() { - return UnregisteredMetricsGroup.createOperatorMetricGroup(); - } -} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java deleted file mode 100644 index e65437609a21..000000000000 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/adapter/TestTableEnvs.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.adapter; - -import org.apache.flink.configuration.Configuration; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; - -/** - * TableEnv for test goals. - */ -public class TestTableEnvs { - - public static TableEnvironment getBatchTableEnv() { - Configuration conf = new Configuration(); - // for batch upsert use cases: current suggestion is to disable these 2 options, - // from 1.14, flink runtime execution mode has switched from streaming - // to batch for batch execution mode(before that, both streaming and batch use streaming execution mode), - // current batch execution mode has these limitations: - // - // 1. the keyed stream default to always sort the inputs by key; - // 2. the batch state-backend requires the inputs sort by state key - // - // For our hudi batch pipeline upsert case, we rely on the consuming sequence for index records and data records, - // the index records must be loaded first before data records for BucketAssignFunction to keep upsert semantics correct, - // so we suggest disabling these 2 options to use streaming state-backend for batch execution mode - // to keep the strategy before 1.14. - conf.setBoolean("execution.sorted-inputs.enabled", false); - conf.setBoolean("execution.batch-state-backend.enabled", false); - StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); - EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build(); - return StreamTableEnvironment.create(execEnv, settings); - } -} diff --git a/hudi-flink-datasource/pom.xml b/hudi-flink-datasource/pom.xml index e309092a2e97..02a9981cce04 100644 --- a/hudi-flink-datasource/pom.xml +++ b/hudi-flink-datasource/pom.xml @@ -33,7 +33,6 @@ - hudi-flink1.13.x hudi-flink1.14.x hudi-flink1.15.x hudi-flink1.16.x diff --git a/packaging/bundle-validation/README.md b/packaging/bundle-validation/README.md index f18419d98812..41a546486ce4 100644 --- a/packaging/bundle-validation/README.md +++ b/packaging/bundle-validation/README.md @@ -33,17 +33,17 @@ the folder. Here are the docker commands to build the image by specifying differ ```shell docker build \ --build-arg HIVE_VERSION=3.1.3 \ - --build-arg FLINK_VERSION=1.13.6 \ + --build-arg FLINK_VERSION=1.14.6 \ --build-arg SPARK_VERSION=3.1.3 \ --build-arg SPARK_HADOOP_VERSION=2.7 \ - -t hudi-ci-bundle-validation-base:flink1136hive313spark313 . -docker image tag hudi-ci-bundle-validation-base:flink1136hive313spark313 apachehudi/hudi-ci-bundle-validation-base:flink1136hive313spark313 + -t hudi-ci-bundle-validation-base:flink1146hive313spark313 . +docker image tag hudi-ci-bundle-validation-base:flink1146hive313spark313 apachehudi/hudi-ci-bundle-validation-base:flink1146hive313spark313 ``` To upload the image with the tag: ```shell -docker push apachehudi/hudi-ci-bundle-validation-base:flink1136hive313spark313 +docker push apachehudi/hudi-ci-bundle-validation-base:flink1146hive313spark313 ``` Note that for each library like Hive and Spark, the download and extraction happen under one `RUN` instruction so that diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index 59fc5d9df397..6b80ab7078d8 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -38,12 +38,12 @@ if [[ ${SPARK_RUNTIME} == 'spark2.4.8' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=2.3.9 DERBY_VERSION=10.10.2.0 - FLINK_VERSION=1.13.6 + FLINK_VERSION=1.14.6 SPARK_VERSION=2.4.8 SPARK_HADOOP_VERSION=2.7 CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 - IMAGE_TAG=flink1136hive239spark248 + IMAGE_TAG=flink1146hive239spark248 elif [[ ${SPARK_RUNTIME} == 'spark3.0.2' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 @@ -58,12 +58,12 @@ elif [[ ${SPARK_RUNTIME} == 'spark3.1.3' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 DERBY_VERSION=10.14.1.0 - FLINK_VERSION=1.13.6 + FLINK_VERSION=1.14.6 SPARK_VERSION=3.1.3 SPARK_HADOOP_VERSION=2.7 CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 - IMAGE_TAG=flink1136hive313spark313 + IMAGE_TAG=flink1146hive313spark313 elif [[ ${SPARK_RUNTIME} == 'spark3.2.3' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 @@ -162,9 +162,7 @@ else HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 fi - if [[ ${FLINK_PROFILE} == 'flink1.13' ]]; then - HUDI_FLINK_BUNDLE_NAME=hudi-flink1.13-bundle - elif [[ ${FLINK_PROFILE} == 'flink1.14' ]]; then + if [[ ${FLINK_PROFILE} == 'flink1.14' ]]; then HUDI_FLINK_BUNDLE_NAME=hudi-flink1.14-bundle elif [[ ${FLINK_PROFILE} == 'flink1.15' ]]; then HUDI_FLINK_BUNDLE_NAME=hudi-flink1.15-bundle diff --git a/packaging/bundle-validation/run_docker_java17.sh b/packaging/bundle-validation/run_docker_java17.sh index d9f50cc90768..1b774eefdf19 100755 --- a/packaging/bundle-validation/run_docker_java17.sh +++ b/packaging/bundle-validation/run_docker_java17.sh @@ -27,12 +27,12 @@ if [[ ${SPARK_RUNTIME} == 'spark2.4.8' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=2.3.9 DERBY_VERSION=10.10.2.0 - FLINK_VERSION=1.13.6 + FLINK_VERSION=1.14.6 SPARK_VERSION=2.4.8 SPARK_HADOOP_VERSION=2.7 CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 - IMAGE_TAG=flink1136hive239spark248 + IMAGE_TAG=flink1146hive239spark248 elif [[ ${SPARK_RUNTIME} == 'spark3.0.2' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 @@ -47,12 +47,12 @@ elif [[ ${SPARK_RUNTIME} == 'spark3.1.3' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 DERBY_VERSION=10.14.1.0 - FLINK_VERSION=1.13.6 + FLINK_VERSION=1.14.6 SPARK_VERSION=3.1.3 SPARK_HADOOP_VERSION=2.7 CONFLUENT_VERSION=5.5.12 KAFKA_CONNECT_HDFS_VERSION=10.1.13 - IMAGE_TAG=flink1136hive313spark313 + IMAGE_TAG=flink1146hive313spark313 elif [[ ${SPARK_RUNTIME} == 'spark3.2.3' ]]; then HADOOP_VERSION=2.7.7 HIVE_VERSION=3.1.3 diff --git a/pom.xml b/pom.xml index da214b0ceb26..d5ce8042db33 100644 --- a/pom.xml +++ b/pom.xml @@ -141,7 +141,6 @@ 1.16.2 1.15.1 1.14.5 - 1.13.6 ${flink1.18.version} hudi-flink1.18.x 1.18 @@ -2685,33 +2684,6 @@ - - flink1.13 - - ${flink1.13.version} - hudi-flink1.13.x - 1.13 - 1.5.6 - 1.11.1 - flink-runtime_${scala.binary.version} - flink-table-runtime-blink_${scala.binary.version} - flink-table-planner-blink_${scala.binary.version} - flink-parquet_${scala.binary.version} - flink-statebackend-rocksdb_${scala.binary.version} - flink-test-utils_${scala.binary.version} - flink-streaming-java_${scala.binary.version} - flink-clients_${scala.binary.version} - flink-connector-kafka_${scala.binary.version} - flink-hadoop-compatibility_${scala.binary.version} - ${flink1.13.version} - true - - - - flink1.13 - - - skipShadeSources diff --git a/scripts/release/deploy_staging_jars.sh b/scripts/release/deploy_staging_jars.sh index d36b3bb814da..058fe289fd60 100755 --- a/scripts/release/deploy_staging_jars.sh +++ b/scripts/release/deploy_staging_jars.sh @@ -83,7 +83,6 @@ declare -a ALL_VERSION_OPTS=( "-Dscala-2.12 -Dspark3 -pl packaging/hudi-spark-bundle -am" # for legacy bundle name hudi-spark3-bundle_2.12 # Upload Flink bundles (overwriting previous uploads) -"-Dscala-2.12 -Dflink1.13 -Davro.version=1.10.0 -pl packaging/hudi-flink-bundle -am" "-Dscala-2.12 -Dflink1.14 -Davro.version=1.10.0 -pl packaging/hudi-flink-bundle -am" "-Dscala-2.12 -Dflink1.15 -Davro.version=1.10.0 -pl packaging/hudi-flink-bundle -am" "-Dscala-2.12 -Dflink1.16 -Davro.version=1.11.1 -pl packaging/hudi-flink-bundle -am" diff --git a/scripts/release/validate_staged_bundles.sh b/scripts/release/validate_staged_bundles.sh index 579dc2410d38..1fc7b9f6e1c7 100755 --- a/scripts/release/validate_staged_bundles.sh +++ b/scripts/release/validate_staged_bundles.sh @@ -32,7 +32,7 @@ declare -a extensions=("-javadoc.jar" "-javadoc.jar.asc" "-javadoc.jar.md5" "-ja "-sources.jar.asc" "-sources.jar.md5" "-sources.jar.sha1" ".jar" ".jar.asc" ".jar.md5" ".jar.sha1" ".pom" ".pom.asc" ".pom.md5" ".pom.sha1") -declare -a bundles=("hudi-aws-bundle" "hudi-cli-bundle_2.11" "hudi-cli-bundle_2.12" "hudi-datahub-sync-bundle" "hudi-flink1.13-bundle" "hudi-flink1.14-bundle" +declare -a bundles=("hudi-aws-bundle" "hudi-cli-bundle_2.11" "hudi-cli-bundle_2.12" "hudi-datahub-sync-bundle" "hudi-flink1.14-bundle" "hudi-flink1.15-bundle" "hudi-flink1.16-bundle" "hudi-flink1.17-bundle" "hudi-flink1.18-bundle" "hudi-gcp-bundle" "hudi-hadoop-mr-bundle" "hudi-hive-sync-bundle" "hudi-integ-test-bundle" "hudi-kafka-connect-bundle" "hudi-metaserver-server-bundle" "hudi-presto-bundle" "hudi-spark-bundle_2.11" "hudi-spark-bundle_2.12" "hudi-spark2.4-bundle_2.11" "hudi-spark2.4-bundle_2.12" "hudi-spark3-bundle_2.12" "hudi-spark3.0-bundle_2.12" "hudi-spark3.1-bundle_2.12" From d1366d83aea58175a32dcc629f275ab7dbcd5ac0 Mon Sep 17 00:00:00 2001 From: Fabio Buso Date: Mon, 20 Nov 2023 03:19:41 +0100 Subject: [PATCH 017/112] [MINOR] Add Hopsworks File System to StorageSchemes (#10141) --- .../main/java/org/apache/hudi/common/fs/StorageSchemes.java | 4 +++- .../java/org/apache/hudi/common/fs/TestStorageSchemes.java | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java index a8e7bb63268a..d43259a412a2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java @@ -76,7 +76,9 @@ public enum StorageSchemes { // Volcengine Cloud HDFS CFS("cfs", true, null, null), // Aliyun Apsara File Storage for HDFS - DFS("dfs", true, false, true); + DFS("dfs", true, false, true), + // Hopsworks File System + HOPSFS("hopsfs", false, false, true); private String scheme; private boolean supportsAppend; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java index 7ac8a9bcabb6..7f5f2305bfa8 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java @@ -76,6 +76,7 @@ public void testStorageSchemes() { assertFalse(StorageSchemes.isAtomicCreationSupported("oci")); assertFalse(StorageSchemes.isAtomicCreationSupported("tos")); assertFalse(StorageSchemes.isAtomicCreationSupported("cfs")); + assertTrue(StorageSchemes.isAtomicCreationSupported("hopsfs")); assertThrows(IllegalArgumentException.class, () -> { StorageSchemes.isAppendSupported("s2"); }, "Should throw exception for unsupported schemes"); From 008320ca375e6a73092cdc76107ede42b5c75d84 Mon Sep 17 00:00:00 2001 From: majian <47964462+majian1998@users.noreply.github.com> Date: Thu, 22 Feb 2024 10:51:48 +0800 Subject: [PATCH 018/112] [HUDI-7207] Sequentially delete complete instant files in archival to prevent inconsistency during data reads (#10711) --- .../apache/hudi/client/HoodieTimelineArchiver.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java index d4abfa82d59f..718f8ad2c46c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java @@ -594,11 +594,13 @@ private boolean deleteArchivedInstants(List archivedInstants, Hoo ); } if (!completedInstants.isEmpty()) { - context.foreach( - completedInstants, - instant -> activeTimeline.deleteInstantFileIfExists(instant), - Math.min(completedInstants.size(), config.getArchiveDeleteParallelism()) - ); + // Due to the concurrency between deleting completed instants and reading data, + // there may be hole in the timeline, which can lead to errors when reading data. + // Therefore, the concurrency of deleting completed instants is temporarily disabled, + // and instants are deleted in ascending order to prevent the occurrence of such holes. + // See HUDI-7207 and #10325. + completedInstants.stream() + .forEach(instant -> activeTimeline.deleteInstantFileIfExists(instant)); } return true; From af3f258ebacd12218319b87343dfdd3e82c6d045 Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Mon, 18 Dec 2023 15:28:48 -0800 Subject: [PATCH 019/112] [HUDI-4699] Claiming RFC for auto record key generation (#10357) --- rfc/README.md | 150 ++++++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 73 deletions(-) diff --git a/rfc/README.md b/rfc/README.md index a43751f98517..941435a30173 100644 --- a/rfc/README.md +++ b/rfc/README.md @@ -34,77 +34,81 @@ The list of all RFCs can be found here. > Older RFC content is still [here](https://cwiki.apache.org/confluence/display/HUDI/RFC+Process). -| RFC Number | Title | Status | -|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| -| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | -| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `COMPLETED` | -| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | -| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | -| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | -| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | -| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | -| 8 | [Metadata based Record Index](./rfc-8/rfc-8.md) | `COMPLETED` | -| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | -| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | -| 11 | [Refactor of the configuration framework of hudi project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | -| 12 | [Efficient Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | -| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | -| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | -| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | -| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | -| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | -| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | -| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | -| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `ONGOING` | -| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | +| RFC Number | Title | Status | +|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| +| 1 | [CSV Source Support for Delta Streamer](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+01+%3A+CSV+Source+Support+for+Delta+Streamer) | `COMPLETED` | +| 2 | [ORC Storage in Hudi](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708439) | `COMPLETED` | +| 3 | [Timeline Service with Incremental File System View Syncing](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=113708965) | `COMPLETED` | +| 4 | [Faster Hive incremental pull queries](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=115513622) | `COMPLETED` | +| 5 | [HUI (Hudi WebUI)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=130027233) | `ABANDONED` | +| 6 | [Add indexing support to the log file](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+06+%3A+Add+indexing+support+to+the+log+file) | `ABANDONED` | +| 7 | [Point in time Time-Travel queries on Hudi table](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+07+%3A+Point+in+time+Time-Travel+queries+on+Hudi+table) | `COMPLETED` | +| 8 | [Metadata based Record Index](./rfc-8/rfc-8.md) | `COMPLETED` | +| 9 | [Hudi Dataset Snapshot Exporter](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+09+%3A+Hudi+Dataset+Snapshot+Exporter) | `COMPLETED` | +| 10 | [Restructuring and auto-generation of docs](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+10+%3A+Restructuring+and+auto-generation+of+docs) | `COMPLETED` | +| 11 | [Refactor of the configuration framework of hudi project](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+11+%3A+Refactor+of+the+configuration+framework+of+hudi+project) | `ABANDONED` | +| 12 | [Efficient Migration of Large Parquet Tables to Apache Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+12+%3A+Efficient+Migration+of+Large+Parquet+Tables+to+Apache+Hudi) | `COMPLETED` | +| 13 | [Integrate Hudi with Flink](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=141724520) | `COMPLETED` | +| 14 | [JDBC incremental puller](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+14+%3A+JDBC+incremental+puller) | `COMPLETED` | +| 15 | [HUDI File Listing Improvements](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+15%3A+HUDI+File+Listing+Improvements) | `COMPLETED` | +| 16 | [Abstraction for HoodieInputFormat and RecordReader](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+16+Abstraction+for+HoodieInputFormat+and+RecordReader) | `COMPLETED` | +| 17 | [Abstract common meta sync module support multiple meta service](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+17+Abstract+common+meta+sync+module+support+multiple+meta+service) | `COMPLETED` | +| 18 | [Insert Overwrite API](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+18+Insert+Overwrite+API) | `COMPLETED` | +| 19 | [Clustering data for freshness and query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+19+Clustering+data+for+freshness+and+query+performance) | `COMPLETED` | +| 20 | [handle failed records](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+20+%3A+handle+failed+records) | `ONGOING` | +| 21 | [Allow HoodieRecordKey to be Virtual](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+21+%3A+Allow+HoodieRecordKey+to+be+Virtual) | `COMPLETED` | | 22 | [Snapshot Isolation using Optimistic Concurrency Control for multi-writers](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+22+%3A+Snapshot+Isolation+using+Optimistic+Concurrency+Control+for+multi-writers) | `COMPLETED` | -| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection) | `ABANDONED` | -| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `COMPLETED` | -| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | -| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `COMPLETED` | -| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `COMPLETED` | -| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `COMPLETED` | -| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `COMPLETED` | -| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `ABANDONED` | -| 31 | [Hive integration Improvement](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment) | `ONGOING` | -| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi) | `ONGOING` | -| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution) | `COMPLETED` | -| 34 | [Hudi BigQuery Integration](./rfc-34/rfc-34.md) | `COMPLETED` | -| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` | -| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `ONGOING` | -| 37 | [Hudi Metadata based Bloom Index](rfc-37/rfc-37.md) | `ONGOING` | -| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `COMPLETED` | -| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `COMPLETED` | -| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `COMPLETED` | -| 41 | [Hudi Snowflake Integration](./rfc-41/rfc-41.md) | `IN PROGRESS` | -| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `ONGOING` | -| 43 | [Table Management Service](./rfc-43/rfc-43.md) | `IN PROGRESS` | -| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `COMPLETED` | -| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `COMPLETED` | -| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `ONGOING` | -| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `COMPLETED` | -| 48 | [LogCompaction for MOR tables](./rfc-48/rfc-48.md) | `ONGOING` | -| 49 | [Support sync with DataHub](./rfc-49/rfc-49.md) | `COMPLETED` | -| 50 | [Improve Timeline Server](./rfc-50/rfc-50.md) | `IN PROGRESS` | -| 51 | [Change Data Capture](./rfc-51/rfc-51.md) | `ONGOING` | -| 52 | [Introduce Secondary Index to Improve HUDI Query Performance](./rfc-52/rfc-52.md) | `ONGOING` | -| 53 | [Use Lock-Free Message Queue Improving Hoodie Writing Efficiency](./rfc-53/rfc-53.md) | `COMPLETED` | -| 54 | [New Table APIs and Streamline Hudi Configs](./rfc-54/rfc-54.md) | `UNDER REVIEW` | -| 55 | [Improve Hive/Meta sync class design and hierarchies](./rfc-55/rfc-55.md) | `COMPLETED` | -| 56 | [Early Conflict Detection For Multi-Writer](./rfc-56/rfc-56.md) | `COMPLETED` | -| 57 | [DeltaStreamer Protobuf Support](./rfc-57/rfc-57.md) | `COMPLETED` | -| 58 | [Integrate column stats index with all query engines](./rfc-58/rfc-58.md) | `UNDER REVIEW` | -| 59 | [Multiple event_time Fields Latest Verification in a Single Table](./rfc-59/rfc-59.md) | `UNDER REVIEW` | -| 60 | [Federated Storage Layer](./rfc-60/rfc-60.md) | `IN PROGRESS` | -| 61 | [Snapshot view management](./rfc-61/rfc-61.md) | `UNDER REVIEW` | -| 62 | [Diagnostic Reporter](./rfc-62/rfc-62.md) | `UNDER REVIEW` | -| 63 | [Index on Function and Logical Partitioning](./rfc-63/rfc-63.md) | `UNDER REVIEW` | -| 64 | [New Hudi Table Spec API for Query Integrations](./rfc-64/rfc-64.md) | `UNDER REVIEW` | -| 65 | [Partition TTL Management](./rfc-65/rfc-65.md) | `UNDER REVIEW` | -| 66 | [Lockless Multi-Writer Support](./rfc-66/rfc-66.md) | `UNDER REVIEW` | -| 67 | [Hudi Bundle Standards](./rfc-67/rfc-67.md) | `UNDER REVIEW` | -| 68 | [A More Effective HoodieMergeHandler for COW Table with Parquet](./rfc-68/rfc-68.md) | `UNDER REVIEW` | -| 69 | [Hudi 1.x](./rfc-69/rfc-69.md) | `UNDER REVIEW` | -| 70 | [Hudi Reverse Streamer](./rfc/rfc-70/rfc-70.md) | `UNDER REVIEW` | -| 71 | [Enhance OCC conflict detection](./rfc/rfc-71/rfc-71.md) | `UNDER REVIEW` | -| 72 | [Redesign Hudi-Spark Integration](./rfc/rfc-72/rfc-72.md) | `ONGOING` | +| 23 | [Hudi Observability metrics collection](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+23+%3A+Hudi+Observability+metrics+collection) | `ABANDONED` | +| 24 | [Hoodie Flink Writer Proposal](https://cwiki.apache.org/confluence/display/HUDI/RFC-24%3A+Hoodie+Flink+Writer+Proposal) | `COMPLETED` | +| 25 | [Spark SQL Extension For Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+25%3A+Spark+SQL+Extension+For+Hudi) | `COMPLETED` | +| 26 | [Optimization For Hudi Table Query](https://cwiki.apache.org/confluence/display/HUDI/RFC-26+Optimization+For+Hudi+Table+Query) | `COMPLETED` | +| 27 | [Data skipping index to improve query performance](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) | `COMPLETED` | +| 28 | [Support Z-order curve](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181307144) | `COMPLETED` | +| 29 | [Hash Index](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+29%3A+Hash+Index) | `COMPLETED` | +| 30 | [Batch operation](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+30%3A+Batch+operation) | `ABANDONED` | +| 31 | [Hive integration Improvement](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+31%3A+Hive+integration+Improvment) | `ONGOING` | +| 32 | [Kafka Connect Sink for Hudi](https://cwiki.apache.org/confluence/display/HUDI/RFC-32+Kafka+Connect+Sink+for+Hudi) | `ONGOING` | +| 33 | [Hudi supports more comprehensive Schema Evolution](https://cwiki.apache.org/confluence/display/HUDI/RFC+-+33++Hudi+supports+more+comprehensive+Schema+Evolution) | `COMPLETED` | +| 34 | [Hudi BigQuery Integration](./rfc-34/rfc-34.md) | `COMPLETED` | +| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` | +| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `ONGOING` | +| 37 | [Hudi Metadata based Bloom Index](rfc-37/rfc-37.md) | `ONGOING` | +| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `COMPLETED` | +| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `COMPLETED` | +| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `COMPLETED` | +| 41 | [Hudi Snowflake Integration](./rfc-41/rfc-41.md) | `IN PROGRESS` | +| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `ONGOING` | +| 43 | [Table Management Service](./rfc-43/rfc-43.md) | `IN PROGRESS` | +| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `COMPLETED` | +| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `COMPLETED` | +| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `ONGOING` | +| 47 | [Add Call Produce Command for Spark SQL](./rfc-47/rfc-47.md) | `COMPLETED` | +| 48 | [LogCompaction for MOR tables](./rfc-48/rfc-48.md) | `ONGOING` | +| 49 | [Support sync with DataHub](./rfc-49/rfc-49.md) | `COMPLETED` | +| 50 | [Improve Timeline Server](./rfc-50/rfc-50.md) | `IN PROGRESS` | +| 51 | [Change Data Capture](./rfc-51/rfc-51.md) | `ONGOING` | +| 52 | [Introduce Secondary Index to Improve HUDI Query Performance](./rfc-52/rfc-52.md) | `ONGOING` | +| 53 | [Use Lock-Free Message Queue Improving Hoodie Writing Efficiency](./rfc-53/rfc-53.md) | `COMPLETED` | +| 54 | [New Table APIs and Streamline Hudi Configs](./rfc-54/rfc-54.md) | `UNDER REVIEW` | +| 55 | [Improve Hive/Meta sync class design and hierarchies](./rfc-55/rfc-55.md) | `COMPLETED` | +| 56 | [Early Conflict Detection For Multi-Writer](./rfc-56/rfc-56.md) | `COMPLETED` | +| 57 | [DeltaStreamer Protobuf Support](./rfc-57/rfc-57.md) | `COMPLETED` | +| 58 | [Integrate column stats index with all query engines](./rfc-58/rfc-58.md) | `UNDER REVIEW` | +| 59 | [Multiple event_time Fields Latest Verification in a Single Table](./rfc-59/rfc-59.md) | `UNDER REVIEW` | +| 60 | [Federated Storage Layer](./rfc-60/rfc-60.md) | `IN PROGRESS` | +| 61 | [Snapshot view management](./rfc-61/rfc-61.md) | `UNDER REVIEW` | +| 62 | [Diagnostic Reporter](./rfc-62/rfc-62.md) | `UNDER REVIEW` | +| 63 | [Functional Indexes](./rfc-63/rfc-63.md) | `UNDER REVIEW` | +| 64 | [New Hudi Table Spec API for Query Integrations](./rfc-64/rfc-64.md) | `UNDER REVIEW` | +| 65 | [Partition TTL Management](./rfc-65/rfc-65.md) | `UNDER REVIEW` | +| 66 | [Lockless Multi-Writer Support](./rfc-66/rfc-66.md) | `UNDER REVIEW` | +| 67 | [Hudi Bundle Standards](./rfc-67/rfc-67.md) | `UNDER REVIEW` | +| 68 | [A More Effective HoodieMergeHandler for COW Table with Parquet](./rfc-68/rfc-68.md) | `UNDER REVIEW` | +| 69 | [Hudi 1.x](./rfc-69/rfc-69.md) | `UNDER REVIEW` | +| 70 | [Hudi Reverse Streamer](./rfc/rfc-70/rfc-70.md) | `UNDER REVIEW` | +| 71 | [Enhance OCC conflict detection](./rfc/rfc-71/rfc-71.md) | `UNDER REVIEW` | +| 72 | [Redesign Hudi-Spark Integration](./rfc/rfc-72/rfc-72.md) | `ONGOING` | +| 73 | [Multi-Table Transactions](./rfc-73/rfc-73.md) | `UNDER REVIEW` | +| 74 | [`HoodieStorage`: Hudi Storage Abstraction and APIs](./rfc-74/rfc-74.md) | `UNDER REVIEW` | +| 75 | [Hudi-Native HFile Reader and Writer](./rfc-75/rfc-75.md) | `UNDER REVIEW` | +| 76 | [Auto Record key generation](./rfc-76/rfc-76.md) | `IN PROGRESS` | \ No newline at end of file From 50119d28644892c27bde2bce6cfff09904b0badc Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Tue, 19 Dec 2023 02:25:41 -0800 Subject: [PATCH 020/112] [HUDI-4699] Adding RFC for auto record key generation (#10365) --- rfc/rfc-76/rfc-76.md | 156 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 rfc/rfc-76/rfc-76.md diff --git a/rfc/rfc-76/rfc-76.md b/rfc/rfc-76/rfc-76.md new file mode 100644 index 000000000000..1ddc107b5ce7 --- /dev/null +++ b/rfc/rfc-76/rfc-76.md @@ -0,0 +1,156 @@ + +# RFC-76: [Auto record key generation] + +## Proposers + +- @nsivabalan + +## Approvers + - @yihua + - @codope + +## Status + +JIRA: https://issues.apache.org/jira/browse/HUDI-4699 + +> Please keep the status updated in `rfc/README.md`. + +## Abstract + +One of the prerequisites to create an Apache Hudi table is to configure record keys(a.k.a primary keys). Since Hudi’s +origin at Uber revolved around supporting mutable workloads at large scale, these were deemed mandatory. As we started +supporting myriad of use-cases and workloads, we realized that defining a record key may not be natural in all cases +like immutable workloads, log ingestion etc. So, this RFC aims at supporting Hudi tables without configuring record +keys by the users. + +## Background +At present ingesting data into Hudi has a few unavoidable prerequisites one of which is specifying record key configuration (with record key serving as primary key). Necessity to specify primary key is one of the core assumptions built into Hudi model centered around being able to update the target table efficiently. However, some types of data/workloads actually don't have a naturally present record key: for ex, when ingesting some kind of "logs" into Hudi there might be no unique identifier held in every record that could serve the purpose of being record key, while meeting global uniqueness requirements of the primary key. There could be other immutable workloads, where the user does not have much insights into the data schema, but prefers to ingest as Hudi table to do some aggregation down the line. In all such scenarios, we want to ensure Users are able to create Hudi table, while still providing for Hudi's core strength with clustering, table services, file size management, incremental queries etc. + +## Implementation + +### Requirements +Let’s take a look at the requirements we have in order to support generating record keys automatically. + +Auto-generated record keys have to provide for global uniqueness w/in the table, not just w/in the batch. +This is necessary to make sure we're able to support updating such tables. +Keys should be generated in a way that would allow for their efficient compression +This is necessary to make sure that auto-generated keys are not bringing substantial overhead (on storage and in handling) +Suggested approach should be compatible with all major execution environments (Spark, Flink, Kafka Connect, Java, etc) +Tables written using spark should be readable using flink, java and vice versa. + +### Synthetic Key +Efficient way to associate an opaque record with an identifying record key or identity value, that is independent of the record content itself, is to simply enumerate the records. +While enumeration itself doesn't present a challenge, we have to, however, make sure that our auto-generation approach is resilient in the case of present failures while persisting the dataset. Here our analysis will be focused squarely on Spark, but similar derivations could be replicated to other execution environments as well. + +Let's consider following scenario: while persisting the dataset, writing one of the files to Cloud Storage fails and Spark is unable to leverage previously cached state of the RDD (and therefore retry just the failing task) and instead it will now have to recompute the whole RDD chain (and create new files). +To provide for aforementioned requirement of the records obtaining globally unique synthetic keys either of the 2 following properties have to hold true: +Key generation has to be deterministic and reproducible (so that upon Spark retries we could be certain same records will be obtaining the identity value they did during previous pass) +Records have to be getting globally unique identity value every time (such that key collisions are simply impossible) +Note that, deterministic and reproducible identity value association is is only feasible for the incoming datasets represented as "determinate" RDDs. However, It's worth pointing out that other RDD classes (such as "unordered", "indeterminate") are very rare occurrences involving some inherent non-determinism (varying content, order, etc), and pose challenges in terms of their respective handling by Hudi even w/o auto-generation (for ex, for such RDDs Hudi can't provide for uniqueness guarantee even for "insert" operation in the presence of failures). +For achieving our goal of providing globally unique keys we're planning on relying on the following synthetic key format comprised of 2 components +(Reserved) Commit timestamp: Use reserved commit timestamp as prefix (to provide for global uniqueness of rows) +Row id: unique identifier of the row (record) w/in the provided batch +Combining them in a single string key as below +"${commit_timestamp}_${batch_row_id}" + +For row-id generation we plan to use a combination of “spark partition id” and a row Id (sequential Id generation) to generate unique identity value for every row w/in batch (this particular component is available in Spark out-of-the-box, but could be easily implemented for any parallel execution framework like Flink, etc) +Please note, that this setup is very similar to how currently _hoodie_commit_seqno is implemented. + +So, the final format is going to be: +"${commit_timestamp}_${spark partition id}, ${row Id}" + +### Auto generated record key encoding +Given that we have narrowed down the record key has to be an objective function of 3 values namely, commit time, spark partitionId and row Id, let’s discuss how we can go about generating the record keys or in other words, how we can encoding these to create the record keys. + +We have few options to go with to experiment: +- Original key format is a string in the format of "--". +- UUID6/7 key format is implemented by using code from https://github.com/f4b6a3/uuid-creator. +- Base64 encoded key format is a string encoded from a byte array which consists of: the lowest 5 bytes from instantTime (supporting millisecond level epoch), the lowest 3 bytes from partitionId (supporting 4 million # of partitions), and lowest 5 bytes from rowId (supporting 1 trillion # of records). Since the Base64 character may use more than one bytes to encode one byte in the array, the average row key size is higher than 13 ( 5 + 3 + 5) bytes in the file. +- Similarly, ASCII encoded key format does the similar algo as Base64 key; however, after generating the byte array, in order to present valid ASCII code, we distributes the 13 * 8 = 114 bits into 114/7 = 15 bytes, and encode it. + +Going back to one of our key requirements wrt auto record key generation is that, our record key generation should be storage optimized and compress well. It also implicitly means that, the time to encode and decode should also be taken into consideration along with the storage space occupied. + +#### Storage comparison + +Based on our experiments, here is the storage comparison across different key encodings. + +| Format | Uncompessed (bytes) : Size of record key column in a parquet file w/ 100k records | Compressed size (bytes) | Compression Ratio | Example | +|--------|---------|-----------|--------|-----| +|Original| 4000185 | 244373 | 11.1 |20230822185245820_8287654_2123456789 | +|UUID 6/7| 4000184 | 1451897 | 2.74 |1ee3d530-b118-61c8-9d92-1384d7a07f9b | +|Base64| 2400184 | 202095 |11.9 |zzwBAAAAAABqLPkJig== | +|ASCII| 1900185 | 176606 |10.8 |${f$A" | + + +### Runtime comparison to generate the record keys + +| Format | Avg runtime (ms) | Ratio compared to baseline (original format) | +|--------|-----------------|----------------------------------------------| +|Original| 0.00001 | 1 | +|UUID 6/7| 0.0001 | 10 | +|Base64| 0.004 | 400 | +|ASCII| 0.004 | 400 | + + +#### Analysis +Both uncompressed and compressed sizes of record key columns in UUID6/7 are much bigger than our original formats, which means we can discard them. +Compared with the base line format Original, Base64 and ASCII formats can produce better results based on the storage usage.Specifially, Base64 format can produce around 17% of storage reduction after Parquet compression, and ASCII can produce around 28% of reduction. However, to extract relevant bytes and do the bit distribution and encoding, Base64 and ASCII can definitely require more CPU powers during writings (400x). + +#### Consensus +So considering the storage size and runtimes across different encoding formats we will settle with the original format ie. "${commit_timestamp}_${spark partition id}, ${row Id}" for our auto record key generation. + +### Info about few dis-regarded approaches + +#### Why randomId generation may not work +It is natural to think why not we simplify further and generate something like "${commit_timestamp}_${RANDOM_NUMBER}”. While this could look very simple and easier to implement, this is not really deterministic. When a subset of spark tasks failed due to executor failure, if the spark dag is re-triggered, a slice of the input data might go through record key generation and if not for being deterministic, it could lead to data inconsistency issues. Because, down the line, our upsert partitioner (file packing) relies on the hash of the record keys. + +#### monotonically_increasing_id in spark +For the same reason quoted above, we can’t go w/ the ready to use id generation in spark, monotonically_increasing_id. In fact, we heard from one of the open source user they were using monotoically increasing id func to generate record keys before ingesting to hudi, and occasionally they could see some data consistency issues. It was very hard to reproduce and narrow down the issue. + +### Injecting Primary Keys into the Dataset +Auto-generated record keys could be injected at different stages: + +**Approach A**: Injecting prior to handling +Injecting into the incoming batch early on (before handing the batch off to the write-client) +**Pros** +Avoids the need to modify any existing Hudi code (assuming that the primary key is always present). Will work with any operation (insert/upserts/bulk-insert). + +**Cons** +Auto-generated key injection have to be replicated across every supported execution environment (Flink, Java, etc) + +**Approach B**: Injecting when writing to base file +Assign to a record when writing out into an actual file +**Pros** +Straightforward approach (similar to how seq-no is already implemented) +This path is shared across all execution environments making it compatible w/ all execution environments out of the box (OOB) +**Cons** +Requires special handling in Hudi code-base (though could be restricted to bulk-insert only) +Our upsert partitioner which packs/routes incoming records to write handles is dependent on the record key (hash or record key). So, if we were to take this approach, we have to introduce a new Upsert Partitioner. + +Since Approach A seems natural and does not seem a lot of heavy lifting to do, we will go with it. + +## Rollout/Adoption Plan + + - What impact (if any) will there be on existing users? + - If we are changing behavior how will we phase out the older behavior? + - If we need special migration tools, describe them here. + - When will we remove the existing behavior + +## Test Plan + +Describe in few sentences how the RFC will be tested. How will we know that the implementation works as expected? How will we know nothing broke?. \ No newline at end of file From 155a66c13de117c8e5b40733d5bdf5ccbf3ffd0e Mon Sep 17 00:00:00 2001 From: StreamingFlames <18889897088@163.com> Date: Mon, 26 Feb 2024 09:48:59 -0800 Subject: [PATCH 021/112] [HUDI-7190] Fix nested columns vectorized read for spark33+ legacy formats (#10265) * [HUDI-7190] Fix legacy parquet format nested columns vectorized read for spark3.3+ * Fix nested type implicit schema evolution * fix legacy format support batch read * Add exception messages when vectorized read nested type with type change --- .../LegacyHoodieParquetFileFormat.scala | 8 +- .../TestAvroSchemaResolutionSupport.scala | 120 +++++++++++++++--- .../spark/sql/hudi/TestInsertTable.scala | 37 ++++++ .../apache/spark/sql/hudi/TestSpark3DDL.scala | 9 +- ...Spark33LegacyHoodieParquetFileFormat.scala | 12 +- ...Spark34LegacyHoodieParquetFileFormat.scala | 19 +-- ...Spark35LegacyHoodieParquetFileFormat.scala | 19 +-- 7 files changed, 179 insertions(+), 45 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/LegacyHoodieParquetFileFormat.scala index 046640c11c1b..d579c9052a4b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/LegacyHoodieParquetFileFormat.scala @@ -38,12 +38,8 @@ class LegacyHoodieParquetFileFormat extends ParquetFileFormat with SparkAdapterS override def toString: String = "Hoodie-Parquet" override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { - if (HoodieSparkUtils.gteqSpark3_4) { - val conf = sparkSession.sessionState.conf - conf.parquetVectorizedReaderEnabled && schema.forall(_.dataType.isInstanceOf[AtomicType]) - } else { - super.supportBatch(sparkSession, schema) - } + sparkAdapter + .createLegacyHoodieParquetFileFormat(true).get.supportBatch(sparkSession, schema) } override def buildReaderWithPartitionValues(sparkSession: SparkSession, diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroSchemaResolutionSupport.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroSchemaResolutionSupport.scala index a8f7c3c10ee1..503cbe64d82d 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroSchemaResolutionSupport.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroSchemaResolutionSupport.scala @@ -23,8 +23,10 @@ import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.SchemaCompatibilityException import org.apache.hudi.testutils.HoodieClientTestBase -import org.apache.spark.sql.types._ + +import org.apache.spark.SparkException import org.apache.spark.sql.{DataFrame, Row, SparkSession} +import org.apache.spark.sql.types._ import org.junit.jupiter.api.{AfterEach, BeforeEach} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{CsvSource, ValueSource} @@ -382,11 +384,13 @@ class TestAvroSchemaResolutionSupport extends HoodieClientTestBase with ScalaAss // upsert upsertData(df2, tempRecordPath, isCow) - // read out the table - val readDf = spark.read.format("hudi").load(tempRecordPath) - readDf.printSchema() - readDf.show(false) - readDf.foreach(_ => {}) + withSQLConf("spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { + // read out the table + val readDf = spark.read.format("hudi").load(tempRecordPath) + readDf.printSchema() + readDf.show(false) + readDf.foreach(_ => {}) + } } @ParameterizedTest @@ -474,11 +478,13 @@ class TestAvroSchemaResolutionSupport extends HoodieClientTestBase with ScalaAss // upsert upsertData(df2, tempRecordPath, isCow) - // read out the table - val readDf = spark.read.format("hudi").load(tempRecordPath) - readDf.printSchema() - readDf.show(false) - readDf.foreach(_ => {}) + withSQLConf("spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { + // read out the table + val readDf = spark.read.format("hudi").load(tempRecordPath) + readDf.printSchema() + readDf.show(false) + readDf.foreach(_ => {}) + } } @ParameterizedTest @@ -536,11 +542,13 @@ class TestAvroSchemaResolutionSupport extends HoodieClientTestBase with ScalaAss // upsert upsertData(df2, tempRecordPath, isCow) - // read out the table - val readDf = spark.read.format("hudi").load(tempRecordPath) - readDf.printSchema() - readDf.show(false) - readDf.foreach(_ => {}) + withSQLConf("spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { + // read out the table + val readDf = spark.read.format("hudi").load(tempRecordPath) + readDf.printSchema() + readDf.show(false) + readDf.foreach(_ => {}) + } } @ParameterizedTest @@ -808,4 +816,84 @@ class TestAvroSchemaResolutionSupport extends HoodieClientTestBase with ScalaAss readDf.show(false) readDf.foreach(_ => {}) } + + @ParameterizedTest + @ValueSource(booleans = Array(true, false)) + def testNestedTypeVectorizedReadWithTypeChange(isCow: Boolean): Unit = { + // test to change the value type of a MAP in a column of ARRAY< MAP > type + val tempRecordPath = basePath + "/record_tbl/" + val arrayMapData = Seq( + Row(1, 100, List(Map("2022-12-01" -> 120), Map("2022-12-02" -> 130)), "aaa") + ) + val arrayMapSchema = new StructType() + .add("id", IntegerType) + .add("userid", IntegerType) + .add("salesMap", ArrayType( + new MapType(StringType, IntegerType, true))) + .add("name", StringType) + val df1 = spark.createDataFrame(spark.sparkContext.parallelize(arrayMapData), arrayMapSchema) + df1.printSchema() + df1.show(false) + + // recreate table + initialiseTable(df1, tempRecordPath, isCow) + + // read out the table, will not throw any exception + readTable(tempRecordPath) + + // change value type from integer to long + val newArrayMapData = Seq( + Row(2, 200, List(Map("2022-12-01" -> 220L), Map("2022-12-02" -> 230L)), "bbb") + ) + val newArrayMapSchema = new StructType() + .add("id", IntegerType) + .add("userid", IntegerType) + .add("salesMap", ArrayType( + new MapType(StringType, LongType, true))) + .add("name", StringType) + val df2 = spark.createDataFrame(spark.sparkContext.parallelize(newArrayMapData), newArrayMapSchema) + df2.printSchema() + df2.show(false) + // upsert + upsertData(df2, tempRecordPath, isCow) + + // after implicit type change, read the table with vectorized read enabled + if (HoodieSparkUtils.gteqSpark3_3) { + assertThrows(classOf[SparkException]){ + withSQLConf("spark.sql.parquet.enableNestedColumnVectorizedReader" -> "true") { + readTable(tempRecordPath) + } + } + } + + withSQLConf("spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { + readTable(tempRecordPath) + } + } + + + private def readTable(path: String): Unit = { + // read out the table + val readDf = spark.read.format("hudi").load(path) + readDf.printSchema() + readDf.show(false) + readDf.foreach(_ => {}) + } + + protected def withSQLConf[T](pairs: (String, String)*)(f: => T): T = { + val conf = spark.sessionState.conf + val currentValues = pairs.unzip._1.map { k => + if (conf.contains(k)) { + Some(conf.getConfString(k)) + } else None + } + pairs.foreach { case (k, v) => conf.setConfString(k, v) } + try f finally { + pairs.unzip._1.zip(currentValues).foreach { + case (key, Some(value)) => conf.setConfString(key, value) + case (key, None) => conf.unsetConf(key) + } + } + } + } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index 16215fe485c7..e7324a1354fe 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -2081,6 +2081,43 @@ class TestInsertTable extends HoodieSparkSqlTestBase { }) } + test("Test vectorized read nested columns for LegacyHoodieParquetFileFormat") { + withSQLConf( + "hoodie.datasource.read.use.new.parquet.file.format" -> "false", + "hoodie.file.group.reader.enabled" -> "false", + "spark.sql.parquet.enableNestedColumnVectorizedReader" -> "true", + "spark.sql.parquet.enableVectorizedReader" -> "true") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | attributes map, + | price double, + | ts long, + | dt string + |) using hudi + | tblproperties (primaryKey = 'id') + | partitioned by (dt) + | location '${tmp.getCanonicalPath}' + """.stripMargin) + spark.sql( + s""" + | insert into $tableName values + | (1, 'a1', map('color', 'red', 'size', 'M'), 10, 1000, '2021-01-05'), + | (2, 'a2', map('color', 'blue', 'size', 'L'), 20, 2000, '2021-01-06'), + | (3, 'a3', map('color', 'green', 'size', 'S'), 30, 3000, '2021-01-07') + """.stripMargin) + // Check the inserted records with map type attributes + checkAnswer(s"select id, name, price, ts, dt from $tableName where attributes.color = 'red'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + } + } + } + def ingestAndValidateDataNoPrecombine(tableType: String, tableName: String, tmp: File, expectedOperationtype: WriteOperationType, setOptions: List[String] = List.empty) : Unit = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala index 6ca1a72edcdb..6a64c69021c8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestSpark3DDL.scala @@ -544,12 +544,12 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { test("Test alter column with complex schema") { withRecordType()(withTempDir { tmp => - Seq("mor").foreach { tableType => + withSQLConf(s"$SPARK_SQL_INSERT_INTO_OPERATION" -> "upsert", + "hoodie.schema.on.read.enable" -> "true", + "spark.sql.parquet.enableNestedColumnVectorizedReader" -> "false") { val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" if (HoodieSparkUtils.gteqSpark3_1) { - spark.sql("set hoodie.schema.on.read.enable=true") - spark.sql("set " + SPARK_SQL_INSERT_INTO_OPERATION.key + "=upsert") spark.sql( s""" |create table $tableName ( @@ -561,7 +561,7 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { |) using hudi | location '$tablePath' | options ( - | type = '$tableType', + | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' | ) @@ -628,7 +628,6 @@ class TestSpark3DDL extends HoodieSparkSqlTestBase { ) } } - spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key) }) } diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala index 3b53b753b69d..3176668dab64 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark33LegacyHoodieParquetFileFormat.scala @@ -50,6 +50,8 @@ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} import org.apache.spark.util.SerializableConfiguration +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + import java.net.URI /** @@ -121,8 +123,7 @@ class Spark33LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu val sqlConf = sparkSession.sessionState.conf val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled val enableVectorizedReader: Boolean = - sqlConf.parquetVectorizedReaderEnabled && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + ParquetUtils.isBatchReadSupportedForSchema(sqlConf, resultSchema) val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion val capacity = sqlConf.parquetVectorizedReaderBatchSize @@ -243,6 +244,13 @@ class Spark33LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu implicitTypeChangeInfo } + if (enableVectorizedReader && shouldUseInternalSchema && + !typeChangeInfos.values().forall(_.getLeft.isInstanceOf[AtomicType])) { + throw new IllegalArgumentException( + "Nested types with type changes(implicit or explicit) cannot be read in vectorized mode. " + + "To workaround this issue, set spark.sql.parquet.enableVectorizedReader=false.") + } + val hadoopAttemptContext = new TaskAttemptContextImpl(hadoopAttemptConf, attemptId) diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala index cd76ce6f3b2e..a1cfbb96212b 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark34LegacyHoodieParquetFileFormat.scala @@ -47,6 +47,9 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} import org.apache.spark.util.SerializableConfiguration + +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + /** * This class is an extension of [[ParquetFileFormat]] overriding Spark-specific behavior * that's not possible to customize in any other way @@ -59,11 +62,6 @@ import org.apache.spark.util.SerializableConfiguration */ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValues: Boolean) extends ParquetFileFormat { - override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { - val conf = sparkSession.sessionState.conf - conf.parquetVectorizedReaderEnabled && schema.forall(_.dataType.isInstanceOf[AtomicType]) - } - def supportsColumnar(sparkSession: SparkSession, schema: StructType): Boolean = { val conf = sparkSession.sessionState.conf // Only output columnar if there is WSCG to read it. @@ -133,9 +131,7 @@ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu val resultSchema = StructType(partitionSchema.fields ++ requiredSchema.fields) val sqlConf = sparkSession.sessionState.conf val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled - val enableVectorizedReader: Boolean = - sqlConf.parquetVectorizedReaderEnabled && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + val enableVectorizedReader: Boolean = supportBatch(sparkSession, resultSchema) val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion val capacity = sqlConf.parquetVectorizedReaderBatchSize @@ -259,6 +255,13 @@ class Spark34LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu implicitTypeChangeInfo } + if (enableVectorizedReader && shouldUseInternalSchema && + !typeChangeInfos.values().forall(_.getLeft.isInstanceOf[AtomicType])) { + throw new IllegalArgumentException( + "Nested types with type changes(implicit or explicit) cannot be read in vectorized mode. " + + "To workaround this issue, set spark.sql.parquet.enableVectorizedReader=false.") + } + val hadoopAttemptContext = new TaskAttemptContextImpl(hadoopAttemptConf, attemptId) diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala index dd70aa08b856..b6177b942fcf 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala @@ -48,6 +48,9 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{AtomicType, DataType, StructField, StructType} import org.apache.spark.util.SerializableConfiguration + +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + /** * This class is an extension of [[ParquetFileFormat]] overriding Spark-specific behavior * that's not possible to customize in any other way @@ -60,11 +63,6 @@ import org.apache.spark.util.SerializableConfiguration */ class Spark35LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValues: Boolean) extends ParquetFileFormat { - override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { - val conf = sparkSession.sessionState.conf - conf.parquetVectorizedReaderEnabled && schema.forall(_.dataType.isInstanceOf[AtomicType]) - } - def supportsColumnar(sparkSession: SparkSession, schema: StructType): Boolean = { val conf = sparkSession.sessionState.conf // Only output columnar if there is WSCG to read it. @@ -134,9 +132,7 @@ class Spark35LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu val resultSchema = StructType(partitionSchema.fields ++ requiredSchema.fields) val sqlConf = sparkSession.sessionState.conf val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled - val enableVectorizedReader: Boolean = - sqlConf.parquetVectorizedReaderEnabled && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + val enableVectorizedReader: Boolean = supportBatch(sparkSession, resultSchema) val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion val capacity = sqlConf.parquetVectorizedReaderBatchSize @@ -260,6 +256,13 @@ class Spark35LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu implicitTypeChangeInfo } + if (enableVectorizedReader && shouldUseInternalSchema && + !typeChangeInfos.values().forall(_.getLeft.isInstanceOf[AtomicType])) { + throw new IllegalArgumentException( + "Nested types with type changes(implicit or explicit) cannot be read in vectorized mode. " + + "To workaround this issue, set spark.sql.parquet.enableVectorizedReader=false.") + } + val hadoopAttemptContext = new TaskAttemptContextImpl(hadoopAttemptConf, attemptId) From e1625b1d91f24b2fde5e9f84451c1791993623cd Mon Sep 17 00:00:00 2001 From: leixin <1403342953@qq.com> Date: Thu, 21 Dec 2023 10:07:54 +0800 Subject: [PATCH 022/112] [HUDI-7213] When using wrong tabe.type value in hudi catalog happends npe (#10300) --- .../hudi/table/catalog/TableOptionProperties.java | 12 +++++++++++- .../hudi/table/catalog/TestHoodieHiveCatalog.java | 10 ++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java index 6e327bdc6120..8f3e88417bef 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.sync.common.util.SparkDataSourceTableUtils; import org.apache.hudi.util.AvroSchemaConverter; @@ -189,7 +190,16 @@ public static Map translateFlinkTableProperties2Spark( return properties.entrySet().stream() .filter(e -> KEY_MAPPING.containsKey(e.getKey()) && !catalogTable.getOptions().containsKey(KEY_MAPPING.get(e.getKey()))) .collect(Collectors.toMap(e -> KEY_MAPPING.get(e.getKey()), - e -> e.getKey().equalsIgnoreCase(FlinkOptions.TABLE_TYPE.key()) ? VALUE_MAPPING.get(e.getValue()) : e.getValue())); + e -> { + if (e.getKey().equalsIgnoreCase(FlinkOptions.TABLE_TYPE.key())) { + String sparkTableType = VALUE_MAPPING.get(e.getValue()); + if (sparkTableType == null) { + throw new HoodieValidationException(String.format("%s's value is invalid", e.getKey())); + } + return sparkTableType; + } + return e.getValue(); + })); } private static RowType supplementMetaFields(RowType rowType, boolean withOperationField) { diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java index af1549498ed0..8af557c4b649 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java @@ -270,6 +270,16 @@ public void testCreateNonHoodieTable() throws TableAlreadyExistException, Databa } } + @Test + public void testCreateHoodieTableWithWrongTableType() { + HashMap properties = new HashMap<>(); + properties.put(FactoryUtil.CONNECTOR.key(), "hudi"); + properties.put("table.type","wrong type"); + CatalogTable table = + new CatalogTableImpl(schema, properties, "hudi table"); + assertThrows(HoodieCatalogException.class, () -> hoodieCatalog.createTable(tablePath, table, false)); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testDropTable(boolean external) throws TableAlreadyExistException, DatabaseNotExistException, TableNotExistException, IOException { From a8ef9d40206fa0f4e581654b60c0d7ce57f5330b Mon Sep 17 00:00:00 2001 From: Jinpeng Date: Thu, 21 Dec 2023 18:48:04 -0800 Subject: [PATCH 023/112] [HUDI-7242] Avoid unnecessary bigquery table update when using sync tool (#10374) Co-authored-by: jp0317 --- .../hudi/gcp/bigquery/BigQuerySyncTool.java | 2 +- .../bigquery/HoodieBigQuerySyncClient.java | 12 ++++--- .../TestHoodieBigQuerySyncClient.java | 35 +++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java index 4ddd153c43f2..6e064dd59c68 100644 --- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/BigQuerySyncTool.java @@ -117,7 +117,7 @@ public void syncHoodieTable() { private boolean tableExists(HoodieBigQuerySyncClient bqSyncClient, String tableName) { if (bqSyncClient.tableExists(tableName)) { - LOG.info(tableName + " already exists"); + LOG.info(tableName + " already exists. Skip table creation."); return true; } return false; diff --git a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java index af56194214df..5a23a4079ae2 100644 --- a/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java +++ b/hudi-gcp/src/main/java/org/apache/hudi/gcp/bigquery/HoodieBigQuerySyncClient.java @@ -47,6 +47,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -182,16 +183,19 @@ public void updateTableSchema(String tableName, Schema schema, List part Table existingTable = bigquery.getTable(TableId.of(projectId, datasetName, tableName)); ExternalTableDefinition definition = existingTable.getDefinition(); Schema remoteTableSchema = definition.getSchema(); - // Add the partition fields into the schema to avoid conflicts while updating - List updatedTableFields = remoteTableSchema.getFields().stream() + List finalTableFields = new ArrayList<>(schema.getFields()); + // Add the partition fields into the schema to avoid conflicts while updating. And ensure the partition fields are at the end to + // avoid unnecessary updates. + List bqPartitionFields = remoteTableSchema.getFields().stream() .filter(field -> partitionFields.contains(field.getName())) .collect(Collectors.toList()); - updatedTableFields.addAll(schema.getFields()); - Schema finalSchema = Schema.of(updatedTableFields); + finalTableFields.addAll(bqPartitionFields); + Schema finalSchema = Schema.of(finalTableFields); boolean sameSchema = definition.getSchema() != null && definition.getSchema().equals(finalSchema); boolean samePartitionFilter = partitionFields.isEmpty() || (requirePartitionFilter == (definition.getHivePartitioningOptions().getRequirePartitionFilter() != null && definition.getHivePartitioningOptions().getRequirePartitionFilter())); if (sameSchema && samePartitionFilter) { + LOG.info("No table update is needed."); return; // No need to update schema. } ExternalTableDefinition.Builder builder = definition.toBuilder(); diff --git a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java index 37b2800b563d..a3cae4c985a1 100644 --- a/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java +++ b/hudi-gcp/src/test/java/org/apache/hudi/gcp/bigquery/TestHoodieBigQuerySyncClient.java @@ -25,13 +25,16 @@ import org.apache.hudi.sync.common.HoodieSyncConfig; import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.ExternalTableDefinition; import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.HivePartitioningOptions; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.JobStatus; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.Schema; import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -39,12 +42,17 @@ import org.junit.jupiter.api.io.TempDir; import org.mockito.ArgumentCaptor; +import java.util.ArrayList; import java.nio.file.Path; +import java.util.List; import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.verify; public class TestHoodieBigQuerySyncClient { private static final String PROJECT_ID = "test_project"; @@ -125,4 +133,31 @@ void createTableWithManifestFile_nonPartitioned() throws Exception { String.format("CREATE OR REPLACE EXTERNAL TABLE `%s.%s.%s` ( `field` STRING ) OPTIONS (enable_list_inference=true, uris=[\"%s\"], format=\"PARQUET\", " + "file_set_spec_type=\"NEW_LINE_DELIMITED_MANIFEST\")", PROJECT_ID, TEST_DATASET, TEST_TABLE, MANIFEST_FILE_URI)); } + + @Test + void skipUpdatingSchema_partitioned() throws Exception { + BigQuerySyncConfig config = new BigQuerySyncConfig(properties); + client = new HoodieBigQuerySyncClient(config, mockBigQuery); + Table mockTable = mock(Table.class); + ExternalTableDefinition mockTableDefinition = mock(ExternalTableDefinition.class); + // The table schema has no change: it contains a "field" and a "partition_field". + Schema schema = Schema.of(Field.of("field", StandardSQLTypeName.STRING)); + List partitionFields = new ArrayList(); + partitionFields.add("partition_field"); + List bqFields = new ArrayList(); + // The "partition_field" always follows "field". + bqFields.add(Field.of("field", StandardSQLTypeName.STRING)); + bqFields.add(Field.of("partition_field", StandardSQLTypeName.STRING)); + Schema bqSchema = Schema.of(bqFields); + HivePartitioningOptions hivePartitioningOptions = HivePartitioningOptions.newBuilder().setRequirePartitionFilter(true).build(); + + when(mockBigQuery.getTable(any())).thenReturn(mockTable); + when(mockTable.getDefinition()).thenReturn(mockTableDefinition); + when(mockTableDefinition.getSchema()).thenReturn(bqSchema); + when(mockTableDefinition.getHivePartitioningOptions()).thenReturn(hivePartitioningOptions); + + client.updateTableSchema(TEST_TABLE, schema, partitionFields); + // Expect no update. + verify(mockBigQuery, never()).update(mockTable); + } } From 353d281e19ba009fd42705e21592b109b64ac85e Mon Sep 17 00:00:00 2001 From: zhuanshenbsj1 <34104400+zhuanshenbsj1@users.noreply.github.com> Date: Sat, 23 Dec 2023 10:44:32 +0800 Subject: [PATCH 024/112] [MINOR] Merge logs into check instant file of HoodieActiveTimeline.transitionPendingState (#10392) --- .../hudi/common/table/timeline/HoodieActiveTimeline.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java index 7f247b622d6a..7ba5205c5fc2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java @@ -614,8 +614,8 @@ protected void transitionState(HoodieInstant fromInstant, HoodieInstant toInstan } } else { // Ensures old state exists in timeline - LOG.info("Checking for file exists ?" + getInstantFileNamePath(fromInstant.getFileName())); - ValidationUtils.checkArgument(metaClient.getFs().exists(getInstantFileNamePath(fromInstant.getFileName()))); + ValidationUtils.checkArgument(metaClient.getFs().exists(getInstantFileNamePath(fromInstant.getFileName())), + "File " + getInstantFileNamePath(fromInstant.getFileName()) + " does not exist!"); // Use Write Once to create Target File if (allowRedundantTransitions) { FileIOUtils.createFileInPath(metaClient.getFs(), getInstantFileNamePath(toInstant.getFileName()), data); From 5faefcd01fa894c9d8845d96cc0f07ca4cfa7968 Mon Sep 17 00:00:00 2001 From: xuzifu666 Date: Wed, 27 Dec 2023 13:13:31 +0800 Subject: [PATCH 025/112] [MINOR] DataStream need in closeure in FileSystemBasedLockProvider (#10411) Co-authored-by: xuyu <11161569@vivo.com> --- .../transaction/lock/FileSystemBasedLockProvider.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java index da7e71a20580..1d32620b043a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java @@ -163,12 +163,10 @@ private boolean checkIfExpired() { } private void acquireLock() { - try { + try (FSDataOutputStream fos = fs.create(this.lockFile, false)) { if (!fs.exists(this.lockFile)) { - FSDataOutputStream fos = fs.create(this.lockFile, false); initLockInfo(); fos.writeBytes(lockInfo.toString()); - fos.close(); } } catch (IOException e) { throw new HoodieIOException(generateLogStatement(LockState.FAILED_TO_ACQUIRE), e); @@ -182,11 +180,9 @@ public void initLockInfo() { } public void reloadCurrentOwnerLockInfo() { - try { + try (FSDataInputStream fis = fs.open(this.lockFile)) { if (fs.exists(this.lockFile)) { - FSDataInputStream fis = fs.open(this.lockFile); this.currentOwnerLockInfo = FileIOUtils.readAsUTFString(fis); - fis.close(); } else { this.currentOwnerLockInfo = ""; } From 1be74478d9c9476d80c4bff44b96dd0170310d03 Mon Sep 17 00:00:00 2001 From: hehuiyuan <471627698@qq.com> Date: Thu, 28 Dec 2023 15:19:48 +0800 Subject: [PATCH 026/112] [HUDI-7249] Disable mor compaction scheduling when using append mode (#10388) --- .../src/main/java/org/apache/hudi/table/HoodieTableSink.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java index 94676e6208e2..d6ea0f5dabe9 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java @@ -96,6 +96,8 @@ public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { // Append mode if (OptionsResolver.isAppendMode(conf)) { + // close compaction for append mode + conf.set(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, false); DataStream pipeline = Pipelines.append(conf, rowType, dataStream); if (OptionsResolver.needsAsyncClustering(conf)) { return Pipelines.cluster(conf, rowType, pipeline); From 94a162a4059230f56a786cda4b69c0eae60c008c Mon Sep 17 00:00:00 2001 From: xuzifu666 Date: Mon, 1 Jan 2024 13:14:59 +0800 Subject: [PATCH 027/112] [HUDI-7268] HoodieFlinkStreamer should disable compaction in pipeline with append mode (#10430) Co-authored-by: xuyu <11161569@vivo.com> --- .../main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java index 62d22869f64e..b95fe954a36f 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/HoodieFlinkStreamer.java @@ -103,6 +103,8 @@ public static void main(String[] args) throws Exception { DataStream pipeline; // Append mode if (OptionsResolver.isAppendMode(conf)) { + // append mode should not compaction operator + conf.set(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, false); pipeline = Pipelines.append(conf, rowType, dataStream); if (OptionsResolver.needsAsyncClustering(conf)) { Pipelines.cluster(conf, rowType, pipeline); From acace8f799fac08b70fd6e8f9070aec8e79bc9e2 Mon Sep 17 00:00:00 2001 From: empcl <1515827454@qq.com> Date: Tue, 2 Jan 2024 09:05:01 +0800 Subject: [PATCH 028/112] [HUDI-7260] Fix call repair_overwrite_hoodie_props failure error due to specify hoodie.properties path (#10413) --- .../RepairOverwriteHoodiePropsProcedure.scala | 12 ++++++++- .../hudi/procedure/TestRepairsProcedure.scala | 27 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala index 81a09e147a73..51bafb5e201a 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.hudi.command.procedures +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} import org.apache.spark.internal.Logging @@ -47,6 +49,14 @@ class RepairOverwriteHoodiePropsProcedure extends BaseProcedure with ProcedureBu def outputType: StructType = OUTPUT_TYPE + def loadNewProps(filePath: String, props: Properties):Unit = { + val fs = FSUtils.getFs(filePath, new Configuration()) + val fis = fs.open(new Path(filePath)) + props.load(fis) + + fis.close() + } + override def call(args: ProcedureArgs): Seq[Row] = { super.checkArgs(PARAMETERS, args) @@ -57,7 +67,7 @@ class RepairOverwriteHoodiePropsProcedure extends BaseProcedure with ProcedureBu val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(tablePath).build var newProps = new Properties - newProps.load(new FileInputStream(overwriteFilePath)) + loadNewProps(overwriteFilePath, newProps) val oldProps = metaClient.getTableConfig.propsMap val metaPathDir = new Path(tablePath, METAFOLDER_NAME) HoodieTableConfig.create(metaClient.getFs, metaPathDir, newProps) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala index eaf977e82d1d..7d3c269f8ad4 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala @@ -34,7 +34,9 @@ import org.junit.jupiter.api.Assertions.assertEquals import java.io.IOException import java.net.URL import java.nio.file.{Files, Paths} +import java.util.Properties import scala.collection.JavaConverters.asScalaIteratorConverter +import scala.jdk.CollectionConverters.asScalaSetConverter class TestRepairsProcedure extends HoodieSparkProcedureTestBase { @@ -106,6 +108,22 @@ class TestRepairsProcedure extends HoodieSparkProcedureTestBase { | preCombineField = 'ts' | ) """.stripMargin) + + val filePath = s"""$tablePath/.hoodie/hoodie.properties""" + val fs = FSUtils.getFs(filePath, new Configuration()) + val fis = fs.open(new Path(filePath)) + val prevProps = new Properties + prevProps.load(fis) + fis.close() + + // write props to a file + val curPropPath = s"""${tmp.getCanonicalPath}/tmp/hoodie.properties""" + val path = new Path(curPropPath) + val out = fs.create(path) + prevProps.store(out, "hudi properties") + out.close() + fs.close() + // create commit instant val newProps: URL = this.getClass.getClassLoader.getResource("table-config.properties") @@ -140,6 +158,15 @@ class TestRepairsProcedure extends HoodieSparkProcedureTestBase { .mkString("\n") assertEquals(expectedOutput, actual) + + spark.sql(s"""call repair_overwrite_hoodie_props(table => '$tableName', new_props_file_path => '${curPropPath}')""") + val config = HoodieTableMetaClient.builder().setBasePath(tablePath).setConf(new Configuration()).build().getTableConfig + val props = config.getProps + assertEquals(prevProps.size(), props.size()) + props.entrySet().asScala.foreach((entry) => { + val key = entry.getKey.toString + assertEquals(entry.getValue, prevProps.getProperty(key)) + }) } } From 2601a0e104412207c8659bbe93f7470725f7ca55 Mon Sep 17 00:00:00 2001 From: Dongsj <90449228+eric9204@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:23:07 +0800 Subject: [PATCH 029/112] [MINOR] Fix ArchivalUtils Logger named (#10436) Co-authored-by: dongsj --- .../main/java/org/apache/hudi/client/utils/ArchivalUtils.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/ArchivalUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/ArchivalUtils.java index 1ef85f5ae358..3a6d2509ad9b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/ArchivalUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/ArchivalUtils.java @@ -20,7 +20,6 @@ package org.apache.hudi.client.utils; -import org.apache.hudi.client.HoodieTimelineArchiver; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -51,7 +50,7 @@ */ public class ArchivalUtils { - private static final Logger LOG = LoggerFactory.getLogger(HoodieTimelineArchiver.class); + private static final Logger LOG = LoggerFactory.getLogger(ArchivalUtils.class); /** * getMinAndMaxInstantsToKeep is used by archival service to find the From 595d23029d3a109e34d0e359eb9a1119e7bb0244 Mon Sep 17 00:00:00 2001 From: harshal Date: Thu, 4 Jan 2024 12:59:16 +0530 Subject: [PATCH 030/112] [HUDI-7198] Create nested node path if does not exist for zookeeper. (#10438) --- .../lock/ZookeeperBasedLockProvider.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java index 31b92dcf914e..4299a603ece9 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/ZookeeperBasedLockProvider.java @@ -31,6 +31,7 @@ import org.apache.curator.framework.recipes.locks.InterProcessMutex; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.hadoop.conf.Configuration; +import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,8 +75,48 @@ public ZookeeperBasedLockProvider(final LockConfiguration lockConfiguration, fin .connectionTimeoutMs(lockConfiguration.getConfig().getInteger(ZK_CONNECTION_TIMEOUT_MS_PROP_KEY, DEFAULT_ZK_CONNECTION_TIMEOUT_MS)) .build(); this.curatorFrameworkClient.start(); + createPathIfNotExists(); } + private String getLockPath() { + return lockConfiguration.getConfig().getString(ZK_BASE_PATH_PROP_KEY) + "/" + + this.lockConfiguration.getConfig().getString(ZK_LOCK_KEY_PROP_KEY); + } + + private void createPathIfNotExists() { + try { + String lockPath = getLockPath(); + LOG.info(String.format("Creating zookeeper path %s if not exists", lockPath)); + String[] parts = lockPath.split("/"); + StringBuilder currentPath = new StringBuilder(); + for (String part : parts) { + if (!part.isEmpty()) { + currentPath.append("/").append(part); + createNodeIfNotExists(currentPath.toString()); + } + } + } catch (Exception e) { + LOG.error("Failed to create ZooKeeper path: " + e.getMessage()); + throw new HoodieLockException("Failed to initialize ZooKeeper path", e); + } + } + + private void createNodeIfNotExists(String path) throws Exception { + if (this.curatorFrameworkClient.checkExists().forPath(path) == null) { + try { + this.curatorFrameworkClient.create().forPath(path); + // to avoid failure due to synchronous calls. + } catch (KeeperException e) { + if (e.code() == KeeperException.Code.NODEEXISTS) { + LOG.debug(String.format("Node already exist for path = %s", path)); + } else { + throw new HoodieLockException("Failed to create zookeeper node", e); + } + } + } + } + + // Only used for testing public ZookeeperBasedLockProvider( final LockConfiguration lockConfiguration, final CuratorFramework curatorFrameworkClient) { @@ -85,6 +126,7 @@ public ZookeeperBasedLockProvider( synchronized (this.curatorFrameworkClient) { if (this.curatorFrameworkClient.getState() != CuratorFrameworkState.STARTED) { this.curatorFrameworkClient.start(); + createPathIfNotExists(); } } } From 37ff8fee231dcd5327b7d2c712b41aee16e0b67f Mon Sep 17 00:00:00 2001 From: leixin <1403342953@qq.com> Date: Fri, 5 Jan 2024 10:44:05 +0800 Subject: [PATCH 031/112] [HUDI-7271] Copy a conf in ClusteringOperator to avoid configuration leak (#10441) Co-authored-by: leixin1 --- .../org/apache/hudi/sink/clustering/ClusteringOperator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java index 75e63d69b5fd..415b1024cfdc 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java @@ -128,7 +128,8 @@ public class ClusteringOperator extends TableStreamOperator Date: Thu, 4 Jan 2024 21:36:41 -0800 Subject: [PATCH 032/112] [MINOR] Updating doap file for 0.14.1 release (#10439) --- doap_HUDI.rdf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doap_HUDI.rdf b/doap_HUDI.rdf index 9a5eb593a3fc..015dab0bfb45 100644 --- a/doap_HUDI.rdf +++ b/doap_HUDI.rdf @@ -131,6 +131,11 @@ 2023-09-28 0.14.0 + + Apache Hudi 0.14.1 + 2024-01-04 + 0.14.1 + From 60b073fea4c031ac2a36434e32538f5afcc7fd4c Mon Sep 17 00:00:00 2001 From: leixin <1403342953@qq.com> Date: Sun, 7 Jan 2024 16:58:28 +0800 Subject: [PATCH 033/112] [HUDI-7266] Add clustering metric for flink (#10420) --- .../hudi/metrics/FlinkClusteringMetrics.java | 105 ++++++++++++++++++ .../sink/clustering/ClusteringCommitSink.java | 12 ++ .../sink/clustering/ClusteringOperator.java | 14 +++ .../clustering/ClusteringPlanOperator.java | 22 +++- .../sink/utils/ClusteringFunctionWrapper.java | 6 + 5 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/metrics/FlinkClusteringMetrics.java diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/metrics/FlinkClusteringMetrics.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/metrics/FlinkClusteringMetrics.java new file mode 100644 index 000000000000..081c8f79a73f --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/metrics/FlinkClusteringMetrics.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.metrics; + +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.sink.clustering.ClusteringOperator; +import org.apache.hudi.sink.clustering.ClusteringPlanOperator; + +import org.apache.flink.metrics.MetricGroup; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.ParseException; +import java.time.Duration; +import java.time.Instant; + +/** + * Metrics for flink clustering. + */ +public class FlinkClusteringMetrics extends FlinkWriteMetrics { + + private static final Logger LOG = LoggerFactory.getLogger(FlinkClusteringMetrics.class); + + /** + * Key for clustering timer. + */ + private static final String CLUSTERING_KEY = "clustering"; + + /** + * Number of pending clustering instants. + * + * @see ClusteringPlanOperator + */ + private long pendingClusteringCount; + + /** + * Duration between the earliest pending clustering instant time and now in seconds. + * + * @see ClusteringPlanOperator + */ + private long clusteringDelay; + + /** + * Cost for consuming a clustering operation in milliseconds. + * + * @see ClusteringOperator + */ + private long clusteringCost; + + public FlinkClusteringMetrics(MetricGroup metricGroup) { + super(metricGroup, CLUSTERING_KEY); + } + + @Override + public void registerMetrics() { + super.registerMetrics(); + metricGroup.gauge(getMetricsName(actionType, "pendingClusteringCount"), () -> pendingClusteringCount); + metricGroup.gauge(getMetricsName(actionType, "clusteringDelay"), () -> clusteringDelay); + metricGroup.gauge(getMetricsName(actionType, "clusteringCost"), () -> clusteringCost); + } + + public void setPendingClusteringCount(long pendingClusteringCount) { + this.pendingClusteringCount = pendingClusteringCount; + } + + public void setFirstPendingClusteringInstant(Option firstPendingClusteringInstant) { + try { + if (!firstPendingClusteringInstant.isPresent()) { + this.clusteringDelay = 0L; + } else { + Instant start = HoodieInstantTimeGenerator.parseDateFromInstantTime((firstPendingClusteringInstant.get()).getTimestamp()).toInstant(); + this.clusteringDelay = Duration.between(start, Instant.now()).getSeconds(); + } + } catch (ParseException e) { + LOG.warn("Invalid input clustering instant" + firstPendingClusteringInstant); + } + } + + public void startClustering() { + startTimer(CLUSTERING_KEY); + } + + public void endClustering() { + this.clusteringCost = stopTimer(CLUSTERING_KEY); + } + +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringCommitSink.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringCommitSink.java index 93b6d4fbf951..75f025687e47 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringCommitSink.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringCommitSink.java @@ -35,6 +35,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieClusteringException; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.metrics.FlinkClusteringMetrics; import org.apache.hudi.sink.CleanFunction; import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.table.action.HoodieWriteMetadata; @@ -42,6 +43,7 @@ import org.apache.hudi.util.FlinkWriteClients; import org.apache.flink.configuration.Configuration; +import org.apache.flink.metrics.MetricGroup; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,6 +90,8 @@ public class ClusteringCommitSink extends CleanFunction { */ private transient Map clusteringPlanCache; + private transient FlinkClusteringMetrics clusteringMetrics; + public ClusteringCommitSink(Configuration conf) { super(conf); this.conf = conf; @@ -102,6 +106,7 @@ public void open(Configuration parameters) throws Exception { this.commitBuffer = new HashMap<>(); this.clusteringPlanCache = new HashMap<>(); this.table = writeClient.getHoodieTable(); + registerMetrics(); } @Override @@ -194,6 +199,7 @@ private void doCommit(String instant, HoodieClusteringPlan clusteringPlan, Colle this.writeClient.completeTableService( TableServiceType.CLUSTER, writeMetadata.getCommitMetadata().get(), table, instant, Option.of(HoodieListData.lazy(writeMetadata.getWriteStatuses()))); + clusteringMetrics.updateCommitMetrics(instant, writeMetadata.getCommitMetadata().get()); // whether to clean up the input base parquet files used for clustering if (!conf.getBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED) && !isCleaning) { LOG.info("Running inline clean"); @@ -229,4 +235,10 @@ private static Map> getPartitionToReplacedFileIds( .filter(fg -> !newFilesWritten.contains(fg)) .collect(Collectors.groupingBy(HoodieFileGroupId::getPartitionPath, Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toList()))); } + + private void registerMetrics() { + MetricGroup metrics = getRuntimeContext().getMetricGroup(); + clusteringMetrics = new FlinkClusteringMetrics(metrics); + clusteringMetrics.registerMetrics(); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java index 415b1024cfdc..6aa5dd9acbac 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java @@ -44,6 +44,7 @@ import org.apache.hudi.io.storage.HoodieAvroFileReader; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.hudi.metrics.FlinkClusteringMetrics; import org.apache.hudi.sink.bulk.BulkInsertWriterHelper; import org.apache.hudi.sink.bulk.sort.SortOperatorGen; import org.apache.hudi.sink.utils.NonThrownExecutor; @@ -58,6 +59,7 @@ import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.configuration.Configuration; import org.apache.flink.metrics.Gauge; +import org.apache.flink.metrics.MetricGroup; import org.apache.flink.runtime.memory.MemoryManager; import org.apache.flink.streaming.api.graph.StreamConfig; import org.apache.flink.streaming.api.operators.BoundedOneInput; @@ -127,6 +129,8 @@ public class ClusteringOperator extends TableStreamOperator(output); + + registerMetrics(); } @Override @@ -213,6 +219,7 @@ public void endInput() { // ------------------------------------------------------------------------- private void doClustering(String instantTime, List clusteringOperations) throws Exception { + clusteringMetrics.startClustering(); BulkInsertWriterHelper writerHelper = new BulkInsertWriterHelper(this.conf, this.table, this.writeConfig, instantTime, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), getRuntimeContext().getAttemptNumber(), this.rowType, true); @@ -247,6 +254,7 @@ instantTime, this.taskID, getRuntimeContext().getNumberOfParallelSubtasks(), get } List writeStatuses = writerHelper.getWriteStatuses(this.taskID); + clusteringMetrics.endClustering(); collector.collect(new ClusteringCommitEvent(instantTime, getFileIds(clusteringOperations), writeStatuses, this.taskID)); writerHelper.close(); } @@ -388,4 +396,10 @@ public void setExecutor(NonThrownExecutor executor) { public void setOutput(Output> output) { this.output = output; } + + private void registerMetrics() { + MetricGroup metrics = getRuntimeContext().getMetricGroup(); + clusteringMetrics = new FlinkClusteringMetrics(metrics); + clusteringMetrics.registerMetrics(); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java index 48b2a9becd43..c16f8ed70801 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.util.ClusteringUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.metrics.FlinkClusteringMetrics; import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.util.ClusteringUtil; import org.apache.hudi.util.FlinkTables; @@ -33,11 +34,14 @@ import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.configuration.Configuration; +import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.operators.AbstractStreamOperator; import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import java.util.List; + /** * Operator that generates the clustering plan with pluggable strategies on finished checkpoints. * @@ -57,6 +61,8 @@ public class ClusteringPlanOperator extends AbstractStreamOperator table, long checkpointId) { + List pendingClusteringInstantTimes = + ClusteringUtils.getPendingClusteringInstantTimes(table.getMetaClient()); // the first instant takes the highest priority. Option firstRequested = Option.fromJavaOptional( - ClusteringUtils.getPendingClusteringInstantTimes(table.getMetaClient()).stream() + pendingClusteringInstantTimes.stream() .filter(instant -> instant.getState() == HoodieInstant.State.REQUESTED).findFirst()); + + // record metrics + clusteringMetrics.setFirstPendingClusteringInstant(firstRequested); + clusteringMetrics.setPendingClusteringCount(pendingClusteringInstantTimes.size()); + if (!firstRequested.isPresent()) { // do nothing. LOG.info("No clustering plan for checkpoint " + checkpointId); @@ -136,4 +150,10 @@ private void scheduleClustering(HoodieFlinkTable table, long checkpointId) { public void setOutput(Output> output) { this.output = output; } + + private void registerMetrics() { + MetricGroup metrics = getRuntimeContext().getMetricGroup(); + clusteringMetrics = new FlinkClusteringMetrics(metrics); + clusteringMetrics.registerMetrics(); + } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/ClusteringFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/ClusteringFunctionWrapper.java index e3b75cbf6379..252a48350699 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/ClusteringFunctionWrapper.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/ClusteringFunctionWrapper.java @@ -55,6 +55,10 @@ public class ClusteringFunctionWrapper { * Function that generates the {@code HoodieClusteringPlan}. */ private ClusteringPlanOperator clusteringPlanOperator; + /** + * Output to collect the clustering plan events. + */ + private CollectorOutput planEventOutput; /** * Output to collect the clustering commit events. */ @@ -83,6 +87,8 @@ public ClusteringFunctionWrapper(Configuration conf, StreamTask streamTask public void openFunction() throws Exception { clusteringPlanOperator = new ClusteringPlanOperator(conf); + planEventOutput = new CollectorOutput<>(); + clusteringPlanOperator.setup(streamTask, streamConfig, planEventOutput); clusteringPlanOperator.open(); clusteringOperator = new ClusteringOperator(conf, TestConfigurations.ROW_TYPE); From 6ffc817a1e90ea4425bf33af50a4dc4e1c52882f Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Mon, 8 Jan 2024 13:23:17 -0500 Subject: [PATCH 034/112] [MINOR] Disable flaky test (#10449) Co-authored-by: Jonathan Vexler <=> --- .../scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index 38221cc05c7e..599e8ae97080 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.functions.{expr, lit} import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.hudi.command.SqlKeyGenerator import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotNull, assertNull, assertTrue, fail} -import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} +import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments.arguments import org.junit.jupiter.params.provider._ @@ -1341,8 +1341,9 @@ def testBulkInsertForDropPartitionColumn(): Unit = { /* * Test case for instant is generated with commit timezone when TIMELINE_TIMEZONE set to UTC * related to HUDI-5978 + * Issue [HUDI-7275] is tracking this test being disabled */ - @Test + @Disabled def testInsertDatasetWithTimelineTimezoneUTC(): Unit = { val defaultTimezone = TimeZone.getDefault try { From ef1ccce6774bde6673d6714e07e4bd9a0a903bed Mon Sep 17 00:00:00 2001 From: kongwei Date: Wed, 10 Jan 2024 10:49:12 +0800 Subject: [PATCH 035/112] [HUDI-7279] make sampling rate configurable for BOUNDED_IN_MEMORY executor type (#10459) * make sampling rate configurable for BOUNDED_IN_MEMORY executor type * add sinceVersion for new configs --------- Co-authored-by: wei.kong --- .../apache/hudi/config/HoodieWriteConfig.java | 32 +++++++++++++++++++ .../org/apache/hudi/util/ExecutorFactory.java | 4 +-- .../util/queue/BoundedInMemoryExecutor.java | 14 ++++++++ .../util/queue/BoundedInMemoryQueue.java | 28 +++++++++++++--- 4 files changed, 71 insertions(+), 7 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index be16c3e4cb9e..a964ceef958d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -347,6 +347,20 @@ public class HoodieWriteConfig extends HoodieConfig { .markAdvanced() .withDocumentation("Size of in-memory buffer used for parallelizing network reads and lake storage writes."); + public static final ConfigProperty WRITE_BUFFER_RECORD_SAMPLING_RATE = ConfigProperty + .key("hoodie.write.buffer.record.sampling.rate") + .defaultValue(String.valueOf(64)) + .markAdvanced() + .sinceVersion("1.0.0") + .withDocumentation("Sampling rate of in-memory buffer used to estimate object size. Higher value lead to lower CPU usage."); + + public static final ConfigProperty WRITE_BUFFER_RECORD_CACHE_LIMIT = ConfigProperty + .key("hoodie.write.buffer.record.cache.limit") + .defaultValue(String.valueOf(128 * 1024)) + .markAdvanced() + .sinceVersion("1.0.0") + .withDocumentation("Maximum queue size of in-memory buffer for parallelizing network reads and lake storage writes."); + public static final ConfigProperty WRITE_EXECUTOR_DISRUPTOR_BUFFER_LIMIT_BYTES = ConfigProperty .key("hoodie.write.executor.disruptor.buffer.limit.bytes") .defaultValue(String.valueOf(1024)) @@ -1322,6 +1336,14 @@ public int getWriteBufferLimitBytes() { return Integer.parseInt(getStringOrDefault(WRITE_BUFFER_LIMIT_BYTES_VALUE)); } + public int getWriteBufferRecordSamplingRate() { + return Integer.parseInt(getStringOrDefault(WRITE_BUFFER_RECORD_SAMPLING_RATE)); + } + + public int getWriteBufferRecordCacheLimit() { + return Integer.parseInt(getStringOrDefault(WRITE_BUFFER_RECORD_CACHE_LIMIT)); + } + public String getWriteExecutorDisruptorWaitStrategy() { return getStringOrDefault(WRITE_EXECUTOR_DISRUPTOR_WAIT_STRATEGY); } @@ -2751,6 +2773,16 @@ public Builder withWriteBufferLimitBytes(int writeBufferLimit) { return this; } + public Builder withWriteBufferRecordSamplingRate(int recordSamplingRate) { + writeConfig.setValue(WRITE_BUFFER_RECORD_SAMPLING_RATE, String.valueOf(recordSamplingRate)); + return this; + } + + public Builder withWriteBufferRecordCacheLimit(int recordCacheLimit) { + writeConfig.setValue(WRITE_BUFFER_RECORD_CACHE_LIMIT, String.valueOf(recordCacheLimit)); + return this; + } + public Builder withWriteExecutorDisruptorWaitStrategy(String waitStrategy) { writeConfig.setValue(WRITE_EXECUTOR_DISRUPTOR_WAIT_STRATEGY, String.valueOf(waitStrategy)); return this; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/util/ExecutorFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/util/ExecutorFactory.java index 49e83733adf0..79bdcfe80d46 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/util/ExecutorFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/util/ExecutorFactory.java @@ -48,8 +48,8 @@ public static HoodieExecutor create(HoodieWriteConfig config, ExecutorType executorType = config.getExecutorType(); switch (executorType) { case BOUNDED_IN_MEMORY: - return new BoundedInMemoryExecutor<>(config.getWriteBufferLimitBytes(), inputItr, consumer, - transformFunction, preExecuteRunnable); + return new BoundedInMemoryExecutor<>(config.getWriteBufferLimitBytes(), config.getWriteBufferRecordSamplingRate(), config.getWriteBufferRecordCacheLimit(), + inputItr, consumer, transformFunction, preExecuteRunnable); case DISRUPTOR: return new DisruptorExecutor<>(config.getWriteExecutorDisruptorWriteBufferLimitBytes(), inputItr, consumer, transformFunction, config.getWriteExecutorDisruptorWaitStrategy(), preExecuteRunnable); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java index 5741aeffd406..70728be031bd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java @@ -46,12 +46,26 @@ public BoundedInMemoryExecutor(final long bufferLimitInBytes, final Iterator Option.of(consumer), transformFunction, new DefaultSizeEstimator<>(), preExecuteRunnable); } + public BoundedInMemoryExecutor(final long bufferLimitInBytes, int recordSamplingRate, int recordCacheLimit, final Iterator inputItr, + HoodieConsumer consumer, Function transformFunction, Runnable preExecuteRunnable) { + this(bufferLimitInBytes, recordSamplingRate, recordCacheLimit, Collections.singletonList(new IteratorBasedQueueProducer<>(inputItr)), + Option.of(consumer), transformFunction, new DefaultSizeEstimator<>(), preExecuteRunnable); + } + public BoundedInMemoryExecutor(final long bufferLimitInBytes, List> producers, Option> consumer, final Function transformFunction, final SizeEstimator sizeEstimator, Runnable preExecuteRunnable) { super(producers, consumer, new BoundedInMemoryQueue<>(bufferLimitInBytes, transformFunction, sizeEstimator), preExecuteRunnable); } + public BoundedInMemoryExecutor(final long bufferLimitInBytes, int recordSamplingRate, int recordCacheLimit, List> producers, + Option> consumer, final Function transformFunction, + final SizeEstimator sizeEstimator, Runnable preExecuteRunnable) { + super(producers, consumer, + new BoundedInMemoryQueue<>(bufferLimitInBytes, transformFunction, sizeEstimator, recordSamplingRate, recordCacheLimit), + preExecuteRunnable); + } + @Override protected void doConsume(HoodieMessageQueue queue, HoodieConsumer consumer) { LOG.info("Starting consumer, consuming records from the queue"); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java index e9d13b10dca2..fd9edfb0ef23 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryQueue.java @@ -68,7 +68,13 @@ public class BoundedInMemoryQueue implements HoodieMessageQueue, Ite */ public final Semaphore rateLimiter = new Semaphore(1); - /** Used for sampling records with "RECORD_SAMPLING_RATE" frequency. **/ + /** Sampling rate used to determine avg record size in bytes, Default is {@link #RECORD_SAMPLING_RATE} **/ + private final int recordSamplingRate; + + /** Maximum records can be cached, default is {@link #RECORD_CACHING_LIMIT} **/ + private final int recordCacheLimit; + + /** Used for sampling records with "recordSamplingRate" frequency. **/ public final AtomicLong samplingRecordCounter = new AtomicLong(-1); /** Internal queue for records. **/ @@ -120,19 +126,31 @@ public BoundedInMemoryQueue(final long memoryLimit, final Function transfo this(memoryLimit, transformFunction, new DefaultSizeEstimator() {}); } + public BoundedInMemoryQueue(final long memoryLimit, final Function transformFunction, + final SizeEstimator payloadSizeEstimator) { + this(memoryLimit, transformFunction, payloadSizeEstimator, RECORD_SAMPLING_RATE, RECORD_CACHING_LIMIT); + } + /** * Construct BoundedInMemoryQueue with passed in size estimator. * * @param memoryLimit MemoryLimit in bytes * @param transformFunction Transformer Function to convert input payload type to stored payload type * @param payloadSizeEstimator Payload Size Estimator + * @param recordSamplingRate record sampling rate + * @param recordCacheLimit record cache limit */ public BoundedInMemoryQueue(final long memoryLimit, final Function transformFunction, - final SizeEstimator payloadSizeEstimator) { + final SizeEstimator payloadSizeEstimator, + final int recordSamplingRate, + final int recordCacheLimit) { this.memoryLimit = memoryLimit; this.transformFunction = transformFunction; this.payloadSizeEstimator = payloadSizeEstimator; this.iterator = new QueueIterator(); + this.recordSamplingRate = recordSamplingRate; + this.recordCacheLimit = recordCacheLimit; + LOG.info("recordSamplingRate: {}, recordCacheLimit: {}", recordSamplingRate, recordCacheLimit); } @Override @@ -148,7 +166,7 @@ public long size() { * @param payload Payload to size */ private void adjustBufferSizeIfNeeded(final O payload) throws InterruptedException { - if (this.samplingRecordCounter.incrementAndGet() % RECORD_SAMPLING_RATE != 0) { + if (this.samplingRecordCounter.incrementAndGet() % recordSamplingRate != 0) { return; } @@ -156,7 +174,7 @@ private void adjustBufferSizeIfNeeded(final O payload) throws InterruptedExcepti final long newAvgRecordSizeInBytes = Math.max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1)); final int newRateLimit = - (int) Math.min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes)); + (int) Math.min(recordCacheLimit, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes)); // If there is any change in number of records to cache then we will either release (if it increased) or acquire // (if it decreased) to adjust rate limiting to newly computed value. @@ -267,7 +285,7 @@ public void markAsFailed(Throwable e) { this.hasFailed.set(e); // release the permits so that if the queueing thread is waiting for permits then it will // get it. - this.rateLimiter.release(RECORD_CACHING_LIMIT + 1); + this.rateLimiter.release(recordCacheLimit + 1); } @Override From fc587b374f939ab9ab1571c8fb456adc529312bd Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Wed, 10 Jan 2024 00:02:53 -0500 Subject: [PATCH 036/112] [HUDI-5973] Fixing refreshing of schemas in HoodieStreamer continuous mode (#10261) * Add cachedSchema per batch, fix idempotency with getSourceSchema calls --------- Co-authored-by: danielfordfc --- .../schema/FilebasedSchemaProvider.java | 29 +++++++++++---- .../hudi/utilities/schema/SchemaProvider.java | 5 +++ .../schema/SchemaRegistryProvider.java | 36 ++++++++++++++----- .../hudi/utilities/streamer/StreamSync.java | 5 ++- .../schema/TestSchemaRegistryProvider.java | 20 +++++++++++ 5 files changed, 79 insertions(+), 16 deletions(-) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 3ca97b01f95b..9dbf66325d7f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -45,6 +45,11 @@ public class FilebasedSchemaProvider extends SchemaProvider { private final FileSystem fs; + private final String sourceFile; + private final String targetFile; + private final boolean shouldSanitize; + private final String invalidCharMask; + protected Schema sourceSchema; protected Schema targetSchema; @@ -52,18 +57,21 @@ public class FilebasedSchemaProvider extends SchemaProvider { public FilebasedSchemaProvider(TypedProperties props, JavaSparkContext jssc) { super(props, jssc); checkRequiredConfigProperties(props, Collections.singletonList(FilebasedSchemaProviderConfig.SOURCE_SCHEMA_FILE)); - String sourceFile = getStringWithAltKeys(props, FilebasedSchemaProviderConfig.SOURCE_SCHEMA_FILE); - boolean shouldSanitize = SanitizationUtils.shouldSanitize(props); - String invalidCharMask = SanitizationUtils.getInvalidCharMask(props); + this.sourceFile = getStringWithAltKeys(props, FilebasedSchemaProviderConfig.SOURCE_SCHEMA_FILE); + this.targetFile = getStringWithAltKeys(props, FilebasedSchemaProviderConfig.TARGET_SCHEMA_FILE, sourceFile); + this.shouldSanitize = SanitizationUtils.shouldSanitize(props); + this.invalidCharMask = SanitizationUtils.getInvalidCharMask(props); this.fs = FSUtils.getFs(sourceFile, jssc.hadoopConfiguration(), true); - this.sourceSchema = readAvroSchemaFromFile(sourceFile, this.fs, shouldSanitize, invalidCharMask); + this.sourceSchema = parseSchema(this.sourceFile); if (containsConfigProperty(props, FilebasedSchemaProviderConfig.TARGET_SCHEMA_FILE)) { - this.targetSchema = readAvroSchemaFromFile( - getStringWithAltKeys(props, FilebasedSchemaProviderConfig.TARGET_SCHEMA_FILE), - this.fs, shouldSanitize, invalidCharMask); + this.targetSchema = parseSchema(this.targetFile); } } + private Schema parseSchema(String schemaFile) { + return readAvroSchemaFromFile(schemaFile, this.fs, shouldSanitize, invalidCharMask); + } + @Override public Schema getSourceSchema() { return sourceSchema; @@ -87,4 +95,11 @@ private static Schema readAvroSchemaFromFile(String schemaPath, FileSystem fs, b } return SanitizationUtils.parseAvroSchema(schemaStr, sanitizeSchema, invalidCharMask); } + + // Per write batch, refresh the schemas from the file + @Override + public void refresh() { + this.sourceSchema = parseSchema(this.sourceFile); + this.targetSchema = parseSchema(this.targetFile); + } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java index 2410798d355c..5c8ca8f6c1be 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProvider.java @@ -56,4 +56,9 @@ public Schema getTargetSchema() { // by default, use source schema as target for hoodie table as well return getSourceSchema(); } + + //every schema provider has the ability to refresh itself, which will mean something different per provider. + public void refresh() { + + } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java index 0f65dd338d03..1c2e9181fd71 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java @@ -82,6 +82,12 @@ public static class Config { public static final String SSL_KEY_PASSWORD_PROP = "schema.registry.ssl.key.password"; } + protected Schema cachedSourceSchema; + protected Schema cachedTargetSchema; + + private final String srcSchemaRegistryUrl; + private final String targetSchemaRegistryUrl; + @FunctionalInterface public interface SchemaConverter { /** @@ -160,6 +166,8 @@ protected InputStream getStream(HttpURLConnection connection) throws IOException public SchemaRegistryProvider(TypedProperties props, JavaSparkContext jssc) { super(props, jssc); checkRequiredConfigProperties(props, Collections.singletonList(HoodieSchemaProviderConfig.SRC_SCHEMA_REGISTRY_URL)); + this.srcSchemaRegistryUrl = getStringWithAltKeys(config, HoodieSchemaProviderConfig.SRC_SCHEMA_REGISTRY_URL); + this.targetSchemaRegistryUrl = getStringWithAltKeys(config, HoodieSchemaProviderConfig.TARGET_SCHEMA_REGISTRY_URL, srcSchemaRegistryUrl); if (config.containsKey(Config.SSL_KEYSTORE_LOCATION_PROP) || config.containsKey(Config.SSL_TRUSTSTORE_LOCATION_PROP)) { setUpSSLStores(); @@ -191,30 +199,42 @@ private void setUpSSLStores() { @Override public Schema getSourceSchema() { - String registryUrl = getStringWithAltKeys(config, HoodieSchemaProviderConfig.SRC_SCHEMA_REGISTRY_URL); try { - return parseSchemaFromRegistry(registryUrl); + if (cachedSourceSchema == null) { + cachedSourceSchema = parseSchemaFromRegistry(this.srcSchemaRegistryUrl); + } + return cachedSourceSchema; } catch (Exception e) { throw new HoodieSchemaFetchException(String.format( "Error reading source schema from registry. Please check %s is configured correctly. Truncated URL: %s", Config.SRC_SCHEMA_REGISTRY_URL_PROP, - StringUtils.truncate(registryUrl, 10, 10)), e); + StringUtils.truncate(srcSchemaRegistryUrl, 10, 10)), e); } } @Override public Schema getTargetSchema() { - String registryUrl = getStringWithAltKeys(config, HoodieSchemaProviderConfig.SRC_SCHEMA_REGISTRY_URL); - String targetRegistryUrl = - getStringWithAltKeys(config, HoodieSchemaProviderConfig.TARGET_SCHEMA_REGISTRY_URL, registryUrl); try { - return parseSchemaFromRegistry(targetRegistryUrl); + if (cachedTargetSchema == null) { + cachedTargetSchema = parseSchemaFromRegistry(this.targetSchemaRegistryUrl); + } + return cachedTargetSchema; } catch (Exception e) { throw new HoodieSchemaFetchException(String.format( "Error reading target schema from registry. Please check %s is configured correctly. If that is not configured then check %s. Truncated URL: %s", Config.SRC_SCHEMA_REGISTRY_URL_PROP, Config.TARGET_SCHEMA_REGISTRY_URL_PROP, - StringUtils.truncate(targetRegistryUrl, 10, 10)), e); + StringUtils.truncate(targetSchemaRegistryUrl, 10, 10)), e); } } + + // Per SyncOnce call, the cachedschema for the provider is dropped and SourceSchema re-attained + // Subsequent calls to getSourceSchema within the write batch should be cached. + @Override + public void refresh() { + cachedSourceSchema = null; + cachedTargetSchema = null; + getSourceSchema(); + getTargetSchema(); + } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index e756602b1cdc..17a0ee2e3bfb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -449,7 +449,10 @@ public Pair, JavaRDD> syncOnce() throws IOException result = writeToSinkAndDoMetaSync(instantTime, inputBatch, metrics, overallTimerContext); } - + // refresh schemas if need be before next batch + if (schemaProvider != null) { + schemaProvider.refresh(); + } metrics.updateStreamerSyncMetrics(System.currentTimeMillis()); return result; } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java index abbe983cbce6..397e72a0ec4a 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/schema/TestSchemaRegistryProvider.java @@ -133,4 +133,24 @@ public String convert(String schema) throws IOException { .toString(); } } + + // The SR is checked when cachedSchema is empty, when not empty, the cachedSchema is used. + @Test + public void testGetSourceSchemaUsesCachedSchema() throws IOException { + TypedProperties props = getProps(); + SchemaRegistryProvider spyUnderTest = getUnderTest(props); + + // Call when cachedSchema is empty + Schema actual = spyUnderTest.getSourceSchema(); + assertNotNull(actual); + verify(spyUnderTest, times(1)).parseSchemaFromRegistry(Mockito.any()); + + assert spyUnderTest.cachedSourceSchema != null; + + Schema actualTwo = spyUnderTest.getSourceSchema(); + + // cachedSchema should now be set, a subsequent call should not call parseSchemaFromRegistry + // Assuming this verify() has the scope of the whole test? so it should still be 1 from previous call? + verify(spyUnderTest, times(1)).parseSchemaFromRegistry(Mockito.any()); + } } From b712666384ea395dbe1ef5d7c4a817c8fa06c767 Mon Sep 17 00:00:00 2001 From: "Geser Dugarov, PhD" Date: Wed, 10 Jan 2024 23:52:36 +0700 Subject: [PATCH 037/112] [MINOR] Fix unit tests (#10362) --- .../org/apache/hudi/client/TestJavaHoodieBackedMetadata.java | 4 ++++ .../utilities/deltastreamer/HoodieDeltaStreamerTestBase.java | 5 +++++ .../utilities/deltastreamer/TestHoodieDeltaStreamer.java | 4 +++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index bd2fde46cdf4..9f893df6d4e5 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -71,6 +71,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.JsonUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.ExternalSpillableMap; @@ -494,6 +495,9 @@ public void testTableOperationsWithMetadataIndex(HoodieTableType tableType) thro .withMaxNumDeltaCommitsBeforeCompaction(12) // cannot restore to before the oldest compaction on MDT as there are no base files before that time .build()) .build(); + // module com.fasterxml.jackson.datatype:jackson-datatype-jsr310 is needed for proper column stats processing for Jackson >= 2.11 (Spark >= 3.3) + // Java 8 date/time type `java.time.LocalDate` is not supported by default + JsonUtils.registerModules(); init(tableType, writeConfig); testTableOperationsForMetaIndexImpl(writeConfig); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java index 80b6479f3189..d9bee058370a 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java @@ -248,6 +248,11 @@ public static void initClass() throws Exception { } @AfterAll + public static void tearDown() { + cleanupKafkaTestUtils(); + UtilitiesTestBase.cleanUpUtilitiesTestServices(); + } + public static void cleanupKafkaTestUtils() { if (testUtils != null) { testUtils.teardown(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index 60ed1b6732a5..8c2acac45cf1 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -2302,7 +2302,9 @@ public void testCsvDFSSourceNoHeaderWithoutSchemaProviderAndWithTransformer() th testCsvDFSSource(false, '\t', false, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); }, "Should error out when doing the transformation."); LOG.debug("Expected error during transformation", e); - assertTrue(e.getMessage().contains("cannot resolve 'begin_lat' given input columns:")); + // first version for Spark >= 3.3, the second one is for Spark < 3.3 + assertTrue(e.getMessage().contains("Column 'begin_lat' does not exist. Did you mean one of the following?") + || e.getMessage().contains("cannot resolve 'begin_lat' given input columns:")); } @Test From d1dd4a4ebb2b09afdf3cd63993cd31afbe344c37 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Wed, 10 Jan 2024 12:40:48 -0500 Subject: [PATCH 038/112] [HUDI-7284] Stream sync doesn't differentiate replace commits (#10467) Co-authored-by: Jonathan Vexler <=> --- .../table/timeline/HoodieDefaultTimeline.java | 26 +++++++++++++++++++ .../common/table/timeline/HoodieTimeline.java | 12 +++++++++ .../hudi/utilities/streamer/StreamSync.java | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index 6c8d6b664a08..6bfdac00e778 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -26,6 +26,9 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.io.Serializable; import java.security.MessageDigest; @@ -50,6 +53,8 @@ */ public class HoodieDefaultTimeline implements HoodieTimeline { + private static final Logger LOG = LoggerFactory.getLogger(HoodieDefaultTimeline.class); + private static final long serialVersionUID = 1L; private static final String HASHING_ALGORITHM = "SHA-256"; @@ -492,6 +497,7 @@ public Option getFirstNonSavepointCommit() { return this.firstNonSavepointCommit; } + @Override public Option getLastClusterCommit() { return Option.fromJavaOptional(getCommitsTimeline().filter(s -> s.getAction().equalsIgnoreCase(HoodieTimeline.REPLACE_COMMIT_ACTION)) .getReverseOrderedInstants() @@ -500,6 +506,26 @@ public Option getLastClusterCommit() { HoodieCommitMetadata metadata = TimelineUtils.getCommitMetadata(i, this); return metadata.getOperationType().equals(WriteOperationType.CLUSTER); } catch (IOException e) { + LOG.warn("Unable to read commit metadata for " + i + " due to " + e.getMessage()); + return false; + } + }).findFirst()); + } + + @Override + public Option getLastPendingClusterCommit() { + return Option.fromJavaOptional(getCommitsTimeline().filter(s -> s.getAction().equalsIgnoreCase(HoodieTimeline.REPLACE_COMMIT_ACTION)) + .getReverseOrderedInstants() + .filter(i -> { + try { + if (!i.isCompleted()) { + HoodieCommitMetadata metadata = TimelineUtils.getCommitMetadata(i, this); + return metadata.getOperationType().equals(WriteOperationType.CLUSTER); + } else { + return false; + } + } catch (IOException e) { + LOG.warn("Unable to read commit metadata for " + i + " due to " + e.getMessage()); return false; } }).findFirst()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java index a1e70c2e22e6..43c70cbc0003 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java @@ -395,6 +395,18 @@ public interface HoodieTimeline extends Serializable { */ Option getFirstNonSavepointCommit(); + /** + * get the most recent cluster commit if present + * + */ + public Option getLastClusterCommit(); + + /** + * get the most recent pending cluster commit if present + * + */ + public Option getLastPendingClusterCommit(); + /** * Read the completed instant details. */ diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index 17a0ee2e3bfb..35bdcb8e7dac 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -459,7 +459,7 @@ public Pair, JavaRDD> syncOnce() throws IOException private Option getLastPendingClusteringInstant(Option commitTimelineOpt) { if (commitTimelineOpt.isPresent()) { - Option pendingClusteringInstant = commitTimelineOpt.get().filterPendingReplaceTimeline().lastInstant(); + Option pendingClusteringInstant = commitTimelineOpt.get().getLastPendingClusterCommit(); return pendingClusteringInstant.isPresent() ? Option.of(pendingClusteringInstant.get().getTimestamp()) : Option.empty(); } return Option.empty(); From c0e59e95f579a819c46cb8c1541890498b9f06c8 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Thu, 11 Jan 2024 01:49:10 +0800 Subject: [PATCH 039/112] [HUDI-7241] Avoid always broadcast HUDI relation if not using HoodieSparkSessionExtension (#10373) * [HUDI-7241] Avoid always broadcast HUDI relation if not using HoodieSparkSessionExtension * Update the logical to check whether HoodieExtension is enabled --- .../scala/org/apache/hudi/HoodieFileIndex.scala | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index 5416961872b2..f628527c8cd5 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -104,6 +104,11 @@ case class HoodieFileIndex(spark: SparkSession, */ @transient private lazy val recordLevelIndex = new RecordLevelIndexSupport(spark, metadataConfig, metaClient) + private val enableHoodieExtension = spark.sessionState.conf.getConfString("spark.sql.extensions", "") + .split(",") + .map(_.trim) + .contains("org.apache.spark.sql.hudi.HoodieSparkSessionExtension") + override def rootPaths: Seq[Path] = getQueryPaths.asScala var shouldEmbedFileSlices: Boolean = false @@ -400,7 +405,17 @@ case class HoodieFileIndex(spark: SparkSession, override def inputFiles: Array[String] = getAllFiles().map(_.getPath.toString).toArray - override def sizeInBytes: Long = getTotalCachedFilesSize + override def sizeInBytes: Long = { + val size = getTotalCachedFilesSize + if (size == 0 && !enableHoodieExtension) { + // Avoid always broadcast the hudi table if not enable HoodieExtension + logWarning("Note: Please add 'org.apache.spark.sql.hudi.HoodieSparkSessionExtension' to the Spark SQL configuration property " + + "'spark.sql.extensions'.\n Multiple extensions can be set using a comma-separated list.") + Long.MaxValue + } else { + size + } + } def hasPredicatesPushedDown: Boolean = hasPushedDownPartitionPredicates From 26df317e7788aa9dffcf4bec63e647b6baa3382b Mon Sep 17 00:00:00 2001 From: Tim Brown Date: Wed, 10 Jan 2024 10:20:17 -0800 Subject: [PATCH 040/112] [MINOR] Fix usages of orElse (#10435) --- .../client/BaseHoodieTableServiceClient.java | 5 +++-- .../hudi/client/BaseHoodieWriteClient.java | 2 +- .../hudi/client/utils/TransactionUtils.java | 2 +- .../org/apache/hudi/table/HoodieTable.java | 6 +++-- .../savepoint/SavepointActionExecutor.java | 2 +- .../client/HoodieFlinkTableServiceClient.java | 2 +- .../action/commit/JavaBulkInsertHelper.java | 2 +- .../MultipleSparkJobExecutionStrategy.java | 2 +- .../action/commit/SparkBulkInsertHelper.java | 2 +- ...rkInsertOverwriteCommitActionExecutor.java | 2 +- .../org/apache/hudi/AvroConversionUtils.scala | 22 ++++++++----------- .../apache/hudi/BaseHoodieTableFileIndex.java | 4 ++-- .../hudi/common/config/HoodieConfig.java | 2 +- .../log/AbstractHoodieLogRecordReader.java | 2 +- .../queue/BaseHoodieQueueBasedExecutor.java | 2 +- .../hudi/expression/PartialBindVisitor.java | 4 ++-- .../hudi/metadata/BaseTableMetadata.java | 2 +- .../metadata/HoodieBackedTableMetadata.java | 2 +- .../metadata/HoodieTableMetadataUtil.java | 4 ++-- .../index/SecondaryIndexManager.java | 2 +- .../HoodieCopyOnWriteTableInputFormat.java | 5 +++-- .../TestHoodieRealtimeRecordReader.java | 2 +- .../hudi/connect/utils/KafkaConnectUtils.java | 2 +- ...DatasetBulkInsertCommitActionExecutor.java | 2 +- .../hudi/cli/HDFSParquetImporterUtils.java | 2 +- .../service/handlers/FileSliceHandler.java | 4 ++-- .../service/handlers/TimelineHandler.java | 4 ++-- .../converter/JsonToAvroSchemaConverter.java | 8 +++---- .../hudi/utilities/sources/JsonDFSSource.java | 2 +- .../hudi/utilities/streamer/StreamSync.java | 6 ++--- .../transform/ChainedTransformer.java | 8 ++----- 31 files changed, 57 insertions(+), 61 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java index e4e6f79c5eb0..d3262ef91ca7 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java @@ -494,7 +494,7 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, preCommit(metadata); } // Update table's metadata (table) - writeTableMetadata(table, clusteringInstant.getTimestamp(), metadata, writeStatuses.orElse(context.emptyHoodieData())); + writeTableMetadata(table, clusteringInstant.getTimestamp(), metadata, writeStatuses.orElseGet(context::emptyHoodieData)); LOG.info("Committing Clustering " + clusteringCommitTime + ". Finished with result " + metadata); @@ -1008,7 +1008,8 @@ private List getInstantsToRollbackForLazyCleanPolicy(HoodieTableMetaClie */ @Deprecated public boolean rollback(final String commitInstantTime, Option pendingRollbackInfo, boolean skipLocking) throws HoodieRollbackException { - final String rollbackInstantTime = pendingRollbackInfo.map(entry -> entry.getRollbackInstant().getTimestamp()).orElse(HoodieActiveTimeline.createNewInstantTime()); + final String rollbackInstantTime = pendingRollbackInfo.map(entry -> entry.getRollbackInstant().getTimestamp()) + .orElseGet(HoodieActiveTimeline::createNewInstantTime); return rollback(commitInstantTime, pendingRollbackInfo, rollbackInstantTime, skipLocking); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java index 37f3fe6d04a3..4a36b90ac2bf 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java @@ -297,7 +297,7 @@ private void saveInternalSchema(HoodieTable table, String instantTime, HoodieCom InternalSchema internalSchema; Schema avroSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema(), config.allowOperationMetadataField()); if (historySchemaStr.isEmpty()) { - internalSchema = SerDeHelper.fromJson(config.getInternalSchema()).orElse(AvroInternalSchemaConverter.convert(avroSchema)); + internalSchema = SerDeHelper.fromJson(config.getInternalSchema()).orElseGet(() -> AvroInternalSchemaConverter.convert(avroSchema)); internalSchema.setSchemaId(Long.parseLong(instantTime)); } else { internalSchema = InternalSchemaUtils.searchSchema(Long.parseLong(instantTime), diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java index d162fe28a62b..5f1ad9331ba8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java @@ -79,7 +79,7 @@ public static Option resolveWriteConflictIfAny( table.getMetaClient(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant), completedInstantsDuringCurrentWriteOperation); - final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata())); + final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElseGet(HoodieCommitMetadata::new)); instantStream.forEach(instant -> { try { ConcurrentOperation otherOperation = new ConcurrentOperation(instant, table.getMetaClient()); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index dfa464d8af8b..ab4777ad677a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -643,7 +643,8 @@ public void rollbackInflightClustering(HoodieInstant inflightInstant, private void rollbackInflightInstant(HoodieInstant inflightInstant, Function> getPendingRollbackInstantFunc) { final String commitTime = getPendingRollbackInstantFunc.apply(inflightInstant.getTimestamp()).map(entry - -> entry.getRollbackInstant().getTimestamp()).orElse(HoodieActiveTimeline.createNewInstantTime()); + -> entry.getRollbackInstant().getTimestamp()) + .orElseGet(HoodieActiveTimeline::createNewInstantTime); scheduleRollback(context, commitTime, inflightInstant, false, config.shouldRollbackUsingMarkers(), false); rollback(context, commitTime, inflightInstant, false, false); @@ -658,7 +659,8 @@ private void rollbackInflightInstant(HoodieInstant inflightInstant, */ public void rollbackInflightLogCompaction(HoodieInstant inflightInstant, Function> getPendingRollbackInstantFunc) { final String commitTime = getPendingRollbackInstantFunc.apply(inflightInstant.getTimestamp()).map(entry - -> entry.getRollbackInstant().getTimestamp()).orElse(HoodieActiveTimeline.createNewInstantTime()); + -> entry.getRollbackInstant().getTimestamp()) + .orElseGet(HoodieActiveTimeline::createNewInstantTime); scheduleRollback(context, commitTime, inflightInstant, false, config.shouldRollbackUsingMarkers(), false); rollback(context, commitTime, inflightInstant, true, false); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java index 29da31b478cb..1e0330a4defc 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java @@ -90,7 +90,7 @@ public HoodieSavepointMetadata execute() { } catch (IOException e) { throw new HoodieSavepointException("Failed to savepoint " + instantTime, e); } - }).orElse(table.getCompletedCommitsTimeline().firstInstant().get().getTimestamp()); + }).orElseGet(() -> table.getCompletedCommitsTimeline().firstInstant().get().getTimestamp()); // Cannot allow savepoint time on a commit that could have been cleaned ValidationUtils.checkArgument(HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.GREATER_THAN_OR_EQUALS, lastCommitRetained), diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java index 05e00cf1f181..79bbeecaa56d 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkTableServiceClient.java @@ -133,7 +133,7 @@ protected void completeClustering( // commit to data table after committing to metadata table. // We take the lock here to ensure all writes to metadata table happens within a single lock (single writer). // Because more than one write to metadata table will result in conflicts since all of them updates the same partition. - writeTableMetadata(table, clusteringCommitTime, metadata, writeStatuses.orElse(context.emptyHoodieData())); + writeTableMetadata(table, clusteringCommitTime, metadata, writeStatuses.orElseGet(context::emptyHoodieData)); LOG.info("Committing Clustering {} finished with result {}.", clusteringCommitTime, metadata); table.getActiveTimeline().transitionReplaceInflightToComplete( diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java index 45010bdf230a..5503573656c6 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java @@ -78,7 +78,7 @@ public HoodieWriteMetadata> bulkInsert(final List JavaBulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode())); // write new files List writeStatuses = bulkInsert(inputRecords, instantTime, table, config, performDedupe, partitioner, false, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java index 50d8c528594f..8a39dc79ff31 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java @@ -219,7 +219,7 @@ private BulkInsertPartitioner getPartitioner(Map strategy default: throw new UnsupportedOperationException(String.format("Layout optimization strategy '%s' is not supported", layoutOptStrategy)); } - }).orElse(isRowPartitioner + }).orElseGet(() -> isRowPartitioner ? BulkInsertInternalPartitionerWithRowsFactory.get(getWriteConfig(), getHoodieTable().isPartitioned(), true) : BulkInsertInternalPartitionerFactory.get(getHoodieTable(), getWriteConfig(), true)); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java index fc4b8bf10062..2f57f6bb18b6 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java @@ -74,7 +74,7 @@ public HoodieWriteMetadata> bulkInsert(final HoodieData< executor.getCommitActionType(), instantTime), Option.empty(), config.shouldAllowMultiWriteOnSameInstant()); - BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.orElse(BulkInsertInternalPartitionerFactory.get(table, config)); + BulkInsertPartitioner partitioner = userDefinedBulkInsertPartitioner.orElseGet(() -> BulkInsertInternalPartitionerFactory.get(table, config)); // Write new files HoodieData writeStatuses = diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java index 788e1040783f..ac84475bfa41 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwriteCommitActionExecutor.java @@ -71,7 +71,7 @@ public HoodieWriteMetadata> execute() { protected Partitioner getPartitioner(WorkloadProfile profile) { return table.getStorageLayout().layoutPartitionerClass() .map(c -> getLayoutPartitioner(profile, c)) - .orElse(new SparkInsertOverwritePartitioner(profile, context, table, config)); + .orElseGet(() -> new SparkInsertOverwritePartitioner(profile, context, table, config)); } @Override diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index d84679eaf923..55877938f8cb 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -97,19 +97,15 @@ object AvroConversionUtils { * TODO convert directly from GenericRecord into InternalRow instead */ def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = { - if (rdd.isEmpty()) { - ss.emptyDataFrame - } else { - ss.createDataFrame(rdd.mapPartitions { records => - if (records.isEmpty) Iterator.empty - else { - val schema = new Schema.Parser().parse(schemaStr) - val dataType = convertAvroSchemaToStructType(schema) - val converter = createConverterToRow(schema, dataType) - records.map { r => converter(r) } - } - }, convertAvroSchemaToStructType(new Schema.Parser().parse(schemaStr))) - } + ss.createDataFrame(rdd.mapPartitions { records => + if (records.isEmpty) Iterator.empty + else { + val schema = new Schema.Parser().parse(schemaStr) + val dataType = convertAvroSchemaToStructType(schema) + val converter = createConverterToRow(schema, dataType) + records.map { r => converter(r) } + } + }, convertAvroSchemaToStructType(new Schema.Parser().parse(schemaStr))) } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java index 824a94abab4b..bf7e25393c86 100644 --- a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java @@ -144,7 +144,7 @@ public BaseHoodieTableFileIndex(HoodieEngineContext engineContext, Option beginInstantTime, Option endInstantTime) { this.partitionColumns = metaClient.getTableConfig().getPartitionFields() - .orElse(new String[0]); + .orElseGet(() -> new String[0]); this.metadataConfig = HoodieMetadataConfig.newBuilder() .fromProperties(configProperties) @@ -284,7 +284,7 @@ private Map> loadFileSlicesForPartitions(List fileSystemView.getLatestMergedFileSlicesBeforeOrOn(partitionPath.path, queryInstant.get()) ) - .orElse(fileSystemView.getLatestFileSlices(partitionPath.path)) + .orElseGet(() -> fileSystemView.getLatestFileSlices(partitionPath.path)) .collect(Collectors.toList()) )); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java index 00b61f5b7db5..f21721391d26 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java @@ -160,7 +160,7 @@ public Integer getInt(ConfigProperty configProperty) { public Integer getIntOrDefault(ConfigProperty configProperty) { Option rawValue = getRawValue(configProperty); return rawValue.map(v -> Integer.parseInt(v.toString())) - .orElse(Integer.parseInt(configProperty.defaultValue().toString())); + .orElseGet(() -> Integer.parseInt(configProperty.defaultValue().toString())); } public Boolean getBoolean(ConfigProperty configProperty) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index 3678efe78625..7cd6ea9cd237 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -968,7 +968,7 @@ private Pair, Schema> getRecordsIterator( .orElse(Function.identity()); Schema schema = schemaEvolutionTransformerOpt.map(Pair::getRight) - .orElse(dataBlock.getSchema()); + .orElseGet(dataBlock::getSchema); return Pair.of(new CloseableMappingIterator<>(blockRecordsIterator, transformer), schema); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java index 86011e865dc0..20b9c802f605 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BaseHoodieQueueBasedExecutor.java @@ -131,7 +131,7 @@ private CompletableFuture startConsumingAsync() { return (Void) null; }, consumerExecutorService) ) - .orElse(CompletableFuture.completedFuture(null)); + .orElseGet(() -> CompletableFuture.completedFuture(null)); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/expression/PartialBindVisitor.java b/hudi-common/src/main/java/org/apache/hudi/expression/PartialBindVisitor.java index cece36291dff..5e86570d2917 100644 --- a/hudi-common/src/main/java/org/apache/hudi/expression/PartialBindVisitor.java +++ b/hudi-common/src/main/java/org/apache/hudi/expression/PartialBindVisitor.java @@ -108,14 +108,14 @@ public Expression visitPredicate(Predicate predicate) { Predicates.IsNull isNull = (Predicates.IsNull) predicate; return Option.ofNullable(isNull.child.accept(this)) .map(expr -> (Expression)Predicates.isNull(expr)) - .orElse(alwaysTrue()); + .orElseGet(this::alwaysTrue); } if (predicate instanceof Predicates.IsNotNull) { Predicates.IsNotNull isNotNull = (Predicates.IsNotNull) predicate; return Option.ofNullable(isNotNull.child.accept(this)) .map(expr -> (Expression)Predicates.isNotNull(expr)) - .orElse(alwaysTrue()); + .orElseGet(this::alwaysTrue); } if (predicate instanceof Predicates.StringStartsWith) { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java index 1b7c2db2daa1..ccb0968b169c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java @@ -358,7 +358,7 @@ FileStatus[] fetchAllFilesInPartition(Path partitionPath) throws IOException { throw new HoodieIOException("Failed to extract file-statuses from the payload", e); } }) - .orElse(new FileStatus[0]); + .orElseGet(() -> new FileStatus[0]); LOG.info("Listed file in partition from metadata: partition=" + relativePartitionPath + ", #files=" + statuses.length); return statuses; diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index d0ec7f020ab3..31ec9806a3a7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -574,7 +574,7 @@ public HoodieTableFileSystemView getMetadataFileSystemView() { public Map stats() { Set allMetadataPartitionPaths = Arrays.stream(MetadataPartitionType.values()).map(MetadataPartitionType::getPartitionPath).collect(Collectors.toSet()); - return metrics.map(m -> m.getStats(true, metadataMetaClient, this, allMetadataPartitionPaths)).orElse(new HashMap<>()); + return metrics.map(m -> m.getStats(true, metadataMetaClient, this, allMetadataPartitionPaths)).orElseGet(HashMap::new); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index acb9dc46446c..78a2883513f2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -1000,7 +1000,7 @@ private static List getPartitionFileSlices(HoodieTableMetaClient meta Option fileSystemView, String partition, boolean mergeFileSlices) { - HoodieTableFileSystemView fsView = fileSystemView.orElse(getFileSystemView(metaClient)); + HoodieTableFileSystemView fsView = fileSystemView.orElseGet(() -> getFileSystemView(metaClient)); Stream fileSliceStream; if (mergeFileSlices) { if (metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().isPresent()) { @@ -1026,7 +1026,7 @@ private static List getPartitionFileSlices(HoodieTableMetaClient meta public static List getPartitionLatestFileSlicesIncludingInflight(HoodieTableMetaClient metaClient, Option fileSystemView, String partition) { - HoodieTableFileSystemView fsView = fileSystemView.orElse(getFileSystemView(metaClient)); + HoodieTableFileSystemView fsView = fileSystemView.orElseGet(() -> getFileSystemView(metaClient)); Stream fileSliceStream = fsView.fetchLatestFileSlicesIncludingInflight(partition); return fileSliceStream .sorted(Comparator.comparing(FileSlice::getFileId)) diff --git a/hudi-common/src/main/java/org/apache/hudi/secondary/index/SecondaryIndexManager.java b/hudi-common/src/main/java/org/apache/hudi/secondary/index/SecondaryIndexManager.java index fbb65bc32104..bab92e8fab10 100644 --- a/hudi-common/src/main/java/org/apache/hudi/secondary/index/SecondaryIndexManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/secondary/index/SecondaryIndexManager.java @@ -118,7 +118,7 @@ public void create( List newSecondaryIndexes = secondaryIndexes.map(h -> { h.add(secondaryIndexToAdd); return h; - }).orElse(Collections.singletonList(secondaryIndexToAdd)); + }).orElseGet(() -> Collections.singletonList(secondaryIndexToAdd)); newSecondaryIndexes.sort(new HoodieSecondaryIndex.HoodieIndexCompactor()); // Persistence secondary indexes' metadata to hoodie.properties file diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java index 75504cdd132d..27326b668fee 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java @@ -45,8 +45,11 @@ import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import javax.annotation.Nonnull; + import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -56,8 +59,6 @@ import java.util.Map; import java.util.Properties; import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import static org.apache.hudi.common.util.ValidationUtils.checkState; diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index 6753a0aa33c1..ceae7022fbfa 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -288,7 +288,7 @@ private File getLogTempFile(long startTime, long endTime, String diskType) { return Arrays.stream(new File("/tmp").listFiles()) .filter(f -> f.isDirectory() && f.getName().startsWith("hudi-" + diskType) && f.lastModified() > startTime && f.lastModified() < endTime) .findFirst() - .orElse(new File("")); + .orElseGet(() -> new File("")); } @Test diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java index 1e27b29ae2d5..cce507b9fca3 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/utils/KafkaConnectUtils.java @@ -189,7 +189,7 @@ public static String getPartitionColumns(KeyGenerator keyGenerator, TypedPropert if (keyGenerator instanceof CustomAvroKeyGenerator) { return ((BaseKeyGenerator) keyGenerator).getPartitionPathFields().stream().map( pathField -> Arrays.stream(pathField.split(CustomAvroKeyGenerator.SPLIT_REGEX)) - .findFirst().orElse("Illegal partition path field format: '$pathField' for ${c.getClass.getSimpleName}")) + .findFirst().orElseGet(() -> "Illegal partition path field format: '$pathField' for ${c.getClass.getSimpleName}")) .collect(Collectors.joining(",")); } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/commit/BaseDatasetBulkInsertCommitActionExecutor.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/commit/BaseDatasetBulkInsertCommitActionExecutor.java index 1e20e4ab663d..6719b7356e18 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/commit/BaseDatasetBulkInsertCommitActionExecutor.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/commit/BaseDatasetBulkInsertCommitActionExecutor.java @@ -82,7 +82,7 @@ private HoodieWriteMetadata> buildHoodieWriteMetadata(Optio hoodieWriteMetadata.setWriteStatuses(HoodieJavaRDD.getJavaRDD(statuses)); hoodieWriteMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(statuses)); return hoodieWriteMetadata; - }).orElse(new HoodieWriteMetadata<>()); + }).orElseGet(HoodieWriteMetadata::new); } public final HoodieWriteResult execute(Dataset records, boolean isTablePartitioned) { diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java index 69dd8ea795a7..9783113117ce 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java @@ -277,7 +277,7 @@ public static SparkRDDWriteClient createHoodieClient(JavaSp HoodieCompactionConfig compactionConfig = compactionStrategyClass .map(strategy -> HoodieCompactionConfig.newBuilder().withInlineCompaction(false) .withCompactionStrategy(ReflectionUtils.loadClass(strategy)).build()) - .orElse(HoodieCompactionConfig.newBuilder().withInlineCompaction(false).build()); + .orElseGet(() -> HoodieCompactionConfig.newBuilder().withInlineCompaction(false).build()); HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) .withParallelism(parallelism, parallelism) diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java index c2b739c9f8bb..4a4226724f8b 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/FileSliceHandler.java @@ -31,8 +31,8 @@ import org.apache.hadoop.fs.FileSystem; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -97,7 +97,7 @@ public List getLatestFileSlicesStateless(String basePath, String p public List getLatestFileSlice(String basePath, String partitionPath, String fileId) { return viewManager.getFileSystemView(basePath).getLatestFileSlice(partitionPath, fileId) - .map(FileSliceDTO::fromFileSlice).map(Arrays::asList).orElse(new ArrayList<>()); + .map(FileSliceDTO::fromFileSlice).map(Arrays::asList).orElse(Collections.emptyList()); } public List getPendingCompactionOperations(String basePath) { diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java index 5d788ac74fc1..b9a721aae363 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/TimelineHandler.java @@ -27,8 +27,8 @@ import org.apache.hadoop.fs.FileSystem; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -43,7 +43,7 @@ public TimelineHandler(Configuration conf, TimelineService.Config timelineServic public List getLastInstant(String basePath) { return viewManager.getFileSystemView(basePath).getLastInstant().map(InstantDTO::fromInstant) - .map(Arrays::asList).orElse(new ArrayList<>()); + .map(Arrays::asList).orElse(Collections.emptyList()); } public TimelineDTO getTimeline(String basePath) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/converter/JsonToAvroSchemaConverter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/converter/JsonToAvroSchemaConverter.java index 794de225a5e6..9f892ab8f0e3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/converter/JsonToAvroSchemaConverter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/converter/JsonToAvroSchemaConverter.java @@ -78,12 +78,12 @@ public String convert(String jsonSchema) throws IOException { } private static ArrayNode convertProperties(JsonNode jsonProperties, Set required) { - List avroFields = new ArrayList<>(); + List avroFields = new ArrayList<>(jsonProperties.size()); jsonProperties.fieldNames().forEachRemaining(name -> avroFields.add(tryConvertNestedProperty(name, jsonProperties.get(name)) - .or(tryConvertArrayProperty(name, jsonProperties.get(name))) - .or(tryConvertEnumProperty(name, jsonProperties.get(name))) - .orElse(convertProperty(name, jsonProperties.get(name), required.contains(name))))); + .or(() -> tryConvertArrayProperty(name, jsonProperties.get(name))) + .or(() -> tryConvertEnumProperty(name, jsonProperties.get(name))) + .orElseGet(() -> convertProperty(name, jsonProperties.get(name), required.contains(name))))); return MAPPER.createArrayNode().addAll(avroFields); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java index 64da4f4f50f5..e658bde5853c 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonDFSSource.java @@ -47,7 +47,7 @@ protected InputBatch> fetchNewData(Option lastCkptStr, l pathSelector.getNextFilePathsAndMaxModificationTime(sparkContext, lastCkptStr, sourceLimit); return selPathsWithMaxModificationTime.getLeft() .map(pathStr -> new InputBatch<>(Option.of(fromFiles(pathStr)), selPathsWithMaxModificationTime.getRight())) - .orElse(new InputBatch<>(Option.empty(), selPathsWithMaxModificationTime.getRight())); + .orElseGet(() -> new InputBatch<>(Option.empty(), selPathsWithMaxModificationTime.getRight())); } private JavaRDD fromFiles(String pathStr) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index 35bdcb8e7dac..a084da56345b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -614,7 +614,7 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, AvroConversionUtils.convertStructTypeToAvroSchema(df.schema(), getAvroRecordQualifiedName(cfg.targetTableName))); schemaProvider = incomingSchemaOpt.map(incomingSchema -> getDeducedSchemaProvider(incomingSchema, dataAndCheckpoint.getSchemaProvider(), metaClient)) - .orElse(dataAndCheckpoint.getSchemaProvider()); + .orElseGet(dataAndCheckpoint::getSchemaProvider); if (useRowWriter) { inputBatchForWriter = new InputBatch(transformed, checkpointStr, schemaProvider); @@ -903,12 +903,12 @@ private WriteClientWriteResult writeToSink(InputBatch inputBatch, String instant instantTime = startCommit(instantTime, !autoGenerateRecordKeys); if (useRowWriter) { - Dataset df = (Dataset) inputBatch.getBatch().orElse(hoodieSparkContext.getSqlContext().emptyDataFrame()); + Dataset df = (Dataset) inputBatch.getBatch().orElseGet(() -> hoodieSparkContext.getSqlContext().emptyDataFrame()); HoodieWriteConfig hoodieWriteConfig = prepareHoodieConfigForRowWriter(inputBatch.getSchemaProvider().getTargetSchema()); BaseDatasetBulkInsertCommitActionExecutor executor = new HoodieStreamerDatasetBulkInsertCommitActionExecutor(hoodieWriteConfig, writeClient, instantTime); writeClientWriteResult = new WriteClientWriteResult(executor.execute(df, !HoodieStreamerUtils.getPartitionColumns(props).isEmpty()).getWriteStatuses()); } else { - JavaRDD records = (JavaRDD) inputBatch.getBatch().orElse(hoodieSparkContext.emptyRDD()); + JavaRDD records = (JavaRDD) inputBatch.getBatch().orElseGet(() -> hoodieSparkContext.emptyRDD()); // filter dupes if needed if (cfg.filterDupes) { records = DataSourceUtils.dropDuplicates(hoodieSparkContext.jsc(), records, writeClient.getConfig()); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java index 367448533b31..4ff7dd6e1c2a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java @@ -124,12 +124,8 @@ private StructType getExpectedTransformedSchema(TransformerInfo transformerInfo, throw new HoodieTransformPlanException("Either source schema or source dataset should be available to fetch the schema"); } StructType incomingStruct = incomingStructOpt - .orElse(sourceSchemaOpt.isPresent() ? AvroConversionUtils.convertAvroSchemaToStructType(sourceSchemaOpt.get()) : rowDatasetOpt.get().schema()); - try { - return transformerInfo.getTransformer().transformedSchema(jsc, sparkSession, incomingStruct, properties).asNullable(); - } catch (Exception e) { - throw e; - } + .orElseGet(() -> sourceSchemaOpt.isPresent() ? AvroConversionUtils.convertAvroSchemaToStructType(sourceSchemaOpt.get()) : rowDatasetOpt.get().schema()); + return transformerInfo.getTransformer().transformedSchema(jsc, sparkSession, incomingStruct, properties).asNullable(); } @Override From fcd6cd96210d7ee007cab01167b4b4ee084b880a Mon Sep 17 00:00:00 2001 From: Tim Brown Date: Wed, 10 Jan 2024 17:06:00 -0800 Subject: [PATCH 041/112] [MINOR] Avoid resource leaks (#10345) --- .../java/org/apache/hudi/metrics/Metrics.java | 35 +++++++++++++------ .../testutils/TestHoodieMetadataBase.java | 2 +- .../table/log/HoodieLogFormatWriter.java | 1 + .../util/collection/LazyFileIterable.java | 9 ++++- .../internal/schema/utils/SerDeHelper.java | 6 ++-- .../HoodieBootstrapRecordIterator.java | 3 +- .../hudi/common/testutils/SchemaTestUtil.java | 5 +-- .../hadoop/TestHoodieHFileInputFormat.java | 1 + .../hadoop/TestHoodieParquetInputFormat.java | 2 ++ .../TestHoodieRealtimeRecordReader.java | 3 ++ 10 files changed, 48 insertions(+), 19 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java index 47ee23bcc2fb..31b0d19da010 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java @@ -50,6 +50,7 @@ public class Metrics { private final List reporters; private final String commonMetricPrefix; private boolean initialized = false; + private transient Thread shutdownThread = null; public Metrics(HoodieWriteConfig metricConfig) { registry = new MetricRegistry(); @@ -65,7 +66,8 @@ public Metrics(HoodieWriteConfig metricConfig) { } reporters.forEach(MetricsReporter::start); - Runtime.getRuntime().addShutdownHook(new Thread(this::shutdown)); + shutdownThread = new Thread(() -> shutdown(true)); + Runtime.getRuntime().addShutdownHook(shutdownThread); this.initialized = true; } @@ -112,16 +114,27 @@ private List addAdditionalMetricsExporters(HoodieWriteConfig me return reporterList; } - public synchronized void shutdown() { - try { - registerHoodieCommonMetrics(); - reporters.forEach(MetricsReporter::report); - LOG.info("Stopping the metrics reporter..."); - reporters.forEach(MetricsReporter::stop); - } catch (Exception e) { - LOG.warn("Error while closing reporter", e); - } finally { - initialized = false; + public void shutdown() { + shutdown(false); + } + + private synchronized void shutdown(boolean fromShutdownHook) { + if (!fromShutdownHook) { + Runtime.getRuntime().removeShutdownHook(shutdownThread); + } else { + LOG.warn("Shutting down the metrics reporter from shutdown hook."); + } + if (initialized) { + try { + registerHoodieCommonMetrics(); + reporters.forEach(MetricsReporter::report); + LOG.info("Stopping the metrics reporter..."); + reporters.forEach(MetricsReporter::stop); + } catch (Exception e) { + LOG.warn("Error while closing reporter", e); + } finally { + initialized = false; + } } } diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java index 59ed08f3684e..5418b508ca86 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/TestHoodieMetadataBase.java @@ -296,7 +296,7 @@ protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesClea .withAutoClean(false).retainCommits(1).retainFileVersions(1) .build()) .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024 * 1024).build()) - .withEmbeddedTimelineServerEnabled(true).forTable("test-trip-table") + .withEmbeddedTimelineServerEnabled(false).forTable("test-trip-table") .withFileSystemViewConfig(new FileSystemViewStorageConfig.Builder() .withEnableBackupForRemoteFileSystemView(false).build()) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java index 081c18e8f65b..ef910a1b1253 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java @@ -280,6 +280,7 @@ private void addShutDownHook() { shutdownThread = new Thread() { public void run() { try { + LOG.warn("running logformatwriter hook"); if (output != null) { close(); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/LazyFileIterable.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/LazyFileIterable.java index 8e2210d61ee0..799aa3d4d564 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/LazyFileIterable.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/LazyFileIterable.java @@ -21,6 +21,9 @@ import org.apache.hudi.common.util.BufferedRandomAccessFile; import org.apache.hudi.exception.HoodieException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.util.Iterator; import java.util.Map; @@ -32,6 +35,7 @@ * the latest value for a key spilled to disk and returns the result. */ public class LazyFileIterable implements Iterable { + private static final Logger LOG = LoggerFactory.getLogger(LazyFileIterable.class); // Used to access the value written at a specific position in the file private final String filePath; @@ -128,7 +132,10 @@ private void closeHandle() { } private void addShutdownHook() { - shutdownThread = new Thread(this::closeHandle); + shutdownThread = new Thread(() -> { + LOG.warn("Failed to properly close LazyFileIterable in application."); + this.closeHandle(); + }); Runtime.getRuntime().addShutdownHook(shutdownThread); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java index f47d7f8da517..7891fc4582cd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java @@ -18,6 +18,7 @@ package org.apache.hudi.internal.schema.utils; +import org.apache.hudi.common.util.JsonUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; @@ -28,7 +29,6 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.io.StringWriter; @@ -295,7 +295,7 @@ public static Option fromJson(String json) { return Option.empty(); } try { - return Option.of(fromJson((new ObjectMapper(new JsonFactory())).readValue(json, JsonNode.class))); + return Option.of(fromJson(JsonUtils.getObjectMapper().readTree(json))); } catch (IOException e) { throw new RuntimeException(e); } @@ -311,7 +311,7 @@ public static Option fromJson(String json) { public static TreeMap parseSchemas(String json) { TreeMap result = new TreeMap<>(); try { - JsonNode jsonNode = (new ObjectMapper(new JsonFactory())).readValue(json, JsonNode.class); + JsonNode jsonNode = JsonUtils.getObjectMapper().readTree(json); if (!jsonNode.has(SCHEMAS)) { throw new IllegalArgumentException(String.format("cannot parser schemas from current json string, missing key name: %s", SCHEMAS)); } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBootstrapRecordIterator.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBootstrapRecordIterator.java index 43f2d1ad1ad5..6fa398a8225b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBootstrapRecordIterator.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBootstrapRecordIterator.java @@ -50,7 +50,8 @@ public HoodieBootstrapRecordIterator(ClosableIterator> skeletonI @Override public void close() { - + skeletonIterator.close(); + dataFileIterator.close(); } @Override diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java index 8f3cbe5b19f2..adc8b6b9d956 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java @@ -38,6 +38,7 @@ import org.apache.avro.util.Utf8; import java.io.IOException; +import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; @@ -272,8 +273,8 @@ public static GenericRecord generateAvroRecordFromJson(Schema schema, int record } public static Schema getSchemaFromResource(Class clazz, String name, boolean withHoodieMetadata) { - try { - Schema schema = new Schema.Parser().parse(clazz.getResourceAsStream(name)); + try (InputStream schemaInputStream = clazz.getResourceAsStream(name)) { + Schema schema = new Schema.Parser().parse(schemaInputStream); return withHoodieMetadata ? HoodieAvroUtils.addMetadataFields(schema) : schema; } catch (IOException e) { throw new RuntimeException(String.format("Failed to get schema from resource `%s` for class `%s`", name, clazz.getName())); diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java index 55d03c156089..c191a96fd9d2 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieHFileInputFormat.java @@ -516,6 +516,7 @@ private void ensureRecordsInCommit(String msg, String commit, int expectedNumber } totalCount++; } + recordReader.close(); } assertEquals(expectedNumberOfRecordsInCommit, actualCount, msg); assertEquals(totalExpected, totalCount, msg); diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index 1540aea1023b..37ec5cef24f5 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -764,6 +764,7 @@ private void ensureRecordsInCommit(String msg, String commit, int expectedNumber } totalCount++; } + recordReader.close(); } assertEquals(expectedNumberOfRecordsInCommit, actualCount, msg); assertEquals(totalExpected, totalCount, msg); @@ -819,6 +820,7 @@ public void testHoodieParquetInputFormatReadTimeType() throws IOException { // test date assertEquals(LocalDate.ofEpochDay(testDate).toString(), String.valueOf(writable.get()[2])); } + recordReader.close(); } } } diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index ceae7022fbfa..0633be72453f 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -509,6 +509,7 @@ public void testReaderWithNestedAndComplexSchema(ExternalSpillableMap.DiskMapTyp } reader.close(); } + recordReader.close(); } @ParameterizedTest @@ -592,6 +593,7 @@ public void testSchemaEvolutionAndRollbackBlockInLastLogFile(ExternalSpillableMa while (recordReader.next(key, value)) { // keep reading } + recordReader.close(); reader.close(); } @@ -649,6 +651,7 @@ public void testSchemaEvolution() throws Exception { while (recordReader.next(key, value)) { // keep reading } + recordReader.close(); reader.close(); } From cdefb4b7473eac5e654e9ab6e6e185fd3ef22057 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Thu, 11 Jan 2024 11:19:09 +0800 Subject: [PATCH 042/112] [HUDI-7288] Fix ArrayIndexOutOfBoundsException when upgrade nonPartitionedTable created by 0.10/0.11 HUDI version (#10482) --- .../org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java index 4d7c5b8b6df6..2adddf36df50 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToFiveUpgradeHandler.java @@ -77,7 +77,7 @@ public Map upgrade(HoodieWriteConfig config, HoodieEngin private boolean hasDefaultPartitionPath(HoodieWriteConfig config, HoodieTable table) throws IOException { HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig(); - if (!tableConfig.getPartitionFields().isPresent()) { + if (!tableConfig.isTablePartitioned()) { return false; } String checkPartitionPath = DEPRECATED_DEFAULT_PARTITION_PATH; From ef7f5237f90d7634acf6248b9ef3d1846ca4a547 Mon Sep 17 00:00:00 2001 From: vinoth chandar Date: Thu, 11 Jan 2024 10:38:31 -0800 Subject: [PATCH 043/112] [MINOR] Turning on publishing of test results to Azure Devops (#10477) --- azure-pipelines-20230430.yml | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml index 21c6d932ef9c..e834d5f75217 100644 --- a/azure-pipelines-20230430.yml +++ b/azure-pipelines-20230430.yml @@ -117,7 +117,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'clean install' options: $(MVN_OPTS_INSTALL) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' - task: Maven@4 displayName: UT common flink client/spark-client @@ -125,7 +126,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_MODULES),hudi-client/hudi-spark-client - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - task: Maven@4 @@ -134,7 +136,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB1_MODULES) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - script: | @@ -150,7 +153,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'clean install' options: $(MVN_OPTS_INSTALL) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' - task: Maven@4 displayName: FT client/spark-client @@ -158,7 +162,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB2_MODULES) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - script: | @@ -174,7 +179,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'clean install' options: $(MVN_OPTS_INSTALL) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' - task: Maven@4 displayName: UT spark-datasource @@ -182,7 +188,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB3_MODULES) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - script: | @@ -198,7 +205,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'clean install' options: $(MVN_OPTS_INSTALL) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' - task: Maven@4 displayName: UT other modules @@ -206,7 +214,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB4_UT_MODULES) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - task: Maven@4 @@ -215,7 +224,8 @@ stages: mavenPomFile: 'pom.xml' goals: 'test' options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB4_FT_MODULES) - publishJUnitResults: false + publishJUnitResults: true + testResultsFiles: '**/surefire-reports/TEST-*.xml' jdkVersionOption: '1.8' mavenOptions: '-Xmx4g' - script: | From 635d0c6d507d75faf867f2b8832cdb065c1ab78a Mon Sep 17 00:00:00 2001 From: Prashant Wason Date: Thu, 11 Jan 2024 17:06:50 -0800 Subject: [PATCH 044/112] [MINOR] Parallelized the check for existence of files in IncrementalRelation. (#10480) This speedups the check for large datasets when a very large number of files need to be checked for existence. --- .../scala/org/apache/hudi/IncrementalRelation.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala index 53385bbe2b9c..63877c3bbedc 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala @@ -24,6 +24,7 @@ import org.apache.hudi.HoodieBaseRelation.isSchemaEvolutionEnabledOnRead import org.apache.hudi.HoodieSparkConfUtils.getHollowCommitHandling import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.client.utils.SparkInternalSchemaConverter +import org.apache.hudi.common.config.SerializableConfiguration import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieFileFormat, HoodieRecord, HoodieReplaceCommitMetadata} import org.apache.hudi.common.table.timeline.TimelineUtils.HollowCommitHandling.USE_TRANSITION_TIME @@ -239,11 +240,17 @@ class IncrementalRelation(val sqlContext: SQLContext, var doFullTableScan = false if (fallbackToFullTableScan) { - val fs = basePath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration); + // val fs = basePath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration); val timer = HoodieTimer.start val allFilesToCheck = filteredMetaBootstrapFullPaths ++ filteredRegularFullPaths - val firstNotFoundPath = allFilesToCheck.find(path => !fs.exists(new Path(path))) + val serializedConf = new SerializableConfiguration(sqlContext.sparkContext.hadoopConfiguration) + val localBasePathStr = basePath.toString + val firstNotFoundPath = sqlContext.sparkContext.parallelize(allFilesToCheck.toSeq, allFilesToCheck.size) + .map(path => { + val fs = new Path(localBasePathStr).getFileSystem(serializedConf.get) + fs.exists(new Path(path)) + }).collect().find(v => !v) val timeTaken = timer.endTimer() log.info("Checking if paths exists took " + timeTaken + "ms") From 8546cbfddce6478b0e8f47be61cd87e616e087e8 Mon Sep 17 00:00:00 2001 From: akido <37492907+Akihito-Liang@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:11:30 +0800 Subject: [PATCH 045/112] [HUDI-7282] Avoid verification failure due to append writing of the cow table with cluster configuration when the index is bucket. (#10475) --- .../java/org/apache/hudi/util/ClusteringUtil.java | 2 +- .../org/apache/hudi/utils/TestClusteringUtil.java | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClusteringUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClusteringUtil.java index 75d4ea79815a..ac81b4e7af48 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClusteringUtil.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClusteringUtil.java @@ -49,7 +49,7 @@ public class ClusteringUtil { private static final Logger LOG = LoggerFactory.getLogger(ClusteringUtil.class); public static void validateClusteringScheduling(Configuration conf) { - if (OptionsResolver.isBucketIndexType(conf)) { + if (!OptionsResolver.isAppendMode(conf) && OptionsResolver.isBucketIndexType(conf)) { HoodieIndex.BucketIndexEngineType bucketIndexEngineType = OptionsResolver.getBucketEngineType(conf); switch (bucketIndexEngineType) { case SIMPLE: diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestClusteringUtil.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestClusteringUtil.java index 9a3c17c45c5e..5f58d98a6acd 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestClusteringUtil.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestClusteringUtil.java @@ -33,6 +33,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.util.ClusteringUtil; import org.apache.hudi.util.FlinkTables; @@ -114,6 +115,16 @@ void rollbackClustering() throws Exception { .stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); assertThat(actualInstants, is(oriInstants)); } + + @Test + void validateClusteringScheduling() throws Exception { + beforeEach(); + ClusteringUtil.validateClusteringScheduling(this.conf); + + // validate bucket index + this.conf.setString(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name()); + ClusteringUtil.validateClusteringScheduling(this.conf); + } /** * Generates a clustering plan on the timeline and returns its instant time. From 744befe952bbba3aaaa8ac47130f3485f4e638d9 Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Thu, 11 Jan 2024 19:23:44 -0800 Subject: [PATCH 046/112] [HUDI-6902] Use mvnw command for hadoo-mr test (#10474) The reason is to clean up any orphan resources. --- .github/workflows/bot.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index a52b706fe22b..b7a08d4a9a02 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -134,20 +134,23 @@ jobs: distribution: 'adopt' architecture: x64 cache: maven + - name: Generate Maven Wrapper + run: + mvn -N io.takari:maven:wrapper - name: Build Project env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} FLINK_PROFILE: ${{ matrix.flinkProfile }} run: - mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -DskipTests=true -Phudi-platform-service $MVN_ARGS -am -pl hudi-hadoop-mr,hudi-client/hudi-java-client + ./mvnw clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -DskipTests=true -Phudi-platform-service $MVN_ARGS -am -pl hudi-hadoop-mr,hudi-client/hudi-java-client - name: UT - hudi-hadoop-mr and hudi-client/hudi-java-client env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} FLINK_PROFILE: ${{ matrix.flinkProfile }} run: - mvn test -Punit-tests -fae -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -pl hudi-hadoop-mr,hudi-client/hudi-java-client $MVN_ARGS + ./mvnw test -Punit-tests -fae -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -pl hudi-hadoop-mr,hudi-client/hudi-java-client $MVN_ARGS test-spark-java17: runs-on: ubuntu-latest From 36eeb94b26477942c00e45a43bad64989ee46771 Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Thu, 11 Jan 2024 19:26:34 -0800 Subject: [PATCH 047/112] [HUDI-6902] Give minimum memory for unit tests (#10469) Changes: 1. Set initial memory 128M. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d5ce8042db33..e404b0c6e2fd 100644 --- a/pom.xml +++ b/pom.xml @@ -205,7 +205,7 @@ provided - -Xmx2g + -Xmx2g -Xms128m 0.8.8 compile org.apache.hudi. From da6a49061d6db7127c352f530c1d333fd498da7d Mon Sep 17 00:00:00 2001 From: kongwei Date: Fri, 12 Jan 2024 17:37:51 +0800 Subject: [PATCH 048/112] [HUDI-7278] make bloom filter skippable for CPU saving (#10457) * make bloom filter skippable for CPU saving --------- Co-authored-by: wei.kong --- .../apache/hudi/config/HoodieWriteConfig.java | 4 +++ .../storage/HoodieSparkFileWriterFactory.java | 3 +- .../TestHoodieAvroFileWriterFactory.java | 31 +++++++++++++++++++ .../common/config/HoodieStorageConfig.java | 11 +++++++ .../storage/HoodieAvroFileWriterFactory.java | 3 +- .../io/storage/HoodieFileWriterFactory.java | 10 ++++++ 6 files changed, 58 insertions(+), 4 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index a964ceef958d..4e1cdb9f5d3c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -2090,6 +2090,10 @@ public String parquetFieldIdWriteEnabled() { return getString(HoodieStorageConfig.PARQUET_FIELD_ID_WRITE_ENABLED); } + public boolean parquetBloomFilterEnabled() { + return getBooleanOrDefault(HoodieStorageConfig.PARQUET_WITH_BLOOM_FILTER_ENABLED); + } + public Option getLogDataBlockFormat() { return Option.ofNullable(getString(HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT)) .map(HoodieLogBlock.HoodieLogBlockType::fromId); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java index d2ab83f1481e..5feefa3bee2b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java @@ -44,8 +44,7 @@ protected HoodieFileWriter newParquetFileWriter( String instantTime, Path path, Configuration conf, HoodieConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); - boolean enableBloomFilter = populateMetaFields; - Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); + Option filter = enableBloomFilter(populateMetaFields, config) ? Option.of(createBloomFilter(config)) : Option.empty(); String compressionCodecName = config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" if (compressionCodecName.isEmpty()) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java index 3afe6ee67081..120ae4fe8917 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileWriterFactory.java @@ -19,9 +19,11 @@ package org.apache.hudi.io.storage; import org.apache.hudi.client.SparkTaskContextSupplier; +import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.HoodieClientTestBase; @@ -31,6 +33,7 @@ import java.io.IOException; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -74,4 +77,32 @@ public void testGetFileWriter() throws IOException { }, "should fail since log storage writer is not supported yet."); assertTrue(thrown.getMessage().contains("format not supported yet.")); } + + @Test + public void testEnableBloomFilter() { + HoodieWriteConfig config = getConfig(IndexType.BLOOM); + assertTrue(HoodieFileWriterFactory.enableBloomFilter(true, config)); + assertFalse(HoodieFileWriterFactory.enableBloomFilter(false, config)); + + config = getConfig(IndexType.SIMPLE); + assertTrue(HoodieFileWriterFactory.enableBloomFilter(true, config)); + + config = getConfig(IndexType.SIMPLE); + assertTrue(HoodieFileWriterFactory.enableBloomFilter(true, config)); + + config = getConfigBuilder(IndexType.BLOOM) + .withStorageConfig(HoodieStorageConfig.newBuilder() + .parquetBloomFilterEnable(false).build()).build(); + assertTrue(HoodieFileWriterFactory.enableBloomFilter(true, config)); + + config = getConfigBuilder(IndexType.SIMPLE) + .withStorageConfig(HoodieStorageConfig.newBuilder() + .parquetBloomFilterEnable(true).build()).build(); + assertTrue(HoodieFileWriterFactory.enableBloomFilter(true, config)); + + config = getConfigBuilder(IndexType.SIMPLE) + .withStorageConfig(HoodieStorageConfig.newBuilder() + .parquetBloomFilterEnable(false).build()).build(); + assertFalse(HoodieFileWriterFactory.enableBloomFilter(true, config)); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java index 2660b0b22c83..d68b8326ca8c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java @@ -152,6 +152,12 @@ public class HoodieStorageConfig extends HoodieConfig { .withDocumentation("Would only be effective with Spark 3.3+. Sets spark.sql.parquet.fieldId.write.enabled. " + "If enabled, Spark will write out parquet native field ids that are stored inside StructField's metadata as parquet.field.id to parquet files."); + public static final ConfigProperty PARQUET_WITH_BLOOM_FILTER_ENABLED = ConfigProperty + .key("hoodie.parquet.bloom.filter.enabled") + .defaultValue(true) + .withDocumentation("Control whether to write bloom filter or not. Default true. " + + "We can set to false in non bloom index cases for CPU resource saving."); + public static final ConfigProperty HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty .key("hoodie.hfile.compression.algorithm") .defaultValue("GZ") @@ -420,6 +426,11 @@ public Builder parquetFieldIdWrite(String parquetFieldIdWrite) { return this; } + public Builder parquetBloomFilterEnable(boolean parquetBloomFilterEnable) { + storageConfig.setValue(PARQUET_WITH_BLOOM_FILTER_ENABLED, String.valueOf(parquetBloomFilterEnable)); + return this; + } + public Builder hfileCompressionAlgorithm(String hfileCompressionAlgorithm) { storageConfig.setValue(HFILE_COMPRESSION_ALGORITHM_NAME, hfileCompressionAlgorithm); return this; diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java index 8ed597ed920d..471ab149fa58 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java @@ -51,8 +51,7 @@ protected HoodieFileWriter newParquetFileWriter( String instantTime, Path path, Configuration conf, HoodieConfig config, Schema schema, TaskContextSupplier taskContextSupplier) throws IOException { boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); - boolean enableBloomFilter = populateMetaFields; - HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(conf, schema, config, enableBloomFilter); + HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(conf, schema, config, enableBloomFilter(populateMetaFields, config)); String compressionCodecName = config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index a992886fcdc0..3c521441b1af 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -128,4 +128,14 @@ protected BloomFilter createBloomFilter(HoodieConfig config) { config.getIntOrDefault(HoodieStorageConfig.BLOOM_FILTER_DYNAMIC_MAX_ENTRIES), config.getStringOrDefault(HoodieStorageConfig.BLOOM_FILTER_TYPE)); } + + /** + * Check if need to enable bloom filter. + */ + public static boolean enableBloomFilter(boolean populateMetaFields, HoodieConfig config) { + return populateMetaFields && (config.getBooleanOrDefault(HoodieStorageConfig.PARQUET_WITH_BLOOM_FILTER_ENABLED) + // HoodieIndexConfig is located in the package hudi-client-common, and the package hudi-client-common depends on the package hudi-common, + // so the class HoodieIndexConfig cannot be accessed in hudi-common, otherwise there will be a circular dependency problem + || (config.contains("hoodie.index.type") && config.getString("hoodie.index.type").contains("BLOOM"))); + } } From 7d97216703bdbcca4a6949894033f3e0fa5d96f8 Mon Sep 17 00:00:00 2001 From: empcl <1515827454@qq.com> Date: Sun, 14 Jan 2024 10:53:00 +0800 Subject: [PATCH 049/112] [HUDI-7293] Incremental read of insert table using rebalance strategy (#10490) --- .../apache/hudi/table/HoodieTableSource.java | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java index 03eb3205e8cc..dc6cddd4a55d 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java @@ -107,8 +107,8 @@ import java.util.stream.IntStream; import static org.apache.hudi.configuration.HadoopConfigurations.getParquetConf; -import static org.apache.hudi.util.ExpressionUtils.splitExprByPartitionCall; import static org.apache.hudi.util.ExpressionUtils.filterSimpleCallExpression; +import static org.apache.hudi.util.ExpressionUtils.splitExprByPartitionCall; /** * Hoodie batch table source that always read the latest snapshot of the underneath table. @@ -207,13 +207,23 @@ public DataStream produceDataStream(StreamExecutionEnvironment execEnv) conf, FilePathUtils.toFlinkPath(path), tableRowType, maxCompactionMemoryInBytes, partitionPruner); InputFormat inputFormat = getInputFormat(true); OneInputStreamOperatorFactory factory = StreamReadOperator.factory((MergeOnReadInputFormat) inputFormat); - SingleOutputStreamOperator source = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor")) + DataStream monitorOperatorStream = execEnv.addSource(monitoringFunction, getSourceOperatorName("split_monitor")) .uid(Pipelines.opUID("split_monitor", conf)) .setParallelism(1) - .keyBy(MergeOnReadInputSplit::getFileId) - .transform("split_reader", typeInfo, factory) - .uid(Pipelines.opUID("split_reader", conf)) - .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); + .setMaxParallelism(1); + SingleOutputStreamOperator source; + if (OptionsResolver.isAppendMode(HoodieTableSource.this.conf)) { + source = monitorOperatorStream + .transform("split_reader", typeInfo, factory) + .uid(Pipelines.opUID("split_reader", conf)) + .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); + } else { + source = monitorOperatorStream + .keyBy(MergeOnReadInputSplit::getFileId) + .transform("split_reader", typeInfo, factory) + .uid(Pipelines.opUID("split_reader", conf)) + .setParallelism(conf.getInteger(FlinkOptions.READ_TASKS)); + } return new DataStreamSource<>(source); } else { InputFormatSourceFunction func = new InputFormatSourceFunction<>(getInputFormat(), typeInfo); From 2b2e1a0a19a34ffe4e19ef757e4bad7d497dc327 Mon Sep 17 00:00:00 2001 From: akido <37492907+Akihito-Liang@users.noreply.github.com> Date: Tue, 16 Jan 2024 10:39:14 +0800 Subject: [PATCH 050/112] [HUDI-7286] Flink get hudi index type ignore case sensitive (#10476) --- .../hudi/configuration/OptionsResolver.java | 2 +- .../configuration/TestOptionsResolver.java | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/configuration/TestOptionsResolver.java diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java index 934e22f11397..c7e77767418a 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java @@ -327,7 +327,7 @@ public static boolean isReadByTxnCompletionTime(Configuration conf) { * Returns the index type. */ public static HoodieIndex.IndexType getIndexType(Configuration conf) { - return HoodieIndex.IndexType.valueOf(conf.getString(FlinkOptions.INDEX_TYPE)); + return HoodieIndex.IndexType.valueOf(conf.getString(FlinkOptions.INDEX_TYPE).toUpperCase()); } /** diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/configuration/TestOptionsResolver.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/configuration/TestOptionsResolver.java new file mode 100644 index 000000000000..a68a4ab4d41b --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/configuration/TestOptionsResolver.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.configuration; + +import org.apache.flink.configuration.Configuration; +import org.apache.hudi.common.model.WriteConcurrencyMode; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Test for {@link OptionsResolver} + */ +public class TestOptionsResolver { + @TempDir + File tempFile; + + @Test + void testGetIndexType() { + Configuration conf = getConf(); + // set uppercase index + conf.setString(FlinkOptions.INDEX_TYPE, "BLOOM"); + assertEquals(HoodieIndex.IndexType.BLOOM, OptionsResolver.getIndexType(conf)); + // set lowercase index + conf.setString(FlinkOptions.INDEX_TYPE, "bloom"); + assertEquals(HoodieIndex.IndexType.BLOOM, OptionsResolver.getIndexType(conf)); + } + + private Configuration getConf() { + Configuration conf = new Configuration(); + conf.setString(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()); + conf.setString(FlinkOptions.PATH, tempFile.getAbsolutePath()); + return conf; + } +} From 0de5f0765242470316b3fd9c1ce493b81c65473c Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Tue, 16 Jan 2024 13:26:13 -0800 Subject: [PATCH 051/112] [HUDI-6092] Set the timeout for the forked JVM (#10496) After we set this parameter, the surefire will try to ping the forked JVM after the timeout. --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index e404b0c6e2fd..b4b93e9bee24 100644 --- a/pom.xml +++ b/pom.xml @@ -592,6 +592,7 @@ ${surefire-log4j.file} false + 30 From d414b6033a2b7b56836c6a1583304f3d512b0daa Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Tue, 16 Jan 2024 14:24:23 -0800 Subject: [PATCH 052/112] [MINOR] Clean default Hadoop configuration values in tests (#10495) * [MINOR] Clean default Hadoop configurations for SparkContext These default Hadoop configurations are not used in Hudi tests. * Consolidating the code into a helper class --------- Co-authored-by: vinoth chandar --- .../hudi/testutils/HoodieClientTestUtils.java | 14 ++++++++++++++ .../testutils/HoodieSparkClientTestHarness.java | 9 ++++++--- .../SparkClientFunctionalTestHarness.java | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 991c615c35dd..55619a2a24bf 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -61,6 +62,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -125,6 +127,18 @@ public static SparkConf getSparkConfForTest(String appName) { return SparkRDDReadClient.addHoodieSupport(sparkConf); } + public static void overrideSparkHadoopConfiguration(SparkContext sparkContext) { + try { + // Clean the default Hadoop configurations since in our Hudi tests they are not used. + Field hadoopConfigurationField = sparkContext.getClass().getDeclaredField("_hadoopConfiguration"); + hadoopConfigurationField.setAccessible(true); + Configuration testHadoopConfig = new Configuration(false); + hadoopConfigurationField.set(sparkContext, testHadoopConfig); + } catch (NoSuchFieldException | IllegalAccessException e) { + LOG.warn(e.getMessage()); + } + } + private static HashMap getLatestFileIDsToFullPath(String basePath, HoodieTimeline commitTimeline, List commitsToReturn) throws IOException { HashMap fileIdToFullPath = new HashMap<>(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java index 299c4ab4b799..b9b2fe2c869d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java @@ -70,6 +70,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SQLContext; @@ -192,11 +194,12 @@ protected void initSparkContexts(String appName) { } // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName)); + SparkConf sc = HoodieClientTestUtils.getSparkConfForTest(appName + "#" + testMethodName); + SparkContext sparkContext = new SparkContext(sc); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(sparkContext); + jsc = new JavaSparkContext(sparkContext); jsc.setLogLevel("ERROR"); - hadoopConf = jsc.hadoopConfiguration(); - sparkSession = SparkSession.builder() .withExtensions(JFunction.toScala(sparkSessionExtensions -> { sparkSessionExtensionsInjector.ifPresent(injector -> injector.accept(sparkSessionExtensions)); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java index 511613d90443..14d325bfdacb 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java @@ -201,6 +201,7 @@ public synchronized void runBeforeEach() { SparkRDDReadClient.addHoodieSupport(sparkConf); spark = SparkSession.builder().config(sparkConf).getOrCreate(); sqlContext = spark.sqlContext(); + HoodieClientTestUtils.overrideSparkHadoopConfiguration(spark.sparkContext()); jsc = new JavaSparkContext(spark.sparkContext()); context = new HoodieSparkEngineContext(jsc); timelineService = HoodieClientTestUtils.initTimelineService( From 9ddcfb166f07caed3982d4e5174aea16f88ef08d Mon Sep 17 00:00:00 2001 From: Rohit Mittapalli Date: Tue, 16 Jan 2024 17:52:07 -0800 Subject: [PATCH 053/112] [HUDI-7300] Merge schema in ParuqetDFSSource (#10199) --- .../config/ParquetDFSSourceConfig.java | 49 +++++++++++++++++++ .../utilities/sources/ParquetDFSSource.java | 6 ++- 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 hudi-utilities/src/main/java/org/apache/hudi/utilities/config/ParquetDFSSourceConfig.java diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/ParquetDFSSourceConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/ParquetDFSSourceConfig.java new file mode 100644 index 000000000000..b3bf5678baf5 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/ParquetDFSSourceConfig.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities.config; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import javax.annotation.concurrent.Immutable; + +import static org.apache.hudi.common.util.ConfigUtils.DELTA_STREAMER_CONFIG_PREFIX; +import static org.apache.hudi.common.util.ConfigUtils.STREAMER_CONFIG_PREFIX; + +/** + * Parquet DFS Source Configs + */ +@Immutable +@ConfigClassProperty(name = "Parquet DFS Source Configs", + groupName = ConfigGroups.Names.HUDI_STREAMER, + subGroupName = ConfigGroups.SubGroupNames.DELTA_STREAMER_SOURCE, + description = "Configurations controlling the behavior of Parquet DFS source in Hudi Streamer.") +public class ParquetDFSSourceConfig extends HoodieConfig { + + public static final ConfigProperty PARQUET_DFS_MERGE_SCHEMA = ConfigProperty + .key(STREAMER_CONFIG_PREFIX + "source.parquet.dfs.merge_schema.enable") + .defaultValue(false) + .withAlternatives(DELTA_STREAMER_CONFIG_PREFIX + "source.parquet.dfs.merge_schema.enable") + .markAdvanced() + .sinceVersion("1.0.0") + .withDocumentation("Merge schema across parquet files within a single write"); +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java index a56a878f1fe7..a3ee555ec5ab 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/ParquetDFSSource.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.utilities.config.ParquetDFSSourceConfig; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.DFSPathSelector; @@ -29,6 +30,8 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; +import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys; + /** * DFS Source that reads parquet data. */ @@ -52,6 +55,7 @@ public Pair>, String> fetchNextBatch(Option lastCkpt } private Dataset fromFiles(String pathStr) { - return sparkSession.read().parquet(pathStr.split(",")); + boolean mergeSchemaOption = getBooleanWithAltKeys(this.props, ParquetDFSSourceConfig.PARQUET_DFS_MERGE_SCHEMA); + return sparkSession.read().option("mergeSchema", mergeSchemaOption).parquet(pathStr.split(",")); } } From 5bc160bf0a788cf23fe640c51462f50e38efa4d0 Mon Sep 17 00:00:00 2001 From: KnightChess <981159963@qq.com> Date: Wed, 17 Jan 2024 10:38:27 +0800 Subject: [PATCH 054/112] [MINOR] Fix eager rollback mdt ut (#10506) Signed-off-by: wulingqi <981159963@qq.com> --- .../org/apache/hudi/client/TestJavaHoodieBackedMetadata.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index 9f893df6d4e5..1e09f7e093c4 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -1533,8 +1533,8 @@ public void testEagerRollbackinMDT() throws IOException { fileStatus.getPath().getName().equals(rollbackInstant.getTimestamp() + "." + HoodieTimeline.ROLLBACK_ACTION)).collect(Collectors.toList()); // ensure commit3's delta commit in MDT has last mod time > the actual rollback for previous failed commit i.e. commit2. - // if rollback wasn't eager, rollback's last mod time will be lower than the commit3'd delta commit last mod time. - assertTrue(commit3Files.get(0).getModificationTime() > rollbackFiles.get(0).getModificationTime()); + // if rollback wasn't eager, rollback's last mod time will be not larger than the commit3'd delta commit last mod time. + assertTrue(commit3Files.get(0).getModificationTime() >= rollbackFiles.get(0).getModificationTime()); client.close(); } From 8048c9988eb009c40793f1f8a281000d0d409e27 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Wed, 17 Jan 2024 16:17:19 -0500 Subject: [PATCH 055/112] [HUDI-7296] Reduce CI Time by Minimizing Duplicate Code Coverage in Tests (#10492) * reduce combos of tests * build success --------- Co-authored-by: Jonathan Vexler <=> --- .../hudi/functional/TestBootstrapRead.java | 30 ++++++----- ...odieDeltaStreamerSchemaEvolutionQuick.java | 53 ++++++++++++------- 2 files changed, 53 insertions(+), 30 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrapRead.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrapRead.java index d926a3be5a4e..1e36f491b3f6 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrapRead.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrapRead.java @@ -40,23 +40,29 @@ @Tag("functional") public class TestBootstrapRead extends TestBootstrapReadBase { private static Stream testArgs() { + boolean fullTest = false; Stream.Builder b = Stream.builder(); - String[] bootstrapType = {"full", "metadata", "mixed"}; - Boolean[] dashPartitions = {true,false}; - HoodieTableType[] tableType = {COPY_ON_WRITE, MERGE_ON_READ}; - Integer[] nPartitions = {0, 1, 2}; - for (HoodieTableType tt : tableType) { - for (Boolean dash : dashPartitions) { - for (String bt : bootstrapType) { - for (Integer n : nPartitions) { - // can't be mixed bootstrap if it's nonpartitioned - // don't need to test slash partitions if it's nonpartitioned - if ((!bt.equals("mixed") && dash) || n > 0) { - b.add(Arguments.of(bt, dash, tt, n)); + if (fullTest) { + String[] bootstrapType = {"full", "metadata", "mixed"}; + Boolean[] dashPartitions = {true,false}; + HoodieTableType[] tableType = {COPY_ON_WRITE, MERGE_ON_READ}; + Integer[] nPartitions = {0, 1, 2}; + for (HoodieTableType tt : tableType) { + for (Boolean dash : dashPartitions) { + for (String bt : bootstrapType) { + for (Integer n : nPartitions) { + // can't be mixed bootstrap if it's nonpartitioned + // don't need to test slash partitions if it's nonpartitioned + if ((!bt.equals("mixed") && dash) || n > 0) { + b.add(Arguments.of(bt, dash, tt, n)); + } } } } } + } else { + b.add(Arguments.of("metadata", true, COPY_ON_WRITE, 0)); + b.add(Arguments.of("mixed", false, MERGE_ON_READ, 2)); } return b.build(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java index de21b33fff4e..81f27eec7fb8 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java @@ -58,25 +58,34 @@ public void teardown() throws Exception { } protected static Stream testArgs() { + boolean fullTest = false; Stream.Builder b = Stream.builder(); - //only testing row-writer enabled for now - for (Boolean rowWriterEnable : new Boolean[] {true}) { - for (Boolean nullForDeletedCols : new Boolean[] {false, true}) { - for (Boolean useKafkaSource : new Boolean[] {false, true}) { - for (Boolean addFilegroups : new Boolean[] {false, true}) { - for (Boolean multiLogFiles : new Boolean[] {false, true}) { - for (Boolean shouldCluster : new Boolean[] {false, true}) { - for (String tableType : new String[] {"COPY_ON_WRITE", "MERGE_ON_READ"}) { - if (!multiLogFiles || tableType.equals("MERGE_ON_READ")) { - b.add(Arguments.of(tableType, shouldCluster, false, rowWriterEnable, addFilegroups, multiLogFiles, useKafkaSource, nullForDeletedCols)); + if (fullTest) { + //only testing row-writer enabled for now + for (Boolean rowWriterEnable : new Boolean[] {true}) { + for (Boolean nullForDeletedCols : new Boolean[] {false, true}) { + for (Boolean useKafkaSource : new Boolean[] {false, true}) { + for (Boolean addFilegroups : new Boolean[] {false, true}) { + for (Boolean multiLogFiles : new Boolean[] {false, true}) { + for (Boolean shouldCluster : new Boolean[] {false, true}) { + for (String tableType : new String[] {"COPY_ON_WRITE", "MERGE_ON_READ"}) { + if (!multiLogFiles || tableType.equals("MERGE_ON_READ")) { + b.add(Arguments.of(tableType, shouldCluster, false, rowWriterEnable, addFilegroups, multiLogFiles, useKafkaSource, nullForDeletedCols)); + } } } + b.add(Arguments.of("MERGE_ON_READ", false, true, rowWriterEnable, addFilegroups, multiLogFiles, useKafkaSource, nullForDeletedCols)); } - b.add(Arguments.of("MERGE_ON_READ", false, true, rowWriterEnable, addFilegroups, multiLogFiles, useKafkaSource, nullForDeletedCols)); } } } } + } else { + b.add(Arguments.of("COPY_ON_WRITE", true, false, true, false, false, true, false)); + b.add(Arguments.of("COPY_ON_WRITE", true, false, true, false, false, true, true)); + b.add(Arguments.of("MERGE_ON_READ", false, true, true, true, true, true, true)); + b.add(Arguments.of("MERGE_ON_READ", false, true, true, true, true, true, true)); + b.add(Arguments.of("MERGE_ON_READ", false, false, true, true, true, false, true)); } return b.build(); } @@ -96,19 +105,27 @@ protected static Stream testReorderedColumn() { } protected static Stream testParamsWithSchemaTransformer() { + boolean fullTest = false; Stream.Builder b = Stream.builder(); - for (Boolean useTransformer : new Boolean[] {false, true}) { - for (Boolean setSchema : new Boolean[] {false, true}) { - for (Boolean rowWriterEnable : new Boolean[] {true}) { - for (Boolean nullForDeletedCols : new Boolean[] {false, true}) { - for (Boolean useKafkaSource : new Boolean[] {false, true}) { - for (String tableType : new String[] {"COPY_ON_WRITE", "MERGE_ON_READ"}) { - b.add(Arguments.of(tableType, rowWriterEnable, useKafkaSource, nullForDeletedCols, useTransformer, setSchema)); + if (fullTest) { + for (Boolean useTransformer : new Boolean[] {false, true}) { + for (Boolean setSchema : new Boolean[] {false, true}) { + for (Boolean rowWriterEnable : new Boolean[] {true}) { + for (Boolean nullForDeletedCols : new Boolean[] {false, true}) { + for (Boolean useKafkaSource : new Boolean[] {false, true}) { + for (String tableType : new String[] {"COPY_ON_WRITE", "MERGE_ON_READ"}) { + b.add(Arguments.of(tableType, rowWriterEnable, useKafkaSource, nullForDeletedCols, useTransformer, setSchema)); + } } } } } } + } else { + b.add(Arguments.of("COPY_ON_WRITE", true, true, true, true, true)); + b.add(Arguments.of("COPY_ON_WRITE", true, false, false, false, true)); + b.add(Arguments.of("MERGE_ON_READ", true, true, true, false, false)); + b.add(Arguments.of("MERGE_ON_READ", true, false, true, true, false)); } return b.build(); } From 7c13eb3e1c5a070db1fe37ea54cd91073457ef42 Mon Sep 17 00:00:00 2001 From: majian <47964462+majian1998@users.noreply.github.com> Date: Thu, 18 Jan 2024 20:16:32 +0800 Subject: [PATCH 056/112] [HUDI-7246] Fix Data Skipping Issue: No Results When Query Conditions Involve Both Columns with and without Column Stats (#10389) --- .../apache/hudi/ColumnStatsIndexSupport.scala | 16 ++- .../spark/sql/hudi/DataSkippingUtils.scala | 12 +- .../apache/hudi/TestDataSkippingUtils.scala | 41 ++++++- .../sql/hudi/TestDataSkippingQuery.scala | 114 ++++++++++++++++++ 4 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala index dd76aee2f187..f38d4318cac5 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala @@ -270,13 +270,17 @@ class ColumnStatsIndexSupport(spark: SparkSession, acc ++= Seq(colStatRecord.getMinValue, colStatRecord.getMaxValue, colStatRecord.getNullCount) case None => // NOTE: This could occur in either of the following cases: - // 1. Particular file does not have this particular column (which is indexed by Column Stats Index): - // in this case we're assuming missing column to essentially contain exclusively - // null values, we set min/max values as null and null-count to be equal to value-count (this - // behavior is consistent with reading non-existent columns from Parquet) + // 1. When certain columns exist in the schema but are absent in some data files due to + // schema evolution or other reasons, these columns will not be present in the column stats. + // In this case, we fill in default values by setting the min, max and null-count to null + // (this behavior is consistent with reading non-existent columns from Parquet). + // 2. When certain columns are present both in the schema and the data files, + // but the column stats are absent for these columns due to their types not supporting indexing, + // we also set these columns to default values. // - // This is a way to determine current column's index without explicit iteration (we're adding 3 stats / column) - acc ++= Seq(null, null, valueCount) + // This approach prevents errors during data skipping and, because the filter includes an isNull check, + // these conditions will not affect the accurate return of files from data skipping. + acc ++= Seq(null, null, null) } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala index 7cb4a3c54284..cfd8d1351d8d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/DataSkippingUtils.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, EqualNullSafe, EqualTo, Expression, ExtractValue, GetStructField, GreaterThan, GreaterThanOrEqual, In, InSet, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not, Or, StartsWith, SubqueryExpression} import org.apache.spark.sql.functions.col import org.apache.spark.sql.hudi.ColumnStatsExpressionUtils._ -import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{AnalysisException, HoodieCatalystExpressionUtils} import org.apache.spark.unsafe.types.UTF8String @@ -211,10 +211,16 @@ object DataSkippingUtils extends Logging { .map(colName => GreaterThan(genColNumNullsExpr(colName), Literal(0))) // Filter "colA is not null" - // Translates to "colA_nullCount < colA_valueCount" for index lookup + // Translates to "colA_nullCount = null or colA_valueCount = null or colA_nullCount < colA_valueCount" for index lookup + // "colA_nullCount = null or colA_valueCount = null" means we are not certain whether the column is null or not, + // hence we return True to ensure this does not affect the query. case IsNotNull(attribute: AttributeReference) => getTargetIndexedColumnName(attribute, indexSchema) - .map(colName => LessThan(genColNumNullsExpr(colName), genColValueCountExpr)) + .map {colName => + val numNullExpr = genColNumNullsExpr(colName) + val valueCountExpr = genColValueCountExpr + Or(Or(IsNull(numNullExpr), IsNull(valueCountExpr)), LessThan(numNullExpr, valueCountExpr)) + } // Filter "expr(colA) in (B1, B2, ...)" // Translates to "(colA_minValue <= B1 AND colA_maxValue >= B1) OR (colA_minValue <= B2 AND colA_maxValue >= B2) ... " diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala index f60b95d8f5aa..cd1846285ffe 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSkippingUtils.scala @@ -48,17 +48,17 @@ case class IndexRow(fileName: String, // Corresponding A column is LongType A_minValue: Long = -1, A_maxValue: Long = -1, - A_nullCount: Long = -1, + A_nullCount: java.lang.Long = null, // Corresponding B column is StringType B_minValue: String = null, B_maxValue: String = null, - B_nullCount: Long = -1, + B_nullCount: java.lang.Long = null, // Corresponding B column is TimestampType C_minValue: Timestamp = null, C_maxValue: Timestamp = null, - C_nullCount: Long = -1) { + C_nullCount: java.lang.Long = null) { def toRow: Row = Row(productIterator.toSeq: _*) } @@ -89,7 +89,8 @@ class TestDataSkippingUtils extends HoodieSparkClientTestBase with SparkAdapterS @MethodSource(Array( "testBasicLookupFilterExpressionsSource", "testAdvancedLookupFilterExpressionsSource", - "testCompositeFilterExpressionsSource" + "testCompositeFilterExpressionsSource", + "testSupportedAndUnsupportedDataSkippingColumnsSource" )) def testLookupFilterExpressions(sourceFilterExprStr: String, input: Seq[IndexRow], expectedOutput: Seq[String]): Unit = { // We have to fix the timezone to make sure all date-bound utilities output @@ -197,6 +198,38 @@ object TestDataSkippingUtils { ) } + def testSupportedAndUnsupportedDataSkippingColumnsSource(): java.util.stream.Stream[Arguments] = { + java.util.stream.Stream.of( + arguments( + "A = 1 and B is not null", + Seq( + IndexRow("file_1", valueCount = 2, A_minValue = 0, A_maxValue = 1, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_2", valueCount = 2, A_minValue = 1, A_maxValue = 2, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_3", valueCount = 2, A_minValue = 2, A_maxValue = 3, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null) + ), + Seq("file_1", "file_2") + ), + arguments( + "B = 1 and B is not null", + Seq( + IndexRow("file_1", valueCount = 2, A_minValue = 0, A_maxValue = 1, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_2", valueCount = 2, A_minValue = 1, A_maxValue = 2, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_3", valueCount = 2, A_minValue = 2, A_maxValue = 3, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null) + ), + Seq("file_1", "file_2", "file_3") + ), + arguments( + "A = 1 and A is not null and B is not null and B > 2", + Seq( + IndexRow("file_1", valueCount = 2, A_minValue = 0, A_maxValue = 1, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_2", valueCount = 2, A_minValue = 1, A_maxValue = 2, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null), + IndexRow("file_3", valueCount = 2, A_minValue = 2, A_maxValue = 3, A_nullCount = 0, B_minValue = null, B_maxValue = null, B_nullCount = null) + ), + Seq("file_1", "file_2") + ) + ) + } + def testMiscLookupFilterExpressionsSource(): java.util.stream.Stream[Arguments] = { // NOTE: Have to use [[Arrays.stream]], as Scala can't resolve properly 2 overloads for [[Stream.of]] // (for single element) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala new file mode 100644 index 000000000000..1ac7185f642d --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDataSkippingQuery.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.hudi + +class TestDataSkippingQuery extends HoodieSparkSqlTestBase { + + test("Test the data skipping query involves conditions " + + "that cover both columns supported by column stats and those that are not supported.") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql("set hoodie.metadata.enable = true") + spark.sql("set hoodie.metadata.index.column.stats.enable = true") + spark.sql("set hoodie.enable.data.skipping = true") + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | attributes map, + | price double, + | ts long, + | dt string + |) using hudi + | tblproperties (primaryKey = 'id') + | partitioned by (dt) + | location '${tmp.getCanonicalPath}' + """.stripMargin) + spark.sql( + s""" + | insert into $tableName values + | (1, 'a1', map('color', 'red', 'size', 'M'), 10, 1000, '2021-01-05'), + | (2, 'a2', map('color', 'blue', 'size', 'L'), 20, 2000, '2021-01-06'), + | (3, 'a3', map('color', 'green', 'size', 'S'), 30, 3000, '2021-01-07') + """.stripMargin) + // Check the case where the WHERE condition only includes columns not supported by column stats + checkAnswer(s"select id, name, price, ts, dt from $tableName where attributes.color = 'red'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + // Check the case where the WHERE condition only includes columns supported by column stats + checkAnswer(s"select id, name, price, ts, dt from $tableName where name='a1'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + // Check the case where the WHERE condition includes both columns supported by column stats and those that are not + checkAnswer(s"select id, name, price, ts, dt from $tableName where attributes.color = 'red' and name='a1'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + } + } + + test("Test data skipping when specifying columns with column stats support.") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql("set hoodie.metadata.enable = true") + spark.sql("set hoodie.metadata.index.column.stats.enable = true") + spark.sql("set hoodie.enable.data.skipping = true") + spark.sql("set hoodie.metadata.index.column.stats.column.list = name") + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | attributes map, + | price double, + | ts long, + | dt string + |) using hudi + | tblproperties (primaryKey = 'id') + | partitioned by (dt) + | location '${tmp.getCanonicalPath}' + """.stripMargin) + spark.sql( + s""" + | insert into $tableName values + | (1, 'a1', map('color', 'red', 'size', 'M'), 10, 1000, '2021-01-05'), + | (2, 'a2', map('color', 'blue', 'size', 'L'), 20, 2000, '2021-01-06'), + | (3, 'a3', map('color', 'green', 'size', 'S'), 30, 3000, '2021-01-07') + """.stripMargin) + // Check the case where the WHERE condition only includes columns not supported by column stats + checkAnswer(s"select id, name, price, ts, dt from $tableName where attributes.color = 'red'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + // Check the case where the WHERE condition only includes columns supported by column stats + checkAnswer(s"select id, name, price, ts, dt from $tableName where name='a1'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + // Check the case where the WHERE condition includes both columns supported by column stats and those that are not + checkAnswer(s"select id, name, price, ts, dt from $tableName where attributes.color = 'red' and name='a1'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + // Check WHERE condition that includes both columns with existing column stats and columns of types + // that support column stats but for which column stats do not exist + checkAnswer(s"select id, name, price, ts, dt from $tableName where ts=1000 and name='a1'")( + Seq(1, "a1", 10.0, 1000, "2021-01-05") + ) + } + } +} From 23372705171d02070dfd84529916b6b90cffbcbb Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 15:38:57 -0800 Subject: [PATCH 057/112] [HUDI-7170] Implement HFile reader independent of HBase (#10241) This commit adds a Hudi-native HFile reader implementation independent of HBase. --- hudi-common/pom.xml | 14 + .../storage/TestHoodieHFileReaderWriter.java | 45 +- .../storage/TestHoodieReaderWriterUtils.java | 89 +++ hudi-io/README.md | 31 + hudi-io/hfile_format.md | 394 +++++++++++ hudi-io/pom.xml | 126 ++++ .../apache/hudi/common/util/FileIOUtils.java | 0 .../org/apache/hudi/common/util/Option.java | 0 .../util/io/ByteBufferBackedInputStream.java | 0 .../hudi/exception/HoodieException.java | 0 .../hudi/exception/HoodieIOException.java | 0 .../hudi/io/compress/CompressionCodec.java | 44 ++ .../hudi/io/compress/HoodieDecompressor.java | 44 ++ .../compress/HoodieDecompressorFactory.java | 40 ++ .../HoodieAirliftGzipDecompressor.java | 53 ++ .../builtin/HoodieNoneDecompressor.java | 42 ++ .../apache/hudi/io/hfile/BlockIndexEntry.java | 79 +++ .../org/apache/hudi/io/hfile/DataSize.java | 42 ++ .../org/apache/hudi/io/hfile/HFileBlock.java | 216 ++++++ .../hudi/io/hfile/HFileBlockReader.java | 94 +++ .../apache/hudi/io/hfile/HFileBlockType.java | 171 +++++ .../apache/hudi/io/hfile/HFileContext.java | 65 ++ .../org/apache/hudi/io/hfile/HFileCursor.java | 93 +++ .../apache/hudi/io/hfile/HFileDataBlock.java | 134 ++++ .../hudi/io/hfile/HFileFileInfoBlock.java | 62 ++ .../org/apache/hudi/io/hfile/HFileInfo.java | 90 +++ .../apache/hudi/io/hfile/HFileMetaBlock.java | 39 ++ .../org/apache/hudi/io/hfile/HFileReader.java | 127 ++++ .../apache/hudi/io/hfile/HFileReaderImpl.java | 299 ++++++++ .../hudi/io/hfile/HFileRootIndexBlock.java | 77 +++ .../apache/hudi/io/hfile/HFileTrailer.java | 191 ++++++ .../org/apache/hudi/io/hfile/HFileUtils.java | 94 +++ .../java/org/apache/hudi/io/hfile/Key.java | 93 +++ .../org/apache/hudi/io/hfile/KeyValue.java | 100 +++ .../apache/hudi/io/hfile/UTF8StringKey.java | 53 ++ .../java/org/apache/hudi/io/util/IOUtils.java | 252 +++++++ hudi-io/src/main/protobuf/HFile.proto | 53 ++ .../io/compress/TestHoodieDecompressor.java | 106 +++ .../apache/hudi/io/hfile/TestHFileReader.java | 642 ++++++++++++++++++ .../org/apache/hudi/io/util/TestIOUtils.java | 110 +++ ...ase_1_2_3_bootstrap_index_partitions.hfile | Bin .../hudi_0_10_hbase_1_2_3_complex.hfile | Bin .../hfile}/hudi_0_10_hbase_1_2_3_simple.hfile | Bin ...ase_2_4_9_bootstrap_index_partitions.hfile | Bin .../hudi_0_11_hbase_2_4_9_complex.hfile | Bin .../hfile}/hudi_0_11_hbase_2_4_9_simple.hfile | Bin ...ase_1_2_3_bootstrap_index_partitions.hfile | Bin .../hfile}/hudi_0_9_hbase_1_2_3_complex.hfile | Bin .../hfile}/hudi_0_9_hbase_1_2_3_simple.hfile | Bin .../hudi_1_0_hbase_2_4_9_16KB_GZ_20000.hfile | Bin 0 -> 105235 bytes ...base_2_4_9_16KB_GZ_200_20_non_unique.hfile | Bin 0 -> 19476 bytes .../hudi_1_0_hbase_2_4_9_16KB_NONE_5000.hfile | Bin 0 -> 301098 bytes .../hudi_1_0_hbase_2_4_9_512KB_GZ_20000.hfile | Bin 0 -> 101870 bytes .../hudi_1_0_hbase_2_4_9_64KB_NONE_5000.hfile | Bin 0 -> 300065 bytes .../hfile/hudi_1_0_hbase_2_4_9_no_entry.hfile | Bin 0 -> 5087 bytes .../hudi-metaserver-server-bundle/pom.xml | 2 +- pom.xml | 12 +- 57 files changed, 4204 insertions(+), 14 deletions(-) create mode 100644 hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java create mode 100644 hudi-io/README.md create mode 100644 hudi-io/hfile_format.md create mode 100644 hudi-io/pom.xml rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/FileIOUtils.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/Option.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/exception/HoodieException.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/exception/HoodieIOException.java (100%) create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/compress/CompressionCodec.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressor.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressorFactory.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/compress/airlift/HoodieAirliftGzipDecompressor.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/compress/builtin/HoodieNoneDecompressor.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/BlockIndexEntry.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/DataSize.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileRootIndexBlock.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileTrailer.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/KeyValue.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/hfile/UTF8StringKey.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java create mode 100644 hudi-io/src/main/protobuf/HFile.proto create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/compress/TestHoodieDecompressor.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_10_hbase_1_2_3_complex.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_10_hbase_1_2_3_simple.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_11_hbase_2_4_9_complex.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_11_hbase_2_4_9_simple.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_9_hbase_1_2_3_complex.hfile (100%) rename {hudi-common/src/test/resources => hudi-io/src/test/resources/hfile}/hudi_0_9_hbase_1_2_3_simple.hfile (100%) create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_20000.hfile create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_200_20_non_unique.hfile create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_NONE_5000.hfile create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_512KB_GZ_20000.hfile create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_64KB_NONE_5000.hfile create mode 100644 hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_no_entry.hfile diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 5f59a9fac298..97cdf36d12a5 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -103,6 +103,12 @@ + + org.apache.hudi + hudi-io + ${project.version} + + org.openjdk.jol jol-core @@ -201,6 +207,14 @@ provided + + org.apache.hudi + hudi-io + ${project.version} + tests + test + + org.apache.hudi hudi-tests-common diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index a7de5fe396b6..f7a5a84b344b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -38,12 +38,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.CellComparatorImpl; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mockito; @@ -72,6 +74,12 @@ import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; import static org.apache.hudi.common.util.CollectionUtils.toStream; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.hudi.io.hfile.TestHFileReader.BOOTSTRAP_INDEX_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.COMPLEX_SCHEMA_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.KEY_CREATOR; +import static org.apache.hudi.io.hfile.TestHFileReader.SIMPLE_SCHEMA_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.VALUE_CREATOR; +import static org.apache.hudi.io.hfile.TestHFileReader.readHFileFromResources; import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -83,9 +91,6 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase { private static final String DUMMY_BASE_PATH = "dummy_base_path"; // Number of records in HFile fixtures for compatibility tests private static final int NUM_RECORDS_FIXTURE = 50; - private static final String SIMPLE_SCHEMA_HFILE_SUFFIX = "_simple.hfile"; - private static final String COMPLEX_SCHEMA_HFILE_SUFFIX = "_complex.hfile"; - private static final String BOOTSTRAP_INDEX_HFILE_SUFFIX = "_bootstrap_index_partitions.hfile"; @Override protected Path getFilePath() { @@ -402,7 +407,7 @@ public int compare(GenericRecord o1, GenericRecord o2) { @ParameterizedTest @ValueSource(strings = { - "/hudi_0_9_hbase_1_2_3", "/hudi_0_10_hbase_1_2_3", "/hudi_0_11_hbase_2_4_9"}) + "/hfile/hudi_0_9_hbase_1_2_3", "/hfile/hudi_0_10_hbase_1_2_3", "/hfile/hudi_0_11_hbase_2_4_9"}) public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException { // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadPrimitiveRecord() // using different Hudi releases @@ -431,7 +436,8 @@ public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); hfileReader = - new HoodieAvroHFileReader(hadoopConf, new Path(DUMMY_BASE_PATH), new CacheConfig(hadoopConf), fs, content, Option.empty()); + new HoodieAvroHFileReader(hadoopConf, new Path(DUMMY_BASE_PATH), new CacheConfig(hadoopConf), fs, content, + Option.empty()); avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc"); assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); @@ -441,6 +447,28 @@ public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException hfilePrefix, false, HFileBootstrapIndex.HoodieKVComparator.class, 4); } + @Disabled("This is used for generating testing HFile only") + @ParameterizedTest + @CsvSource({ + "512,GZ,20000,true", "16,GZ,20000,true", + "64,NONE,5000,true", "16,NONE,5000,true", + "16,GZ,200,false" + }) + void generateHFileForTesting(int blockSizeKB, + String compressionCodec, + int numEntries, + boolean uniqueKeys) throws IOException { + TestHoodieReaderWriterUtils.writeHFileForTesting( + String.format("/tmp/hudi_1_0_hbase_2_4_9_%sKB_%s_%s.hfile", + blockSizeKB, compressionCodec, numEntries), + blockSizeKB * 1024, + Compression.Algorithm.valueOf(compressionCodec), + numEntries, + KEY_CREATOR, + VALUE_CREATOR, + uniqueKeys); + } + private Set getRandomKeys(int count, List keys) { Set rowKeys = new HashSet<>(); int totalKeys = keys.size(); @@ -453,13 +481,6 @@ private Set getRandomKeys(int count, List keys) { return rowKeys; } - private byte[] readHFileFromResources(String filename) throws IOException { - long size = TestHoodieHFileReaderWriter.class - .getResource(filename).openConnection().getContentLength(); - return FileIOUtils.readAsByteArray( - TestHoodieHFileReaderWriter.class.getResourceAsStream(filename), (int) size); - } - private void verifyHFileReader( HFile.Reader reader, String hfileName, boolean mayUseDefaultComparator, Class clazz, int count) { diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java new file mode 100644 index 000000000000..6a5f3cd46b76 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.function.Function; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.hudi.io.hfile.TestHFileReader.CUSTOM_META_KEY; +import static org.apache.hudi.io.hfile.TestHFileReader.CUSTOM_META_VALUE; +import static org.apache.hudi.io.hfile.TestHFileReader.DUMMY_BLOOM_FILTER; + +/** + * Utils for reader and writer tests. + */ +public class TestHoodieReaderWriterUtils { + static void writeHFileForTesting(String fileLocation, + int blockSize, + Compression.Algorithm compressionAlgo, + int numEntries, + Function keyCreator, + Function valueCreator, + boolean uniqueKeys) throws IOException { + HFileContext context = new HFileContextBuilder() + .withBlockSize(blockSize) + .withCompression(compressionAlgo) + .build(); + Configuration conf = new Configuration(); + CacheConfig cacheConfig = new CacheConfig(conf); + Path filePath = new Path(fileLocation); + FileSystem fs = filePath.getFileSystem(conf); + try (HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig) + .withPath(fs, filePath) + .withFileContext(context) + .create()) { + for (int i = 0; i < numEntries; i++) { + byte[] keyBytes = getUTF8Bytes(keyCreator.apply(i)); + writer.append(new KeyValue(keyBytes, null, null, getUTF8Bytes(valueCreator.apply(i)))); + if (!uniqueKeys) { + for (int j = 0; j < 20; j++) { + writer.append(new KeyValue( + keyBytes, null, null, getUTF8Bytes(valueCreator.apply(i) + "_" + j))); + } + } + } + writer.appendFileInfo(getUTF8Bytes(CUSTOM_META_KEY), getUTF8Bytes(CUSTOM_META_VALUE)); + writer.appendMetaBlock(HoodieAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() { + @Override + public void write(DataOutput out) throws IOException { + out.write(getUTF8Bytes(DUMMY_BLOOM_FILTER)); + } + + @Override + public void readFields(DataInput in) throws IOException { + } + }); + } + } +} diff --git a/hudi-io/README.md b/hudi-io/README.md new file mode 100644 index 000000000000..6235b1738b40 --- /dev/null +++ b/hudi-io/README.md @@ -0,0 +1,31 @@ + + +# `hudi-io` Module + +This module contains classes that are I/O related, including common abstraction and APIs, readers and writers, etc. + +## HFile Reader + +We implement our own HFile reader (`org.apache.hudi.io.hfile.HFileReaderImpl`) that functionally works on reading HBase +HFiles in the Hudi metadata tables, based on the format described below. + +## HFile Format + +Refer to [HFile Format](hfile_format.md) documentation. \ No newline at end of file diff --git a/hudi-io/hfile_format.md b/hudi-io/hfile_format.md new file mode 100644 index 000000000000..192c3d4313f8 --- /dev/null +++ b/hudi-io/hfile_format.md @@ -0,0 +1,394 @@ + + +# HFile Format + +[HFile format](https://hbase.apache.org/book.html#_hfile_format_2) is based on SSTable file format optimized for range +scans/point lookups, originally designed and implemented by [HBase](https://hbase.apache.org/). We use HFile version 3 +as the base file format of the internal metadata table (MDT). Here we describe the HFile format that are relevant to +Hudi, as not all features of HFile are used. + +The HFile is structured as follows: + +``` ++----------+-----------------------+ +| "Scanned | Data Block | +| block" +-----------------------+ +| section | ... | +| +-----------------------+ +| | Data Block | ++----------+-----------------------+ +| "Non- | Meta Block | +| scanned +-----------------------+ +| block" | ... | +| section +-----------------------+ +| | Meta Block | ++----------+-----------------------+ +| "Load- | Root Data Index Block | +| on-open" +-----------------------+ +| section | Meta Index Block | +| +-----------------------+ +| | File Info Block | ++----------+-----------------------+ +| Trailer | Trailer, containing | +| | fields and | +| | HFile Version | ++----------+-----------------------+ +``` + +- **"Scanned block" section**: this section contains all the data in key-value pairs, organized into one or multiple + data + blocks. This section has to be scanned for reading a key-value pair; +- **"Non-scanned block" section**: this section contains meta information, such as bloom filter which is used by Hudi to + store the bloom filter, organized into one or multiple meta blocks. This section can be skipped for reading all + key-value pairs sequentially from the beginning of the file. +- **"Load-on-open" section**: this section contains block index and file info, organized into three blocks: + - **Root Data Index Block**: Index of data blocks in "Scanned block" section, containing the start offset in the + file, size of the block on storage, and the first key of the data block; + - **Meta Index Block**: Index of meta blocks in "Non-scanned block" section, containing the start offset in the + file, size of the block on storage, and the key of the meta block; + - **File Info Block**: HFile information that is useful for scanning the key-value pairs; +- **Trailer**: this section contains the information of all other sections and HFile version for decoding and parsing. + This section is always read first when reading a HFile. + +Next, we describe the block format and each block in details. + +## Block format + +All the blocks except for Trailer share the same format as follows: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | ++ Block Magic + +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| On-disk Size Without Header | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Uncompressed Size Without Header | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | ++ Previous Block Offset + +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Checksum Type | Bytes Per Checksum > ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +> | On-disk Data Size With Header > ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +> | | ++-+-+-+-+-+-+-+-+ + +| | +~ Data ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Checksum ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +Note that one tick mark represents one bit position. +``` + +Header: + +- **Block Magic**: 8 bytes, a sequence of bytes indicating the block type. Supported block types are: + - `DATABLK*`: `DATA` block type for data blocks + - `METABLKc`: `META` block type for meta blocks + - `IDXROOT2`: `ROOT_INDEX` block type for root-level index blocks + - `FILEINF2`: `FILE_INFO` block type for the file info block, a small key-value map of metadata +- **On-disk Size Without Header**: 4 bytes, integer, compressed size of the block's data, not including the header. Can + be used for skipping the current data block when scanning HFile data. +- **Uncompressed Size Without Header**: 4 bytes, integer, uncompressed size of the block's data, not including the + header. This is equal to the compressed size if the compression algorithm is NONE. +- **Previous Block Offset**: 8 bytes, long, file offset of the previous block of the same type. Can be used for seeking + to the previous data/index block. +- **Checksum Type**: 1 byte, type of checksum used. +- **Bytes Per Checksum**: 4 bytes, integer, number of data bytes covered per checksum. +- **On-disk Data Size With Header**: 4 bytes, integer, on disk data size with header. + +Data: + +- **Data**: Compressed data (or uncompressed data if the compression algorithm is NONE). The size is indicated in the + header. The content varies across different types of blocks, which are discussed later in this document. + +Checksum: + +- **Checksum**: checksum of the data. The size of checksums is indicated by the header. + +## Data Block + +The "Data" part of the Data Block consists of one or multiple key-value pairs, with keys sorted in lexicographical +order: + +``` ++--------------------+ +| Key-value Pair 0 | ++--------------------+ +| Key-value Pair 1 | ++--------------------+ +| ... | ++--------------------+ +| Key-value Pair N-1 | ++--------------------+ +``` + +Each key-value pair has the following format: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Key Length | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Value Length | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Key ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Value ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| MVCC Timestamp| ++-+-+-+-+-+-+-+-+ +``` + +Header: + +- **Key Length**: 4 bytes, integer, length of the key part. +- **Value Length**: 4 bytes, integer, lenghth of the value part. + +Key: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Key Content Size | | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +| | +~ Key Content ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Other Information ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` + +- **Key Content Size**: 2 byte, short, size of the key content. +- **Key Content**: key content in byte array. In Hudi, we serialize the String into byte array using UTF-8. +- **Other Information**: other information of the key, which is not used by Hudi. + +Value: + +The whole part represents the value in byte array. The size of value is indicated by the header. + +MVCC Timestamp: + +This is used by HBase and written to HFile. For Hudi, this field should always be zero, occupying 1 byte. + +## Meta Block + +The "Data" part of the Meta Block contains the meta information in byte array. The key of the meta block can be found in +the +Meta Index Block. + +## Index Block + +The "Data" part of the Index Block can be empty. When not empty, the "Data" part of Index Block contains one or more +block index entries organized like below: + +``` ++-----------------------+ +| Block Index Entry 0 | ++-----------------------+ +| Block Index Entry 1 | ++-----------------------+ +| ... | ++-----------------------+ +| Block Index Entry N-1 | ++-----------------------+ +``` + +Each block index entry, referencing one relevant Data or Meta Block, has the following format: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | ++ Block Offset + +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Block Size on Disk | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Key Length ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | ++ Key + +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` + +- **Block Offset**: 8 bytes, long, the start offset of a data or meta block in the file. +- **Block Size on Disk**: 4 bytes, integer, the on-disk size of the block, so the block can be skipped based on the + size. +- **Key Length**: [variable-length encoded](https://en.wikipedia.org/wiki/Variable-length_quantity) number representing + the length of the "Key" part. + +Key: + +``` ++----------------+-----------+ +| Key Bytes Size | Key Bytes | ++----------------+-----------+ +``` + +For Data Index, the "Key Bytes" part has the following format (same as the key format in the Data Block): + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| Key Content Size | | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +| | +~ Key Content ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Other Information ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` + +- **Key Content Size**: 2 byte, short, size of the key content. +- **Key Content**: key content in byte array. In Hudi, we encode the String into bytes using UTF-8. +- **Other Information**: other information of the key, which is not used by Hudi. + +For Meta Index, the "Key Bytes" part is the byte array of the key of the Meta Block. + +## File Info Block + +The "Data" part of the File Info Block has the following format: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| PBUF Magic | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ File Info ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +``` + +- **PBUF Magic**: 4 bytes, magic bytes `PBUF` indicating the block is using Protobuf for serde. +- **File Info**: a small key-value map of metadata serialized in Protobuf. + +Here's the definition of the File Info proto `InfoProto`: + +``` +message BytesBytesPair { + required bytes first = 1; + required bytes second = 2; +} + +message InfoProto { + repeated BytesBytesPair map_entry = 1; +} +``` + +The key and value are represented in byte array. When Hudi adds more key-value metadata entry to the file info, the key +and value are encoded from String into byte array using UTF-8. + +Here are common metadata stored in the File Info Block: + +- `hfile.LASTKEY`: The last key of the file (byte array) +- `hfile.MAX_MEMSTORE_TS_KEY`: Maximum MVCC timestamp of the key-value pairs in the file. In Hudi, this should always be + 0. + +## Trailer + +The HFile Trailer has a fixed size, 4096 bytes. The HFile Trailer has different format compared to other blocks, as +follows: + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | ++ Block Magic + +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | +~ Trailer Content ~ +| | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +``` + +- **Block Magic**: 8 bytes, a sequence of bytes indicating the Trailer, i.e., `TRABLK"$`. +- **Trailer Content**: the metadata fields are serialized in Protobuf, defined as follows + +``` +message TrailerProto { + optional uint64 file_info_offset = 1; + optional uint64 load_on_open_data_offset = 2; + optional uint64 uncompressed_data_index_size = 3; + optional uint64 total_uncompressed_bytes = 4; + optional uint32 data_index_count = 5; + optional uint32 meta_index_count = 6; + optional uint64 entry_count = 7; + optional uint32 num_data_index_levels = 8; + optional uint64 first_data_block_offset = 9; + optional uint64 last_data_block_offset = 10; + optional string comparator_class_name = 11; + optional uint32 compression_codec = 12; + optional bytes encryption_key = 13; +} +``` + +Here are the meaning of each field: + +- `file_info_offset`: File info offset +- `load_on_open_data_offset`: The offset of the section ("Load-on-open" section) that we need to load when opening the + file +- `uncompressed_data_index_size`: The total uncompressed size of the whole data block index +- `total_uncompressed_bytes`: Total uncompressed bytes +- `data_index_count`: Number of data index entries +- `meta_index_count`: Number of meta index entries +- `entry_count`: Number of key-value pair entries in the file +- `num_data_index_levels`: The number of levels in the data block index +- `first_data_block_offset`: The offset of the first data block +- `last_data_block_offset`: The offset of the first byte after the last key-value data block +- `comparator_class_name`: Comparator class name (In Hudi, we always assume lexicographical order, so this is ignored) +- `compression_codec`: Compression codec: 0 = LZO, 1 = GZ, 2 = NONE +- `encryption_key`: Encryption key (not used by Hudi) + +The last 4 bytes of the Trailer content contain the HFile version: the number represented by the first byte indicates +the minor version, and the number represented by the last three bytes indicates the major version. In the case of Hudi, +the major version should always be 3, if written by HBase HFile writer. diff --git a/hudi-io/pom.xml b/hudi-io/pom.xml new file mode 100644 index 000000000000..7123278fa23c --- /dev/null +++ b/hudi-io/pom.xml @@ -0,0 +1,126 @@ + + + + + hudi + org.apache.hudi + 0.15.0-SNAPSHOT + + 4.0.0 + + hudi-io + + + ${project.parent.basedir} + 0.6.1 + 1.5.0.Final + + + + + + src/main/resources + + + + + + kr.motd.maven + os-maven-plugin + ${os.maven.version} + + + + + + org.xolstice.maven.plugins + protobuf-maven-plugin + ${protobuf.plugin.version} + + + com.google.protobuf:protoc:${protoc.version}:exe:${os.detected.classifier} + + ${basedir}/src/main/protobuf/ + false + true + + + + compile-protoc + generate-sources + + compile + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + org.jacoco + jacoco-maven-plugin + + + + + + + com.google.protobuf + protobuf-java + + + + io.airlift + aircompressor + + + + org.apache.hadoop + hadoop-common + provided + + + + org.apache.hudi + hudi-tests-common + ${project.version} + test + + + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/FileIOUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/FileIOUtils.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java b/hudi-io/src/main/java/org/apache/hudi/common/util/Option.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/Option.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/Option.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java b/hudi-io/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieException.java b/hudi-io/src/main/java/org/apache/hudi/exception/HoodieException.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/exception/HoodieException.java rename to hudi-io/src/main/java/org/apache/hudi/exception/HoodieException.java diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/HoodieIOException.java b/hudi-io/src/main/java/org/apache/hudi/exception/HoodieIOException.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/exception/HoodieIOException.java rename to hudi-io/src/main/java/org/apache/hudi/exception/HoodieIOException.java diff --git a/hudi-io/src/main/java/org/apache/hudi/io/compress/CompressionCodec.java b/hudi-io/src/main/java/org/apache/hudi/io/compress/CompressionCodec.java new file mode 100644 index 000000000000..d9c933cdc08e --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/compress/CompressionCodec.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress; + +/** + * Available compression codecs. + * There should not be any assumption on the ordering or ordinal of the defined enums. + */ +public enum CompressionCodec { + NONE("none"), + BZIP2("bz2"), + GZIP("gz"), + LZ4("lz4"), + LZO("lzo"), + SNAPPY("snappy"), + ZSTD("zstd"); + + private final String name; + + CompressionCodec(final String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressor.java b/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressor.java new file mode 100644 index 000000000000..62be27470039 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressor.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Provides decompression on input data. + */ +public interface HoodieDecompressor { + /** + * Decompresses the data from {@link InputStream} and writes the decompressed data to the target + * byte array. + * + * @param compressedInput compressed data in {@link InputStream}. + * @param targetByteArray target byte array to store the decompressed data. + * @param offset offset in the target byte array to start to write data. + * @param length maximum amount of decompressed data to write. + * @return size of bytes read. + * @throws IOException upon error. + */ + int decompress(InputStream compressedInput, + byte[] targetByteArray, + int offset, + int length) throws IOException; +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressorFactory.java b/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressorFactory.java new file mode 100644 index 000000000000..af50b0940799 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/compress/HoodieDecompressorFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress; + +import org.apache.hudi.io.compress.airlift.HoodieAirliftGzipDecompressor; +import org.apache.hudi.io.compress.builtin.HoodieNoneDecompressor; + +/** + * Factory for {@link HoodieDecompressor}. + */ +public class HoodieDecompressorFactory { + public static HoodieDecompressor getDecompressor(CompressionCodec compressionCodec) { + switch (compressionCodec) { + case NONE: + return new HoodieNoneDecompressor(); + case GZIP: + return new HoodieAirliftGzipDecompressor(); + default: + throw new IllegalArgumentException( + "The decompression is not supported for compression codec: " + compressionCodec); + } + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/compress/airlift/HoodieAirliftGzipDecompressor.java b/hudi-io/src/main/java/org/apache/hudi/io/compress/airlift/HoodieAirliftGzipDecompressor.java new file mode 100644 index 000000000000..15c2ff3f8271 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/compress/airlift/HoodieAirliftGzipDecompressor.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress.airlift; + +import org.apache.hudi.io.compress.CompressionCodec; +import org.apache.hudi.io.compress.HoodieDecompressor; + +import io.airlift.compress.gzip.JdkGzipHadoopStreams; +import io.airlift.compress.hadoop.HadoopInputStream; + +import java.io.IOException; +import java.io.InputStream; + +import static org.apache.hudi.io.util.IOUtils.readFully; + +/** + * Implementation of {@link HoodieDecompressor} for {@link CompressionCodec#GZIP} compression + * codec using airlift aircompressor's GZIP decompressor. + */ +public class HoodieAirliftGzipDecompressor implements HoodieDecompressor { + private final JdkGzipHadoopStreams gzipStreams; + + public HoodieAirliftGzipDecompressor() { + gzipStreams = new JdkGzipHadoopStreams(); + } + + @Override + public int decompress(InputStream compressedInput, + byte[] targetByteArray, + int offset, + int length) throws IOException { + try (HadoopInputStream stream = gzipStreams.createInputStream(compressedInput)) { + return readFully(stream, targetByteArray, offset, length); + } + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/compress/builtin/HoodieNoneDecompressor.java b/hudi-io/src/main/java/org/apache/hudi/io/compress/builtin/HoodieNoneDecompressor.java new file mode 100644 index 000000000000..d702201c6ddd --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/compress/builtin/HoodieNoneDecompressor.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress.builtin; + +import org.apache.hudi.io.compress.CompressionCodec; +import org.apache.hudi.io.compress.HoodieDecompressor; + +import java.io.IOException; +import java.io.InputStream; + +import static org.apache.hudi.io.util.IOUtils.readFully; + +/** + * Implementation of {@link HoodieDecompressor} for {@link CompressionCodec#NONE} compression + * codec (no compression) by directly reading the input stream. + */ +public class HoodieNoneDecompressor implements HoodieDecompressor { + @Override + public int decompress(InputStream compressedInput, + byte[] targetByteArray, + int offset, + int length) throws IOException { + return readFully(compressedInput, targetByteArray, offset, length); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/BlockIndexEntry.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/BlockIndexEntry.java new file mode 100644 index 000000000000..635b2fad6f56 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/BlockIndexEntry.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +/** + * Represents the index entry of a data block in the Data Index stored in the + * {@link HFileBlockType#ROOT_INDEX} block. + *

+ * This is completely in-memory representation and does not involve byte parsing. + *

+ * When comparing two {@link BlockIndexEntry} instances, the underlying bytes of the keys + * are compared in lexicographical order. + */ +public class BlockIndexEntry implements Comparable { + private final Key firstKey; + private final Option nextBlockFirstKey; + private final long offset; + private final int size; + + public BlockIndexEntry(Key firstKey, Option nextBlockFirstKey, + long offset, + int size) { + this.firstKey = firstKey; + this.nextBlockFirstKey = nextBlockFirstKey; + this.offset = offset; + this.size = size; + } + + public Key getFirstKey() { + return firstKey; + } + + public Option getNextBlockFirstKey() { + return nextBlockFirstKey; + } + + public long getOffset() { + return offset; + } + + public int getSize() { + return size; + } + + @Override + public int compareTo(BlockIndexEntry o) { + return firstKey.compareTo(o.getFirstKey()); + } + + @Override + public String toString() { + return "BlockIndexEntry{firstKey=" + + firstKey.toString() + + ", offset=" + + offset + + ", size=" + + size + + "}"; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/DataSize.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/DataSize.java new file mode 100644 index 000000000000..356180c09157 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/DataSize.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +/** + * Sizes of different primitive data structures used by HFile. + */ +public class DataSize { + // Size of boolean in bytes + public static final int SIZEOF_BOOLEAN = 1; + + // Size of byte in bytes + public static final int SIZEOF_BYTE = 1; + + // Size of int (int32) in bytes + public static final int SIZEOF_INT32 = 4; + + // Size of short (int16) in bytes + public static final int SIZEOF_INT16 = 2; + + // Size of long (int64) in bytes + public static final int SIZEOF_INT64 = 8; + + public static final int MAGIC_LENGTH = 8; +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java new file mode 100644 index 000000000000..8ad2bf4b97c5 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlock.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.compress.CompressionCodec; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH; +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE; +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32; +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64; +import static org.apache.hudi.io.util.IOUtils.readInt; + +/** + * Represents a block in a HFile. The types of blocks are defined in {@link HFileBlockType}. + */ +public abstract class HFileBlock { + // The HFile block header size without checksum + public static final int HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM = + MAGIC_LENGTH + 2 * SIZEOF_INT32 + SIZEOF_INT64; + // The HFile block header size with checksum + // There is a 1 byte checksum type, followed by a 4 byte bytesPerChecksum + // followed by another 4 byte value to store sizeofDataOnDisk. + public static final int HFILEBLOCK_HEADER_SIZE = + HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + SIZEOF_BYTE + 2 * SIZEOF_INT32; + + // Each checksum value is an integer that can be stored in 4 bytes. + static final int CHECKSUM_SIZE = SIZEOF_INT32; + + static class Header { + // Format of header is: + // 8 bytes - block magic + // 4 bytes int - onDiskSizeWithoutHeader + // 4 bytes int - uncompressedSizeWithoutHeader + // 8 bytes long - prevBlockOffset + // The following 3 are only present if header contains checksum information + // (which are present for HFile version 3) + // 1 byte - checksum type + // 4 byte int - bytes per checksum + // 4 byte int - onDiskDataSizeWithHeader + static int BLOCK_MAGIC_INDEX = 0; + static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8; + static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12; + static int PREV_BLOCK_OFFSET_INDEX = 16; + static int CHECKSUM_TYPE_INDEX = 24; + static int BYTES_PER_CHECKSUM_INDEX = 25; + static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29; + } + + protected final HFileContext context; + protected final byte[] byteBuff; + protected final int startOffsetInBuff; + protected final int sizeCheckSum; + protected final int uncompressedEndOffset; + private final HFileBlockType blockType; + protected final int onDiskSizeWithoutHeader; + protected final int uncompressedSizeWithoutHeader; + protected final int bytesPerChecksum; + private boolean isUnpacked = false; + protected byte[] compressedByteBuff; + protected int startOffsetInCompressedBuff; + + protected HFileBlock(HFileContext context, + HFileBlockType blockType, + byte[] byteBuff, + int startOffsetInBuff) { + this.context = context; + this.blockType = blockType; + this.onDiskSizeWithoutHeader = readInt( + byteBuff, startOffsetInBuff + Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX); + this.uncompressedSizeWithoutHeader = readInt( + byteBuff, startOffsetInBuff + Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX); + this.bytesPerChecksum = readInt( + byteBuff, startOffsetInBuff + Header.BYTES_PER_CHECKSUM_INDEX); + this.sizeCheckSum = numChecksumBytes(getOnDiskSizeWithHeader(), bytesPerChecksum); + if (CompressionCodec.NONE.equals(context.getCompressionCodec())) { + isUnpacked = true; + this.startOffsetInBuff = startOffsetInBuff; + this.byteBuff = byteBuff; + } else { + this.startOffsetInCompressedBuff = startOffsetInBuff; + this.compressedByteBuff = byteBuff; + this.startOffsetInBuff = 0; + this.byteBuff = allocateBufferForUnpacking(); + } + this.uncompressedEndOffset = + this.startOffsetInBuff + HFILEBLOCK_HEADER_SIZE + uncompressedSizeWithoutHeader; + } + + /** + * Parses the HFile block header and returns the {@link HFileBlock} instance based on the input. + * + * @param context HFile context. + * @param byteBuff input data. + * @param startOffsetInBuff offset to start parsing. + * @return the {@link HFileBlock} instance based on the input. + * @throws IOException if the block cannot be parsed. + */ + public static HFileBlock parse(HFileContext context, byte[] byteBuff, int startOffsetInBuff) + throws IOException { + HFileBlockType blockType = HFileBlockType.parse(byteBuff, startOffsetInBuff); + switch (blockType) { + case ROOT_INDEX: + return new HFileRootIndexBlock(context, byteBuff, startOffsetInBuff); + case FILE_INFO: + return new HFileFileInfoBlock(context, byteBuff, startOffsetInBuff); + case DATA: + return new HFileDataBlock(context, byteBuff, startOffsetInBuff); + case META: + return new HFileMetaBlock(context, byteBuff, startOffsetInBuff); + default: + throw new IOException( + "Parsing of the HFile block type " + blockType + " is not supported"); + } + } + + /** + * Returns the number of bytes needed to store the checksums based on data size. + * + * @param numBytes number of bytes of data. + * @param bytesPerChecksum number of bytes covered by one checksum. + * @return the number of bytes needed to store the checksum values. + */ + static int numChecksumBytes(long numBytes, int bytesPerChecksum) { + return numChecksumChunks(numBytes, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE; + } + + /** + * Returns the number of checksum chunks needed to store the checksums based on data size. + * + * @param numBytes number of bytes of data. + * @param bytesPerChecksum number of bytes in a checksum chunk. + * @return the number of checksum chunks. + */ + static int numChecksumChunks(long numBytes, int bytesPerChecksum) { + long numChunks = numBytes / bytesPerChecksum; + if (numBytes % bytesPerChecksum != 0) { + numChunks++; + } + if (numChunks > Integer.MAX_VALUE / HFileBlock.CHECKSUM_SIZE) { + throw new IllegalArgumentException("The number of chunks is too large: " + numChunks); + } + return (int) numChunks; + } + + public HFileBlockType getBlockType() { + return blockType; + } + + public byte[] getByteBuff() { + return byteBuff; + } + + public int getOnDiskSizeWithHeader() { + return onDiskSizeWithoutHeader + HFILEBLOCK_HEADER_SIZE; + } + + /** + * Decodes and decompresses the block content if the block content is compressed. + *

+ * This must be called for an encoded and compressed block before any reads. + * + * @throws IOException upon decoding and decompression error. + */ + public void unpack() throws IOException { + if (!isUnpacked) { + // Should only be called for compressed blocks + CompressionCodec compression = context.getCompressionCodec(); + if (compression != CompressionCodec.NONE) { + // Copy the block header which is not compressed + System.arraycopy( + compressedByteBuff, startOffsetInCompressedBuff, byteBuff, 0, HFILEBLOCK_HEADER_SIZE); + try (InputStream byteBuffInputStream = new ByteArrayInputStream( + compressedByteBuff, startOffsetInCompressedBuff + HFILEBLOCK_HEADER_SIZE, onDiskSizeWithoutHeader)) { + context.getDecompressor().decompress( + byteBuffInputStream, + byteBuff, + HFILEBLOCK_HEADER_SIZE, + byteBuff.length - HFILEBLOCK_HEADER_SIZE); + } + } + isUnpacked = true; + } + } + + /** + * Allocates new byte buffer for the uncompressed bytes. + * + * @return a new byte array based on the size of uncompressed data, holding the same header + * bytes. + */ + protected byte[] allocateBufferForUnpacking() { + int capacity = HFILEBLOCK_HEADER_SIZE + uncompressedSizeWithoutHeader + sizeCheckSum; + return new byte[capacity]; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java new file mode 100644 index 000000000000..bcc1afb64cea --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockReader.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hadoop.fs.FSDataInputStream; + +import java.io.EOFException; +import java.io.IOException; + +/** + * A reader to read one or more HFile blocks based on the start and end offsets. + */ +public class HFileBlockReader { + private final HFileContext context; + private final long streamStartOffset; + private final FSDataInputStream stream; + private final byte[] byteBuff; + private int offset; + private boolean isReadFully = false; + + /** + * Instantiates the {@link HFileBlockReader}. + * + * @param context HFile context. + * @param stream input data. + * @param startOffset start offset to read from. + * @param endOffset end offset to stop at. + */ + public HFileBlockReader(HFileContext context, + FSDataInputStream stream, + long startOffset, + long endOffset) { + this.context = context; + this.stream = stream; + this.streamStartOffset = startOffset; + this.offset = 0; + long length = endOffset - startOffset; + if (length >= 0 && length <= Integer.MAX_VALUE) { + this.byteBuff = new byte[(int) length]; + } else { + throw new IllegalArgumentException( + "The range of bytes is too large or invalid: [" + + startOffset + ", " + endOffset + "], length=" + length); + } + } + + /** + * Reads the next block based on the expected block type. + * + * @param expectedBlockType expected block type. + * @return {@link HFileBlock} instance matching the expected block type. + * @throws IOException if the type of next block does not match the expected type. + */ + public HFileBlock nextBlock(HFileBlockType expectedBlockType) throws IOException { + if (offset >= byteBuff.length) { + throw new EOFException("No more data to read"); + } + + if (!isReadFully) { + // Full range of bytes are read fully into a byte array + stream.seek(streamStartOffset); + stream.readFully(byteBuff); + isReadFully = true; + } + + HFileBlock block = HFileBlock.parse(context, byteBuff, offset); + block.unpack(); + + if (block.getBlockType() != expectedBlockType) { + throw new IOException("Unexpected block type: " + block.getBlockType() + + "; expecting " + expectedBlockType); + } + + offset += block.getOnDiskSizeWithHeader(); + return block; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java new file mode 100644 index 000000000000..72a0ecec78bc --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileBlockType.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.util.IOUtils; + +import java.io.DataInputStream; +import java.io.IOException; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH; + +/** + * Represents the HFile block type. + * These types are copied from HBase HFile definition to maintain compatibility. + * Do not delete or reorder the enums as the ordinal is used as the block type ID. + */ +public enum HFileBlockType { + /** + * Data block + */ + DATA("DATABLK*", BlockCategory.DATA), + + /** + * An encoded data block (e.g. with prefix compression), version 2 + */ + ENCODED_DATA("DATABLKE", BlockCategory.DATA) { + @Override + public int getId() { + return DATA.ordinal(); + } + }, + + /** + * Version 2 leaf index block. Appears in the data block section + */ + LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX), + + /** + * Bloom filter block, version 2 + */ + BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM), + + // Non-scanned block section: these blocks may be skipped for sequential reads. + + /** + * Meta blocks + */ + META("METABLKc", BlockCategory.META), + + /** + * Intermediate-level version 2 index in the non-data block section + */ + INTERMEDIATE_INDEX("IDXINTE2", BlockCategory.INDEX), + + // Load-on-open section: these blocks must be read upon HFile opening to understand + // the file structure. + + /** + * Root index block, also used for the single-level meta index, version 2 + */ + ROOT_INDEX("IDXROOT2", BlockCategory.INDEX), + + /** + * File info, version 2 + */ + FILE_INFO("FILEINF2", BlockCategory.META), + + /** + * General Bloom filter metadata, version 2 + */ + GENERAL_BLOOM_META("BLMFMET2", BlockCategory.BLOOM), + + /** + * Delete Family Bloom filter metadata, version 2 + */ + DELETE_FAMILY_BLOOM_META("DFBLMET2", BlockCategory.BLOOM), + + // Trailer + + /** + * Fixed file trailer, both versions (always just a magic string) + */ + TRAILER("TRABLK\"$", BlockCategory.META), + + // Legacy blocks + + /** + * Block index magic string in version 1 + */ + INDEX_V1("IDXBLK)+", BlockCategory.INDEX); + + public enum BlockCategory { + DATA, META, INDEX, BLOOM, ALL_CATEGORIES, UNKNOWN; + } + + private final byte[] magic; + private final BlockCategory metricCat; + + HFileBlockType(String magicStr, BlockCategory metricCat) { + magic = magicStr.getBytes(UTF_8); + this.metricCat = metricCat; + assert magic.length == MAGIC_LENGTH; + } + + /** + * Parses the block type from the block magic. + * + * @param buf input data. + * @param offset offset to start reading. + * @return the block type. + * @throws IOException if the block magic is invalid. + */ + public static HFileBlockType parse(byte[] buf, int offset) + throws IOException { + for (HFileBlockType blockType : values()) { + if (IOUtils.compareTo( + blockType.magic, 0, MAGIC_LENGTH, buf, offset, MAGIC_LENGTH) == 0) { + return blockType; + } + } + + throw new IOException("Invalid HFile block magic: " + + IOUtils.bytesToString(buf, offset, MAGIC_LENGTH)); + } + + /** + * Uses this instead of {@link #ordinal()}. They work exactly the same, except + * DATA and ENCODED_DATA get the same id using this method (overridden for + * {@link #ENCODED_DATA}). + * + * @return block type id from 0 to the number of block types - 1. + */ + public int getId() { + // Default implementation, can be overridden for individual enum members. + return ordinal(); + } + + /** + * Reads a magic record of the length {@link DataSize#MAGIC_LENGTH} from the given + * stream and expects it to match this block type. + * + * @param in input data. + * @throws IOException when the magic is invalid. + */ + public void readAndCheckMagic(DataInputStream in) throws IOException { + byte[] buf = new byte[MAGIC_LENGTH]; + in.readFully(buf); + if (IOUtils.compareTo(buf, magic) != 0) { + throw new IOException("Invalid magic: expected " + + new String(magic) + ", got " + new String(buf)); + } + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java new file mode 100644 index 000000000000..d47daef30eca --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileContext.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.compress.CompressionCodec; +import org.apache.hudi.io.compress.HoodieDecompressor; +import org.apache.hudi.io.compress.HoodieDecompressorFactory; + +/** + * The context of HFile that contains information of the blocks. + */ +public class HFileContext { + private final CompressionCodec compressionCodec; + private final HoodieDecompressor decompressor; + + private HFileContext(CompressionCodec compressionCodec) { + this.compressionCodec = compressionCodec; + this.decompressor = HoodieDecompressorFactory.getDecompressor(compressionCodec); + } + + CompressionCodec getCompressionCodec() { + return compressionCodec; + } + + HoodieDecompressor getDecompressor() { + return decompressor; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private CompressionCodec compressionCodec = CompressionCodec.NONE; + + public Builder() { + } + + public Builder compressionCodec(CompressionCodec compressionCodec) { + this.compressionCodec = compressionCodec; + return this; + } + + public HFileContext build() { + return new HFileContext(compressionCodec); + } + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java new file mode 100644 index 000000000000..100ae4b5ce5b --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +/** + * Stores the current position and {@link KeyValue} at the position in the HFile. + * The same instance is used as a position cursor during HFile reading. + * The {@link KeyValue} can be lazily read and cached. + */ +public class HFileCursor { + private static final int INVALID_POSITION = -1; + + private int offset; + private Option keyValue; + private boolean eof; + + public HFileCursor() { + this.offset = INVALID_POSITION; + this.keyValue = Option.empty(); + this.eof = false; + } + + public boolean isSeeked() { + return offset != INVALID_POSITION || eof; + } + + public boolean isValid() { + return !(offset == INVALID_POSITION || eof); + } + + public int getOffset() { + return offset; + } + + public Option getKeyValue() { + return keyValue; + } + + public void set(int offset, KeyValue keyValue) { + this.offset = offset; + this.keyValue = Option.of(keyValue); + } + + public void setOffset(int offset) { + this.offset = offset; + this.keyValue = Option.empty(); + } + + public void setKeyValue(KeyValue keyValue) { + this.keyValue = Option.of(keyValue); + } + + public void setEof() { + this.eof = true; + } + + public void unsetEof() { + this.eof = false; + } + + public void increment(long incr) { + this.offset += incr; + this.keyValue = Option.empty(); + } + + @Override + public String toString() { + return "HFilePosition{offset=" + + offset + + ", keyValue=" + + keyValue.toString() + + "}"; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java new file mode 100644 index 000000000000..8722d7cbeb4c --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileDataBlock.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND; +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_IN_RANGE; +import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET; + +/** + * Represents a {@link HFileBlockType#DATA} block. + */ +public class HFileDataBlock extends HFileBlock { + // Hudi does not use HFile MVCC timestamp version so the version + // is always 0, thus the byte length of the version is always 1. + // This assumption is also validated when parsing {@link HFileInfo}, + // i.e., the maximum MVCC timestamp in a HFile must be 0. + private static final long ZERO_TS_VERSION_BYTE_LENGTH = 1; + + // End offset of content in the block, relative to the start of the start of the block + protected final int uncompressedContentEndRelativeOffset; + + protected HFileDataBlock(HFileContext context, + byte[] byteBuff, + int startOffsetInBuff) { + super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff); + + this.uncompressedContentEndRelativeOffset = + this.uncompressedEndOffset - this.sizeCheckSum - this.startOffsetInBuff; + } + + /** + * Seeks to the key to look up. The key may not have an exact match. + * + * @param cursor {@link HFileCursor} containing the current position relative + * to the beginning of the HFile (not the block start offset). + * @param key key to look up. + * @param blockStartOffsetInFile the start offset of the block relative to the beginning of the + * HFile. + * @return 0 if the block contains the exact same key as the lookup key, and the cursor points + * to the key; or 1 if the lookup key does not exist, and the cursor points to the + * lexicographically largest key that is smaller than the lookup key. + */ + public int seekTo(HFileCursor cursor, Key key, int blockStartOffsetInFile) { + int relativeOffset = cursor.getOffset() - blockStartOffsetInFile; + int lastRelativeOffset = relativeOffset; + Option lastKeyValue = cursor.getKeyValue(); + while (relativeOffset < uncompressedContentEndRelativeOffset) { + // Full length is not known yet until parsing + KeyValue kv = readKeyValue(relativeOffset); + int comp = kv.getKey().compareTo(key); + if (comp == 0) { + // The lookup key equals the key `relativeOffset` points to; the key is found. + // Set the cursor to the current offset that points to the exact match + cursor.set(relativeOffset + blockStartOffsetInFile, kv); + return SEEK_TO_FOUND; + } else if (comp > 0) { + // There is no matched key (otherwise, the method should already stop there and return 0) + // and the key `relativeOffset` points to is already greater than the lookup key. + // So set the cursor to the previous offset, pointing the greatest key in the file that is + // less than the lookup key. + if (lastKeyValue.isPresent()) { + // If the key-value pair is already, cache it + cursor.set(lastRelativeOffset + blockStartOffsetInFile, lastKeyValue.get()); + } else { + // Otherwise, defer the read till it's needed + cursor.setOffset(lastRelativeOffset + blockStartOffsetInFile); + } + return SEEK_TO_IN_RANGE; + } + long increment = + (long) KEY_OFFSET + (long) kv.getKeyLength() + (long) kv.getValueLength() + + ZERO_TS_VERSION_BYTE_LENGTH; + lastRelativeOffset = relativeOffset; + relativeOffset += increment; + lastKeyValue = Option.of(kv); + } + // We reach the end of the block. Set the cursor to the offset of last key. + // In this case, the lookup key is greater than the last key. + if (lastKeyValue.isPresent()) { + cursor.set(lastRelativeOffset + blockStartOffsetInFile, lastKeyValue.get()); + } else { + cursor.setOffset(lastRelativeOffset + blockStartOffsetInFile); + } + return SEEK_TO_IN_RANGE; + } + + /** + * Reads the key value at the offset. + * + * @param offset offset to read relative to the start of {@code byteBuff}. + * @return the {@link KeyValue} instance. + */ + public KeyValue readKeyValue(int offset) { + return new KeyValue(byteBuff, offset); + } + + /** + * Moves the cursor to next {@link KeyValue}. + * + * @param cursor {@link HFileCursor} instance containing the current position. + * @param blockStartOffsetInFile the start offset of the block relative to the beginning of the + * HFile. + * @return {@code true} if there is next {@link KeyValue}; {code false} otherwise. + */ + public boolean next(HFileCursor cursor, int blockStartOffsetInFile) { + int offset = cursor.getOffset() - blockStartOffsetInFile; + Option keyValue = cursor.getKeyValue(); + if (!keyValue.isPresent()) { + keyValue = Option.of(readKeyValue(offset)); + } + cursor.increment((long) KEY_OFFSET + (long) keyValue.get().getKeyLength() + + (long) keyValue.get().getValueLength() + ZERO_TS_VERSION_BYTE_LENGTH); + return cursor.getOffset() - blockStartOffsetInFile < uncompressedContentEndRelativeOffset; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java new file mode 100644 index 000000000000..7b3518bd2b27 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.hfile.protobuf.generated.HFileProtos; +import org.apache.hudi.io.util.IOUtils; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Represents a {@link HFileBlockType#FILE_INFO} block. + */ +public class HFileFileInfoBlock extends HFileBlock { + // Magic we put ahead of a serialized protobuf message + public static final byte[] PB_MAGIC = new byte[] {'P', 'B', 'U', 'F'}; + + public HFileFileInfoBlock(HFileContext context, + byte[] byteBuff, + int startOffsetInBuff) { + super(context, HFileBlockType.FILE_INFO, byteBuff, startOffsetInBuff); + } + + public HFileInfo readFileInfo() throws IOException { + int pbMagicLength = PB_MAGIC.length; + if (IOUtils.compareTo(PB_MAGIC, 0, pbMagicLength, + byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength) != 0) { + throw new IOException( + "Unexpected Protobuf magic at the beginning of the HFileFileInfoBlock: " + + new String(byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength)); + } + ByteArrayInputStream inputStream = new ByteArrayInputStream( + byteBuff, + startOffsetInBuff + HFILEBLOCK_HEADER_SIZE + pbMagicLength, uncompressedSizeWithoutHeader); + Map fileInfoMap = new HashMap<>(); + HFileProtos.InfoProto infoProto = HFileProtos.InfoProto.parseDelimitedFrom(inputStream); + for (HFileProtos.BytesBytesPair pair : infoProto.getMapEntryList()) { + fileInfoMap.put( + new UTF8StringKey(pair.getFirst().toByteArray()), pair.getSecond().toByteArray()); + } + return new HFileInfo(fileInfoMap); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java new file mode 100644 index 000000000000..adc7c3129368 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileInfo.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.io.util.IOUtils; + +import java.util.Map; + +/** + * Represents the HFile info read from {@link HFileBlockType#FILE_INFO} block. + */ +public class HFileInfo { + private static final String RESERVED_PREFIX = "hfile."; + private static final UTF8StringKey LAST_KEY = + new UTF8StringKey(RESERVED_PREFIX + "LASTKEY"); + private static final UTF8StringKey FILE_CREATION_TIME_TS = + new UTF8StringKey(RESERVED_PREFIX + "CREATE_TIME_TS"); + private static final UTF8StringKey KEY_VALUE_VERSION = + new UTF8StringKey("KEY_VALUE_VERSION"); + private static final UTF8StringKey MAX_MVCC_TS_KEY = + new UTF8StringKey("MAX_MEMSTORE_TS_KEY"); + + private static final int KEY_VALUE_VERSION_WITH_MVCC_TS = 1; + + private final Map infoMap; + private final long fileCreationTime; + private final Option lastKey; + private final long maxMvccTs; + private final boolean containsMvccTs; + + public HFileInfo(Map infoMap) { + this.infoMap = infoMap; + this.fileCreationTime = parseFileCreationTime(); + this.lastKey = parseLastKey(); + this.maxMvccTs = parseMaxMvccTs(); + this.containsMvccTs = maxMvccTs > 0; + if (containsMvccTs) { + // The HFile written by Hudi does not contain MVCC timestamps. + // Parsing MVCC timestamps is not supported. + throw new UnsupportedOperationException("HFiles with MVCC timestamps are not supported"); + } + } + + public long getFileCreationTime() { + return fileCreationTime; + } + + public Option getLastKey() { + return lastKey; + } + + public byte[] get(UTF8StringKey key) { + return infoMap.get(key); + } + + private long parseFileCreationTime() { + byte[] bytes = infoMap.get(FILE_CREATION_TIME_TS); + return bytes != null ? IOUtils.readLong(bytes, 0) : 0; + } + + private Option parseLastKey() { + byte[] bytes = infoMap.get(LAST_KEY); + return bytes != null ? Option.of(new Key(bytes)) : Option.empty(); + } + + private long parseMaxMvccTs() { + byte[] bytes = infoMap.get(KEY_VALUE_VERSION); + boolean supportsMvccTs = bytes != null + && IOUtils.readInt(bytes, 0) == KEY_VALUE_VERSION_WITH_MVCC_TS; + return supportsMvccTs ? IOUtils.readLong(infoMap.get(MAX_MVCC_TS_KEY), 0) : 0; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java new file mode 100644 index 000000000000..67ab09638244 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileMetaBlock.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import java.nio.ByteBuffer; + +/** + * Represents a {@link HFileBlockType#META} block. + */ +public class HFileMetaBlock extends HFileBlock { + protected HFileMetaBlock(HFileContext context, + byte[] byteBuff, + int startOffsetInBuff) { + super(context, HFileBlockType.META, byteBuff, startOffsetInBuff); + } + + public ByteBuffer readContent() { + return ByteBuffer.wrap( + getByteBuff(), + startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, uncompressedSizeWithoutHeader); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java new file mode 100644 index 000000000000..fcc3be558660 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReader.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * HFile reader that supports seeks. + */ +public interface HFileReader extends Closeable { + // Return code of seekTo(Key) + // When the lookup key is less than the first key of the file + // The cursor points to the first key of the file + int SEEK_TO_BEFORE_FIRST_KEY = -1; + // When the lookup key is found in the file + // The cursor points to the matched key in the file + int SEEK_TO_FOUND = 0; + // When the lookup key is not found, but it's in the range of the file + // The cursor points to the greatest key that is less than the lookup key + int SEEK_TO_IN_RANGE = 1; + // When the lookup key is greater than the last key of the file, EOF is reached + // The cursor points to EOF + int SEEK_TO_EOF = 2; + + /** + * Initializes metadata based on a HFile before other read operations. + * + * @throws IOException upon read errors. + */ + void initializeMetadata() throws IOException; + + /** + * Gets info entry from file info block of a HFile. + * + * @param key meta key. + * @return the content in bytes if present. + * @throws IOException upon read errors. + */ + Option getMetaInfo(UTF8StringKey key) throws IOException; + + /** + * Gets the content of a meta block from HFile. + * + * @param metaBlockName meta block name. + * @return the content in bytes if present. + * @throws IOException upon read errors. + */ + Option getMetaBlock(String metaBlockName) throws IOException; + + /** + * @return total number of key value entries in the HFile. + */ + long getNumKeyValueEntries(); + + /** + * seekTo or just before the passed {@link Key}. Examine the return code to figure whether we + * found the key or not. Consider the key-value pairs in the file, + * kv[0] .. kv[n-1], where there are n KV pairs in the file. + *

+ * The position only moves forward so the caller has to make sure the keys are sorted before + * making multiple calls of this method. + *

+ * + * @param key {@link Key} to seek to. + * @return -1, if key < kv[0], no position; + * 0, such that kv[i].key = key and the reader is left in position i; and + * 1, such that kv[i].key < key if there is no exact match, and the reader is left in + * position i. + * The reader will position itself between kv[i] and kv[i+1] where + * kv[i].key < key <= kv[i+1].key; + * 2, if there is no KV greater than or equal to the input key, and the reader positions + * itself at the end of the file and next() will return {@code false} when it is called. + * @throws IOException upon read errors. + */ + int seekTo(Key key) throws IOException; + + /** + * Positions this reader at the start of the file. + * + * @return {@code false} if empty file; i.e. a call to next would return false and + * the current key and value are undefined. + * @throws IOException upon read errors. + */ + boolean seekTo() throws IOException; + + /** + * Scans to the next entry in the file. + * + * @return {@code false} if the current position is at the end; + * otherwise {@code true} if more in file. + * @throws IOException upon read errors. + */ + boolean next() throws IOException; + + /** + * @return The {@link KeyValue} instance at current position. + */ + Option getKeyValue() throws IOException; + + /** + * @return {@code true} if the reader has had one of the seek calls invoked; i.e. + * {@link #seekTo()} or {@link #seekTo(Key)}. + * Otherwise, {@code false}. + */ + boolean isSeeked(); +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java new file mode 100644 index 000000000000..b792ba6eb321 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.logging.log4j.util.Strings; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.TreeMap; + +import static org.apache.hudi.io.hfile.HFileBlock.HFILEBLOCK_HEADER_SIZE; +import static org.apache.hudi.io.hfile.HFileUtils.readMajorVersion; + +/** + * An implementation a {@link HFileReader}. + */ +public class HFileReaderImpl implements HFileReader { + private final FSDataInputStream stream; + private final long fileSize; + + private final HFileCursor cursor; + private boolean isMetadataInitialized = false; + private HFileTrailer trailer; + private HFileContext context; + private TreeMap dataBlockIndexEntryMap; + private TreeMap metaBlockIndexEntryMap; + private HFileInfo fileInfo; + private Option currentDataBlockEntry; + private Option currentDataBlock; + + public HFileReaderImpl(FSDataInputStream stream, long fileSize) { + this.stream = stream; + this.fileSize = fileSize; + this.cursor = new HFileCursor(); + this.currentDataBlockEntry = Option.empty(); + this.currentDataBlock = Option.empty(); + } + + @Override + public synchronized void initializeMetadata() throws IOException { + if (this.isMetadataInitialized) { + return; + } + + // Read Trailer (serialized in Proto) + this.trailer = readTrailer(stream, fileSize); + this.context = HFileContext.builder() + .compressionCodec(trailer.getCompressionCodec()) + .build(); + HFileBlockReader blockReader = new HFileBlockReader( + context, stream, trailer.getLoadOnOpenDataOffset(), + fileSize - HFileTrailer.getTrailerSize()); + HFileRootIndexBlock dataIndexBlock = + (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX); + this.dataBlockIndexEntryMap = dataIndexBlock.readBlockIndex(trailer.getDataIndexCount(), false); + HFileRootIndexBlock metaIndexBlock = + (HFileRootIndexBlock) blockReader.nextBlock(HFileBlockType.ROOT_INDEX); + this.metaBlockIndexEntryMap = metaIndexBlock.readBlockIndex(trailer.getMetaIndexCount(), true); + HFileFileInfoBlock fileInfoBlock = + (HFileFileInfoBlock) blockReader.nextBlock(HFileBlockType.FILE_INFO); + this.fileInfo = fileInfoBlock.readFileInfo(); + this.isMetadataInitialized = true; + } + + @Override + public Option getMetaInfo(UTF8StringKey key) throws IOException { + initializeMetadata(); + return Option.ofNullable(fileInfo.get(key)); + } + + @Override + public Option getMetaBlock(String metaBlockName) throws IOException { + initializeMetadata(); + BlockIndexEntry blockIndexEntry = metaBlockIndexEntryMap.get(new UTF8StringKey(metaBlockName)); + if (blockIndexEntry == null) { + return Option.empty(); + } + HFileBlockReader blockReader = new HFileBlockReader( + context, stream, blockIndexEntry.getOffset(), + blockIndexEntry.getOffset() + blockIndexEntry.getSize()); + HFileMetaBlock block = (HFileMetaBlock) blockReader.nextBlock(HFileBlockType.META); + return Option.of(block.readContent()); + } + + @Override + public long getNumKeyValueEntries() { + try { + initializeMetadata(); + return trailer.getNumKeyValueEntries(); + } catch (IOException e) { + throw new RuntimeException("Cannot read HFile", e); + } + } + + @Override + public int seekTo(Key key) throws IOException { + Option currentKeyValue = getKeyValue(); + if (!currentKeyValue.isPresent()) { + return SEEK_TO_EOF; + } + int compareCurrent = key.compareTo(currentKeyValue.get().getKey()); + if (compareCurrent > 0) { + if (currentDataBlockEntry.get().getNextBlockFirstKey().isPresent()) { + int comparedNextBlockFirstKey = + key.compareTo(currentDataBlockEntry.get().getNextBlockFirstKey().get()); + if (comparedNextBlockFirstKey >= 0) { + // Searches the block that may contain the lookup key based the starting keys of + // all blocks (sorted in the TreeMap of block index entries), using binary search. + // The result contains the greatest key less than or equal to the given key. + + Map.Entry floorEntry = dataBlockIndexEntryMap.floorEntry(key); + if (floorEntry == null) { + // Key smaller than the start key of the first block which should never happen here + throw new IllegalStateException( + "Unexpected state of the HFile reader when looking up the key: " + key + + " data block index: " + + Strings.join(dataBlockIndexEntryMap.values(), ',')); + } + currentDataBlockEntry = Option.of(floorEntry.getValue()); + currentDataBlock = Option.of(instantiateHFileDataBlock(currentDataBlockEntry.get())); + cursor.setOffset( + (int) currentDataBlockEntry.get().getOffset() + HFILEBLOCK_HEADER_SIZE); + } + } + if (!currentDataBlockEntry.get().getNextBlockFirstKey().isPresent()) { + // This is the last data block. Check against the last key. + if (fileInfo.getLastKey().isPresent()) { + int comparedLastKey = key.compareTo(fileInfo.getLastKey().get()); + if (comparedLastKey > 0) { + currentDataBlockEntry = Option.empty(); + currentDataBlock = Option.empty(); + cursor.setEof(); + return SEEK_TO_EOF; + } + } + } + + if (!currentDataBlock.isPresent()) { + currentDataBlock = Option.of(instantiateHFileDataBlock(currentDataBlockEntry.get())); + } + + return currentDataBlock.get() + .seekTo(cursor, key, (int) currentDataBlockEntry.get().getOffset()); + } + if (compareCurrent == 0) { + return SEEK_TO_FOUND; + } + if (!isAtFirstKey()) { + // For backward seekTo after the first key, throw exception + throw new IllegalStateException( + "The current lookup key is less than the current position of the cursor, " + + "i.e., backward seekTo, which is not supported and should be avoided. " + + "key=" + key + " cursor=" + cursor); + } + return SEEK_TO_BEFORE_FIRST_KEY; + } + + @Override + public boolean seekTo() throws IOException { + initializeMetadata(); + if (trailer.getNumKeyValueEntries() == 0) { + cursor.setEof(); + return false; + } + // Move the current position to the beginning of the first data block + cursor.setOffset(dataBlockIndexEntryMap.firstKey().getOffset() + HFILEBLOCK_HEADER_SIZE); + cursor.unsetEof(); + currentDataBlockEntry = Option.of(dataBlockIndexEntryMap.firstEntry().getValue()); + // The data block will be read when {@link #getKeyValue} is called + currentDataBlock = Option.empty(); + return true; + } + + @Override + public boolean next() throws IOException { + if (cursor.isValid()) { + if (!currentDataBlock.isPresent()) { + currentDataBlock = Option.of(instantiateHFileDataBlock(currentDataBlockEntry.get())); + } + if (currentDataBlock.get().next(cursor, (int) currentDataBlockEntry.get().getOffset())) { + // The position is advanced by the data block instance + return true; + } + currentDataBlockEntry = getNextBlockIndexEntry(currentDataBlockEntry.get()); + currentDataBlock = Option.empty(); + if (!currentDataBlockEntry.isPresent()) { + cursor.setEof(); + return false; + } + cursor.setOffset((int) currentDataBlockEntry.get().getOffset() + HFILEBLOCK_HEADER_SIZE); + return true; + } + return false; + } + + @Override + public Option getKeyValue() throws IOException { + if (cursor.isValid()) { + Option keyValue = cursor.getKeyValue(); + if (!keyValue.isPresent()) { + if (!currentDataBlock.isPresent()) { + currentDataBlock = Option.of(instantiateHFileDataBlock(currentDataBlockEntry.get())); + } + keyValue = + Option.of(currentDataBlock.get().readKeyValue( + cursor.getOffset() - (int) currentDataBlockEntry.get().getOffset())); + cursor.setKeyValue(keyValue.get()); + } + return keyValue; + } + return Option.empty(); + } + + @Override + public boolean isSeeked() { + return cursor.isSeeked(); + } + + @Override + public void close() throws IOException { + stream.close(); + } + + /** + * Reads and parses the HFile trailer. + * + * @param stream HFile input. + * @param fileSize HFile size. + * @return {@link HFileTrailer} instance. + * @throws IOException upon error. + */ + private static HFileTrailer readTrailer(FSDataInputStream stream, + long fileSize) throws IOException { + int bufferSize = HFileTrailer.getTrailerSize(); + long seekPos = fileSize - bufferSize; + if (seekPos < 0) { + // It is hard to imagine such a small HFile. + seekPos = 0; + bufferSize = (int) fileSize; + } + stream.seek(seekPos); + + byte[] byteBuff = new byte[bufferSize]; + stream.readFully(byteBuff); + + int majorVersion = readMajorVersion(byteBuff, bufferSize - 3); + int minorVersion = byteBuff[bufferSize - 4]; + + HFileTrailer trailer = new HFileTrailer(majorVersion, minorVersion); + trailer.deserialize(new DataInputStream(new ByteArrayInputStream(byteBuff))); + return trailer; + } + + private Option getNextBlockIndexEntry(BlockIndexEntry entry) { + Map.Entry keyBlockIndexEntryEntry = + dataBlockIndexEntryMap.higherEntry(entry.getFirstKey()); + if (keyBlockIndexEntryEntry == null) { + return Option.empty(); + } + return Option.of(keyBlockIndexEntryEntry.getValue()); + } + + private HFileDataBlock instantiateHFileDataBlock(BlockIndexEntry blockToRead) throws IOException { + HFileBlockReader blockReader = new HFileBlockReader( + context, stream, blockToRead.getOffset(), + blockToRead.getOffset() + (long) blockToRead.getSize()); + return (HFileDataBlock) blockReader.nextBlock(HFileBlockType.DATA); + } + + private boolean isAtFirstKey() { + if (cursor.isValid() && !dataBlockIndexEntryMap.isEmpty()) { + return cursor.getOffset() == dataBlockIndexEntryMap.firstKey().getOffset() + HFILEBLOCK_HEADER_SIZE; + } + return false; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileRootIndexBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileRootIndexBlock.java new file mode 100644 index 000000000000..9612d75ff60f --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileRootIndexBlock.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; + +import java.util.ArrayList; +import java.util.List; +import java.util.TreeMap; + +import static org.apache.hudi.io.util.IOUtils.copy; +import static org.apache.hudi.io.util.IOUtils.decodeVarLongSizeOnDisk; +import static org.apache.hudi.io.util.IOUtils.readInt; +import static org.apache.hudi.io.util.IOUtils.readLong; +import static org.apache.hudi.io.util.IOUtils.readVarLong; + +/** + * Represents a {@link HFileBlockType#ROOT_INDEX} block. + */ +public class HFileRootIndexBlock extends HFileBlock { + public HFileRootIndexBlock(HFileContext context, + byte[] byteBuff, + int startOffsetInBuff) { + super(context, HFileBlockType.ROOT_INDEX, byteBuff, startOffsetInBuff); + } + + /** + * Reads the index block and returns the block index entry to an in-memory {@link TreeMap} + * for searches. + * + * @param numEntries the number of entries in the block. + * @return a {@link TreeMap} of block index entries. + */ + public TreeMap readBlockIndex(int numEntries, boolean contentKeyOnly) { + TreeMap blockIndexEntryMap = new TreeMap<>(); + int buffOffset = startOffsetInBuff + HFILEBLOCK_HEADER_SIZE; + List keyList = new ArrayList<>(); + List offsetList = new ArrayList<>(); + List sizeList = new ArrayList(); + for (int i = 0; i < numEntries; i++) { + long offset = readLong(byteBuff, buffOffset); + int size = readInt(byteBuff, buffOffset + 8); + int varLongSizeOnDist = decodeVarLongSizeOnDisk(byteBuff, buffOffset + 12); + int keyLength = (int) readVarLong(byteBuff, buffOffset + 12, varLongSizeOnDist); + byte[] keyBytes = copy(byteBuff, buffOffset + 12 + varLongSizeOnDist, keyLength); + Key key = contentKeyOnly ? new UTF8StringKey(keyBytes) : new Key(keyBytes); + keyList.add(key); + offsetList.add(offset); + sizeList.add(size); + buffOffset += (12 + varLongSizeOnDist + keyLength); + } + for (int i = 0; i < numEntries; i++) { + Key key = keyList.get(i); + blockIndexEntryMap.put(key, new BlockIndexEntry( + key, i < numEntries - 1 ? Option.of(keyList.get(i + 1)) : Option.empty(), + offsetList.get(i), sizeList.get(i))); + } + return blockIndexEntryMap; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileTrailer.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileTrailer.java new file mode 100644 index 000000000000..7aff7d2c830e --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileTrailer.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.compress.CompressionCodec; +import org.apache.hudi.io.hfile.protobuf.generated.HFileProtos; + +import java.io.DataInputStream; +import java.io.IOException; +import java.util.Arrays; + +import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH; +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32; +import static org.apache.hudi.io.hfile.HFileUtils.decodeCompressionCodec; + +/** + * Represents a HFile trailer, which is serialized and deserialized using + * {@link HFileProtos.TrailerProto} with Protobuf. + */ +public class HFileTrailer { + // This is the trailer size for HFile V3 + public static final int TRAILER_SIZE = 1024 * 4; + private static final int NOT_PB_SIZE = MAGIC_LENGTH + SIZEOF_INT32; + + // Offset to the fileinfo data, a small block of vitals + private long fileInfoOffset; + + // The offset to the section of the file that should be loaded at the time the file is + // being opened: i.e. on open we load the root index, file info, etc. + private long loadOnOpenDataOffset; + + // The number of entries in the root data index + private int dataIndexCount; + + // Total uncompressed size of all blocks of the data index + private long uncompressedDataIndexSize; + + // The number of entries in the meta index + private int metaIndexCount; + + // The total uncompressed size of keys/values stored in the file + private long totalUncompressedBytes; + + // The number of key/value pairs in the file + private long keyValueEntryCount; + + // The compression codec used for all blocks. + private CompressionCodec compressionCodec = CompressionCodec.NONE; + + // The number of levels in the potentially multi-level data index. + private int numDataIndexLevels; + + // The offset of the first data block. + private long firstDataBlockOffset; + + // It is guaranteed that no key/value data blocks start after this offset in the file + private long lastDataBlockOffset; + + // The comparator class name. We don't use this but for reference we still it + private String comparatorClassName = ""; + + // The encryption key + private byte[] encryptionKey; + + private final int majorVersion; + private final int minorVersion; + + public HFileTrailer(int majorVersion, int minorVersion) { + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + } + + public static int getTrailerSize() { + return TRAILER_SIZE; + } + + public long getLoadOnOpenDataOffset() { + return loadOnOpenDataOffset; + } + + public int getNumDataIndexLevels() { + return numDataIndexLevels; + } + + public int getDataIndexCount() { + return dataIndexCount; + } + + public int getMetaIndexCount() { + return metaIndexCount; + } + + public long getNumKeyValueEntries() { + return keyValueEntryCount; + } + + public CompressionCodec getCompressionCodec() { + return compressionCodec; + } + + public void deserialize(DataInputStream stream) throws IOException { + HFileBlockType.TRAILER.readAndCheckMagic(stream); + // Read Protobuf + int start = stream.available(); + HFileProtos.TrailerProto trailerProto = + HFileProtos.TrailerProto.PARSER.parseDelimitedFrom(stream); + int size = start - stream.available(); + stream.skip(getTrailerSize() - NOT_PB_SIZE - size); + // May optionally read version again and validate + // process the PB + if (trailerProto.hasFileInfoOffset()) { + fileInfoOffset = trailerProto.getFileInfoOffset(); + } + if (trailerProto.hasLoadOnOpenDataOffset()) { + loadOnOpenDataOffset = trailerProto.getLoadOnOpenDataOffset(); + } + if (trailerProto.hasUncompressedDataIndexSize()) { + uncompressedDataIndexSize = trailerProto.getUncompressedDataIndexSize(); + } + if (trailerProto.hasTotalUncompressedBytes()) { + totalUncompressedBytes = trailerProto.getTotalUncompressedBytes(); + } + if (trailerProto.hasDataIndexCount()) { + dataIndexCount = trailerProto.getDataIndexCount(); + } + if (trailerProto.hasMetaIndexCount()) { + metaIndexCount = trailerProto.getMetaIndexCount(); + } + if (trailerProto.hasEntryCount()) { + keyValueEntryCount = trailerProto.getEntryCount(); + } + if (trailerProto.hasNumDataIndexLevels()) { + numDataIndexLevels = trailerProto.getNumDataIndexLevels(); + } + if (trailerProto.hasFirstDataBlockOffset()) { + firstDataBlockOffset = trailerProto.getFirstDataBlockOffset(); + } + if (trailerProto.hasLastDataBlockOffset()) { + lastDataBlockOffset = trailerProto.getLastDataBlockOffset(); + } + if (trailerProto.hasComparatorClassName()) { + comparatorClassName = trailerProto.getComparatorClassName(); + } + if (trailerProto.hasCompressionCodec()) { + compressionCodec = decodeCompressionCodec(trailerProto.getCompressionCodec()); + } else { + compressionCodec = CompressionCodec.NONE; + } + if (trailerProto.hasEncryptionKey()) { + encryptionKey = trailerProto.getEncryptionKey().toByteArray(); + } + } + + @Override + public String toString() { + return "HFileTrailer{" + + "fileInfoOffset=" + fileInfoOffset + + ", loadOnOpenDataOffset=" + loadOnOpenDataOffset + + ", dataIndexCount=" + dataIndexCount + + ", uncompressedDataIndexSize=" + uncompressedDataIndexSize + + ", metaIndexCount=" + metaIndexCount + + ", totalUncompressedBytes=" + totalUncompressedBytes + + ", entryCount=" + keyValueEntryCount + + ", compressionCodec=" + compressionCodec + + ", numDataIndexLevels=" + numDataIndexLevels + + ", firstDataBlockOffset=" + firstDataBlockOffset + + ", lastDataBlockOffset=" + lastDataBlockOffset + + ", comparatorClassName='" + comparatorClassName + '\'' + + ", encryptionKey=" + Arrays.toString(encryptionKey) + + ", majorVersion=" + majorVersion + + ", minorVersion=" + minorVersion + + '}'; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java new file mode 100644 index 000000000000..8f100c351755 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.compress.CompressionCodec; +import org.apache.hudi.io.util.IOUtils; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Util methods for reading and writing HFile. + */ +public class HFileUtils { + private static final Map HFILE_COMPRESSION_CODEC_MAP = createCompressionCodecMap(); + + /** + * Gets the compression codec based on the ID. This ID is written to the HFile on storage. + * + * @param id ID indicating the compression codec. + * @return compression codec based on the ID. + */ + public static CompressionCodec decodeCompressionCodec(int id) { + CompressionCodec codec = HFILE_COMPRESSION_CODEC_MAP.get(id); + if (codec == null) { + throw new IllegalArgumentException("Compression code not found for ID: " + id); + } + return codec; + } + + /** + * Reads the HFile major version from the input. + * + * @param bytes input data. + * @param offset offset to start reading. + * @return major version of the file. + */ + public static int readMajorVersion(byte[] bytes, int offset) { + int ch1 = bytes[offset] & 0xFF; + int ch2 = bytes[offset + 1] & 0xFF; + int ch3 = bytes[offset + 2] & 0xFF; + return ((ch1 << 16) + (ch2 << 8) + ch3); + } + + /** + * Compares two HFile {@link Key}. + * + * @param key1 left operand key. + * @param key2 right operand key. + * @return 0 if equal, < 0 if left is less than right, > 0 otherwise. + */ + public static int compareKeys(Key key1, Key key2) { + return IOUtils.compareTo( + key1.getBytes(), key1.getContentOffset(), key1.getContentLength(), + key2.getBytes(), key2.getContentOffset(), key2.getContentLength()); + } + + /** + * The ID mapping cannot change or else that breaks all existing HFiles out there, + * even the ones that are not compressed! (They use the NONE algorithm) + * This is because HFile stores the ID to indicate which compression codec is used. + * + * @return the mapping of ID to compression codec. + */ + private static Map createCompressionCodecMap() { + Map result = new HashMap<>(); + result.put(0, CompressionCodec.LZO); + result.put(1, CompressionCodec.GZIP); + result.put(2, CompressionCodec.NONE); + result.put(3, CompressionCodec.SNAPPY); + result.put(4, CompressionCodec.LZ4); + result.put(5, CompressionCodec.BZIP2); + result.put(6, CompressionCodec.ZSTD); + return Collections.unmodifiableMap(result); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java new file mode 100644 index 000000000000..5c00e43ab16f --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.io.util.IOUtils; + +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16; +import static org.apache.hudi.io.hfile.HFileUtils.compareKeys; +import static org.apache.hudi.io.util.IOUtils.readShort; + +/** + * Represents the key part only. + */ +public class Key implements Comparable { + private static final int CONTENT_LENGTH_SIZE = SIZEOF_INT16; + private final byte[] bytes; + private final int offset; + private final int length; + + public Key(byte[] bytes) { + this(bytes, 0, bytes.length); + } + + public Key(byte[] bytes, int offset, int length) { + this.bytes = bytes; + this.offset = offset; + this.length = length; + } + + public byte[] getBytes() { + return bytes; + } + + public int getOffset() { + return this.offset; + } + + public int getLength() { + return length; + } + + public int getContentOffset() { + return getOffset() + CONTENT_LENGTH_SIZE; + } + + public int getContentLength() { + return readShort(bytes, getOffset()); + } + + @Override + public int hashCode() { + // Only consider key content for hash code + return IOUtils.hashCode(getBytes(), getContentOffset(), getContentLength()); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + // Only consider key content for hash code + return compareTo((Key) o) == 0; + } + + @Override + public int compareTo(Key o) { + return compareKeys(this, o); + } + + @Override + public String toString() { + return "Key{" + + new String(getBytes(), getContentOffset(), getContentLength()) + + "}"; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/KeyValue.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/KeyValue.java new file mode 100644 index 000000000000..9ee6b5c36bf1 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/KeyValue.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32; +import static org.apache.hudi.io.util.IOUtils.readInt; + +/** + * Represents a key-value pair in the data block. + */ +public class KeyValue { + // Key part starts after the key length (integer) and value length (integer) + public static final int KEY_OFFSET = SIZEOF_INT32 * 2; + private final byte[] bytes; + private final int offset; + private final Key key; + + public KeyValue(byte[] bytes, int offset) { + this.bytes = bytes; + this.offset = offset; + this.key = new Key(bytes, offset + KEY_OFFSET, readInt(bytes, offset)); + } + + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + public byte[] getBytes() { + return bytes; + } + + public Key getKey() { + return key; + } + + /** + * @return key content offset. + */ + public int getKeyContentOffset() { + return key.getContentOffset(); + } + + /** + * @return length of key portion. + */ + public int getKeyLength() { + return key.getLength(); + } + + /** + * @return key offset in backing buffer. + */ + public int getKeyOffset() { + return key.getOffset(); + } + + /** + * @return key content length. + */ + public int getKeyContentLength() { + return key.getContentLength(); + } + + /** + * @return the value offset. + */ + public int getValueOffset() { + return getKeyOffset() + getKeyLength(); + } + + /** + * @return value length. + */ + public int getValueLength() { + return readInt(this.bytes, this.offset + SIZEOF_INT32); + } + + @Override + public String toString() { + return "KeyValue{key=" + + key.toString() + + "}"; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/UTF8StringKey.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/UTF8StringKey.java new file mode 100644 index 000000000000..672d1a6690a3 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/UTF8StringKey.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import java.nio.charset.StandardCharsets; + +/** + * Represent a UTF8 String key only, with no length information encoded. + */ +public class UTF8StringKey extends Key { + public UTF8StringKey(String key) { + + super(key.getBytes(StandardCharsets.UTF_8)); + } + + public UTF8StringKey(byte[] key) { + super(key); + } + + @Override + public int getContentOffset() { + return getOffset(); + } + + @Override + public int getContentLength() { + return getLength(); + } + + @Override + public String toString() { + return "UTF8StringKey{" + + new String(getBytes()) + + "}"; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java new file mode 100644 index 000000000000..5eeb21011cf0 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.util; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Util methods on I/O. + */ +public class IOUtils { + /** + * Reads four bytes starting from the offset in the input and returns {@code int} value. + * + * @param bytes input byte array. + * @param offset offset to start reading. + * @return the {@code int} value. + */ + public static int readInt(byte[] bytes, int offset) { + return (((bytes[offset] & 0xff) << 24) + | ((bytes[offset + 1] & 0xff) << 16) + | ((bytes[offset + 2] & 0xff) << 8) + | (bytes[offset + 3] & 0xff)); + } + + /** + * Reads eight bytes starting from the offset in the input and returns {@code long} value. + * + * @param bytes input byte array. + * @param offset offset to start reading. + * @return the {@code long} value. + */ + public static long readLong(byte[] bytes, int offset) { + return (((long) (bytes[offset] & 0xff) << 56) + | ((long) (bytes[offset + 1] & 0xff) << 48) + | ((long) (bytes[offset + 2] & 0xff) << 40) + | ((long) (bytes[offset + 3] & 0xff) << 32) + | ((long) (bytes[offset + 4] & 0xff) << 24) + | ((long) (bytes[offset + 5] & 0xff) << 16) + | ((long) (bytes[offset + 6] & 0xff) << 8) + | (long) (bytes[offset + 7] & 0xff)); + } + + /** + * Reads two bytes starting from the offset in the input and returns {@code short} value. + * + * @param bytes input byte array. + * @param offset offset to start reading. + * @return the {@code short} value. + */ + public static short readShort(byte[] bytes, int offset) { + short n = 0; + n = (short) ((n ^ bytes[offset]) & 0xFF); + n = (short) (n << 8); + n ^= (short) (bytes[offset + 1] & 0xFF); + return n; + } + + /** + * Parses the first byte of a variable-length encoded number (integer or long value) to determine + * total number of bytes representing the number on disk. + * + * @param bytes input byte array of the encoded number. + * @param offset offset to start reading. + * @return the total number of bytes (1 to 9) on disk. + */ + public static int decodeVarLongSizeOnDisk(byte[] bytes, int offset) { + byte firstByte = bytes[offset]; + return decodeVarLongSize(firstByte); + } + + /** + * Parses the first byte of a variable-length encoded number (integer or long value) to determine + * total number of bytes representing the number on disk. + * + * @param value the first byte of the encoded number. + * @return the total number of bytes (1 to 9) on disk. + */ + public static int decodeVarLongSize(byte value) { + if (value >= -112) { + return 1; + } else if (value < -120) { + return -119 - value; + } + return -111 - value; + } + + /** + * Reads a variable-length encoded number from input bytes and returns it. + * + * @param bytes input byte array. + * @param offset offset to start reading. + * @return decoded {@code long} from the input. + */ + public static long readVarLong(byte[] bytes, int offset) { + return readVarLong(bytes, offset, decodeVarLongSizeOnDisk(bytes, offset)); + } + + /** + * Reads a variable-length encoded number from input bytes and the decoded size on disk, + * and returns it. + * + * @param bytes input byte array. + * @param offset offset to start reading. + * @param varLongSizeOnDisk the total number of bytes (1 to 9) on disk. + * @return decoded {@code long} from the input. + */ + public static long readVarLong(byte[] bytes, int offset, int varLongSizeOnDisk) { + byte firstByte = bytes[offset]; + if (varLongSizeOnDisk == 1) { + return firstByte; + } + long value = 0; + for (int i = 0; i < varLongSizeOnDisk - 1; i++) { + value = value << 8; + value = value | (bytes[offset + 1 + i] & 0xFF); + } + return (isNegativeVarLong(firstByte) ? (~value) : value); + } + + /** + * Given the first byte of a variable-length encoded number, determines the sign. + * + * @param value the first byte. + * @return is the value negative. + */ + public static boolean isNegativeVarLong(byte value) { + return value < -120 || (value >= -112 && value < 0); + } + + /** + * @param bytes input byte array. + * @param offset offset to start reading. + * @param length length of bytes to copy. + * @return a new copy of the byte array. + */ + public static byte[] copy(byte[] bytes, int offset, int length) { + byte[] copy = new byte[length]; + System.arraycopy(bytes, offset, copy, 0, length); + return copy; + } + + /** + * Lexicographically compares two byte arrays. + * + * @param bytes1 left operand. + * @param bytes2 right operand. + * @return 0 if equal, < 0 if left is less than right, etc. + */ + public static int compareTo(byte[] bytes1, byte[] bytes2) { + return compareTo(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length); + } + + /** + * Lexicographically compares two byte arrays. + * + * @param bytes1 left operand. + * @param bytes2 right operand. + * @param offset1 where to start comparing in the left buffer. + * @param offset2 where to start comparing in the right buffer. + * @param length1 how much to compare from the left buffer. + * @param length2 how much to compare from the right buffer. + * @return 0 if equal, < 0 if left is less than right, > 0 otherwise. + */ + public static int compareTo(byte[] bytes1, int offset1, int length1, + byte[] bytes2, int offset2, int length2) { + if (bytes1 == bytes2 && offset1 == offset2 && length1 == length2) { + return 0; + } + int end1 = offset1 + length1; + int end2 = offset2 + length2; + for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) { + int a = (bytes1[i] & 0xff); + int b = (bytes2[j] & 0xff); + if (a != b) { + return a - b; + } + } + return length1 - length2; + } + + /** + * @param bytes input byte array. + * @param offset offset to start reading. + * @param length length of bytes to read. + * @return {@link String} value based on the byte array. + */ + public static String bytesToString(byte[] bytes, int offset, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = offset; i < offset + length; i++) { + sb.append((char) bytes[i]); + } + return sb.toString(); + } + + /** + * @param bytes byte array to hash. + * @param offset offset to start hashing. + * @param length length of bytes to hash. + * @return the generated hash code. + */ + public static int hashCode(byte[] bytes, int offset, int length) { + int hash = 1; + for (int i = offset; i < offset + length; i++) { + hash = (31 * hash) + bytes[i]; + } + return hash; + } + + /** + * Reads the data fully from the {@link InputStream} to the byte array. + * + * @param inputStream {@link InputStream} containing the data. + * @param targetByteArray target byte array. + * @param offset offset in the target byte array to start to write data. + * @param length maximum amount of data to write. + * @return size of bytes read. + * @throws IOException upon error. + */ + public static int readFully(InputStream inputStream, + byte[] targetByteArray, + int offset, + int length) throws IOException { + int totalBytesRead = 0; + int bytesRead; + while (totalBytesRead < length) { + bytesRead = inputStream.read(targetByteArray, offset + totalBytesRead, length - totalBytesRead); + if (bytesRead < 0) { + break; + } + totalBytesRead += bytesRead; + } + return totalBytesRead; + } +} diff --git a/hudi-io/src/main/protobuf/HFile.proto b/hudi-io/src/main/protobuf/HFile.proto new file mode 100644 index 000000000000..3d838243ae01 --- /dev/null +++ b/hudi-io/src/main/protobuf/HFile.proto @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +syntax = "proto2"; + +package org.apache.hudi.io.hfile; + +option java_package = "org.apache.hudi.io.hfile.protobuf.generated"; +option java_outer_classname = "HFileProtos"; +option java_generic_services = true; +option java_generate_equals_and_hash = true; +option optimize_for = SPEED; + +message BytesBytesPair { + required bytes first = 1; + required bytes second = 2; +} + +message InfoProto { + repeated BytesBytesPair map_entry = 1; +} + +message TrailerProto { + optional uint64 file_info_offset = 1; + optional uint64 load_on_open_data_offset = 2; + optional uint64 uncompressed_data_index_size = 3; + optional uint64 total_uncompressed_bytes = 4; + optional uint32 data_index_count = 5; + optional uint32 meta_index_count = 6; + optional uint64 entry_count = 7; + optional uint32 num_data_index_levels = 8; + optional uint64 first_data_block_offset = 9; + optional uint64 last_data_block_offset = 10; + optional string comparator_class_name = 11; + optional uint32 compression_codec = 12; + optional bytes encryption_key = 13; +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/compress/TestHoodieDecompressor.java b/hudi-io/src/test/java/org/apache/hudi/io/compress/TestHoodieDecompressor.java new file mode 100644 index 000000000000..d6883ce77435 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/compress/TestHoodieDecompressor.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.compress; + +import org.apache.hudi.io.util.IOUtils; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; +import java.util.zip.GZIPOutputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests all implementation of {@link HoodieDecompressor}. + */ +public class TestHoodieDecompressor { + private static final int INPUT_LENGTH = 394850; + private static final int[] READ_PART_SIZE_LIST = + new int[] {1200, 30956, 204958, INPUT_LENGTH + 50}; + private static final byte[] INPUT_BYTES = generateRandomBytes(INPUT_LENGTH); + + @ParameterizedTest + @EnumSource(CompressionCodec.class) + public void testDefaultDecompressors(CompressionCodec codec) throws IOException { + switch (codec) { + case NONE: + case GZIP: + HoodieDecompressor decompressor = HoodieDecompressorFactory.getDecompressor(codec); + byte[] actualOutput = new byte[INPUT_LENGTH + 100]; + try (InputStream stream = prepareInputStream(codec)) { + for (int sizeToRead : READ_PART_SIZE_LIST) { + stream.mark(INPUT_LENGTH); + int actualSizeRead = + decompressor.decompress(stream, actualOutput, 4, sizeToRead); + assertEquals(actualSizeRead, Math.min(INPUT_LENGTH, sizeToRead)); + assertEquals(0, IOUtils.compareTo( + actualOutput, 4, actualSizeRead, INPUT_BYTES, 0, actualSizeRead)); + stream.reset(); + } + } + break; + default: + assertThrows( + IllegalArgumentException.class, () -> HoodieDecompressorFactory.getDecompressor(codec)); + } + } + + private static InputStream prepareInputStream(CompressionCodec codec) throws IOException { + switch (codec) { + case NONE: + return new ByteArrayInputStream(INPUT_BYTES); + case GZIP: + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(stream)) { + gzipOutputStream.write(INPUT_BYTES); + } + return new ByteArrayInputStream(stream.toByteArray()); + default: + throw new IllegalArgumentException("Not supported in tests."); + } + } + + private static byte[] generateRandomBytes(int length) { + Random random = new Random(0x8e96); + byte[] result = new byte[length]; + int chunkSize = 16384; + int numChunks = length / chunkSize; + // Fill in the same bytes in all chunks + if (numChunks > 0) { + byte[] chunk = new byte[chunkSize]; + random.nextBytes(chunk); + for (int i = 0; i < numChunks; i++) { + System.arraycopy(chunk, 0, result, chunkSize * i, chunkSize); + } + } + // Fill in random bytes in the remaining + for (int i = numChunks * chunkSize; i < length; i++) { + result[i] = (byte) (random.nextInt() & 0xff); + } + return result; + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java new file mode 100644 index 000000000000..e0ee96261390 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java @@ -0,0 +1,642 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.hfile; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Stream; + +import static org.apache.hudi.common.util.FileIOUtils.readAsByteArray; +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_BEFORE_FIRST_KEY; +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_EOF; +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND; +import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_IN_RANGE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link HFileReader} + */ +public class TestHFileReader { + public static final String SIMPLE_SCHEMA_HFILE_SUFFIX = "_simple.hfile"; + public static final String COMPLEX_SCHEMA_HFILE_SUFFIX = "_complex.hfile"; + public static final String BOOTSTRAP_INDEX_HFILE_SUFFIX = "_bootstrap_index_partitions.hfile"; + // Custom information added to file info block + public static final String CUSTOM_META_KEY = "hudi_hfile_testing.custom_key"; + public static final String CUSTOM_META_VALUE = "hudi_custom_value"; + // Dummy Bloom filter bytes + public static final String DUMMY_BLOOM_FILTER = + "/////wAAABQBAAABID797Rg6cC9QEnS/mT3C01cdQGaLYH2jbOCLtMA0RWppEH1HQg=="; + public static final Function KEY_CREATOR = i -> String.format("hudi-key-%09d", i); + public static final Function VALUE_CREATOR = i -> String.format("hudi-value-%09d", i); + private static final int SEEK_TO_THROW_EXCEPTION = -2; + + static Stream testArgsReadHFilePointAndPrefixLookup() { + return Stream.of( + Arguments.of( + "/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_20000.hfile", + 20000, + Arrays.asList( + // before first key + new KeyLookUpInfo("", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("a", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("hudi-key-0000000", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + // first key + new KeyLookUpInfo("hudi-key-000000000", SEEK_TO_FOUND, "hudi-key-000000000", "hudi-value-000000000"), + // key in the block 0 + new KeyLookUpInfo("hudi-key-000000100", SEEK_TO_FOUND, "hudi-key-000000100", "hudi-value-000000100"), + // backward seek not supported + new KeyLookUpInfo("hudi-key-000000099", SEEK_TO_THROW_EXCEPTION, "", ""), + // prefix lookup, the pointer should not move + new KeyLookUpInfo("hudi-key-000000100a", SEEK_TO_IN_RANGE, "hudi-key-000000100", + "hudi-value-000000100"), + new KeyLookUpInfo("hudi-key-000000100b", SEEK_TO_IN_RANGE, "hudi-key-000000100", + "hudi-value-000000100"), + // prefix lookup with a jump, the pointer should not go beyond the lookup key + new KeyLookUpInfo("hudi-key-000000200a", SEEK_TO_IN_RANGE, "hudi-key-000000200", + "hudi-value-000000200"), + new KeyLookUpInfo("hudi-key-000000200b", SEEK_TO_IN_RANGE, "hudi-key-000000200", + "hudi-value-000000200"), + // last key of the block 0 + new KeyLookUpInfo("hudi-key-000000277", SEEK_TO_FOUND, "hudi-key-000000277", "hudi-value-000000277"), + new KeyLookUpInfo("hudi-key-000000277a", SEEK_TO_IN_RANGE, "hudi-key-000000277", + "hudi-value-000000277"), + new KeyLookUpInfo("hudi-key-000000277b", SEEK_TO_IN_RANGE, "hudi-key-000000277", + "hudi-value-000000277"), + // first key of the block 1 + new KeyLookUpInfo("hudi-key-000000278", SEEK_TO_FOUND, "hudi-key-000000278", "hudi-value-000000278"), + // prefix before the first key of the block 9 + new KeyLookUpInfo("hudi-key-000002501a", SEEK_TO_IN_RANGE, "hudi-key-000002501", + "hudi-value-000002501"), + new KeyLookUpInfo("hudi-key-000002501b", SEEK_TO_IN_RANGE, "hudi-key-000002501", + "hudi-value-000002501"), + // first key of the block 30 + new KeyLookUpInfo("hudi-key-000008340", SEEK_TO_FOUND, "hudi-key-000008340", "hudi-value-000008340"), + // last key of the block 49 + new KeyLookUpInfo("hudi-key-000013899", SEEK_TO_FOUND, "hudi-key-000013899", "hudi-value-000013899"), + // seeking again should not move the pointer + new KeyLookUpInfo("hudi-key-000013899", SEEK_TO_FOUND, "hudi-key-000013899", "hudi-value-000013899"), + // adjacent keys + new KeyLookUpInfo("hudi-key-000013900", SEEK_TO_FOUND, "hudi-key-000013900", "hudi-value-000013900"), + new KeyLookUpInfo("hudi-key-000013901", SEEK_TO_FOUND, "hudi-key-000013901", "hudi-value-000013901"), + new KeyLookUpInfo("hudi-key-000013902", SEEK_TO_FOUND, "hudi-key-000013902", "hudi-value-000013902"), + // key in the block 70 + new KeyLookUpInfo("hudi-key-000019500", SEEK_TO_FOUND, "hudi-key-000019500", "hudi-value-000019500"), + // prefix lookups + new KeyLookUpInfo("hudi-key-0000196", SEEK_TO_IN_RANGE, "hudi-key-000019599", "hudi-value-000019599"), + new KeyLookUpInfo("hudi-key-00001960", SEEK_TO_IN_RANGE, "hudi-key-000019599", "hudi-value-000019599"), + new KeyLookUpInfo("hudi-key-000019600a", SEEK_TO_IN_RANGE, "hudi-key-000019600", + "hudi-value-000019600"), + // second to last key + new KeyLookUpInfo("hudi-key-000019998", SEEK_TO_FOUND, "hudi-key-000019998", "hudi-value-000019998"), + // last key + new KeyLookUpInfo("hudi-key-000019999", SEEK_TO_FOUND, "hudi-key-000019999", "hudi-value-000019999"), + // after last key + new KeyLookUpInfo("hudi-key-000019999a", SEEK_TO_EOF, "", ""), + new KeyLookUpInfo("hudi-key-000019999b", SEEK_TO_EOF, "", "") + ) + ), + Arguments.of( + "/hfile/hudi_1_0_hbase_2_4_9_512KB_GZ_20000.hfile", + 20000, + Arrays.asList( + // before first key + new KeyLookUpInfo("", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("a", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("hudi-key-0000000", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + // first key + new KeyLookUpInfo("hudi-key-000000000", SEEK_TO_FOUND, "hudi-key-000000000", "hudi-value-000000000"), + // last key of block 0 + new KeyLookUpInfo("hudi-key-000008886", SEEK_TO_FOUND, "hudi-key-000008886", "hudi-value-000008886"), + // prefix lookup + new KeyLookUpInfo("hudi-key-000008886a", SEEK_TO_IN_RANGE, "hudi-key-000008886", + "hudi-value-000008886"), + new KeyLookUpInfo("hudi-key-000008886b", SEEK_TO_IN_RANGE, "hudi-key-000008886", + "hudi-value-000008886"), + // keys in block 1 + new KeyLookUpInfo("hudi-key-000008888", SEEK_TO_FOUND, "hudi-key-000008888", "hudi-value-000008888"), + new KeyLookUpInfo("hudi-key-000008889", SEEK_TO_FOUND, "hudi-key-000008889", "hudi-value-000008889"), + new KeyLookUpInfo("hudi-key-000008890", SEEK_TO_FOUND, "hudi-key-000008890", "hudi-value-000008890"), + // prefix lookup + new KeyLookUpInfo("hudi-key-0000090", SEEK_TO_IN_RANGE, "hudi-key-000008999", "hudi-value-000008999"), + new KeyLookUpInfo("hudi-key-00000900", SEEK_TO_IN_RANGE, "hudi-key-000008999", "hudi-value-000008999"), + new KeyLookUpInfo("hudi-key-000009000a", SEEK_TO_IN_RANGE, "hudi-key-000009000", + "hudi-value-000009000"), + // last key in block 1 + new KeyLookUpInfo("hudi-key-000017773", SEEK_TO_FOUND, "hudi-key-000017773", "hudi-value-000017773"), + // after last key + new KeyLookUpInfo("hudi-key-000020000", SEEK_TO_EOF, "", ""), + new KeyLookUpInfo("hudi-key-000020001", SEEK_TO_EOF, "", "") + ) + ), + Arguments.of( + "/hfile/hudi_1_0_hbase_2_4_9_16KB_NONE_5000.hfile", + 5000, + Arrays.asList( + // before first key + new KeyLookUpInfo("", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("a", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("hudi-key-0000000", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + // first key + new KeyLookUpInfo("hudi-key-000000000", SEEK_TO_FOUND, "hudi-key-000000000", "hudi-value-000000000"), + // key in the block 0 + new KeyLookUpInfo("hudi-key-000000100", SEEK_TO_FOUND, "hudi-key-000000100", "hudi-value-000000100"), + // backward seek not supported + new KeyLookUpInfo("hudi-key-000000099", SEEK_TO_THROW_EXCEPTION, "", ""), + // prefix lookup, the pointer should not move + new KeyLookUpInfo("hudi-key-000000100a", SEEK_TO_IN_RANGE, "hudi-key-000000100", + "hudi-value-000000100"), + new KeyLookUpInfo("hudi-key-000000100b", SEEK_TO_IN_RANGE, "hudi-key-000000100", + "hudi-value-000000100"), + // prefix lookup with a jump, the pointer should not go beyond the lookup key + new KeyLookUpInfo("hudi-key-000000200a", SEEK_TO_IN_RANGE, "hudi-key-000000200", + "hudi-value-000000200"), + new KeyLookUpInfo("hudi-key-000000200b", SEEK_TO_IN_RANGE, "hudi-key-000000200", + "hudi-value-000000200"), + // last key of the block 0 + new KeyLookUpInfo("hudi-key-000000277", SEEK_TO_FOUND, "hudi-key-000000277", "hudi-value-000000277"), + new KeyLookUpInfo("hudi-key-000000277a", SEEK_TO_IN_RANGE, "hudi-key-000000277", + "hudi-value-000000277"), + new KeyLookUpInfo("hudi-key-000000277b", SEEK_TO_IN_RANGE, "hudi-key-000000277", + "hudi-value-000000277"), + // first key of the block 1 + new KeyLookUpInfo("hudi-key-000000278", SEEK_TO_FOUND, "hudi-key-000000278", "hudi-value-000000278"), + // prefix before the first key of the block 9 + new KeyLookUpInfo("hudi-key-000002501a", SEEK_TO_IN_RANGE, "hudi-key-000002501", + "hudi-value-000002501"), + new KeyLookUpInfo("hudi-key-000002501b", SEEK_TO_IN_RANGE, "hudi-key-000002501", + "hudi-value-000002501"), + // first key of the block 12 + new KeyLookUpInfo("hudi-key-000003336", SEEK_TO_FOUND, "hudi-key-000003336", "hudi-value-000003336"), + // last key of the block 14 + new KeyLookUpInfo("hudi-key-000004169", SEEK_TO_FOUND, "hudi-key-000004169", "hudi-value-000004169"), + // seeking again should not move the pointer + new KeyLookUpInfo("hudi-key-000004169", SEEK_TO_FOUND, "hudi-key-000004169", "hudi-value-000004169"), + // keys in the block 16 + new KeyLookUpInfo("hudi-key-000004600", SEEK_TO_FOUND, "hudi-key-000004600", "hudi-value-000004600"), + new KeyLookUpInfo("hudi-key-000004601", SEEK_TO_FOUND, "hudi-key-000004601", "hudi-value-000004601"), + new KeyLookUpInfo("hudi-key-000004602", SEEK_TO_FOUND, "hudi-key-000004602", "hudi-value-000004602"), + // prefix lookups + new KeyLookUpInfo("hudi-key-0000047", SEEK_TO_IN_RANGE, "hudi-key-000004699", "hudi-value-000004699"), + new KeyLookUpInfo("hudi-key-00000470", SEEK_TO_IN_RANGE, "hudi-key-000004699", "hudi-value-000004699"), + new KeyLookUpInfo("hudi-key-000004700a", SEEK_TO_IN_RANGE, "hudi-key-000004700", + "hudi-value-000004700"), + // second to last key + new KeyLookUpInfo("hudi-key-000004998", SEEK_TO_FOUND, "hudi-key-000004998", "hudi-value-000004998"), + // last key + new KeyLookUpInfo("hudi-key-000004999", SEEK_TO_FOUND, "hudi-key-000004999", "hudi-value-000004999"), + // after last key + new KeyLookUpInfo("hudi-key-000004999a", SEEK_TO_EOF, "", ""), + new KeyLookUpInfo("hudi-key-000004999b", SEEK_TO_EOF, "", "") + ) + ), + Arguments.of( + "/hfile/hudi_1_0_hbase_2_4_9_64KB_NONE_5000.hfile", + 5000, + Arrays.asList( + // before first key + new KeyLookUpInfo("", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("a", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("hudi-key-0000000", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + // first key + new KeyLookUpInfo("hudi-key-000000000", SEEK_TO_FOUND, "hudi-key-000000000", "hudi-value-000000000"), + // last key of block 0 + new KeyLookUpInfo("hudi-key-000001110", SEEK_TO_FOUND, "hudi-key-000001110", "hudi-value-000001110"), + // prefix lookup + new KeyLookUpInfo("hudi-key-000001110a", SEEK_TO_IN_RANGE, "hudi-key-000001110", + "hudi-value-000001110"), + new KeyLookUpInfo("hudi-key-000001110b", SEEK_TO_IN_RANGE, "hudi-key-000001110", + "hudi-value-000001110"), + // keys in block 1 + new KeyLookUpInfo("hudi-key-000001688", SEEK_TO_FOUND, "hudi-key-000001688", "hudi-value-000001688"), + new KeyLookUpInfo("hudi-key-000001689", SEEK_TO_FOUND, "hudi-key-000001689", "hudi-value-000001689"), + new KeyLookUpInfo("hudi-key-000001690", SEEK_TO_FOUND, "hudi-key-000001690", "hudi-value-000001690"), + // prefix lookup + new KeyLookUpInfo("hudi-key-0000023", SEEK_TO_IN_RANGE, "hudi-key-000002299", "hudi-value-000002299"), + new KeyLookUpInfo("hudi-key-00000230", SEEK_TO_IN_RANGE, "hudi-key-000002299", "hudi-value-000002299"), + new KeyLookUpInfo("hudi-key-000002300a", SEEK_TO_IN_RANGE, "hudi-key-000002300", + "hudi-value-000002300"), + // last key in block 2 + new KeyLookUpInfo("hudi-key-000003332", SEEK_TO_FOUND, "hudi-key-000003332", "hudi-value-000003332"), + // after last key + new KeyLookUpInfo("hudi-key-000020000", SEEK_TO_EOF, "", ""), + new KeyLookUpInfo("hudi-key-000020001", SEEK_TO_EOF, "", "") + ) + ) + ); + } + + @ParameterizedTest + @MethodSource("testArgsReadHFilePointAndPrefixLookup") + public void testReadHFilePointAndPrefixLookup(String filename, + int numEntries, + List keyLookUpInfoList) throws IOException { + verifyHFileRead(filename, numEntries, KEY_CREATOR, VALUE_CREATOR, keyLookUpInfoList); + } + + @Test + public void testReadHFileWithNonUniqueKeys() throws IOException { + try (HFileReader reader = getHFileReader("/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_200_20_non_unique.hfile")) { + reader.initializeMetadata(); + verifyHFileMetadata(reader, 4200); + + assertFalse(reader.isSeeked()); + assertFalse(reader.next()); + assertTrue(reader.seekTo()); + + int numKeys = 200; + // Calling reader.next() + for (int i = 0; i < numKeys; i++) { + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + Key expectedKey = new UTF8StringKey(KEY_CREATOR.apply(i)); + String value = VALUE_CREATOR.apply(i); + assertEquals(expectedKey, keyValue.get().getKey()); + assertEquals(value, getValue(keyValue.get())); + assertTrue(reader.next()); + + for (int j = 0; j < 20; j++) { + keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + assertEquals(expectedKey, keyValue.get().getKey()); + assertEquals(value + "_" + j, getValue(keyValue.get())); + if (i == numKeys - 1 && j == 19) { + assertFalse(reader.next()); + } else { + assertTrue(reader.next()); + } + } + } + + assertTrue(reader.seekTo()); + // Calling reader.seekTo(key) on each key + for (int i = 0; i < numKeys; i++) { + Key expectedKey = new UTF8StringKey(KEY_CREATOR.apply(i)); + + for (int j = 0; j < 1; j++) { + // seekTo twice and the results should be the same + assertEquals(SEEK_TO_FOUND, reader.seekTo(expectedKey)); + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + String value = VALUE_CREATOR.apply(i); + assertEquals(expectedKey, keyValue.get().getKey()); + assertEquals(value, getValue(keyValue.get())); + } + + assertTrue(reader.next()); + for (int j = 0; j < 1; j++) { + // seekTo twice and the results should be the same + assertEquals(SEEK_TO_FOUND, reader.seekTo(expectedKey)); + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + String value = VALUE_CREATOR.apply(i); + assertEquals(expectedKey, keyValue.get().getKey()); + assertEquals(value + "_0", getValue(keyValue.get())); + } + } + + verifyHFileSeekToReads( + reader, + // point and prefix lookups + Arrays.asList( + // before first key + new KeyLookUpInfo("", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("a", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + new KeyLookUpInfo("hudi-key-0000000", SEEK_TO_BEFORE_FIRST_KEY, "", ""), + // first key + new KeyLookUpInfo("hudi-key-000000000", SEEK_TO_FOUND, "hudi-key-000000000", "hudi-value-000000000"), + // key in the block 0 + new KeyLookUpInfo("hudi-key-000000005", SEEK_TO_FOUND, "hudi-key-000000005", "hudi-value-000000005"), + // backward seek not supported + new KeyLookUpInfo("hudi-key-000000004", SEEK_TO_THROW_EXCEPTION, "", ""), + // prefix lookup, the pointer should move to the entry before + new KeyLookUpInfo("hudi-key-000000006a", SEEK_TO_IN_RANGE, "hudi-key-000000006", + "hudi-value-000000006_19"), + new KeyLookUpInfo("hudi-key-000000006b", SEEK_TO_IN_RANGE, "hudi-key-000000006", + "hudi-value-000000006_19"), + // prefix lookup with a jump, the pointer should not go beyond the lookup key + new KeyLookUpInfo("hudi-key-000000008a", SEEK_TO_IN_RANGE, "hudi-key-000000008", + "hudi-value-000000008_19"), + new KeyLookUpInfo("hudi-key-000000008b", SEEK_TO_IN_RANGE, "hudi-key-000000008", + "hudi-value-000000008_19"), + // last key of the block 0 + new KeyLookUpInfo("hudi-key-000000012", SEEK_TO_FOUND, "hudi-key-000000012", "hudi-value-000000012"), + new KeyLookUpInfo("hudi-key-000000012a", SEEK_TO_IN_RANGE, "hudi-key-000000012", + "hudi-value-000000012_19"), + new KeyLookUpInfo("hudi-key-000000012b", SEEK_TO_IN_RANGE, "hudi-key-000000012", + "hudi-value-000000012_19"), + // first key of the block 1 + new KeyLookUpInfo("hudi-key-000000013", SEEK_TO_FOUND, "hudi-key-000000013", "hudi-value-000000013"), + // prefix before the first key of the block 5 + new KeyLookUpInfo("hudi-key-000000064a", SEEK_TO_IN_RANGE, "hudi-key-000000064", + "hudi-value-000000064_19"), + new KeyLookUpInfo("hudi-key-000000064b", SEEK_TO_IN_RANGE, "hudi-key-000000064", + "hudi-value-000000064_19"), + // first key of the block 8 + new KeyLookUpInfo("hudi-key-000000104", SEEK_TO_FOUND, "hudi-key-000000104", "hudi-value-000000104"), + // last key of the block 11 + new KeyLookUpInfo("hudi-key-000000155", SEEK_TO_FOUND, "hudi-key-000000155", "hudi-value-000000155"), + // seeking again should not move the pointer + new KeyLookUpInfo("hudi-key-000000155", SEEK_TO_FOUND, "hudi-key-000000155", "hudi-value-000000155"), + // adjacent keys + new KeyLookUpInfo("hudi-key-000000156", SEEK_TO_FOUND, "hudi-key-000000156", "hudi-value-000000156"), + new KeyLookUpInfo("hudi-key-000000157", SEEK_TO_FOUND, "hudi-key-000000157", "hudi-value-000000157"), + new KeyLookUpInfo("hudi-key-000000158", SEEK_TO_FOUND, "hudi-key-000000158", "hudi-value-000000158"), + // prefix lookups in the block 14 + new KeyLookUpInfo("hudi-key-00000019", SEEK_TO_IN_RANGE, "hudi-key-000000189", + "hudi-value-000000189_19"), + new KeyLookUpInfo("hudi-key-000000190a", SEEK_TO_IN_RANGE, "hudi-key-000000190", + "hudi-value-000000190_19"), + // second to last key + new KeyLookUpInfo("hudi-key-000000198", SEEK_TO_FOUND, "hudi-key-000000198", "hudi-value-000000198"), + // last key + new KeyLookUpInfo("hudi-key-000000199", SEEK_TO_FOUND, "hudi-key-000000199", "hudi-value-000000199"), + // after last key + new KeyLookUpInfo("hudi-key-000000199a", SEEK_TO_EOF, "", ""), + new KeyLookUpInfo("hudi-key-000000199b", SEEK_TO_EOF, "", "") + ) + ); + } + } + + @Test + public void testReadHFileWithoutKeyValueEntries() throws IOException { + try (HFileReader reader = getHFileReader("/hfile/hudi_1_0_hbase_2_4_9_no_entry.hfile")) { + reader.initializeMetadata(); + verifyHFileMetadataCompatibility(reader, 0); + assertFalse(reader.isSeeked()); + assertFalse(reader.next()); + assertFalse(reader.seekTo()); + assertFalse(reader.next()); + assertEquals(2, reader.seekTo(new UTF8StringKey("random"))); + assertFalse(reader.next()); + } + } + + @ParameterizedTest + @ValueSource(strings = { + "/hfile/hudi_0_9_hbase_1_2_3", "/hfile/hudi_0_10_hbase_1_2_3", "/hfile/hudi_0_11_hbase_2_4_9"}) + public void testReadHFileCompatibility(String hfilePrefix) throws IOException { + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadPrimitiveRecord() + // using different Hudi releases + String simpleHFile = hfilePrefix + SIMPLE_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadComplexRecord() + // using different Hudi releases + String complexHFile = hfilePrefix + COMPLEX_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestBootstrapIndex#testBootstrapIndex() + // using different Hudi releases. The file is copied from .hoodie/.aux/.bootstrap/.partitions/ + String bootstrapIndexFile = hfilePrefix + BOOTSTRAP_INDEX_HFILE_SUFFIX; + + Option> keyCreator = Option.of(i -> "key" + String.format("%02d", i)); + verifyHFileReadCompatibility(simpleHFile, 50, keyCreator); + verifyHFileReadCompatibility(complexHFile, 50, keyCreator); + verifyHFileReadCompatibility(bootstrapIndexFile, 4, Option.empty()); + } + + public static byte[] readHFileFromResources(String filename) throws IOException { + long size = TestHFileReader.class + .getResource(filename).openConnection().getContentLength(); + return readAsByteArray( + TestHFileReader.class.getResourceAsStream(filename), (int) size); + } + + public static HFileReader getHFileReader(String filename) throws IOException { + byte[] content = readHFileFromResources(filename); + return new HFileReaderImpl( + new FSDataInputStream(new SeekableByteArrayInputStream(content)), content.length); + } + + private static void verifyHFileRead(String filename, + int numEntries, + Function keyCreator, + Function valueCreator, + List keyLookUpInfoList) throws IOException { + try (HFileReader reader = getHFileReader(filename)) { + reader.initializeMetadata(); + verifyHFileMetadata(reader, numEntries); + verifyHFileValuesInSequentialReads(reader, numEntries, Option.of(keyCreator), Option.of(valueCreator)); + verifyHFileSeekToReads(reader, keyLookUpInfoList); + } + } + + private static void verifyHFileMetadata(HFileReader reader, int numEntries) throws IOException { + assertEquals(numEntries, reader.getNumKeyValueEntries()); + + Option customValue = reader.getMetaInfo(new UTF8StringKey(CUSTOM_META_KEY)); + assertTrue(customValue.isPresent()); + assertEquals(CUSTOM_META_VALUE, new String(customValue.get(), StandardCharsets.UTF_8)); + + Option bloomFilter = reader.getMetaBlock("bloomFilter"); + assertTrue(bloomFilter.isPresent()); + assertEquals(DUMMY_BLOOM_FILTER, new String( + bloomFilter.get().array(), bloomFilter.get().position(), bloomFilter.get().remaining(), + StandardCharsets.UTF_8)); + } + + private static void verifyHFileReadCompatibility(String filename, + int numEntries, + Option> keyCreator) throws IOException { + try (HFileReader reader = getHFileReader(filename)) { + reader.initializeMetadata(); + verifyHFileMetadataCompatibility(reader, numEntries); + verifyHFileValuesInSequentialReads(reader, numEntries, keyCreator); + } + } + + private static void verifyHFileMetadataCompatibility(HFileReader reader, int numEntries) { + assertEquals(numEntries, reader.getNumKeyValueEntries()); + } + + private static void verifyHFileValuesInSequentialReads(HFileReader reader, + int numEntries, + Option> keyCreator) + throws IOException { + verifyHFileValuesInSequentialReads(reader, numEntries, keyCreator, Option.empty()); + } + + private static void verifyHFileValuesInSequentialReads(HFileReader reader, + int numEntries, + Option> keyCreator, + Option> valueCreator) + throws IOException { + assertFalse(reader.isSeeked()); + assertFalse(reader.next()); + boolean result = reader.seekTo(); + assertEquals(numEntries > 0, result); + + // Calling reader.next() + for (int i = 0; i < numEntries; i++) { + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + if (keyCreator.isPresent()) { + assertEquals(new UTF8StringKey(keyCreator.get().apply(i)), keyValue.get().getKey()); + } + if (valueCreator.isPresent()) { + assertEquals(valueCreator.get().apply(i), getValue(keyValue.get())); + } + if (i < numEntries - 1) { + assertTrue(reader.next()); + } else { + assertFalse(reader.next()); + } + } + + if (keyCreator.isPresent()) { + result = reader.seekTo(); + assertEquals(numEntries > 0, result); + // Calling reader.seekTo(key) on each key + for (int i = 0; i < numEntries; i++) { + Key expecedKey = new UTF8StringKey(keyCreator.get().apply(i)); + assertEquals(SEEK_TO_FOUND, reader.seekTo(expecedKey)); + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + assertEquals(expecedKey, keyValue.get().getKey()); + if (valueCreator.isPresent()) { + assertEquals(valueCreator.get().apply(i), getValue(keyValue.get())); + } + } + } + } + + private static void verifyHFileSeekToReads(HFileReader reader, + List keyLookUpInfoList) throws IOException { + assertTrue(reader.seekTo()); + + for (KeyLookUpInfo keyLookUpInfo : keyLookUpInfoList) { + int expectedSeekToResult = keyLookUpInfo.getExpectedSeekToResult(); + if (expectedSeekToResult == SEEK_TO_THROW_EXCEPTION) { + assertThrows( + IllegalStateException.class, + () -> reader.seekTo(new UTF8StringKey(keyLookUpInfo.getLookUpKey()))); + } else { + assertEquals( + expectedSeekToResult, + reader.seekTo(new UTF8StringKey(keyLookUpInfo.getLookUpKey())), + String.format("Unexpected seekTo result for lookup key %s", keyLookUpInfo.getLookUpKey())); + } + switch (expectedSeekToResult) { + case SEEK_TO_THROW_EXCEPTION: + case SEEK_TO_BEFORE_FIRST_KEY: + break; + case SEEK_TO_FOUND: + case SEEK_TO_IN_RANGE: + assertTrue(reader.getKeyValue().isPresent()); + assertEquals(new UTF8StringKey(keyLookUpInfo.getExpectedKey()), + reader.getKeyValue().get().getKey()); + assertEquals(keyLookUpInfo.getExpectedValue(), getValue(reader.getKeyValue().get())); + break; + case SEEK_TO_EOF: + assertFalse(reader.getKeyValue().isPresent()); + assertFalse(reader.next()); + break; + default: + throw new IllegalArgumentException( + "SeekTo result not allowed: " + keyLookUpInfo.expectedSeekToResult); + } + } + } + + private static String getValue(KeyValue kv) { + return new String(kv.getBytes(), kv.getValueOffset(), kv.getValueLength()); + } + + static class KeyLookUpInfo { + private final String lookUpKey; + private final int expectedSeekToResult; + private final String expectedKey; + private final String expectedValue; + + public KeyLookUpInfo(String lookUpKey, + int expectedSeekToResult, + String expectedKey, + String expectedValue) { + this.lookUpKey = lookUpKey; + this.expectedSeekToResult = expectedSeekToResult; + this.expectedKey = expectedKey; + this.expectedValue = expectedValue; + } + + public String getLookUpKey() { + return lookUpKey; + } + + public int getExpectedSeekToResult() { + return expectedSeekToResult; + } + + public String getExpectedKey() { + return expectedKey; + } + + public String getExpectedValue() { + return expectedValue; + } + } + + static class SeekableByteArrayInputStream extends ByteBufferBackedInputStream implements Seekable, + PositionedReadable { + public SeekableByteArrayInputStream(byte[] buf) { + super(buf); + } + + @Override + public long getPos() throws IOException { + return getPosition(); + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) throws IOException { + return copyFrom(position, buffer, offset, length); + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + read(position, buffer, 0, buffer.length); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { + read(position, buffer, offset, length); + } + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java b/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java new file mode 100644 index 000000000000..07d4055549be --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.util; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Tests {@link IOUtils}. + */ +public class TestIOUtils { + private static final byte[] BYTE_ARRAY = new byte[] { + (byte) 0xc8, 0x36, 0x09, (byte) 0xf2, (byte) 0xa5, 0x7d, 0x01, (byte) 0x48, + (byte) 0x89, 0x66}; + + @Test + public void testReadInt() { + assertEquals(-935982606, IOUtils.readInt(BYTE_ARRAY, 0)); + assertEquals(906621605, IOUtils.readInt(BYTE_ARRAY, 1)); + assertEquals(166897021, IOUtils.readInt(BYTE_ARRAY, 2)); + } + + @Test + public void testReadLong() { + assertEquals(-4020014679618420408L, IOUtils.readLong(BYTE_ARRAY, 0)); + assertEquals(3893910145419266185L, IOUtils.readLong(BYTE_ARRAY, 1)); + assertEquals(716817247016356198L, IOUtils.readLong(BYTE_ARRAY, 2)); + } + + @Test + public void testReadShort() { + assertEquals(-14282, IOUtils.readShort(BYTE_ARRAY, 0)); + assertEquals(13833, IOUtils.readShort(BYTE_ARRAY, 1)); + assertEquals(2546, IOUtils.readShort(BYTE_ARRAY, 2)); + } + + private static Stream decodeVariableLengthNumberParams() { + // preserveMetaField, partitioned + Object[][] data = new Object[][] { + {new byte[] {0}, 0}, + {new byte[] {-108}, -108}, + {new byte[] {98}, 98}, + {new byte[] {-113, -48}, 208}, + {new byte[] {-114, 125, 80}, 32080}, + {new byte[] {-115, 31, 13, 14}, 2034958}, + {new byte[] {-121, -54}, -203}, + {new byte[] {-116, 37, -77, 17, 62}, 632492350}, + {new byte[] {-124, 1, -10, 100, -127}, -32924802}, + {new byte[] {-116, 127, -1, -1, -1}, Integer.MAX_VALUE}, + {new byte[] {-124, 127, -1, -1, -1}, Integer.MIN_VALUE}, + {new byte[] {-118, 20, -17, -92, -41, 107, -78}, 23019495320498L}, + {new byte[] {-127, 2, -7, -102, -100, -69, -93, -109}, -837392403243924L}, + {new byte[] {-120, 127, -1, -1, -1, -1, -1, -1, -1}, Long.MAX_VALUE}, + {new byte[] {-128, 127, -1, -1, -1, -1, -1, -1, -1}, Long.MIN_VALUE}, + }; + return Stream.of(data).map(Arguments::of); + } + + @ParameterizedTest + @MethodSource("decodeVariableLengthNumberParams") + public void testDecodeVariableLengthNumber(byte[] bytes, long expectedNumber) throws IOException { + int size = IOUtils.decodeVarLongSizeOnDisk(bytes, 0); + assertEquals(bytes.length, size); + assertEquals(bytes.length, IOUtils.decodeVarLongSize(bytes[0])); + assertEquals(expectedNumber, IOUtils.readVarLong(bytes, 0)); + assertEquals(expectedNumber, IOUtils.readVarLong(bytes, 0, size)); + assertEquals(expectedNumber < 0, IOUtils.isNegativeVarLong(bytes[0])); + } + + @Test + public void testByteArrayCompareTo() { + byte[] bytes1 = new byte[] {(byte) 0x9b, 0, 0x18, 0x65, 0x2e, (byte) 0xf3}; + byte[] bytes2 = new byte[] {(byte) 0x9b, 0, 0x18, 0x65, 0x1c, 0x38, (byte) 0x53}; + + assertEquals(0, IOUtils.compareTo(bytes1, 0, 4, bytes1, 0, 4)); + assertEquals(-2, IOUtils.compareTo(bytes1, 0, 4, bytes1, 0, 6)); + assertEquals(1, IOUtils.compareTo(bytes1, 0, 5, bytes1, 0, 4)); + assertEquals(0, IOUtils.compareTo(bytes1, 0, 4, bytes2, 0, 4)); + assertEquals(-2, IOUtils.compareTo(bytes1, 0, 4, bytes2, 0, 6)); + assertEquals(2, IOUtils.compareTo(bytes1, 0, 6, bytes1, 0, 4)); + assertEquals(18, IOUtils.compareTo(bytes1, 0, 5, bytes2, 0, 5)); + assertEquals(18, IOUtils.compareTo(bytes1, 0, 6, bytes2, 0, 6)); + assertEquals(-155, IOUtils.compareTo(bytes1, 1, 4, bytes2, 0, 5)); + assertEquals(22, IOUtils.compareTo(bytes1, 4, 2, bytes2, 2, 4)); + } +} diff --git a/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_bootstrap_index_partitions.hfile diff --git a/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile b/hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_complex.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_complex.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_complex.hfile diff --git a/hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile b/hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_simple.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_10_hbase_1_2_3_simple.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_10_hbase_1_2_3_simple.hfile diff --git a/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile b/hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_bootstrap_index_partitions.hfile diff --git a/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile b/hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_complex.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_complex.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_complex.hfile diff --git a/hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile b/hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_simple.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_11_hbase_2_4_9_simple.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_11_hbase_2_4_9_simple.hfile diff --git a/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile b/hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_bootstrap_index_partitions.hfile diff --git a/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile b/hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_complex.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_complex.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_complex.hfile diff --git a/hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile b/hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_simple.hfile similarity index 100% rename from hudi-common/src/test/resources/hudi_0_9_hbase_1_2_3_simple.hfile rename to hudi-io/src/test/resources/hfile/hudi_0_9_hbase_1_2_3_simple.hfile diff --git a/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_20000.hfile b/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_GZ_20000.hfile new file mode 100644 index 0000000000000000000000000000000000000000..243eb66124176b4b39dfb0e6ba3a100f7ee9e919 GIT binary patch literal 105235 zcmeHQ30M=?!o`&;6^T5*x8d6b}0s2er73xcnAMeMxn5N|JUWP;ZOB*H5KarAOTXkIjg>P20! z5=qAbe7K{_4>ER9($Ezxeo4=ybIBZ{Wk?*uH2_aVZObE1Vml}fT$58?<&j+M040el zT7_^Ju7P+uYI14?lEBCfz|+w2<&o*wZb~p$WNK<{d1MB5lv2SJeTrOQZBoWX=hWONunhkEo4}6I^eE2% zv9RxSD=XY*NBm4ZsLp|)s)bK*(t&Cy5mYB;jqvO}v-q>qoxi+oJ4+G#?jjXq%A9Yy zcHXk@bb#xf9Xmc6RpA>YaODZag#xkcidad@Z)_BEgzlOKQI+_VQl$_}G|w*b_*9lc zb*owvLlY(s^|Hexc8Er`g1Mea&vgk#Z8^v3E_<*?Ip|ui3)p1N1uRo^3Vi@iw&AW^ z(P`{BiNOx^HxG&%ZGj(Nd)2q~fvQew3j10>Nc-?0F8vIahLRcVr0ih~DtG461MrjB za^yH_bYgVoXfBx}Ju{t4=AfU&PN9>+?{Uct$RYdJ_U_p=sj>0nqQfeO+c*Z*UoVJj zi>YZjHP;I~Y~sgR1|9S!T*|4rSuo#*H|~uau=6!mF@FEjK%jlHF!`SZpk1H`+HQV2 zprsRm)~)B7ZhdDJ4|eG+y8G^1lE(ku+q83d{dY$v`F*hA@sqM2fAph251kqWaP}g= z*%ZMAjjEwg6Qj}a_$h&Y5&y{4}-*xq{B3TtycnXE0x%%+Y22!{-_K z63@o`kXz^yic@${SxzS1&-jE}rf3mj7tZv>ub@NAq^Ggj6cbYm@#sZJ$^?unf*>A~ ze}sz_4fH9KHmu;H?hxCncc(KH(G&e%U8ItWE~B<(k*6^aibJMoAz~fw>WiD2O3D=Z zBBR1x{qP&8S6O5>=0h<#brl_77I_AnPYKQxEk*{0=lbHNrYbT;OOVOoxqkRl%D#LqhhSTvxm!X2Tbdqhd*u~ zPN&bqPhg`FqZ2tN>2vX8*a&1ddf3^C5maJ);wXCFc@5)UNlrR_K8|5dh|!7uj9t#L z=zB;iHpl1W=#mz{?2Jq~Aqspx1}WO*PiHQz$=kvMjgUFyPDzbHZ~;f zU@H}5Z{Lxop4PT9mEcxUi7BH3Y++#sJjKn?T4PP zIT;)0G>uWY+%nbbo!DU?5*Ji|@!)2)1}!hPw_mQ8iSwDL&7TE7WWH^iZA`r z{=slcYOH)t%Tt?9#`4F8n0Ue0-q!Y94fwbwUeNGu1*!Jxtf;hBKD7QlTuTdt19+`{ z=%)90c`FZ*xiQ-J5HGZ0`(Gk;W3=z#V9w27Y%}3}&HwZCnQGue&c*`}e--v&bp-sS z>hZVtSRMX;Ld0J;?xr3CJc|e3?L6_kbvNPC+Qvz;_i>1XiUH$QdG z&(Bxa%Ve@Ro)FNnRE>3cfkFNtVbofd$z}Kvi)(~H#x_)){qtm6bv)+Do);;DA zo3rNYuTpooC+?Hxipp+m&H)I^QY#dST0x4s?mkVUzC~rJ1QUerTRsu;V^m5i`@h~A zrAnQ^R@aFk!cKE@adUQ?;zr4H$#c$|l1JHtJ3DZ2X{EGS`bb(Poyrlir{(d}O+T_5 zLV!AoN&ea00Y2fI@$9aB!YBXiu6@EcemAR@3HvEheSPIhWM94jj8p#V+2Y!U#^>OutX1+GUsUte(hDh?20UKkAh}-+;8w+jz4qeVdU0PsvIV|bO23S!MVt zu+N`W-qnUSJp7AT-hPA2DA^+e>IcEg+*ef=?eb~$vZhx{v1|UO`d4eZO#i0U9`@=? zmr(H8OkVc?{T2>%5NNaf4r*)lOtUqD=Aya_cf-)h5OqU(bx9?l?#Fu69dTcWx*>+B zyB4yvgI}_=lIZYc{%>#D{}VmDY!zM>K4K(gb8g^&zPTn42psu|pyN_0hZTy#scTf_=efVHJHCk}J;qN00o z`b`u=ueETrtgj`mcmZ6IRKqrDX0;q@t7=&NRpivLO;>3-)K=B7`m4yPVP9Rf8?E!8 z#IWJwT)R})F5KZV-q;l2TZfVoO6Y%;#Zh8>al?~^sBH_9+JccC` z>aT~_5622N4>CCXo-ck4om7@%YHDiuJwN6+(<0bdld2ya<^cz?XZ3Chf-BZ9GF$ zvKJ`_a@)Gbn(s*)L7bgA8ILC5EJ1@Xb(aoj4KxU^ znRERd_Lt=id4E>t?NjFjjB|+k#!rikBs+9z>ealAZ7c?q??VM#7+~AY>bPMIZV1>4Sce$nWB@}6?89y%%JsL zf+i+bi}PkMvC9!V#?3gh!21GcH^r$eCjeiJkQp4MtSQS0#Frps2ByqsGdrkZ^nc~_ zeIq@oe=8gAA9bAb`5G(wliIEjY)4{0d_(|SH~pe^`U@S{1`!vvZapL6Zdgx9ChXYB zd!kBI?HqphyRds-?Dz1$<+XrWA>EMea0-}nVGu%`K%#kZD@CKgg)DWoH#L9DeAY}$DYC*8f0_sZz*kn!w0$Expxi!hucI;gl`LAv0oAWd*va7u7RQ2Si`!}v@2{+D+0iZh(^{DC=j8}OqV8>s3j zrTytWDW2#IWIwjh=fvoizg)}6O%q|*Qk2YK5QRc#l$M@IU*vNDn}r~#@d+QF1k4Kw zMqMeJ=ub*Z)6$JjkTb?=t+R{qgVwz6k#o^F-#O z#wUF6!x(}rMJG|#(C?Mzq|uE|*iszmuBDMDzIJLn8S8ky=D0oUZb5ud{c;;BqcX1f zWLTwYu?>3(Fqs0H4%&$C02^^Eun~`iHsVxZBkl>E#$mu|3@%|cav{Y9O`LH5{5zTbN>{Lk^JCrP}|S#(*4MYp>e$Mv{aeNYR=208N)_Kc+J64f^=!YEH+_ zcAB=ja*>VQ(x}pcEA~SzQ~%F9A(2r>OgwWIE^y0-l&&N|u%{k^=jZ7lcm)xH4Ov@! zQ-)HaXKlBlM}A7rsm~sq1mL$yEK^j`fG=XTuMsQRc_{{$t)qsx@xIekO^jH0;ADX zF9SXUs$+FrtZ4{Me@nZ#1*|x zBM9ZS4FWzpPpFc}ZMv#$akPB}ERsXp|JhZR z4YdxEQ2du|sImE)PHx?{3L)Cg8~4j{0@@OSW|m+&v^5BtnVPj)C$K#-G=Rx@5?#I; z3T%f_`u0JrpM1M7s!3qW1~~`{kvN{Ou1nyn8!yn*4Ylz_Nkw9{LcrH-Vb`lwRYh#g z(*%u@rBcJhgUCcL+@%*T2T8Wdt6C|-RndGL(zZJFZ6Lmsl4p2oS6(nbQi+U&+{Qf= zghJ*Z6Ct?}XPq=FCqixR(mW(nl!h8TS={n}UQ&Q1BcfwuL3KZ~_&P(sn*F4|4qpv}X1F?wSo+sWdHl#OPpO zOg@m%$Pn0K>%yRLltJb&H^Iv;TyYXJ>-0utemw!Jbf{&^*ZLQhA)S3rPRf|z!Bqz+;Y#k7VE41Vtx%b0`)hE_ygKRlB| zT2`QZ#uqxm0NYwkJ}<%>asr97@C`v))vO(7wszSHnr#-m>}7zR4B4#(R|876)ER7` z!({Gc_ygbCK+=}dD41N{>-8w}7AN|b_b(%J_?G*YkvYuB1`mb_;&Bv*vd92@5vf5$ zxpWDlJ+(>pcWUfx`=LQmt12yn4tf*PV*Twyf}%nzEra@6mGyql4;Mn5y~F&#j)1cS zG3}=U9nKnvXi_jLsI7eBB2T_M>U~YRl$+hJMZuTrv!jBodFlpvwd%!cp{8E( zv{q9tDCAX!*C^bj_r`|o=A~&>Uk7-c5MgwTc+-nbPc}W@B-&KFNqh)>P$I>!3ncp! zv<(a7`V(No0=fPKZNq{mlHeR%8aWpyF)-S)KHKIP24!p&uvQ@iG%v_O^Fl5Z zx8;H6g=e67A(9807qpj%3wfT%2!zbB1KsJ|i!rFg`6PV~{vGxY#OOriVth0*ijq`f zV(_Cy+|#Pe$=WFzr1;?-9V5J3K4*Aj&vac>}Loxqn`mb7Azs( zSWpiN6yBJRf5JXwb!FP0cakd{6R*u52r+mu@-UKs!87z2ygpNh!LM<*1|N#Gtv1?Hk3v!p;TgEH)bZ z^;$R>dz->!^bg2LuynHiB_S^sO+-7=M5t9NyDYzJ3<$C3hx9y1fLKC;;+79|5Nj$y zvGJ!(KMRIj?(B*8{bqIU2Fk>(D>+9G-42Te(O%ERVzC{KuelY$R*NUFm9i9mV|{*b zic$=6!iB15d_KETB;l(RY#}ej+aS`5qFSeDXppL(cfmov0E|!^#o=HBISDYT8wPhb z-rE7sy*MbSD2}IaYI~QJo z2pi6ii6J2DLj58(`hX5$*BB59hPz`G!G7}*f~whuDMe%+{QBKpf_QH)1~a1l7g`| zf6oDsBUp|2I;R%|+!cvg9})nUP{puarUP8VDu$aQ{QB>NJ>{_XM*bhaVCt1Vs~2jw z$;6^3KmNMZ+Dn>!b`)$XuLfQ*TwEy3S1XhpYJQ_iq5Wq+o*U1 z&ANCL>J`U1j_=_d_1cpB%vJRhUgYW)#e6LNRg{kJ<|KGUBtLNJ$bB>NLQ0aD9j(Z8 z@dQRypHgXS1I2T?$pUs=xmb%52ZKKMrYl-aj5N%ZY&9{`Fjw-xxW>TsH+3xxtv4N1 z!x%bqS`liU0I`_ZRNK%(vYoy`LHqU%Fxk(HCWyx9TY!q>kZXv!7{g}bB_VB#kQYp zf9K+k?R*8xuk75A%bh&&KHq$jd(Ugdht+BEM`Mb&=7q4M!4aiWPvAE;@TjV%)gV~0 zYMWfC+S;s&AqJRTE2Wns^@U^5y8xoXL8&9L<1zGAb>SJzR|h)A_5G3Am=DDalY^;5 ztCoaSXGy5i$|9StYU#<_^qhHAfcXhB=Kd0!9L~(ZT0eoM?_ZR?^7l!-8S2zg@azr$ zq^yzx@4))GHq_9{QBI8qV^eZ!eC&0T8kVL@eYZ*v2(;JV(-51+2q|CR{8tCGhACf$ zh6V|sY`r#iwVCJabon}#;oq-cq1WWGM~mx$SL_NXzVQZ(t?U9-)ld!k9pxa5M)=Gz zMpc!f0sh=-`30(427F}IN|}ZOw!a{)ke`@;gax{808tBjZgksWFcblXE@or#48>`K zQ^(%)BG*xAs*RoacV-1Q#+$8#1Aa7ZK(7(f+~Q?uNBC&rUH>TWYJ!qA@mGMY9XRLR zJ+~pcqLnlvc@U^y#(wLmL)Xp3WvpAzRj`mjo1xex^?@XBo68ryc41EMOo_~sEm*)z zl2)yn3Yh~Kp?F2Cr18}+?tAmu4PuVG`r46dv4pL@2ud7_Qh@3U5(oEr18EcT9z^HK zTt_`lTL9{F35qVhyc!Oz^^PWXl$e1;I^;DZ5dKglgR0?pOO*`3z@W1?sD6=pG)39Mss`~{jLn`1oDM#uQm58h`WIex zw6{(Ff>%>Z!wyOeurZfQQ_+st$zXx?4gu>oI&_6O=wxu{b9gfZ+S3>Mo+bb-!A6$k zpaWV18`(r#S@%A(iv5AJ;kNC2s`J%*pFSGaXLO&OkB_YoeP`VZ`8nq}?_ya)v^!f3 z4n^bh1fg0X_GT*^QmCr>2ODq47xC39zVZramaM-e1oN8w6k!UHDX`@nr@stTd{xw- zr4`7Oj``x*m>+TrT|#jR4=T&ar2844sK^v8K_-Xi`r%F4izcV8phL@~WKy?ZUX_jGlYc*X=a+`qMX2Rd= zDoc7y$4HL}tS>3>x6jY1ITSm=DLS-rz76em>;?d~G`^d*Cs&;+0_8UlcUc=1r_sSfa`KSxN>TB;L0)p*Q{XZ(H;D< zk|C*r%DEV~fC%k|lo{ z^Grb1s4vlypDG<`{1~-Rp{UonaAh^J=tgFi4No!v*mkA&>hDSKHQZ|=amOHFzC;R5 ztS<*tRAn;s00-su_L*P$|2s|-s zNVnde#XSN#z4xj0VV2<3vwwwv^xEnB27UciT9+OD`ligAKjC8W)@ehj?nC1phYB?> z9#zw*pa7@H+fi~czSzFZHRk)@VNXduN_sjBj3YaUgDlyKHI=6WW?*Kp_{TaRM5L=45NV1#>3 zq2UKj#($ZCMQtpOGBYOZeg}^U)BP^#*!f?nq|(bQxNgC9vvD2ff7v8gyFk3{cKWlc z1iU5q8NQ#U!&^f+!rpO#piK9}cRCe(+TBU@mE->Pb?f$p_2{~G`0|O73m1D${UP~z zVn)@P@EDMd&;YU#ylL#l`w@VwouH|g3Hgmt{NxmBzPeT@0f~!3wJd?Ht`keLXek_B z00MQ2Djjx~Ym3Hq;|Z7@=T!>L5zq;+7%{_nKyCx9>8iW{d_KjjlbczkX1;g;PU>jg zuF*fdC*>*VH>UxbRWcUPtTBL#&Eo+sRz3i5vH5_Dec%tcSmmztR~Lz^+jwUUD8aI8 ziVH^BI9di~-h_C2<4UU=9R%avfL})P{}jZ_ywGSIK?@MOYqC0JCPe)DZz>3k;$B;_!Uv{L5e3O7958texwf4!cnL)H4u(ADM*bB zoXT^2@fFB?3Yo#0@*F?>Q)DTH%)p^M(idNj%%+eTn2YeCh(9H`Jkk$ueXs~Gd>1zc z*9Mq=y}-jJe%!R28q1))-GoayH8%_9+wjI&27k>>VAC|6c1|lU(lWJsI=c~Imav|^ zGN9%4Y?kw9U_CqV09)(o#Tgy0Sb_+f1PRjHX#OG` zJhB_zNN>K|u59ygnJegCMpI@>YXeeLEt#Glcn$LmmSE;706&iED=S+qwlwL`1*Gl% zSjRtldc#X=I?lImsn;=8uag3Z1JBs*KS7{9cYW{=1fX50U(a5T)&Z?S-q-6yt+u7% z)q1wB?8g)SX;%}ThQ+w2P+8!JeogNscAzHzK-DPZnrB-yDk4)0d07HWyFsx6Ftlvd z3g$Xy7;^)Y0h!#J=whUK27++`Z8M2&8%tiFw()~Geia>G)>0ll0R@8^8S0wnZOA)CYZR z1L_c>c`mV7-oyTA-7cT7dt<#)=`ByNqPh0@{)vnG1x3Y^s%HST58JMeCf&>+NPV+k zzP@(CJKkw?;lD@P$efz$1FYnE2ZHKD*3U!2DmR^sl?>qMZadZSZjCQmS|H!$!i zesJIg;j0aky;IZg7R-0L)Rw0d8Fd8m({Z@$_1PfnNOqk?8wArx%FTI*@tO{(90 zWWB6=%s)Sj(zlO?^`KGj=dERK%62%#A&jwrW zDs5wf>bcZdonio_(|Fp-7|5&v%1`y6XlZx9AgZ$6+-75m?>;`)?+5E?`ITQzEcU z_IhDM>*Wlvz~TQ<&_K2gLED(n#%&H{%NDeS3})@lnJG3lYSH-L+Zima3>~zUq3Iv) z@7c;=qHO{eHmsgC@)wA>oLO!}X@ajFajVjFh`XMMxNcl0n7P>RZfB5@VE613KMo5W zUO&B9+GEzF;0vKEHWb=!esL!I-8~?3u?kpuD`lz)Lf~e&6#;157iU5$B%tP8sL2Dd zi_k?@Cj+sIwfJOjVGI2X?vc4m60hNruG1Wn)=sO4youVDMV`hyC=QvTg@|>ytFJ!e zFey{yi;N0)^}}zVUL+C}+H~`Y%A=Rd8$@Rx_NM?Ub`c9YbSGwoQu@z)r3VXAY(B($S~;w^?^`8M4tB?`@3$eN+_s;()SytN0Lu0=WoXN;^YSgM;}s4KH8bjGBh$u-ns|nubee)-*VpThn0TTgWMAcM84z z@D_w$k8io51o$PSfHef_;MY_NSjCfMkv;`%hjm|*DPXjSx~higMwy%~ui!UzoI47W zwb5TM<7u?A=M8y(R_ERb^`IZ2O^a-XJ6>Dzq{!*5BST=%6=&Oe^^x-%^9j?ba@-Z$rdARP*CV z0^;iHb~|;Qp+j6Fb-OJTIEe993mn7_)wL=W6x>Q!a-oo_mda@?HD4l#@Fuz$9K9T0 z^_oLJgBv?c0hYVK_yD@exR^14Y?F+ZoIqB8m09t)r_2c%fT}BcCY{tlPJtUzE;Z#_ zcqNe4U3HR9^=+}1!3Yb>-e#_eK_6i;(JN3xXp=%_OxSiT_jUUJlAFP#r6$C2zN1^b`7c`lNZw0jN$;d_j`Rcdo z+6HxPL$tgBcy`%}M{ywItKo`I0xPAu7RbITCQIrHzPt!1M~SH5{}sSkUc2sOkpreN>8TG^*N4>G%=B$KHrCfjYpEuT<1-UCncpuC;nnD za85dXK8|5d$WipX^BTsz65|tb=w4?BhHFXWNzoiU6&sEipGX(Y!_%;_NCKMW9L&fq zi8L~y+=NsuvSIsQnt@*gO$-kP@a{;nqGGWvSI0W%jSWc^1wZeJNOt*$F}0VlRDMp4 z!Ox*nk^&8?dw^3iWH_*%3g)yt_436@T?${YKB}}pIDpsMt2Vv2`_OYW;G>%O08N)_ zKW6;wH|XOtDwo^D2Swd2IBq}me9g(&IHzfrsW!b6nqW<*D|ublLEIhH>3DYn?&=xa zPIi5DxceGoJHoT~%;L{ZcmDFW?JUKiK4mJ#lsVsY?Yw2*=>XRSJ6bb2WGPiGm>k>^ zhb|C0qAb;lT$f-Jc0ju9!5-zHYrQTsnH@4kr_cxRWE<|v1>ERy5`!JEpur6arC`NY zs{>UXh=55$?Zbn(bTWeg{3MBD1{k+pfF2`tkh5-Z(MjR=xb(BwDN+Od@|~P9j?^Om zYkT+X8gd!jf9R_A7G`MqJmxC?-rUqjcD}~SJNH`$h_r!6Ck7IbcCmhi+kcl1X(Na$ z+}Gp4h`kMheDsKvn^O<`x<%Esh2V%Ql4xGsO3^5AU~s7Rrn2&?@;NWtb}L}p?yCaG z6k+msPKM};=(y;XXgB?3;^JjdszC+hI#)Cs^Ce3D$+hajP%txEa<$Z}nU#vp;2~WO zRZ#jmTvMI7=0BvH;o8t{XBo){3vM16rkx_QQ%GngHtw;=so5WE=M=rF(!<6vuzjk` z&H7wJou*k_4K*FW)$k`*Z{saYMx7;bKaCZ3Jjic;NZ?)SEMQr9Qb*4<>yeea1(vN72P&=BDZ*J zx@v8CWCmtNv`nipu!g=^GT8?38iLYxm$5n^E%&N5HmD$)LqCI1TYL^}3@uHyeFge% zN}exnc(Rl?kd%CyQd?P6aIx(rea6~`&$?bfr7c(l_B}>J^|>dkqdp1hsQbgt>SM68 zx)gL)?}Aa=;3B2cC$Jt|8m^EjsUOZdcNl`~wGjcs2_Q>g5F9_#f$VDxf`Na`@4R{b zvCMcW^z2q_(Wuq+BKd?AjXWl1o2!ch>gC13cH?Q<#qDpAT17&L*91&;M+$btK!ie| zkDrf;12;?gNW2q zGII7b`F;GA6yZ&lA+$H;g+&d~?$i)`zQ*eK%IW?PX}iChvYUXk9{T0%`#sRQzGnwb*XFzeo%o8*3bpLu+Y8OX(*n`}sWundJ^7Hf6jWU^x!=tKS z+^4ZsB9QD=%K_f8mDO?(m7tKrA~Kcwc>-TkSF4HPv!=PZxH-E`aihRxEfCg%6)g<= zl2%HKrH`a#(y1IFds<$8I{Xb%27ZHZNt89Hqs-kej7_OJ=x6LZkcGGkL&ee zrYZ^!#9AsG4DXM+L2Vr~b>x>2V@E`djwfKOj~-*kOwwWOIs=S-E%dA6P@i*V>1sJV zzFIBGSIXsu-h6gLBVQuE4_d}9J2!2Ig(QZc4-y|hd98Ls4Az)?9UmROi_sGtrXmrD zf7eB=g{*2r-5r!>A*Xjl%DF_`cbxxx%#t&k15g*j}#3KA#%oCZ9+Ebk9 zL8Un-=td_dQP$A!mFA?;eelB=f-FUiPK++?Pwz?bL}wuTv4uV-TKuvbx?v6o{qh5? z3_Q@v5CvNqGC(Us2xw(^4vQQ7L2<(+Slj?Ek=^HcA|nto2YE$WWLgjt_UWEDPVqBKBQ~4v0+B1)OF@2wZFbAXn7QM z5B$@g$1TK5`y^hP{kzo9wV{Sqj&f>LDjh|mUE(0nTFv~{g8;Pp1h7u72XsJdkN`IE z6^ncQt=^Q6zLL7%SKb|J+jsx;xku;tzU}a0j>i=}i~9nwxYaz+fFt0mstQxsit2bF zi(ix#Q3wOT3RN{iT>x0MQl{aI;&8O`+Vv<_71_iwXH#MV zT0J>5kHIxL)f5JhL@F#-i>BlQ8i?XG<2>#j3PK@sRJ3i|V;*_l7O=aEe1t#)2jd=< zfLuQ~&QjXajyo$X9SwhaM?>=$gslzJ#xEZY;WdrMdh5vqYNHNbR~wKCh6Ju( zMKH0oA?h2`h6dr%>R+pX9)Tvg2zqh!tqsA6gLqxe#7sO=R|aiog_a)s{f z#JQT2na)jb{WwW))4&03IRe=f0dvy{%+Q`e#{tvKd_@9b-Fo#p^kD3y>|q!!YiROu z9E*(~XD+u&j)B!)b*G~@6f{*8ys3kd0uyxxZ^U20SOl9po}LhICvTVL6Y$ngkGGyn zb$GkM5N{vN?C;qWAFjN|Iv=-V=KNuE27moks>kxa-}1+t{PpptQ-g>#IjTl+f-qm* z@MvY;1Nr(qwL;04SDc0U;b1l*CWghVm0lAmJviZ4>C+I-6}(t<4M#Xry|*L{o#qy) z_)WBwGs@dLDH{Vga~7Oj1Mzg!i&R8{_Nq25vtfteKjasArwrhn1Vy>KV12IX0hLOw zw~{>S-3!1mbW(XvI<|+hhD%?G9A%iCN}@dF(mz8kGVTT9S?K2SoD9s=l&g_qH(P_Q zw{Jo0ofl|RXl&f~Y81UEvxj~Dtn#ilwBbg|9AN!IyZavwSXSj7rAe|4f7kyZ-($1S zrMal?5>a+43!<)YQNl+A)FrHP=iSnwuE|wypZ}DZ75{a1qZV|P&xArpxRF+?$>&9Q z+i_;&FITtLI4evc+bwz+Nza{9f$qe^g1jO}{g{ zrvZGgi;>+Fr?MO}gFt)BEQiEE*VFL&e=WTYrsV*CtIhVC1#jTDW5z>Si$D#f zxA@~5Cm`PDNrRmUcuUoL(P;I8El`mVCDk?u|I!H=uh8c`dh!?8y<_>KE8cE=xzA*z^$_o zUk8*8o#Z=1^(y>?+v?@WozlPy@lOp;cu zGN2Inz}W!xwPKDucg;6-z(LlyMd)@vg{pZjyQL`?T%g(I4`sSFFrUaCiqix~c^9w0I?&s2(;4&K8Ms84^U&R41kCc7Iuy`!5F0d4S7U&%1kz%v7pUqv^w%S}x(mDF$um*N-d{dHCVFeu_~01yNYogo z{NHC)%a}15W?ufb7$q)n1gqLx2|{X2W6VZ&v^q-lyqYh+P2~$&^)hjSCYq+9!lH8* z1b-AsA)3onKZ!m=BD^9HkaN2rqAep0v{_;1cvv-@7P}Z)%rWoZvR>^qX#)9}9b?VI z4n|ZMW(woXmASRAG@9Z93!>V1n?%(1af9aD&;sjA3jFQcd8kZuXyyO!qk^3>oA2kK zBDls4*xz>s0cHsSV9WOEVAdc2Y@(quq0HRMhO-+(*eVt9IY=b1-TXxZTPV1|vE$gi zibS|7Dv>^vmPBsMrS#$y=OiwHEjciwCKT~Q;6WbV%yqZMQ5?!51Mo!%nSps$*wTaT z$-8EjxomXiXs)T_Yx|*rQDK$)W8?nrg9Q7Kz$kjg7wGl&4W1ll%Krh{Blv~6z26P8)D?bKYMyt$_VG&U59U`zrC|N21Hct;8f&8<0c zScI1l)tVhPFd5MBa1(Xsimn)*nwlC6AJC2I7si)IrekJ^6-Y(B>aDe}%lnOeO(yh< zD>5r_YzJEBL5b0N7DzF$@(Ud6a93ZPJlie(6p1J^da`sj)Z8eT{U;SySQ;8SL_@=k z0$?y7Zo>|%j33vS8cWTtiL+O$)YdPgaho8%zJ1HupMbB+^vpqcm=0fKh|Iy%tYA+d z{(9ABo|~aA8}k0R?ZaS~`PvBE9ybOL@XgLS+vm1GEW0I^DH>$BgvM4@)uzzY>c*^u zjlwEfkzDhD^O>?vOapBWK>8K!y47`4Zd7h$?$+E*%qV6gb1QQ_dcg}Q!J-BwO-i1W zv^jZm(&84)v5eBv6X}b54q&qo1RX@7(2Y-cA;G9CWfT2LX=z%zk52+-bYf7c^9lMw zd>{5P;(>Oj^r9P|Fl`tnT8d2@h7BymCJn;|mSWe^$P=Q4_- zcuFumw=^ua+9I82}XvsI$&)xOE+aaOuHG;u^d^1kU4ZrNo)TltJcygy2!iSaBU4SSsn#) zE&Ao{%(lo4Z*Pf)md|5u!KWnyQl9}y6|${-v(BG`Nc-Mu#k&NgC9t=f=jxEwfW0+U z-{8+nYNftGy{&MYys8$IKQj4Bp~R7`P)?v~g!0<^x&reiYni^)k(O;V(~?Kkjp6ZMcoql&f49 zP@Q8H_COducV@t1Z*SiZt*p9q0`zZ>*WS3xQ2iUPIqbH2&!Tq?9^q(mZ_<6z0dW{tV8-Tdn`)%+;3x-)G4QTrP+!ZZzlM%`LJ3IP zb$7|Bg9|rBeUUUhqu=QHEV(fIKM_}_eJua%>uFzkp6Z%08~HM1X6dM9vR`|p7Fr=E zc63jU7?m@y`0nRrt6crNot~Fydn2K2=H^(>(0gmX^6IsbGiLas%M8Uwlb3w>p?&6q z^L<@+os7TC7@3rjb!XT8PL5mFPo7?4Yn9=>rH9+s;G@YxN2KofYGn7!{Jk5C4*AaW z;GL!7BdnI(yuPpTOv?2>s*?4A3gPkp{{UZ%n;jCyS1w9FO315*QMI{Y{MOeN+1=U*?&{PENI zVWXnQ9RD}t7V@6;>66>T{_)d?GkC+E{3GbAQ~iGLmDBfF>`Bd4PaFEmuk4SVafL_PF1ZL5{x->i@e)GU~^IfB*a6*GCkM_+9)?ABp(M;2{qke;wr5 zcZi+um>(}&bsLuVF8yCq(wTdgKO2@F<>>!a$dvK)@yoyL|KqwNC*DfvobcAnk}22A zCUy-w&6#jL^8Ev{AEN#-CVI#YkLibRPJAo$Ahznsr<^5E23c)S9nt%PQcawd{rKVI z1NTKw3-FJg%UD16(~#Vd4fOSMS!<^)Jg{$Az@RY;4bVVtpBh3-XWky9NF-k6-PE1OXPNrOvO1d0} zMhGW5NsfCZmpK(PCYLh8X!!P?>z<2^Jm2?x|L^13J=Hwz+3&mF^;^HSerxTl);q4Z z*s^&M0x|tO0Yeez71IvpVbiWaeG#{&yeNzK9^VP{E zw(2&t71iyr1?Q-}_4{TMtt(V!1c*uCUcHo&_YjjX43AJOE|x?hBkvnn?oQBn8EX)p zp;%liiS&vzFtSWY)YwuXbJIg?havc}s?l$AYc&=Fx2n0U1phhsN65Ftg!BA!-f!v3 zP>i@P#d;EkisyMQ@I`I53Zp%@R+4xsu{*#%in&O}0-6k1OJ=U&sFv_0GuF^!Tj6AQ zi+8~`fmgZ9Uo}_AZrRJnB)2urV{RL=S7KGlFD(11i1QnYeJfx;(;dAt#5;J*#56XB z`a8n}utwW=>+xrf(fI700-P26!qc!fm-?FWZJfvMaOXX*I2!MU_S)kRY4E@zVYkMX zyD~bv#ANltf1OwS*KFi35x~cV;>{E3Ti7^p3cnZSmS!!zWYb&s}1B4a2{w zoN$W?TOA%@a}Z)Vwy%wK<{IW@&Aa26yYC@AP`sl15HU?mwG{b&4yKS%EhiYGg*Cw8?PGhgg zN2M<64zbRz4=St=dbBST`;Gdsi&BEiYwCPd=xQF?WR`_>9UA`J_`bx|sUf<69!NtP z7Y&Iw>tOsVqr}a&^&ASTlnuVpN>TsT6>9xZnM}7uYlfItN5z`OP!FY6YKAD1+cWd! zil-Y@^7{VTiTQqtBA@rBq*rhq65e|qdi}w8!5b$qRHtREzya&MNg2o2fyXa!bZYcU zFy?y;#aEiZJZnE#&Evb#6K@K*FH>^ zWV*R9@4s)8%y4sI-tQsSU;EMlSbPuZs=($M`+x;*uEV2+>s7(HOsuMeY@3Z7IMgay z6vX5&o6Wo(xS@xX#J>XANUPym){MD#8ggnqq*LtEW-K5<6UpKOfg=NGB3Sjr`zlG^ zfJdyVf$RdtWkOu+BAZYmAZUMjmMaYb?IkW~r$1nW_8}Cs>p{?3R~fTGyNIlHXHL<~ z4e3ivj*wR>_^7OCPY%8LOk-nRf7h*g=k5k)bUmdKhc9>+5;AZ&Coe>*pVFSy*k0G| z-C~YC5j;TYAXMdyY%~9)gE23P0&qKEQj{*Pw)S?fsH@HZZC2qBKK7G!5D3_2Ynn-Q zdTfC@x!2a!ns(fKX++PKis>w}gzTMhAgdT?f_Gp0qwhIK$Y5q>!~=n71K1y)?l#f4 zxV{|?J7b=qUU=iSpdICBmqPzD1SD#_+hJ$}E+^WX+WJnun^GNj5BJU~YQIak!RP703@g_(v-n;VIBO z)09@EF#fvWS|ZMa&-ZXxUv7R55BRl4_6z zcrnWkQu1oi;qc`UNVH_U_67z~GotE*Dxyr8E`TeYCJ|==(62cv2Lvor!gAi?hQ;`B zAqTs=a@kXPM-a5@J6aqdpoLJagbEw9uvDv?A%tqTmS|jhA)IPm(uewb8#;sfh8}gR z7p^t?)<=d=ZAzmV6T45g_8bpeDjQ5c-9oRlJqFgZL$PM(sE1SSGy&uuPhF}>f07+< zfw|7(`U0#2@A@`gNxZHfw4Voi@b33(qK!ahX*b@=GIuJw28&3(+=5YC{x@dV&l?^X za{?^^5~es(1}x8mRGT8hNfU5bLw@XydHZ*la~+hRX_+7O9)PG_kf9tV9JNwaLQxA) zEwri~q&PlQn&V^d|29lB9(B5?bp25^YWW`~S{XszhPSiphqCLh_R)IZr!Nh3RmZ;T zMB#hsv_Z<(<9$PK`*AI5>ca#AirQINBb_JSY}bPf!^s9GwpKNn+4Za}iWe^!E&{1G zFSZ#+MW*63LzF=9;;1XD<72VSZ#RR&h|tpV7K?b`f|rMwv6tF;LDB%T<4Okg!lO1K zQCXU|MB&75Qzbynfvs?yIu6pROQ97Khcnou3*!OK8t;xxF2_h>kwd;O!<| zyLc0msxj0@{DmO^0N6AU(9sr(a_BN;1e#`!UC-Oqnl?ad8jOF*_;U=9Y(M{p=GP!< zO^CyJAZick?aYOsHlAD7&W&QD_76x|+gRaMivYB!#ZRXFMCg^8-E01Loo!KAa^viK zPy8jM8= zSd_4X6SElUA?;$TIxvDvh0MN;!INCfLpE(n7>9-jl(jD=mtl8ICc{Pn8TR)p89{rK zMAp1IaS6n&o!7<{5ZoqnaVx);joU&9Zr`#BTczA-bFMu;9r^9pg}O8QPbb&Nt33AC zo~b;m^Klybrh1g4wq29MlJYOd_FQ%<6 z_U$m?9f=Z}-n~-Rl-AZH+O0caP*esMwv5)#7v;i1e5{Xk(3)y4<(dPU6hOH(^+^)M z7_O|MKVjeR=ka~QL2hZs^eTAoz23$rtC&y3_fv)VJ`}z1&$g>b*Ms7Y88%W1&$8-s zc)qi$n589nHz><8xexyCEt4oNizWJGLr|k?fDGn_!ELk*%2=61A-ftUIcoTvlUZpk zrmPtrJ*T)-4Cxkrf0@N=1@Pm03gQ;I7MkIDD@OaxFg!?gDCjH$!?=R)Q8l>tgV|X; zBx;RM72&xb*muea-auvmo36QD#usSEC4mrr`8oI^1i}#NRqSI!7)rfh-xiWEpSssY zZSk@)xK3IDCd^tztqXpiMGtaTA>?)BS8^5Qen3$U&L2Ab=)^alyhfnTt!b}&pAFQx zO)d0*fVFH%`D!a&R2^sqmBfH)Gq0o!wu02HX~&ppGgnf!tq`PvJcvPI(oUceIZClP zzI?l~)^q_hdE@jVaY>9c6t*Q%EMa6kS}O*#0_eTM_JOi$E2(Ltk_f{YU}8YF#lVS8 z|4d5MWB1Iz%l%J4M7=i<`ef(MFiSVG!dhB#DyS^;DKCkagX z#*LF|;-BzpTL^FCpN?j4*Gb8@Yu^HHn zgSZ{lU&fX2b`YWT5%dmjSax0}z_ z?gwh!Q0AbNQpW{CCZ`vq=kNCZ`YLH!-3pZpo0?5i?q)YPi^Y~YTbTlt?LWO${}jx za$8p$MhDl=ZZfk{(LaKN!I>}jai-~nV)PA>6i6KTT-X9Eo;!=n;4{0{t$0A!N2-yZFv{tQE z*Tr1rOgI(}lz20zrC2F4&*J4Oeq~a9{9@%k!;nMYEwyCI zpw^JLF421-I33I3f)iQ-)3F?JJCc)Ud{=!rYf|-PDo^Kpg7u{?f9ITl)V1RXw&0NCs2xC@busKPdrqG+=GDMP5Vc;;(~%I=LdK;QUt*({ zZ(It2U;Cp9FCV-Lx`;PD$yrvg6gLxyC__=S-%Uu7^fg7)wW1X`)Ej|Ms_ ze7cJI1O^44Z0{A!F|x)h<2`YhOg9Hr8$62JH;u#X*Gxvn16n~hLods5+Q&kU97YSv zw#e-b_~bH)Fbm8$cC%)(C+==EEPbZq^wq0C(5}-&B||_9p<08@Y|!#jEw7H;MMY;? zhl|Qxvj$!#m^W8as`$8p>(;B5PELjWignJE_s*22prMb%E=ont5Z*Yi%sVt6%#dBs zYUZpA8GlcgHatR32irgx|3+T$qPcQZfhx}%Kmn=voUhx(qk*(pVYtXd@#d5CJhtE8)r-8A1rv&1kN@{e_HE=YA%LX#czJl}2--d`l|jYsT&4JM-(P zes5Lh}NHvb_`mxC-=nOT)yVBzIjncx+3#0xqh~&BasE!I2C5%f| zJks81)a$-YwD}o(sT)X1{)IzJr#sqCQ87wZNfAh zqes0|99qbaq9s1cD3y+Sb=Vq&-(EJ#RYsff!Q21C5=8{!UUabD)(wo8F5Q4*+YS7( z;(O^55x_mZnub|nM{I0t(%0$AkjcNG4ynbVE`Rb-Bqh$ji&y^Tlh6Ky-{+E&NU~&P z+@%lQpLYC9@syFOB32NCJ(`Z5ckuJX8CDrB#V zf&D4~zkr`rT`XopY=rI=AyU45fy3X|;x6Lxl*?CkTyPde+}90E0`I~+O2t@j+_GWg zHfzvuFA)e45q`swxnCyXH#=ac;<@(U+N4e;MfEn4Y=@G1o08loa<}eZ#ZR9;eXd=p z{fVM5r<>BDtn#n=2PO5nD$RlVwY-7-yZ&bP4vcQxDrYtaJfhh}2S>S((1*QZ4=huG$KGIId literal 0 HcmV?d00001 diff --git a/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_NONE_5000.hfile b/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_16KB_NONE_5000.hfile new file mode 100644 index 0000000000000000000000000000000000000000..c12188d330a3ade2ef71ad180f33aa1f9ab2bb8c GIT binary patch literal 301098 zcmagmO{nnOVjlFRs|5@Xs6{TSIMF^Ne^MQ{FA`~hph(&NvoM}bGi69u)vzwxnvwr8m34|x>efECO zlmEN-Z?E_N&2N0`8$bBtKlWR{_~Hltp8M~2|Jtwr^Bwnp`QrZD?*I7O{r7vm`0k(o z_Rsvn*MIS6fA#Ahf8(Do{^+m&Uw_wU_A7tjm%jb8|GyMR3y+q2^4VH>wCZT%(R`}U z*3P3{M+c7%9i2Qnb#(FQ($USMTSpI%9vm$nI(pOl&tr&%Yq}|pa!o~RDqPb|(aJRy zt*LNLH$^MgRJ5kTHQf}gTvO4S3fFYgcWdREZi=H^Q`MR(*K|{~a!plhs$A1e(aJSd zt*LTNH$^MgRJEqcHQf}gTvOGW8rO8wcWdLCZi=H^Q`4Fn*K|{~a!pNZYFyJz(aJS7 zt*LQMH$^Mg)U>9?HQn^x+PS8i;wabDwWiKB-4v}{Q`ee0*K|{~a!p-p>Ri)J(aJS- zt*LWOH$^Mg)U~F;HQn^xI=H5r;waZNw5Gu|-4v}{)6kj**K|{~a!o^P8eG#&(aJRq zt!Z#gH+{EGuIZ*Y$~8@`X>v_BMJv}dwWi56-4v}{)6|+K*K|{~a!pffnq1RO(aJSV zt!Z&hH+{D*uIZ*Y$~7&mX>msW# zx+#uwOa_$~A4RX>(0CMJv~|wWiHA-4v}{)7F|c*K|{~a!p%nI$YCD->rvh zx+#uwO-E}wT+>a_$~7IW>2OUqMJv~Iw5G!~-4v}{)6tp^*L2foYx&TclH#b=R6ek# z@}V^)MXT0SKCq_pp*1B%tJYLLu%_~%H6=x>)>J;Qrt+aRB}J>&R6evOT6mF#7g~7I z_Xm8n`~CBrA`36H@T6$vnq=XH7M>KXT$3!k(880Vm1~lP7g~7IcPlNt$imCjZcIWJ zUTEP-@d5MMBnvOJ@T6$vvq=_SXyHlG%4d@-ywJjvqLt4kS$Ls^Cw;fl!iy}tT$V$-)aQJn6fY7G7lG`3oo?rq-f={Nfusc;YrcTXOk?v(880xTWR4%7GAD)V~DcwLJLod z517v;S$Ls^Cq*ltO|tMp3r~txKAU9Wg%+Ol-AW5DvhZ@X87G7xKN#Cus@FEK@SGzGpS$Ls^C&dTMXOk?v z(880VmCq(wc%g+SMJu08vhYF+Px@}9g%??Px!R3M$ifRPJSje4KAU9Wg%+L^t$a4g z!V4`tDO&k#l7$yqcv7_T*(3`uwD6?wR$6$Gg_o<{7@{n^(881A1Lm_y7G7xKNzuw@ zlPtW@!jqzv&n8)Tp@k=XwpLnrQXJK@smj7DEj%e&^=zuL@Jb6$idH?Fsw}+H!jqy^ z&!#F1ue9)_Xw|c+%EBuxJn6fY7G7oHl@^{9tz45Vyk6~oKM|9nm1~lP*Q?!X)tYGG zRTf^acB@ruqJ>vkc%_9Weg6*A!mBL2(!!IXm1~lPS6X;dv~o?d@Jb6$idH_GWZ{(- zo)oQoHp#*(Ej;PFl@?xQ;guGi6s=s7EWBRr#{a*{!YeI2DL!C6n`GgY7M>KXd^X9# zD=j?fyOkDRW#N?;o)oS6D!j_V>(y>dLKa?W;YslU^VuW|ue9)_Xyvm>7G7!LNzuw@ zlPtW_!jryRY2j5CUTNV;(aJT+!t2#;3{e(dY2iun0rS};3$L{Bq-f={Nfusd;Yr`E zwD2kmue9)_Xyvm>7GAG*V-m9PN()bl517v;S$L&|Cq*ltO|tMx3r~txKAU9Wl@^}# z-AW6uvhYd^Pl{HqNfutOc4LUL@Jb6$iVv92CRuo;g(v+j-~86^`G5Z6Kk>hP^KW`} zKt7!O^VP2osF%g{o1)nf&8yJ763vs|f4=;$m{+2CQXJ(&OlV#&i{rORXkLltN$~;m zAtp4hMDwI*t%5aQD|O?=1K7Z z^C2cQuSD~tXyvm>XkLltNuRBaXr2^Dh2~9Y-iYQ&(W*5yp?M>kCq=8CO-*Rth~`Pr zs%KLZnm3|(Qnc#X)P&}ZXrA=lN;Gdm^F}mJidL>kXx@nCNzuwR3C$bPJSkeaCZTyF znkPjo*CaG=MDwKYR-$HBjfnm3_&;|21hXyuxO=8b5c6s=s7(7X}NlcJSt5}G%nc~Z3U*(5Y?MDwKYR-$C^LAMrLll}fqIpt$z8 z)tb62ywk#yqE*kPE(`Cp@T6$fv#HC%J1sovyOkE+W#OF`o)oQIlPtW`!jqzvYm$X` zT6j{la!s=EP76tOpE(`Cp@T6$vnq=Xf7M>KXT$3!k)54RYm1~lP zcUpK-v~o?d@J8<(g#Sofe)Htz45Vywk#yqLpis zg?Czb(swH@yvxEnEj%e&xh7e7r-dg)E7v3o@3iowXyuw@;hh$q6s=s7EWFdglfGMN z;awKqY2iuH$~DQtJ1smZTDc}!c&CLYMJv}N3-4FE-``0lMJv}N3-7e>r0-T*c$bBD zzDy-4TDc}!c;^N3q-f=uWZ|6_o)oQIlPtW`!jqzvYm$X`T6ofTD=oas!aFTIDO$NE zS$L;~Cq*mQBn$7f@T6$vnq=Xf_rjB+m1~lPcUpMTcPlNt%fdTfrjit`T$3!k^ImvT zv~o?d@Jk7T#&$NzuwR$-+A=Jn6fY7T#syofe)Htz45Vywk#yqLpisg?Czb zQnYeSvhaSj8(%lc!aFTI>GJ~~wD6=jDhnU7@IebtidL;@$ifFLJSkeWrXdR-wD6>8 z)tZJZe9*#^qE*kPAqyY0@TBinTKJHK4_bIqv~o?d@IebtidL>k7Cval^;e!^Q6s=s7EPT+ylcJStl7$aicv7@-O|tMo3r~txu1OX?XyHlUt+enV z3m>%bq-f=uWZ{Dro)oQIlPrAD!jqzvYm$WzT6j{la!s=EK?_g%Zl#3}S@@uZCq*mQ zBnuz3@T6$vny!Wan}6~j{M*0p|NJ-p(wDzF;0J!kSHC)75Y3a~!{h9P=7VUS6s??{ z(0mZhlfGMt=0j*ch~`Pr%Gn9c2hlt!S~)wR`5>AnMJv}NG#^Csq-f=ugyw^2p7h;H zG#^6qK{QW_R<22CK8WT?(aJRm%?HsuDO$NEq4^-1Cq*mQBs3pH^Q7-qqWKV-52ATe zv~o>C^FcIEidL>kXg-MMNzuwR3C#!5JSkeaCZYKtnkRj?63vIud=Sl(qLphBnh&CR zQnYeSLi0g1Pl{HqNoYQZ=1I}YH3`ip(LCw1bzT;KD}PcP)n9IzLi2f9T&?)aEt6=T z6d$nGG==7qXr2_UTGJGoPojBJv}#RLXg-PNN#CtR^C>i+MDwI*<(h=%lW3k4tz47P zd=kx*qLphBnopv6QnYeSLi0&9Px@{pnoptmB$_8hE7v46pG5PdXyuxO=96fi6s=s7 z(0mfjlcJSt5}HqQcC(%49TDc~n`6QYrMJv}NG@nHCq-f=ugyxfIo)oQI zlhAw;&6B=ciRM#iK8fZ@(aJRm%_q@3DO$NEq4^}5Cq*mQBs8By^Q36ynuO+)XrA=l zN;IEB^GP&MidL>kXg-PNNzuwR=>qwrg(pQT*CY#{wD6>8<(g#SlNO%z-AW6evhYa@ zPl{HqNfth7;YrcTHOay!Ej%e&xh7foq=hF%E7v3opS19#?^as)l!Z@Pcv7@-O|tMw z3r~txu1OX?Y2iuH$~DQtCoMcFTDc}!_@sp=eYeuWr!0KZ!jqzvYm$XeT6j{la!s=E zNefSkR<21FK55}e(aJT+!Y3^}>ARH{K4syP7M>KXT$3z((!!IXm1~lPPg;0Vv~o?d z@JS0#idL>k7QSfVNuRBY7M>JGW#LN}zG&e|(W)0cEm`=w+Wi*zq-fQvd6q1EUF}vY zUd^*;;YslUYfVcQzG&e|->tOpB@17)@T6$fvuVk~7cD#~TDc}!_@ad;MJv}N3tzPG zq-f=uWZ{bzp7h;H3tzJEMGH@gR<21FzG&e|(aJT+!WS((DO$NES@@!bCq*mQBnw}( z@TBinTKJNMFIsp~v~o?d@I?zxidL>k7QSfVNzuwR$-);cJSkeaCRzBRg(rQt(!!T4 ze9^*_qLpisg)droQnYeSvhYOKX zT$3z((ZZ9Wm1~lPFIsrgcPlM?$-);cJSkeaCRzBRg(pQT*CY#HwD6>8<(g#Six!>~ ztz45Ve9^*_zFTSGOBTLp;YrcTHOaykEj%e&xh7foqJ<|#E7v3oU$pR~Xyuw@;hPqo z^x3*;;Yo2+7QSWSn--oFt+Mbf3*WTxq-d3eZ&~=Jg(pR;Ud^**;rnX$Ti}zvTWR52 z7QSiWNzuwR$-?*5ZhWq?@J$O(iVs-NrY#HKwD6>8)w5~K!Z$5EDO$NES@@=fCw;fl z!nZ7Z)54RYm1~lPZ(4X#v~o?d@J$O(idL>k7QSiWNzuwR$-*}+Jn6fY7QSWSn--oF ztz6T!@W1-|&wu-W{oX(SJzxInfIs#tU;XNUO*Bu650A4Gns1_cQnYe*Li0^DPl{H~ zPH4V~=1JeJMDr~)-$e7IXyxpL=9_4q6s=s7(0miklcJSt5}I$Kc~Z1;O+xccG*9|& zC7N%c`6iktMJv}NG~Yz?q-f=ugyx%Qo)oQIlhAw<&6A>)YZ97oqIuGHE75!l%{S3J zDO$NEq4_47Cq*mQBsAYd^Q36ynuO+?Xr2_UT$9jz6U~#pTZ!gdXugT&NzuwR3C%ar zJSkeaCZYKznkPjo*CaIGMDwI*<(h=%n`oZ&-AXjyLi0^DPl{HqNoc-_=1I}YH3`i( z(L5k7swASJSkeaCRzBQg(pQT*CY!+wD6>8<(g#ShZdgn-AW5T zvhYI-Pl{HqNfv%+;YrcTHOaycEj%e&xh7fop@kxcv7@- zO|tMq3r~txu1OYtXyHlG$~DQt4=p??TDc}!_@RX-eYeuWk1YJq!jqzvYm$W@T6j{l za!s=ELkmxeR<21FerVxI(aJT+!VfJx>ARH{eq`Z?7M>KXT$3#P(880Vm1~lPA6j@) zv~o?d@IwnvidL>k7Jg{qN#Cus@FNR9wD6>8<(g#ShZdd`tz45V{LsRaqLpisg&$gY zQnYeSvhYI-Px@}9g&$e?p@kIlcJStf`yk4 zS@>uAYP}&|EdH6|DD?6keXr?;Skq^UR_NtD`d-rwv8K-yt8-=ka;Ec_njnqcAgDA$Aw z8-=ka;Ec_njnqcAgDAxoF zzel+ySol54HNnE~QLYIVUOr^upXqx|vhaIUYm$ZEqxzHXYWMReEBEO8=X5vZnqcAg zDAxoFzel+ySol54HNnE~QLYIVevfiZu<-IB3;#^tYm$ZEqgsTfEc_njnqcAgDAxoFzel+ySol54HNnE~QLYIVUOr^upXqx|vhaIU zYm$ZEqgvC|ZmbCwet%uA2^M~ja!s)Cdz5Q}h2NuG6D<55<(gpO_bAr{3ojqC@Xz$U zCRzABsx`^N?@_JkYB$yd3%|cE*8~f{N4X|g_&v%s!NTuRt_c=?k8(}0@OzYNf`yk4 zS@>uAUXv{R9@Uy;;rFQ4bhR66f`#8-murHB-=ka;Ec_njnqcAgDAxoFzel+ySol54 zHOa!SZQ?&p(&w5kbMYwFbWMXtv8MYCERRA6?sqFZ3Jbqqn)2xTCU3c4hwvyY{QjZC zqwkx%w^eIOivO-!(;XX*(!%ctrK4QaweY{|ANnJ|?SJ{TZ~fa}{_23g@b7))s{=|Q znkRk#rV!1G(7X`MlcJTg6Pg#Ic~Z1;c0%()G*60F&Q54vh~`P(twi%8G%rN+q-f)YZ96lqIpuZa!o?>LNrf` zR<22CUWn#N(aJRm%?r^y>ARI^UWDd_Xr2_UT$9ke5Y3aKm1`237ovGmv~o>C^FlOF zidL>kXkLisN#CtR^CC1aMDwI*<(h=%g=n4>tz47Pyb#TkqLphBniryZQnYeSLi0j2 zPx@{pnirvYA(|&eE7v46FGTaCXyuxO=7ngU6s=s7(7X`MlcJSt5}FsHdD3?)(Yy%F z3(-6&TDc~nc_Er7MJv}NG%rN+q-f=ugyw~4o)oQIlhC{n&6B=ciRMLUUWn#N(aJRm z%?r^yDO$NEp?M*iCq*mQBs4EX^Q36ynuO++XrA=hT8ZXKaa3qtb%DGR&6A>4XkK-J zyb{fmqE%>Kb%DGR&6A>4XkLZpl^4j9zFUdrRcKy$fjlW%xh7e7rG+O&E7v3oue?B> z6s=s7EWGjpc~Z1;O|tOH3*<@Pt+ene3$L{Bq-f=uWZ{(;$djU#Ym$XmULa43R<21F zUU`8$DO$NES$O3I@}%!pT6mR(S6X;dv~o?d@X8D1NzuwR$-*lykS9ee*CY$Cyg;55 ztz45Vyz&Bh(swH@yvo8WEj%e&xh7e7k7G8OQJSkeaCRupp1@fe5<(g#Sl^4j9qLpisg;!o6Px@}9g;!a4 zrG+O&E7v3oue?B>6s=s7EWGjpc~Z1;O|tOH3*<@B$~DQtD=&~IeYeuWt1P_I!jqzv zYm$XmULa43R<21FUU`8$DO$NES$O3I@}y|xnq=XX7s!*oTWR4{7G7!LNzuwR$-*ly zkS9ee*CY$Cyg;55tz45Vyz&BhQnYeSvhd0atOpCJS%87oHTYT$3!k@m_dR zv~o?d@Wy-LNzuwR$-*1&g(pQT*CY#XwD6?wR$6$Ig*V;{Pl{HqNfzFCFFYw)xh7e7 zARH{-elp8_rjB+m1~lPH{J_RidL>k7T$O-JSkeaCRupn zz3`-H<(g#SjTWBt-AW5@vhc=x;YrcTHOayo?}aBtE7v3oZ@d?t6s=s7EWGhvcv7@- zO|tMt3s3rPrG+rgyxMG$djU#vlE&(ULa43 zR?bdn-gtpLDOx!@p?N2oCw;bdULa43qxum{7n*loAWw=`{RpND%{woUCq=7%1k;7) zofpWHqE$bF=|c0)3*<@Ptwi%KH19<7q-f=ugyx+W$djU#YZ97wULa43R<22C-g$vM zDO$NEp?T*8@}%!pqInmZccOVxv~o>C^Ue$8NzuwR3C%k%kS9ee*CaIWyg;55tz47P zyz>Hi(swJ-ybH}c(L5&I{y8(aJRm%{woUCq*mQBsA~5K%Vs7N;L06 z^G-BRidL>kXx@2&JSkeaCZT!f1@fe5<(h=%ofpWHqLphBns;6xPx@{pns=djCz>Zk zE7v46@4P^s6s=s7(7f{kc~Z1;O+xd|3*<@B$~6hiJ1>wYeYX)YZ97w zULa43R<22C-g$vMDO$NEp?T*8@}y|xnuO+^7s!*oTZ!gfXx@qDNzuwR3C%k%kS9ee z*CaIWyg;55tz47Pyz>HiQnYeSLi5fGtOpAqyY8K%Nw>T$3z(@B(>Kv~o?d z@WBh@NzuwR$-)OOkS9ee*CY!cwD6?wR$BOwg%4gJPl{HqNfthMfjlW%xh7fo;05xe zXyuw@;e!{*lcJStl7$aic+z((Equtr2QQE(MJv}N3m?2do)oQIlPrAj0(nxja!s=E z!3*R`(aJT+!Urup>ARH{K4js87s!*Mm1~lP4_+WoidL>k7Cv}^JSkeaCRzC41@fe5 z<(g#SgBG6j-AW4|vhcwRKv~o?d@WBh@NzuwR$-)OOkS9ee*CY!cyg;55tz45Ve9*#^zFTSGLl!=G zfjlW%xh7fo;05xeXyuw@;e!{*lcJStl7$amAWw=`u1OX?XyHlUt+enV3m?1}o)oQI zlPrAjUU*Wpa!s=E!F%CJ(aJT+!Uyk#Cq*mQBnzLk@TAYy$$Q~Raa2E9nX>T7d*MmZ zs-LV(S@`6=@T6$fPgbTZeDYp+QnczPD^nIec`rQayOkC`W#N++o)oQIlPrAlUU*Wp za!s=E$$Q~R(aJT+!YA*ACq*mQBnzLs7oPOpN(-N|@JS0#idL>k7Cw0|JSkeaCRzC8 zz3`-H<(g#SllQ`tqLpisg-_lKPx@}9g-==dq=hF%E7v3opS%~I6s=s7EPV1_cv7@- zO|tOGd*MmZ$~DQtC+~$PeYeuWr!0KZ!jqzvYm$Xe-V0BPR<21FK6x)ZDO$NES@`6= z@T6$vnq=XV_rjCDTWR4_7Cvd=NzuwR$-*b^g(pQT*CY#{yceDntz45VeDYp+QnYeS zvhc}!;Yr`EwD2hlpS19#Xyuw@;gk2mlcJStl7&y+3r~txu1OX?c`rQafBl!g_G^Fo zSAO5Ozx>q!-}o(G{px^8G*61pOq4K%=96fi^xaA{pF;CVG*60F!PFF*PojBJv)YZ96-qIuGHE75!j%@@%;DO$NE zq4^@3Cq*mQBs5<{^Q36ynuO+yXr2_UT$9jz5zUjnTZ!gNXugQ%NzuwR3C$PLJSkea zCZYKvnkPjo*CaGwMDwI*<(h=%i)fzo-AXiHLi0s5Pl{HqNoc-^=1I}YH3`iZ(L5fE7v46Uqthy?^dGu5}Gffc~Z1;O+xcUG*60Fu1RRVh~`Pr$~6hi7tuT^ zTDc}!_~Oe{lD=DM;Y$|2XyHlG$~DQt7hk556s=s7EPU~0DoN4GHOaykU#5~2tz45V zeDP%}N#Cus@FfdhwD6>8<(g#Si!W12idL>k7QXm0m859pnq=XNFH=d1R<21FzW6eg zr0-T*_>zS$T6j{la!s=E#h0lhMJv}N3txPhN>a3PO|tOCm#HL0E7v3oUwoNL(r4?Y zg(t;Py-a1x!Z%;0k`%4_$;y_6Z@x?=DO&ZDl`RY3e3?p8wCX1-TNb|gGL@ug)lXKo zEPT_#lfGMN;ae8I`7)KHXyuw@;hQg0Ns3mkNfy5OGL@ug<(g#Sn=eyIidL>k7QSiW zN#Cus@GT49e3?p8v~o?d@XeR0BtPDT(swH@e9OW& zU#5~2tz45VeDh^0NzuwR$-*~Zrjit`T$3z(^JOYY(aJT+!Z$5E>ARH{zGdN?FH=d1 zR<21FzWFkhq-f=uWZ|1HQ%Qk7QT5eJSkeaCRzCAz3`-H z<(g#SoA<(#qLpisg&$gY(r4@8z3`+ss-LVJS@_|-@T6$fPgagB{P13QQnczPD@PW7 zcrQFDTJ@8aBMU#g7oPOpN((=-@IwnvidL>k7JhgyJSkeaCRzC5z3`-H<(g#Shxfvh zqLpisg&*DvPx@}9g&$e?p@k4U#=ga`5~GoeYX4-~1h+`5~GoMXSE~J3{kA zG*60Fu1RQqh~`Pr$~6hi579j7yOn5ugyx56o)oQIlhFJS&6A>)YZ96tqIpuZa!o?> zLo`o{R<22Ceu(Bt->pRRBQ!rm^Q36ynuO+uXr2_UT$9lJ5Y3aKm1`23AEJ3uv~o>C z^FuUG`feqfAEEgnnkPjo*CaGQMDwI*<(h=%hiIM@tz47P{1DBPqLphBnjfNh(swJ- z{0Pkt(L5kV;%{4>Q- zeDimYzSnd^tm!jFE57-=N8f9@A=dPnq7{^IkG|J*L#*jDMJp)b9(}LrhFH^Q`fe4P z*AJli4Y?+u`8~=t0nP7Gt_f&ZlP-|oqxzFhXnv2rne&ER6VUt~<(h!z_bAr{G`~l=CZPE} z$~6Ja?@_J^XkI^r=AY?%O+xc~RBO@&@_STk5}MzmT9Yo2-=q4vNoanLzJCOBL#_!H zevfiZu<(17Yl4N}qg)d#yne{SKhyV`Wa0Oy)+7tRN3|xsOywTcnq=YksMe&HsobMl zlPvrm)mPzqnaVx-{(;^Nxh7cnJ<2t~!s~}D{4;&8Nfv&OYE826dsJ)E%T(@Btw|Ps zk7`YNnaVw?HOa#7QLRZYQ@KaACRzABs_*IaGL?Ju{o~>KAq)RZ-)oYE-=kWSEc_nT zn)EW2dsJ(Zh2NuElU}BBk7`Y_@OxBi(#urtQLRZ9evfKRdYQ^Ssx`^N>jy0Sh70sgaD8HwJh2Nw6DjXJmkMgTe-~1sobM_Hp#;8Q9YaVGL?H&&n8*;J*sDuUZ!%7>e(a< zzen|K(#urtQ9PRpEj;OSO@$Vo6i4-JDzflG3r~txJ)4RwywJjvqE*kPA`36H@T6$f zv#H3!3oSh9yOkDRWZ{Jto)oQIlPtW@!jqzvYm$W*T6j{la!s=ELJLodR<21FUTEP- z->tOpf(zue(880Vm1~lP7g~5yv~o?d@InhuidL>k7G7xKNzuwR$-)aQJn6fY7G7|H zycSw`QnYeSvhYF+Pl{HqNfusc;YrcTHOayYEj%e&xh7e7p@k=Xx6;B3E|Awk3r~tx zu1OYNXyHlG$~DQt3oSe;TDc}!c%g+SMJv}N3oo?rr0-T*c)`8!T4>=((aJT+!V4`t zDO$NES$Ls^Cq*mQBnvOJ@T6$vnq=XH7M}FoN((Qz7hVf3JSkeaCRuo)g(pQT*CY!s zwD6>8<(g#Sg%+L^tz45VywJjvzFTSG1^2>hp@kKXT$3!k(880Vm1~lP7g~7I zXY0M^`^6vqwZHz)cm4dgf94my{wsgrm%jb8U%&Uo@Ad4YII5r1-3y^Qsx{r)e>$q4 z)7`6HI;u6@dp$a;pVQq-96G8s-Ji+psD4g&e*mkaTGRbmoQ~?}boWOqI7$nzvhYd^ zPkR6P+Wq%?zWDCXo&Ms_{_5XYDer|}7u8X&Nfv%hL;s&63%}pM@~GCtd*Szc8GWu? zlPtXQUU<^`&+9)9*CY$Sk01Ru$-*lwJn8!b=DqML3$L{Bq`&EZ_#O3E|NEc&TfY3& z0sr}*{>oPe+&6A>)eC^GY;NidL>kXkLltNzuwR z3C%0fJSkeaCZTyHnkRj?63wg7yb{fmqLphBnpdKEQnYeSLi0*APl{HqNoZb)=1I}Y zH3`it(LCw9m1tgt=9Ork6s=s7(7Y1OlcJSt5}H?{c~Z1;O+xcZG*60Fu1RQKiRMY) ztwi%GG_OSSq-f=ugyxlKo)oQIlhC{p&6A>)YZ97QqIpuZa!o?>N;FUUZY7#mp?M{m zCq*mQBs8x?^Q36ynuO++Xr2_UT$9ke63vsMm1`23SE6~+XKN#xC&f{rc@vtq%i?dU zPl{GCXA_#Y%i?MUnm1k`Pl^v%YidIC#tY<0(W*5yp?Tv4@}%!pqInaVH==n`v~o>C z^TrG0NzuwR3C$bPJSkeaCZT!b1@fe5<(h=%jTgw1zFUdrO=#YT=1I}YH3`id(L5k7T###NzuwR$-)~g zJn6fY7T#pxjTW91tz45VywSpwqLpisg*RGwQnYeSvhYR=Pl{HqNfzE{;Yr`EwD2Yi zZ?y2FXyuw@;f)ra6s=s7EWFXelcJStl7%-~cv7@-O|tMt3s3rPrG+KXT$3!k(ZZ8HTRSa0DUQm*yDYrZ!jqy^7T#syofe)Ht-A8t zW#Rp5_gmnTqE%OZyDYq4?N%$U{B~M+()R~U3-7Y8<(g#Sofe)Htz45Vywk#yzFTSGT^8PH;YrcTHOay|Ej%e&xh7e7r-dg)E7v3o z@3iowXyuw@;hh$q^xaAe@3Qbt3r~txu1OZ&Y2iuH$~DQtJ1smZTDc}!c&CLYMJv}N z3-7e>r0-T*c$bBDT6j{la!s=EP76k7T#&$NzuwR$-+A=Jn6fY7T#syofe)H ztz45Vywk#yqLpisg?CzbQnYeSvhYp|Pl{HqNfzE|;Yr`EwD2wq@3iowXyuw@;hh$q z6s=s7EWFdglcJStl7)9#cv7@-O|tM#3s3rPrGq-f=uWZ|6_ zo)oQIlPtW`!jqzvYm$X`T6ofTD=oas!aFTIDO$NES$L;~Cq*mQBn$7f@T6$vnq=Xf z7M>KXT$3!k)54QJTL UQm*$F=Z3_s9RUfB8TDpa10F`=zfA_>SN3SHAky0fT6s z6d#_@dC^Kn`H{iC&{XyuxO=Hs%sTD2yk`4E~9qIuHy=SnmmLi2H19G|Pud=Sl( z;se$fSVL$&h~`Pr$~6hi2hlt!TDc~n`5>AneYX)YZ96dqIpuZa!o?> zK{QW_R<22CK8WT?(aJRm%?Hsu>ARI^K7{6jXr2_UT$9jz5Y3aKm1`2352ATev~o>C z^FcIEidL>kXg-MMN#CtR^C2`JMDwI*<(h=%gJ_-Ka2hlt!TDc~n`5>AnMJv}NG#^Csq-f=ugyw^2o)oQIlhAw+&6B=ciRMFSK8WT? z(aJRm%?HsuDO$NEq4^-1Cq*mQBs3pH^Q36ynuO+qXrA=hI*H~tOpDGQ&p@T6$vnq=XV7M>KXT$3z( z(!!IXm1~lPPg;0Vv~o?d@JS0#`fjC#Pg(e+g(pQT*CY#{SG(T=pA@ZJlPr8*?N+PS zL<^s?@Oiacty&W;e9FQnEj;P_cbFDFW#RK`H$GQc_@sp=#Rtqa$-*ZsJSkeaCRzBT zg(pQT*CY#{wD6?wR$BO!g-=>|QnYeSvhYa@Pl{HqNfth7;YrcTHOay!Ej%e&xh7fo zq=hGax6;C=EPT?!lcJStl7&xNcv7@-O|tMw3r~txu1OX?Y2iuH$~DQtCoMebyOkC` zW#N++o)oQIlPrAF!jqzvYm$XeT6j{la!s=ENefSkR<21FK55}e->tOpDGQ&p@T6$v znq=XV7M>KXT$3z((!!IXm1~lPPg;0Vv~o?d@JS0#`fjC#Pg(e+g(pQT*CY#{wD6>8 z<(g#SlNO#7tz45VeA2>`qLpisg-=>|(r4?Ug(t;PS@@ELFIsp~v`Pq;EPTzj7M>KX@}?yVU$pR~Xq7iDS@@!bCw;fl!j~+3(ZZ9Wm1~lPFIsp~v~o?d@I?zx zidL>k7QSfVNzuwR$-);cJn6fY7QSTRix!>~tz45Ve9^*_qLpisg)droQnYeSvhYO< zPl{HqNfy3n;Yr`EwD2VhU$pR~Xyuw@;fofY6s=s7EPTHBw>7QSTR>uNVXS6TR?g(t-a%r(iv7cD#~TDc}!_@ad;MJv}N3tzPGr0-T*_>zS$ zT6j{la!s=EMGH@gR<21FzG&e|(aJT+!WS((DO$NES@@!bCw;fl!j~+3(ZZ9Wm1~lP zFIsp~v~o?d@I?zxidL>k7QSfVNzuwR$-);cJn3KjGk^J){+Yky$A0+BUmfs+f9F@f zI$#malfFMQqWKbpRREi~Um z^Q36ynuO+?Xr2_UT$9jz6U~#Nm1`23Z=!ipv~o>C^G!5Q`feqfZ=v}nnkPjo*CaIG zMDwI*<(h=%n`oXCtz47Pd=t%+qLphBns1_c(swJ-d<)Gt(L5O*Bu6R<22C zzKP~Z->pRREi~Um^Q36ynuO+?Xr2_UT$9jz6U~#Nm1`23Z=!ipv~o>C^G!5Q`fjC# zZ&~=Jg(pQT*CY$ywD6>8<(g#Sn--oFtz45VeAB{{qLpisg>PDT(swH@e9OW&Ej%e& zxh7foriCX(E7v3o-?Z?gXyuw@;hPqo6s=s7EPT_#lRjGyEj%fX%EFH<{LsRaqE%mo zA6fXJg(pR;z6w9G@IwnvidKD3cVyv*7M>KX`kwB{!VfJx>ARH{eq`Z?7M>KXT$3#P z(880Vm1~lPA6j@)v~o?d@IwnvidL>k7Jg{qN#Cus@FNR9wD6>8<(g#ShZdd`tz45V z{LsRaqLpisg&$gYQnYeSvhYI-Px@}9g&$e?p@kIlcJStl7$~yc+z((E&RyB4=p??TDc}!_@RX-MJv}N3qQ2*q-f=uWZ{Pvo)oQI zlPvtu!jryRY2il}erVxI(aJT+!VfJxDO$NES@@xaCq*mQBnv;Z@T6$vnq=XJ7M}Fo zN((=-@IwnvidL>k7JjaFzXd)iTDc}!__^AxR;`H^eq`b2YPVXoCR+HBg&$gY()aH$ zE&RyB&(&^xuCnk$3r~sk7Jg{qN#Cus@FNR9wD6>8 z<(g#ShZdd`tz45V{LsRaqLpisg&$gYQnYeSvhYI-Px@}9g&$e?p@kIlcJStl7$~yc+yvE`;diyrZ@@9orf2QvZ%fjzb zJ)30V_vrh3x*Kv$u<(17Yl4N}qg)d#{2t|+VBz;D*8~f{N4X|g_&v%s!NS{zEc`Ql zuSphuk7`Y_@OxBiy4wBxcJLn6pLDYDd-N>?H{_aN;rA%l1Pi}Mxh7cnJ<2t~!tYV8 z2^QWyWZ|Fbdrh+NdsJ(Zh2NuE)75UQ2^M~TT|S#&;rA%l1Pi}Mxh7cnJ<2t~!tYV8 z2^M~ja!s)C_8|-ZOy6sgh2NuElPvrm)tdA&m3vgrCRzABs%Mj4rgD$!t8iKPJ^I$m z8*)uI{g?jL|LEWU<=^&Wf7_S8I^duChrjaG0k?y*1DfBXoE^}-eF)7z)A#Iz=J%*( zCp5oDH9K7(zehDYq4_YG2I`91nZ^BZzaK=bw?H2+NB zYZ98@qgs>D{2tYsgy#3C)^u4Mf0_c#@2|^05`pITDE~+Vn%|>*hyl&-Q9hf1=JzPq z1T=3SLi5k`y(XdgJ*qY7Wh(cm)+98)N42KQ;#d>V{QkOJ6VUt~<(h!z_bAr{G`~l= zCZPE}$~6Ja+lSEnGkvc~Xnv1sO_#;--xX+ne_ejL4m7_blhC{n&67UYR4$9ZCy^9KwWfj#AF$R`gyw~4 zo)oQGQxTdMqIpuZYE4CGUWn#N->pRRA~Y{V^Q36ynuO+sXr2_UT$9ke5Y3aKm1`23 z7ovGmv~o>C^FlOF`feqf7om9}nkPjo*CaGAMDwI*<(h=%g=n4>tz47Pyb#TkqLphB zniryZ(swJ-ya>$;(L5KXT$3!k(880xTWR4%7G7xKNzuwR$-)aQ zJSkeaCRuo)g(pQT*CY!swD6>8<(g#Sg%+Ol-AW5DvhYF+Pl{HqNfusc;YrcTHOayY zEj%e&xh7e7p@k9e)c!js~tEWFCXD=j=JTJ@8aDhsbyyWawz6s`KnN|lAztKDkF9fC>=Pl^v% zYpSyFN()c=Zl#4+S$L&|Cq=8CO;r|NY2iuH$~DQtD=j=JTDc}!c%_9WMJv}N3$L{B zr0-T*c$I}$T6j{la!s=EN()blR<21FUTNV;(aJT+!YeI2DO$NES$L&|Cw;fl!mBL2 z(!!IXm1~lPS6X;dv~o?d@Jb6$idL>k7G7!LNzuwR$-*lwJn6fY7G7oHl@^{9tz45V zywbvxqLpisg;!d5QnYeSvhYd^Pl{HqNfusd;Yr`EwD2kmue9)_Xyuw@;guGi6s=s7 zEWFaflcJStl7&}Vcv7@-O|tMx3s3rPrG-~nc%_9WMJv}N3$L{Bq-f=uWZ{(-o)oQI zlPtW_!jqzvYm$XmT6ofTD=oar!YeI2DO$NES$L&|Cq*mQBnz*!@T6$vnq=XX7M>KX zT$3!k(!!IzTWR4{7G7!LNzuwR$-*lwJSkeaCRuo;g(pQT*CY$CwD6>8<(g#Sl@^}# z-AW6uvhYd^Pl{HqNfusd;YrcTHOay&Ej%e&xh7e7rG+O&E7v3ozc0rCwS!W&ARH{-elp87M>KXT$3!k@ntGW(aJT+!W&YybCe{44+6pZ(r1e|5lr@h86e z)d7uYo)n)M|40;?H==n`wDONcp?M>kCw;dP&707?5zUjLm0w^9%^T4?DO&kQqR_k% z&6A>)YZ96_qIpuZa!o?>Ml?_QZY7#Gp?M>kCq*mQBs6bC^Q36ynuO+!Xr2_UT$9ke z5zUjLm1`23H==pccPr7n3C$bPJSkeaCZTyFnkPjo*CaG=MDwI*<(h=%jcA?}tz47P zyb;ZlzFUdrO=#YT=1I}YH3`id(L5Ml?@~R<22C-iYQ&(aJRm%^T4?DO$NEp?N2oCw;bdqIpsr6(w|CAn!!;q-Yf- zbX_3tMDwI*6-;$qAn!!;q-YgPbzLCuMDwKYR-$aHw-U{}(7Y4PlcJSt5}J3Sc~Z1;O+xcdG*60Fu1RR# ziRMYs$~6hiJJCGpyOn6(h31`To)oQIlhC{q&6A>)YZ97wqIpuZa!s=Eezp7kSEZ7o zm1~lP_p9A%)tYGGT^8PH;Yr`0D=oas!u!>3e6F(aP768<(g#Sofe+--AW7ZvhYp|Pl{HqNfzE|;YrcTHOay|Ej%e&xh7e7r-dg)E7v3o z@3iow?^arPmxXs)cv7@-O|tM#3r~txu1OZ&Y2iuH$~DQtJ1smZTDc}!c&CLYeYeuW zyDYrZ!jqzvYm$X`T6j{la!s=EP76ARH{-euvP7M>KX zT$3!k)54RYm1~lPcUpK-v~o?d@Jk7T#&$N#Cus@GcAQwD6>8<(g#Sofe)H ztz45Vywk#yqLpisg?CzbQnYeSvhYC*Px@>fwD6=jDj^uM@IebtidG51kcAIgcv7@V z2!%br0-T*_>hGUT6j{la!s=EK?_fcR<21FK4{@d(aJT+!UrupDO$NES@@uZ zCw;fl!iOw;(880Vm1~lP4_bIqv~o?d@IebtidL>k7CvaxZakU$tt1Nub!js|y=9*;TgBG3?tz45Ve9*#^qLpisg%4VI(swH@e8|EF zEj%e&xh7fopoJ$zE7v3oAGGkKXyuw@;e!^Q6s=s7EPT+ylfGMN;X@WaXyHlG$~DQt z2Q54)TDc}!_@IR+MJv}N3m>%bq-f=uWZ{Drp7h;H3m>xZK?_fcR<21FK4{@d(aJT+ z!UrupDO$NES@@uZCq*mQBnuz3@TBinTKJHK4_bIqv~o?d@IebtidL>k7Cva`qE+A1OC^GP&M`feqfPoeoFnkPjo*CaHbMDwI*<(h=%lW3k4tz47Pd=kx*qLphBnopv6 z(swJ-d) zYm$X8ULa43R<21FzIcH=DO$NET_9hy@TBinTKJNMFIsp~v~o?d@I?zxidL>k7QSfV zNzuwR$-);cJSkeaCRzBRg(rQt(!!T4e9^*_qLpisg)droQnYeSvhYOk7QSfV zN#Cus@FfdhwD6>8<(g#Six!>~tz45Ve9^*_qLpisg)droQnYeSvhYOk}(Wk}(aJT+!Z$5E>ARH{zGdN?7M>KXT$3z()54RYm1~lPZ(4X#v~o?d@J$O( zidL>k7QSiWN#Cus@GT49wD6>8<(g#Sn--oFtz45VeAB{{qLpisg>PDTQnYeSvhYm{ zPx@}9g>PB-riCX(E7v3o-?Z?gXyuw@;hPqo6s=s7EPT_#lcJStl7(+tc+z((Equ$u zH!VCVTDc}!_@;#?MJv}N3*WTxq-f=uWZ|0@o)oQIlPrAG!jryRY2jNIzG>k}(aJT+ z!Z$5EDO$NES@^!%{TBG7Xyuw@;rnX0TD2xx_?Ct5tKDkVnrPu$7QSiWN#DQ2wD2tp z-&ecwxyr&fEj%ecV6I6PzG>k}(aJT+!Z$5EDO$NES@@=fCw;fl!nZ7Z)54RYm1~lP zZ(4X#v~o?d@J$O(idL>k7QSiWNzuwR$-*}+Jn6fY7QS!#=l;~6_y_*AZ~wdh$Ctl4 z;0J&0t6v?kiRMZ1;c<3C^G!5QidN1}XugT&NzuyL3C%arJSkc^JE8d@nkRj>9-?_t z9Mu-^FuUG`feqfAEEgnnkPjo z*CaGQMDwI*<(h=%hiIM@tz47P{1DBPqLphBnjfNh(swJ-{0Pkt(L5C^FuUGidL>kXnu(1NzuwR3C$1DJSkeaCZYKu znkRj?63vg${1DBPqLphBnjfNhQnYeSLi0m3Pl{HqNoanE=1I}YH3`iR(LCw9m1us1 z=7(sW6s=s7(EJe1lcJSt5}KdO;%}-?idL>kXnrn>t5s_vnjfM0xh$?$t%+!Ugyx56 zp7i}YOf)}2^K)4opR3UP5Y3a~1Lm59=7(sW6s=s7(EJe1lcJSt5}F^PdD3?)(fsHF z`Jsg;MJv}N3qQ2*q-f=uWZ{Pvo)oQIlPvtu!jqzvYl4OM4_WwU`f9ylc|RpSQyj&g zboc0cO*h1vK2x;fPr7^by`~#tO`j=R@h9Cq`d-rwv8K-yt@yg>9(}LrhFH^Q`finl z_YYY34Y?**_&v%s!NTuRt_c=?k8(}0@OzYNf`#9sToWw(9_5-~;rA%l1Pi}Mxh7b6 z|B!`$rtcxj!tc@d*G)I%nqcAgDAxoFzel+ySol54HNnE~QLYIVevfiZu<(17Yl4N} zqg)d#yno2TKhyV`Wa0Oy)^xS|`K9zdsz2#u;rHnKd%7EPO|bBLlxu>8-=ka;Ec_nj znqcAgDAxoFzel+ySa|=Cg@2~+HOa#7QLX7}H`W9TzrQY@O|bBLlxu>8-=ka;Ec_nj znqcAgDAxoFzel+ySol54HNnFBhb;UveXmItevfKRSG%z$Sor;Qxh7cnJ<2t~!tYV8 z2^M~ja!s)Cdz5Q}h2NuG6D<55<(gpO{X-W1nZDN~3%^ITCcRAM9@U?8vhaIUf70n? zD)*?qr;~->qi?;uA=d;8zel+ySol54HNnE~QLYIV-almFpXqx|vhaIUYtjYsdsJ(Z zh2NuElP-|oqgs+;zI3%^JCY=VW~qg)d#{2t|+VB!5k7XF#O z*CY$SN3|wd_&usMUG2tyS6KM{b@_D@Ec_njnqcAgDAxoFzel+ySol54HNnE~QLafA zUTEP-pKB_#@T54ZufmHgywJjvqE%}uvhYF+Pl{HpsmQ_$Ej%e&wWcBqFSPKa?^arP zk%bppcv7@-O|tMp3r~txu1OYNXyHlG$~DQt3oSe;TDc}!c%g+SeYeuW3+{#YLJLod zR<21FUTEP-(aJT+!V4`tDO$NES$Ls^Cq*mQBnvOJ@TBinT6n>|@Lp))NzuwR$-)aQ zJSkeaCRuo)g(pQT*CY!swD6>8<(g#Sg%+Ol-AW5DxEJ0FEj%e&xh7e7p@kUU)CG@T6$vnq=XH7M>KXT$3!k(880Vm1~lP7g~5y zv~o?d@Inhu`s2U)oBpl8`)~bEe)pHZI^d6g?^nJ$pckTf()VXZG%rH)LNrf`R>4#e zniryZQnU)DiqO0e&6A>4Fja)+g=n4>t%9i{G%rN+r0-Uuc@dfyqIpuZa!o?>LNrf` zR<22CUWn#N(aJRm%?r^yDO$NEp?M*iCw;dP&5O{y5Y3aKm1`237ovGmv~o>C^FlOF zidL>kXkLisNzuwR3C%0fJn6Hw63vt1sJ_6eLi0*APl{H3fmMa(m1v$6t@;A13e79g zJSkfB1y&WBSE6~+cPr7n3e79gJSkeaCZTyHnkPjo*CaHrMDwI*<(h=%m1v$6tz47P zyb{fmzFUdrRcKy`=1I}YH3`it(L5&hqIpuZ za!o?>N;FT3R<22CUWw*O(aJRm%`4G7DO$NEp?M{mCw;dP&8yJ763vsMm1`23SE6}R zv~o>C^GY;NidL>kXkLltNzuwR3C%0fJn6fYXkLZpm1v$6tz47Pyb{fmqLphBnpdKE zQnYeSLi0*APl{HqNoZb)=1JeJwD2kmue9)_Xyuw@;guK2lcJStl7&}ZAWw=`u1OYN zd4W7BTDc}!c;yB1r0-T*c$I}$T6j{la!s=E$_wO4(aJT+!YeP3Cq*mQBnz*+K%Nw> zT$3!k@&b9%cPlNt%EBuxJSkeaCRupp1@fe5<(g#Sl^4j9qLpisg;!o6Pl{HqNfust zfjsH6wb8KXT$3!k(ZZ9Wm1~lPH(GeocPlNt$-)~gJSkeaCRuo+ zg(pQT*CY#XwD6>8<(g#SjTW91tz45VywSpwzFTSGO%~p0;YrcTHOayoEj%e&xh7e7 zqlG6$E7v3oZ?y2FXyuw@;f)ra^xaAeZ?f=43r~txu1OZ&XyHlG$~DQt8!bF3TDc}! zc%y|UMJv}N3vaaWr0-T*c$0-UT6j{la!s=EMhj1hR<21F-e}=T(aJT+!W%6-DO$NE zS$Ly`Cw;fl!ka9-(ZZ9Wm1~lPH(Gd7v~o?d@J0(yidL>k7T###NzuwR$-)~gJn6fY z7T#pxjTW91tz45VywSpwqLpisg*RGwQnYeSvhYR=Pl{HqNfzE{;YpvZ_eIh0;iNb! z3%_p^)={nLzCKn*W#RW-p*pHH-4{dZsQ#A0eG8zDYEAc*Z#t^KWpLl`rK4KYeVLPv z>Tem`H}~KuExgOZJ1sov{pV}<-|zY2yMO-MKl2M;|HYsE)vw>z;@|hd-&bd7!?N)E zo&+A%nyz-gf3kA_%Dg^cu1OYt|GFz%wI*8l{kxF*fVn1Fc&CLYz5hJ^yVAnDEWFdg zlcJStl7)9#cv7@-O|tM#3r~txu1OZ&Y2iuH$~DQtJ1sovyOkE+W#OF`o)oQIlPtW` z!jqzvYm$X`T6j{la!s=EP76tOpE(^bZUg;4o3-4FE@n?=Kywk#y z;&bJiWZ|6_o)oQIlPtW`!jt~5?SK4{-~Y?M@*jNps{{VaZ-4cx1Nvog{io0uSVZ$K zH19<7r0?HPqInmZ_sin=T!rSHXr2@wFlQ$;??m&YXyxpL=ACGs6s=s7(7Y4PlfGMt z=3QvsiRMYs$~6hiJJCETTDc~nc_*4DMJv}NH19<7q-f=ugyx-Sp7h;HH19(5PBc%7 zR<22C-ihW((aJRm%{$RNDO$NEp?N2oCq*mQBsA|t^Q7-qqInmZccOVxv~o>C^G-BR zidL>kXx@qDNzuwR3C%mvJSkeaCZTyJnkRj?63x5Nyc5lnqLphBns=gkQnYeSLi0{E zPl{HqNod}Q=1I}YH3`i-(LCw1br8*y;;6p)8$$EJ3*<@BD&`zQ^T7+`Nzp3i976MP zS^WJSM^dy3&4hGUT6j{la!s=EK?_fcR<21FK4{@d(aJT+!UrupDO$NES@@uZCw;fl!iOw;(880V zm1~lP4_bIqv~o?d@IebtidL>k7Cva%bq-f=uWZ{Dro)oQIlPrAD z!jqzvYm$WzT6ofD>!gJz#Zg)Kl!Z@Pcv7^=!lx{J(!!IXRTe&F;gc4g6s@xGDGQ&p z@T6$f1@b8ipS19#?^as)l!Z@TAWw=`u1OX?d4W7BTDc}!_~Zrhq-f=uWa0B__xmR+ zNzuwR$-*ZsJn6fY7CvR+^J+IfS6TR^g(t-atUu|dEPT?!lcH6B(oI?Tq=hF%E7v3o zpS19#?^as)l!Z@Pcv7@-O|tMw3r~txu1OX?Y2iuH$~DQtCoMcFTDc}!_@sp=eYeuW zr!0KZ!jqzvYm$XeT6j{la!s=ENefSkR<21FK55}e(aJT+!Y3^}>ARH{K4syP7M>KX zT$3z((!!IXm1~lPPg;0Vv~o?d@JS0#idL>k7Cvd=N#Cus@F@$QwD6>8<(g#SlNO#7 ztz45VeA2>`qLpisg-=>|QnYeSvhYa@Px@}9g-==dq=hF%E7v3opS19#Xyuw@;gc4g z6s=s7EPT?!lcJStl7&xNc+z((EquztCoMcFTDc}!_@sp=MJv}N3!k*`q-f=uWZ{z* zo)oQIlPrAF!jryRY2i~AK55}e(aJT+!Y3^}DO$NES@@)dCq*mQBnzLk@T6$vnq=XV z7M}Fkx@h4^aa0z*WZ{bzo)oRp%Owk6wD6>8m0m7c_@ad;MXU63$-);cJSkeGmrEAD zXyHlUt+enZ3tzPGq-f=uWZ{bzo)oQIlPrAE!jqzvYm$X8T6j{la!s=EMGH^*Zl#4U zS@@!bCq*mQBnw|xyWawz6s=s-weWxJ7r*s~{`WuiFZ{9p@6`d{|69NK;`{&p{F+~U z*LVJzUw?JL_x}hw^he?nnlGYxQnc!i#3eLeMDwKYR-*Y5nlGYxQncy|tR*yGMDwI* zfE7v46Uqthy zXyuxO=8I^a6s=s7(0mcilfGMt=1XY4h~`Pr$~6hi7tuT^TDc~n`68MpMJv}NG+#vX zq-f=ugyxHAp7h;HG+#pVMKn)}R<22CzKG^Y(aJRm%@@%;DO$NEq4^@3Cq*mQBs5<{ z^Q7-qqWKbC^F=gIidL>kXugQ%NzuwR3C$PLJSkeaCZYKvnkRj?63v&; zd=br)qLphBnlGYxQnYeSLi0s5Pl{HqNoc-^=1I}YH3`iZ(LCw9m1w?%=8I^a6s=s7 z(0mcilcJSt5}Gffc~Z1;O+xcUG*60Fu1RRVh~`P3t($0`6i0>TTWG$C=1I{inA$?~ zO*Bu6R>9O3ns1_cQnZRWx6phO&6A>4%(;c;n`oZ&-AXjyLi0^DPl{HqNoc-_=1I}Y zH3`i((L5#7M>Izu)c2EvhYm{Pl{Hq zNfy3o;YrcTHOay^Ej;PFl@`8b;hPqo6s=s7EPT_#lcJStl7(+tcv7@-O|tM!3r~tx zu1OZYY2iuVt+enh3*WTxq-f=uWZ|0@o)oQIlPrAG!jqzvYm$X;T6j{la!s=EO$$%@ zZl#59S@@=fCq*mQBn#iP@T6$vnq=Xd7M>KXT$3z()54RYm1~lPZ(4ZLcPlM?%fdG; zJSkeaCRzBVg(pQT*CY$ywD6>8<(g#Sn--oFtz45VeAB{{zFTSGTNb`);YrcTHOay^ zEj%e&xh7foriCX(E7v3o-?Z?gXyuw@;hPqo^xaAe-?H#c3r~txu1OZYY2iuH$~DQt zH!VCVTDc}!_@;#?MJv}N3*WTxq|eqv3r~upvhX7dKeX_qXq7h|S@@xaCq=8g>Bzzl zEj%e&vhYI-Px@}9g&$e?p@kIlcJStl7$~yc+z((E&RyB4=p??TDc}!_@RX-MJv}N3qQ2*q-f=uWZ{Pvo)oQI zlPvtu!jryRY2il}erVxI(aJT+!q3(2x4K% z@Iwnv`u-iJg&$e?x!R4-RTh3|;YslUb4{}FLkmxeR<21FerVxI(aJT+!VfJx>ARH{ zeq`Z?7M>KXT$3#P(880Vm1~lPA6j@)v~o?d@IwnvidL>k7Jg{qN#Cus@FNR9wD6>8 z<(g#ShZdd`tz45V{LsRaqLpisg&$gYQnYeSvhYI-Px@}9g&$e?p@kIlcJStl7$~yc+z((E&RyB4=p??TDc}!_@RX-MJv}N3qQ2* zq-f=uWZ{Pvo)oQIlPvtu!jryRY2il}erVxI(aJT+!VfJxDO$NES@@xaCq*mQBnv;Z z@T6$vnq=XJ7M`^JrT_HL|HKb}>l;7#<3IM(_apex`|me@?brYL^}kC0#n1ls{PRzL z;~U@j!JqoU{LkO{&3lpkcm3S_=^r{j^}~PYcmMWZ{#N^;kMh$$^HV?a2Y&qb{b>Ei z|KLyl(2xI9Kk}=Y{O|mY z_uudQn~J{k@5^t(U;pO^e#h6o`0k(o_Rsvn*MIT>>Z>Z8{E9^=wfQ1yA0}hvh zlw$bG1UN2?suFnO1Xe>$On|*(G^Nh3fd^H1W29>n(2)~xr6z7>(TIG)AOlRi65s^P z(mWrQA;FaZ14}b#3@MX4DJMTa*DW)rB(+F;mxijFr;n?rpBpffF|qF+1R0yL44OP)*E*D)gA*VQ*T#6QS2J|s9E zs8omp><%CS(wmW%nUkvL80HR=iuZAa=#}G=<`PCoK@5jUif}1#i9jTsgIpa$fChW| zf=ov;S)Yp+qQb{9I0R%VW^kbtYFtKKvY`AF57rf5l3HAnnU}7YTv}X`pBoP>JA}a5 zEgmXRmY7qT$`jF-6cPj~@|0BkIa>OJ1m?F3Ni?%5yjmfoA!NX4(V_0Z=)n;1x?M0z zH@_%dFR>soIU`jsBQYgEzd$b|DX}RJ(>K@chAG8 zpWiq;=lA(Jp7`|ne!lY~<07A4921OT6@SEhF%doZ8i8RE7*mW97Wgv%^t@D`&uM&X zWl?GCOv?Ao^rm;oO5Nd{)-_{RZ{2GBkGG!Ax$t4{ zKUFUt4y+42oLrZD_}-K#W1%tDc%rpFr&_;6`ib^w@trxN&y3$66ek<9FePWvnd1FH z@iLP>PI_FsOgwbq1N|~-y>^k^RO^>Yk7^f+@5z~PX2$;PIO~vwIW4_$)>YD2F`I=b zC+SSh{_J?GNgppgps<2dcikK$|tf97Q?h=J(kde5bTnQyKm}n+sg3=nQSTj`= zo=HaIv2rDp&|u;-B1Vv0Lv7ath|C$}0z6Evgc2G|Scw&awi;@uW~RuTNlNitxe`i9 zFyYv%5y}Dh#X*@SMeYFL6$ht?D8bwsYMaJSq{<-Y;mqH7D51f`K8-}~0N@n|%|wEr zu7=v72@|O@$rzmZfes}Um`GGQj}z)?$xE%d_0tsn2b2q>7Jp-f{ZPBV*tOjJp5B(9 zaJqNJg?w$E_nTa;Mia%mo%qfkPGiP$7%BT*zMHBs+t%0t4l$sim` zXfP3$q&!X(2$D_Idzz9Ea~3%ThY}i0z-fN@y^luA!#l0KDQLCCSl9lnClfRGG#Y zqRJ+x;ZQ<>2^?=45TjTqU0yki?vLjxEcI~~mYkf0CAU|zW2Y&3wWn~JtFTncQCPa; z3bpJcKCcvbp0m!^!%^o0J69mO#x}5XA!}>{I~TIXHn4LcYi!#r$jcxBc*Q}9ATN^y z;1vg8=R(%lRw!UB2mrj|0PI|d8r#yyi(WD6CZ4XV#;=PhmPU^mjDxUb=ImHX<}553 zIUC4YISWf&9EGLSL29`e$oe_!d@gP}UlG{35H_}fovRd4V;k7H&^5M!oeNcCoAVS= z7y`gA4w?yLh!cQM9Dtn*S!0{?GlKa|5r9t|fSn6bV_R5=dBEi~bob&_rOVsm=^Ab2m>Jux*vyGA1DYV{1Qq$;u!R@BHPsq z8W{g{KoG)^6~aIiK~xlBeoBG7$ukaw18PlvP+M=+u6C*AW(!)!xW4>lbxi8i0(Rih zc2eq#bD2o5ZS24!oFzPQZg(n~@-;i~$YAk{`@3fc9j zM)oH|c0H<*{drkrlmLKN9JIl_0#wH|*FlbX3Zn7t?ktkA z+W_!~13y8Y$MkhGiR-Ei#$D?ge197gc>fqXuy9t#Y>e~CnBF*(3A}G$2i=_Yv`fq< zqiWapOxArEOSsHcfpfSRBU-Rt#RT3Lumn$%83)Y{#4d#AQ%9An1ziq&%MuaC+#=@vZR3}uzTt+nJGTz`E zB{0vAcjF9U7AG3BIB#%{Fr|@KQyRU;4RaaMn9F#BbA%avD9q@);jBd%z{#rtoXm0G zg%6mb@@Kk&^-8NT{pS~*bUD|SMD7+U?uICi_NKc9b)3CpNNfY5yp+FBlvm&_7!NsB z6rM@KutpSzH3o>x86*sA(;?!$%2jt*jTz}#92N@y@a5&Lkzu<%+67Q>W;?};irEX-U>!D5(_a86?kRjnth z@RVdnCs7hmS4%+&4JOWNbfHcqS&mDR9d@EHAg`8!((S>7QRu&D~$ zx7B{AeTH(vY-y?f*@E&i>o?h5FDYlwo)bQ2cBZsSyHk8XkdTj*X0?BvI>2wbg%Xeu|_evWxOG1$ijAUnPO2(u^~t2yTfp3|-KQKU64MP8%xavMT*^(gWHd3)zX*3VoV%pY|>B7R7{? z8HS+?_vXwt*sJyXv}_ikIkg76NnfR9vk1(2)nNZXU#`9J;_Y9NGFMG`60ugU&DZ87 z?0HXb^DnQkzfvEh=pU-oYVp{=rajAHnxo1Sp_Q=5#Ds3mWC@<^`Gd2#j6odOMx!>B zv^B5;Plo=-?ZslIJ#rg6@WhQVCBBO7lG!zcJRd;%sE@NYr^ zWSuj=_wp?79Yz=Y3Q@^KmEE?U!ET!%P@;B+yK?L8HnHnadvCYay<%v-Cl`#lKwL+H1?VT5|dazumoIG*0I-8c_cignD61?;#ESm;Lr!oPJPl zwF%r$n)DWt^Z5*^dWEB@LF@jA(rpvFUL+-xJQrB*6S!@R8pQ3Yf!nAjg*ch#VSTUb z9&TZxgV_;_C%o>efh}#RrWj6sc3(HRYJiA|X(YpH=8f4H@~Q#)W)cLwVr}D3IlReL z1ElA^si6-s(%x0(SzR@7R5ssVc);K`1GgD?(qw|tVHUUsZZjfAkc{NqfSDq5CMiWW zRjG}~>F|CcRR%c^*@T_BJWdK{ny|KM0C>d#(`C0q1HdZ|BuS1U4FIn=2otF?$rxnQ z)9QFT1wx}aHsEzS-TQI-c(Lo?tOZiWm9*5#*q?4(a&HLsIM29Pwk~kCY@Ow7*&5si zJGU0SK6*}Ue9?8bFxs5Bv)mAU#ayOt^J|cunS5od665;zO2zi;>}|S)C#g8`Rg+xP zxR2Hwv{y}C=OZh_uyu$1zz_QmhZp1 z<|%YxUEg2x0A80~Q@{(#YoCIC|JtWG`mgW>QgkOI2Ht`O#=At}8MkfjJ~URM*jVZ9 znT5tmu8oz$W}+lWUDQqunxY>_UDP%WnxfxkL0$$4z$*?)1bLYx0IxVG6flv~0KDP= zq%LZQ21U^?jqLM^QLLmcuOOz@HF9i&RB~^Fbb7lvxoBzw+u+g$*~3{=JI}cdvWasW zWGTls$Z;XzT>Dbx2pgPkYc=Xt2#w9e@Ut#K1{$kF8)mZ)lacgd{Wdl`E3rb*#$y{4 znqrqn$8?+`q6Bk!Y=c5m?DFcEdo@D21Atc?lxb4r4gg+puusGM;s?Mh4w{JsK^<>x zP-u!>Udvvh(&bI%x|y%*s{5OJ|7ezw^v~_t^59nd#RS&E8 zxS@xYS3RuEac|PJGICAA6ZO0E{AFhQ%@!AFncQJaZ=A_w%{Q=vE=EAA+V#D(nhIkH z{fs&ntXGW-aTc%xGh;`!a$xMNYxPl9a~@p6+qCpv^?-Z{w_gb2bSB8h+6U zZANxT0Pp8Nlpq=;Z4BXkMtGRW*+>BJiUXm@d7J>?5eJ_To=J&SyV|9USk%^gvqe8$ z=~2ec?M@|AzGerW*^Tjw`D=>|)!i*{FJK34jPog_zWApurk|LauKlBD z*dq_Oar;J6-HIl_Q^Y($AiCLKkm`kOsu$$P@G3vXd)$!Z21#ym-utA6n*|P%+>lLj zgR~z+(|&lBQwFhggU4M6fo(j?t4k$&yjs)+eM(meOz5yD$Xt~ z9cP!;dCq1tB(`Cq-qmal3Fo^5YLg%kK_jDG4$2-2sKB z@e3?n$`b^d=C-|>!Vum&pwKk8m1)2MMWAVJi@_}>3V>G}MB$7>27pH#R8yWjuVB5> zYD~|c>AKm%t!BnGa&l=oJkea7Tv~OUU0U6oU0Rp9n$6JI24vSGoNzxJgQ!r=7;92* z`230cMn;YuIsMsOW1-~li+w8uBkhDF*-?(uN>Uf)wlSem=@vlY`v%s)Biv4>yFeeH{*k&oT~c=os;|9P9cOJ~fW*IjP> zm+XKtC_qKkW(|M7)*s(*rMKPgeh9NuATA@#q z?$f??r69K&^qJBs?N0FlMN&(RA$y2If%cmyv1{c8gVude881~Qm<@W%wB&fJ&EIIS zcjqUZ>3yZXO40vud;FDeqaETzS%p4Tx>x&#Sfd!-GTsn0WMR9wOtGk?*bpR_8T1*_ zO6`yZd$oR__THS?238A`zDgUKQ){rZSX3s;D)s5oGHt$iha$Ko&MzScr<5xy*bZms*+6 z;_2Rx+x^9^gGx`d{Sc-8!jw$U1#%`|Ya%-syal^&_P!}x@!PRm1V%43jb2_49|U5d znquZWMHGer@QZ_H!WiNN;1dU6?n5`Z4sy{@O|El(Mlk;e9)M3AfT8nuRHU-VNw_e{ z(Lnq*u2K-Z>P!@ZvKS+HsegOHYv2}K1TVOPy`0FAD-gU;j3&sWfqHzJgW3w!1UHsGgK=1;=3w&V1OBSP7(n74DqiU4T2+T+Z7D^X* zJ*cY~pJ^iVHJ=`Q4SGg>Sc)jPj^;lU1Vz<%At z^lx8C-L>cbj?#{C_O#+1Uw3~iYwyY$J3e{ISDk4c_w8DpgJVzC8)nUC?vmAJ%ZZ+! zxGXj{?mw76E@i$p{`AJHf4uRk)Ymuqlk3Jj{a{(pUu*xba!2i`=|`5(!V!t7jjucI zd21Ptqlbu%%-i4nOw@vF?kHkP4=`UC z=A(+ZveiY`-Vqz|kIu6ZrtSY3G*u#L6+Kx#`9oi9-{N=ndM+#4a>v69qZcoTes-bf z`A;%$_)W*S&I!VACXD~q-aYkG;>eLB4O5z? zeB1TFecfH16CY|9emy~W|3iLp!JmGD-TBDj2ebeBROPGhr2YGmN~JL2&&f}PW{0M| zIsVf%yWaOL8ByXpzxk<#Gf&)+Rb3v|@Y3X>i~p^BG+6b}rg;%N8=vsaEY@dqK3BfH za{{(;@079APt%(*LFlAV`BqhUVuC7Ku{Qd-rg%+Rc~jNIHcp;_r^f65Gv$y~oY?S+>!C&w(Gm;2(Hv~_7O r<*rL*-X31S05AXy00Y1PFaQhy1Hb?<01N;FzyL6C8!#{u!$$r;3vfFW literal 0 HcmV?d00001 diff --git a/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_64KB_NONE_5000.hfile b/hudi-io/src/test/resources/hfile/hudi_1_0_hbase_2_4_9_64KB_NONE_5000.hfile new file mode 100644 index 0000000000000000000000000000000000000000..923bb8446498ca68112500b6e0966869df24741c GIT binary patch literal 300065 zcmaIePss0WVjlD}(;^fM!Wb-~js#R>VAALQf8Ro2-Zv9Wn5iLgB#Nt;8IqVP6*{T2 zY7ny#R9w2zt=*|hDd zeSY8n>PKIF{lg!8=gT*~{KEa`=l5qT^ zyMOSLpMCe^@BH}k*Z%bX_n-aD{_wZ|(ocW#ccnO5c(mM;&(_MLRYx0-HXZFe+I4jB z=+M#0qfOEro4#8s*K|`H<(jJ2RJo>`qLpi^T2tkkZi-f}scKD?Yq}{~ zxu&W$Rj%o#Xyux!*3`JBo4#8c*K|`H<(it-)VQXbqLpiET2teiZi-f}scB7(Yq}{~ zxu&KyHLmHV@7B&W-4sW;rmi)0uIZ*|<(j(I)VZdcqLpjvT2tqmZi-f}scTK0Yq}{~ zxu&i)4X){?@7BRJ-4sW;rlBZ)lxx~r z)8?9PidL>^YfYPLx+z+@rmZz?uIZ*|<(jtEw7I66qLpjfTGQd0Zu)LLT+>Z)lxsR# z)8U$KidL@aXibM}x+z+@rlU0-uIZ*|<(iJxbhxIQK3mI&)|3=SwWjicHI)yoDJfdD zrt*O`l@F~cDO$Cr@_{v#53MOFTD7L~fi;y6ttly5wWji+HPOP0EWFUdlfGZ@)$aHE zIYky;XyHlG$~DQt3oSe;TDc}!c%g+SMJv}N3oo?rr0-T*c#(ydtKFD{EWFUdli~&Q z*(3`uwD6>8<+Di^UTEP-(aL9&EWFUdlcJT+CRuo)g(rQt(!z@@yj<I@ zn9n9zc%g+SMJu08vhYF+Pl{GPn`GgI7M}FoN((Qt@N%^qlaPfMT6j{tU_P5<;e{5S z6s>$V$-)aQJSkfFY?6f+T6j{l^4TN{FSPKa?^arPk%gD5-58=QywJjv;sx{BBnvOJ z@T6$vvq=_SXyHlG%4d@-ywJjvzFTSGMHXJJc4HE<@InhuiWkghlPtW@!jqzv&n8)T zp@k`3oo?rq-f={ zNfusc;YrcTXOk?v(880xTWR4%7GAD)V~DcwLJLod7tCjqEWFUdlcJT+CRuo)g(pQT zpG~sxLJLp&Y^}8Lq&TW)Qe*Cf;guGi6s>wTRatnYg(pR;o=sI2UTNV; z(W+-tm4#PYc+z((ExgLYD=j=JTDc}!c)i;Fej+ACE7v3ouUEU(sx{HVt1P@;?N+PS zL<_I7@Jb6$`u-iJg;!a4rG+O&E7v3oue9)_Xyuw@;guGi6s>$V$-*lwJSkfFY?6gn zT6ofTD=oar!YeI2DO$NES$MtLjsJUKXd^X9#D=j?fyOkDR zW#N?;o)oS6F1*UZ>(y>dLKa?W;Ysm=`D~JfS6X;dwDQ>`3$L{Bq-f={Nfusd;Yr`E zwD2kmue9)_Xyuw@;q_`ahA0cKwD6>O!F)E!!YeI2DO&k#l7&}Vc+z((ExgLYD=j=J zTKQ~}h1aXyn1n36(!!JC1@qY?3$L{Bq-f={Nfusd;YrcTXOk?v(!!IzTWR4{7G7!L zNzuwR$-?W^ZVXWtUTNV;@q+nml7&}Vcv7_T*(3|EwD6?wR$6$Kg;!d5Qnd2fBnz)s zyD8<+Di^UTNV;pRJ7+o)kxA;Y}9aXyHlGsx>uPc%y|U zMXR1oO%~p0;Yrb|XH%1fH(Gd7wCdT^WZ{h#p7h;H3vaUUMhj1hR<21F-e}=T(aJT+ z!W%6-DO$NES$Ly`Cq*mQBnxk}@TBinT6mL%H(Gd7v~o?d@J0(yidL>k7T###NzuwR z$-)~gJSkeaCRuo+g(rQt(!!f8ywSpwqLpisg*RGwQnYeSvha4b`~BToQnYeSvha4b zTdi6XExgIX8!bHP``1bfZ?f>ld*MmZ$~DQt8!bF3TDc}!c%y|UMJv}N3vaaWq-f={ zNfzE{;Yr`EwD2YiZ?y2FXyuw@;f)ra6s=s7EWBOq##2HT-e}=T@q+nml7%-~c+z(( zExgIX8!bF3TIEem7T###Nzp1HXtMBjwHuR=g*RGwQoLY3n`GgQ7M>KXd^X9#8!bHP zyOkE+WZ{h#o)oQoHp#*pEj%e&xh7e7yV{K*%EB8hJSkok z7T#&$NzuwR$-+A=JSkeaCRuo=g(rQt(!#qeywk#yqLpisg?CzbQnYeSvhYp|Pl{Hq zNfzE|;YrcTHOay|Ej;PFl@{J*;hh$q6s=s7EWFdglcJStl7)9#cv7@-O|tM#3r~tx zu1OZ&Y2iuVt+eni3-7e>q-f=uWZ|6_o)oQIlPtW`!jqzvYm$ZctKIKENhU=r*CY$? zwD6?wR$6$Mg?D~NN>a3PO|tOLd*MmZ$~DQtJ1smZTDc}!c&CLYMJv}N3-7e>r0-T* zc$bBDT6j{la!s=EP763Jey?Uofe+-{eo%XT^8PH;Yrad z3-7Yk7Cva< zNzuwR$-)OMJSkeaCRzBPg(pQT*CY!cwD6?wR$BOwg%4VIQnYeSvhYC*Pl{HqNfth6 z;YrcTHOayUEj%e&xh7fopoJ%Wx6;CgEPT+ylcJStl7$aicv7@-O|tMo3r~txu1OX? zXyHlG$~DQt2Q575yOkC`WZ{Dro)oQIlPrAD!jqzvYm$WzT6j{la!s=EK?_fcR<21F zK4{@d->tOpAqyY0@T6$vnq=XF7M>KXT$3z((880Vm1~lP4_bIqv~o?d@Iebt`fjC# z4_Wx2g(pQT*CY!cwD6>8<(g#SgBG3?tz45Ve9*#^qLpisg%4VI(swH@e8|EFEj%e& zxh7fopoJ$zE7v3oAGGkKXyuw@;e!^Q6s=s7EPT+ylfGMN;X@Wau6Dm|Eh$>LCRzC4 zXQU)WE7v3oAG{Zy6s=s7EPT+ylcJStl7$aic+z((Equtr2Q54)TDc}!_@IR+MJv}N z3m>%bq-f=uWZ{Dro)oQIlPrAF!jnE*=hg1_112es>e)19;qz*@TJb*Jq=hHN3)Y&Z zEPT?!lcH5?nzHao3r~txt!c`_CoMebyOkC`W#N++o)oQIlPrAF!jqzvYm$XeT6j{l za!s=ENefSkR<21FK55}e->tOpDGQ&p@T6$vnq=XV7M>KXT$3z((!!IXm1~lPPg;0V zv~o?d@JS0#`fjC#Pg(e+g(pQT*CY#{wD6>8<(g#SlNO#7tz45VeA2>`qLpisg-=>| z(swH@e9FQnEj%e&xh7foq=hF%E7v3opS19#Xyuw@;gc4g6s=s7EPT?!lfGMN;Zqhq zY2iuH$~DQtCoMcFTDc}!_@sp=MJv}N3!k*`q-f=uWZ{z*p7h;H3!k#^NefSkR<21F zK55}e(aJT+!Y3^}DO$NES@@)dCq*mQBnzLk@TBinTKJTOPg;0Vv~o?d@JS0#idL>k z7Cvd=NzuwR$-*ZsJSkeaCRzBTg(rQt(!!@KeA2>`qLpisg-=>|QnYeSvhYa@Pl{Hq zNfth7;YrcTHOay!Ej;PFl@>l_;gc4g6s=s7EPT?!lcJStl7&xNcv7@-O|tMw3r~tx zu1OZYXyHkpt&0|(6h~#@OBTLp;Yrb|zq7Jr;p=MmTi}zTRext?$->vwZnfeQcNQ%? zDPFMFv}ECn7M}FoN(*1I@I?zxidH?FmMnbH!jqzvYm$X8T6j{la!s=EMGH@gR<21F zzG&e|->tOpB@17)@T6$vnq=XN7M>KXT$3z((ZZ9Wm1~lPFIsp~v~o?d@I?zx`fjC# zFIo7ag(pQT*CY#HwD6>8<(g#Six!>~tz45Ve9^*_qLpisg)dro(swH@e96KWEj%e& zxh7foqJ<|#E7v3oU$pR~Xyuw@;fofY6s=s7EPTk7QSfVNzuwR$-);c zJSkeaCRzBRg(rQt(!!T4e9^*_qLpisg)droQnYeSvhYOPDTQncz5ceX5iU+sPieA0I-Equ$uH!VCV zTDc}!_`ceWU#l#9)54SD1?$q>riCX(E7v3o-?Z?g?^as)mW6Lx zcv7@-O|tM!3r~txu1OZYY2iuH$~DQtH!VCVTDc}!_@;#?eYeuWw=8_q!jqzvYm$X; zT6j{la!s=EO$$$oR<21FzG>k}(aJT+!Z$5E>ARH{zGdN?7M>KXT$3z()54RYm1~lP zZ(4X#v~o?d@J$O(idL>k7QSiWN#Cus@GT49wD6>8<(g#Sn--oFtz45VeAB{{qLpis zg>PDTQnYeSvhYm{Px@}9g>PB-riCX(E7v3o-?Z?gXyuw@;hPqo6s=s7EPT_#lcJSt zl7(+tc+z((Equ$uH!VCVTDc}!_@;#?MJv}N3*WTxq-f=uWZ|0@o)oQIlPrAG!jryR zY2jNIzG>k}(aJT+!Z$5EDO$NES@@=fCq*mQBn#iP@T6$vnq=Xd7M}FoN(k7QSiWNzuwR$-*}+JSkeaCRzBVg(pQT*CY!+wD6?Q)0Wty&W;{K&%3)o!(FO|IlfGMN;YSvJXyHlG$~DQt4=p??TDc}!_@RX- zMJv}N3qQ2*q-f=uWZ{Pvp7h;H3qP{(LkmxeR<21FerVxI(aJT+!VfJxDO$NES@@xa zCq*mQBnv;Z@TBinTKJKLA6j@)v~o?d@IwnvidL>k7Jg{qNzuwR$-)mUJSkeaCRzBQ zg(rQt(!!4{{LsRaqLpisg&$gYQnYeSvhYI-Pl{HqNfv%+;YrcTHOaycEj;PFl@@+v z;fEHU6s=s7Ed0>IlcJStl7$~ycv7@-O|tMq3r~txu1OYtXyHlUt+enX3qQ2*q-f=u zWZ{Pvo)oQIlPvtu!jqzvYm$W@T6j{la!s=ELkmy(Zl#4ES@@xaCq*mQBnv;Z@T6$v znq=XJ7M>KXT$3#P(880Vm1}~9mk(L^XZmWrAwDnsGsRKp_ zrW;~SpD9|Qm-pyuAUXv{R9@Uy;;rFQ4bhR66f`#9omurHB-=ka;Ec_njnqcAg zDAxoFzel+ySol54HNnEmhb;UveXmItevfKRvhaIUYr5KvHNnE~&&xHz!tYV82^M~j za!s)Cdz5Q}h2NuG6D<55<(gpOat zfAa79&VTtE>mU4w{?i}-`5*tzpZn$SfAyoUzW(74zVqc9Uw+~K^K<$6^W&RezI^Y? z{V#s;{_o$u|NQ)yZ~fj+fBgI3{ez$U?7R0pocmtP{h{9=0eT^zCq*kKC_pa+^rUFz z1O@1YfSwesoS*=`5YUsNl@k=87Xo_HcPjzC2+#`wJtNxh4U6A)qHkE7v4IF9h_Y zXyuv&=!Jlu^xaB8F9P&JKu?NRu1SDi2kfL;jbNzuwR3D64xJ?XoZfL;XXg@B$Etz44; zy%5loqLphBpcevqQnYeS0`x*aPl{HqNq}Am=t4pdLf`EMJv}NKraOJq-f=u1n7l;o)oQI zlK{OC(37H-YZ9PW0(#PCYbBs3#Zduz)n)QZKu?NR0eaPC@=8EYidF%7)n)QZKu?NR z0eTgnS6(Jh`feqlS6wErwD6>8<(g#Sm6yqrqLpisg;!oCPl{HqNfustnLH_4xh7e7 z zD=oar!YeI2DO$NES$O4T@}y|xnq=XXm&uc&m1~lPS6(JhidL>k7G8OoJn6fY7G7oH zl@^{9tz45Vyz(-6QnYeSvhd2w(8a!s=E%FEtOpDhsc)@T6$vnq=XXm&uc&m1~lPS6(JhidL>k z7G8OoJSkeaCRuppW%8uY)k7T### zN#Cus@FojyyceDntz45VyzyRmQnYeSvhc=x;YrcTHOayo?}aBtE7v3oZ?y2F?^arP zlZ7|l3r~txu1OZ&crQFDTDc}!c;mhBq-f=uWZ{kX!jqzvYm$XGT6ofTD=oaq!W-{} zCq*mQBnxl67oHTYT$3!k@m_dRv~o?d@Wy-LNzuwR$-)~gJn6fY7T#pxjrYQnqLpis zg*V;{Pl{HqNfzFCFFYw)xh7e7ld*MmZ$~DQt8}EfD zMJv}N3vav^o)oQIlPtXPUU*Wpa!s=EP76=^Z0)=ko)ky*&Ptbscisz6idMa|(q-YD z_rjB+Rqw2HS$OBY@T6$fJ1bol-gz%P>ARH{-euvP7M>KXT$3!k^ImvTv~o?d@XmYT zNzuwR$-+DDg(pQT*CY$?yceGI-AW7ZvhYp|Pl{HqNfzFDFFYw)xh7e7=e_WxXyuw@ z;hp!wlcJStl7)BP3s3rPrGk7T$R;JSkeaCRuprz3`-H<(g#S zo%h0%zFTSGT^8PH;YrcTHOay|?}aBtE7v3o@4OeD6s=s7EWGnxcv7@-O|tOLd*Mmn zt+eni3-7e>q-f=uWZ|9n!jqzvYm$X`-V0BPR<21F-gz%PDO$NES$OBY@TBinT6mX* zcUpK-v~o?d@XmYTNzuwR$-+DDg(pQT*CY$?yceDntz45Vyz^do(swH@yvxEnEj%e& zxh7e7=e_WxXyuw@;hp!wlcJStl7)BP3r~txu1OZ&c`rQayOkE+W#OF`o)oQIlPtXR zUU*Wpa!s=E&U@iW(aJT+!aMJUCq*mQBn$7n7oPOlI%wfZaa8xhhb(;XUU*Wp>YbG# z3m?1}o)oQmXJyF32k(U^MXTOf8M5%fd*MmZs&`g~EPT+ylfGMN;X@WacrQFDTDc}! z_~5tOpAqyY87oHTYT$3z(@LqUQ zv~o?d@WFfGNzuwR$-)Qkg(pQT*CY!cwD6?wR$BOwg%92fPl{HqNfthMFFYw)xh7fo z;JxsqXyuw@;e+?WlcJStl7$aic+z((Equtr2k(U^MJv}N3m?1}o)oQIlPrAjUU*Wp za!s=E!F%CJ(aJT+!Urup>ARH{K4js8_rjB+m1~lP58ew;idL>k7Cv|{JSkeaCRzC4 zz3`-H<(g#SgBG6j-AW4|vhcxs;YrcTHOayU?}aBtE7v3oAG{Zy6s=s7EPU`@cv7@- zO|tMo3s3rPrG*b!_~5YbG-3!l6fo)oQmXJyL5C+~$PeYeuWr!0KZ!jqzv zYm$Xe-V0BPR<21FK6x)ZDO$NES@`6=@T6$vnq=XV_rjCDTWR4_7Cvd=NzuwR$-*b^ zg(pQT*CY#{yceDntz45VeDYp+QnYeSvhc}!;Yr`EwD2hlpS19#Xyuw@;gk2mlcJSt zl7&y+3r~txu1OX?c`rOETDc}!_~gCtr0-T*_>_fDT6j{la!s=E$$Q~R(aJT+!YA*A zCq*mQBnzLs7oHTYT$3z(@?Ln-cPlM?%EBitJSkeaCRzC8z3`-H<(g#SllQ`tqLpis zg-_lKPl{HqNfthNFFfhHl@>l_;gc4g6s=s7EPV1_cv7@-O|tOGd*MmZ$~DQtC+~$P zMJv}N3!l6fp7h;H3!k#^NefSkR<21FK6x)ZDO$NES@`6=@T6$vnq=XV_rjB+m1~lP zPu>ep`fjC#Pg(e+g(pQT*CY#{yceDntz45VeDYp+QnYeSvhc}!;YrcTHOay!?}aCQ zx6;C=EPT?!lcJStl7&y+3r~txu1OX?c`rOETDc}!_~gCtq-f=uWZ{$d!jnE*7cD#~ zj_O|cl7%nc3r~txy|c1p;fwdelcH7ctSnjh;=S;sXw^F_OBTL(FFYw)_0GzYg)dro z(swH@e96KW?}aBtE7v3oU%VHd6s=s7EPU}^cv7@-O|tOCd*MmZ$~DQt7cD&LyOkEc zWZ{eV!jqzvYm$X8-V0BPR<21FzIZP@DO$NES@`0;@T6$vnq=XN7M}FoN(*1I@Wp%K zNzuwR$-)=!g(pQT*CY#HyceDntz45VeDPj*QnYeSvhYO7oHTYT$3z((ZZ9yTWR4-7QT2dJSkeaCRzC6z3`-H<(g#S zi}%8lqLpisg)iOzS$-V0BPR<21FzIZP@DO$NES@`0;@T6$vnq=XN z_rjB+m1~lPFIsrgcPlM?$-)=!g(pQT*CY#HyceDntz45VeDPj*QnYeSvhc-w;YrcT zHOay^Ej;P7b@N_$QXJJgD_a)6c`rOETJ_G#mW6NL3r~txy|c1q;hXoulcH7ctZZ5M z=DqNw?^as)mW6Lxcv7@-O|tOKd*MmZ$~DQtH}8cfMJv}N3*WpKo)oQIlPrAmUU<@X zD=mD>!Z$5EDO$NES@`C?@T6$vnq=Xd_rjB+m1~lPZ{7<}idL>k7QT5eJn6fY7QSWS zn--oFtz45VeDhv-QnYeSvhdA&;YrcTHOay^?}aBtE7v3o-@F%|^xaAe-?H#c3r~tx zu1OZYc`rOETDc}!_~yOvq-f=uWZ|3l!jqzvYm$X;-V0CqZl#59S@@=fCq*mQBn#iX z7oHTYT$3z(^ImvTv~o?d@XdSSNzuwR$-+19g(rQt(!#eaeAB{{qLpisg>T*qPl{Hq zNfy3&FFYw)xh7fo=DqNwXyuw@;hXoulfGMN;ae8IY2iuH$~DQtH}8cfMJv}N3*WpK zo)oQIlPrAmUU*Wpa!s=E&3oZV->tOpEeqeY@T6$vnq=Xd_rjB+m1~lPZ{7<}idL>k z7QT5eJSkeaCRzCAz3`;(R$BO$g>PDTQnYeSvhdA&;YrcTHOay^?}aBtE7v3o-@F%| z6s=s7EPV4`c+zLKQQZqavhc%u;Yrb|cUF!p{P13QQnc!wl_Lv3yceDnt$JtW z$iffrg(pR;-dQ=a@Iwnv`fjC#A6fX}z3`-H<(g#ShxfvhqLpisg&*DvPl{HqNfv&1 zFFYw)xh7fop@k=Xx6;CoEd20Zcv7@-O|tOAd*MmZ$~DQt5ATI1MJv}N3qQOUo)oQI zlPvtu!jryRY2il}et0iDDO$NES@_|-@T6$vnq=XJ_rjB+m1~lPAKnX3idL>k7Jg{q zN#Cus@FNR9yceDntz45V{P13QQnYeSvhc%u;YrcTHOayc?}aBtE7v3oKeX_q?^as) zk%b@L3r~txu1OYtcrQFDTDc}!_~E_qq-f=uWZ{SR!jqzvYm$W@T6ofTD=qxU!Vm9- zCq*mQBnv;h7oHTYT$3#P@LqUQv~o?d@WXrINzuwR$-)mUJn6fY7Jg*mhxfvhqLpis zg&*DvPl{HqNfv&1FFYw)xh7fo;l1#rXyuw@;fEHU^xaAeKeF({d*MmZ$~DQt5ATI1 zMJv}N3qQOUo)oQIlPvu3UU*Wpa!s=ELkmy(Zl#4ES@_|-@T6$vnq=XJ_rjB+m1~lP zAKnX3idL>k7JhgyJSkeaCRlj=kcEGyuhtvF!aq|S#XBqa=zC2!#F{=+wBntWd-T1g z8)8kLDO&N)%02pC(+#nv&lIhAXXPG!ujz(Z(`Wi_m4(+2SojUOCRq4A$~D2l?@_J^ z7JiR%O|bBLlxu>8-=ka;Ec_njnqcAgDAxoFzel+ySa|)Ag@2~+AR!tf2Qv> z$-?hZtw|Psk7`Z27k-awO|tNNRNpu0UidxwzL9c6t_c=?k8(}0@OzYNf`#9sToWw3 ze#pW<)AyQW;rFQ4Bn!VswI>0sgaDE~|c3%^JCT{tZK z9_4r8u<(17-#5X+?@@l=1Pi}M`IQb9evk4i9W1?Wvq=_ykLuZ^d*Syeo=t@op7gnk z7G7xKNzuwR$-)aQJSkeaCRuo)g(pQT*CY!swD6?wR$6$$z3^IS;YrcTHOayYEj%e& zxh7e7p@kKXT$3!k z(880Vm1~lP7g~7IcPlNt;9htwwD6>8<(g#Sg%+L^tz45VywJjvqLpisg%?_QQnYeS zvhYF+Px@}9g%{iluZ0$#6s=s7EWFUdlcJStl7$yqcv7@-O|tMp3r~txu1OYNXyHlU zt+eohd*QXv!jqzvYm$W*T6j{la!s=ELJLodR<21FUTEP-(aJT+!V4`t>ARH{US#2g z7M>KXT$3!k(880Vm1~lP7g~5yv~o?d@InhuidL>k7G7xKN#Cus@Pd2cwa~(oqLpis zg%?_QQnYeSvhYF+Pl{HqNfusc;YrcTHOayYEj;P7_1^RS^4I?4|Nrr4fA6P1{{8R% z;cx$?pZ?^#_rCbOo}Cm&^*-Ib5UQhE)4lztqk5n2UiH#Zt?AzD(NVomcQ0}1sMd7f zlh;wbPj}yd)lseKz89yXdY|sTwSuFx@G1+hwD6?&$G7i4KmX-hpF917pZx6atd#e{ zuZ!v^*CY$SrlJ4Kk%iws!1Acp#CzfQ&ocV8a!s=E%6s8S?~m8N9IilfGLC=v9DT3Ft}D$}f)s^h!WaidL>kfL;mcNzuwR3D7G6JtlcJSt5};QCdQ!ANxh4U6C7>roE7v4IuLSg@Xyuv& z=#_w;^x4`7=t*%@fZhb??GpK$?USNakfZlkSJSkeaCINaQpeIEu*Car1yiA@Htz44;z40=6 z(swHXy$R480X->Nxh7e7qlG6$E7v3oZ?y2FXyuw@;f)ra6s=s7EWFXelfGMN;Y}9a zXyHlG$~DQt8!bF3TDc}!c%y|UMJv}N3vaaWq-f=uWZ{h#p7h;H3vaUUMhj1hR<21F z-e}=T(aJT+!W%6-DO$NES$Ly`Cq*mQBnxk}@TBinT6mL%H(Gd7v~o?d@J0(yidL>k z7T###NzuwR$-)~gJSkeaCRuo+g(rQt(!!f8ywSpwqLpisg*RGwQnYeSvhYR=Pl{Hq zNfzE{;YrcTHOayoEj;PFl@{J);f)ra6s=s7EWFXelcJStl7%-~cv7@-O|tMt3r~tx zu1OZ&XyHlUt+ena3vaaWq-f=uWZ{h#o)oQIlPtW^!jqzvYm$XGT6j{la!s=EMhj2+ zZl#4cS$Ly`Cq*mQBnxk}@T6$vnq=XP7M>KXT$3!k(ZZ9Wm1~lPH(GeoXKSa0C&f`& zc$bBDT6j{l%EG%Wywk#yqE**_yDYq4?S2bARH{-euvP7M>KX zT$3!k)54RYm1~lPcUpK-v~o?d@Jk7T#&$N#Cus@GcAQwD6>8<(g#Sofe)H ztz45Vywk#yqLpisg?CzbQnYeSvhYp|Px@}9g?Cwar-dg)E7v3o@3iowXyuw@;hh$q z6s=s7EWFdglcJStl7)9#c+z((ExgOZJ1smZTDc}!c&CLYMJv}N3-7e>q-f=uWZ|6_ zo)oQIlPtW`!jryRY2jTK-f7`U(aJT+!aFTIDO$NES$L;~Cq*mQBn$7f@T6$vnq=Xf z7M}FoN(=9@@Jk7T#&$NzuwR$-+A=JSkeaCRuo=g(pQT*CY$?wD6?wR$6$M zg?CzbQnYeSvhYp|Pl{HqNfzE|;YrcTHOay|Ej%e&xh7e7r-dhdx6;D9EWFdglcJSt zl7)9#cv7@-O|tM#3r~txu1OZ&Y2iuH$~DQtJ1sovvvttIlj5i>e8|EFEj%e&W#K~> zK4{@d(JBicvhYC*Pl{Gq_>hGUT6j{l>Z34*EPT+ylfGMN;X@Wa_`xbk(aJT+!UsQC zB`I3DCRzC42dgASE7v3oAN*jIq-f=uWZ{Drp7h;H3m>xZ!OuuZidL>k7Cx?azu&t` zidL>k7Cx?at5s{Fg%4TyxZ15&t%(*sWZ{Drp7i}YObZ{f@Nu;pzgAiJpoJ&J3+9?+ z;e!^Q6s=s7EPT+ylcJStl7$aic+z((Equtr2Q54)TDc}!_@IR+MJv}N3m>%bq-f=u zWZ{Dro)oQIlPrAD!jryRY2iZ_K4{@d(aJT+!UrupDO$NES@@uZCq*mQBnuz3@T6$v znq=XF7M}FoN(&#d@IebtidL>k7Cva_fDT6j{la!s=ENefSkR<21FK55}e(aJT+ z!Y3^}DO$NES@@)dCw;fl!lx{J(!!IXm1~lP&#T>UflrE7u1OX?uXd|dYodiuS@^u# ztyZmx7CvR+lNO%z{X0wxpR(|IwHv=yS@@)dC&dfqnq=XV7M>KXT$3z((!!IXm1~lP zPg;1=cPlM?%EBitJSkeaCRzBTg(pQT*CY#{wD6>8<(g#SlNO#7tz45VeA2>`zFTSG zQx-mH;YrcTHOay!Ej%e&xh7foq=hF%E7v3opS19#Xyuw@;gc4g^xaAepR({t3r~tx zu1OX?Y2iuH$~DQtCoMcFTDc}!_@sp=MJv}N3!k*`r0-T*_>_fDT6j{la!s=ENefSk zR<21FK55}e(aJT+!Y3^}DO$NES@@)dCw;fl!lx{J(!!IXm1~lPPg;0Vv~o?d@JS0# zidL>k7Cvd=NzuwR$-*ZsJn6G_(ZZABs4RTR!WS((DOx22OBTLp;YradAy~5TMGH@g zR(aEsg)droQnbpOmMnbH!jryRY2iy2zG&e|(aJT+!WS((DO$NES@@!bCq*mQBnw}( z@T6$vnq=XN7M}FoN(*1I@I?zxidL>k7QSfVNzuwR$-);cJSkeaCRzBRg(pQT*CY#H zwD6?wR$BOyg)droQnYeSvhYOk7QU`_t5s{Fg)dq7y4tN)t%(-CWZ{bzp7i}YObcJK z@O8BtzgAiJqJ<~L3+9?+;fofY6s=s7EPTk7QSfVNzuwR$-);cJSkea zCRzBRg(pQT*CY#HwD6?wR$BOyg)droQnYeSvhYOk}(aJT+!Z$5EDO$NES@@=fCq*mQBn#iP z@TBinTKJZQZ(4X#v~o?d@J$O(idL>k7QSiWNzuwR$-*}+JSkeaCRzBVg(rQt(!#ea zeAB{{qLpisg>PDTQnYeSvhYm{Pl{HqNfy3o;YrcTHOay^Ej;PFl@`8b;hPqo6s=s7 zEPT_#lcJStl7(+tcv7@-O|tM!3r~txu1OZYY2iuVt+enh3*WTxq-f=uWa0a2_gmnT zqLpish3~7~YSo%(;ae8IuXd|dYodj3S@@=fCw>18)55nbd|&OxuT>VlY2iung1IJH z_@;#?MJv}N3*WTxq-f=uWZ|0@p7h;H3*WNvO$$$oR<21FzG>k}(aJT+!Z$5EDO$NE zS@@=fCq*mQBn#iP@TBinTKJZQZ(4X#v~o?d@J$O(idL>k7QSiWNzuwR$-*}+JSkea zCRzBVg(rQt(!#eaeAB{{qLpisg>PDTQnYeSvhYm{Pl{HqNfy3o;YrcTHOay^Ej;P7 z_0YnT;;1bA$ifdTJSkfBUHFlOA6j@)wCcO?BMU#Y@T6$fpXrV){LsRaqE&yUJF@UY z3s3rPrG+0^_@RX-MJv}N3qQ2*q-f=uWZ{Pvo)oQIlPvtu!jqzvYm$W@T6ofTD=qxU z!VfJxDO$NES@@xaCq*mQBnv;Z@T6$vnq=XJ7M>KXT$3#P(880xTWR4(7Jg{qNzuwR z$-)mUJSkeaCRzBQg(pQT*CY!+wD6>8<(g#ShZdgn-AW5TvhYI-Pl{HqNfv%+;YrcT zHOaycEj%e&xh7fop@kxcv7@-O|tMq3r~txu1OYtXyHlG z$~DQt4=p??TDc}!_@RX-eYeuWk1YJq!jqzvYm$YZtKDybPl{HqNfv&tcB@ruqJ z__^AxR;`H^eq`Z?7M}F|J4_2dvhZ`Y8^2ar_@RX-#S7+|WZ{Pvo)oQIlPvtu!jqzv zYm$W@T6ofTD=qxU!VfJxDO$NES@@xaCq*mQBnv;Z@T6$vnq=XJ7M>KXT$3#P(880x zTWR4(7Jg{qNzuwR$-)mUJSkeaCRzBQg(pQT*CY!+wD6>8<(g#ShZdgn)!IH};h!mv z!ou&-_nK~qHGQUN#rIA3=zC2!#F{=+wBq}wd-T1g8)8kLDO&Me_&xew(+#nv&lIir zF8m(Vnq=Ya0~UTmt_c=?k8(}0@OzYNf`#9sToWw(9_5-~;rA%l1Pi}Mxh7cnJ<2t~ z!tYV82^QWyWZ|Fbd&9EudsNRRS@=Er{xjVTxh7cnJ<2t~!tYV82^M~ja!s)Cdz5Q} zh2NuG6D<55<(gpO?L!v+nZDN~3%^ITCRzABsx@8he*SUr9@SSmS@=Er7J?gcO|bBL zlxu>8-=ka;Ec_njnqcAgDAxoFZy&Po&-A?}S@=DwHOa#7QLX7}H`W9TzdtXZO|bBL zlxu>8-=ka;Ec_njnqcAgDAxoFzel+ySa|!8g@2~+HOa#7QLRZ9evfKR`izu&RL>?^ z_&us;lRhKm9@Tf@vhaKKt(Q0CnqcAgDAxoFzel+ySa|!8g@2~+HOa#7QLRZ9evfKR zx)*+rYE826dsJ)Ez3_WfYm$ZEqgs15&e=-a|?$Th*j+lMUtGkvc~7JiRv zO|tNNRBMuj-=kX7)oy%|goWRqmtW~%;rA%N(!s*-Q9hes;rA$?O|bBLlxu>8w+~tP zXZl`~Ec_nTnyz-^-zzNq{=EFY2^M~ja!s)Cdz5Q}h2NuG6D<55<(gpO_bAr{3%^IX zCRlj;kcEGy?={K7?@_JkYB$yd3%@@v*8~f{N4X|g_&v%s!NTuRt_c=?k8(}0@OzYN zf`#9sT$3!k(87~G*Ho@{zbBCtN42Jcd*Q8I?N%$k(iK{GQoLZTsmQ_$Ej%e&wWcBq zFSPKaXw{mEEWFUdlfGMN;YAi+XyHlG$~DQt3oSe;TDc}!c%g+SMJv}N3oo?rq-f=u zWZ{Jtp7h;H3oo+pLJLodR<21FUTEP-(aJT+!V4`tDO$NES$Ls^Cq*mQBnvOJ@TBin zT6mF#7g~5yv~o?d@InhuidL>k7G7xKNzuwR$-)aQJSkeaCRuo)g(rQt(!z@@ywJjv zqLpisg%?_QQnYeSvhYF+Pl{HqNfusc;YrcTHOayYEj;PFl@?xP;e{5S6s=s7EWFUd zlcJStl7$yqcv7@-O|tMp3r~txu1OYNXyHlUt+enW3oo?rq-f=uWZ{Jto)oQIlPtW@ z!jqzvYm$W*T6j{la!s=ELJLp&Zl#46S$Ls^Cq*mQBnvOJ@T6$vnq=XH7M>KXT$3!k z(880Vm1~lP7g~7IcPlNt$ifRPJSkeaCRuo)g(pQT*CY!swD6>8<(g#Sg%+L^tz45V zywJjvzFTSGMHXIY;YrcTHOayYEj%e&xh7e7p@kKXT$3!k(!!IXm1~lPS6X;dv~o?d@Jb6$`fjC#S6O(a zg(pQT*CY$CwD6>8<(g#Sl@^{9tz45VywbvxqLpisg;!d5(swH@yvo8WEj%e&xh7e7 zrG+O&E7v3oue9)_Xyuw@;guGi6s=s7EWFaflfGMN;Z+u1Y2iuH$~DQtD=j=JTDc}! zc%_9WMJv}N3$L{Bq-f=uWZ{(-p7h;H3$L>9N()blR<21FUTNV;(aJT+!YeI2DO$NE zS$L&|Cq*mQBnz*!@TBinT6mR(S6X;dv~o?d@Jb6$idL>k7G7!LNzuwR$-*lwJSkea zCRuo;g(rQt(!#4OywbvxqLpisg;!d5QnYeSvhYd^Pl{HqNfusd;YrcTHOay&Ej;PF zl@?xQ;guGi6s=s7EWFaflcJStl7&}Vcv7@-O|tMx3r~txu1OYNY2iuVt+ene3$L{B zq-f=uWZ{(-o)oQIlPtW_!jqzvYm$XmT6j{la!s=E`tpOUq<3lSor;$Cp?NZ-M?nQqxg)Jduv}u zW#Nq$p7cFLT6mL%H-1J+QnYeSvhc>wNJ)xTu1OZ&_!%il(aJT+!W%y$CF#4B7T#px zjTW91t$a4g!W%y$B`I3DCRupnXQU)WE7v3oZ~Tmuq-f=uWZ{jUk&^V?N(*nY@J0(y zidL@aTKFIT^*{b^{>K0PfBokF`49Xfzx2fy{d{pY{+FaHbQ|LR9yef`59eCNwI zzWl=d=jZbCfBEK@fBTzXzI^Y?{V#s;{>N|Me}4YUw|?)ZKmPsi{=rXv_TBqwXZI7r z?hieI1?Y`{o)oS8@+d%W1oWh6<(EeRdLy7GMJvBN3eXz?J?XoZfZhb?jewpMtz44; zy%ErpqLphBpf>_~QnYeS0`x{ePl{HqNr2u6=tI0gDO$NE0eT~#Cq*mQBtUNj^rUFzngr;LfSwes zT$2F35zv#qTM6h*fZhn`NzuwR3D6q>JtNxh4U6BcLZmE7v4I?*#Ou&(=;rPl}^r zg|5rwoq(PctwO4<%jBJao)oP@s;k zfZhq{NzuwR3D7$MJtU!u!>3wQ5bY@GcAQwD6?w-(gyKmxcGM-T1Z2!aFTIDPAzw zBn$7f@T6$vnq=Xf7M>KXT$3!k)54R!TWR547T#&$NzuwR$-+A=JSkeaCRuo=g(pQT z*CY$?wD6>8<(g#Sofe+--AW7ZvhYp|Pl{HqNfzE|;YrcTHOay|Ej%e&xh7e7r-dg) zE7v3o@3iow?^arPmxXs)cv7@-O|tM#3r~txu1OZ&Y2iuH$~DQtJ1smZTDc}!c&CLY zeYeuWyDYrZ!jqzvYm$X`T6j{la!s=EP76ARH{-euvP z7M>KXT$3!k)54RYm1~lPcUpK-v~o?d@Jk7CvaJGB?LnjK4{@d z(JCPrvhYC*Pl{Fv!H|UyT6j{l%A1BPe9*#^zFTSGLl!=0;YrcTHOayUEj%e&xh7fo zpoJ$zE7v3oAGGkKXyuw@;e!^Q^xaAeAF}X43r~txu1OX?XyHlG$~DQt2Q54)TDc}! z_@IR+MJv}N3m>%br0-T*_>hGUT6j{la!s=EK?_fcR<21FK4{@d(aJT+!UrupDO$NE zS@@uZCw;fl!iOw;(880Vm1~lP4_bIqv~o?d@Nu>KE$~Uv$~DQt$JK7NYE888AqyW@ zyVa^S(ZYu;e9*#^zJG^l;X@Wau6E8<(g#SgBG3?tz45Ve9*#^qLpisg%4VI(swH@ ze8|EFEj%e&xh7fopoJ$zE7v3oAGGkKXyuw@;e!^Q6s=s7EPT+ylfGMN;X@WaXyHlG z$~DQt2Q54)TDc}!_@IR+MJv}N3m>%bq-f=uWZ{Drp7h;H3m>xZK?_fcR<21FK4{@d z(aJT+!UrupDO$NES@@uZCq*mQBnzLk@TAYyNefSkqxv)5l!Z@Pcv7_L&va83K55}e z(W*bwOtOpDGQ&p@T6$vnq=XV7M>KXT$3z((!!IXm1~lPPg;0Vv~o?d z@JS0#`fjC#Pg(e+g(pQT*CY#{wD6>8<(g#SlNO#7tz45VeA2>`qLpisg-=>|(swH@ ze9FQnEj%e&xh7foq=hF%E7v3opS19#Xyuw@;gc4g6s=s7EPT?!lfGMN;ZqhqY2iuH z$~DQtCoMcFTDc}!_`KTv7WkxS<(g#S^J=$RwI*8ll!ec$-D=gEXyH>9K55}e-@n7O z@F@$QSG)0Rm4#1Qcv8Gzu1OX?Y2iuH$~DQtCoMcFTDc}!_@sp=eYeuWr!0KZ!jqzv zYm$XeT6j{la!s=ENefSkR<21FK55}e(aJT+!Y3^}>ARH{K4syP7M>KXT$3z((!!IX zm1~lPPg;0Vv~o?d@JS0#idL>k7Cvd=N#Cus@F@$QwD6>8<(g#SlNO#7tz45VeA2>` zqLpisg-=>|QnYeSvhYOtOpB@17)@T6$vnq=XN7M>KXT$3z((ZZ9Wm1~lPFIsp~v~o?d@I?zx`fjC# zFIo7ag(pQT*CY#HwD6>8<(g#Six!>~tz45Ve9^*_qLpisg)dro(swH@e96KWEj%e& zxh7foqJ<|#E7v3oU$pR~Xyuw@;fofY6s=s7EPTARH{zGUHx7M>KXT$3z((ZZ9Wm1~lPFIsp~ zv~o?d@I?zxidL>k7QSfVN#Cus@FfdhwD6>8<(g#Six!>~tz45Ve9^*_qLpisg)dro zQnYeSvhYm{Px@@#wD6=js_&b&EPT_#lcH7MH*Hz?riCX(tG;jAvhYm{Pl{H37rtfT zn--q*-AW7JvhYm{Pl{HqNfy3o;YrcTHOay^Ej%e&xh7foriCX(E7v3o-?Z?g?^as) zmW6Lxcv7@-O|tM!3r~txu1OZYY2iuH$~DQtH!VCVTDc}!_@;#?eYeuWw=8_q!jqzv zYm$X;T6j{la!s=EO$$$oR<21FzG>k}(aJT+!Z$5E>ARH{zGdN?7M>KXT$3z()54RY zm1~lPZ(4X#v~o?d@J$O(idL>k7QSiWN#Cus@GT49wD6>8<(g#Sn--oFtz45VeAB{{ zqLpisg>PDTQnYeSvhYm{Px@}9g>PB-riCX(E7v3o-?Z?gXyuw@;hPqo6s=s7EPT_# zlcJStl7(+tc+z((Equ$uH!VCVTDc}!_@;#?MJv}N3*T3}-vXZ$tz45Vd|&NWtJXvd z-?H$1wOg%P6D@qp!Z$5E>HBw>7QSWS`)W6St+Mb<3r~s{%r(ivH!VCVTDc}!_@;#? zMJv}N3*WTxr0-T*_?CrlT6j{la!s=EO$$$oR<21FzG>k}(aJT+!Z$5EDO$NES@@xa zCw;aaT6j_%)mOSB3qQ2*q-fPwx+4oewD6>8)mOSB3qQ2*q-fRmO-B}fXyHlUt+enX z3qQ2*q-f=uWZ{Pvo)oQIlPvtu!jqzvYm$W@T6j{la!s=ELkmy(Zl#4ES@@xaCq*mQ zBnv;Z@T6$vnq=XJ7M>KXT$3#P(880Vm1~lPA6j_QcPlOY$ifdTJSkeaCRzBQg(pQT z*CY!+wD6>8<(g#ShZdd`tz45V{LsRazFTSGM;3l);YrcTHOaycEj%e&xh7fop@kqMlT6j{la!s=ELkmxeR<21FerVxI(aJT+!VfJxDO$NES@@xa zCw;fl!jCNc(880Vm1~lPA6j@)v~o?d@IwnvidL>k7Jg{qNzuwR$-)mUJn6fY7Jg*m zhZdd`tz45V{LsRaqLpisg`cb4Z-GyWR<21Fey(<_RcoS!A6fXh+O1Zti57lj;fEHU z^!+qR-J|a{-4JW~Owo#G(>?lL(+#nv&lIirN_UUG*K|Xy z=`($|%EJ2xEc}LC6D<55<(gpO_bAr{3%^IXCRq4A$~D2l?@_J^7JiR%O|bBLlxu>8 z-=ka;EWCfn!avjZ5M|-_==&?(4Y?**_&v%s!NTuRt_c=?k8(}0@OzYNf`#9sToWw( z9_5-~;rA%l1PkvUvhdIJy(U@sJ*qWb?S6jqbC2pPohj=vf2Qv>$-?hZt?6nv)&vW`KQGq=3%^IX zCRq4A$~D2l?@_J^7JiR%O|bBLlxu>8-=ka;EWCfn!avjZnq=YksMd6~8*74v-=CLj zf`#9sToWw(9_5-~;rA%l1Pi}Mxh7cnJ<2t~!tYV82^QWzWZ|Fbdrh+NdsJ)EXQbSt z`o2jPevj(=CVfWAJt`rPh2NuZy}TjU1Pi}Mxh7cnJ<2t~!tYV82^QWzWZ|Fbdrh+N zdsJ)Ez3_WfYm$ZEqgs>hh2NuElPvrm)tYoK{2rAz$-?i^w}s!3Yl4N}qg)g2h4&9x z_-FcFlPvrm)tY4C_o&t+3%^ITCRzABsx@8h#+PPT`2BhLY=VW~qkJ~O!tYV82^M~j za!s=ELJLp&TvMTiC&f{H-&ADbg%+L^t@^&H$ifRPJSkeWrXmY3wD6>8)tZVdywJjv zzFTSGMHXIY;YrcTHOayYEj%e&xh7e7p@kKXT$3!k(880Vm1~lP7g~7IcPlNt;9htywD6>8<(g#Sg%+L^ ztz45VywJjvqLpisg%?_QQnYeSvhYF+Px@}9g%{il?}Zkg6s=s7EWFUdlcJStl7$yq zcv7@-O|tMp3r~txu1OYNXyHlUt+eohd*Qv%!jqzvYm$W*T6j{la!s=ELJLodR<21F zUTEP-(aJT+!V4`t>ARH{UT`nG7g~5yv~o?d@InhuidL>k7G7xKNzuwR$-)aQJSkea zCRuo)g(rQt(!vYwh4(@WPl{HqNfusc;YrcTHOayYEj%e&xh7e7p@k8<(g#Sl@^{9tz45VywbvxqLpisg;!d5(swH@yvo8WEj%e& zxh7e7rG+O&E7v3oue9)_Xyuw@;guGi6s=s7EWFaflfGMN;Z+u1Y2iuH$~DQtD=j=J zTDc}!c%_9WMJv}N3$L{Bq-f=uWZ{(-p7h;H3$L>9N()blR<21FUTNV;(aJT+!YeI2 zDO$NES$L&|Cq*mQBnz*!@TBinT6mR(S6X;dv~o?d@Jb6$idL>k7G7!LNzuwR$-*lw zJSkeaCRuo;g(rQt(!#4OywbvxqLpisg;!d5QnYeSvhYd^Pl{HqNfusd;YrcTHOay& zEj;PFl@?xQ;guGi6s=s7EWFaflcJStl7&}Vcv7@-O|tMx3r~txu1OYNY2iuVt+ene z3$L{Bq-f=uWZ{(-o)oQIlPtW_!jqzvYm$XmT6j{la!s=EN()c=Zl#4+S$L&|Cq*mQ zBnz*!@T6$vnq=XX7M>KXT$3!k(!!IXm1~lPS6X<|XKSN{C&f`&c$0-UT6j{l%EFs0 zywSpwqE&y(pvl79)$X^zCq=9NmO+z+x2xT1#b=~6T6og;3#NrPS$Ly`Cq=8CO-&Zw zXyHlGs%KM^g*RGwQnYeSvhYR=Pl{HqNfzE{;Yr`EwD2YiZ?y2FXyuw@;f)ra6s=s7 zEWFXelcJStl7%-~cv7@-O|tMt3s3rPrG+KX zT$3!k(ZZ9yTWR4<7T###NzuwR$-)~gJSkeaCRuo+g(pQT*CY#XwD6>8<(g#SjTWBt z-AW5@vhYR=Pl{HqNfzE{;YrcTHOayoEj%e&xh7e7qlG6$E7v3oZ?y2F?^arPlZ7{0 zcv7@-O|tMt3r~txu1OZ&XyHlG$~DQt8!bF3TDc}!c%y|UeYeuWn=HK1!jqzvYm$XG zT6j{la!s=EMhj1hR<21F-e}=T(aJT+!W%6->ARH{-elp87M>KXT$3!k(ZZ9Wm1~lP zH(Gd7v~o?d@J0(yidL>k7T###NuRCv^Pgd+NpVybem^K!N42K=sjxaK3%?%$s-s%d z{k%sVm4)9A_tR0W>3))%j>^LC$9Czc)^tCkNk{dGJNE;5aFiC_W#OF`p7j3s_WkGQ zzkKWWe){9z|L!0Br1!^TO;@||?^PDwY2iuH$~DQtJ1smZ zTDc}!c&CLYMJv}N3-7e>r0-T*c$bBDT6j{la!s=EP76q<`+e|37}~&-|@_?r;9P|Lwo= z=YQ#+{@efE*Z=ER|Je7x`q5Wk|L_Oj`SRtD?stNJ^iSkBar4}wF0K6sftDOyFILx4VbnLH_4MV>=|K6sftDOv^SLx4VbnLH_4 zMV>=|J_zVZ->n4nAwVAl^rUFznsk|b5YUsNm1`294+45pv~o=X^l^#&{io)nXyux8 znS2n?lfGLC=tF=$2Ru)fj_0s0`ICq*mQBtRbo^rY`r z0{Rf34+45pwCek&AwVAl^rUFzngr;BfSwesT$2EO5YUsNm1`294+479cPjyX2+#)s zJt%bq-f=u zWZ{Dro)oQIlPrAD!jqzvYm$WzT6ofTD=mD;!UrupDO$NES@@uZCq*mQBnuz3@T6$v znq=XF7M>KXT$3z((880xTWR4#7Cva8 z<(g#SgBG6j-AW4|vhYC*Pl{HqNfth6;YrcTHOayUEj%e&xh7fopoJ$zE7v3oAGGkK z&(=u`Pl}_m@F@$QwD6>8l{ZaU_@sp=MXU63%EBitJSkeGms1u#Y2iuHD!rVt@JS0# z`fjC#Pg(e+g(pQT*CY#{wD6>8<(g#SlNO#7tz45VeA2>`qLpisg-=>|(swH@e9FQn zEj%e&xh7foq=hF%E7v3opI5uz0-qGET$3z(UhP(^)U4Y2iung7tmVl!Z@Pcv7@-O|tMw3r~txu1OX?Y2iuVt+end3!k*`q-f=uWZ{z* zo)oQIlPrAF!jqzvYm$XeT6j{la!s=ENefTKXT$3z((!!IXm1~lPPg;1=cPlM?%EBitJSkeaCRzBTg(pQT*CY#{wD6>8<(g#S zlNO#7tz45VeA2>`zFTSGQx-mH;YrcTHOay!Ej%e&xh7foq=hF%E7v3opS19#Xyuw@ z;gc4g^xaAepR({t3r~txu1OX?Y2iuH$~DQtCoMcFTDc}!_@sp=MJv}N3!k*`r0-T* z_>_fDT6j{la!s=ENefSkR<21FK55}e(aJT+!Y3^}DO$NES@@)dCw;aqT6j_%m4z=^ z_@ad;MXS7N$-);cJSke`O-mNOXyHlGDsNh{@I?zxidO06l7%l?c+z((Equws7cD#~ zTDc}!_@ad;MJv}N3tzPGq-f=uWZ{bzo)oQIlPrAE!jryRY2iy2zG&e|(aJT+!WS(( zDO$NES@@!bCq*mQBnw}(@T6$vnq=XN7M}FoN(*1I@I?zxidL>k7QU`_zXd)iTDc}! z_`2GyR;`H^zGUI+YPVXoCR+HCg)dro()aH$Equws7e81fDO$NES@_~-q$EWv*CY#H z{EU>OXyvm>7QXlyDM``FXOk>^(ZZ9yTWR4-7QXlyDM``FHOa!))o%RXt1Nuc!js|! z^VuW|U$pR~Xyvm>7QSfVN#Cus@FfdhwD6>8l@Kgh_@ad;MJv}N3tzPGq-f=uWZ{bz zo)oQIlPrAE!jryRY2iy2zG&e|(aJT+!WS((DO$NES@@!bCq*mQBnw}(@T6$vnq=XN z7M}FoN(*1I@I?zxidL>k7QSfVNzuwR$-);cJSkeaCRzBRg(pQT*CY#HwD6?wR$BOy zg)droQnYeSvhYO9#C<)54R!TWR527QSiW zNzuwR$-*}+JSkeaCRzBVg(pQT*CY$ywD6>8<(g#Sn--q*-AW7JvhYm{Pl{HqNfy3o z;YrcTHOay^Ej%e&xh7foriCX(E7v3o-?Z?g?^as)mW6Lxcv7@-O|tM!3r~txu1OZY zY2iuH$~DQtH!VCVTDc}!_@;#?eYeuWw=8_q!jqzvYm$X;T6j{la!s=EO$$$oR<21F zzG>k}(aJT+!Z$5E>ARH{zGdN?7M>KXT$3z()54RYm1~lP@2lN!flrE7u1OZYuXd|d zYodj3S@@=fCw>1~Y2jNIzOQ!U*D4F&wD6>O!4iTk3*WTxq-f=uWZ|0@o)oQIlPrAG z!jryRY2jNIzG>k}(aJT+!Z$5EDO$NES@@=fCq*mQBn#iP@T6$vnq=Xd7M}FoN(k7QSiWNzuwR$-*}+JSkeaCRzBVg(pQT*CY$ywD6?wR$BO$g>PDTQnYeS zvhYm{Pl{HqNfy3o;YrcTHOay^Ej%e&xh7foriCYcwjNq|QXG|qA6fXJg(pR;z6(FH z@IwnvidKCWeq`Z?7M>KX`Y!y)!VfJxDO&Ysx+4oewD6?wR$BOxg&$gYQnYeSvhYI- zPl{HqNfv%+;YrcTHOaycEj%e&xh7fop@k=Xx6;CoEd0>IlcJStl7$~ycv7@-O|tMq z3r~txu1OYtXyHlG$~DQt4=p_DyOkDxWZ{Pvo)oQIlPvtu!jqzvYm$W@T6j{la!s=E zLkmxeR<21FerVxI->tOpBMU#Y@T6$vnq=XJ7M>KXT$3#P(880Vm1~lPA6j@)v~o?d z@Iwnv`fjC#A6fXJg(pQT*CY!+wD6>8<(g#ShZdd`tz45V{LsRaqLpisg&$gY(swH@ z{K&!&Ej%e&xh7fox!U~}_@rp%nq=YUYPVXoCR+HBg`cb4YSo%(;YSvJXyHlUzr(cf zBMU!QyYXw4g&$gYQoLZUNfv%+;YrcTHOaycEj%e&xh7fop@k=Xx6;CoEd0>IlcJSt zl7$~ycv7@-O|tMq3r~txu1OYtXyHlG$~DQt4=p_DyOkDxWZ{Pvo)oQIlPvtu!jqzv zYm$W@T6j{la!s=ELkmxeR<21FerVxIfA|0W*5CLa|Kz{=oB!Eg`QQKgU;LFHef9MZ zfABl^mHg`c=lg&1r$4^=<;(Z(fBy25zmOk)^{cPG`uaD%&j0+i@81jUzxliKcYgW& z#;^SA|NLM0*M8J~`J?>KkALHz{p}zAOTSwG)qmyJfBA=hp#jjLI2bJ=WqSJMgLFQK7V}U%h%uj^8ZaJqckN` zH#@ab*8m0()geq712(0GKmb?zAFPy#p#`f_BOpLi%FJ*Kt5RbiKvT-X$icwS5QyPc z6Cgk+Rj|CMgXDJ^po1j9j$&y%1Pf$v24`Stj08$#awp~F=jXa*=9HusDW;TixOw`x zdiuEm1CEguh#AoWj!`MVDb(#ZBbOwXptoyee3+w8sB3(fYf!MKzn>5b&_qTqc`jjJ z$B1}eSKr_e{~*`+kl=WrQX!!8ApoQ|BP}y0RnIZZ9V8X+;|kF$$0f}rjF5sD4wDq& zQs5GSNID0(I)(rZ_Vfjrj%2bv7cWGGk7IBM$WqMULP@Y(MqIL>sE-HhiZ4kmF3HSG z*Gn!fF3Hc02c`!hL9jGbpe!+`G?gc!FDWDll#G>B{5gKN3JDx)7LvHmsIYgbkOr#( zqeX|h1EU8+Ku426lx}`ex?W;IVsb{RUPfX{etv;oMp9yNs-AahWf;gX74I@X zqQsK?q6A&1yCN+Mm~ zBO8uz)M-V%Uc9M!PVn7FVOJk}1-+V-`0|)-W?#*=@{IdY#!v4&vx4XyB?oN?9C-i5 z{CGXv6p^WH3^k7~HhQ{51o``i7y%tD0dh22ieNBBqzGoXr(PO9EI`tz3DXQrK|;`^ z#NdX|n4HkSsHXUC$(%obVz!@|GwF&F!>x$>y&(I3^RLx)^Yn4`^m79`fe~mD;}SF{ zFd~^$>v!I-*+8N_qvz)%$AjG;7kt|zaelXAS=!ATJO+<8>F_q6GYwK`Jvy;9{G;lg z-#OcwZu@<^RI~rTe*ffq&jn)+9%bS8*OvRM{04+ z*#_T3F7H>*f4E68S3G*(n|Ib(2ONJ|&iPpI_>zY3$A&Ps<_%v2eqMXHq$6xf`?2Z! zT$jdl?DF`d6~Mk!F}z$e;$EDqoEz7o*%O{`+H2@tbTUIoe1-Mxe>*!)*qyLmt8(+1 zn2|=Ub7}f=&6$7xJ-lohAFwg_jDWZI)|9y_8FA-d%lB*Vj#mo$Aed1``98W}i`S~fCsUA*=#i`z5&iT0oiA9M(sRW>PqhK@yMnhmU1V%$(Gz3ONU^E0q cLtr!nMnhmU1V%$(Gz3ONU;sh@I6A`&0Fpl#Q~&?~ literal 0 HcmV?d00001 diff --git a/packaging/hudi-metaserver-server-bundle/pom.xml b/packaging/hudi-metaserver-server-bundle/pom.xml index d3f205233016..460d3a0e8bc1 100644 --- a/packaging/hudi-metaserver-server-bundle/pom.xml +++ b/packaging/hudi-metaserver-server-bundle/pom.xml @@ -99,7 +99,7 @@ - + org.apache.hudi:hudi-common org.apache.hudi:hudi-metaserver-server org.apache.thrift:libthrift diff --git a/pom.xml b/pom.xml index b4b93e9bee24..9f99be88feb3 100644 --- a/pom.xml +++ b/pom.xml @@ -41,6 +41,7 @@ hudi-aws hudi-gcp hudi-hadoop-mr + hudi-io hudi-spark-datasource hudi-timeline-service hudi-utilities @@ -127,7 +128,7 @@ 1.6.0 1.5.6 0.9.47 - 0.16 + 0.25 0.8.0 4.5.13 4.4.13 @@ -453,6 +454,8 @@ + org.apache.hudi:hudi-io + io.airlift:aircompressor org.apache.httpcomponents:httpclient org.apache.httpcomponents:httpcore @@ -930,6 +933,13 @@ provided + + + io.airlift + aircompressor + ${airlift.version} + + org.xerial.snappy From a508d54e132c62a91f4f66dd8ca7e950a0cecf7f Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Thu, 18 Jan 2024 11:17:42 -0800 Subject: [PATCH 058/112] [HUDI-6902] Fix a unit test (#10513) fixed a test. --- .../utilities/sources/TestGcsEventsSource.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java index 936a6e45a1bc..5f0343ed5073 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGcsEventsSource.java @@ -143,10 +143,10 @@ public void shouldReturnDataOnValidMessages() { @Test public void shouldFetchMessagesInBatches() { - ReceivedMessage msg1 = fileCreateMessage("objectId-1", "{'data':{'bucket':'bucket-1'}}"); - ReceivedMessage msg2 = fileCreateMessage("objectId-2", "{'data':{'bucket':'bucket-2'}}"); - ReceivedMessage msg3 = fileCreateMessage("objectId-3", "{'data':{'bucket':'bucket-3'}}"); - ReceivedMessage msg4 = fileCreateMessage("objectId-4", "{'data':{'bucket':'bucket-4'}}"); + ReceivedMessage msg1 = fileCreateMessage("objectId-1", "{\"data\":{\"bucket\":\"bucket-1\"}, \"size\": \"1024\"}"); + ReceivedMessage msg2 = fileCreateMessage("objectId-2", "{\"data\":{\"bucket\":\"bucket-2\"}, \"size\": \"1024\"}"); + ReceivedMessage msg3 = fileCreateMessage("objectId-3", "{\"data\":{\"bucket\":\"bucket-3\"}, \"size\": \"1024\"}"); + ReceivedMessage msg4 = fileCreateMessage("objectId-4", "{\"data\":{\"bucket\":\"bucket-4\"}, \"size\": \"1024\"}"); // dataFetcher should return only two messages each time it's called when(pubsubMessagesFetcher.fetchMessages()) @@ -175,9 +175,9 @@ public void shouldFetchMessagesInBatches() { @Test public void shouldSkipInvalidMessages1() { - ReceivedMessage invalid1 = fileDeleteMessage("objectId-1", "{'data':{'bucket':'bucket-1'}}"); - ReceivedMessage invalid2 = fileCreateMessageWithOverwroteGen("objectId-2", "{'data':{'bucket':'bucket-2'}}"); - ReceivedMessage valid1 = fileCreateMessage("objectId-3", "{'data':{'bucket':'bucket-3'}}"); + ReceivedMessage invalid1 = fileDeleteMessage("objectId-1", "{\"data\":{\"bucket\":\"bucket-1\"}, \"size\": \"1024\"}"); + ReceivedMessage invalid2 = fileCreateMessageWithOverwroteGen("objectId-2", "{\"data\":{\"bucket\":\"bucket-2\"}, \"size\": \"1024\"}"); + ReceivedMessage valid1 = fileCreateMessage("objectId-3", "{\"data\":{\"bucket\":\"bucket-3\"}, \"size\": \"1024\"}"); when(pubsubMessagesFetcher.fetchMessages()).thenReturn(Arrays.asList(invalid1, valid1, invalid2)); @@ -198,8 +198,8 @@ public void shouldSkipInvalidMessages1() { @Test public void shouldGcsEventsSourceDoesNotDedupeInternally() { - ReceivedMessage dupe1 = fileCreateMessage("objectId-1", "{'data':{'bucket':'bucket-1'}}"); - ReceivedMessage dupe2 = fileCreateMessage("objectId-1", "{'data':{'bucket':'bucket-1'}}"); + ReceivedMessage dupe1 = fileCreateMessage("objectId-1", "{\"data\":{\"bucket\":\"bucket-1\"}, \"size\": \"1024\"}"); + ReceivedMessage dupe2 = fileCreateMessage("objectId-1", "{\"data\":{\"bucket\":\"bucket-1\"}, \"size\": \"1024\"}"); when(pubsubMessagesFetcher.fetchMessages()).thenReturn(Arrays.asList(dupe1, dupe2)); From 3facb0a25847d4871e3fde36581139567175f84b Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Thu, 18 Jan 2024 11:17:56 -0800 Subject: [PATCH 059/112] [HUDI-6902] Shutdown metric hooks properly (#10520) --- .../scala/org/apache/hudi/DefaultSource.scala | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala index f982fb1e1c31..1685b9abf303 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -143,16 +143,19 @@ class DefaultSource extends RelationProvider mode: SaveMode, optParams: Map[String, String], df: DataFrame): BaseRelation = { - if (optParams.get(OPERATION.key).contains(BOOTSTRAP_OPERATION_OPT_VAL)) { - HoodieSparkSqlWriter.bootstrap(sqlContext, mode, optParams, df) - HoodieSparkSqlWriter.cleanup() - } else { - val (success, _, _, _, _, _) = HoodieSparkSqlWriter.write(sqlContext, mode, optParams, df) - HoodieSparkSqlWriter.cleanup() - if (!success) { - throw new HoodieException("Write to Hudi failed") + try { + if (optParams.get(OPERATION.key).contains(BOOTSTRAP_OPERATION_OPT_VAL)) { + HoodieSparkSqlWriter.bootstrap(sqlContext, mode, optParams, df) + } else { + val (success, _, _, _, _, _) = HoodieSparkSqlWriter.write(sqlContext, mode, optParams, df) + if (!success) { + throw new HoodieException("Failed to write to Hudi") + } } } + finally { + HoodieSparkSqlWriter.cleanup() + } new HoodieEmptyRelation(sqlContext, df.schema) } From e8f34c3ecd50fc3b5dcc4f491c7817d5ecfb02be Mon Sep 17 00:00:00 2001 From: stream2000 <18889897088@163.com> Date: Fri, 19 Jan 2024 10:12:43 +0800 Subject: [PATCH 060/112] [HUDI-7305] Fix cast exception for byte/short/float partitioned field (#10518) --- .../spark/sql/hudi/TestInsertTable.scala | 37 +++++++++++++++++++ .../Spark3ParsePartitionUtil.scala | 10 +++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index e7324a1354fe..ef62a6947722 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -2242,6 +2242,43 @@ class TestInsertTable extends HoodieSparkSqlTestBase { }) } + test("Test various data types as partition fields") { + withRecordType()(withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id INT, + | boolean_field BOOLEAN, + | float_field FLOAT, + | byte_field BYTE, + | short_field SHORT, + | decimal_field DECIMAL(10, 5), + | date_field DATE, + | string_field STRING, + | timestamp_field TIMESTAMP + |) USING hudi + | TBLPROPERTIES (primaryKey = 'id') + | PARTITIONED BY (boolean_field, float_field, byte_field, short_field, decimal_field, date_field, string_field, timestamp_field) + |LOCATION '${tmp.getCanonicalPath}' + """.stripMargin) + + // Insert data into partitioned table + spark.sql( + s""" + |INSERT INTO $tableName VALUES + |(1, TRUE, CAST(1.0 as FLOAT), 1, 1, 1234.56789, DATE '2021-01-05', 'partition1', TIMESTAMP '2021-01-05 10:00:00'), + |(2, FALSE,CAST(2.0 as FLOAT), 2, 2, 6789.12345, DATE '2021-01-06', 'partition2', TIMESTAMP '2021-01-06 11:00:00') + """.stripMargin) + + checkAnswer(s"SELECT id, boolean_field FROM $tableName ORDER BY id")( + Seq(1, true), + Seq(2, false) + ) + }) + } + + def ingestAndValidateDataDupPolicy(tableType: String, tableName: String, tmp: File, expectedOperationtype: WriteOperationType = WriteOperationType.INSERT, setOptions: List[String] = List.empty, diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/execution/datasources/Spark3ParsePartitionUtil.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/execution/datasources/Spark3ParsePartitionUtil.scala index ebe92a5a32a9..fca21d202a99 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/execution/datasources/Spark3ParsePartitionUtil.scala +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/execution/datasources/Spark3ParsePartitionUtil.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.datasources import org.apache.hadoop.fs.Path import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH import org.apache.hudi.spark3.internal.ReflectUtil -import org.apache.hudi.util.JFunction import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.unescapePathName import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} @@ -29,10 +28,9 @@ import org.apache.spark.sql.execution.datasources.PartitioningUtils.timestampPar import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String -import java.lang.{Boolean => JBoolean, Double => JDouble, Long => JLong} +import java.lang.{Double => JDouble, Long => JLong} import java.math.{BigDecimal => JBigDecimal} import java.time.ZoneId -import java.util import java.util.concurrent.ConcurrentHashMap import java.util.{Locale, TimeZone} import scala.collection.convert.Wrappers.JConcurrentMapWrapper @@ -259,10 +257,12 @@ object Spark3ParsePartitionUtil extends SparkParsePartitionUtil { zoneId: ZoneId): Any = desiredType match { case _ if value == DEFAULT_PARTITION_PATH => null case NullType => null - case BooleanType => JBoolean.parseBoolean(value) case StringType => UTF8String.fromString(unescapePathName(value)) + case ByteType => Integer.parseInt(value).toByte + case ShortType => Integer.parseInt(value).toShort case IntegerType => Integer.parseInt(value) case LongType => JLong.parseLong(value) + case FloatType => JDouble.parseDouble(value).toFloat case DoubleType => JDouble.parseDouble(value) case _: DecimalType => Literal(new JBigDecimal(value)).value case DateType => @@ -274,6 +274,8 @@ object Spark3ParsePartitionUtil extends SparkParsePartitionUtil { }.getOrElse { Cast(Cast(Literal(value), DateType, Some(zoneId.getId)), dt).eval() } + case BinaryType => value.getBytes() + case BooleanType => value.toBoolean case dt => throw new IllegalArgumentException(s"Unexpected type $dt") } From 975ba221571093c19c481e3f6e9da3e1b00aaf1b Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Mon, 26 Feb 2024 08:50:07 -0800 Subject: [PATCH 061/112] [HUDI-7297] Fix ambiguous error message when field type defined in schema mismatches that in parquet file (#10497) --- .../format/cow/ParquetSplitReaderUtil.java | 48 +++++++----- .../reader/ParquetColumnarRowSplitReader.java | 16 ++-- .../format/cow/ParquetSplitReaderUtil.java | 48 +++++++----- .../reader/ParquetColumnarRowSplitReader.java | 16 ++-- .../format/cow/ParquetSplitReaderUtil.java | 48 +++++++----- .../reader/ParquetColumnarRowSplitReader.java | 16 ++-- .../format/cow/ParquetSplitReaderUtil.java | 48 +++++++----- .../reader/ParquetColumnarRowSplitReader.java | 16 ++-- .../format/cow/ParquetSplitReaderUtil.java | 76 +++++++++++-------- .../format/cow/vector/HeapDecimalVector.java | 40 ++++++++++ .../reader/ParquetColumnarRowSplitReader.java | 16 ++-- 11 files changed, 259 insertions(+), 129 deletions(-) create mode 100644 hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 76aa827a84a6..aa12d9050faa 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -459,60 +459,52 @@ private static WritableColumnVector createWritableColumnVector( switch (fieldType.getTypeRoot()) { case BOOLEAN: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBooleanVector(batchSize); case TINYINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapByteVector(batchSize); case DOUBLE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDoubleVector(batchSize); case FLOAT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapFloatVector(batchSize); case INTEGER: case DATE: case TIME_WITHOUT_TIME_ZONE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapIntVector(batchSize); case BIGINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapLongVector(batchSize); case SMALLINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapShortVector(batchSize); case CHAR: case VARCHAR: case BINARY: case VARBINARY: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBytesVector(batchSize); case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); + getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType)); return new HeapTimestampVector(batchSize); case DECIMAL: checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); + getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDecimalVector(batchSize); case ARRAY: ArrayType arrayType = (ArrayType) fieldType; @@ -576,4 +568,24 @@ private static int getFieldIndexInPhysicalType(String fieldName, GroupType group // get index from fileSchema type, else, return -1 return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; } + + /** + * Construct the error message when primitive type mismatches. + * @param primitiveType Primitive type + * @param fieldType Logical field type + * @return The error message + */ + private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) { + return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name()); + } + + /** + * Construct the error message when original type mismatches. + * @param originalType Original type + * @param fieldType Logical field type + * @return The error message + */ + private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) { + return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name()); + } } diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java index 4eb919884030..5af1b8e8aa1b 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -218,11 +218,17 @@ private WritableColumnVector[] createWritableVectors() { List types = requestedSchema.getFields(); List descriptors = requestedSchema.getColumns(); for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); + try { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } catch (IllegalArgumentException e) { + String fieldName = requestedSchema.getFieldName(i); + String message = e.getMessage() + " Field name: " + fieldName; + throw new IllegalArgumentException(message); + } } return columns; } diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 1b636c63b2f6..bd86c68cc8bc 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -459,60 +459,52 @@ private static WritableColumnVector createWritableColumnVector( switch (fieldType.getTypeRoot()) { case BOOLEAN: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBooleanVector(batchSize); case TINYINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapByteVector(batchSize); case DOUBLE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDoubleVector(batchSize); case FLOAT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapFloatVector(batchSize); case INTEGER: case DATE: case TIME_WITHOUT_TIME_ZONE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapIntVector(batchSize); case BIGINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapLongVector(batchSize); case SMALLINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapShortVector(batchSize); case CHAR: case VARCHAR: case BINARY: case VARBINARY: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBytesVector(batchSize); case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); + getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType)); return new HeapTimestampVector(batchSize); case DECIMAL: checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); + getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDecimalVector(batchSize); case ARRAY: ArrayType arrayType = (ArrayType) fieldType; @@ -576,4 +568,24 @@ private static int getFieldIndexInPhysicalType(String fieldName, GroupType group // get index from fileSchema type, else, return -1 return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; } + + /** + * Construct the error message when primitive type mismatches. + * @param primitiveType Primitive type + * @param fieldType Logical field type + * @return The error message + */ + private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) { + return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name()); + } + + /** + * Construct the error message when original type mismatches. + * @param originalType Original type + * @param fieldType Logical field type + * @return The error message + */ + private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) { + return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name()); + } } diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java index 65912cef671b..4c1e51c74fc1 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -218,11 +218,17 @@ private WritableColumnVector[] createWritableVectors() { List types = requestedSchema.getFields(); List descriptors = requestedSchema.getColumns(); for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); + try { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } catch (IllegalArgumentException e) { + String fieldName = requestedSchema.getFieldName(i); + String message = e.getMessage() + " Field name: " + fieldName; + throw new IllegalArgumentException(message); + } } return columns; } diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 1b636c63b2f6..bd86c68cc8bc 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -459,60 +459,52 @@ private static WritableColumnVector createWritableColumnVector( switch (fieldType.getTypeRoot()) { case BOOLEAN: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBooleanVector(batchSize); case TINYINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapByteVector(batchSize); case DOUBLE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDoubleVector(batchSize); case FLOAT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapFloatVector(batchSize); case INTEGER: case DATE: case TIME_WITHOUT_TIME_ZONE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapIntVector(batchSize); case BIGINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapLongVector(batchSize); case SMALLINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapShortVector(batchSize); case CHAR: case VARCHAR: case BINARY: case VARBINARY: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBytesVector(batchSize); case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); + getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType)); return new HeapTimestampVector(batchSize); case DECIMAL: checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); + getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDecimalVector(batchSize); case ARRAY: ArrayType arrayType = (ArrayType) fieldType; @@ -576,4 +568,24 @@ private static int getFieldIndexInPhysicalType(String fieldName, GroupType group // get index from fileSchema type, else, return -1 return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; } + + /** + * Construct the error message when primitive type mismatches. + * @param primitiveType Primitive type + * @param fieldType Logical field type + * @return The error message + */ + private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) { + return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name()); + } + + /** + * Construct the error message when original type mismatches. + * @param originalType Original type + * @param fieldType Logical field type + * @return The error message + */ + private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) { + return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name()); + } } diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java index 65912cef671b..4c1e51c74fc1 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -218,11 +218,17 @@ private WritableColumnVector[] createWritableVectors() { List types = requestedSchema.getFields(); List descriptors = requestedSchema.getColumns(); for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); + try { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } catch (IllegalArgumentException e) { + String fieldName = requestedSchema.getFieldName(i); + String message = e.getMessage() + " Field name: " + fieldName; + throw new IllegalArgumentException(message); + } } return columns; } diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 1b636c63b2f6..bd86c68cc8bc 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -459,60 +459,52 @@ private static WritableColumnVector createWritableColumnVector( switch (fieldType.getTypeRoot()) { case BOOLEAN: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBooleanVector(batchSize); case TINYINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapByteVector(batchSize); case DOUBLE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDoubleVector(batchSize); case FLOAT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapFloatVector(batchSize); case INTEGER: case DATE: case TIME_WITHOUT_TIME_ZONE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapIntVector(batchSize); case BIGINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapLongVector(batchSize); case SMALLINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapShortVector(batchSize); case CHAR: case VARCHAR: case BINARY: case VARBINARY: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBytesVector(batchSize); case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); + getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType)); return new HeapTimestampVector(batchSize); case DECIMAL: checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); + getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDecimalVector(batchSize); case ARRAY: ArrayType arrayType = (ArrayType) fieldType; @@ -576,4 +568,24 @@ private static int getFieldIndexInPhysicalType(String fieldName, GroupType group // get index from fileSchema type, else, return -1 return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; } + + /** + * Construct the error message when primitive type mismatches. + * @param primitiveType Primitive type + * @param fieldType Logical field type + * @return The error message + */ + private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) { + return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name()); + } + + /** + * Construct the error message when original type mismatches. + * @param originalType Original type + * @param fieldType Logical field type + * @return The error message + */ + private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) { + return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name()); + } } diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java index 65912cef671b..4c1e51c74fc1 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -218,11 +218,17 @@ private WritableColumnVector[] createWritableVectors() { List types = requestedSchema.getFields(); List descriptors = requestedSchema.getColumns(); for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); + try { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } catch (IllegalArgumentException e) { + String fieldName = requestedSchema.getFieldName(i); + String message = e.getMessage() + " Field name: " + fieldName; + throw new IllegalArgumentException(message); + } } return columns; } diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 9bf5390ee26c..414d4f506b58 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -20,9 +20,9 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.table.format.cow.vector.HeapArrayVector; +import org.apache.hudi.table.format.cow.vector.HeapDecimalVector; import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; -import org.apache.hudi.table.format.cow.vector.ParquetDecimalVector; import org.apache.hudi.table.format.cow.vector.reader.ArrayColumnReader; import org.apache.hudi.table.format.cow.vector.reader.EmptyColumnReader; import org.apache.hudi.table.format.cow.vector.reader.FixedLenBytesColumnReader; @@ -65,7 +65,6 @@ import org.apache.flink.table.types.logical.MapType; import org.apache.flink.table.types.logical.RowType; import org.apache.flink.table.types.logical.TimestampType; -import org.apache.flink.table.types.logical.VarBinaryType; import org.apache.flink.util.Preconditions; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.ParquetRuntimeException; @@ -234,17 +233,18 @@ private static ColumnVector createVectorFromConstant( } return lv; case DECIMAL: - DecimalType decimalType = (DecimalType) type; - int precision = decimalType.getPrecision(); - int scale = decimalType.getScale(); - DecimalData decimal = value == null - ? null - : Preconditions.checkNotNull(DecimalData.fromBigDecimal((BigDecimal) value, precision, scale)); - ColumnVector internalVector = createVectorFromConstant( - new VarBinaryType(), - decimal == null ? null : decimal.toUnscaledBytes(), - batchSize); - return new ParquetDecimalVector(internalVector); + HeapDecimalVector decv = new HeapDecimalVector(batchSize); + if (value == null) { + decv.fillWithNulls(); + } else { + DecimalType decimalType = (DecimalType) type; + int precision = decimalType.getPrecision(); + int scale = decimalType.getScale(); + DecimalData decimal = Preconditions.checkNotNull( + DecimalData.fromBigDecimal((BigDecimal) value, precision, scale)); + decv.fill(decimal.toUnscaledBytes()); + } + return decv; case FLOAT: HeapFloatVector fv = new HeapFloatVector(batchSize); if (value == null) { @@ -459,61 +459,53 @@ private static WritableColumnVector createWritableColumnVector( switch (fieldType.getTypeRoot()) { case BOOLEAN: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BOOLEAN, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBooleanVector(batchSize); case TINYINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapByteVector(batchSize); case DOUBLE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.DOUBLE, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapDoubleVector(batchSize); case FLOAT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.FLOAT, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.FLOAT, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapFloatVector(batchSize); case INTEGER: case DATE: case TIME_WITHOUT_TIME_ZONE: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapIntVector(batchSize); case BIGINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT64, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT64, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapLongVector(batchSize); case SMALLINT: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.INT32, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.INT32, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapShortVector(batchSize); case CHAR: case VARCHAR: case BINARY: case VARBINARY: checkArgument( - typeName == PrimitiveType.PrimitiveTypeName.BINARY, - "Unexpected type: %s", typeName); + typeName == PrimitiveType.PrimitiveTypeName.BINARY, getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); return new HeapBytesVector(batchSize); case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: checkArgument(primitiveType.getOriginalType() != OriginalType.TIME_MICROS, - "TIME_MICROS original type is not "); + getOriginalTypeCheckFailureMessage(primitiveType.getOriginalType(), fieldType)); return new HeapTimestampVector(batchSize); case DECIMAL: checkArgument( (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY || typeName == PrimitiveType.PrimitiveTypeName.BINARY) && primitiveType.getOriginalType() == OriginalType.DECIMAL, - "Unexpected type: %s", typeName); - return new HeapBytesVector(batchSize); + getPrimitiveTypeCheckFailureMessage(typeName, fieldType)); + return new HeapDecimalVector(batchSize); case ARRAY: ArrayType arrayType = (ArrayType) fieldType; return new HeapArrayVector( @@ -576,4 +568,24 @@ private static int getFieldIndexInPhysicalType(String fieldName, GroupType group // get index from fileSchema type, else, return -1 return groupType.containsField(fieldName) ? groupType.getFieldIndex(fieldName) : -1; } + + /** + * Construct the error message when primitive type mismatches. + * @param primitiveType Primitive type + * @param fieldType Logical field type + * @return The error message + */ + private static String getPrimitiveTypeCheckFailureMessage(PrimitiveType.PrimitiveTypeName primitiveType, LogicalType fieldType) { + return String.format("Unexpected type exception. Primitive type: %s. Field type: %s.", primitiveType, fieldType.getTypeRoot().name()); + } + + /** + * Construct the error message when original type mismatches. + * @param originalType Original type + * @param fieldType Logical field type + * @return The error message + */ + private static String getOriginalTypeCheckFailureMessage(OriginalType originalType, LogicalType fieldType) { + return String.format("Unexpected type exception. Original type: %s. Field type: %s.", originalType, fieldType.getTypeRoot().name()); + } } diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java new file mode 100644 index 000000000000..c84bb9e036b9 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/HeapDecimalVector.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.columnar.vector.DecimalColumnVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector; + +/** + * This class represents a nullable heap map decimal vector. + */ +public class HeapDecimalVector extends HeapBytesVector implements DecimalColumnVector { + + public HeapDecimalVector(int len) { + super(len); + } + + @Override + public DecimalData getDecimal(int i, int precision, int scale) { + return DecimalData.fromUnscaledBytes( + this.getBytes(i).getBytes(), precision, scale); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java index 65912cef671b..4c1e51c74fc1 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ParquetColumnarRowSplitReader.java @@ -218,11 +218,17 @@ private WritableColumnVector[] createWritableVectors() { List types = requestedSchema.getFields(); List descriptors = requestedSchema.getColumns(); for (int i = 0; i < requestedTypes.length; i++) { - columns[i] = createWritableColumnVector( - batchSize, - requestedTypes[i], - types.get(i), - descriptors); + try { + columns[i] = createWritableColumnVector( + batchSize, + requestedTypes[i], + types.get(i), + descriptors); + } catch (IllegalArgumentException e) { + String fieldName = requestedSchema.getFieldName(i); + String message = e.getMessage() + " Field name: " + fieldName; + throw new IllegalArgumentException(message); + } } return columns; } From cefc5300145d6418f1d4f1e609ff4ff2b3176c0b Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Fri, 19 Jan 2024 10:27:36 +0800 Subject: [PATCH 062/112] [HUDI-7309] Disable constructing AND & OR filter predicates when filter pushing down for any of its operand's logical type for is unsupported in ExpressionPredicates::toParquetPredicate (#10524) --- .../hudi/source/ExpressionPredicates.java | 6 ++++++ .../hudi/source/TestExpressionPredicates.java | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java index 046e4b739ada..34bb58f6c8e2 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java @@ -548,6 +548,9 @@ public Predicate bindPredicates(Predicate... predicates) { @Override public FilterPredicate filter() { + if (null == predicates[0].filter() || null == predicates[1].filter()) { + return null; + } return and(predicates[0].filter(), predicates[1].filter()); } @@ -586,6 +589,9 @@ public Predicate bindPredicates(Predicate... predicates) { @Override public FilterPredicate filter() { + if (null == predicates[0].filter() || null == predicates[1].filter()) { + return null; + } return or(predicates[0].filter(), predicates[1].filter()); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java index 97b06644266d..b8c4b1caf2ef 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java @@ -42,6 +42,7 @@ import org.apache.parquet.filter2.predicate.Operators.Lt; import org.junit.jupiter.api.Test; +import java.math.BigDecimal; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -58,6 +59,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.notEq; import static org.apache.parquet.filter2.predicate.FilterApi.or; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; /** * Test cases for {@link ExpressionPredicates}. @@ -164,4 +166,19 @@ public void testFilterPredicateFromExpression() { assertEquals(predicate19.toString(), predicate20.toString()); assertEquals(or(lt, gt), predicate20.filter()); } + + @Test + public void testDisablePredicatesPushDownForUnsupportedType() { + FieldReferenceExpression fieldReference = new FieldReferenceExpression("f_decimal", DataTypes.DECIMAL(7, 2), 0, 0); + ValueLiteralExpression valueLiteral = new ValueLiteralExpression(BigDecimal.valueOf(100.00)); + List expressions = Arrays.asList(fieldReference, valueLiteral); + + CallExpression greaterThanExpression = new CallExpression(BuiltInFunctionDefinitions.GREATER_THAN, expressions, DataTypes.DECIMAL(7, 2)); + Predicate greaterThanPredicate = fromExpression(greaterThanExpression); + CallExpression lessThanExpression = new CallExpression(BuiltInFunctionDefinitions.LESS_THAN, expressions, DataTypes.DECIMAL(7, 2)); + Predicate lessThanPredicate = fromExpression(lessThanExpression); + + assertNull(And.getInstance().bindPredicates(greaterThanPredicate, lessThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); + assertNull(Or.getInstance().bindPredicates(greaterThanPredicate, lessThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); + } } From 0705849cf1f8b85371261a699b8936539af4b8ce Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Mon, 26 Feb 2024 10:14:26 -0800 Subject: [PATCH 063/112] [HUDI-7284] Fix cluster stream sync check (#10501) Co-authored-by: Jonathan Vexler <=> --- .../table/timeline/HoodieDefaultTimeline.java | 17 ++--------- .../hudi/common/util/ClusteringUtils.java | 30 ++++++++++++++----- .../hudi/common/util/TestClusteringUtils.java | 15 ++++++++++ 3 files changed, 40 insertions(+), 22 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index 6bfdac00e778..046ef8e7591d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieInstant.State; +import org.apache.hudi.common.util.ClusteringUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; @@ -514,21 +515,9 @@ public Option getLastClusterCommit() { @Override public Option getLastPendingClusterCommit() { - return Option.fromJavaOptional(getCommitsTimeline().filter(s -> s.getAction().equalsIgnoreCase(HoodieTimeline.REPLACE_COMMIT_ACTION)) + return Option.fromJavaOptional(filterPendingReplaceTimeline() .getReverseOrderedInstants() - .filter(i -> { - try { - if (!i.isCompleted()) { - HoodieCommitMetadata metadata = TimelineUtils.getCommitMetadata(i, this); - return metadata.getOperationType().equals(WriteOperationType.CLUSTER); - } else { - return false; - } - } catch (IOException e) { - LOG.warn("Unable to read commit metadata for " + i + " due to " + e.getMessage()); - return false; - } - }).findFirst()); + .filter(i -> ClusteringUtils.isPendingClusteringInstant(this, i)).findFirst()); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java index e50431c7398b..6fe46c6c1099 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java @@ -82,12 +82,12 @@ public static boolean isClusteringCommit(HoodieTableMetaClient metaClient, Hoodi /** * Get requested replace metadata from timeline. - * @param metaClient - * @param pendingReplaceInstant - * @return + * @param timeline used to get the bytes stored in the requested replace instant in the timeline + * @param pendingReplaceInstant can be in any state, because it will always be converted to requested state + * @return option of the replace metadata if present, else empty * @throws IOException */ - private static Option getRequestedReplaceMetadata(HoodieTableMetaClient metaClient, HoodieInstant pendingReplaceInstant) throws IOException { + private static Option getRequestedReplaceMetadata(HoodieTimeline timeline, HoodieInstant pendingReplaceInstant) throws IOException { final HoodieInstant requestedInstant; if (!pendingReplaceInstant.isRequested()) { // inflight replacecommit files don't have clustering plan. @@ -97,7 +97,7 @@ private static Option getRequestedReplaceMetadat } else { requestedInstant = pendingReplaceInstant; } - Option content = metaClient.getActiveTimeline().getInstantDetails(requestedInstant); + Option content = timeline.getInstantDetails(requestedInstant); if (!content.isPresent() || content.get().length == 0) { // few operations create requested file without any content. Assume these are not clustering return Option.empty(); @@ -107,13 +107,23 @@ private static Option getRequestedReplaceMetadat /** * Get Clustering plan from timeline. - * @param metaClient + * @param metaClient used to get the active timeline + * @param pendingReplaceInstant can be in any state, because it will always be converted to requested state + * @return option of the replace metadata if present, else empty + */ + public static Option> getClusteringPlan(HoodieTableMetaClient metaClient, HoodieInstant pendingReplaceInstant) { + return getClusteringPlan(metaClient.getActiveTimeline(), pendingReplaceInstant); + } + + /** + * Get Clustering plan from timeline. + * @param timeline * @param pendingReplaceInstant * @return */ - public static Option> getClusteringPlan(HoodieTableMetaClient metaClient, HoodieInstant pendingReplaceInstant) { + public static Option> getClusteringPlan(HoodieTimeline timeline, HoodieInstant pendingReplaceInstant) { try { - Option requestedReplaceMetadata = getRequestedReplaceMetadata(metaClient, pendingReplaceInstant); + Option requestedReplaceMetadata = getRequestedReplaceMetadata(timeline, pendingReplaceInstant); if (requestedReplaceMetadata.isPresent() && WriteOperationType.CLUSTER.name().equals(requestedReplaceMetadata.get().getOperationType())) { return Option.of(Pair.of(pendingReplaceInstant, requestedReplaceMetadata.get().getClusteringPlan())); } @@ -235,6 +245,10 @@ public static boolean isPendingClusteringInstant(HoodieTableMetaClient metaClien return getClusteringPlan(metaClient, instant).isPresent(); } + public static boolean isPendingClusteringInstant(HoodieTimeline timeline, HoodieInstant instant) { + return getClusteringPlan(timeline, instant).isPresent(); + } + /** * Returns the oldest instant to retain. * Make sure the clustering instant won't be archived before cleaned, and the oldest inflight clustering instant has a previous commit. diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java index 28def8fddcfc..244ee1dba3ae 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java @@ -104,6 +104,21 @@ public void testClusteringPlanMultipleInstants() throws Exception { validateClusteringInstant(fileIds1, partitionPath1, clusterTime1, fileGroupToInstantMap); validateClusteringInstant(fileIds2, partitionPath1, clusterTime, fileGroupToInstantMap); validateClusteringInstant(fileIds3, partitionPath1, clusterTime, fileGroupToInstantMap); + Option lastPendingClustering = metaClient.getActiveTimeline().getLastPendingClusterCommit(); + assertTrue(lastPendingClustering.isPresent()); + assertEquals("2", lastPendingClustering.get().getTimestamp()); + + //check that it still gets picked if it is inflight + HoodieInstant inflight = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(lastPendingClustering.get(), Option.empty()); + assertEquals(HoodieInstant.State.INFLIGHT, inflight.getState()); + lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterCommit(); + assertEquals("2", lastPendingClustering.get().getTimestamp()); + + //now that it is complete, the first instant should be picked + HoodieInstant complete = metaClient.getActiveTimeline().transitionReplaceInflightToComplete(inflight, Option.empty()); + assertEquals(HoodieInstant.State.COMPLETED, complete.getState()); + lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterCommit(); + assertEquals("1", lastPendingClustering.get().getTimestamp()); } // replacecommit.inflight doesn't have clustering plan. From 4361432dc6358e745aeb0661448d12748302cad9 Mon Sep 17 00:00:00 2001 From: xuzifu666 Date: Sat, 20 Jan 2024 07:33:35 +0800 Subject: [PATCH 064/112] [HUDI-7314] Hudi Create table support index type check (#10536) Co-authored-by: xuyu <11161569@vivo.com> Co-authored-by: Y Ethan Guo --- .../spark/sql/hudi/HoodieOptionConfig.scala | 7 ++++ .../spark/sql/hudi/TestInsertTable.scala | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala index 9678a5b5cdac..7da2753aeb81 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala @@ -22,6 +22,7 @@ import org.apache.hudi.avro.HoodieAvroUtils.getRootLevelFieldName import org.apache.hudi.common.model.{HoodieRecordMerger, HoodieTableType} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.util.ValidationUtils +import org.apache.hudi.config.HoodieIndexConfig import org.apache.spark.sql.SparkSession import org.apache.spark.sql.types.StructType @@ -225,6 +226,12 @@ object HoodieOptionConfig { tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_COW) || tableType.get.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_MOR), s"'type' must be '$SQL_VALUE_TABLE_TYPE_COW' or '$SQL_VALUE_TABLE_TYPE_MOR'") + + // validate table index type + val indexType = sqlOptions.get(HoodieIndexConfig.INDEX_TYPE.key()) + if (!indexType.isEmpty) { + HoodieIndexConfig.INDEX_TYPE.checkValues(indexType.get) + } } def buildConf[T](): HoodieSQLOptionBuilder[T] = { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index ef62a6947722..2a093ac7b08f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -2081,6 +2081,38 @@ class TestInsertTable extends HoodieSparkSqlTestBase { }) } + test("Test inaccurate index type") { + withRecordType()(withTempDir { tmp => + val targetTable = generateTableName + + assertThrows[IllegalArgumentException] { + try { + spark.sql( + s""" + |create table ${targetTable} ( + | `id` string, + | `name` string, + | `dt` bigint, + | `day` STRING, + | `hour` INT + |) using hudi + |OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 'false', 'hoodie.datasource.meta.sync.enable' 'false', 'hoodie.datasource.hive_sync.enable' 'false') + |tblproperties ( + | 'primaryKey' = 'id', + | 'type' = 'mor', + | 'preCombineField'='dt', + | 'hoodie.index.type' = 'BUCKET_aa', + | 'hoodie.bucket.index.hash.field' = 'id', + | 'hoodie.bucket.index.num.buckets'=512 + | ) + |partitioned by (`day`,`hour`) + |location '${tmp.getCanonicalPath}' + |""".stripMargin) + } + } + }) + } + test("Test vectorized read nested columns for LegacyHoodieParquetFileFormat") { withSQLConf( "hoodie.datasource.read.use.new.parquet.file.format" -> "false", From ccb59939d37bd6c8f87d2aeac52389cd911f044c Mon Sep 17 00:00:00 2001 From: KnightChess <981159963@qq.com> Date: Sat, 20 Jan 2024 10:33:02 +0800 Subject: [PATCH 065/112] [HUDI-7277] Fix `hoodie.bulkinsert.shuffle.parallelism` not activated with no-partitioned table (#10532) Signed-off-by: wulingqi <981159963@qq.com> --- .../hudi/HoodieDatasetBulkInsertHelper.scala | 29 +++++----- .../TestHoodieDatasetBulkInsertHelper.java | 53 +++++++++++++++++++ 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala index 75ec069946d2..0214b0a10302 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala @@ -76,6 +76,9 @@ object HoodieDatasetBulkInsertHelper val updatedSchema = StructType(metaFields ++ schema.fields) + val targetParallelism = + deduceShuffleParallelism(df, config.getBulkInsertShuffleParallelism) + val updatedDF = if (populateMetaFields) { val keyGeneratorClassName = config.getStringOrThrow(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME, "Key-generator class name is required") @@ -110,7 +113,7 @@ object HoodieDatasetBulkInsertHelper } val dedupedRdd = if (config.shouldCombineBeforeInsert) { - dedupeRows(prependedRdd, updatedSchema, config.getPreCombineField, SparkHoodieIndexFactory.isGlobalIndex(config)) + dedupeRows(prependedRdd, updatedSchema, config.getPreCombineField, SparkHoodieIndexFactory.isGlobalIndex(config), targetParallelism) } else { prependedRdd } @@ -127,9 +130,6 @@ object HoodieDatasetBulkInsertHelper HoodieUnsafeUtils.createDataFrameFrom(df.sparkSession, prependedQuery) } - val targetParallelism = - deduceShuffleParallelism(updatedDF, config.getBulkInsertShuffleParallelism) - partitioner.repartitionRecords(updatedDF, targetParallelism) } @@ -193,7 +193,7 @@ object HoodieDatasetBulkInsertHelper table.getContext.parallelize(writeStatuses.toList.asJava) } - private def dedupeRows(rdd: RDD[InternalRow], schema: StructType, preCombineFieldRef: String, isGlobalIndex: Boolean): RDD[InternalRow] = { + private def dedupeRows(rdd: RDD[InternalRow], schema: StructType, preCombineFieldRef: String, isGlobalIndex: Boolean, targetParallelism: Int): RDD[InternalRow] = { val recordKeyMetaFieldOrd = schema.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD) val partitionPathMetaFieldOrd = schema.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD) // NOTE: Pre-combine field could be a nested field @@ -212,16 +212,15 @@ object HoodieDatasetBulkInsertHelper // since Spark might be providing us with a mutable copy (updated during the iteration) (rowKey, row.copy()) } - .reduceByKey { - (oneRow, otherRow) => - val onePreCombineVal = getNestedInternalRowValue(oneRow, preCombineFieldPath).asInstanceOf[Comparable[AnyRef]] - val otherPreCombineVal = getNestedInternalRowValue(otherRow, preCombineFieldPath).asInstanceOf[Comparable[AnyRef]] - if (onePreCombineVal.compareTo(otherPreCombineVal.asInstanceOf[AnyRef]) >= 0) { - oneRow - } else { - otherRow - } - } + .reduceByKey ((oneRow, otherRow) => { + val onePreCombineVal = getNestedInternalRowValue(oneRow, preCombineFieldPath).asInstanceOf[Comparable[AnyRef]] + val otherPreCombineVal = getNestedInternalRowValue(otherRow, preCombineFieldPath).asInstanceOf[Comparable[AnyRef]] + if (onePreCombineVal.compareTo(otherPreCombineVal.asInstanceOf[AnyRef]) >= 0) { + oneRow + } else { + otherRow + } + }, targetParallelism) .values } diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java index 50ec641c182f..bb24ee0e52a1 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieDatasetBulkInsertHelper.java @@ -37,8 +37,11 @@ import org.apache.avro.Schema; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.scheduler.SparkListener; +import org.apache.spark.scheduler.SparkListenerStageSubmitted; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.HoodieUnsafeUtils; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; import org.apache.spark.sql.types.StructType; @@ -59,6 +62,7 @@ import scala.Tuple2; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -348,4 +352,53 @@ public void testNoPropsSet() { private ExpressionEncoder getEncoder(StructType schema) { return SparkAdapterSupport$.MODULE$.sparkAdapter().getCatalystExpressionUtils().getEncoder(schema); } + + @Test + public void testBulkInsertParallelismParam() { + HoodieWriteConfig config = getConfigBuilder(schemaStr).withProps(getPropsAllSet("_row_key")) + .combineInput(true, true) + .withPreCombineField("ts").build(); + int checkParallelism = 7; + config.setValue("hoodie.bulkinsert.shuffle.parallelism", String.valueOf(checkParallelism)); + StageCheckBulkParallelismListener stageCheckBulkParallelismListener = + new StageCheckBulkParallelismListener("org.apache.hudi.HoodieDatasetBulkInsertHelper$.dedupeRows"); + sqlContext.sparkContext().addSparkListener(stageCheckBulkParallelismListener); + List inserts = DataSourceTestUtils.generateRandomRows(10); + Dataset dataset = sqlContext.createDataFrame(inserts, structType).repartition(3); + assertNotEquals(checkParallelism, HoodieUnsafeUtils.getNumPartitions(dataset)); + assertNotEquals(checkParallelism, sqlContext.sparkContext().defaultParallelism()); + Dataset result = HoodieDatasetBulkInsertHelper.prepareForBulkInsert(dataset, config, + new NonSortPartitionerWithRows(), "000001111"); + // trigger job + result.count(); + assertEquals(checkParallelism, stageCheckBulkParallelismListener.getParallelism()); + sqlContext.sparkContext().removeSparkListener(stageCheckBulkParallelismListener); + } + + class StageCheckBulkParallelismListener extends SparkListener { + + private boolean checkFlag = false; + private String checkMessage; + private int parallelism; + + StageCheckBulkParallelismListener(String checkMessage) { + this.checkMessage = checkMessage; + } + + @Override + public void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) { + if (checkFlag) { + // dedup next stage is reduce task + this.parallelism = stageSubmitted.stageInfo().numTasks(); + checkFlag = false; + } + if (stageSubmitted.stageInfo().details().contains(checkMessage)) { + checkFlag = true; + } + } + + public int getParallelism() { + return parallelism; + } + } } From 38525de1763610e57364e61b0de80b2e8ba8905c Mon Sep 17 00:00:00 2001 From: Prathit malik <53890994+prathit06@users.noreply.github.com> Date: Sat, 20 Jan 2024 08:07:14 +0530 Subject: [PATCH 066/112] [MINOR] Added descriptive exception if column present in required avro schema does not exist in hudi table (#10527) --- .../apache/hudi/table/format/cow/ParquetSplitReaderUtil.java | 4 ++++ .../apache/hudi/table/format/cow/ParquetSplitReaderUtil.java | 4 ++++ .../apache/hudi/table/format/cow/ParquetSplitReaderUtil.java | 4 ++++ .../apache/hudi/table/format/cow/ParquetSplitReaderUtil.java | 4 ++++ .../apache/hudi/table/format/cow/ParquetSplitReaderUtil.java | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index aa12d9050faa..7f18f725acd7 100644 --- a/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.14.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -119,6 +119,10 @@ public static ParquetColumnarRowSplitReader genPartColumnarRowReader( long splitLength, FilterPredicate filterPredicate, UnboundRecordFilter recordFilter) throws IOException { + + ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1), + "One or more specified columns does not exist in the hudi table."); + List selNonPartNames = Arrays.stream(selectedFields) .mapToObj(i -> fullFieldNames[i]) .filter(n -> !partitionSpec.containsKey(n)) diff --git a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index bd86c68cc8bc..8bbbb1288e53 100644 --- a/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.15.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -119,6 +119,10 @@ public static ParquetColumnarRowSplitReader genPartColumnarRowReader( long splitLength, FilterPredicate filterPredicate, UnboundRecordFilter recordFilter) throws IOException { + + ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1), + "One or more specified columns does not exist in the hudi table."); + List selNonPartNames = Arrays.stream(selectedFields) .mapToObj(i -> fullFieldNames[i]) .filter(n -> !partitionSpec.containsKey(n)) diff --git a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index bd86c68cc8bc..8bbbb1288e53 100644 --- a/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.16.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -119,6 +119,10 @@ public static ParquetColumnarRowSplitReader genPartColumnarRowReader( long splitLength, FilterPredicate filterPredicate, UnboundRecordFilter recordFilter) throws IOException { + + ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1), + "One or more specified columns does not exist in the hudi table."); + List selNonPartNames = Arrays.stream(selectedFields) .mapToObj(i -> fullFieldNames[i]) .filter(n -> !partitionSpec.containsKey(n)) diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index bd86c68cc8bc..8bbbb1288e53 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -119,6 +119,10 @@ public static ParquetColumnarRowSplitReader genPartColumnarRowReader( long splitLength, FilterPredicate filterPredicate, UnboundRecordFilter recordFilter) throws IOException { + + ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1), + "One or more specified columns does not exist in the hudi table."); + List selNonPartNames = Arrays.stream(selectedFields) .mapToObj(i -> fullFieldNames[i]) .filter(n -> !partitionSpec.containsKey(n)) diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 414d4f506b58..f57030fb89d0 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -119,6 +119,10 @@ public static ParquetColumnarRowSplitReader genPartColumnarRowReader( long splitLength, FilterPredicate filterPredicate, UnboundRecordFilter recordFilter) throws IOException { + + ValidationUtils.checkState(Arrays.stream(selectedFields).noneMatch(x -> x == -1), + "One or more specified columns does not exist in the hudi table."); + List selNonPartNames = Arrays.stream(selectedFields) .mapToObj(i -> fullFieldNames[i]) .filter(n -> !partitionSpec.containsKey(n)) From e5cabe6f168f998c4a7f04d8a1ef7faf4bf89399 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Sat, 20 Jan 2024 10:39:04 +0800 Subject: [PATCH 067/112] [HUDI-7315] Disable constructing NOT filter predicate when pushing down its wrapped filter unsupported, as its operand's primitive value is incomparable (#10537) --- .../hudi/source/ExpressionPredicates.java | 18 +++++++++++++----- .../hudi/source/TestExpressionPredicates.java | 1 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java index 34bb58f6c8e2..bdf8fd996309 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java @@ -510,7 +510,11 @@ public Predicate bindPredicate(Predicate predicate) { @Override public FilterPredicate filter() { - return not(predicate.filter()); + FilterPredicate filterPredicate = predicate.filter(); + if (null == filterPredicate) { + return null; + } + return not(filterPredicate); } @Override @@ -548,10 +552,12 @@ public Predicate bindPredicates(Predicate... predicates) { @Override public FilterPredicate filter() { - if (null == predicates[0].filter() || null == predicates[1].filter()) { + FilterPredicate filterPredicate0 = predicates[0].filter(); + FilterPredicate filterPredicate1 = predicates[1].filter(); + if (null == filterPredicate0 || null == filterPredicate1) { return null; } - return and(predicates[0].filter(), predicates[1].filter()); + return and(filterPredicate0, filterPredicate1); } @Override @@ -589,10 +595,12 @@ public Predicate bindPredicates(Predicate... predicates) { @Override public FilterPredicate filter() { - if (null == predicates[0].filter() || null == predicates[1].filter()) { + FilterPredicate filterPredicate0 = predicates[0].filter(); + FilterPredicate filterPredicate1 = predicates[1].filter(); + if (null == filterPredicate0 || null == filterPredicate1) { return null; } - return or(predicates[0].filter(), predicates[1].filter()); + return or(filterPredicate0, filterPredicate1); } @Override diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java index b8c4b1caf2ef..02af3a85006a 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java @@ -180,5 +180,6 @@ public void testDisablePredicatesPushDownForUnsupportedType() { assertNull(And.getInstance().bindPredicates(greaterThanPredicate, lessThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); assertNull(Or.getInstance().bindPredicates(greaterThanPredicate, lessThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); + assertNull(Not.getInstance().bindPredicate(greaterThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); } } From c9cdc2a04fa360b09f31d80fc7257f2a7905301c Mon Sep 17 00:00:00 2001 From: xuzifu666 Date: Mon, 22 Jan 2024 13:29:29 +0800 Subject: [PATCH 068/112] [HUDI-7317] FlinkTableFactory snatifyCheck should contains index type (#10541) Co-authored-by: xuyu <11161569@vivo.com> --- .../apache/hudi/table/HoodieTableFactory.java | 12 +++++++++ .../hudi/table/TestHoodieTableFactory.java | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java index e2395abedfe3..030d9b15f6b9 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.configuration.OptionsResolver; @@ -172,6 +173,7 @@ public Set> optionalOptions() { */ private void sanityCheck(Configuration conf, ResolvedSchema schema) { checkTableType(conf); + checkIndexType(conf); if (!OptionsResolver.isAppendMode(conf)) { checkRecordKey(conf, schema); @@ -179,6 +181,16 @@ private void sanityCheck(Configuration conf, ResolvedSchema schema) { StreamerUtil.checkPreCombineKey(conf, schema.getColumnNames()); } + /** + * Validate the index type. + */ + private void checkIndexType(Configuration conf) { + String indexType = conf.get(FlinkOptions.INDEX_TYPE); + if (!StringUtils.isNullOrEmpty(indexType)) { + HoodieIndexConfig.INDEX_TYPE.checkValues(indexType); + } + } + /** * Validate the table type. */ diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java index 64145abd5bba..6469fb5c634f 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java @@ -191,6 +191,31 @@ void testRequiredOptions() { assertDoesNotThrow(() -> new HoodieTableFactory().createDynamicTableSink(sourceContext6)); } + @Test + void testIndexTypeCheck() { + ResolvedSchema schema = SchemaBuilder.instance() + .field("f0", DataTypes.INT().notNull()) + .field("f1", DataTypes.VARCHAR(20)) + .field("f2", DataTypes.TIMESTAMP(3)) + .field("ts", DataTypes.TIMESTAMP(3)) + .primaryKey("f0") + .build(); + + // Index type unset. The default value will be ok + final MockContext sourceContext1 = MockContext.getInstance(this.conf, schema, "f2"); + assertDoesNotThrow(() -> new HoodieTableFactory().createDynamicTableSink(sourceContext1)); + + // Invalid index type will throw exception + this.conf.set(FlinkOptions.INDEX_TYPE, "BUCKET_AA"); + final MockContext sourceContext2 = MockContext.getInstance(this.conf, schema, "f2"); + assertThrows(IllegalArgumentException.class, () -> new HoodieTableFactory().createDynamicTableSink(sourceContext2)); + + // Valid index type will be ok + this.conf.set(FlinkOptions.INDEX_TYPE, "BUCKET"); + final MockContext sourceContext3 = MockContext.getInstance(this.conf, schema, "f2"); + assertDoesNotThrow(() -> new HoodieTableFactory().createDynamicTableSink(sourceContext3)); + } + @Test void testTableTypeCheck() { ResolvedSchema schema = SchemaBuilder.instance() From 288898e005880b69c8fa3d7a700760896a092ef2 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Tue, 23 Jan 2024 10:13:09 +0800 Subject: [PATCH 069/112] [HUDI-7303] Fix date field type unexpectedly convert to Long when using date comparison operator (#10517) --- .../java/org/apache/hudi/source/ExpressionPredicates.java | 2 +- .../src/main/java/org/apache/hudi/util/ExpressionUtils.java | 4 ++-- .../test/java/org/apache/hudi/util/TestExpressionUtils.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java index bdf8fd996309..8faf705a81f9 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java @@ -616,10 +616,10 @@ private static FilterPredicate toParquetPredicate(FunctionDefinition functionDef case TINYINT: case SMALLINT: case INTEGER: + case DATE: case TIME_WITHOUT_TIME_ZONE: return predicateSupportsLtGt(functionDefinition, intColumn(columnName), (Integer) literal); case BIGINT: - case DATE: case TIMESTAMP_WITHOUT_TIME_ZONE: return predicateSupportsLtGt(functionDefinition, longColumn(columnName), (Long) literal); case FLOAT: diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ExpressionUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ExpressionUtils.java index 78245fb80a0d..1783057beeb7 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ExpressionUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ExpressionUtils.java @@ -160,7 +160,7 @@ public static Object getValueFromLiteral(ValueLiteralExpression expr) { .orElse(null); case DATE: return expr.getValueAs(LocalDate.class) - .map(LocalDate::toEpochDay) + .map(date -> (int) date.toEpochDay()) .orElse(null); // NOTE: All integral types of size less than Int are encoded as Ints in MT case BOOLEAN: @@ -212,7 +212,7 @@ public static Object getKeyFromLiteral(ValueLiteralExpression expr, boolean logi case TIMESTAMP_WITHOUT_TIME_ZONE: return logicalTimestamp ? new Timestamp((long) val) : val; case DATE: - return LocalDate.ofEpochDay((long) val); + return LocalDate.ofEpochDay((int) val); default: return val; } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java index c9eb5ac54959..64c205a8f716 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/util/TestExpressionUtils.java @@ -140,7 +140,7 @@ void getValueFromLiteralForNonNull() { if (dataList.get(i) instanceof LocalTime) { assertEquals(((LocalTime) dataList.get(i)).get(ChronoField.MILLI_OF_DAY), ExpressionUtils.getValueFromLiteral((ValueLiteralExpression) childExprs.get(1))); } else if (dataList.get(i) instanceof LocalDate) { - assertEquals(((LocalDate) dataList.get(i)).toEpochDay(), ExpressionUtils.getValueFromLiteral((ValueLiteralExpression) childExprs.get(1))); + assertEquals((int) ((LocalDate) dataList.get(i)).toEpochDay(), ExpressionUtils.getValueFromLiteral((ValueLiteralExpression) childExprs.get(1))); } else if (dataList.get(i) instanceof LocalDateTime) { assertEquals(((LocalDateTime) dataList.get(i)).toInstant(ZoneOffset.UTC).toEpochMilli(), ExpressionUtils.getValueFromLiteral((ValueLiteralExpression) childExprs.get(1))); } else { From 1554908a2fd89afc8fc20f6055fdb50442d11467 Mon Sep 17 00:00:00 2001 From: vinoth chandar Date: Tue, 23 Jan 2024 10:24:29 +0530 Subject: [PATCH 070/112] [MINOR] Reduce UT spark-datasource test times (#10547) * [MINOR] Reduce UT spark-datasource test times * Reverting the parallelism change --- .../hudi/TestHoodieSparkSqlWriter.scala | 51 ++++++------ .../hudi/functional/TestCOWDataSource.scala | 23 +++--- .../TestDataSourceForBootstrap.scala | 35 ++++---- .../hudi/functional/TestSparkDataSource.scala | 80 ++++++------------- 4 files changed, 75 insertions(+), 114 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index 599e8ae97080..1c6766063d24 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -702,15 +702,11 @@ def testBulkInsertForDropPartitionColumn(): Unit = { */ @ParameterizedTest @CsvSource(value = Array( - "COPY_ON_WRITE,true", - "COPY_ON_WRITE,false", - "MERGE_ON_READ,true", - "MERGE_ON_READ,false" + "COPY_ON_WRITE", + "MERGE_ON_READ" )) - def testSchemaEvolutionForTableType(tableType: String, allowColumnDrop: Boolean): Unit = { - val opts = getCommonParams(tempPath, hoodieFooTableName, tableType) ++ Map( - HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.key -> allowColumnDrop.toString - ) + def testSchemaEvolutionForTableType(tableType: String): Unit = { + val opts = getCommonParams(tempPath, hoodieFooTableName, tableType) // Create new table // NOTE: We disable Schema Reconciliation by default (such that Writer's @@ -801,28 +797,30 @@ def testBulkInsertForDropPartitionColumn(): Unit = { val df5 = spark.createDataFrame(sc.parallelize(recordsSeq), structType) - if (allowColumnDrop) { - HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, noReconciliationOpts, df5) - - val snapshotDF5 = spark.read.format("org.apache.hudi") - .load(tempBasePath + "/*/*/*/*") - - assertEquals(35, snapshotDF5.count()) + // assert error is thrown when dropping is not allowed + val disallowOpts = noReconciliationOpts ++ Map( + HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.key -> false.toString + ) + assertThrows[SchemaCompatibilityException] { + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, disallowOpts, df5) + } - assertEquals(df5.intersect(dropMetaFields(snapshotDF5)).except(df5).count, 0) + // passes when allowed. + val allowOpts = noReconciliationOpts ++ Map( + HoodieWriteConfig.SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP.key -> true.toString + ) + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, allowOpts, df5) - val fifthBatchActualSchema = fetchActualSchema() - val fifthBatchExpectedSchema = { - val (structName, nameSpace) = AvroConversionUtils.getAvroRecordNameAndNamespace(hoodieFooTableName) - AvroConversionUtils.convertStructTypeToAvroSchema(df5.schema, structName, nameSpace) - } + val snapshotDF5 = spark.read.format("org.apache.hudi").load(tempBasePath + "/*/*/*/*") + assertEquals(35, snapshotDF5.count()) + assertEquals(df5.intersect(dropMetaFields(snapshotDF5)).except(df5).count, 0) - assertEquals(fifthBatchExpectedSchema, fifthBatchActualSchema) - } else { - assertThrows[SchemaCompatibilityException] { - HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, noReconciliationOpts, df5) - } + val fifthBatchActualSchema = fetchActualSchema() + val fifthBatchExpectedSchema = { + val (structName, nameSpace) = AvroConversionUtils.getAvroRecordNameAndNamespace(hoodieFooTableName) + AvroConversionUtils.convertStructTypeToAvroSchema(df5.schema, structName, nameSpace) } + assertEquals(fifthBatchExpectedSchema, fifthBatchActualSchema) } /** @@ -1419,7 +1417,6 @@ object TestHoodieSparkSqlWriter { def deletePartitionsWildcardTestParams(): java.util.stream.Stream[Arguments] = { java.util.stream.Stream.of( - arguments("2015/03/*", Seq("2016/03/15")), arguments("*5/03/1*", Seq("2016/03/15")), arguments("2016/03/*", Seq("2015/03/16", "2015/03/17"))) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index f500ea83120d..b6b881c2b70a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -658,7 +658,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup val countDownLatch = new CountDownLatch(2) for (x <- 1 to 2) { val thread = new Thread(new UpdateThread(dataGen, spark, commonOpts, basePath, x + "00", countDownLatch, numRetries)) - thread.setName((x + "00_THREAD").toString()) + thread.setName(x + "00_THREAD") thread.start() } countDownLatch.await(1, TimeUnit.MINUTES) @@ -682,15 +682,18 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup val insertRecs = recordsToStrings(dataGen.generateInserts(instantTime, 1000)).toList val updateDf = spark.read.json(spark.sparkContext.parallelize(updateRecs, 2)) val insertDf = spark.read.json(spark.sparkContext.parallelize(insertRecs, 2)) - updateDf.union(insertDf).write.format("org.apache.hudi") - .options(commonOpts) - .option("hoodie.write.concurrency.mode", "optimistic_concurrency_control") - .option("hoodie.cleaner.policy.failed.writes", "LAZY") - .option("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider") - .option(HoodieWriteConfig.NUM_RETRIES_ON_CONFLICT_FAILURES.key(), numRetries.toString) - .mode(SaveMode.Append) - .save(basePath) - countDownLatch.countDown() + try { + updateDf.union(insertDf).write.format("org.apache.hudi") + .options(commonOpts) + .option("hoodie.write.concurrency.mode", "optimistic_concurrency_control") + .option("hoodie.cleaner.policy.failed.writes", "LAZY") + .option("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider") + .option(HoodieWriteConfig.NUM_RETRIES_ON_CONFLICT_FAILURES.key(), numRetries.toString) + .mode(SaveMode.Append) + .save(basePath) + } finally { + countDownLatch.countDown() + } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala index 9949b396abf1..c8445fefd075 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala @@ -171,8 +171,8 @@ class TestDataSourceForBootstrap { @CsvSource(value = Array( "org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector,AVRO", // TODO(HUDI-5807) enable for spark native records - /* "org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector,SPARK", */ - "org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector,AVRO", + /* "org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector,SPARK", + "org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector,AVRO",*/ "org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector,SPARK" )) def testMetadataBootstrapCOWHiveStylePartitioned(bootstrapSelector: String, recordType: HoodieRecordType): Unit = { @@ -252,11 +252,8 @@ class TestDataSourceForBootstrap { verifyIncrementalViewResult(commitInstantTime1, commitInstantTime2, isPartitioned = true, isHiveStylePartitioned = true) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], - // TODO(HUDI-5807) enable for spark native records - names = Array("AVRO" /*, "SPARK" */)) - def testMetadataBootstrapCOWPartitioned(recordType: HoodieRecordType): Unit = { + @Test + def testMetadataBootstrapCOWPartitioned(): Unit = { val timestamp = Instant.now.toEpochMilli val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext) @@ -268,7 +265,7 @@ class TestDataSourceForBootstrap { .mode(SaveMode.Overwrite) .save(srcPath) - val writeOpts = commonOpts ++ getRecordTypeOpts(recordType) ++ Map( + val writeOpts = commonOpts ++ getRecordTypeOpts(HoodieRecordType.AVRO) ++ Map( DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "datestr" ) @@ -331,9 +328,8 @@ class TestDataSourceForBootstrap { verifyIncrementalViewResult(commitInstantTime1, commitInstantTime3, isPartitioned = true, isHiveStylePartitioned = true) } - @ParameterizedTest - @ValueSource(booleans = Array(true, false)) - def testMetadataBootstrapMORPartitionedInlineClustering(enableRowWriter: Boolean): Unit = { + @Test + def testMetadataBootstrapMORPartitionedInlineClustering(): Unit = { val timestamp = Instant.now.toEpochMilli val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext) // Prepare source data @@ -343,7 +339,7 @@ class TestDataSourceForBootstrap { .mode(SaveMode.Overwrite) .save(srcPath) - val writeOpts = commonOpts ++ getRecordTypeOpts(HoodieRecordType.AVRO) ++ Map( + val writeOpts = commonOpts ++ Map( DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "datestr" ) @@ -370,7 +366,6 @@ class TestDataSourceForBootstrap { .options(writeOpts) .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.ENABLE_ROW_WRITER.key, enableRowWriter.toString) .option(HoodieClusteringConfig.INLINE_CLUSTERING.key, "true") .option(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS.key, "1") .option(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key, "datestr") @@ -464,9 +459,8 @@ class TestDataSourceForBootstrap { assertEquals(numRecordsUpdate, hoodieROViewDFWithBasePath.filter(s"timestamp == $updateTimestamp").count()) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testMetadataBootstrapMORPartitioned(recordType: HoodieRecordType): Unit = { + @Test + def testMetadataBootstrapMORPartitioned(): Unit = { val timestamp = Instant.now.toEpochMilli val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext) @@ -478,7 +472,7 @@ class TestDataSourceForBootstrap { .mode(SaveMode.Overwrite) .save(srcPath) - val writeOpts = commonOpts ++ getRecordTypeOpts(recordType) ++ Map( + val writeOpts = commonOpts ++ Map( DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "datestr" ) @@ -550,9 +544,8 @@ class TestDataSourceForBootstrap { assertEquals(0, hoodieROViewDF3.filter(s"timestamp == $updateTimestamp").count()) } - @ParameterizedTest - @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK")) - def testFullBootstrapCOWPartitioned(recordType: HoodieRecordType): Unit = { + @Test + def testFullBootstrapCOWPartitioned(): Unit = { val timestamp = Instant.now.toEpochMilli val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext) @@ -564,7 +557,7 @@ class TestDataSourceForBootstrap { .mode(SaveMode.Overwrite) .save(srcPath) - val writeOpts = commonOpts ++ getRecordTypeOpts(recordType) ++ Map( + val writeOpts = commonOpts ++ Map( DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key -> "true", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "datestr" ) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala index 3f64e24dfc9f..7b93f98b97ca 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala @@ -51,26 +51,16 @@ class TestSparkDataSource extends SparkClientFunctionalTestHarness { @ParameterizedTest @CsvSource(value = Array( - "COPY_ON_WRITE|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM" + "COPY_ON_WRITE|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "COPY_ON_WRITE|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "COPY_ON_WRITE|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", + "MERGE_ON_READ|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "MERGE_ON_READ|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "MERGE_ON_READ|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM" ), delimiter = '|') - def testCoreFlow(tableType: String, isMetadataEnabledOnWrite: Boolean, isMetadataEnabledOnRead: Boolean, keyGenClass: String, indexType: String): Unit = { + def testCoreFlow(tableType: String, keyGenClass: String, indexType: String): Unit = { + val isMetadataEnabledOnWrite = true + val isMetadataEnabledOnRead = true val partitionField = if (classOf[NonpartitionedKeyGenerator].getName.equals(keyGenClass)) "" else "partition" val options: Map[String, String] = commonOpts + (HoodieMetadataConfig.ENABLE.key -> String.valueOf(isMetadataEnabledOnWrite)) + @@ -216,44 +206,22 @@ class TestSparkDataSource extends SparkClientFunctionalTestHarness { @ParameterizedTest @CsvSource(value = Array( - "COPY_ON_WRITE|insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|insert|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|insert|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|insert|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|insert|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|insert|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|insert|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|bulk_insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|bulk_insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|bulk_insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "COPY_ON_WRITE|bulk_insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|bulk_insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|bulk_insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "COPY_ON_WRITE|bulk_insert|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|bulk_insert|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "COPY_ON_WRITE|bulk_insert|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|bulk_insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|bulk_insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|bulk_insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", - "MERGE_ON_READ|bulk_insert|false|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|bulk_insert|true|false|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|bulk_insert|true|true|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", - "MERGE_ON_READ|bulk_insert|false|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|bulk_insert|true|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", - "MERGE_ON_READ|bulk_insert|true|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM" + "COPY_ON_WRITE|insert|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "COPY_ON_WRITE|insert|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "COPY_ON_WRITE|insert|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", + "MERGE_ON_READ|insert|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "MERGE_ON_READ|insert|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "MERGE_ON_READ|insert|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", + "COPY_ON_WRITE|bulk_insert|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "COPY_ON_WRITE|bulk_insert|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "COPY_ON_WRITE|bulk_insert|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM", + "MERGE_ON_READ|bulk_insert|org.apache.hudi.keygen.SimpleKeyGenerator|BLOOM", + "MERGE_ON_READ|bulk_insert|org.apache.hudi.keygen.SimpleKeyGenerator|SIMPLE", + "MERGE_ON_READ|bulk_insert|org.apache.hudi.keygen.NonpartitionedKeyGenerator|GLOBAL_BLOOM" ), delimiter = '|') - def testImmutableUserFlow(tableType: String, operation: String, isMetadataEnabledOnWrite: Boolean, isMetadataEnabledOnRead: Boolean, keyGenClass: String, indexType: String): Unit = { + def testImmutableUserFlow(tableType: String, operation: String, keyGenClass: String, indexType: String): Unit = { + val isMetadataEnabledOnWrite = true + val isMetadataEnabledOnRead = true val partitionField = if (classOf[NonpartitionedKeyGenerator].getName.equals(keyGenClass)) "" else "partition" val options: Map[String, String] = commonOpts + (HoodieMetadataConfig.ENABLE.key -> String.valueOf(isMetadataEnabledOnWrite)) + From 1b37ee267ea2a2ff8eac0036dc36d719672e6d0a Mon Sep 17 00:00:00 2001 From: Tim Brown Date: Tue, 23 Jan 2024 18:53:22 -0600 Subject: [PATCH 071/112] [HUDI-7237] Hudi Streamer: Handle edge case with null schema, minor cleanups (#10342) --- .../utils/AvroSchemaEvolutionUtils.java | 2 +- .../SchemaProviderWithPostProcessor.java | 13 ++- .../hudi/utilities/sources/InputBatch.java | 8 +- .../hudi/utilities/streamer/StreamSync.java | 89 +++++++-------- .../TestHoodieDeltaStreamer.java | 101 +++++++++++++----- .../TestSourceFormatAdapter.java | 2 +- 6 files changed, 139 insertions(+), 76 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java index 35ca13820f24..809cd2837c76 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/AvroSchemaEvolutionUtils.java @@ -144,7 +144,7 @@ public static Schema reconcileSchemaRequirements(Schema sourceSchema, Schema tar return sourceSchema; } - if (sourceSchema.getType() == Schema.Type.NULL || sourceSchema.getFields().isEmpty()) { + if (sourceSchema == null || sourceSchema.getType() == Schema.Type.NULL || sourceSchema.getFields().isEmpty()) { return targetSchema; } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProviderWithPostProcessor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProviderWithPostProcessor.java index bd5bae4601d1..c1965e86989d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProviderWithPostProcessor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaProviderWithPostProcessor.java @@ -18,9 +18,10 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.common.util.Option; +import org.apache.avro.Schema; + /** * A schema provider which applies schema post process hook on schema. */ @@ -38,14 +39,16 @@ public SchemaProviderWithPostProcessor(SchemaProvider schemaProvider, @Override public Schema getSourceSchema() { - return schemaPostProcessor.map(processor -> processor.processSchema(schemaProvider.getSourceSchema())) - .orElse(schemaProvider.getSourceSchema()); + Schema sourceSchema = schemaProvider.getSourceSchema(); + return schemaPostProcessor.map(processor -> processor.processSchema(sourceSchema)) + .orElse(sourceSchema); } @Override public Schema getTargetSchema() { - return schemaPostProcessor.map(processor -> processor.processSchema(schemaProvider.getTargetSchema())) - .orElse(schemaProvider.getTargetSchema()); + Schema targetSchema = schemaProvider.getTargetSchema(); + return schemaPostProcessor.map(processor -> processor.processSchema(targetSchema)) + .orElse(targetSchema); } public SchemaProvider getOriginalSchemaProvider() { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java index 04e3a574dc5c..206909317fcb 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/InputBatch.java @@ -55,12 +55,16 @@ public SchemaProvider getSchemaProvider() { if (batch.isPresent() && schemaProvider == null) { throw new HoodieException("Please provide a valid schema provider class!"); } - return Option.ofNullable(schemaProvider).orElse(new NullSchemaProvider()); + return Option.ofNullable(schemaProvider).orElseGet(NullSchemaProvider::getInstance); } public static class NullSchemaProvider extends SchemaProvider { + private static final NullSchemaProvider INSTANCE = new NullSchemaProvider(); + public static NullSchemaProvider getInstance() { + return INSTANCE; + } - public NullSchemaProvider() { + private NullSchemaProvider() { this(null, null); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index a084da56345b..3ce82b9fe9ff 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -274,18 +274,16 @@ public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaPr this.processedSchema = new SchemaSet(); this.autoGenerateRecordKeys = KeyGenUtils.enableAutoGenerateRecordKeys(props); this.keyGenClassName = getKeyGeneratorClassName(new TypedProperties(props)); - refreshTimeline(); - // Register User Provided schema first - registerAvroSchemas(schemaProvider); - - - this.metrics = (HoodieIngestionMetrics) ReflectionUtils.loadClass(cfg.ingestionMetricsClass, getHoodieClientConfig(this.schemaProvider)); - this.hoodieMetrics = new HoodieMetrics(getHoodieClientConfig(this.schemaProvider)); this.conf = conf; + + HoodieWriteConfig hoodieWriteConfig = getHoodieClientConfig(); + this.metrics = (HoodieIngestionMetrics) ReflectionUtils.loadClass(cfg.ingestionMetricsClass, hoodieWriteConfig); + this.hoodieMetrics = new HoodieMetrics(hoodieWriteConfig); if (props.getBoolean(ERROR_TABLE_ENABLED.key(), ERROR_TABLE_ENABLED.defaultValue())) { this.errorTableWriter = ErrorTableUtils.getErrorTableWriter(cfg, sparkSession, props, hoodieSparkContext, fs); this.errorWriteFailureStrategy = ErrorTableUtils.getErrorWriteFailureStrategy(props); } + refreshTimeline(); Source source = UtilHelpers.createSource(cfg.sourceClassName, props, hoodieSparkContext.jsc(), sparkSession, schemaProvider, metrics); this.formatAdapter = new SourceFormatAdapter(source, this.errorTableWriter, Option.of(props)); @@ -309,7 +307,7 @@ public void refreshTimeline() throws IOException { if (fs.exists(new Path(cfg.targetBasePath))) { try { HoodieTableMetaClient meta = HoodieTableMetaClient.builder() - .setConf(new Configuration(fs.getConf())) + .setConf(conf) .setBasePath(cfg.targetBasePath) .setPayloadClassName(cfg.payloadClassName) .setRecordMergerStrategy(props.getProperty(HoodieWriteConfig.RECORD_MERGER_STRATEGY.key(), HoodieWriteConfig.RECORD_MERGER_STRATEGY.defaultValue())) @@ -337,7 +335,7 @@ public void refreshTimeline() throws IOException { LOG.warn("Base path exists, but table is not fully initialized. Re-initializing again"); initializeEmptyTable(); // reload the timeline from metaClient and validate that its empty table. If there are any instants found, then we should fail the pipeline, bcoz hoodie.properties got deleted by mistake. - HoodieTableMetaClient metaClientToValidate = HoodieTableMetaClient.builder().setConf(new Configuration(fs.getConf())).setBasePath(cfg.targetBasePath).build(); + HoodieTableMetaClient metaClientToValidate = HoodieTableMetaClient.builder().setConf(conf).setBasePath(cfg.targetBasePath).build(); if (metaClientToValidate.reloadActiveTimeline().countInstants() > 0) { // Deleting the recreated hoodie.properties and throwing exception. fs.delete(new Path(String.format("%s%s/%s", basePathWithForwardSlash, HoodieTableMetaClient.METAFOLDER_NAME, HoodieTableConfig.HOODIE_PROPERTIES_FILE))); @@ -396,7 +394,7 @@ public Pair, JavaRDD> syncOnce() throws IOException refreshTimeline(); String instantTime = HoodieActiveTimeline.createNewInstantTime(); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder() - .setConf(new Configuration(fs.getConf())) + .setConf(conf) .setBasePath(cfg.targetBasePath) .setRecordMergerStrategy(props.getProperty(HoodieWriteConfig.RECORD_MERGER_STRATEGY.key(), HoodieWriteConfig.RECORD_MERGER_STRATEGY.defaultValue())) .build(); @@ -431,7 +429,7 @@ public Pair, JavaRDD> syncOnce() throws IOException } // complete the pending compaction before writing to sink - if (cfg.retryLastPendingInlineCompactionJob && getHoodieClientConfig(this.schemaProvider).inlineCompactionEnabled()) { + if (cfg.retryLastPendingInlineCompactionJob && writeClient.getConfig().inlineCompactionEnabled()) { Option pendingCompactionInstant = getLastPendingCompactionInstant(allCommitsTimelineOpt); if (pendingCompactionInstant.isPresent()) { HoodieWriteMetadata> writeMetadata = writeClient.compact(pendingCompactionInstant.get()); @@ -439,7 +437,7 @@ public Pair, JavaRDD> syncOnce() throws IOException refreshTimeline(); reInitWriteClient(schemaProvider.getSourceSchema(), schemaProvider.getTargetSchema(), null); } - } else if (cfg.retryLastPendingInlineClusteringJob && getHoodieClientConfig(this.schemaProvider).inlineClusteringEnabled()) { + } else if (cfg.retryLastPendingInlineClusteringJob && writeClient.getConfig().inlineClusteringEnabled()) { // complete the pending clustering before writing to sink Option pendingClusteringInstant = getLastPendingClusteringInstant(allCommitsTimelineOpt); if (pendingClusteringInstant.isPresent()) { @@ -1001,7 +999,7 @@ public void runMetaSync() { * this constraint. */ private void setupWriteClient(Option> recordsOpt) throws IOException { - if ((null != schemaProvider)) { + if (null != schemaProvider) { Schema sourceSchema = schemaProvider.getSourceSchema(); Schema targetSchema = schemaProvider.getTargetSchema(); reInitWriteClient(sourceSchema, targetSchema, recordsOpt); @@ -1013,8 +1011,9 @@ private void reInitWriteClient(Schema sourceSchema, Schema targetSchema, Option< if (HoodieStreamerUtils.isDropPartitionColumns(props)) { targetSchema = HoodieAvroUtils.removeFields(targetSchema, HoodieStreamerUtils.getPartitionColumns(props)); } - registerAvroSchemas(sourceSchema, targetSchema); - final HoodieWriteConfig initialWriteConfig = getHoodieClientConfig(targetSchema); + final Pair initialWriteConfigAndSchema = getHoodieClientConfigAndWriterSchema(targetSchema, true); + final HoodieWriteConfig initialWriteConfig = initialWriteConfigAndSchema.getLeft(); + registerAvroSchemas(sourceSchema, initialWriteConfigAndSchema.getRight()); final HoodieWriteConfig writeConfig = SparkSampleWritesUtils .getWriteConfigWithRecordSizeEstimate(hoodieSparkContext.jsc(), recordsOpt, initialWriteConfig) .orElse(initialWriteConfig); @@ -1036,20 +1035,21 @@ private void reInitWriteClient(Schema sourceSchema, Schema targetSchema, Option< } /** - * Helper to construct Write Client config. - * - * @param schemaProvider Schema Provider + * Helper to construct Write Client config without a schema. */ - private HoodieWriteConfig getHoodieClientConfig(SchemaProvider schemaProvider) { - return getHoodieClientConfig(schemaProvider != null ? schemaProvider.getTargetSchema() : null); + private HoodieWriteConfig getHoodieClientConfig() { + return getHoodieClientConfigAndWriterSchema(null, false).getLeft(); } /** * Helper to construct Write Client config. * - * @param schema Schema + * @param schema initial writer schema. If null or Avro Null type, the schema will be fetched from previous commit metadata for the table. + * @param requireSchemaInConfig whether the schema should be present in the config. This is an optimization to avoid fetching schema from previous commits if not needed. + * + * @return Pair of HoodieWriteConfig and writer schema. */ - private HoodieWriteConfig getHoodieClientConfig(Schema schema) { + private Pair getHoodieClientConfigAndWriterSchema(Schema schema, boolean requireSchemaInConfig) { final boolean combineBeforeUpsert = true; final boolean autoCommit = false; @@ -1075,8 +1075,13 @@ private HoodieWriteConfig getHoodieClientConfig(Schema schema) { .withAutoCommit(autoCommit) .withProps(props); - if (schema != null) { - builder.withSchema(getSchemaForWriteConfig(schema).toString()); + // If schema is required in the config, we need to handle the case where the target schema is null and should be fetched from previous commits + final Schema returnSchema; + if (requireSchemaInConfig) { + returnSchema = getSchemaForWriteConfig(schema); + builder.withSchema(returnSchema.toString()); + } else { + returnSchema = schema; } HoodieWriteConfig config = builder.build(); @@ -1108,30 +1113,28 @@ private HoodieWriteConfig getHoodieClientConfig(Schema schema) { String.format("%s should be set to %s", COMBINE_BEFORE_INSERT.key(), cfg.filterDupes)); ValidationUtils.checkArgument(config.shouldCombineBeforeUpsert(), String.format("%s should be set to %s", COMBINE_BEFORE_UPSERT.key(), combineBeforeUpsert)); - return config; + return Pair.of(config, returnSchema); } private Schema getSchemaForWriteConfig(Schema targetSchema) { Schema newWriteSchema = targetSchema; try { - if (targetSchema != null) { - // check if targetSchema is equal to NULL schema - if (SchemaCompatibility.checkReaderWriterCompatibility(targetSchema, InputBatch.NULL_SCHEMA).getType() == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE - && SchemaCompatibility.checkReaderWriterCompatibility(InputBatch.NULL_SCHEMA, targetSchema).getType() == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE) { - // target schema is null. fetch schema from commit metadata and use it - HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(new Configuration(fs.getConf())) - .setBasePath(cfg.targetBasePath) - .setPayloadClassName(cfg.payloadClassName) - .build(); - int totalCompleted = meta.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants(); - if (totalCompleted > 0) { - TableSchemaResolver schemaResolver = new TableSchemaResolver(meta); - Option tableSchema = schemaResolver.getTableAvroSchemaIfPresent(false); - if (tableSchema.isPresent()) { - newWriteSchema = tableSchema.get(); - } else { - LOG.warn("Could not fetch schema from table. Falling back to using target schema from schema provider"); - } + // check if targetSchema is equal to NULL schema + if (targetSchema == null || (SchemaCompatibility.checkReaderWriterCompatibility(targetSchema, InputBatch.NULL_SCHEMA).getType() == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE + && SchemaCompatibility.checkReaderWriterCompatibility(InputBatch.NULL_SCHEMA, targetSchema).getType() == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE)) { + // target schema is null. fetch schema from commit metadata and use it + HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(conf) + .setBasePath(cfg.targetBasePath) + .setPayloadClassName(cfg.payloadClassName) + .build(); + int totalCompleted = meta.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants(); + if (totalCompleted > 0) { + TableSchemaResolver schemaResolver = new TableSchemaResolver(meta); + Option tableSchema = schemaResolver.getTableAvroSchemaIfPresent(false); + if (tableSchema.isPresent()) { + newWriteSchema = tableSchema.get(); + } else { + LOG.warn("Could not fetch schema from table. Falling back to using target schema from schema provider"); } } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index 8c2acac45cf1..83307a912367 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -188,7 +188,7 @@ private void addRecordMerger(HoodieRecordType type, List hoodieConfig) { if (type == HoodieRecordType.SPARK) { Map opts = new HashMap<>(); opts.put(HoodieWriteConfig.RECORD_MERGER_IMPLS.key(), HoodieSparkRecordMerger.class.getName()); - opts.put(HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT.key(),"parquet"); + opts.put(HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT.key(), "parquet"); for (Map.Entry entry : opts.entrySet()) { hoodieConfig.add(String.format("%s=%s", entry.getKey(), entry.getValue())); } @@ -206,7 +206,7 @@ protected HoodieDeltaStreamer initialHoodieDeltaStreamer(String tableBasePath, i } protected HoodieDeltaStreamer initialHoodieDeltaStreamer(String tableBasePath, int totalRecords, String asyncCluster, HoodieRecordType recordType, - WriteOperationType writeOperationType, Set customConfigs) throws IOException { + WriteOperationType writeOperationType, Set customConfigs) throws IOException { HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, writeOperationType); addRecordMerger(recordType, cfg.configs); cfg.continuousMode = true; @@ -465,16 +465,16 @@ public void testBulkInsertsAndUpsertsWithBootstrap(HoodieRecordType recordType) // Initial bulk insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); addRecordMerger(recordType, cfg.configs); - syncAndAssertRecordCount(cfg, 1000, tableBasePath, "00000", 1); + syncAndAssertRecordCount(cfg, 1000, tableBasePath, "00000", 1); // No new data => no commits. cfg.sourceLimit = 0; - syncAndAssertRecordCount(cfg, 1000, tableBasePath, "00000", 1); + syncAndAssertRecordCount(cfg, 1000, tableBasePath, "00000", 1); // upsert() #1 cfg.sourceLimit = 2000; cfg.operation = WriteOperationType.UPSERT; - syncAndAssertRecordCount(cfg,1950, tableBasePath, "00001", 2); + syncAndAssertRecordCount(cfg, 1950, tableBasePath, "00001", 2); List counts = countsPerCommit(tableBasePath, sqlContext); assertEquals(1950, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); @@ -534,7 +534,7 @@ public void testModifiedTableConfigs() throws Exception { cfg.sourceLimit = 2000; cfg.operation = WriteOperationType.UPSERT; cfg.configs.add(HoodieTableConfig.RECORDKEY_FIELDS.key() + "=differentval"); - assertThrows(HoodieException.class, () -> syncAndAssertRecordCount(cfg,1000,tableBasePath,"00000",1)); + assertThrows(HoodieException.class, () -> syncAndAssertRecordCount(cfg, 1000, tableBasePath, "00000", 1)); List counts = countsPerCommit(tableBasePath, sqlContext); assertEquals(1000, counts.stream().mapToLong(entry -> entry.getLong(1)).sum()); @@ -647,7 +647,7 @@ public void testUpsertsCOWContinuousMode(HoodieRecordType recordType) throws Exc @ParameterizedTest @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) public void testUpsertsCOW_ContinuousModeDisabled(HoodieRecordType recordType) throws Exception { - String tableBasePath = basePath + "/non_continuous_cow"; + String tableBasePath = basePath + "/non_continuous_cow"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT); addRecordMerger(recordType, cfg.configs); cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); @@ -678,7 +678,7 @@ public void testUpsertsMORContinuousMode(HoodieRecordType recordType) throws Exc @ParameterizedTest @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) public void testUpsertsMOR_ContinuousModeDisabled(HoodieRecordType recordType) throws Exception { - String tableBasePath = basePath + "/non_continuous_mor"; + String tableBasePath = basePath + "/non_continuous_mor"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT); addRecordMerger(recordType, cfg.configs); cfg.tableType = HoodieTableType.MERGE_ON_READ.name(); @@ -846,7 +846,7 @@ public void testDeltaSyncWithPendingCompaction() throws Exception { prepareParquetDFSSource(false, false, "source.avsc", "target.avsc", PROPS_FILENAME_TEST_PARQUET, PARQUET_SOURCE_ROOT, false, "partition_path", "", extraProps); String tableBasePath = basePath + "test_parquet_table" + testNum; - HoodieDeltaStreamer.Config deltaCfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, ParquetDFSSource.class.getName(), + HoodieDeltaStreamer.Config deltaCfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT, ParquetDFSSource.class.getName(), null, PROPS_FILENAME_TEST_PARQUET, false, false, 100000, false, null, "MERGE_ON_READ", "timestamp", null); deltaCfg.retryLastPendingInlineCompactionJob = false; @@ -995,7 +995,7 @@ public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean, HoodieR private List getAllMultiWriterConfigs() { List configs = new ArrayList<>(); configs.add(String.format("%s=%s", HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), InProcessLockProvider.class.getCanonicalName())); - configs.add(String.format("%s=%s", LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000")); + configs.add(String.format("%s=%s", LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000")); configs.add(String.format("%s=%s", HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name())); configs.add(String.format("%s=%s", HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(), HoodieFailedWritesCleaningPolicy.LAZY.name())); return configs; @@ -1041,7 +1041,7 @@ private HoodieIndexer.Config buildIndexerConfig(String basePath, } @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO","SPARK"}) + @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) public void testHoodieIndexer(HoodieRecordType recordType) throws Exception { String tableBasePath = basePath + "/asyncindexer"; HoodieDeltaStreamer ds = initialHoodieDeltaStreamer(tableBasePath, 1000, "false", recordType, WriteOperationType.INSERT, @@ -1429,7 +1429,7 @@ private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, Li int counter = 2; while (counter < 100) { // lets keep going. if the test times out, we will cancel the future within finally. So, safe to generate 100 batches. LOG.info("Generating data for batch " + counter); - prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, Integer.toString(counter) + ".parquet", false, null, null); + prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, Integer.toString(counter) + ".parquet", false, null, null); counter++; Thread.sleep(2000); } @@ -1474,9 +1474,9 @@ private void testBulkInsertRowWriterContinuousMode(Boolean useSchemaProvider, Li * 1 ===============> HUDI TABLE 2 (incr-pull with transform) (incr-pull) Hudi Table 1 is synced with Hive. */ @ParameterizedTest - @EnumSource(value = HoodieRecordType.class, names = {"AVRO","SPARK"}) + @EnumSource(value = HoodieRecordType.class, names = {"AVRO", "SPARK"}) public void testBulkInsertsAndUpsertsWithSQLBasedTransformerFor2StepPipeline(HoodieRecordType recordType) throws Exception { - String tableBasePath = basePath + "/" + recordType.toString() + "/test_table2"; + String tableBasePath = basePath + "/" + recordType.toString() + "/test_table2"; String downstreamTableBasePath = basePath + "/" + recordType.toString() + "/test_downstream_table2"; // Initial bulk insert to ingest to first hudi table @@ -1605,8 +1605,8 @@ public void testPayloadClassUpdate() throws Exception { public void testPartialPayloadClass() throws Exception { String dataSetBasePath = basePath + "/test_dataset_mor"; HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(dataSetBasePath, WriteOperationType.BULK_INSERT, - Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, - true, true, PartialUpdateAvroPayload.class.getName(), "MERGE_ON_READ"); + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false, + true, true, PartialUpdateAvroPayload.class.getName(), "MERGE_ON_READ"); new HoodieDeltaStreamer(cfg, jsc, fs, hiveServer.getHiveConf()).sync(); assertRecordCount(1000, dataSetBasePath, sqlContext); @@ -1842,7 +1842,7 @@ private void prepareJsonKafkaDFSSource(String propsFileName, String autoResetVal prepareJsonKafkaDFSSource(propsFileName, autoResetValue, topicName, null, false); } - private void prepareJsonKafkaDFSSource(String propsFileName, String autoResetValue, String topicName, Map extraProps, boolean shouldAddOffsets) throws IOException { + private void prepareJsonKafkaDFSSource(String propsFileName, String autoResetValue, String topicName, Map extraProps, boolean shouldAddOffsets) throws IOException { // Properties used for testing delta-streamer with JsonKafka source TypedProperties props = new TypedProperties(); populateAllCommonProps(props, basePath, testUtils.brokerAddress()); @@ -2043,7 +2043,7 @@ public void testDeltaStreamerMultiwriterCheckpoint() throws Exception { ObjectMapper objectMapper = new ObjectMapper(); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata .fromBytes(metaClient.getCommitsTimeline().getInstantDetails(instants.get(0)).get(), HoodieCommitMetadata.class); - Map checkpointVals = objectMapper.readValue(commitMetadata.getExtraMetadata().get(CHECKPOINT_KEY), Map.class); + Map checkpointVals = objectMapper.readValue(commitMetadata.getExtraMetadata().get(CHECKPOINT_KEY), Map.class); String parquetFirstcheckpoint = checkpointVals.get("parquet"); assertNotNull(parquetFirstcheckpoint); @@ -2059,7 +2059,7 @@ public void testDeltaStreamerMultiwriterCheckpoint() throws Exception { checkpointVals = objectMapper.readValue(commitMetadata.getExtraMetadata().get(CHECKPOINT_KEY), Map.class); String parquetSecondCheckpoint = checkpointVals.get("parquet"); assertNotNull(parquetSecondCheckpoint); - assertEquals(kafkaCheckpoint,checkpointVals.get("kafka")); + assertEquals(kafkaCheckpoint, checkpointVals.get("kafka")); assertTrue(Long.parseLong(parquetSecondCheckpoint) > Long.parseLong(parquetFirstcheckpoint)); parquetDs.shutdownGracefully(); kafkaDs.shutdownGracefully(); @@ -2085,6 +2085,43 @@ public void testParquetDFSSourceForEmptyBatch() throws Exception { testParquetDFSSource(false, null, true); } + @Test + public void testEmptyBatchWithNullSchemaValue() throws Exception { + PARQUET_SOURCE_ROOT = basePath + "/parquetFilesDfs" + testNum; + int parquetRecordsCount = 10; + prepareParquetDFSFiles(parquetRecordsCount, PARQUET_SOURCE_ROOT, FIRST_PARQUET_FILE_NAME, false, null, null); + prepareParquetDFSSource(false, false, "source.avsc", "target.avsc", PROPS_FILENAME_TEST_PARQUET, + PARQUET_SOURCE_ROOT, false, "partition_path", "0"); + + String tableBasePath = basePath + "/test_parquet_table" + testNum; + HoodieDeltaStreamer.Config config = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ParquetDFSSource.class.getName(), + null, PROPS_FILENAME_TEST_PARQUET, false, + false, 100000, false, null, null, "timestamp", null); + HoodieDeltaStreamer deltaStreamer1 = new HoodieDeltaStreamer(config, jsc); + deltaStreamer1.sync(); + assertRecordCount(parquetRecordsCount, tableBasePath, sqlContext); + HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(jsc.hadoopConfiguration()).build(); + HoodieInstant firstCommit = metaClient.getActiveTimeline().lastInstant().get(); + deltaStreamer1.shutdownGracefully(); + + prepareParquetDFSFiles(100, PARQUET_SOURCE_ROOT, "2.parquet", false, null, null); + HoodieDeltaStreamer.Config updatedConfig = config; + updatedConfig.schemaProviderClassName = NullValueSchemaProvider.class.getName(); + updatedConfig.sourceClassName = TestParquetDFSSourceEmptyBatch.class.getName(); + HoodieDeltaStreamer deltaStreamer2 = new HoodieDeltaStreamer(updatedConfig, jsc); + deltaStreamer2.sync(); + // since we mimic'ed empty batch, total records should be same as first sync(). + assertRecordCount(parquetRecordsCount, tableBasePath, sqlContext); + + // validate schema is set in commit even if target schema returns null on empty batch + TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient); + HoodieInstant secondCommit = metaClient.reloadActiveTimeline().lastInstant().get(); + Schema lastCommitSchema = tableSchemaResolver.getTableAvroSchema(secondCommit, true); + assertNotEquals(firstCommit, secondCommit); + assertNotEquals(lastCommitSchema, Schema.create(Schema.Type.NULL)); + deltaStreamer2.shutdownGracefully(); + } + @Test public void testDeltaStreamerRestartAfterMissingHoodieProps() throws Exception { testDeltaStreamerRestartAfterMissingHoodieProps(true); @@ -2322,7 +2359,7 @@ private void prepareSqlSource() throws IOException { sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false"); sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); - sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query","select * from test_sql_table"); + sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query", "select * from test_sql_table"); UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, fs, basePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE); @@ -2548,8 +2585,8 @@ public void testFetchingCheckpointFromPreviousCommits() throws IOException { HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(basePath + "/testFetchPreviousCheckpoint", WriteOperationType.BULK_INSERT); TypedProperties properties = new TypedProperties(); - properties.setProperty("hoodie.datasource.write.recordkey.field","key"); - properties.setProperty("hoodie.datasource.write.partitionpath.field","pp"); + properties.setProperty("hoodie.datasource.write.recordkey.field", "key"); + properties.setProperty("hoodie.datasource.write.partitionpath.field", "pp"); TestStreamSync testDeltaSync = new TestStreamSync(cfg, sparkSession, null, properties, jsc, fs, jsc.hadoopConfiguration(), null); @@ -2590,7 +2627,7 @@ public void testDropPartitionColumns(HoodieRecordType recordType) throws Excepti TestHelpers.assertAtLeastNCommits(1, tableBasePath, fs); TableSchemaResolver tableSchemaResolver = new TableSchemaResolver( - HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(fs.getConf()).build()); + HoodieTableMetaClient.builder().setBasePath(tableBasePath).setConf(fs.getConf()).build()); // get schema from data file written in the latest commit Schema tableSchema = tableSchemaResolver.getTableAvroSchemaFromDataFile(); assertNotNull(tableSchema); @@ -2769,7 +2806,7 @@ public void testAutoGenerateRecordKeys() throws Exception { } @ParameterizedTest - @CsvSource(value = {"COPY_ON_WRITE, AVRO", "MERGE_ON_READ, AVRO", + @CsvSource(value = {"COPY_ON_WRITE, AVRO", "MERGE_ON_READ, AVRO", "COPY_ON_WRITE, SPARK", "MERGE_ON_READ, SPARK"}) public void testConfigurationHotUpdate(HoodieTableType tableType, HoodieRecordType recordType) throws Exception { String tableBasePath = basePath + String.format("/configurationHotUpdate_%s_%s", tableType.name(), recordType.name()); @@ -2931,4 +2968,20 @@ private static Stream testORCDFSSource() { arguments(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName())) ); } + + public static class NullValueSchemaProvider extends SchemaProvider { + + public NullValueSchemaProvider(TypedProperties props) { + super(props); + } + + public NullValueSchemaProvider(TypedProperties props, JavaSparkContext jssc) { + super(props, jssc); + } + + @Override + public Schema getSourceSchema() { + return null; + } + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java index 30b997e856ae..1d6f2f110b2b 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSourceFormatAdapter.java @@ -130,7 +130,7 @@ private void verifySanitization(InputBatch> inputBatch, String sani @MethodSource("provideDataFiles") public void testRowSanitization(String unsanitizedDataFile, String sanitizedDataFile, StructType unsanitizedSchema, StructType sanitizedSchema) { JavaRDD unsanitizedRDD = jsc.textFile(unsanitizedDataFile); - SchemaProvider schemaProvider = new InputBatch.NullSchemaProvider(); + SchemaProvider schemaProvider = InputBatch.NullSchemaProvider.getInstance(); verifySanitization(fetchRowData(unsanitizedRDD, unsanitizedSchema, schemaProvider), sanitizedDataFile, sanitizedSchema); verifySanitization(fetchRowData(unsanitizedRDD, unsanitizedSchema, null), sanitizedDataFile, sanitizedSchema); From cef039f6cda87a1fb750356b5dba181e3fcfad8d Mon Sep 17 00:00:00 2001 From: Krishen <22875197+kbuci@users.noreply.github.com> Date: Tue, 23 Jan 2024 19:58:20 -0800 Subject: [PATCH 072/112] [HUDI-7316] AbstractHoodieLogRecordReader should accept HoodieTableMetaClient in order to reduce occurences of executors making file listing calls when reloading active timeline (#10540) Summary: Currently some implementors of AbstractHoodieLogRecordReader create a HoodieTableMetaClient on construction, which implicitly reloads active timeline, causing a `listStatus` HDFS call. Since these are created in executors, each of the hundreds to thousands of executors will make a `listStatus` call at the same time during a stage. To avoid these redundant calls to HDFS NameNode, AbstractHoodieLogRecordReader and the following implementations have been updated to allow an existing HoodieTableMetaClient to be passed in. - HoodieUnMergedLogRecordScanner - HoodieMergedLogRecordScanner - HoodieMetadataMergedLogRecordReader As long as the caller passed in a HoodieTableMetaClient with active timeline already loaded, and the implementation doesn't need to re-load the timeline (such as in order to get a more "fresh" timeline) than `listStatus` calls can be avoided in the executor, without causing the logic to be incorrect. Co-authored-by: Krishen Bhan --- .../apache/hudi/io/HoodieMergedReadHandle.java | 1 + .../table/action/compact/HoodieCompactor.java | 1 + .../HoodieLogCompactionPlanGenerator.java | 1 + .../MultipleSparkJobExecutionStrategy.java | 1 + .../TestHoodieClientOnMergeOnReadStorage.java | 2 ++ .../log/AbstractHoodieLogRecordReader.java | 9 +++++++-- .../table/log/HoodieMergedLogRecordScanner.java | 17 ++++++++++++++--- .../log/HoodieUnMergedLogRecordScanner.java | 17 ++++++++++++++--- .../metadata/HoodieBackedTableMetadata.java | 1 + .../metadata/HoodieMetadataLogRecordReader.java | 6 ++++++ .../hudi/metadata/HoodieTableMetadataUtil.java | 1 + 11 files changed, 49 insertions(+), 8 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergedReadHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergedReadHandle.java index 738688c62193..e74ab37f4b69 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergedReadHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergedReadHandle.java @@ -135,6 +135,7 @@ private HoodieMergedLogRecordScanner getLogRecordScanner(FileSlice fileSlice) { .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType()) .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled()) .withRecordMerger(config.getRecordMerger()) + .withTableMetaClient(hoodieTable.getMetaClient()) .build(); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java index 906ea6473a4b..d1d69be16dcf 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java @@ -207,6 +207,7 @@ public List compact(HoodieCompactionHandler compactionHandler, .withOptimizedLogBlocksScan(executionHelper.enableOptimizedLogBlockScan(config)) .withRecordMerger(config.getRecordMerger()) .withInstantRange(instantRange) + .withTableMetaClient(metaClient) .build(); Option oldDataFileOpt = diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/HoodieLogCompactionPlanGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/HoodieLogCompactionPlanGenerator.java index 2b7047265802..7cc0e338bcf9 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/HoodieLogCompactionPlanGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/plan/generators/HoodieLogCompactionPlanGenerator.java @@ -98,6 +98,7 @@ private boolean isFileSliceEligibleForLogCompaction(FileSlice fileSlice, String .withBufferSize(writeConfig.getMaxDFSStreamBufferSize()) .withOptimizedLogBlocksScan(true) .withRecordMerger(writeConfig.getRecordMerger()) + .withTableMetaClient(metaClient) .build(); scanner.scan(true); int totalBlocks = scanner.getCurrentInstantLogBlocks().size(); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java index 8a39dc79ff31..17400acfc050 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java @@ -318,6 +318,7 @@ private HoodieData> readRecordsForGroupWithLogs(JavaSparkContext .withDiskMapType(config.getCommonConfig().getSpillableDiskMapType()) .withBitCaskDiskMapCompressionEnabled(config.getCommonConfig().isBitCaskDiskMapCompressionEnabled()) .withRecordMerger(config.getRecordMerger()) + .withTableMetaClient(table.getMetaClient()) .build(); Option baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath()) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnMergeOnReadStorage.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnMergeOnReadStorage.java index 92c246268cdb..0b4c50d0a7c9 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnMergeOnReadStorage.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnMergeOnReadStorage.java @@ -445,6 +445,7 @@ private void validateBlockInstantsBeforeAndAfterRollback(HoodieWriteConfig confi .withLatestInstantTime(instant) .withBufferSize(config.getMaxDFSStreamBufferSize()) .withOptimizedLogBlocksScan(true) + .withTableMetaClient(metaClient) .build(); scanner.scan(true); List prevInstants = scanner.getValidBlockInstants(); @@ -458,6 +459,7 @@ private void validateBlockInstantsBeforeAndAfterRollback(HoodieWriteConfig confi .withLatestInstantTime(currentInstant) .withBufferSize(config.getMaxDFSStreamBufferSize()) .withOptimizedLogBlocksScan(true) + .withTableMetaClient(table.getMetaClient()) .build(); scanner2.scan(true); List currentInstants = scanner2.getValidBlockInstants(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index 7cd6ea9cd237..60554e2e4cfc 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -157,10 +157,11 @@ protected AbstractHoodieLogRecordReader(FileSystem fs, String basePath, List keyFieldOverride, boolean enableOptimizedLogBlocksScan, - HoodieRecordMerger recordMerger) { + HoodieRecordMerger recordMerger, + Option hoodieTableMetaClientOption) { this.readerSchema = readerSchema; this.latestInstantTime = latestInstantTime; - this.hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build(); + this.hoodieTableMetaClient = hoodieTableMetaClientOption.orElseGet(() -> HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build()); // load class from the payload fully qualified class name HoodieTableConfig tableConfig = this.hoodieTableMetaClient.getTableConfig(); this.payloadClassFQN = tableConfig.getPayloadClass(); @@ -1047,6 +1048,10 @@ public Builder withOptimizedLogBlocksScan(boolean enableOptimizedLogBlocksScan) throw new UnsupportedOperationException(); } + public Builder withTableMetaClient(HoodieTableMetaClient hoodieTableMetaClient) { + throw new UnsupportedOperationException(); + } + public abstract AbstractHoodieLogRecordReader build(); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java index 85008a03e13c..9062641f1a73 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.model.HoodieRecordMerger; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.cdc.HoodieCDCUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.DefaultSizeEstimator; @@ -100,9 +101,11 @@ private HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List partitionName, InternalSchema internalSchema, Option keyFieldOverride, - boolean enableOptimizedLogBlocksScan, HoodieRecordMerger recordMerger) { + boolean enableOptimizedLogBlocksScan, HoodieRecordMerger recordMerger, + Option hoodieTableMetaClientOption) { super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, - instantRange, withOperationField, forceFullScan, partitionName, internalSchema, keyFieldOverride, enableOptimizedLogBlocksScan, recordMerger); + instantRange, withOperationField, forceFullScan, partitionName, internalSchema, keyFieldOverride, enableOptimizedLogBlocksScan, recordMerger, + hoodieTableMetaClientOption); try { this.maxMemorySizeInBytes = maxMemorySizeInBytes; // Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize @@ -336,6 +339,7 @@ public static class Builder extends AbstractHoodieLogRecordReader.Builder { private boolean forceFullScan = true; private boolean enableOptimizedLogBlocksScan = false; private HoodieRecordMerger recordMerger = HoodiePreCombineAvroRecordMerger.INSTANCE; + protected HoodieTableMetaClient hoodieTableMetaClient; @Override public Builder withFileSystem(FileSystem fs) { @@ -452,6 +456,12 @@ public Builder withForceFullScan(boolean forceFullScan) { return this; } + @Override + public Builder withTableMetaClient(HoodieTableMetaClient hoodieTableMetaClient) { + this.hoodieTableMetaClient = hoodieTableMetaClient; + return this; + } + @Override public HoodieMergedLogRecordScanner build() { if (this.partitionName == null && CollectionUtils.nonEmpty(this.logFilePaths)) { @@ -463,7 +473,8 @@ public HoodieMergedLogRecordScanner build() { latestInstantTime, maxMemorySizeInBytes, readBlocksLazily, reverseReader, bufferSize, spillableMapBasePath, instantRange, diskMapType, isBitCaskDiskMapCompressionEnabled, withOperationField, forceFullScan, - Option.ofNullable(partitionName), internalSchema, Option.ofNullable(keyFieldOverride), enableOptimizedLogBlocksScan, recordMerger); + Option.ofNullable(partitionName), internalSchema, Option.ofNullable(keyFieldOverride), enableOptimizedLogBlocksScan, recordMerger, + Option.ofNullable(hoodieTableMetaClient)); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java index f62ec0febd57..4d870618e7b6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieUnMergedLogRecordScanner.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.model.HoodiePreCombineAvroRecordMerger; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordMerger; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.cdc.HoodieCDCUtils; import org.apache.hudi.common.util.HoodieRecordUtils; import org.apache.hudi.common.util.Option; @@ -44,9 +45,11 @@ public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordReade private HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath, List logFilePaths, Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize, LogRecordScannerCallback callback, Option instantRange, InternalSchema internalSchema, - boolean enableOptimizedLogBlocksScan, HoodieRecordMerger recordMerger) { + boolean enableOptimizedLogBlocksScan, HoodieRecordMerger recordMerger, + Option hoodieTableMetaClientOption) { super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, instantRange, - false, true, Option.empty(), internalSchema, Option.empty(), enableOptimizedLogBlocksScan, recordMerger); + false, true, Option.empty(), internalSchema, Option.empty(), enableOptimizedLogBlocksScan, recordMerger, + hoodieTableMetaClientOption); this.callback = callback; } @@ -109,6 +112,7 @@ public static class Builder extends AbstractHoodieLogRecordReader.Builder { private LogRecordScannerCallback callback; private boolean enableOptimizedLogBlocksScan; private HoodieRecordMerger recordMerger = HoodiePreCombineAvroRecordMerger.INSTANCE; + private HoodieTableMetaClient hoodieTableMetaClient; public Builder withFileSystem(FileSystem fs) { this.fs = fs; @@ -180,13 +184,20 @@ public Builder withRecordMerger(HoodieRecordMerger recordMerger) { return this; } + @Override + public HoodieUnMergedLogRecordScanner.Builder withTableMetaClient( + HoodieTableMetaClient hoodieTableMetaClient) { + this.hoodieTableMetaClient = hoodieTableMetaClient; + return this; + } + @Override public HoodieUnMergedLogRecordScanner build() { ValidationUtils.checkArgument(recordMerger != null); return new HoodieUnMergedLogRecordScanner(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize, callback, instantRange, - internalSchema, enableOptimizedLogBlocksScan, recordMerger); + internalSchema, enableOptimizedLogBlocksScan, recordMerger, Option.ofNullable(hoodieTableMetaClient)); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index 31ec9806a3a7..a1dd3959f79e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -495,6 +495,7 @@ public Pair getLogRecordScanner(List readRecordKeysFromFileSlices(HoodieEngine engineType, Collections.emptyList(), // TODO: support different merger classes, which is currently only known to write config metaClient.getTableConfig().getRecordMergerStrategy())) + .withTableMetaClient(metaClient) .build(); ClosableIterator recordKeyIterator = ClosableIterator.wrap(mergedLogRecordScanner.getRecords().keySet().iterator()); return new ClosableIterator() { From 492daf0272fd5d2aa9cec4538b1504067ca9b6d9 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Wed, 24 Jan 2024 17:15:07 +0800 Subject: [PATCH 073/112] [HUDI-7311] Add implicit literal type conversion before filter push down (#10531) --- .../hudi/source/ExpressionPredicates.java | 4 +- .../hudi/util/ImplicitTypeConverter.java | 134 ++++++++++++++++++ .../hudi/source/TestExpressionPredicates.java | 61 ++++++++ 3 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ImplicitTypeConverter.java diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java index 8faf705a81f9..58ee59a81766 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/ExpressionPredicates.java @@ -26,6 +26,7 @@ import org.apache.flink.table.functions.BuiltInFunctionDefinitions; import org.apache.flink.table.functions.FunctionDefinition; import org.apache.flink.table.types.logical.LogicalType; +import org.apache.hudi.util.ImplicitTypeConverter; import org.apache.parquet.filter2.predicate.FilterPredicate; import org.apache.parquet.filter2.predicate.Operators; import org.slf4j.Logger; @@ -223,7 +224,8 @@ public ColumnPredicate bindValueLiteral(ValueLiteralExpression valueLiteral) { @Override public FilterPredicate filter() { - return toParquetPredicate(getFunctionDefinition(), literalType, columnName, literal); + Serializable convertedLiteral = ImplicitTypeConverter.convertImplicitly(literalType, literal); + return toParquetPredicate(getFunctionDefinition(), literalType, columnName, convertedLiteral); } /** diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ImplicitTypeConverter.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ImplicitTypeConverter.java new file mode 100644 index 000000000000..601b878655fc --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ImplicitTypeConverter.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.util; + +import org.apache.flink.table.types.logical.LogicalType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneOffset; +import java.time.temporal.ChronoField; + +/** + * Implicit type converter for predicates push down. + */ +public class ImplicitTypeConverter { + + private static final Logger LOG = LoggerFactory.getLogger(ImplicitTypeConverter.class); + + /** + * Convert the literal to the corresponding type. + * @param literalType The type of the literal. + * @param literal The literal value. + * @return The converted literal. + */ + public static Serializable convertImplicitly(LogicalType literalType, Serializable literal) { + try { + switch (literalType.getTypeRoot()) { + case BOOLEAN: + if (literal instanceof Boolean) { + return literal; + } else { + return Boolean.valueOf(String.valueOf(literal)); + } + case TINYINT: + case SMALLINT: + case INTEGER: + if (literal instanceof Integer) { + return literal; + } else { + return Integer.valueOf(String.valueOf(literal)); + } + case BIGINT: + if (literal instanceof Long) { + return literal; + } else if (literal instanceof Integer) { + return new Long((Integer) literal); + } else { + return Long.valueOf(String.valueOf(literal)); + } + case FLOAT: + if (literal instanceof Float) { + return literal; + } else { + return Float.valueOf(String.valueOf(literal)); + } + case DOUBLE: + if (literal instanceof Double) { + return literal; + } else { + return Double.valueOf(String.valueOf(literal)); + } + case BINARY: + case VARBINARY: + if (literal instanceof byte[]) { + return literal; + } else { + return String.valueOf(literal).getBytes(); + } + case DATE: + if (literal instanceof LocalDate) { + return (int) ((LocalDate) literal).toEpochDay(); + } else if (literal instanceof Integer) { + return literal; + } else if (literal instanceof Long) { + return ((Long) literal).intValue(); + } else { + return (int) LocalDate.parse(String.valueOf(literal)).toEpochDay(); + } + case CHAR: + case VARCHAR: + if (literal instanceof String) { + return literal; + } else { + return String.valueOf(literal); + } + case TIME_WITHOUT_TIME_ZONE: + if (literal instanceof LocalTime) { + return ((LocalTime) literal).get(ChronoField.MILLI_OF_DAY); + } else if (literal instanceof Integer) { + return literal; + } else if (literal instanceof Long) { + return ((Long) literal).intValue(); + } else { + return LocalTime.parse(String.valueOf(literal)).get(ChronoField.MILLI_OF_DAY); + } + case TIMESTAMP_WITHOUT_TIME_ZONE: + if (literal instanceof LocalDateTime) { + return ((LocalDateTime) literal).toInstant(ZoneOffset.UTC).toEpochMilli(); + } else if (literal instanceof Long) { + return literal; + } else if (literal instanceof Integer) { + return new Long((Integer) literal); + } else { + return LocalDateTime.parse(String.valueOf(literal)).toInstant(ZoneOffset.UTC).toEpochMilli(); + } + default: + return literal; + } + } catch (RuntimeException e) { + LOG.warn("Failed to convert literal [{}] to type [{}]. Use its original type", literal, literalType); + return literal; + } + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java index 02af3a85006a..869b69a1a2db 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestExpressionPredicates.java @@ -18,6 +18,7 @@ package org.apache.hudi.source; +import org.apache.flink.table.types.DataType; import org.apache.hudi.source.ExpressionPredicates.And; import org.apache.hudi.source.ExpressionPredicates.Equals; import org.apache.hudi.source.ExpressionPredicates.GreaterThan; @@ -41,11 +42,18 @@ import org.apache.parquet.filter2.predicate.Operators.IntColumn; import org.apache.parquet.filter2.predicate.Operators.Lt; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; import java.math.BigDecimal; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Stream; import static org.apache.hudi.source.ExpressionPredicates.fromExpression; import static org.apache.parquet.filter2.predicate.FilterApi.and; @@ -58,6 +66,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.not; import static org.apache.parquet.filter2.predicate.FilterApi.notEq; import static org.apache.parquet.filter2.predicate.FilterApi.or; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; @@ -66,6 +75,8 @@ */ public class TestExpressionPredicates { + private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with fieldName={0}, dataType={1}, literalValue={2}"; + @Test public void testFilterPredicateFromExpression() { FieldReferenceExpression fieldReference = new FieldReferenceExpression("f_int", DataTypes.INT(), 0, 0); @@ -182,4 +193,54 @@ public void testDisablePredicatesPushDownForUnsupportedType() { assertNull(Or.getInstance().bindPredicates(greaterThanPredicate, lessThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); assertNull(Not.getInstance().bindPredicate(greaterThanPredicate).filter(), "Decimal type push down is unsupported, so we expect null"); } + + public static Stream testColumnPredicateLiteralTypeConversionParams() { + return Stream.of( + Arguments.of("f_boolean", DataTypes.BOOLEAN(), Boolean.TRUE), + Arguments.of("f_boolean", DataTypes.BOOLEAN(), "true"), + Arguments.of("f_tinyint", DataTypes.TINYINT(), 12345), + Arguments.of("f_tinyint", DataTypes.TINYINT(), "12345"), + Arguments.of("f_smallint", DataTypes.SMALLINT(), 12345), + Arguments.of("f_smallint", DataTypes.SMALLINT(), "12345"), + Arguments.of("f_integer", DataTypes.INT(), 12345), + Arguments.of("f_integer", DataTypes.INT(), "12345"), + Arguments.of("f_bigint", DataTypes.BIGINT(), 12345L), + Arguments.of("f_bigint", DataTypes.BIGINT(), 12345), + Arguments.of("f_bigint", DataTypes.BIGINT(), "12345"), + Arguments.of("f_float", DataTypes.FLOAT(), 123.45f), + Arguments.of("f_float", DataTypes.FLOAT(), "123.45f"), + Arguments.of("f_double", DataTypes.DOUBLE(), 123.45), + Arguments.of("f_double", DataTypes.DOUBLE(), "123.45"), + Arguments.of("f_varbinary", DataTypes.VARBINARY(10), "a".getBytes()), + Arguments.of("f_varbinary", DataTypes.VARBINARY(10), "a"), + Arguments.of("f_binary", DataTypes.BINARY(10), "a".getBytes()), + Arguments.of("f_binary", DataTypes.BINARY(10), "a"), + Arguments.of("f_date", DataTypes.DATE(), LocalDate.now()), + Arguments.of("f_date", DataTypes.DATE(), 19740), + Arguments.of("f_date", DataTypes.DATE(), 19740L), + Arguments.of("f_date", DataTypes.DATE(), "2024-01-18"), + Arguments.of("f_char", DataTypes.CHAR(1), "a"), + Arguments.of("f_char", DataTypes.CHAR(1), 1), + Arguments.of("f_varchar", DataTypes.VARCHAR(1), "a"), + Arguments.of("f_varchar", DataTypes.VARCHAR(1), 1), + Arguments.of("f_time", DataTypes.TIME(), LocalTime.now()), + Arguments.of("f_time", DataTypes.TIME(), 12345), + Arguments.of("f_time", DataTypes.TIME(), 60981896000L), + Arguments.of("f_time", DataTypes.TIME(), "20:00:00"), + Arguments.of("f_timestamp", DataTypes.TIMESTAMP(), LocalDateTime.now()), + Arguments.of("f_timestamp", DataTypes.TIMESTAMP(), 12345), + Arguments.of("f_timestamp", DataTypes.TIMESTAMP(), 1705568913701L), + Arguments.of("f_timestamp", DataTypes.TIMESTAMP(), "2024-01-18T15:00:00") + ); + } + + @ParameterizedTest(name = TEST_NAME_WITH_PARAMS) + @MethodSource("testColumnPredicateLiteralTypeConversionParams") + public void testColumnPredicateLiteralTypeConversion(String fieldName, DataType dataType, Object literalValue) { + FieldReferenceExpression fieldReference = new FieldReferenceExpression(fieldName, dataType, 0, 0); + ValueLiteralExpression valueLiteral = new ValueLiteralExpression(literalValue); + + ExpressionPredicates.ColumnPredicate predicate = Equals.getInstance().bindFieldReference(fieldReference).bindValueLiteral(valueLiteral); + assertDoesNotThrow(predicate::filter, () -> String.format("Convert from %s to %s failed", literalValue.getClass().getName(), dataType)); + } } From 126010b803f0a29f28692fb05520d5c5e142486f Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Mon, 26 Feb 2024 10:12:59 -0800 Subject: [PATCH 074/112] [HUDI-7228] Fix eager closure of log reader input streams with log record reader (#10340) --- .../hudi/common/table/log/HoodieLogFileReader.java | 9 +++++---- .../hudi/common/table/log/HoodieLogFormatReader.java | 8 ++++---- .../hudi/common/table/log/block/HoodieDataBlock.java | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index cf21ef5f42c8..42722228e4ab 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -343,9 +343,10 @@ private long scanForNextAvailableBlockOffset() throws IOException { @Override public void close() throws IOException { if (!closed) { - LOG.info("Closing Log file reader " + logFile.getFileName()); - this.inputStream.close(); - this.inputStream = null; + LOG.info("Closing Log file reader " + logFile.getFileName()); + if (null != this.inputStream) { + this.inputStream.close(); + } closed = true; } } @@ -483,7 +484,7 @@ private static FSDataInputStream getFSDataInputStream(FileSystem fs, try { fsDataInputStream = fs.open(logFile.getPath(), bufferSize); } catch (IOException e) { - throw new HoodieIOException("Exception create input stream from file: " + logFile, e); + throw new HoodieIOException("Exception creating input stream from file: " + logFile, e); } if (FSUtils.isGCSFileSystem(fs)) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java index 955f5485ed45..3c4737af8d0b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatReader.java @@ -40,7 +40,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { private HoodieLogFileReader currentReader; private final FileSystem fs; private final Schema readerSchema; - private InternalSchema internalSchema; + private final InternalSchema internalSchema; private final boolean readBlocksLazily; private final String recordKeyField; private final boolean enableInlineReading; @@ -66,13 +66,14 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { } } - @Override /** - * Closes latest reader. + * Closes any resources held */ + @Override public void close() throws IOException { if (currentReader != null) { currentReader.close(); + currentReader = null; } } @@ -119,5 +120,4 @@ public boolean hasPrev() { public HoodieLogBlock prev() throws IOException { return this.currentReader.prev(); } - } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java index e96704f6c6ad..874f7ebab25a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java @@ -18,14 +18,14 @@ package org.apache.hudi.common.table.log.block; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; -import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.Schema; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hudi.common.model.HoodieRecord; import java.io.IOException; import java.util.HashSet; From 9002a02a2d8c4dfba30615f169bf577fb929e740 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Mon, 26 Feb 2024 17:04:48 -0800 Subject: [PATCH 075/112] [HUDI-7298] Write bad records to error table in more cases instead of failing stream (#10500) Cases: - No transformers, with schema provider. Records will go to the error table if they cannot be rewritten in the deduced schema. - recordkey is null, even if the column is nullable in the schema --- .../hudi/config/HoodieErrorTableConfig.java | 6 ++ .../org/apache/hudi/HoodieSparkUtils.scala | 21 ++++ .../org/apache/hudi/avro/HoodieAvroUtils.java | 33 +++++- .../apache/hudi/TestHoodieSparkUtils.scala | 4 + .../hudi/utilities/streamer/ErrorEvent.java | 6 +- .../streamer/HoodieStreamerUtils.java | 68 ++++++++---- .../hudi/utilities/streamer/StreamSync.java | 19 +++- ...oodieDeltaStreamerSchemaEvolutionBase.java | 65 ++++++++++++ ...DeltaStreamerSchemaEvolutionExtensive.java | 100 +++++++++++++++++- ...odieDeltaStreamerSchemaEvolutionQuick.java | 15 ++- .../sources/TestGenericRddTransform.java | 29 +++++ .../testMissingRecordKey.json | 2 + 12 files changed, 334 insertions(+), 34 deletions(-) create mode 100644 hudi-utilities/src/test/resources/data/schema-evolution/testMissingRecordKey.json diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieErrorTableConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieErrorTableConfig.java index 68e2097c33be..8ba013b00eed 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieErrorTableConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieErrorTableConfig.java @@ -72,6 +72,12 @@ public class HoodieErrorTableConfig { .defaultValue(false) .withDocumentation("Records with schema mismatch with Target Schema are sent to Error Table."); + public static final ConfigProperty ERROR_ENABLE_VALIDATE_RECORD_CREATION = ConfigProperty + .key("hoodie.errortable.validate.recordcreation.enable") + .defaultValue(true) + .sinceVersion("0.14.2") + .withDocumentation("Records that fail to be created due to keygeneration failure or other issues will be sent to the Error Table"); + public static final ConfigProperty ERROR_TABLE_WRITE_FAILURE_STRATEGY = ConfigProperty .key("hoodie.errortable.write.failure.strategy") .defaultValue(ErrorWriteFailureStrategy.ROLLBACK_COMMIT.name()) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index 527864fcf244..535af8db1933 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -199,6 +199,27 @@ object HoodieSparkUtils extends SparkAdapterSupport with SparkVersionsSupport wi } } + /** + * Rerwite the record into the target schema. + * Return tuple of rewritten records and records that could not be converted + */ + def safeRewriteRDD(df: RDD[GenericRecord], serializedTargetSchema: String): Tuple2[RDD[GenericRecord], RDD[String]] = { + val rdds: RDD[Either[GenericRecord, String]] = df.mapPartitions { recs => + if (recs.isEmpty) { + Iterator.empty + } else { + val schema = new Schema.Parser().parse(serializedTargetSchema) + val transform: GenericRecord => Either[GenericRecord, String] = record => try { + Left(HoodieAvroUtils.rewriteRecordDeep(record, schema, true)) + } catch { + case _: Throwable => Right(HoodieAvroUtils.avroToJsonString(record, false)) + } + recs.map(transform) + } + } + (rdds.filter(_.isLeft).map(_.left.get), rdds.filter(_.isRight).map(_.right.get)) + } + def getCatalystRowSerDe(structType: StructType): SparkRowSerDe = { sparkAdapter.createSparkRowSerDe(structType) } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 18f5b3631a07..4d95e697e0d4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -189,6 +189,16 @@ public static byte[] indexedRecordToBytes(T record) { } } + /** + * Convert a given avro record to json and return the string + * + * @param record The GenericRecord to convert + * @param pretty Whether to pretty-print the json output + */ + public static String avroToJsonString(GenericRecord record, boolean pretty) throws IOException { + return avroToJsonHelper(record, pretty).toString(); + } + /** * Convert a given avro record to json and return the encoded bytes. * @@ -196,12 +206,16 @@ public static byte[] indexedRecordToBytes(T record) { * @param pretty Whether to pretty-print the json output */ public static byte[] avroToJson(GenericRecord record, boolean pretty) throws IOException { + return avroToJsonHelper(record, pretty).toByteArray(); + } + + private static ByteArrayOutputStream avroToJsonHelper(GenericRecord record, boolean pretty) throws IOException { DatumWriter writer = new GenericDatumWriter<>(record.getSchema()); ByteArrayOutputStream out = new ByteArrayOutputStream(); JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(record.getSchema(), out, pretty); writer.write(record, jsonEncoder); jsonEncoder.flush(); - return out.toByteArray(); + return out; } /** @@ -330,6 +344,23 @@ public static String addMetadataColumnTypes(String hiveColumnTypes) { return "string,string,string,string,string," + hiveColumnTypes; } + public static Schema makeFieldNonNull(Schema schema, String fieldName, Object fieldDefaultValue) { + ValidationUtils.checkArgument(fieldDefaultValue != null); + List filteredFields = schema.getFields() + .stream() + .map(field -> { + if (Objects.equals(field.name(), fieldName)) { + return new Schema.Field(field.name(), AvroSchemaUtils.resolveNullableSchema(field.schema()), field.doc(), fieldDefaultValue); + } else { + return new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal()); + } + }) + .collect(Collectors.toList()); + Schema withNonNullField = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false); + withNonNullField.setFields(filteredFields); + return withNonNullField; + } + private static Schema initRecordKeySchema() { Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala index 36ac37cfd6d4..15b6b2b35da7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala @@ -228,6 +228,10 @@ object TestHoodieSparkUtils { }) } + def getSchemaColumnNotNullable(structType: StructType, columnName: String): StructType = { + setNullableRec(structType, columnName.split('.'), 0) + } + def setColumnNotNullable(df: DataFrame, columnName: String): DataFrame = { // get schema val schema = df.schema diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java index 714225f23ab1..f268464d6f1a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/ErrorEvent.java @@ -53,6 +53,10 @@ public enum ErrorReason { // Failure during hudi writes HUDI_WRITE_FAILURES, // Failure during transformation of source to target RDD - CUSTOM_TRANSFORMER_FAILURE + CUSTOM_TRANSFORMER_FAILURE, + // record schema is not valid for the table + INVALID_RECORD_SCHEMA, + // exception when attempting to create HoodieRecord + RECORD_CREATION } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java index a6f9513a14e3..44c367ba3843 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamerUtils.java @@ -31,9 +31,11 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieSparkRecord; import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.util.Either; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; +import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.keygen.BuiltinKeyGenerator; import org.apache.hudi.keygen.KeyGenUtils; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -50,6 +52,7 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -58,6 +61,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.table.HoodieTableConfig.DROP_PARTITION_COLUMNS; +import static org.apache.hudi.config.HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_RECORD_CREATION; /** @@ -70,39 +74,49 @@ public class HoodieStreamerUtils { * Takes care of dropping columns, precombine, auto key generation. * Both AVRO and SPARK record types are supported. */ - static Option> createHoodieRecords(HoodieStreamer.Config cfg, TypedProperties props, Option> avroRDDOptional, - SchemaProvider schemaProvider, HoodieRecord.HoodieRecordType recordType, boolean autoGenerateRecordKeys, - String instantTime) { + public static Option> createHoodieRecords(HoodieStreamer.Config cfg, TypedProperties props, Option> avroRDDOptional, + SchemaProvider schemaProvider, HoodieRecord.HoodieRecordType recordType, boolean autoGenerateRecordKeys, + String instantTime, Option errorTableWriter) { boolean shouldCombine = cfg.filterDupes || cfg.operation.equals(WriteOperationType.UPSERT); + boolean shouldErrorTable = errorTableWriter.isPresent() && props.getBoolean(ERROR_ENABLE_VALIDATE_RECORD_CREATION.key(), ERROR_ENABLE_VALIDATE_RECORD_CREATION.defaultValue()); Set partitionColumns = getPartitionColumns(props); return avroRDDOptional.map(avroRDD -> { - JavaRDD records; SerializableSchema avroSchema = new SerializableSchema(schemaProvider.getTargetSchema()); SerializableSchema processedAvroSchema = new SerializableSchema(isDropPartitionColumns(props) ? HoodieAvroUtils.removeMetadataFields(avroSchema.get()) : avroSchema.get()); + JavaRDD> records; if (recordType == HoodieRecord.HoodieRecordType.AVRO) { records = avroRDD.mapPartitions( - (FlatMapFunction, HoodieRecord>) genericRecordIterator -> { + (FlatMapFunction, Either>) genericRecordIterator -> { if (autoGenerateRecordKeys) { props.setProperty(KeyGenUtils.RECORD_KEY_GEN_PARTITION_ID_CONFIG, String.valueOf(TaskContext.getPartitionId())); props.setProperty(KeyGenUtils.RECORD_KEY_GEN_INSTANT_TIME_CONFIG, instantTime); } BuiltinKeyGenerator builtinKeyGenerator = (BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(props); - List avroRecords = new ArrayList<>(); + List> avroRecords = new ArrayList<>(); while (genericRecordIterator.hasNext()) { GenericRecord genRec = genericRecordIterator.next(); - HoodieKey hoodieKey = new HoodieKey(builtinKeyGenerator.getRecordKey(genRec), builtinKeyGenerator.getPartitionPath(genRec)); - GenericRecord gr = isDropPartitionColumns(props) ? HoodieAvroUtils.removeFields(genRec, partitionColumns) : genRec; - HoodieRecordPayload payload = shouldCombine ? DataSourceUtils.createPayload(cfg.payloadClassName, gr, - (Comparable) HoodieAvroUtils.getNestedFieldVal(gr, cfg.sourceOrderingField, false, props.getBoolean( - KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), - Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())))) - : DataSourceUtils.createPayload(cfg.payloadClassName, gr); - avroRecords.add(new HoodieAvroRecord<>(hoodieKey, payload)); + try { + HoodieKey hoodieKey = new HoodieKey(builtinKeyGenerator.getRecordKey(genRec), builtinKeyGenerator.getPartitionPath(genRec)); + GenericRecord gr = isDropPartitionColumns(props) ? HoodieAvroUtils.removeFields(genRec, partitionColumns) : genRec; + HoodieRecordPayload payload = shouldCombine ? DataSourceUtils.createPayload(cfg.payloadClassName, gr, + (Comparable) HoodieAvroUtils.getNestedFieldVal(gr, cfg.sourceOrderingField, false, props.getBoolean( + KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), + Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())))) + : DataSourceUtils.createPayload(cfg.payloadClassName, gr); + avroRecords.add(Either.left(new HoodieAvroRecord<>(hoodieKey, payload))); + } catch (Exception e) { + if (!shouldErrorTable) { + throw e; + } + avroRecords.add(Either.right(HoodieAvroUtils.avroToJsonString(genRec, false))); + } } return avroRecords.iterator(); }); + } else if (recordType == HoodieRecord.HoodieRecordType.SPARK) { // TODO we should remove it if we can read InternalRow from source. + records = avroRDD.mapPartitions(itr -> { if (autoGenerateRecordKeys) { props.setProperty(KeyGenUtils.RECORD_KEY_GEN_PARTITION_ID_CONFIG, String.valueOf(TaskContext.getPartitionId())); @@ -116,16 +130,32 @@ static Option> createHoodieRecords(HoodieStreamer.Config c return new CloseableMappingIterator<>(ClosableIterator.wrap(itr), rec -> { InternalRow row = (InternalRow) deserializer.deserialize(rec).get(); - String recordKey = builtinKeyGenerator.getRecordKey(row, baseStructType).toString(); - String partitionPath = builtinKeyGenerator.getPartitionPath(row, baseStructType).toString(); - return new HoodieSparkRecord(new HoodieKey(recordKey, partitionPath), - HoodieInternalRowUtils.getCachedUnsafeProjection(baseStructType, targetStructType).apply(row), targetStructType, false); + try { + String recordKey = builtinKeyGenerator.getRecordKey(row, baseStructType).toString(); + String partitionPath = builtinKeyGenerator.getPartitionPath(row, baseStructType).toString(); + return Either.left(new HoodieSparkRecord(new HoodieKey(recordKey, partitionPath), + HoodieInternalRowUtils.getCachedUnsafeProjection(baseStructType, targetStructType).apply(row), targetStructType, false)); + } catch (Exception e) { + if (!shouldErrorTable) { + throw e; + } + try { + return Either.right(HoodieAvroUtils.avroToJsonString(rec, false)); + } catch (IOException ex) { + throw new HoodieIOException("Failed to convert illegal record to json", ex); + } + } }); + }); } else { throw new UnsupportedOperationException(recordType.name()); } - return records; + if (shouldErrorTable) { + errorTableWriter.get().addErrorEvents(records.filter(Either::isRight).map(Either::asRight).map(evStr -> new ErrorEvent<>(evStr, + ErrorEvent.ErrorReason.RECORD_CREATION))); + } + return records.filter(Either::isLeft).map(Either::asLeft); }); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index 3ce82b9fe9ff..eb648e49ff53 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -544,7 +544,7 @@ private InputBatch fetchFromSourceAndPrepareRecords(Option resumeCheckpo return inputBatch; } else { Option> recordsOpt = HoodieStreamerUtils.createHoodieRecords(cfg, props, inputBatch.getBatch(), schemaProvider, - recordType, autoGenerateRecordKeys, instantTime); + recordType, autoGenerateRecordKeys, instantTime, errorTableWriter); return new InputBatch(recordsOpt, checkpointStr, schemaProvider); } } @@ -632,8 +632,21 @@ private InputBatch fetchNextBatchFromSource(Option resumeCheckpointStr, // Rewrite transformed records into the expected target schema schemaProvider = getDeducedSchemaProvider(dataAndCheckpoint.getSchemaProvider().getTargetSchema(), dataAndCheckpoint.getSchemaProvider(), metaClient); String serializedTargetSchema = schemaProvider.getTargetSchema().toString(); - avroRDDOptional = dataAndCheckpoint.getBatch().map(t -> t.mapPartitions(iterator -> - new LazyCastingIterator(iterator, serializedTargetSchema))); + if (errorTableWriter.isPresent() + && props.getBoolean(HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.key(), + HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.defaultValue())) { + avroRDDOptional = dataAndCheckpoint.getBatch().map( + records -> { + Tuple2, RDD> safeCreateRDDs = HoodieSparkUtils.safeRewriteRDD(records.rdd(), serializedTargetSchema); + errorTableWriter.get().addErrorEvents(safeCreateRDDs._2().toJavaRDD() + .map(evStr -> new ErrorEvent<>(evStr, + ErrorEvent.ErrorReason.INVALID_RECORD_SCHEMA))); + return safeCreateRDDs._1.toJavaRDD(); + }); + } else { + avroRDDOptional = dataAndCheckpoint.getBatch().map(t -> t.mapPartitions(iterator -> + new LazyCastingIterator(iterator, serializedTargetSchema))); + } } } if (useRowWriter) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionBase.java index 87dc5b89da06..a0ba7d4a4019 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionBase.java @@ -22,29 +22,37 @@ import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.HoodieSparkUtils; +import org.apache.hudi.TestHoodieSparkUtils; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieErrorTableConfig; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.AvroKafkaSource; import org.apache.hudi.utilities.sources.ParquetDFSSource; +import org.apache.hudi.utilities.streamer.BaseErrorTableWriter; import org.apache.hudi.utilities.streamer.HoodieStreamer; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileSystem; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; @@ -58,8 +66,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; @@ -77,6 +87,7 @@ public class TestHoodieDeltaStreamerSchemaEvolutionBase extends HoodieDeltaStrea protected String tableType; protected String tableBasePath; + protected String tableName; protected Boolean shouldCluster; protected Boolean shouldCompact; protected Boolean rowWriterEnable; @@ -87,6 +98,7 @@ public class TestHoodieDeltaStreamerSchemaEvolutionBase extends HoodieDeltaStrea protected String sourceSchemaFile; protected String targetSchemaFile; protected boolean useKafkaSource; + protected boolean withErrorTable; protected boolean useTransformer; protected boolean userProvidedSchema; @@ -98,8 +110,11 @@ public static void initKafka() { @BeforeEach public void setupTest() { super.setupTest(); + TestErrorTable.commited = new HashMap<>(); + TestErrorTable.errorEvents = new ArrayList<>(); useSchemaProvider = false; hasTransformer = false; + withErrorTable = false; sourceSchemaFile = ""; targetSchemaFile = ""; topicName = "topic" + testNum; @@ -164,6 +179,16 @@ protected HoodieDeltaStreamer.Config getDeltaStreamerConfig(String[] transformer extraProps.setProperty(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key(), "_row_key"); } + if (withErrorTable) { + extraProps.setProperty(HoodieErrorTableConfig.ERROR_TABLE_ENABLED.key(), "true"); + extraProps.setProperty(HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_TARGET_SCHEMA.key(), "true"); + extraProps.setProperty(HoodieErrorTableConfig.ERROR_ENABLE_VALIDATE_RECORD_CREATION.key(), "true"); + extraProps.setProperty(HoodieErrorTableConfig.ERROR_TARGET_TABLE.key(), tableName + "ERROR"); + extraProps.setProperty(HoodieErrorTableConfig.ERROR_TABLE_BASE_PATH.key(), basePath + tableName + "ERROR"); + extraProps.setProperty(HoodieErrorTableConfig.ERROR_TABLE_WRITE_CLASS.key(), TestErrorTable.class.getName()); + extraProps.setProperty("hoodie.base.path", tableBasePath); + } + List transformerClassNames = new ArrayList<>(); Collections.addAll(transformerClassNames, transformerClasses); @@ -186,6 +211,9 @@ protected HoodieDeltaStreamer.Config getDeltaStreamerConfig(String[] transformer protected void addData(Dataset df, Boolean isFirst) { if (useSchemaProvider) { TestSchemaProvider.sourceSchema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema(), HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE); + if (withErrorTable && isFirst) { + TestSchemaProvider.setTargetSchema(AvroConversionUtils.convertStructTypeToAvroSchema(TestHoodieSparkUtils.getSchemaColumnNotNullable(df.schema(), "_row_key"),"idk", "idk")); + } } if (useKafkaSource) { addKafkaData(df, isFirst); @@ -293,4 +321,41 @@ public static void resetTargetSchema() { TestSchemaProvider.targetSchema = null; } } + + public static class TestErrorTable extends BaseErrorTableWriter { + + public static List errorEvents = new ArrayList<>(); + public static Map> commited = new HashMap<>(); + public TestErrorTable(HoodieStreamer.Config cfg, SparkSession sparkSession, TypedProperties props, HoodieSparkEngineContext hoodieSparkContext, + FileSystem fs) { + super(cfg, sparkSession, props, hoodieSparkContext, fs); + } + + @Override + public void addErrorEvents(JavaRDD errorEvent) { + errorEvents.add(errorEvent); + } + + @Override + public boolean upsertAndCommit(String baseTableInstantTime, Option commitedInstantTime) { + if (errorEvents.size() > 0) { + JavaRDD errorsCombined = errorEvents.get(0); + for (int i = 1; i < errorEvents.size(); i++) { + errorsCombined = errorsCombined.union(errorEvents.get(i)); + } + commited.put(baseTableInstantTime, Option.of(errorsCombined)); + errorEvents = new ArrayList<>(); + + } else { + commited.put(baseTableInstantTime, Option.empty()); + } + return true; + } + + @Override + public Option> getErrorEvents(String baseTableInstantTime, Option commitedInstantTime) { + return Option.empty(); + } + } + } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionExtensive.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionExtensive.java index 723971f6fa1f..0def43fd4b67 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionExtensive.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionExtensive.java @@ -20,7 +20,10 @@ package org.apache.hudi.utilities.deltastreamer; import org.apache.hudi.TestHoodieSparkUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.utilities.streamer.ErrorEvent; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -31,7 +34,9 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -45,16 +50,24 @@ public class TestHoodieDeltaStreamerSchemaEvolutionExtensive extends TestHoodieDeltaStreamerSchemaEvolutionBase { protected void testBase(String updateFile, String updateColumn, String condition, int count) throws Exception { + testBase(updateFile, updateColumn, condition, count, null); + } + + protected void testBase(String updateFile, String updateColumn, String condition, int count, ErrorEvent.ErrorReason reason) throws Exception { Map conditions = new HashMap<>(); conditions.put(condition, count); - testBase(updateFile, updateColumn, conditions, true); + testBase(updateFile, updateColumn, conditions, true, reason); //adding non-nullable cols should fail, but instead it is adding nullable cols //assertThrows(Exception.class, () -> testBase(tableType, shouldCluster, shouldCompact, reconcileSchema, rowWriterEnable, updateFile, updateColumn, condition, count, false)); } protected void testBase(String updateFile, String updateColumn, Map conditions) throws Exception { - testBase(updateFile, updateColumn, conditions, true); + testBase(updateFile, updateColumn, conditions, null); + } + + protected void testBase(String updateFile, String updateColumn, Map conditions, ErrorEvent.ErrorReason reason) throws Exception { + testBase(updateFile, updateColumn, conditions, true, reason); } protected void doFirstDeltaWrite() throws Exception { @@ -100,10 +113,11 @@ protected void doDeltaWriteBase(String resourceString, Boolean isFirst, Boolean /** * Main testing logic for non-type promotion tests */ - protected void testBase(String updateFile, String updateColumn, Map conditions, Boolean nullable) throws Exception { + protected void testBase(String updateFile, String updateColumn, Map conditions, Boolean nullable, ErrorEvent.ErrorReason reason) throws Exception { boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + testNum++; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; this.deltaStreamer = new HoodieDeltaStreamer(getDeltaStreamerConfig(), jsc); //first write @@ -149,6 +163,8 @@ protected void testBase(String updateFile, String updateColumn, Map recs = new ArrayList<>(); + for (String key : TestErrorTable.commited.keySet()) { + Option errors = TestErrorTable.commited.get(key); + if (errors.isPresent()) { + if (!errors.get().isEmpty()) { + recs.addAll(errors.get().collect()); + } + } + } + assertEquals(1, recs.size()); + assertEquals(recs.get(0).getReason(), reason); + } } protected static Stream testArgs() { @@ -183,6 +212,66 @@ protected static Stream testArgs() { return b.build(); } + @ParameterizedTest + @MethodSource("testArgs") + public void testErrorTable(String tableType, + Boolean shouldCluster, + Boolean shouldCompact, + Boolean rowWriterEnable, + Boolean addFilegroups, + Boolean multiLogFiles) throws Exception { + this.withErrorTable = true; + this.useSchemaProvider = false; + this.useTransformer = false; + this.tableType = tableType; + this.shouldCluster = shouldCluster; + this.shouldCompact = shouldCompact; + this.rowWriterEnable = rowWriterEnable; + this.addFilegroups = addFilegroups; + this.multiLogFiles = multiLogFiles; + testBase("testMissingRecordKey.json", "driver", "driver = 'driver-003'", 1, ErrorEvent.ErrorReason.RECORD_CREATION); + } + + @ParameterizedTest + @MethodSource("testArgs") + public void testErrorTableWithSchemaProvider(String tableType, + Boolean shouldCluster, + Boolean shouldCompact, + Boolean rowWriterEnable, + Boolean addFilegroups, + Boolean multiLogFiles) throws Exception { + this.withErrorTable = true; + this.useSchemaProvider = true; + this.useTransformer = false; + this.tableType = tableType; + this.shouldCluster = shouldCluster; + this.shouldCompact = shouldCompact; + this.rowWriterEnable = rowWriterEnable; + this.addFilegroups = addFilegroups; + this.multiLogFiles = multiLogFiles; + testBase("testMissingRecordKey.json", "driver", "driver = 'driver-003'", 1, ErrorEvent.ErrorReason.INVALID_RECORD_SCHEMA); + } + + @ParameterizedTest + @MethodSource("testArgs") + public void testErrorTableWithTransformer(String tableType, + Boolean shouldCluster, + Boolean shouldCompact, + Boolean rowWriterEnable, + Boolean addFilegroups, + Boolean multiLogFiles) throws Exception { + this.withErrorTable = true; + this.useSchemaProvider = true; + this.useTransformer = true; + this.tableType = tableType; + this.shouldCluster = shouldCluster; + this.shouldCompact = shouldCompact; + this.rowWriterEnable = rowWriterEnable; + this.addFilegroups = addFilegroups; + this.multiLogFiles = multiLogFiles; + testBase("testMissingRecordKey.json", "driver", "driver = 'driver-003'", 1, ErrorEvent.ErrorReason.AVRO_DESERIALIZATION_FAILURE); + } + /** * Add a new column at root level at the end */ @@ -367,7 +456,8 @@ protected void testTypeDemotionBase(String colName, DataType startType, DataType protected void testTypePromotionBase(String colName, DataType startType, DataType updateType, DataType endType) throws Exception { boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + testNum++; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; this.deltaStreamer = new HoodieDeltaStreamer(getDeltaStreamerConfig(), jsc); //first write diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java index 81f27eec7fb8..eee30c844111 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamerSchemaEvolutionQuick.java @@ -156,7 +156,8 @@ public void testBase(String tableType, this.useTransformer = true; boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + ++testNum; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; this.deltaStreamer = new HoodieDeltaStreamer(getDeltaStreamerConfig(allowNullForDeletedCols), jsc); //first write @@ -282,7 +283,8 @@ public void testReorderingColumn(String tableType, boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + ++testNum; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; //first write String datapath = String.class.getResource("/data/schema-evolution/startTestEverything.json").getPath(); @@ -352,7 +354,8 @@ public void testDroppedColumn(String tableType, boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + ++testNum; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; //first write String datapath = String.class.getResource("/data/schema-evolution/startTestEverything.json").getPath(); @@ -430,7 +433,8 @@ public void testTypePromotion(String tableType, boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + ++testNum; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; //first write String datapath = String.class.getResource("/data/schema-evolution/startTestEverything.json").getPath(); @@ -517,7 +521,8 @@ public void testTypeDemotion(String tableType, boolean isCow = tableType.equals("COPY_ON_WRITE"); PARQUET_SOURCE_ROOT = basePath + "parquetFilesDfs" + ++testNum; - tableBasePath = basePath + "test_parquet_table" + testNum; + tableName = "test_parquet_table" + testNum; + tableBasePath = basePath + tableName; //first write String datapath = String.class.getResource("/data/schema-evolution/startTestEverything.json").getPath(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGenericRddTransform.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGenericRddTransform.java index 78bc21ecf92b..8adfdb4dc377 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGenericRddTransform.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestGenericRddTransform.java @@ -20,11 +20,13 @@ import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.HoodieSparkUtils; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; import org.apache.hudi.testutils.SparkClientFunctionalTestHarness; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; +import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.rdd.RDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.types.DataTypes; @@ -33,8 +35,11 @@ import org.apache.spark.sql.types.StructType; import org.junit.jupiter.api.Test; +import java.util.List; + import scala.Tuple2; +import static org.apache.hudi.avro.HoodieAvroUtils.makeFieldNonNull; import static org.apache.spark.sql.functions.expr; import static org.apache.spark.sql.functions.lit; import static org.apache.spark.sql.functions.when; @@ -54,4 +59,28 @@ public void testGenericRddTransform() { assertEquals(5, failSafeRdds._1.count()); assertEquals(5, failSafeRdds._2.count()); } + + @Test + public void testGenericRddConvert() { + String fieldToNull = "partition_path"; + String schemaStr = makeFieldNonNull(HoodieTestDataGenerator.AVRO_SCHEMA, fieldToNull, "").toString(); + HoodieTestDataGenerator datagen = new HoodieTestDataGenerator(); + List recs = datagen.generateGenericRecords(10); + for (int i = 0; i < recs.size(); i++) { + if (i % 2 == 0) { + recs.get(i).put(fieldToNull, null); + } + } + JavaSparkContext jsc = jsc(); + RDD rdd = jsc.parallelize(recs).rdd(); + Tuple2, RDD> failSafeRdds = HoodieSparkUtils.safeRewriteRDD(rdd, schemaStr); + assertEquals(5, failSafeRdds._1.count()); + assertEquals(5, failSafeRdds._2.count()); + + //if field is nullable, no records should fail validation + failSafeRdds = HoodieSparkUtils.safeRewriteRDD(rdd, HoodieTestDataGenerator.AVRO_SCHEMA.toString()); + assertEquals(10, failSafeRdds._1.count()); + assertEquals(0, failSafeRdds._2.count()); + } + } diff --git a/hudi-utilities/src/test/resources/data/schema-evolution/testMissingRecordKey.json b/hudi-utilities/src/test/resources/data/schema-evolution/testMissingRecordKey.json new file mode 100644 index 000000000000..c3b65587e2d1 --- /dev/null +++ b/hudi-utilities/src/test/resources/data/schema-evolution/testMissingRecordKey.json @@ -0,0 +1,2 @@ +{"timestamp":3,"_row_key":"154fee81-6e2a-4c32-94f5-be5c456fdd0a","partition_path":"2016/03/15","trip_type":"BLACK","rider":"rider-003","driver":"driver-003","begin_lat":0.21927838567558522,"begin_lon":0.5594020723099724,"end_lat":0.7161653985926594,"end_lon":0.49716798979953447,"distance_in_meters":936143957,"seconds_since_epoch":3794105168659998336,"weight":0.18520206,"nation":"three","current_date":"1970-01-15","current_ts":1244853103,"height":0.272661,"city_to_state":{"LA":"CA"},"fare":{"amount":12.671341480371346,"currency":"USD"},"tip_history":[{"amount":90.26735894145568,"currency":"USD"}],"_hoodie_is_deleted":false} +{"timestamp":3,"_row_key":null,"partition_path":"2015/03/16","trip_type":"BLACK","rider":"rider-003","driver":"driver-003","begin_lat":0.7471407629318884,"begin_lon":0.8776437421395643,"end_lat":0.9648524370990681,"end_lon":0.3911456751705831,"distance_in_meters":1137109733,"seconds_since_epoch":5028439681953251637,"weight":0.023411155,"nation":"three","current_date":"1970-01-12","current_ts":986645693,"height":0.898042,"city_to_state":{"LA":"CA"},"fare":{"amount":85.97606478430822,"currency":"USD"},"tip_history":[{"amount":13.7534224373558,"currency":"USD"}],"_hoodie_is_deleted":false} From 31adbb92fe17639c5904ef04823bd30bcc9750d1 Mon Sep 17 00:00:00 2001 From: Tim Brown Date: Wed, 24 Jan 2024 23:24:51 -0600 Subject: [PATCH 076/112] [HUDI-7323] Use a schema supplier instead of a static value (#10549) --- .../apache/hudi/utilities/UtilHelpers.java | 7 +++-- .../hudi/utilities/streamer/StreamSync.java | 15 ++++------ .../transform/ChainedTransformer.java | 12 ++++---- .../ErrorTableAwareChainedTransformer.java | 5 ++-- .../functional/TestChainedTransformer.java | 29 +++++++++++++++++-- ...TestErrorTableAwareChainedTransformer.java | 4 +-- 6 files changed, 48 insertions(+), 24 deletions(-) diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index 9d15f14584df..2881b72c47d9 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -109,6 +109,7 @@ import java.util.Objects; import java.util.Properties; import java.util.function.Function; +import java.util.function.Supplier; import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; @@ -206,13 +207,13 @@ public static StructType getSourceSchema(SchemaProvider schemaProvider) { return null; } - public static Option createTransformer(Option> classNamesOpt, Option sourceSchema, + public static Option createTransformer(Option> classNamesOpt, Supplier> sourceSchemaSupplier, boolean isErrorTableWriterEnabled) throws IOException { try { Function, Transformer> chainedTransformerFunction = classNames -> - isErrorTableWriterEnabled ? new ErrorTableAwareChainedTransformer(classNames, sourceSchema) - : new ChainedTransformer(classNames, sourceSchema); + isErrorTableWriterEnabled ? new ErrorTableAwareChainedTransformer(classNames, sourceSchemaSupplier) + : new ChainedTransformer(classNames, sourceSchemaSupplier); return classNamesOpt.map(classNames -> classNames.isEmpty() ? null : chainedTransformerFunction.apply(classNames)); } catch (Throwable e) { throw new IOException("Could not load transformer class(es) " + classNamesOpt.get(), e); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index eb648e49ff53..4db7e622cfb1 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -123,6 +123,7 @@ import java.util.Map; import java.util.Objects; import java.util.function.Function; +import java.util.function.Supplier; import java.util.stream.Collectors; import scala.Tuple2; @@ -287,15 +288,11 @@ public StreamSync(HoodieStreamer.Config cfg, SparkSession sparkSession, SchemaPr Source source = UtilHelpers.createSource(cfg.sourceClassName, props, hoodieSparkContext.jsc(), sparkSession, schemaProvider, metrics); this.formatAdapter = new SourceFormatAdapter(source, this.errorTableWriter, Option.of(props)); - this.transformer = UtilHelpers.createTransformer(Option.ofNullable(cfg.transformerClassNames), - Option.ofNullable(schemaProvider).map(SchemaProvider::getSourceSchema), this.errorTableWriter.isPresent()); - if (this.cfg.operation == WriteOperationType.BULK_INSERT && source.getSourceType() == Source.SourceType.ROW - && this.props.getBoolean(DataSourceWriteOptions.ENABLE_ROW_WRITER().key(), false)) { - // enable row writer only when operation is BULK_INSERT, and source is ROW type and if row writer is not explicitly disabled. - this.useRowWriter = true; - } else { - this.useRowWriter = false; - } + Supplier> schemaSupplier = schemaProvider == null ? Option::empty : () -> Option.ofNullable(schemaProvider.getSourceSchema()); + this.transformer = UtilHelpers.createTransformer(Option.ofNullable(cfg.transformerClassNames), schemaSupplier, this.errorTableWriter.isPresent()); + // enable row writer only when operation is BULK_INSERT, and source is ROW type and if row writer is not explicitly disabled. + this.useRowWriter = this.cfg.operation == WriteOperationType.BULK_INSERT && source.getSourceType() == Source.SourceType.ROW + && this.props.getBoolean(DataSourceWriteOptions.ENABLE_ROW_WRITER().key(), false); } /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java index 4ff7dd6e1c2a..4d5276998b12 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java @@ -40,6 +40,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.function.Supplier; import java.util.stream.Collectors; /** @@ -51,26 +52,26 @@ public class ChainedTransformer implements Transformer { private static final String ID_TRANSFORMER_CLASS_NAME_DELIMITER = ":"; protected final List transformers; - private final Option sourceSchemaOpt; + private final Supplier> sourceSchemaSupplier; public ChainedTransformer(List transformersList) { this.transformers = new ArrayList<>(transformersList.size()); for (Transformer transformer : transformersList) { this.transformers.add(new TransformerInfo(transformer)); } - this.sourceSchemaOpt = Option.empty(); + this.sourceSchemaSupplier = Option::empty; } /** * Creates a chained transformer using the input transformer class names. Refer {@link HoodieStreamer.Config#transformerClassNames} * for more information on how the transformers can be configured. * - * @param sourceSchemaOpt Schema from the dataset the transform is applied to + * @param sourceSchemaSupplier Supplies the schema (if schema provider is present) for the dataset the transform is applied to * @param configuredTransformers List of configured transformer class names. */ - public ChainedTransformer(List configuredTransformers, Option sourceSchemaOpt) { + public ChainedTransformer(List configuredTransformers, Supplier> sourceSchemaSupplier) { this.transformers = new ArrayList<>(configuredTransformers.size()); - this.sourceSchemaOpt = sourceSchemaOpt; + this.sourceSchemaSupplier = sourceSchemaSupplier; Set identifiers = new HashSet<>(); for (String configuredTransformer : configuredTransformers) { @@ -120,6 +121,7 @@ private void validateIdentifier(String id, Set identifiers, String confi private StructType getExpectedTransformedSchema(TransformerInfo transformerInfo, JavaSparkContext jsc, SparkSession sparkSession, Option incomingStructOpt, Option> rowDatasetOpt, TypedProperties properties) { + Option sourceSchemaOpt = sourceSchemaSupplier.get(); if (!sourceSchemaOpt.isPresent() && !rowDatasetOpt.isPresent()) { throw new HoodieTransformPlanException("Either source schema or source dataset should be available to fetch the schema"); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ErrorTableAwareChainedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ErrorTableAwareChainedTransformer.java index 122f563d6982..4d18ea9f11ba 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ErrorTableAwareChainedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ErrorTableAwareChainedTransformer.java @@ -31,6 +31,7 @@ import org.apache.spark.sql.types.StructType; import java.util.List; +import java.util.function.Supplier; /** * A {@link Transformer} to chain other {@link Transformer}s and apply sequentially. @@ -38,8 +39,8 @@ * if that column is not dropped in any of the transformations. */ public class ErrorTableAwareChainedTransformer extends ChainedTransformer { - public ErrorTableAwareChainedTransformer(List configuredTransformers, Option sourceSchemaOpt) { - super(configuredTransformers, sourceSchemaOpt); + public ErrorTableAwareChainedTransformer(List configuredTransformers, Supplier> sourceSchemaSupplier) { + super(configuredTransformers, sourceSchemaSupplier); } public ErrorTableAwareChainedTransformer(List transformers) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestChainedTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestChainedTransformer.java index e3ec9d47fb05..cb4bffd7e823 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestChainedTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestChainedTransformer.java @@ -26,6 +26,7 @@ import org.apache.hudi.utilities.transform.ChainedTransformer; import org.apache.hudi.utilities.transform.Transformer; +import org.apache.avro.Schema; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -39,13 +40,17 @@ import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.AVRO_SCHEMA; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.NESTED_AVRO_SCHEMA; import static org.apache.spark.sql.types.DataTypes.IntegerType; import static org.apache.spark.sql.types.DataTypes.StringType; import static org.apache.spark.sql.types.DataTypes.createStructField; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -88,7 +93,7 @@ public void testChainedTransformation() { }) public void testChainedTransformerValidationFails(String transformerName) { try { - ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), Option.empty()); + ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), Option::empty); fail(); } catch (Exception e) { assertTrue(e instanceof HoodieTransformPlanException, e.getMessage()); @@ -103,18 +108,36 @@ public void testChainedTransformerValidationFails(String transformerName) { "org.apache.hudi.utilities.transform.FlatteningTransformer,org.apache.hudi.utilities.transform.FlatteningTransformer" }) public void testChainedTransformerValidationPasses(String transformerName) { - ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), Option.empty()); + ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), Option::empty); assertNotNull(transformer); } @Test public void testChainedTransformerTransformedSchema() { String transformerName = "org.apache.hudi.utilities.transform.FlatteningTransformer"; - ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), Option.of(NESTED_AVRO_SCHEMA)); + ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), () -> Option.of(NESTED_AVRO_SCHEMA)); StructType transformedSchema = transformer.transformedSchema(jsc(), spark(), null, new TypedProperties()); // Verify transformed nested fields are present in the transformed schema assertTrue(Arrays.asList(transformedSchema.fieldNames()).contains("fare_amount")); assertTrue(Arrays.asList(transformedSchema.fieldNames()).contains("fare_currency")); assertNotNull(transformer); } + + @Test + public void assertSchemaSupplierIsCalledPerInvocationOfTransformedSchema() { + String transformerName = "org.apache.hudi.utilities.transform.FlatteningTransformer"; + AtomicInteger count = new AtomicInteger(0); + Supplier> schemaSupplier = () -> { + if (count.getAndIncrement() == 0) { + return Option.of(AVRO_SCHEMA); + } else { + return Option.of(NESTED_AVRO_SCHEMA); + } + }; + ChainedTransformer transformer = new ChainedTransformer(Arrays.asList(transformerName.split(",")), schemaSupplier); + StructType transformedSchema1 = transformer.transformedSchema(jsc(), spark(), null, new TypedProperties()); + StructType transformedSchema2 = transformer.transformedSchema(jsc(), spark(), null, new TypedProperties()); + assertNotEquals(transformedSchema1, transformedSchema2); + assertEquals(2, count.get()); + } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestErrorTableAwareChainedTransformer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestErrorTableAwareChainedTransformer.java index bdd83ed61d30..08074e6d6789 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestErrorTableAwareChainedTransformer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestErrorTableAwareChainedTransformer.java @@ -129,7 +129,7 @@ private Transformer getErrorRecordColumnDropTransformer() { }) public void testErrorTableAwareChainedTransformerValidationFails(String transformerName) { assertThrows(HoodieTransformException.class, - () -> new ErrorTableAwareChainedTransformer(Arrays.asList(transformerName.split(",")), Option.empty())); + () -> new ErrorTableAwareChainedTransformer(Arrays.asList(transformerName.split(",")), Option::empty)); } @ParameterizedTest @@ -141,7 +141,7 @@ public void testErrorTableAwareChainedTransformerValidationFails(String transfor }) public void testErrorTableAwareChainedTransformerValidationPasses(String transformerName) { ErrorTableAwareChainedTransformer transformer = new ErrorTableAwareChainedTransformer(Arrays.asList(transformerName.split(",")), - Option.empty()); + Option::empty); assertNotNull(transformer); } } From 6f27d81c1690fe907c1ab685fb0f4d7e45c12762 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Mon, 26 Feb 2024 17:15:09 -0800 Subject: [PATCH 077/112] [HUDI-7327] remove meta cols from incoming schema in stream sync (#10556) --------- Co-authored-by: Jonathan Vexler <=> --- .../java/org/apache/hudi/avro/HoodieAvroUtils.java | 7 +++++++ .../apache/hudi/common/config/HoodieCommonConfig.java | 1 + .../scala/org/apache/hudi/HoodieSparkSqlWriter.scala | 10 ++-------- .../org/apache/hudi/utilities/streamer/StreamSync.java | 2 +- .../deltastreamer/HoodieDeltaStreamerTestBase.java | 2 ++ 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 4d95e697e0d4..12bf01736c7c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -325,7 +325,14 @@ public static Schema addMetadataFields(Schema schema, boolean withOperationField return mergedSchema; } + public static boolean isSchemaNull(Schema schema) { + return schema == null || schema.getType() == Schema.Type.NULL; + } + public static Schema removeMetadataFields(Schema schema) { + if (isSchemaNull(schema)) { + return schema; + } return removeFields(schema, HoodieRecord.HOODIE_META_COLUMNS_WITH_OPERATION); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java index 7aa62975b7f5..97b2462e3eff 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java @@ -77,6 +77,7 @@ public class HoodieCommonConfig extends HoodieConfig { .key("hoodie.write.set.null.for.missing.columns") .defaultValue("false") .markAdvanced() + .withAlternatives("hoodie.write.set.null.for.missing.columns") .withDocumentation("When a non-nullable column is missing from incoming batch during a write operation, the write " + " operation will fail schema compatibility check. Set this option to true will make the missing " + " column be filled with null values to successfully complete the write operation."); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 41e8ba902a7e..5c6f5b451cdf 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -138,18 +138,12 @@ object HoodieSparkSqlWriter { *
  • Target table's schema (including Hudi's [[InternalSchema]] representation)
  • * */ - def deduceWriterSchema(sourceSchema: Schema, - latestTableSchemaOpt: Option[Schema], - internalSchemaOpt: Option[InternalSchema], - opts: Map[String, String]): Schema = { - HoodieSchemaUtils.deduceWriterSchema(sourceSchema, latestTableSchemaOpt, internalSchemaOpt, opts) - } - def deduceWriterSchema(sourceSchema: Schema, latestTableSchemaOpt: Option[Schema], internalSchemaOpt: Option[InternalSchema], props: TypedProperties): Schema = { - deduceWriterSchema(sourceSchema, latestTableSchemaOpt, internalSchemaOpt, HoodieConversionUtils.fromProperties(props)) + HoodieSchemaUtils.deduceWriterSchema(sourceSchema, latestTableSchemaOpt, + internalSchemaOpt, HoodieConversionUtils.fromProperties(props)) } def cleanup(): Unit = { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index 4db7e622cfb1..d030b08b7612 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -668,7 +668,7 @@ private SchemaProvider getDeducedSchemaProvider(Schema incomingSchema, SchemaPro // Deduce proper target (writer's) schema for the input dataset, reconciling its // schema w/ the table's one Schema targetSchema = HoodieSparkSqlWriter.deduceWriterSchema( - incomingSchema, + HoodieAvroUtils.removeMetadataFields(incomingSchema), HoodieConversionUtils.toScalaOption(latestTableSchemaOpt), HoodieConversionUtils.toScalaOption(internalSchemaOpt), props); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java index d9bee058370a..c4b3ba265d67 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamerTestBase.java @@ -69,6 +69,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.Function; +import static org.apache.hudi.common.config.HoodieCommonConfig.SET_NULL_FOR_MISSING_COLUMNS; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.nonEmpty; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; @@ -613,6 +614,7 @@ static HoodieDeltaStreamer.Config makeConfigForHudiIncrSrc(String srcBasePath, S cfg.schemaProviderClassName = schemaProviderClassName; } List cfgs = new ArrayList<>(); + cfgs.add(SET_NULL_FOR_MISSING_COLUMNS.key() + "=true"); cfgs.add("hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=" + addReadLatestOnMissingCkpt); cfgs.add("hoodie.deltastreamer.source.hoodieincr.path=" + srcBasePath); // No partition From 54a3b67459405e4c84ccfe91cfff7491e42325d7 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Fri, 26 Jan 2024 03:01:18 +0100 Subject: [PATCH 078/112] [HUDI-6230] Handle aws glue partition index (#8743) --- .../aws/sync/AWSGlueCatalogSyncClient.java | 137 +++++++++++++++++- .../config/GlueCatalogSyncClientConfig.java | 19 +++ 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index 0e7609aba5cd..23f382435fdd 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -40,14 +40,20 @@ import software.amazon.awssdk.services.glue.model.Column; import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; import software.amazon.awssdk.services.glue.model.CreateDatabaseResponse; +import software.amazon.awssdk.services.glue.model.CreatePartitionIndexRequest; import software.amazon.awssdk.services.glue.model.CreateTableRequest; import software.amazon.awssdk.services.glue.model.CreateTableResponse; import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.DeletePartitionIndexRequest; import software.amazon.awssdk.services.glue.model.EntityNotFoundException; import software.amazon.awssdk.services.glue.model.GetDatabaseRequest; +import software.amazon.awssdk.services.glue.model.GetPartitionIndexesRequest; +import software.amazon.awssdk.services.glue.model.GetPartitionIndexesResponse; import software.amazon.awssdk.services.glue.model.GetPartitionsRequest; import software.amazon.awssdk.services.glue.model.GetPartitionsResponse; import software.amazon.awssdk.services.glue.model.GetTableRequest; +import software.amazon.awssdk.services.glue.model.PartitionIndex; +import software.amazon.awssdk.services.glue.model.PartitionIndexDescriptor; import software.amazon.awssdk.services.glue.model.PartitionInput; import software.amazon.awssdk.services.glue.model.PartitionValueList; import software.amazon.awssdk.services.glue.model.SerDeInfo; @@ -55,12 +61,14 @@ import software.amazon.awssdk.services.glue.model.Table; import software.amazon.awssdk.services.glue.model.TableInput; import software.amazon.awssdk.services.glue.model.UpdateTableRequest; + import org.apache.parquet.schema.MessageType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.time.Instant; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -74,6 +82,8 @@ import static org.apache.hudi.common.util.MapUtils.containsAll; import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING; +import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS; +import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS_ENABLE; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE; import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType; @@ -94,7 +104,8 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { private static final int MAX_PARTITIONS_PER_REQUEST = 100; private static final int MAX_DELETE_PARTITIONS_PER_REQUEST = 25; private final GlueAsyncClient awsGlue; - private static final long BATCH_REQUEST_SLEEP_MILLIS = 1000L; + private static final String GLUE_PARTITION_INDEX_ENABLE = "partition_filtering.enabled"; + private static final int PARTITION_INDEX_MAX_NUMBER = 3; /** * athena v2/v3 table property * see https://docs.aws.amazon.com/athena/latest/ug/querying-hudi.html @@ -429,6 +440,120 @@ public void createTable(String tableName, } } + /** + * This will manage partitions indexes. Users can activate/deactivate them on existing tables. + * Removing index definition, will result in dropping the index. + *

    + * reference doc for partition indexes: + * https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html#partition-index-getpartitions + * + * @param tableName + */ + public void managePartitionIndexes(String tableName) throws ExecutionException, InterruptedException { + if (!config.getBooleanOrDefault(META_SYNC_PARTITION_INDEX_FIELDS_ENABLE)) { + // deactivate indexing if enabled + if (getPartitionIndexEnable(tableName)) { + LOG.warn("Deactivating partition indexing"); + updatePartitionIndexEnable(tableName, false); + } + // also drop all existing indexes + GetPartitionIndexesRequest indexesRequest = GetPartitionIndexesRequest.builder().databaseName(databaseName).tableName(tableName).build(); + GetPartitionIndexesResponse existingIdxsResp = awsGlue.getPartitionIndexes(indexesRequest).get(); + for (PartitionIndexDescriptor idsToDelete : existingIdxsResp.partitionIndexDescriptorList()) { + LOG.warn("Dropping partition index: " + idsToDelete.indexName()); + DeletePartitionIndexRequest idxToDelete = DeletePartitionIndexRequest.builder() + .databaseName(databaseName).tableName(tableName).indexName(idsToDelete.indexName()).build(); + awsGlue.deletePartitionIndex(idxToDelete).get(); + } + } else { + // activate indexing usage if disabled + if (!getPartitionIndexEnable(tableName)) { + LOG.warn("Activating partition indexing"); + updatePartitionIndexEnable(tableName, true); + } + + // get indexes to be created + List> partitionsIndexNeeded = parsePartitionsIndexConfig(); + // get existing indexes + GetPartitionIndexesRequest indexesRequest = GetPartitionIndexesRequest.builder() + .databaseName(databaseName).tableName(tableName).build(); + GetPartitionIndexesResponse existingIdxsResp = awsGlue.getPartitionIndexes(indexesRequest).get(); + + // for each existing index remove if not relevant anymore + boolean indexesChanges = false; + for (PartitionIndexDescriptor existingIdx: existingIdxsResp.partitionIndexDescriptorList()) { + List idxColumns = existingIdx.keys().stream().map(key -> key.name()).collect(Collectors.toList()); + Boolean toBeRemoved = true; + for (List neededIdx : partitionsIndexNeeded) { + if (neededIdx.equals(idxColumns)) { + toBeRemoved = false; + } + } + if (toBeRemoved) { + indexesChanges = true; + DeletePartitionIndexRequest idxToDelete = DeletePartitionIndexRequest.builder() + .databaseName(databaseName).tableName(tableName).indexName(existingIdx.indexName()).build(); + LOG.warn("Dropping irrelevant index: " + existingIdx.indexName()); + awsGlue.deletePartitionIndex(idxToDelete).get(); + } + } + if (indexesChanges) { // refresh indexes list + existingIdxsResp = awsGlue.getPartitionIndexes(indexesRequest).get(); + } + + // for each needed index create if not exist + for (List neededIdx : partitionsIndexNeeded) { + Boolean toBeCreated = true; + for (PartitionIndexDescriptor existingIdx: existingIdxsResp.partitionIndexDescriptorList()) { + List collect = existingIdx.keys().stream().map(key -> key.name()).collect(Collectors.toList()); + if (collect.equals(neededIdx)) { + toBeCreated = false; + } + } + if (toBeCreated) { + String newIdxName = String.format("hudi_managed_%s", neededIdx.toString()); + PartitionIndex newIdx = PartitionIndex.builder() + .indexName(newIdxName) + .keys(neededIdx).build(); + LOG.warn("Creating new partition index: " + newIdxName); + CreatePartitionIndexRequest creationRequest = CreatePartitionIndexRequest.builder() + .databaseName(databaseName).tableName(tableName).partitionIndex(newIdx).build(); + awsGlue.createPartitionIndex(creationRequest).get(); + } + } + } + } + + protected List> parsePartitionsIndexConfig() { + config.setDefaultValue(META_SYNC_PARTITION_INDEX_FIELDS); + String rawPartitionIndex = config.getString(META_SYNC_PARTITION_INDEX_FIELDS); + List> indexes = Arrays.stream(rawPartitionIndex.split(",")) + .map(idx -> Arrays.stream(idx.split(";")) + .collect(Collectors.toList())).collect(Collectors.toList()); + if (indexes.size() > PARTITION_INDEX_MAX_NUMBER) { + LOG.warn(String.format("Only considering first %s indexes", PARTITION_INDEX_MAX_NUMBER)); + return indexes.subList(0, PARTITION_INDEX_MAX_NUMBER); + } + return indexes; + } + + public Boolean getPartitionIndexEnable(String tableName) { + try { + Table table = getTable(awsGlue, databaseName, tableName); + return Boolean.valueOf(table.parameters().get(GLUE_PARTITION_INDEX_ENABLE)); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to get parameter " + GLUE_PARTITION_INDEX_ENABLE + " time for " + tableId(databaseName, tableName), e); + } + } + + public void updatePartitionIndexEnable(String tableName, Boolean enable) { + try { + updateTableParameters(awsGlue, databaseName, tableName, Collections.singletonMap(GLUE_PARTITION_INDEX_ENABLE, enable.toString()), false); + } catch (Exception e) { + throw new HoodieGlueSyncException("Fail to update parameter " + GLUE_PARTITION_INDEX_ENABLE + " time for " + tableId(databaseName, tableName), e); + } + } + @Override public Map getMetastoreSchema(String tableName) { try { @@ -537,6 +662,16 @@ public void updateLastCommitTimeSynced(String tableName) { } catch (Exception e) { throw new HoodieGlueSyncException("Fail to update last sync commit time for " + tableId(databaseName, tableName), e); } + try { + // as a side effect, we also refresh the partition indexes if needed + // people may wan't to add indexes, without re-creating the table + // therefore we call this at each commit as a workaround + managePartitionIndexes(tableName); + } catch (ExecutionException e) { + LOG.warn("An indexation process is currently running.", e); + } catch (Exception e) { + LOG.warn("Something went wrong with partition index", e); + } } @Override diff --git a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java index efffae5bd893..21244e651547 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java +++ b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java @@ -22,6 +22,9 @@ import org.apache.hudi.common.config.ConfigGroups; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; /** * Hoodie Configs for Glue. @@ -46,4 +49,20 @@ public class GlueCatalogSyncClientConfig extends HoodieConfig { .markAdvanced() .sinceVersion("0.14.0") .withDocumentation("Makes athena use the metadata table to list partitions and files. Currently it won't benefit from other features such stats indexes"); + + public static final ConfigProperty META_SYNC_PARTITION_INDEX_FIELDS_ENABLE = ConfigProperty + .key(GLUE_CLIENT_PROPERTY_PREFIX + "partition_index_fields.enable") + .defaultValue(false) + .sinceVersion("1.0.0") + .withDocumentation("Enable aws glue partition index feature, to speedup partition based query pattern"); + + public static final ConfigProperty META_SYNC_PARTITION_INDEX_FIELDS = ConfigProperty + .key(GLUE_CLIENT_PROPERTY_PREFIX + "partition_index_fields") + .noDefaultValue() + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(HoodieTableConfig.PARTITION_FIELDS)) + .or(() -> Option.ofNullable(cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME)))) + .sinceVersion("1.0.0") + .withDocumentation(String.join(" ", "Specify the partitions fields to index on aws glue. Separate the fields by semicolon.", + "By default, when the feature is enabled, all the partition will be indexed.", + "You can create up to three indexes, separate them by comma. Eg: col1;col2;col3,col2,col3")); } From e76f2e84ebd1463347f1ef655efd573984cdd00d Mon Sep 17 00:00:00 2001 From: Dongsj <90449228+eric9204@users.noreply.github.com> Date: Fri, 26 Jan 2024 10:19:05 +0800 Subject: [PATCH 079/112] [MINOR] add logger to CompactionPlanOperator & ClusteringPlanOperator (#10562) Co-authored-by: dongsj --- .../apache/hudi/sink/clustering/ClusteringPlanOperator.java | 3 +++ .../org/apache/hudi/sink/compact/CompactionPlanOperator.java | 3 +++ 2 files changed, 6 insertions(+) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java index c16f8ed70801..327d688f951a 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringPlanOperator.java @@ -39,6 +39,8 @@ import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; @@ -49,6 +51,7 @@ */ public class ClusteringPlanOperator extends AbstractStreamOperator implements OneInputStreamOperator { + private static final Logger LOG = LoggerFactory.getLogger(ClusteringPlanOperator.class); /** * Config options. diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanOperator.java index 00591806cc80..3cbd70a5f03f 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionPlanOperator.java @@ -38,6 +38,8 @@ import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.api.operators.Output; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.List; @@ -51,6 +53,7 @@ */ public class CompactionPlanOperator extends AbstractStreamOperator implements OneInputStreamOperator, BoundedOneInput { + private static final Logger LOG = LoggerFactory.getLogger(CompactionPlanOperator.class); /** * Config options. From 6dd4beaed636cabc252f4b54309c4f8e3f2eac25 Mon Sep 17 00:00:00 2001 From: Krishen <22875197+kbuci@users.noreply.github.com> Date: Fri, 26 Jan 2024 19:01:05 -0800 Subject: [PATCH 080/112] [HUDI-7308] LockManager::unlock should not call updateLockHeldTimerMetrics if lockDurationTimer has not been started (#10523) --- .../client/transaction/lock/LockManager.java | 7 ++- ...InProcessLockProviderWithRuntimeError.java | 43 +++++++++++++++++++ .../transaction/TestTransactionManager.java | 27 ++++++++++-- 3 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/InProcessLockProviderWithRuntimeError.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java index 598f7cd70721..663a03b79079 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieLockException; import org.apache.hadoop.fs.FileSystem; @@ -107,7 +108,11 @@ public void lock() { */ public void unlock() { getLockProvider().unlock(); - metrics.updateLockHeldTimerMetrics(); + try { + metrics.updateLockHeldTimerMetrics(); + } catch (HoodieException e) { + LOG.error(String.format("Exception encountered when updating lock metrics: %s", e)); + } close(); } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/InProcessLockProviderWithRuntimeError.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/InProcessLockProviderWithRuntimeError.java new file mode 100644 index 000000000000..f825012f1312 --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/InProcessLockProviderWithRuntimeError.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client.transaction; + +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.conf.Configuration; +import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.common.config.LockConfiguration; + +public class InProcessLockProviderWithRuntimeError extends InProcessLockProvider { + + public InProcessLockProviderWithRuntimeError( + LockConfiguration lockConfiguration, + Configuration conf) { + super(lockConfiguration, conf); + } + + @Override + public boolean tryLock(long time, TimeUnit unit) { + throw new RuntimeException(); + } + + @Override + public void unlock() { + return; + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java index 4222754a1949..c0fb8de8691f 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java @@ -29,15 +29,19 @@ import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.config.metrics.HoodieMetricsConfig; import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.metrics.MetricsReporterType; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.junit.jupiter.api.TestInfo; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -47,14 +51,14 @@ public class TestTransactionManager extends HoodieCommonTestHarness { TransactionManager transactionManager; @BeforeEach - private void init() throws IOException { + private void init(TestInfo testInfo) throws IOException { initPath(); initMetaClient(); - this.writeConfig = getWriteConfig(); + this.writeConfig = getWriteConfig(testInfo.getTags().contains("useLockProviderWithRuntimeError")); this.transactionManager = new TransactionManager(this.writeConfig, this.metaClient.getFs()); } - private HoodieWriteConfig getWriteConfig() { + private HoodieWriteConfig getWriteConfig(boolean useLockProviderWithRuntimeError) { return HoodieWriteConfig.newBuilder() .withPath(basePath) .withCleanConfig(HoodieCleanConfig.newBuilder() @@ -62,13 +66,15 @@ private HoodieWriteConfig getWriteConfig() { .build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(InProcessLockProvider.class) + .withLockProvider(useLockProviderWithRuntimeError ? InProcessLockProviderWithRuntimeError.class : InProcessLockProvider.class) .withLockWaitTimeInMillis(50L) .withNumRetries(2) .withRetryWaitTimeInMillis(10L) .withClientNumRetries(2) .withClientRetryWaitTimeInMillis(10L) .build()) + .forTable("testtable") + .withMetricsConfig(HoodieMetricsConfig.newBuilder().withReporterType(MetricsReporterType.INMEMORY.toString()).withLockingMetrics(true).on(true).build()) .build(); } @@ -245,6 +251,19 @@ public void testTransactionsWithInstantTime() { Assertions.assertFalse(transactionManager.getLastCompletedTransactionOwner().isPresent()); } + @Test + @Tag("useLockProviderWithRuntimeError") + public void testTransactionsWithUncheckedLockProviderRuntimeException() { + assertThrows(RuntimeException.class, () -> { + try { + transactionManager.beginTransaction(Option.empty(), Option.empty()); + } finally { + transactionManager.endTransaction(Option.empty()); + } + }); + + } + private Option getInstant(String timestamp) { return Option.of(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, timestamp)); } From 86e3ca6d9bdca153b14ac82aaac9a7ee19761e66 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 17:08:03 -0800 Subject: [PATCH 081/112] [HUDI-7335] Create hudi-hadoop-common for hadoop-specific implementation (#10564) This commit creates a new module `hudi-hadoop-common` for hadoop-specific implementation. This serves as the first step to decouple `hudi-common` module from hadoop dependencies. --- .../hudi/aws/sync/AwsGlueCatalogSyncTool.java | 4 +- .../java/org/apache/hudi/cli/HoodieCLI.java | 4 +- .../cli/commands/ArchivedCommitsCommand.java | 10 +- .../hudi/cli/commands/ExportCommand.java | 7 +- .../cli/commands/HoodieLogFileCommand.java | 2 +- .../apache/hudi/cli/commands/SparkMain.java | 8 +- .../org/apache/hudi/cli/utils/SparkUtil.java | 4 +- .../hudi/cli/commands/TestCleansCommand.java | 4 +- .../cli/commands/TestCompactionCommand.java | 8 +- .../hudi/cli/commands/TestDiffCommand.java | 4 +- .../commands/TestHoodieLogFileCommand.java | 4 +- .../hudi/cli/commands/TestRepairsCommand.java | 3 +- .../HoodieTestCommitMetadataGenerator.java | 3 +- .../apache/hudi/client/BaseHoodieClient.java | 4 +- .../hudi/client/CompactionAdminClient.java | 2 +- .../hudi/client/HoodieTimelineArchiver.java | 4 +- .../embedded/EmbeddedTimelineService.java | 4 +- .../lock/FileSystemBasedLockProvider.java | 6 +- .../bucket/ConsistentBucketIndexUtils.java | 2 +- .../HoodieBackedTableMetadataWriter.java | 4 +- .../java/org/apache/hudi/metrics/Metrics.java | 4 +- .../org/apache/hudi/table/HoodieTable.java | 4 +- .../ListingBasedRollbackStrategy.java | 2 +- .../hudi/table/marker/DirectWriteMarkers.java | 3 +- ...pleDirectMarkerBasedDetectionStrategy.java | 2 +- ...ionDirectMarkerBasedDetectionStrategy.java | 2 +- .../table/marker/WriteMarkersFactory.java | 6 +- .../upgrade/TwoToOneDowngradeHandler.java | 3 +- .../hudi/HoodieTestCommitGenerator.java | 5 +- .../FileSystemBasedLockProviderTestClass.java | 4 +- .../table/marker/TestWriteMarkersFactory.java | 2 +- .../HoodieFlinkClientTestHarness.java | 4 +- .../HoodieJavaClientTestHarness.java | 5 +- .../hudi/client/SparkRDDWriteClient.java | 2 +- .../client/utils/SparkPartitionUtils.java | 2 +- .../io/storage/row/HoodieRowCreateHandle.java | 2 +- .../SparkBootstrapCommitActionExecutor.java | 4 +- .../org/apache/hudi/HoodieSparkUtils.scala | 9 +- .../org/apache/hudi/client/TestMultiFS.java | 6 +- .../client/TestUpdateSchemaEvolution.java | 4 +- .../fs/TestHoodieSerializableFileStatus.java | 2 +- .../hudi/io/TestHoodieTimelineArchiver.java | 2 +- .../hudi/table/TestConsistencyGuard.java | 2 +- .../action/compact/TestHoodieCompactor.java | 4 +- .../table/marker/TestDirectWriteMarkers.java | 4 +- .../TestTimelineServerBasedWriteMarkers.java | 4 +- .../hudi/testutils/HoodieClientTestUtils.java | 4 +- .../HoodieSparkClientTestHarness.java | 3 +- .../SparkClientFunctionalTestHarness.java | 3 +- hudi-common/pom.xml | 6 ++ .../apache/hudi/BaseHoodieTableFileIndex.java | 4 +- .../config/DFSPropertiesConfiguration.java | 6 +- .../DirectMarkerBasedDetectionStrategy.java | 2 +- .../org/apache/hudi/common/fs/FSUtils.java | 72 +++---------- .../common/fs/FailSafeConsistencyGuard.java | 13 +-- .../common/fs/OptimisticConsistencyGuard.java | 2 + .../apache/hudi/common/model/BaseFile.java | 2 +- .../hudi/common/model/HoodieBaseFile.java | 4 +- .../common/model/HoodieCommitMetadata.java | 3 +- .../hudi/common/model/HoodieLogFile.java | 2 +- .../common/table/HoodieTableMetaClient.java | 16 +-- .../log/AbstractHoodieLogRecordReader.java | 2 +- .../common/table/log/HoodieLogFileReader.java | 8 +- .../table/log/HoodieLogFormatWriter.java | 2 +- .../hudi/common/table/log/LogReaderUtils.java | 4 +- .../table/log/block/HoodieHFileDataBlock.java | 3 +- .../timeline/HoodieArchivedTimeline.java | 4 +- .../table/timeline/dto/FilePathDTO.java | 2 +- .../HoodieTablePreCommitFileSystemView.java | 2 +- .../hudi/common/util/InternalSchemaCache.java | 4 +- .../org/apache/hudi/common/util/OrcUtils.java | 4 +- .../apache/hudi/common/util/ParquetUtils.java | 8 +- ...FileBasedInternalSchemaStorageManager.java | 4 +- .../io/storage/HoodieAvroHFileReader.java | 4 +- .../io/storage/HoodieAvroHFileWriter.java | 2 +- .../hudi/io/storage/HoodieAvroOrcWriter.java | 2 +- .../io/storage/HoodieBaseParquetWriter.java | 10 +- .../metadata/AbstractHoodieTableMetadata.java | 7 +- .../FileSystemBackedTableMetadata.java | 5 +- .../hudi/metadata/HoodieMetadataPayload.java | 4 +- .../metadata/HoodieTableMetadataUtil.java | 7 +- .../apache/hudi/common/fs/TestFSUtils.java | 7 +- .../fs/TestFSUtilsWithRetryWrapperEnable.java | 13 ++- .../fs/TestHoodieWrapperFileSystem.java | 5 +- .../hudi/common/fs/TestStorageSchemes.java | 3 + .../functional/TestHoodieLogFormat.java | 25 ++--- .../timeline/TestHoodieActiveTimeline.java | 4 +- .../view/TestHoodieTableFileSystemView.java | 2 +- .../testutils/HoodieTestDataGenerator.java | 10 +- .../common/testutils/HoodieTestUtils.java | 2 +- .../util/TestDFSPropertiesConfiguration.java | 4 +- .../hudi/common/util/TestMarkerUtils.java | 6 +- .../storage/TestHoodieHFileReaderWriter.java | 6 +- .../java/HoodieJavaWriteClientExample.java | 4 +- .../spark/HoodieWriteClientExample.java | 4 +- .../hudi/schema/FilebasedSchemaProvider.java | 6 +- .../apache/hudi/sink/meta/CkpMetadata.java | 4 +- .../partitioner/profile/WriteProfiles.java | 4 +- .../hudi/table/catalog/HoodieCatalog.java | 4 +- .../hudi/table/catalog/HoodieHiveCatalog.java | 4 +- .../table/catalog/TableOptionProperties.java | 6 +- .../hudi/table/format/FilePathUtils.java | 6 +- .../apache/hudi/table/format/FormatUtils.java | 8 +- .../hudi/table/format/cdc/CdcInputFormat.java | 4 +- .../format/cow/CopyOnWriteInputFormat.java | 12 +-- .../java/org/apache/hudi/util/ClientIds.java | 6 +- .../org/apache/hudi/util/StreamerUtil.java | 7 +- .../hudi/util/ViewStorageProperties.java | 6 +- .../TestStreamWriteOperatorCoordinator.java | 6 +- .../sink/bucket/ITTestBucketStreamWrite.java | 3 +- .../ITTestConsistentBucketStreamWrite.java | 4 +- .../compact/ITTestHoodieFlinkCompactor.java | 2 +- .../hudi/sink/meta/TestCkpMetadata.java | 4 +- .../apache/hudi/sink/utils/TestWriteBase.java | 4 +- .../table/catalog/TestHoodieHiveCatalog.java | 6 +- .../apache/hudi/utils/TestStreamerUtil.java | 4 +- hudi-hadoop-common/pom.xml | 102 ++++++++++++++++++ .../hadoop}/fs/BoundedFsDataInputStream.java | 16 +-- .../apache/hudi/hadoop/fs}/CachingPath.java | 15 +-- .../hudi/hadoop}/fs/ConsistencyGuard.java | 15 +-- .../apache/hudi/hadoop/fs/HadoopFSUtils.java | 85 +++++++++++++++ .../fs/HoodieRetryWrapperFileSystem.java | 15 +-- .../fs/HoodieSerializableFileStatus.java | 15 +-- .../hadoop}/fs/HoodieWrapperFileSystem.java | 28 ++--- .../hudi/hadoop}/fs/NoOpConsistencyGuard.java | 15 +-- .../fs/SchemeAwareFSDataInputStream.java | 15 +-- .../hudi/hadoop/fs}/SerializablePath.java | 15 +-- .../fs/SizeAwareFSDataOutputStream.java | 15 +-- .../hadoop}/fs/TimedFSDataInputStream.java | 15 +-- .../HoodieMergeOnReadSnapshotReader.java | 4 +- .../RealtimeCompactedRecordReader.java | 4 +- .../hudi/hadoop/realtime/RealtimeSplit.java | 2 +- .../RealtimeUnmergedRecordReader.java | 4 +- .../TestHoodieMergeOnReadSnapshotReader.java | 2 +- .../TestHoodieRealtimeRecordReader.java | 3 +- .../integ/testsuite/HoodieTestSuiteJob.java | 8 +- .../SparkDataSourceContinuousIngestTool.java | 4 +- .../dag/nodes/ValidateAsyncOperations.java | 4 +- .../testsuite/generator/DeltaGenerator.java | 4 +- .../testsuite/reader/DFSDeltaInputReader.java | 5 +- .../writer/AvroFileDeltaInputWriter.java | 12 ++- .../spark/sql/SparkSqlCreateTableNode.scala | 4 +- .../TestDFSHoodieTestSuiteWriterAdapter.java | 4 +- .../testsuite/TestFileDeltaInputWriter.java | 5 +- .../reader/TestDFSAvroDeltaInputReader.java | 5 +- .../apache/hudi/common/metrics/Counter.java | 0 .../hudi/common/metrics/LocalRegistry.java | 0 .../apache/hudi/common/metrics/Metric.java | 0 .../apache/hudi/common/metrics/Registry.java | 0 .../apache/hudi/common/util/HoodieTimer.java | 0 .../hudi/common/util/ReflectionUtils.java | 0 .../apache/hudi/common/util/RetryHelper.java | 13 +-- .../apache/hudi/common/util/StringUtils.java | 13 +-- .../hudi/common/util/ValidationUtils.java | 0 .../apache/hudi/storage}/StorageSchemes.java | 15 +-- .../KafkaConnectTransactionServices.java | 4 +- .../scala/org/apache/hudi/DefaultSource.scala | 6 +- .../org/apache/hudi/HoodieBaseRelation.scala | 24 +++-- .../scala/org/apache/hudi/Iterators.scala | 7 +- .../spark/sql/hudi/HoodieSqlCommonUtils.scala | 2 +- .../hudi/command/DropHoodieTableCommand.scala | 4 +- .../command/TruncateHoodieTableCommand.scala | 4 +- .../hudi/cli/HDFSParquetImporterUtils.java | 4 +- .../spark/sql/hudi/DedupeSparkJob.scala | 2 +- .../procedures/ExportInstantsProcedure.scala | 9 +- .../RepairAddpartitionmetaProcedure.scala | 2 +- .../RepairDeduplicateProcedure.scala | 8 +- .../RepairOverwriteHoodiePropsProcedure.scala | 8 +- .../procedures/RunBootstrapProcedure.scala | 7 +- .../ShowFileSystemViewProcedure.scala | 9 +- .../ShowFsPathDetailProcedure.scala | 6 +- .../ShowHoodieLogFileMetadataProcedure.scala | 2 +- .../ShowInvalidParquetProcedure.scala | 4 +- .../procedures/StatsFileSizeProcedure.scala | 2 +- .../apache/hudi/functional/TestBootstrap.java | 3 +- .../hudi/functional/TestCOWDataSource.scala | 2 +- .../functional/TestCOWDataSourceStorage.scala | 8 +- .../TestColumnStatsIndexWithSQL.scala | 2 +- .../TestDataSourceForBootstrap.scala | 6 +- .../functional/TestMORDataSourceStorage.scala | 8 +- .../hudi/functional/TestSparkDataSource.scala | 9 +- .../functional/TestSparkSqlCoreFlow.scala | 9 +- .../sql/hudi/HoodieSparkSqlTestBase.scala | 6 +- .../apache/spark/sql/hudi/TestDropTable.scala | 10 +- .../spark/sql/hudi/TestMergeIntoTable.scala | 6 +- .../TestHdfsParquetImportProcedure.scala | 9 +- .../hudi/procedure/TestRepairsProcedure.scala | 7 +- .../TestShowInvalidParquetProcedure.scala | 6 +- .../apache/hudi/hive/ddl/HMSDDLExecutor.java | 2 +- .../hudi/hive/ddl/QueryBasedDDLExecutor.java | 2 +- .../hudi/sync/common/HoodieSyncClient.java | 2 +- .../hudi/sync/common/HoodieSyncConfig.java | 4 +- .../sync/common/util/TestSyncUtilHelpers.java | 4 +- .../timeline/service/TimelineService.java | 8 +- .../hudi/utilities/HDFSParquetImporter.java | 4 +- .../utilities/HoodieCompactionAdminTool.java | 4 +- .../hudi/utilities/HoodieCompactor.java | 4 +- .../utilities/HoodieDropPartitionsTool.java | 4 +- .../HoodieMetadataTableValidator.java | 2 +- .../hudi/utilities/HoodieRepairTool.java | 9 +- .../hudi/utilities/HoodieSnapshotCopier.java | 7 +- .../utilities/HoodieSnapshotExporter.java | 15 +-- .../apache/hudi/utilities/TableSizeStats.java | 4 +- .../utilities/perf/TimelineServerPerf.java | 5 +- .../schema/FilebasedSchemaProvider.java | 4 +- .../utilities/sources/HiveIncrPullSource.java | 4 +- .../utilities/sources/SqlFileBasedSource.java | 4 +- .../helpers/CloudObjectsSelectorCommon.java | 4 +- .../sources/helpers/DFSPathSelector.java | 4 +- .../streamer/HoodieMultiTableStreamer.java | 4 +- .../utilities/streamer/HoodieStreamer.java | 6 +- .../streamer/SparkSampleWritesUtils.java | 8 +- .../hudi/utilities/streamer/StreamSync.java | 4 +- .../transform/SqlFileBasedTransformer.java | 4 +- .../TestHoodieDeltaStreamer.java | 10 +- .../functional/TestHoodieSnapshotCopier.java | 3 +- .../TestHoodieSnapshotExporter.java | 4 +- .../helpers/TestSanitizationUtils.java | 4 +- packaging/hudi-aws-bundle/pom.xml | 1 + packaging/hudi-datahub-sync-bundle/pom.xml | 1 + packaging/hudi-flink-bundle/pom.xml | 1 + packaging/hudi-gcp-bundle/pom.xml | 1 + packaging/hudi-hadoop-mr-bundle/pom.xml | 1 + packaging/hudi-hive-sync-bundle/pom.xml | 1 + packaging/hudi-integ-test-bundle/pom.xml | 1 + packaging/hudi-kafka-connect-bundle/pom.xml | 1 + packaging/hudi-presto-bundle/pom.xml | 1 + packaging/hudi-spark-bundle/pom.xml | 1 + packaging/hudi-timeline-server-bundle/pom.xml | 1 + packaging/hudi-trino-bundle/pom.xml | 1 + packaging/hudi-utilities-bundle/pom.xml | 1 + packaging/hudi-utilities-slim-bundle/pom.xml | 1 + pom.xml | 1 + 233 files changed, 876 insertions(+), 591 deletions(-) create mode 100644 hudi-hadoop-common/pom.xml rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/BoundedFsDataInputStream.java (81%) rename {hudi-common/src/main/java/org/apache/hudi/hadoop => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs}/CachingPath.java (93%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/ConsistencyGuard.java (85%) create mode 100644 hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/HoodieRetryWrapperFileSystem.java (97%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/HoodieSerializableFileStatus.java (90%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/HoodieWrapperFileSystem.java (97%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/NoOpConsistencyGuard.java (71%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/SchemeAwareFSDataInputStream.java (75%) rename {hudi-common/src/main/java/org/apache/hudi/hadoop => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs}/SerializablePath.java (78%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/SizeAwareFSDataOutputStream.java (86%) rename {hudi-common/src/main/java/org/apache/hudi/common => hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop}/fs/TimedFSDataInputStream.java (86%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/metrics/Counter.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/metrics/LocalRegistry.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/metrics/Metric.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/metrics/Registry.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/HoodieTimer.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/RetryHelper.java (92%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/StringUtils.java (93%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/common/util/ValidationUtils.java (100%) rename {hudi-common/src/main/java/org/apache/hudi/common/fs => hudi-io/src/main/java/org/apache/hudi/storage}/StorageSchemes.java (91%) diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java index eed9486d69cd..e86a6b99f5cc 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AwsGlueCatalogSyncTool.java @@ -19,7 +19,7 @@ package org.apache.hudi.aws.sync; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HiveSyncTool; @@ -62,7 +62,7 @@ public static void main(String[] args) { } // HiveConf needs to load fs conf to allow instantiation via AWSGlueClientFactory TypedProperties props = params.toProps(); - Configuration hadoopConf = FSUtils.getFs(props.getString(META_SYNC_BASE_PATH.key()), new Configuration()).getConf(); + Configuration hadoopConf = HadoopFSUtils.getFs(props.getString(META_SYNC_BASE_PATH.key()), new Configuration()).getConf(); try (AwsGlueCatalogSyncTool tool = new AwsGlueCatalogSyncTool(props, hadoopConf)) { tool.syncHoodieTable(); } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java index 7b54760cddce..7cec0172b157 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/HoodieCLI.java @@ -21,10 +21,10 @@ import org.apache.hudi.cli.utils.SparkTempViewProvider; import org.apache.hudi.cli.utils.TempViewProvider; import org.apache.hudi.common.fs.ConsistencyGuardConfig; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -72,7 +72,7 @@ private static void setLayoutVersion(Integer layoutVersion) { public static boolean initConf() { if (HoodieCLI.conf == null) { - HoodieCLI.conf = FSUtils.prepareHadoopConf(new Configuration()); + HoodieCLI.conf = HadoopFSUtils.prepareHadoopConf(new Configuration()); return true; } return false; diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java index 90724929df40..075a57d541c0 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java @@ -26,7 +26,6 @@ import org.apache.hudi.cli.commands.SparkMain.SparkCommand; import org.apache.hudi.cli.utils.InputStreamConsumer; import org.apache.hudi.cli.utils.SparkUtil; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; @@ -38,6 +37,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; @@ -110,11 +110,11 @@ public String showArchivedCommits( if (folder != null && !folder.isEmpty()) { archivePath = new Path(basePath + "/.hoodie/" + folder); } - FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); + FileStatus[] fsStatuses = HadoopFSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); List allStats = new ArrayList<>(); for (FileStatus fs : fsStatuses) { // read the archived file - Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf), + Reader reader = HoodieLogFormat.newReader(HadoopFSUtils.getFs(basePath, HoodieCLI.conf), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema()); List readRecords = new ArrayList<>(); @@ -184,11 +184,11 @@ public String showCommits( String basePath = metaClient.getBasePath(); Path archivePath = new Path(metaClient.getArchivePath() + "/.commits_.archive*"); FileStatus[] fsStatuses = - FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); + HadoopFSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); List allCommits = new ArrayList<>(); for (FileStatus fs : fsStatuses) { // read the archived file - HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(basePath, HoodieCLI.conf), + HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(HadoopFSUtils.getFs(basePath, HoodieCLI.conf), new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema()); List readRecords = new ArrayList<>(); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java index fedc2712d4c9..40e7154b5f99 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java @@ -30,7 +30,6 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieSavepointMetadata; import org.apache.hudi.cli.HoodieCLI; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; @@ -44,6 +43,8 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.shell.standard.ShellComponent; @@ -97,7 +98,7 @@ public String exportInstants( List nonArchivedInstants = timeline.getInstants(); // Archived instants are in the commit archive files - FileStatus[] statuses = FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); + FileStatus[] statuses = HadoopFSUtils.getFs(basePath, HoodieCLI.conf).globStatus(archivePath); List archivedStatuses = Arrays.stream(statuses).sorted((f1, f2) -> (int) (f1.getModificationTime() - f2.getModificationTime())).collect(Collectors.toList()); if (descending) { @@ -119,7 +120,7 @@ public String exportInstants( private int copyArchivedInstants(List statuses, Set actionSet, int limit, String localFolder) throws Exception { int copyCount = 0; - FileSystem fileSystem = FSUtils.getFs(HoodieCLI.getTableMetaClient().getBasePath(), HoodieCLI.conf); + FileSystem fileSystem = HadoopFSUtils.getFs(HoodieCLI.getTableMetaClient().getBasePath(), HoodieCLI.conf); for (FileStatus fs : statuses) { // read the archived file diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java index 58eff5f7b31c..46a9e787ea6e 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java @@ -43,7 +43,7 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieMemoryConfig; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.avro.Schema; diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java index 281ab3994f75..742540d0ff5b 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java @@ -26,7 +26,6 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieRecord; @@ -44,6 +43,7 @@ import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieSavepointException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.constant.KeyGeneratorType; import org.apache.hudi.table.HoodieSparkTable; @@ -437,7 +437,7 @@ private static int cluster(JavaSparkContext jsc, String basePath, String tableNa private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath, String repairedOutputPath, String basePath, boolean dryRun, String dedupeType) { DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc), - FSUtils.getFs(basePath, jsc.hadoopConfiguration()), DeDupeType.withName(dedupeType)); + HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()), DeDupeType.withName(dedupeType)); job.fixDuplicates(dryRun); return 0; } @@ -469,7 +469,7 @@ public static int renamePartition(JavaSparkContext jsc, String basePath, String // after re-writing, we can safely delete older partition. deleteOlderPartition(basePath, oldPartition, recordsToRewrite, propsMap); // also, we can physically delete the old partition. - FileSystem fs = FSUtils.getFs(new Path(basePath), metaClient.getHadoopConf()); + FileSystem fs = HadoopFSUtils.getFs(new Path(basePath), metaClient.getHadoopConf()); try { fs.delete(new Path(basePath, oldPartition), true); } catch (IOException e) { @@ -555,7 +555,7 @@ private static int doBootstrap(JavaSparkContext jsc, String tableName, String ta cfg.payloadClassName = payloadClassName; cfg.enableHiveSync = Boolean.valueOf(enableHiveSync); - new BootstrapExecutor(cfg, jsc, FSUtils.getFs(basePath, jsc.hadoopConfiguration()), + new BootstrapExecutor(cfg, jsc, HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()), jsc.hadoopConfiguration(), properties).execute(); return 0; } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java index fd09a27271a8..5726c4142d43 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkUtil.java @@ -21,9 +21,9 @@ import org.apache.hudi.cli.HoodieCliSparkConfig; import org.apache.hudi.cli.commands.SparkEnvCommand; import org.apache.hudi.cli.commands.SparkMain; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; @@ -126,7 +126,7 @@ public static JavaSparkContext initJavaSparkContext(String name, Option public static JavaSparkContext initJavaSparkContext(SparkConf sparkConf) { JavaSparkContext jsc = new JavaSparkContext(sparkConf); jsc.hadoopConfiguration().setBoolean(HoodieCliSparkConfig.CLI_PARQUET_ENABLE_SUMMARY_METADATA, false); - FSUtils.prepareHadoopConf(jsc.hadoopConfiguration()); + HadoopFSUtils.prepareHadoopConf(jsc.hadoopConfiguration()); return jsc; } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java index 1b45fdd4d072..2fc5baa70029 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java @@ -26,7 +26,6 @@ import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; @@ -39,6 +38,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -93,7 +93,7 @@ public void init() throws Exception { metaClient = HoodieCLI.getTableMetaClient(); String fileId1 = UUID.randomUUID().toString(); String fileId2 = UUID.randomUUID().toString(); - FileSystem fs = FSUtils.getFs(basePath(), hadoopConf()); + FileSystem fs = HadoopFSUtils.getFs(basePath(), hadoopConf()); HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath); // Create four commits diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java index f1ea09470d35..c040d931187e 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java @@ -25,9 +25,8 @@ import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.client.HoodieTimelineArchiver; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; -import org.apache.hudi.common.fs.NoOpConsistencyGuard; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -44,6 +43,7 @@ import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieSparkTable; import org.junit.jupiter.api.BeforeEach; @@ -166,7 +166,7 @@ private void generateCompactionInstances() throws IOException { // so the archival in data table can happen HoodieTestUtils.createCompactionCommitInMetadataTable(hadoopConf(), new HoodieWrapperFileSystem( - FSUtils.getFs(tablePath, hadoopConf()), new NoOpConsistencyGuard()), tablePath, "007"); + HadoopFSUtils.getFs(tablePath, hadoopConf()), new NoOpConsistencyGuard()), tablePath, "007"); } private void generateArchive() throws IOException { diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestDiffCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestDiffCommand.java index c12ad676d41c..1ce777c71b35 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestDiffCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestDiffCommand.java @@ -26,7 +26,6 @@ import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; @@ -38,6 +37,7 @@ import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.NumericUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -92,7 +92,7 @@ public void testDiffFile() throws Exception { HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); String fileId1 = UUID.randomUUID().toString(); String fileId2 = UUID.randomUUID().toString(); - FileSystem fs = FSUtils.getFs(basePath(), hadoopConf()); + FileSystem fs = HadoopFSUtils.getFs(basePath(), hadoopConf()); HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath); // Create four commits diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java index 0f796c8195a1..ff3898d9d65a 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java @@ -27,7 +27,6 @@ import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil; import org.apache.hudi.common.config.HoodieCommonConfig; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieLogFile; @@ -44,6 +43,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieMemoryConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -108,7 +108,7 @@ public void init() throws IOException, InterruptedException, URISyntaxException "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); Files.createDirectories(Paths.get(partitionPath)); - fs = FSUtils.getFs(tablePath, hadoopConf()); + fs = HadoopFSUtils.getFs(tablePath, hadoopConf()); try (HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder() .onParentPath(new Path(partitionPath)) diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java index db9e85acc844..6756ec267808 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java @@ -38,6 +38,7 @@ import org.apache.hudi.common.testutils.RawTripTestPayload; import org.apache.hudi.common.util.PartitionPathEncodeUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.testutils.Assertions; @@ -105,7 +106,7 @@ public class TestRepairsCommand extends CLIFunctionalTestHarness { public void init() throws IOException { String tableName = tableName(); tablePath = tablePath(tableName); - fs = FSUtils.getFs(tablePath, hadoopConf()); + fs = HadoopFSUtils.getFs(tablePath, hadoopConf()); // Create table and connect new TableCommand().createTable( diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index a26c8d008393..1ade400414b9 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -113,7 +114,7 @@ public static void createCommitFileWithMetadata(String basePath, String commitTi static void createFileWithMetadata(String basePath, Configuration configuration, String name, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + name); - try (FSDataOutputStream os = FSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { + try (FSDataOutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { os.writeBytes(new String(getUTF8Bytes(content))); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java index 9236197a4802..73bafa691d8a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java @@ -25,7 +25,6 @@ import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.client.utils.TransactionUtils; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -38,6 +37,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieWriteConflictException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.metrics.HoodieMetrics; import org.apache.hudi.table.HoodieTable; @@ -85,7 +85,7 @@ protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig client protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig clientConfig, Option timelineServer) { this.hadoopConf = context.getHadoopConf().get(); - this.fs = FSUtils.getFs(clientConfig.getBasePath(), hadoopConf); + this.fs = HadoopFSUtils.getFs(clientConfig.getBasePath(), hadoopConf); this.context = context; this.basePath = clientConfig.getBasePath(); this.config = clientConfig; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java index 257d2cd855cc..e5ae98644c18 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java @@ -41,7 +41,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.table.action.compact.OperationResult; import org.apache.hadoop.fs.FileStatus; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java index 718f8ad2c46c..e08bcbf6957b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java @@ -25,8 +25,6 @@ import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.model.HoodieArchivedLogFile; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieAvroPayload; @@ -56,7 +54,9 @@ import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.compact.CompactionTriggerStrategy; import org.apache.hudi.table.marker.WriteMarkers; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java index f1290bb9cc31..1138e98e9ce2 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java @@ -20,7 +20,6 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.view.FileSystemViewManager; @@ -28,6 +27,7 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.util.NetworkUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.timeline.service.TimelineService; import org.apache.hadoop.conf.Configuration; @@ -176,7 +176,7 @@ private void startServer(TimelineServiceCreator timelineServiceCreator) throws I this.serviceConfig = timelineServiceConfBuilder.build(); server = timelineServiceCreator.create(context, hadoopConf.newCopy(), serviceConfig, - FSUtils.getFs(writeConfig.getBasePath(), hadoopConf.newCopy()), viewManager); + HadoopFSUtils.getFs(writeConfig.getBasePath(), hadoopConf.newCopy()), viewManager); serverPort = server.startService(); LOG.info("Started embedded timeline server at " + hostAddr + ":" + serverPort); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java index 1d32620b043a..52e8e0285b41 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java @@ -22,8 +22,6 @@ import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.lock.LockState; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -34,6 +32,8 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -83,7 +83,7 @@ public FileSystemBasedLockProvider(final LockConfiguration lockConfiguration, fi this.lockFile = new Path(lockDirectory + Path.SEPARATOR + LOCK_FILE_NAME); this.lockInfo = new LockInfo(); this.sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); - this.fs = FSUtils.getFs(this.lockFile.toString(), configuration); + this.fs = HadoopFSUtils.getFs(this.lockFile.toString(), configuration); List customSupportedFSs = lockConfiguration.getConfig().getStringList(HoodieCommonConfig.HOODIE_FS_ATOMIC_CREATION_SUPPORT.key(), ",", new ArrayList<>()); if (!customSupportedFSs.contains(this.fs.getScheme()) && !StorageSchemes.isAtomicCreationSupported(this.fs.getScheme())) { throw new HoodieLockException("Unsupported scheme :" + this.fs.getScheme() + ", since this fs can not support atomic creation"); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java index 6ff4d1b6d099..3bf40d1f1388 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java @@ -19,7 +19,6 @@ package org.apache.hudi.index.bucket; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.ConsistentHashingNode; import org.apache.hudi.common.model.HoodieConsistentHashingMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -30,6 +29,7 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.fs.FSDataOutputStream; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index d6e7a8f626eb..2ad169d51261 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -67,8 +67,8 @@ import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.TableNotFoundException; -import org.apache.hudi.hadoop.CachingPath; -import org.apache.hudi.hadoop.SerializablePath; +import org.apache.hudi.hadoop.fs.CachingPath; +import org.apache.hudi.hadoop.fs.SerializablePath; import org.apache.hudi.table.BulkInsertPartitioner; import org.apache.hadoop.conf.Configuration; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java index 31b0d19da010..ef088091732b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/Metrics.java @@ -18,11 +18,11 @@ package org.apache.hudi.metrics; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.codahale.metrics.MetricRegistry; import org.apache.hadoop.conf.Configuration; @@ -95,7 +95,7 @@ public static synchronized void shutdownAllMetrics() { private List addAdditionalMetricsExporters(HoodieWriteConfig metricConfig) { List reporterList = new ArrayList<>(); List propPathList = StringUtils.split(metricConfig.getMetricReporterFileBasedConfigs(), ","); - try (FileSystem fs = FSUtils.getFs(propPathList.get(0), new Configuration())) { + try (FileSystem fs = HadoopFSUtils.getFs(propPathList.get(0), new Configuration())) { for (String propPath : propPathList) { HoodieWriteConfig secondarySourceConfig = HoodieWriteConfig.newBuilder().fromInputStream( fs.open(new Path(propPath))).withPath(metricConfig.getBasePath()).build(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index ab4777ad677a..cdefb1533987 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -37,8 +37,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.fs.ConsistencyGuard; -import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FailSafeConsistencyGuard; import org.apache.hudi.common.fs.OptimisticConsistencyGuard; @@ -69,6 +67,8 @@ import org.apache.hudi.exception.HoodieInsertException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.HoodieUpsertException; +import org.apache.hudi.hadoop.fs.ConsistencyGuard; +import org.apache.hudi.hadoop.fs.ConsistencyGuard.FileVisibility; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java index 820e998c368f..a622c5ae4334 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.model.HoodieRollbackRequest; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; @@ -34,6 +33,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieRollbackException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.fs.FileStatus; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java index f9c30ca17367..a540c21a8a78 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java @@ -33,6 +33,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.fs.FileStatus; @@ -113,7 +114,7 @@ public Set createdAndMergedDataPaths(HoodieEngineContext context, int pa context.setJobStatus(this.getClass().getSimpleName(), "Obtaining marker files for all created, merged paths"); dataFiles.addAll(context.flatMap(subDirectories, directory -> { Path path = new Path(directory); - FileSystem fileSystem = FSUtils.getFs(path, serializedConf.get()); + FileSystem fileSystem = HadoopFSUtils.getFs(path, serializedConf.get()); RemoteIterator itr = fileSystem.listFiles(path, true); List result = new ArrayList<>(); while (itr.hasNext()) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleDirectMarkerBasedDetectionStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleDirectMarkerBasedDetectionStrategy.java index 038d21ae05c1..7c85a5a18058 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleDirectMarkerBasedDetectionStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleDirectMarkerBasedDetectionStrategy.java @@ -19,13 +19,13 @@ package org.apache.hudi.table.marker; import org.apache.hudi.common.conflict.detection.DirectMarkerBasedDetectionStrategy; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.MarkerUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieEarlyConflictDetectionException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleTransactionDirectMarkerBasedDetectionStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleTransactionDirectMarkerBasedDetectionStrategy.java index b22fff750c8f..f17f166656c6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleTransactionDirectMarkerBasedDetectionStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/SimpleTransactionDirectMarkerBasedDetectionStrategy.java @@ -19,10 +19,10 @@ package org.apache.hudi.table.marker; import org.apache.hudi.client.transaction.DirectMarkerTransactionManager; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieEarlyConflictDetectionException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkersFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkersFactory.java index 7a8234c8d8a6..70cecf475d84 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkersFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/WriteMarkersFactory.java @@ -18,10 +18,10 @@ package org.apache.hudi.table.marker; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hudi.table.HoodieTable; import org.slf4j.Logger; @@ -52,7 +52,7 @@ public static WriteMarkers get(MarkerType markerType, HoodieTable table, String } String basePath = table.getMetaClient().getBasePath(); if (StorageSchemes.HDFS.getScheme().equals( - FSUtils.getFs(basePath, table.getContext().getHadoopConf().newCopy()).getScheme())) { + HadoopFSUtils.getFs(basePath, table.getContext().getHadoopConf().newCopy()).getScheme())) { LOG.warn("Timeline-server-based markers are not supported for HDFS: " + "base path " + basePath + ". Falling back to direct markers."); return new DirectWriteMarkers(table, instantTime); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java index cb0fca5ffee0..34d671a7cf0b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java @@ -29,6 +29,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.DirectWriteMarkers; @@ -92,7 +93,7 @@ private void convertToDirectMarkers(final String commitInstantTime, HoodieEngineContext context, int parallelism) throws IOException { String markerDir = table.getMetaClient().getMarkerFolderPath(commitInstantTime); - FileSystem fileSystem = FSUtils.getFs(markerDir, context.getHadoopConf().newCopy()); + FileSystem fileSystem = HadoopFSUtils.getFs(markerDir, context.getHadoopConf().newCopy()); Option markerTypeOption = MarkerUtils.readMarkerType(fileSystem, markerDir); if (markerTypeOption.isPresent()) { switch (markerTypeOption.get()) { diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java index b41649f5207d..04f975ebe52d 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -162,7 +163,7 @@ public static void createCommitFileWithMetadata( String basePath, Configuration configuration, String filename, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + filename); - try (FSDataOutputStream os = FSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { + try (FSDataOutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { os.writeBytes(new String(getUTF8Bytes(content))); } } @@ -170,7 +171,7 @@ public static void createCommitFileWithMetadata( public static void createDataFile( String basePath, Configuration configuration, String partitionPath, String filename) throws IOException { - FileSystem fs = FSUtils.getFs(basePath, configuration); + FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); Path filePath = new Path(new Path(basePath, partitionPath), filename); Path parent = filePath.getParent(); if (!fs.exists(parent)) { diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/FileSystemBasedLockProviderTestClass.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/FileSystemBasedLockProviderTestClass.java index 97ad050e7240..9488d5bab6cc 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/FileSystemBasedLockProviderTestClass.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/FileSystemBasedLockProviderTestClass.java @@ -22,10 +22,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hudi.common.config.LockConfiguration; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import java.io.IOException; import java.io.Serializable; @@ -56,7 +56,7 @@ public FileSystemBasedLockProviderTestClass(final LockConfiguration lockConfigur this.retryWaitTimeMs = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY); this.retryMaxCount = lockConfiguration.getConfig().getInteger(LOCK_ACQUIRE_NUM_RETRIES_PROP_KEY); this.lockFile = new Path(lockDirectory + "/" + LOCK); - this.fs = FSUtils.getFs(this.lockFile.toString(), configuration); + this.fs = HadoopFSUtils.getFs(this.lockFile.toString(), configuration); } @Override diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersFactory.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersFactory.java index 21c0e8108a53..d78b88306822 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersFactory.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersFactory.java @@ -21,12 +21,12 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.conf.Configuration; diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkClientTestHarness.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkClientTestHarness.java index 1cb2375123f8..ded254bf44cb 100644 --- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkClientTestHarness.java +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkClientTestHarness.java @@ -23,13 +23,13 @@ import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieListData; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.bloom.TestFlinkHoodieBloomIndex; import org.apache.hudi.table.HoodieTable; @@ -71,7 +71,7 @@ protected void initFileSystem() { private void initFileSystemWithConfiguration(Configuration configuration) { checkState(basePath != null); - fs = FSUtils.getFs(basePath, configuration); + fs = HadoopFSUtils.getFs(basePath, configuration); if (fs instanceof LocalFileSystem) { LocalFileSystem lfs = (LocalFileSystem) fs; // With LocalFileSystem, with checksum disabled, fs.open() returns an inputStream which is FSInputStream diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java index 38bbe528891b..48726efcd6b8 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java @@ -62,6 +62,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.JavaHoodieIndexFactory; import org.apache.hudi.io.storage.HoodieHFileUtils; @@ -197,7 +198,7 @@ protected void initFileSystem(String basePath, Configuration hadoopConf) { throw new IllegalStateException("The base path has not been initialized."); } - fs = FSUtils.getFs(basePath, hadoopConf); + fs = HadoopFSUtils.getFs(basePath, hadoopConf); if (fs instanceof LocalFileSystem) { LocalFileSystem lfs = (LocalFileSystem) fs; // With LocalFileSystem, with checksum disabled, fs.open() returns an inputStream which is FSInputStream @@ -996,7 +997,7 @@ public Stream readHFile(String[] paths) { // TODO: this should be ported to use HoodieStorageReader List valuesAsList = new LinkedList<>(); - FileSystem fs = FSUtils.getFs(paths[0], context.getHadoopConf().get()); + FileSystem fs = HadoopFSUtils.getFs(paths[0], context.getHadoopConf().get()); CacheConfig cacheConfig = new CacheConfig(fs.getConf()); Schema schema = null; for (String path : paths) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java index a12fc6a7ea1b..6fdfee16bbe0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieData.HoodieDataCacheKey; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieKey; @@ -36,6 +35,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.SparkHoodieIndexFactory; import org.apache.hudi.metadata.HoodieTableMetadata; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkPartitionUtils.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkPartitionUtils.java index 6dc344ec7347..d6545f247b63 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkPartitionUtils.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkPartitionUtils.java @@ -22,7 +22,7 @@ import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.SparkAdapterSupport$; import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java index 05019d2e814c..da0d3a4fe0b6 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java @@ -34,7 +34,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieInsertException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.WriteMarkersFactory; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java index db7fceecb077..6f94139b4b71 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java @@ -32,7 +32,6 @@ import org.apache.hudi.common.bootstrap.index.BootstrapIndex; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.data.HoodieData; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.BootstrapFileMapping; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieKey; @@ -52,6 +51,7 @@ import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieKeyGeneratorException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.KeyGeneratorInterface; import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; import org.apache.hudi.table.HoodieSparkTable; @@ -105,7 +105,7 @@ public SparkBootstrapCommitActionExecutor(HoodieSparkEngineContext context, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, WriteOperationType.BOOTSTRAP, extraMetadata); - bootstrapSourceFileSystem = FSUtils.getFs(config.getBootstrapSourceBasePath(), hadoopConf); + bootstrapSourceFileSystem = HadoopFSUtils.getFs(config.getBootstrapSourceBasePath(), hadoopConf); } private void validate() { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index 535af8db1933..975135c13d58 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -18,14 +18,15 @@ package org.apache.hudi -import org.apache.avro.Schema -import org.apache.avro.generic.GenericRecord -import org.apache.hadoop.fs.Path import org.apache.hudi.HoodieConversionUtils.toScalaOption import org.apache.hudi.avro.{AvroSchemaUtils, HoodieAvroUtils} import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.common.model.HoodieRecord -import org.apache.hudi.hadoop.CachingPath +import org.apache.hudi.hadoop.fs.CachingPath + +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.hadoop.fs.Path import org.apache.spark.SPARK_VERSION import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java index cfa0a5b95d9b..8c5e6d710867 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java @@ -18,7 +18,6 @@ package org.apache.hudi.client; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -30,6 +29,7 @@ import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; @@ -132,7 +132,7 @@ public void readLocalWriteHDFS() throws Exception { hdfsWriteClient.upsert(writeRecords, readCommitTime); // Read from hdfs - FileSystem fs = FSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf()); + FileSystem fs = HadoopFSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf()); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(dfsBasePath).build(); HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); Dataset readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime); @@ -153,7 +153,7 @@ public void readLocalWriteHDFS() throws Exception { localWriteClient.upsert(localWriteRecords, writeCommitTime); LOG.info("Reading from path: " + tablePath); - fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf()); + fs = HadoopFSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf()); metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(tablePath).build(); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); Dataset localReadRecords = diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java index ea1c54b80ffa..cb389d7ca9ba 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java @@ -18,7 +18,6 @@ package org.apache.hudi.client; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; @@ -32,6 +31,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieUpsertException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.io.CreateHandleFactory; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieWriteHandle; @@ -99,7 +99,7 @@ private WriteStatus prepareFirstRecordCommit(List recordsStrs) throws IO }).collect(); final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100")); - FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile); + HadoopFSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile); return statuses.get(0); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/TestHoodieSerializableFileStatus.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/TestHoodieSerializableFileStatus.java index 9d5e4e700c6e..5cd9c4228c45 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/TestHoodieSerializableFileStatus.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/common/fs/TestHoodieSerializableFileStatus.java @@ -20,13 +20,13 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.hadoop.fs.HoodieSerializableFileStatus; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.SparkException; - import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index bed16dcbefa5..3a9402a2e3f7 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -24,7 +24,6 @@ import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.config.HoodieMetadataConfig; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -54,6 +53,7 @@ import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java index c65ddb651bd8..62140bd0f536 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java @@ -18,12 +18,12 @@ package org.apache.hudi.table; -import org.apache.hudi.common.fs.ConsistencyGuard; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FailSafeConsistencyGuard; import org.apache.hudi.common.fs.OptimisticConsistencyGuard; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.testutils.FileCreateUtils; +import org.apache.hudi.hadoop.fs.ConsistencyGuard; import org.apache.hudi.testutils.HoodieSparkClientTestHarness; import org.apache.hadoop.fs.Path; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java index 3fd09d5704fc..3595f80b76f5 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieListData; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -42,6 +41,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.metrics.HoodieMetricsConfig; import org.apache.hudi.exception.HoodieNotSupportedException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.bloom.HoodieBloomIndex; import org.apache.hudi.index.bloom.SparkHoodieBloomIndexHelper; @@ -80,7 +80,7 @@ public void setUp() throws Exception { // Create a temp folder as the base path initPath(); hadoopConf = HoodieTestUtils.getDefaultHadoopConf(); - fs = FSUtils.getFs(basePath, hadoopConf); + fs = HadoopFSUtils.getFs(basePath, hadoopConf); metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ); initTestDataGenerator(); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java index 0e9f990048e2..b680a7b2eff7 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestDirectWriteMarkers.java @@ -19,9 +19,9 @@ package org.apache.hudi.table.marker; import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.testutils.FileSystemTestUtils; import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hadoop.fs.FileStatus; @@ -47,7 +47,7 @@ public void setup() throws IOException { this.jsc = new JavaSparkContext( HoodieClientTestUtils.getSparkConfForTest(TestDirectWriteMarkers.class.getName())); this.context = new HoodieSparkEngineContext(jsc); - this.fs = FSUtils.getFs(metaClient.getBasePathV2().toString(), metaClient.getHadoopConf()); + this.fs = HadoopFSUtils.getFs(metaClient.getBasePathV2().toString(), metaClient.getHadoopConf()); this.markerFolderPath = new Path(Paths.get(metaClient.getMarkerFolderPath("000")).toUri()); this.writeMarkers = new DirectWriteMarkers( fs, metaClient.getBasePathV2().toString(), markerFolderPath.toString(), "000"); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java index 61ee844b1917..17bc372a14f9 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java @@ -22,7 +22,6 @@ import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.engine.HoodieLocalEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; @@ -30,6 +29,7 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.MarkerUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.timeline.service.TimelineService; @@ -61,7 +61,7 @@ public void setup() throws IOException { this.jsc = new JavaSparkContext( HoodieClientTestUtils.getSparkConfForTest(TestTimelineServerBasedWriteMarkers.class.getName())); this.context = new HoodieSparkEngineContext(jsc); - this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf()); + this.fs = HadoopFSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf()); this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000")); FileSystemViewStorageConfig storageConf = diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 55619a2a24bf..ff9e73065460 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -22,7 +22,6 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.SparkRDDReadClient; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileFormat; @@ -40,6 +39,7 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hudi.timeline.service.TimelineService; @@ -275,7 +275,7 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat // TODO: this should be ported to use HoodieStorageReader List valuesAsList = new LinkedList<>(); - FileSystem fs = FSUtils.getFs(paths[0], jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(paths[0], jsc.hadoopConfiguration()); CacheConfig cacheConfig = new CacheConfig(fs.getConf()); Schema schema = null; for (String path : paths) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java index b9b2fe2c869d..4bb426d09c4f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java @@ -51,6 +51,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; @@ -378,7 +379,7 @@ private void initFileSystemWithConfiguration(Configuration configuration) { throw new IllegalStateException("The base path has not been initialized."); } - fs = FSUtils.getFs(basePath, configuration); + fs = HadoopFSUtils.getFs(basePath, configuration); if (fs instanceof LocalFileSystem) { LocalFileSystem lfs = (LocalFileSystem) fs; // With LocalFileSystem, with checksum disabled, fs.open() returns an inputStream which is FSInputStream diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java index 14d325bfdacb..4dc0ae927df9 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java @@ -46,6 +46,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; @@ -144,7 +145,7 @@ public Configuration hadoopConf() { public FileSystem fs() { if (fileSystem == null) { - fileSystem = FSUtils.getFs(basePath(), hadoopConf()); + fileSystem = HadoopFSUtils.getFs(basePath(), hadoopConf()); } return fileSystem; } diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 97cdf36d12a5..3cb5bcc233ee 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -109,6 +109,12 @@ ${project.version} + + org.apache.hudi + hudi-hadoop-common + ${project.version} + + org.openjdk.jol jol-core diff --git a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java index bf7e25393c86..a8fd7e21d8ef 100644 --- a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java @@ -39,7 +39,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.expression.Expression; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.internal.schema.Types; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataUtil; @@ -65,7 +65,7 @@ import static org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE; import static org.apache.hudi.common.table.timeline.TimelineUtils.validateTimestampAsOf; import static org.apache.hudi.common.util.CollectionUtils.combine; -import static org.apache.hudi.hadoop.CachingPath.createRelativePathUnsafe; +import static org.apache.hudi.hadoop.fs.CachingPath.createRelativePathUnsafe; /** * Common (engine-agnostic) File Index implementation enabling individual query engines to diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java b/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java index 3850ca495dc8..4ec0db224000 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/DFSPropertiesConfiguration.java @@ -18,12 +18,12 @@ package org.apache.hudi.common.config; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -144,7 +144,7 @@ public void addPropsFromFile(Path filePath) { throw new IllegalStateException("Loop detected; file " + filePath + " already referenced"); } - FileSystem fs = FSUtils.getFs( + FileSystem fs = HadoopFSUtils.getFs( filePath.toString(), Option.ofNullable(hadoopConfig).orElseGet(Configuration::new) ); @@ -182,7 +182,7 @@ public void addPropsFromStream(BufferedReader reader, Path cfgFilePath) throws I String[] split = splitProperty(line); if (line.startsWith("include=") || line.startsWith("include =")) { Path providedPath = new Path(split[1]); - FileSystem providedFs = FSUtils.getFs(split[1], hadoopConfig); + FileSystem providedFs = HadoopFSUtils.getFs(split[1], hadoopConfig); // In the case that only filename is provided, assume it's in the same directory. if ((!providedPath.isAbsolute() || StringUtils.isNullOrEmpty(providedFs.getScheme())) && cfgFilePath != null) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java b/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java index 126c395eea4e..1f3f4f2536d8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java @@ -21,12 +21,12 @@ import org.apache.hudi.ApiMaturityLevel; import org.apache.hudi.PublicAPIClass; import org.apache.hudi.common.config.HoodieConfig; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.util.MarkerUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index 91c966d00a2b..e3d4a43fe592 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -7,13 +7,14 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.hudi.common.fs; @@ -34,8 +35,12 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.exception.InvalidHoodiePathException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -58,7 +63,6 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Objects; import java.util.Set; import java.util.UUID; @@ -69,7 +73,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.apache.hudi.hadoop.CachingPath.getPathWithoutSchemeAndAuthority; +import static org.apache.hudi.hadoop.fs.CachingPath.getPathWithoutSchemeAndAuthority; /** * Utility functions related to accessing the file storage. @@ -83,23 +87,11 @@ public class FSUtils { Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?"); public static final Pattern PREFIX_BY_FILE_ID_PATTERN = Pattern.compile("^(.+)-(\\d+)"); private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10; - private static final String HOODIE_ENV_PROPS_PREFIX = "HOODIE_ENV_"; private static final String LOG_FILE_EXTENSION = ".log"; private static final PathFilter ALLOW_ALL_FILTER = file -> true; - public static Configuration prepareHadoopConf(Configuration conf) { - // look for all properties, prefixed to be picked up - for (Entry prop : System.getenv().entrySet()) { - if (prop.getKey().startsWith(HOODIE_ENV_PROPS_PREFIX)) { - LOG.info("Picking up value for hoodie env var :" + prop.getKey()); - conf.set(prop.getKey().replace(HOODIE_ENV_PROPS_PREFIX, "").replaceAll("_DOT_", "."), prop.getValue()); - } - } - return conf; - } - public static Configuration buildInlineConf(Configuration conf) { Configuration inlineConf = new Configuration(conf); inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); @@ -107,28 +99,6 @@ public static Configuration buildInlineConf(Configuration conf) { return inlineConf; } - public static FileSystem getFs(String pathStr, Configuration conf) { - return getFs(new Path(pathStr), conf); - } - - public static FileSystem getFs(Path path, Configuration conf) { - FileSystem fs; - prepareHadoopConf(conf); - try { - fs = path.getFileSystem(conf); - } catch (IOException e) { - throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), e); - } - return fs; - } - - public static FileSystem getFs(String pathStr, Configuration conf, boolean localByDefault) { - if (localByDefault) { - return getFs(addSchemeIfLocalPath(pathStr), conf); - } - return getFs(pathStr, conf); - } - /** * Check if table already exists in the given path. * @param path base path of the table. @@ -139,18 +109,6 @@ public static boolean isTableExists(String path, FileSystem fs) throws IOExcepti return fs.exists(new Path(path + "/" + HoodieTableMetaClient.METAFOLDER_NAME)); } - public static Path addSchemeIfLocalPath(String path) { - Path providedPath = new Path(path); - File localFile = new File(path); - if (!providedPath.isAbsolute() && localFile.exists()) { - Path resolvedPath = new Path("file://" + localFile.getAbsolutePath()); - LOG.info("Resolving file " + path + " to be a local file."); - return resolvedPath; - } - LOG.info("Resolving file " + path + "to be a remote file."); - return providedPath; - } - /** * Makes path qualified w/ {@link FileSystem}'s URI * @@ -664,7 +622,7 @@ public static boolean isCHDFileSystem(FileSystem fs) { public static Configuration registerFileSystem(Path file, Configuration conf) { Configuration returnConf = new Configuration(conf); - String scheme = FSUtils.getFs(file.toString(), conf).getScheme(); + String scheme = HadoopFSUtils.getFs(file.toString(), conf).getScheme(); returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl", HoodieWrapperFileSystem.class.getName()); return returnConf; @@ -679,7 +637,7 @@ public static Configuration registerFileSystem(Path file, Configuration conf) { */ public static HoodieWrapperFileSystem getFs(String path, SerializableConfiguration hadoopConf, ConsistencyGuardConfig consistencyGuardConfig) { - FileSystem fileSystem = FSUtils.getFs(path, hadoopConf.newCopy()); + FileSystem fileSystem = HadoopFSUtils.getFs(path, hadoopConf.newCopy()); return new HoodieWrapperFileSystem(fileSystem, consistencyGuardConfig.isConsistencyCheckEnabled() ? new FailSafeConsistencyGuard(fileSystem, consistencyGuardConfig) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java index d42a5d362d20..fa964e0bb248 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.fs; import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.hadoop.fs.ConsistencyGuard; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -141,9 +142,9 @@ private void waitForFileVisibility(Path filePath, FileVisibility visibility) thr /** * Retries the predicate for configurable number of times till we the predicate returns success. * - * @param dir directory of interest in which list of files are checked for visibility + * @param dir directory of interest in which list of files are checked for visibility * @param files List of files to check for visibility - * @param event {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event of interest. + * @param event {@link ConsistencyGuard.FileVisibility} event of interest. * @throws TimeoutException when retries are exhausted */ private void retryTillSuccess(Path dir, List files, FileVisibility event) throws TimeoutException { @@ -164,12 +165,12 @@ private void retryTillSuccess(Path dir, List files, FileVisibility event } /** - * Helper to check for file visibility based on {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event. + * Helper to check for file visibility based on {@link ConsistencyGuard.FileVisibility} event. * * @param retryNum retry attempt count. - * @param dir directory of interest in which list of files are checked for visibility - * @param files List of files to check for visibility - * @param event {@link org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility} event of interest. + * @param dir directory of interest in which list of files are checked for visibility + * @param files List of files to check for visibility + * @param event {@link ConsistencyGuard.FileVisibility} event of interest. * @return {@code true} if condition succeeded. else {@code false}. */ protected boolean checkFilesVisibility(int retryNum, Path dir, List files, FileVisibility event) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java index eda3394feb6b..3441288940c9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/OptimisticConsistencyGuard.java @@ -18,6 +18,8 @@ package org.apache.hudi.common.fs; +import org.apache.hudi.hadoop.fs.ConsistencyGuard; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/BaseFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/BaseFile.java index cfca6c50c75f..b57168aaac30 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/BaseFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/BaseFile.java @@ -18,7 +18,7 @@ package org.apache.hudi.common.model; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java index 1fddf02711ac..3602d52e0c39 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieBaseFile.java @@ -20,12 +20,12 @@ import org.apache.hudi.common.util.ExternalFilePathUtil; import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import static org.apache.hudi.hadoop.CachingPath.createRelativePathUnsafe; +import static org.apache.hudi.hadoop.fs.CachingPath.createRelativePathUnsafe; /** * Hoodie base file - Represents metadata about Hudi file in DFS. diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java index 795e6cfe7a66..4d3596ccc271 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.JsonNode; @@ -182,7 +183,7 @@ public Map getFullPathToFileStatus(Configuration hadoopConf, String relativeFilePath = stat.getPath(); Path fullPath = relativeFilePath != null ? FSUtils.getPartitionPath(basePath, relativeFilePath) : null; if (fullPath != null) { - long blockSize = FSUtils.getFs(fullPath.toString(), hadoopConf).getDefaultBlockSize(fullPath); + long blockSize = HadoopFSUtils.getFs(fullPath.toString(), hadoopConf).getDefaultBlockSize(fullPath); FileStatus fileStatus = new FileStatus(stat.getFileSizeInBytes(), false, 0, blockSize, 0, fullPath); fullPathToFileStatus.put(fullPath.getName(), fileStatus); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java index ecfbd925dd14..9415407325e7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java @@ -20,7 +20,7 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.exception.InvalidHoodiePathException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index 589f1e6cfbf7..1d9f38a1d263 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -23,12 +23,8 @@ import org.apache.hudi.common.config.HoodieMetaserverConfig; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.fs.ConsistencyGuardConfig; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.fs.FailSafeConsistencyGuard; import org.apache.hudi.common.fs.FileSystemRetryConfig; -import org.apache.hudi.common.fs.HoodieRetryWrapperFileSystem; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; -import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTimelineTimeZone; @@ -45,8 +41,12 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.TableNotFoundException; -import org.apache.hudi.hadoop.CachingPath; -import org.apache.hudi.hadoop.SerializablePath; +import org.apache.hudi.hadoop.fs.CachingPath; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieRetryWrapperFileSystem; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; +import org.apache.hudi.hadoop.fs.SerializablePath; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -305,7 +305,7 @@ public TimelineLayoutVersion getTimelineLayoutVersion() { */ public HoodieWrapperFileSystem getFs() { if (fs == null) { - FileSystem fileSystem = FSUtils.getFs(metaPath.get(), hadoopConf.newCopy()); + FileSystem fileSystem = HadoopFSUtils.getFs(metaPath.get(), hadoopConf.newCopy()); if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) { fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, @@ -476,7 +476,7 @@ public static HoodieTableMetaClient initTableAndGetMetaClient(Configuration hado Properties props) throws IOException { LOG.info("Initializing " + basePath + " as hoodie table " + basePath); Path basePathDir = new Path(basePath); - final FileSystem fs = FSUtils.getFs(basePath, hadoopConf); + final FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); if (!fs.exists(basePathDir)) { fs.mkdirs(basePathDir); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index 60554e2e4cfc..6ce80da6d4a3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -40,7 +40,7 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.action.InternalSchemaMerger; import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 42722228e4ab..32177c82f9ea 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -18,11 +18,7 @@ package org.apache.hudi.common.table.log; -import org.apache.hudi.common.fs.BoundedFsDataInputStream; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.SchemeAwareFSDataInputStream; -import org.apache.hudi.common.fs.StorageSchemes; -import org.apache.hudi.common.fs.TimedFSDataInputStream; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; @@ -39,7 +35,11 @@ import org.apache.hudi.exception.CorruptedLogFileException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; +import org.apache.hudi.hadoop.fs.BoundedFsDataInputStream; +import org.apache.hudi.hadoop.fs.SchemeAwareFSDataInputStream; +import org.apache.hudi.hadoop.fs.TimedFSDataInputStream; import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.storage.StorageSchemes; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java index ef910a1b1253..fd4f24f89d84 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java @@ -19,12 +19,12 @@ package org.apache.hudi.common.table.log; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.log.HoodieLogFormat.WriterBuilder; import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogReaderUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogReaderUtils.java index 0b1a1d5c84d8..768085c322c7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogReaderUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/LogReaderUtils.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.table.log; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat.Reader; @@ -29,6 +28,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Base64CodecUtil; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; @@ -79,7 +79,7 @@ public static Schema readLatestSchemaFromLogFiles(String basePath, List deltaFilePathToFileStatus = logFiles.stream().map(entry -> Pair.of(entry.getPath().toString(), entry)) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); for (String logPath : deltaPaths) { - FileSystem fs = FSUtils.getFs(logPath, config); + FileSystem fs = HadoopFSUtils.getFs(logPath, config); Schema schemaFromLogFile = readSchemaFromLogFileInReverse(fs, metaClient.getActiveTimeline(), deltaFilePathToFileStatus.get(logPath)); if (schemaFromLogFile != null) { return schemaFromLogFile; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 42c47c696d86..34d69eb2288b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.io.storage.HoodieAvroHFileReader; import org.apache.hudi.io.storage.HoodieHBaseKVComparator; @@ -175,7 +176,7 @@ protected ClosableIterator> deserializeRecords(byte[] conten checkState(readerSchema != null, "Reader's schema has to be non-null"); Configuration hadoopConf = FSUtils.buildInlineConf(getBlockContentLocation().get().getHadoopConf()); - FileSystem fs = FSUtils.getFs(pathForReader.toString(), hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(pathForReader.toString(), hadoopConf); // Read the content try (HoodieAvroHFileReader reader = new HoodieAvroHFileReader(hadoopConf, pathForReader, new CacheConfig(hadoopConf), fs, content, Option.of(getSchemaFromHeader()))) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java index eb4dc631ed60..764a357692d6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; import org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieRecord; @@ -30,12 +29,13 @@ import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; -import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/FilePathDTO.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/FilePathDTO.java index 55dc3ef4410d..419b1da4140f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/FilePathDTO.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/FilePathDTO.java @@ -18,7 +18,7 @@ package org.apache.hudi.common.table.timeline.dto; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java index f25737228e69..afae30ca8e24 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java @@ -21,7 +21,7 @@ import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import java.util.Collections; import java.util.List; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java index 05b482506f4d..c11a2cfd4bb8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java @@ -19,13 +19,13 @@ package org.apache.hudi.common.util; import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter; import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; @@ -185,7 +185,7 @@ public static InternalSchema getInternalSchemaByVersionId(long versionId, String Set commitSet = Arrays.stream(validCommits.split(",")).collect(Collectors.toSet()); List validateCommitList = commitSet.stream().map(HoodieInstant::extractTimestamp).collect(Collectors.toList()); - FileSystem fs = FSUtils.getFs(tablePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(tablePath, hadoopConf); Path hoodieMetaPath = new Path(tablePath, HoodieTableMetaClient.METAFOLDER_NAME); //step1: Path candidateCommitFile = commitSet.stream().filter(fileName -> HoodieInstant.extractTimestamp(fileName).equals(versionId + "")) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index dfbb80cfb638..66e9ab237fcc 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -19,7 +19,6 @@ package org.apache.hudi.common.util; import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -27,6 +26,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.avro.Schema; @@ -71,7 +71,7 @@ public class OrcUtils extends BaseFileUtils { public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath) { try { Configuration conf = new Configuration(configuration); - conf.addResource(FSUtils.getFs(filePath.toString(), conf).getConf()); + conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); Schema readSchema = HoodieAvroUtils.getRecordKeyPartitionPathSchema(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index de5572523c1e..a1e51cd69d42 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -19,7 +19,6 @@ package org.apache.hudi.common.util; import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; @@ -27,6 +26,7 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.BaseKeyGenerator; import org.apache.avro.Schema; @@ -90,7 +90,7 @@ public static ParquetMetadata readMetadata(Configuration conf, Path parquetFileP ParquetMetadata footer; try { // TODO(vc): Should we use the parallel reading version here? - footer = ParquetFileReader.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath); + footer = ParquetFileReader.readFooter(HadoopFSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath); } catch (IOException e) { throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e); } @@ -114,7 +114,7 @@ private static Set filterParquetRowKeys(Configuration configuration, Pat filterFunction = Option.of(new RecordKeysFilterFunction(filter)); } Configuration conf = new Configuration(configuration); - conf.addResource(FSUtils.getFs(filePath.toString(), conf).getConf()); + conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); AvroReadSupport.setAvroReadSchema(conf, readSchema); AvroReadSupport.setRequestedProjection(conf, readSchema); Set rowKeys = new HashSet<>(); @@ -167,7 +167,7 @@ public ClosableIterator getHoodieKeyIterator(Configuration configurat public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath, Option keyGeneratorOpt) { try { Configuration conf = new Configuration(configuration); - conf.addResource(FSUtils.getFs(filePath.toString(), conf).getConf()); + conf.addResource(HadoopFSUtils.getFs(filePath.toString(), conf).getConf()); Schema readSchema = keyGeneratorOpt.map(keyGenerator -> { List fields = new ArrayList<>(); fields.addAll(keyGenerator.getRecordKeyFieldNames()); diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java index 74368dc2a815..ea251aec0fd5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java @@ -18,7 +18,6 @@ package org.apache.hudi.internal.schema.io; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -26,6 +25,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.utils.InternalSchemaUtils; import org.apache.hudi.internal.schema.utils.SerDeHelper; @@ -144,7 +144,7 @@ public String getHistorySchemaStr() { public String getHistorySchemaStrByGivenValidCommits(List validCommits) { List commitList = validCommits == null || validCommits.isEmpty() ? getValidInstants() : validCommits; try { - FileSystem fs = FSUtils.getFs(baseSchemaPath.toString(), conf); + FileSystem fs = HadoopFSUtils.getFs(baseSchemaPath.toString(), conf); if (fs.exists(baseSchemaPath)) { List validaSchemaFiles = Arrays.stream(fs.listStatus(baseSchemaPath)) .filter(f -> f.isFile() && f.getPath().getName().endsWith(SCHEMA_COMMIT_ACTION)) diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java index fead46d06948..6f6b3485c210 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; @@ -31,6 +30,7 @@ import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.util.Lazy; import org.apache.avro.Schema; @@ -103,7 +103,7 @@ public class HoodieAvroHFileReader extends HoodieAvroFileReaderBase implements H private final Object sharedLock = new Object(); public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) throws IOException { - this(path, FSUtils.getFs(path.toString(), hadoopConf), hadoopConf, cacheConfig, Option.empty()); + this(path, HadoopFSUtils.getFs(path.toString(), hadoopConf), hadoopConf, cacheConfig, Option.empty()); } public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs, Option schemaOpt) throws IOException { diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java index 6c440e7c5596..b274abdbc2c7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java @@ -22,10 +22,10 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieDuplicateKeyException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java index 77f2a5cc72d6..4ba164a6fac1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroOrcWriter.java @@ -23,9 +23,9 @@ import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.util.AvroOrcUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java index 34736e5b4d26..e8c765aaaa17 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java @@ -18,13 +18,12 @@ package org.apache.hudi.io.storage; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; - import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; @@ -33,9 +32,8 @@ import java.io.Closeable; import java.io.IOException; import java.lang.reflect.InvocationTargetException; -import java.util.concurrent.atomic.AtomicLong; - import java.lang.reflect.Method; +import java.util.concurrent.atomic.AtomicLong; /** * Base class of Hudi's custom {@link ParquetWriter} implementations diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java index f62786e9517e..e84c646cb504 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java @@ -18,17 +18,18 @@ package org.apache.hudi.metadata; -import org.apache.hadoop.fs.Path; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.util.PartitionPathEncodeUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.expression.ArrayData; -import org.apache.hudi.hadoop.CachingPath; -import org.apache.hudi.hadoop.SerializablePath; +import org.apache.hudi.hadoop.fs.CachingPath; +import org.apache.hudi.hadoop.fs.SerializablePath; import org.apache.hudi.internal.schema.Type; import org.apache.hudi.internal.schema.Types; +import org.apache.hadoop.fs.Path; + import java.util.Collections; import java.util.List; import java.util.stream.Collectors; diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java index 51797677016c..c74f287aeb48 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieSerializableFileStatus; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordGlobalLocation; @@ -40,6 +39,8 @@ import org.apache.hudi.expression.Expression; import org.apache.hudi.expression.PartialBindVisitor; import org.apache.hudi.expression.Predicates; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieSerializableFileStatus; import org.apache.hudi.internal.schema.Types; import org.apache.hadoop.fs.FileStatus; @@ -83,7 +84,7 @@ public FileSystemBackedTableMetadata(HoodieEngineContext engineContext, boolean assumeDatePartitioning) { super(engineContext, conf, datasetBasePath); - FileSystem fs = FSUtils.getFs(dataBasePath.get(), conf.get()); + FileSystem fs = HadoopFSUtils.getFs(dataBasePath.get(), conf.get()); Path metaPath = new Path(dataBasePath.get(), HoodieTableMetaClient.METAFOLDER_NAME); TableNotFoundException.checkTableValidity(fs, this.dataBasePath.get(), metaPath); HoodieTableConfig tableConfig = new HoodieTableConfig(fs, metaPath.toString(), null, null); diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index 8b637be447f0..a814a2fe2121 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -37,7 +37,7 @@ import org.apache.hudi.common.util.hash.FileIndexID; import org.apache.hudi.common.util.hash.PartitionIndexID; import org.apache.hudi.exception.HoodieMetadataException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.io.storage.HoodieAvroHFileReader; import org.apache.hudi.util.Lazy; @@ -70,7 +70,7 @@ import static org.apache.hudi.common.util.TypeUtils.unsafeCast; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.common.util.ValidationUtils.checkState; -import static org.apache.hudi.hadoop.CachingPath.createRelativePathUnsafe; +import static org.apache.hudi.hadoop.fs.CachingPath.createRelativePathUnsafe; import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST; /** diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index e43b889c2a22..d7514e36bcfa 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -68,6 +68,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.util.Lazy; @@ -310,7 +311,7 @@ public static void deleteMetadataPartition(String basePath, HoodieEngineContext */ public static boolean metadataPartitionExists(String basePath, HoodieEngineContext context, MetadataPartitionType partitionType) { final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); - FileSystem fs = FSUtils.getFs(metadataTablePath, context.getHadoopConf().get()); + FileSystem fs = HadoopFSUtils.getFs(metadataTablePath, context.getHadoopConf().get()); try { return fs.exists(new Path(metadataTablePath, partitionType.getPartitionPath())); } catch (Exception e) { @@ -1415,7 +1416,7 @@ private static List getRollbackedCommits(HoodieInstant instant, HoodieAc */ public static String deleteMetadataTable(HoodieTableMetaClient dataMetaClient, HoodieEngineContext context, boolean backup) { final Path metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(dataMetaClient.getBasePathV2()); - FileSystem fs = FSUtils.getFs(metadataTablePath.toString(), context.getHadoopConf().get()); + FileSystem fs = HadoopFSUtils.getFs(metadataTablePath.toString(), context.getHadoopConf().get()); dataMetaClient.getTableConfig().clearMetadataPartitions(dataMetaClient); try { if (!fs.exists(metadataTablePath)) { @@ -1470,7 +1471,7 @@ public static String deleteMetadataTablePartition(HoodieTableMetaClient dataMeta } final Path metadataTablePartitionPath = new Path(HoodieTableMetadata.getMetadataTableBasePath(dataMetaClient.getBasePath()), partitionType.getPartitionPath()); - FileSystem fs = FSUtils.getFs(metadataTablePartitionPath.toString(), context.getHadoopConf().get()); + FileSystem fs = HadoopFSUtils.getFs(metadataTablePartitionPath.toString(), context.getHadoopConf().get()); dataMetaClient.getTableConfig().setMetadataPartitionState(dataMetaClient, partitionType, false); try { if (!fs.exists(metadataTablePartitionPath)) { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index 250304c7fd0e..14ba96c01f46 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -30,6 +30,7 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -180,7 +181,7 @@ public void testGetFileNameWithoutMeta() { @Test public void testEnvVarVariablesPickedup() { environmentVariables.set("HOODIE_ENV_fs_DOT_key1", "value1"); - Configuration conf = FSUtils.prepareHadoopConf(HoodieTestUtils.getDefaultHadoopConf()); + Configuration conf = HadoopFSUtils.prepareHadoopConf(HoodieTestUtils.getDefaultHadoopConf()); assertEquals("value1", conf.get("fs.key1")); conf.set("fs.key1", "value11"); conf.set("fs.key2", "value2"); @@ -387,9 +388,9 @@ public void testFileNameRelatedFunctions() throws Exception { String log3 = FSUtils.makeLogFileName(fileId, LOG_EXTENSION, instantTime, 3, writeToken); Files.createFile(partitionPath.resolve(log3)); - assertEquals(3, (int) FSUtils.getLatestLogVersion(FSUtils.getFs(basePath, new Configuration()), + assertEquals(3, (int) FSUtils.getLatestLogVersion(HadoopFSUtils.getFs(basePath, new Configuration()), new Path(partitionPath.toString()), fileId, LOG_EXTENSION, instantTime).get().getLeft()); - assertEquals(4, FSUtils.computeNextLogVersion(FSUtils.getFs(basePath, new Configuration()), + assertEquals(4, FSUtils.computeNextLogVersion(HadoopFSUtils.getFs(basePath, new Configuration()), new Path(partitionPath.toString()), fileId, LOG_EXTENSION, instantTime)); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java index b4052750fa53..da82a4f6138f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java @@ -18,6 +18,11 @@ package org.apache.hudi.common.fs; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieRetryWrapperFileSystem; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -61,7 +66,7 @@ public void setUp() throws IOException { maxRetryNumbers = fileSystemRetryConfig.getMaxRetryNumbers(); initialRetryIntervalMs = fileSystemRetryConfig.getInitialRetryIntervalMs(); - FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 2); + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(HadoopFSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 2); FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); @@ -71,7 +76,7 @@ public void setUp() throws IOException { // Test the scenario that fs keeps retrying until it fails. @Test public void testProcessFilesWithExceptions() throws Exception { - FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(HadoopFSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); metaClient.setFs(fs); @@ -82,7 +87,7 @@ public void testProcessFilesWithExceptions() throws Exception { @Test public void testGetSchema() { - FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(HadoopFSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); assertDoesNotThrow(fs::getScheme, "Method #getSchema does not implement correctly"); @@ -90,7 +95,7 @@ public void testGetSchema() { @Test public void testGetDefaultReplication() { - FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(HadoopFSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); assertEquals(fs.getDefaultReplication(), 3); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java index 75c09024f682..15887cb80e27 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java @@ -21,6 +21,9 @@ import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -65,7 +68,7 @@ public static void cleanUp() { @Test public void testCreateImmutableFileInPath() throws IOException { - HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(FSUtils.getFs(basePath, new Configuration()), new NoOpConsistencyGuard()); + HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(HadoopFSUtils.getFs(basePath, new Configuration()), new NoOpConsistencyGuard()); String testContent = "test content"; Path testFile = new Path(basePath + Path.SEPARATOR + "clean.00000001"); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java index 7f5f2305bfa8..5bbd798b4d8e 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java @@ -18,6 +18,9 @@ package org.apache.hudi.common.fs; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.storage.StorageSchemes; + import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.Test; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 2f94f6cb8636..ccab16771133 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -60,6 +60,7 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.exception.CorruptedLogFileException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; @@ -356,7 +357,7 @@ public void testMultipleAppend(HoodieLogBlockType dataBlockType) throws IOExcept public void testAppendNotSupported(@TempDir java.nio.file.Path tempDir) throws IOException, URISyntaxException, InterruptedException { // Use some fs like LocalFileSystem, that does not support appends Path localTempDir = new Path(tempDir.toUri()); - FileSystem localFs = FSUtils.getFs(localTempDir.toString(), HoodieTestUtils.getDefaultHadoopConf()); + FileSystem localFs = HadoopFSUtils.getFs(localTempDir.toString(), HoodieTestUtils.getDefaultHadoopConf()); assertTrue(localFs instanceof LocalFileSystem); Path testPath = new Path(localTempDir, "append_test"); localFs.mkdirs(testPath); @@ -958,7 +959,7 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep HoodieLogFile logFile = addValidBlock("test-fileId1", "100", 100); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(logFile.getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -1057,7 +1058,7 @@ public void testMissingBlockExceptMagicBytes() throws IOException, URISyntaxExce HoodieLogFile logFile = addValidBlock("test-fileId1", "100", 100); // Append just magic bytes and move onto next block - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(logFile.getPath()); outputStream.write(HoodieLogFormat.MAGIC); outputStream.flush(); @@ -1108,7 +1109,7 @@ public void testValidateCorruptBlockEndPosition() throws IOException, URISyntaxE writer.close(); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -1286,7 +1287,7 @@ public void testAvroLogRecordReaderWithFailedPartialBlock(ExternalSpillableMap.D // Write 2 header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101"); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2110,7 +2111,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(ExternalS FileCreateUtils.createDeltaCommit(basePath, "100", fs); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2123,7 +2124,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(ExternalS outputStream.close(); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2143,7 +2144,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(ExternalS writer.close(); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2233,7 +2234,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsRollbackAndMergedLogB FileCreateUtils.createDeltaCommit(basePath, "102", fs); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2246,7 +2247,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsRollbackAndMergedLogB outputStream.close(); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2583,7 +2584,7 @@ public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) FileCreateUtils.createDeltaCommit(basePath, "100", fs); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); @@ -2941,7 +2942,7 @@ private HoodieLogFormat.Reader createCorruptedFile(String fileId) throws Excepti writer.close(); // Append some arbitrary byte[] to the end of the log (mimics a partially written commit) - fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf()); + fs = HadoopFSUtils.getFs(fs.getUri().toString(), fs.getConf()); FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath()); // create a block with outputStream.write(HoodieLogFormat.MAGIC); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java index 86b05912a624..87b857335a92 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java @@ -18,8 +18,7 @@ package org.apache.hudi.common.table.timeline; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; -import org.apache.hudi.common.fs.NoOpConsistencyGuard; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; @@ -28,6 +27,7 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.AfterEach; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java index 695f4fc03b3a..3a6d38480966 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java @@ -28,7 +28,6 @@ import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.BaseFile; import org.apache.hudi.common.model.BootstrapFileMapping; import org.apache.hudi.common.model.CompactionOperation; @@ -58,6 +57,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index 26a85a6f806d..3434680117a9 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCompactionPlan; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -38,6 +37,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Conversions; import org.apache.avro.LogicalTypes; @@ -536,7 +536,7 @@ private static void createMetadataFile(String f, String basePath, Configuration basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f); FSDataOutputStream os = null; try { - FileSystem fs = FSUtils.getFs(basePath, configuration); + FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); os = fs.create(commitFile, true); // Write empty commit metadata os.write(content); @@ -586,7 +586,7 @@ public static void createEmptyCleanRequestedFile(String basePath, String instant } private static void createEmptyFile(String basePath, Path filePath, Configuration configuration) throws IOException { - FileSystem fs = FSUtils.getFs(basePath, configuration); + FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); FSDataOutputStream os = fs.create(filePath, true); os.close(); } @@ -602,7 +602,7 @@ public static void createCompactionAuxiliaryMetadata(String basePath, HoodieInst Configuration configuration) throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName()); - FileSystem fs = FSUtils.getFs(basePath, configuration); + FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); try (FSDataOutputStream os = fs.create(commitFile, true)) { HoodieCompactionPlan workload = HoodieCompactionPlan.newBuilder().setVersion(1).build(); // Write empty commit metadata @@ -614,7 +614,7 @@ public static void createSavepointFile(String basePath, String instantTime, Conf throws IOException { Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeSavePointFileName(instantTime)); - FileSystem fs = FSUtils.getFs(basePath, configuration); + FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); try (FSDataOutputStream os = fs.create(commitFile, true)) { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); // Write empty commit metadata diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index a8e5ffda7078..c26b7e02d4e3 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.testutils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieTableType; @@ -26,6 +25,7 @@ import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; import com.esotericsoftware.kryo.Kryo; diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java index 4dd32d840b18..cb978de86188 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java @@ -20,9 +20,9 @@ import org.apache.hudi.common.config.DFSPropertiesConfiguration; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -184,7 +184,7 @@ public void testNoGlobalConfFileConfigured() { ENVIRONMENT_VARIABLES.clear(DFSPropertiesConfiguration.CONF_FILE_DIR_ENV_NAME); DFSPropertiesConfiguration.refreshGlobalProps(); try { - if (!FSUtils.getFs(DFSPropertiesConfiguration.DEFAULT_PATH, new Configuration()).exists(DFSPropertiesConfiguration.DEFAULT_PATH)) { + if (!HadoopFSUtils.getFs(DFSPropertiesConfiguration.DEFAULT_PATH, new Configuration()).exists(DFSPropertiesConfiguration.DEFAULT_PATH)) { assertEquals(0, DFSPropertiesConfiguration.getGlobalProps().size()); } } catch (IOException e) { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java index 68660b117ce0..9ff262f8e639 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestMarkerUtils.java @@ -21,10 +21,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.fs.FSUtils; + import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -41,7 +43,7 @@ class TestMarkerUtils extends HoodieCommonTestHarness { @BeforeEach public void setup() { initPath(); - fs = FSUtils.getFs(basePath, new Configuration()); + fs = HadoopFSUtils.getFs(basePath, new Configuration()); } @Test diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index f7a5a84b344b..22cca7004d56 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -21,7 +21,6 @@ import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; @@ -29,6 +28,7 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; @@ -213,7 +213,7 @@ public void testWriteReadWithEvolvedSchema(String evolvedSchemaPath) throws Exce @Test public void testReadHFileFormatRecords() throws Exception { writeFileWithSimpleSchema(); - FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration()); + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); byte[] content = FileIOUtils.readAsByteArray( fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen()); // Reading byte array in HFile format, without actual file path @@ -419,7 +419,7 @@ public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException // using different Hudi releases. The file is copied from .hoodie/.aux/.bootstrap/.partitions/ String bootstrapIndexFile = hfilePrefix + BOOTSTRAP_INDEX_HFILE_SUFFIX; - FileSystem fs = FSUtils.getFs(getFilePath().toString(), new Configuration()); + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); byte[] content = readHFileFromResources(simpleHFile); verifyHFileReader( HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), diff --git a/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java b/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java index 50b2d7026561..fe6dd497b2f2 100644 --- a/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java +++ b/hudi-examples/hudi-examples-java/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java @@ -20,7 +20,6 @@ import org.apache.hudi.client.HoodieJavaWriteClient; import org.apache.hudi.client.common.HoodieJavaEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; @@ -31,6 +30,7 @@ import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.examples.common.HoodieExampleDataGenerator; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hadoop.conf.Configuration; @@ -70,7 +70,7 @@ public static void main(String[] args) throws Exception { Configuration hadoopConf = new Configuration(); // initialize the table, if not done already Path path = new Path(tablePath); - FileSystem fs = FSUtils.getFs(tablePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(tablePath, hadoopConf); if (!fs.exists(path)) { HoodieTableMetaClient.withPropertyBuilder() .setTableType(tableType) diff --git a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java index 27a6e80461a3..cbe505b70126 100644 --- a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java +++ b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java @@ -21,7 +21,6 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -34,6 +33,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.examples.common.HoodieExampleDataGenerator; import org.apache.hudi.examples.common.HoodieExampleSparkUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.action.HoodieWriteMetadata; @@ -84,7 +84,7 @@ public static void main(String[] args) throws Exception { // initialize the table, if not done already Path path = new Path(tablePath); - FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(tablePath, jsc.hadoopConfiguration()); if (!fs.exists(path)) { HoodieTableMetaClient.withPropertyBuilder() .setTableType(tableType) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java index 945cb64da347..f30612bd0671 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/FilebasedSchemaProvider.java @@ -20,10 +20,10 @@ import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Schema; import org.apache.flink.configuration.Configuration; @@ -69,7 +69,7 @@ public static class Config { public FilebasedSchemaProvider(TypedProperties props) { checkRequiredConfigProperties(props, Collections.singletonList(Config.SOURCE_SCHEMA_FILE)); String sourceSchemaFile = getStringWithAltKeys(props, Config.SOURCE_SCHEMA_FILE); - FileSystem fs = FSUtils.getFs(sourceSchemaFile, HadoopConfigurations.getHadoopConf(new Configuration())); + FileSystem fs = HadoopFSUtils.getFs(sourceSchemaFile, HadoopConfigurations.getHadoopConf(new Configuration())); try { this.sourceSchema = new Schema.Parser().parse(fs.open(new Path(sourceSchemaFile))); if (containsConfigProperty(props, Config.TARGET_SCHEMA_FILE)) { @@ -83,7 +83,7 @@ public FilebasedSchemaProvider(TypedProperties props) { public FilebasedSchemaProvider(Configuration conf) { final String sourceSchemaPath = conf.getString(FlinkOptions.SOURCE_AVRO_SCHEMA_PATH); - final FileSystem fs = FSUtils.getFs(sourceSchemaPath, HadoopConfigurations.getHadoopConf(conf)); + final FileSystem fs = HadoopFSUtils.getFs(sourceSchemaPath, HadoopConfigurations.getHadoopConf(conf)); try { this.sourceSchema = new Schema.Parser().parse(fs.open(new Path(sourceSchemaPath))); } catch (IOException ioe) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java index 9b0457845e9b..c182528344c1 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java @@ -18,7 +18,6 @@ package org.apache.hudi.sink.meta; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; @@ -26,6 +25,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -77,7 +77,7 @@ public class CkpMetadata implements Serializable, AutoCloseable { private List instantCache; private CkpMetadata(Configuration config) { - this(FSUtils.getFs(config.getString(FlinkOptions.PATH), HadoopConfigurations.getHadoopConf(config)), + this(HadoopFSUtils.getFs(config.getString(FlinkOptions.PATH), HadoopConfigurations.getHadoopConf(config)), config.getString(FlinkOptions.PATH), config.getString(FlinkOptions.WRITE_CLIENT_ID)); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java index 2f959b241dd8..03b1626c4968 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java @@ -19,7 +19,6 @@ package org.apache.hudi.sink.partitioner.profile; import org.apache.hudi.client.common.HoodieFlinkEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -28,6 +27,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.util.StreamerUtil; import org.apache.flink.core.fs.Path; @@ -117,7 +117,7 @@ public static FileStatus[] getFilesFromMetadata( List metadataList, HoodieTableType tableType, boolean ignoreMissingFiles) { - FileSystem fs = FSUtils.getFs(basePath.toString(), hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(basePath.toString(), hadoopConf); Map uniqueIdToFileStatus = new HashMap<>(); // If a file has been touched multiple times in the given commits, the return value should keep the one // from the latest commit, so here we traverse in reverse order diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java index d60592c5172e..58b76ce59b3a 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java @@ -20,7 +20,6 @@ import org.apache.hudi.avro.AvroSchemaUtils; import org.apache.hudi.client.HoodieFlinkWriteClient; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -30,6 +29,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.util.AvroSchemaConverter; import org.apache.hudi.util.DataTypeUtils; @@ -115,7 +115,7 @@ public HoodieCatalog(String name, Configuration options) { @Override public void open() throws CatalogException { - fs = FSUtils.getFs(catalogPathStr, hadoopConf); + fs = HadoopFSUtils.getFs(catalogPathStr, hadoopConf); catalogPath = new Path(catalogPathStr); try { if (!fs.exists(catalogPath)) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java index 5ea7a585a0d2..285c01472618 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieHiveCatalog.java @@ -21,7 +21,6 @@ import org.apache.hudi.adapter.HiveCatalogConstants.AlterHiveDatabaseOp; import org.apache.hudi.avro.AvroSchemaUtils; import org.apache.hudi.client.HoodieFlinkWriteClient; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -35,6 +34,7 @@ import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieCatalogException; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.table.HoodieTableFactory; @@ -397,7 +397,7 @@ private Table translateSparkTable2Flink(ObjectPath tablePath, Table hiveTable) { } else { // fallback to the partition path pattern Path hoodieTablePath = new Path(path); - hiveStyle = Arrays.stream(FSUtils.getFs(hoodieTablePath, hiveConf).listStatus(hoodieTablePath)) + hiveStyle = Arrays.stream(HadoopFSUtils.getFs(hoodieTablePath, hiveConf).listStatus(hoodieTablePath)) .map(fileStatus -> fileStatus.getPath().getName()) .filter(f -> !f.equals(".hoodie") && !f.equals("default")) .anyMatch(FilePathUtils::isHiveStylePartitioning); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java index 8f3e88417bef..6844a4136e2c 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java @@ -18,13 +18,13 @@ package org.apache.hudi.table.catalog; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.sync.common.util.SparkDataSourceTableUtils; import org.apache.hudi.util.AvroSchemaConverter; @@ -105,7 +105,7 @@ public static void createProperties(String basePath, Configuration hadoopConf, Map options) throws IOException { Path propertiesFilePath = getPropertiesFilePath(basePath); - FileSystem fs = FSUtils.getFs(basePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); try (FSDataOutputStream outputStream = fs.create(propertiesFilePath)) { Properties properties = new Properties(); properties.putAll(options); @@ -123,7 +123,7 @@ public static Map loadFromProperties(String basePath, Configurat Map options = new HashMap<>(); Properties props = new Properties(); - FileSystem fs = FSUtils.getFs(basePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); try (FSDataInputStream inputStream = fs.open(propertiesFilePath)) { props.load(inputStream); for (final String name : props.stringPropertyNames()) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java index 1e343d20658b..826b96f617fc 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java @@ -18,8 +18,8 @@ package org.apache.hudi.table.format; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.util.DataTypeUtils; import org.apache.flink.api.java.tuple.Tuple2; @@ -278,7 +278,7 @@ public static List, Path>> searchPartKeyVal } public static FileStatus[] getFileStatusRecursively(Path path, int expectLevel, Configuration conf) { - return getFileStatusRecursively(path, expectLevel, FSUtils.getFs(path.toString(), conf)); + return getFileStatusRecursively(path, expectLevel, HadoopFSUtils.getFs(path.toString(), conf)); } public static FileStatus[] getFileStatusRecursively(Path path, int expectLevel, FileSystem fs) { @@ -345,7 +345,7 @@ public static List> getPartitions( try { return FilePathUtils .searchPartKeyValueAndPaths( - FSUtils.getFs(path.toString(), hadoopConf), + HadoopFSUtils.getFs(path.toString(), hadoopConf), path, hivePartition, partitionKeys.toArray(new String[0])) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java index f408ae316ebd..baa9f21216b5 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java @@ -20,7 +20,6 @@ import java.util.stream.Collectors; import org.apache.hudi.common.engine.EngineType; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordMerger; @@ -38,6 +37,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.table.format.mor.MergeOnReadInputSplit; import org.apache.hudi.util.FlinkWriteClients; @@ -151,7 +151,7 @@ public static HoodieMergedLogRecordScanner logScanner( org.apache.flink.configuration.Configuration flinkConf, Configuration hadoopConf) { HoodieWriteConfig writeConfig = FlinkWriteClients.getHoodieClientConfig(flinkConf); - FileSystem fs = FSUtils.getFs(split.getTablePath(), hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(split.getTablePath(), hadoopConf); return HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(fs) .withBasePath(split.getTablePath()) @@ -195,7 +195,7 @@ public BoundedMemoryRecords( HoodieRecordMerger merger = HoodieRecordUtils.createRecordMerger( split.getTablePath(), EngineType.FLINK, mergers, flinkConf.getString(FlinkOptions.RECORD_MERGER_STRATEGY)); HoodieUnMergedLogRecordScanner.Builder scannerBuilder = HoodieUnMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(split.getTablePath(), hadoopConf)) + .withFileSystem(HadoopFSUtils.getFs(split.getTablePath(), hadoopConf)) .withBasePath(split.getTablePath()) .withLogFilePaths(split.getLogPaths().get()) .withReaderSchema(logSchema) @@ -260,7 +260,7 @@ public static HoodieMergedLogRecordScanner logScanner( Configuration hadoopConf) { String basePath = writeConfig.getBasePath(); return HoodieMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(basePath, hadoopConf)) + .withFileSystem(HadoopFSUtils.getFs(basePath, hadoopConf)) .withBasePath(basePath) .withLogFilePaths(logPaths) .withReaderSchema(logSchema) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cdc/CdcInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cdc/CdcInputFormat.java index 154df81a0d49..e7ee905cf4ef 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cdc/CdcInputFormat.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cdc/CdcInputFormat.java @@ -20,7 +20,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.BaseFile; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieLogFile; @@ -37,6 +36,7 @@ import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.source.ExpressionPredicates.Predicate; import org.apache.hudi.table.format.FormatUtils; @@ -334,7 +334,7 @@ abstract static class BaseImageIterator implements ClosableIterator { this.recordBuilder = new GenericRecordBuilder(requiredSchema); this.avroToRowDataConverter = AvroToRowDataConverters.createRowConverter(tableState.getRequiredRowType()); Path hadoopTablePath = new Path(tablePath); - FileSystem fs = FSUtils.getFs(hadoopTablePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(hadoopTablePath, hadoopConf); HoodieLogFile[] cdcLogFiles = fileSplit.getCdcFiles().stream().map(cdcFile -> { try { return new HoodieLogFile(fs.getFileStatus(new Path(hadoopTablePath, cdcFile))); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java index 5b365a589903..6f90e4822180 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java @@ -18,8 +18,8 @@ package org.apache.hudi.table.format.cow; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.source.ExpressionPredicates.Predicate; import org.apache.hudi.table.format.FilePathUtils; import org.apache.hudi.table.format.InternalSchemaManager; @@ -59,7 +59,7 @@ * to support TIMESTAMP_MILLIS. * *

    Note: Override the {@link #createInputSplits} method from parent to rewrite the logic creating the FileSystem, - * use {@link FSUtils#getFs} to get a plugin filesystem. + * use {@link HadoopFSUtils#getFs} to get a plugin filesystem. * * @see ParquetSplitReaderUtil */ @@ -161,7 +161,7 @@ public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { for (Path path : getFilePaths()) { final org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(path.toUri()); - final FileSystem fs = FSUtils.getFs(hadoopPath.toString(), this.conf.conf()); + final FileSystem fs = HadoopFSUtils.getFs(hadoopPath.toString(), this.conf.conf()); final FileStatus pathFile = fs.getFileStatus(hadoopPath); if (pathFile.isDirectory()) { @@ -178,7 +178,7 @@ public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { if (unsplittable) { int splitNum = 0; for (final FileStatus file : files) { - final FileSystem fs = FSUtils.getFs(file.getPath().toString(), this.conf.conf()); + final FileSystem fs = HadoopFSUtils.getFs(file.getPath().toString(), this.conf.conf()); final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, file.getLen()); Set hosts = new HashSet<>(); for (BlockLocation block : blocks) { @@ -202,7 +202,7 @@ public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { int splitNum = 0; for (final FileStatus file : files) { - final FileSystem fs = FSUtils.getFs(file.getPath().toString(), this.conf.conf()); + final FileSystem fs = HadoopFSUtils.getFs(file.getPath().toString(), this.conf.conf()); final long len = file.getLen(); final long blockSize = file.getBlockSize(); @@ -306,7 +306,7 @@ public void close() throws IOException { private long addFilesInDir(org.apache.hadoop.fs.Path path, List files, boolean logExcludedFiles) throws IOException { final org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(path.toUri()); - final FileSystem fs = FSUtils.getFs(hadoopPath.toString(), this.conf.conf()); + final FileSystem fs = HadoopFSUtils.getFs(hadoopPath.toString(), this.conf.conf()); long length = 0; diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java index 804d9248a366..2fb8bd893072 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java @@ -18,12 +18,12 @@ package org.apache.hudi.util; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.VisibleForTesting; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieHeartbeatException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -180,7 +180,7 @@ public String nextId(Configuration conf) { private String nextId(Configuration conf, String basePath) { Path heartbeatFolderPath = new Path(getHeartbeatFolderPath(basePath)); - FileSystem fs = FSUtils.getFs(heartbeatFolderPath, HadoopConfigurations.getHadoopConf(conf)); + FileSystem fs = HadoopFSUtils.getFs(heartbeatFolderPath, HadoopConfigurations.getHadoopConf(conf)); try { if (!fs.exists(heartbeatFolderPath)) { return INIT_CLIENT_ID; @@ -251,7 +251,7 @@ public Builder clientId(String clientId) { public Builder conf(Configuration conf) { this.basePath = conf.getString(FlinkOptions.PATH); - this.fs = FSUtils.getFs(this.basePath, HadoopConfigurations.getHadoopConf(conf)); + this.fs = HadoopFSUtils.getFs(this.basePath, HadoopConfigurations.getHadoopConf(conf)); this.clientId = conf.getString(FlinkOptions.WRITE_CLIENT_ID); return this; } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java index c3c92d9f9b29..648a108d8673 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java @@ -43,6 +43,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.schema.FilebasedSchemaProvider; import org.apache.hudi.sink.transform.ChainedTransformer; @@ -241,7 +242,7 @@ public static HoodieTableMetaClient initTableIfNotExists( */ public static boolean tableExists(String basePath, org.apache.hadoop.conf.Configuration hadoopConf) { // Hadoop FileSystem - FileSystem fs = FSUtils.getFs(basePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); try { return fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME)) && fs.exists(new Path(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME), HoodieTableConfig.HOODIE_PROPERTIES_FILE)); @@ -259,7 +260,7 @@ public static boolean tableExists(String basePath, org.apache.hadoop.conf.Config */ public static boolean partitionExists(String tablePath, String partitionPath, org.apache.hadoop.conf.Configuration hadoopConf) { // Hadoop FileSystem - FileSystem fs = FSUtils.getFs(tablePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(tablePath, hadoopConf); try { return fs.exists(new Path(tablePath, partitionPath)); } catch (IOException e) { @@ -311,7 +312,7 @@ public static HoodieTableMetaClient createMetaClient(Configuration conf) { * Returns the table config or empty if the table does not exist. */ public static Option getTableConfig(String basePath, org.apache.hadoop.conf.Configuration hadoopConf) { - FileSystem fs = FSUtils.getFs(basePath, hadoopConf); + FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); Path metaPath = new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME); try { if (fs.exists(new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE))) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java index 2a1f523fdb0e..7eea95369907 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java @@ -18,12 +18,12 @@ package org.apache.hudi.util; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -55,7 +55,7 @@ public static void createProperties( FileSystemViewStorageConfig config, Configuration flinkConf) throws IOException { Path propertyPath = getPropertiesFilePath(basePath, flinkConf.getString(FlinkOptions.WRITE_CLIENT_ID)); - FileSystem fs = FSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(flinkConf)); + FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(flinkConf)); fs.delete(propertyPath, false); try (FSDataOutputStream outputStream = fs.create(propertyPath)) { config.getProps().store(outputStream, @@ -69,7 +69,7 @@ public static void createProperties( public static FileSystemViewStorageConfig loadFromProperties(String basePath, Configuration conf) { Path propertyPath = getPropertiesFilePath(basePath, conf.getString(FlinkOptions.WRITE_CLIENT_ID)); LOG.info("Loading filesystem view storage properties from " + propertyPath); - FileSystem fs = FSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(conf)); + FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(conf)); Properties props = new Properties(); try { try (FSDataInputStream inputStream = fs.open(propertyPath)) { diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java index 186500b1f385..f5ed7627c917 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java @@ -20,8 +20,6 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.heartbeat.HoodieHeartbeatClient; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.model.WriteConcurrencyMode; @@ -33,6 +31,8 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.sink.event.WriteMetadataEvent; import org.apache.hudi.sink.utils.MockCoordinatorExecutor; @@ -121,7 +121,7 @@ void testInstantState() { public void testTableInitialized() throws IOException { final org.apache.hadoop.conf.Configuration hadoopConf = HadoopConfigurations.getHadoopConf(new Configuration()); String basePath = tempFile.getAbsolutePath(); - try (FileSystem fs = FSUtils.getFs(basePath, hadoopConf)) { + try (FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf)) { assertTrue(fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))); } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java index 3d6d0918ef08..0978b1cc4e64 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.FlinkMiniCluster; @@ -86,7 +87,7 @@ public void testBucketStreamWriteAfterRollbackFirstFileGroupCreation(boolean isC if (isCow) { TestData.checkWrittenData(tempFile, EXPECTED, 4); } else { - FileSystem fs = FSUtils.getFs(tempFile.getAbsolutePath(), new org.apache.hadoop.conf.Configuration()); + FileSystem fs = HadoopFSUtils.getFs(tempFile.getAbsolutePath(), new org.apache.hadoop.conf.Configuration()); TestData.checkWrittenDataMOR(fs, tempFile, EXPECTED, 4); } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestConsistentBucketStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestConsistentBucketStreamWrite.java index 5309b2225fb9..91b3340f25b0 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestConsistentBucketStreamWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestConsistentBucketStreamWrite.java @@ -18,7 +18,6 @@ package org.apache.hudi.sink.bucket; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.config.HoodieClusteringConfig; @@ -27,6 +26,7 @@ import org.apache.hudi.configuration.OptionsInference; import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.sink.utils.Pipelines; import org.apache.hudi.util.AvroSchemaConverter; import org.apache.hudi.util.JsonDeserializationFunction; @@ -202,7 +202,7 @@ private void testWriteToHoodie( // ignored } } - FileSystem fs = FSUtils.getFs(tempFile.getAbsolutePath(), new org.apache.hadoop.conf.Configuration()); + FileSystem fs = HadoopFSUtils.getFs(tempFile.getAbsolutePath(), new org.apache.hadoop.conf.Configuration()); TestData.checkWrittenDataMOR(fs, tempFile, expected, 4); } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java index 7b07f3069826..c47ec62be761 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/compact/ITTestHoodieFlinkCompactor.java @@ -22,7 +22,6 @@ import org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; @@ -32,6 +31,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.table.upgrade.FlinkUpgradeDowngradeHelper; import org.apache.hudi.table.upgrade.UpgradeDowngrade; diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java index 1ef2254ff8e9..6a115ddff0ab 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/meta/TestCkpMetadata.java @@ -18,8 +18,8 @@ package org.apache.hudi.sink.meta; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.configuration.HadoopConfigurations; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.TestConfigurations; @@ -96,7 +96,7 @@ void testBootstrap() throws Exception { private CkpMetadata getCkpMetadata(String uniqueId) { String basePath = tempFile.getAbsolutePath(); - FileSystem fs = FSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(new Configuration())); + FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(new Configuration())); return CkpMetadata.getInstance(fs, basePath, uniqueId); } } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java index 43198cf0b2df..d385846be057 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java @@ -19,7 +19,6 @@ package org.apache.hudi.sink.utils; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -29,6 +28,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.sink.event.WriteMetadataEvent; import org.apache.hudi.sink.meta.CkpMetadata; import org.apache.hudi.util.StreamerUtil; @@ -414,7 +414,7 @@ public TestHarness checkWrittenData( } private void checkWrittenDataMor(File baseFile, Map expected, int partitions) throws Exception { - FileSystem fs = FSUtils.getFs(basePath, new org.apache.hadoop.conf.Configuration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, new org.apache.hadoop.conf.Configuration()); TestData.checkWrittenDataMOR(fs, baseFile, expected, partitions); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java index 8af557c4b649..3ee85a46fc46 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/catalog/TestHoodieHiveCatalog.java @@ -19,7 +19,6 @@ package org.apache.hudi.table.catalog; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; @@ -29,6 +28,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieCatalogException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.sink.partitioner.profile.WriteProfiles; @@ -255,7 +255,7 @@ public void testCreateExternalTable() throws TableAlreadyExistException, Databas catalog.dropTable(tablePath, false); Path path = new Path(table1.getParameters().get(FlinkOptions.PATH.key())); - boolean created = StreamerUtil.fileExists(FSUtils.getFs(path, new Configuration()), path); + boolean created = StreamerUtil.fileExists(HadoopFSUtils.getFs(path, new Configuration()), path); assertTrue(created, "Table should have been created"); } @@ -293,7 +293,7 @@ public void testDropTable(boolean external) throws TableAlreadyExistException, D catalog.dropTable(tablePath, false); Path path = new Path(table.getParameters().get(FlinkOptions.PATH.key())); - boolean existing = StreamerUtil.fileExists(FSUtils.getFs(path, new Configuration()), path); + boolean existing = StreamerUtil.fileExists(HadoopFSUtils.getFs(path, new Configuration()), path); assertEquals(external, existing); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java index d3bdc479d318..072e43bba7d3 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestStreamerUtil.java @@ -18,12 +18,12 @@ package org.apache.hudi.utils; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.SimpleAvroKeyGenerator; import org.apache.hudi.util.StreamerUtil; @@ -114,7 +114,7 @@ void testTableExist() throws IOException { assertFalse(StreamerUtil.tableExists(basePath, HadoopConfigurations.getHadoopConf(conf))); - try (FileSystem fs = FSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(conf))) { + try (FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(conf))) { fs.mkdirs(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME)); assertFalse(StreamerUtil.tableExists(basePath, HadoopConfigurations.getHadoopConf(conf))); diff --git a/hudi-hadoop-common/pom.xml b/hudi-hadoop-common/pom.xml new file mode 100644 index 000000000000..be5a3ab610d8 --- /dev/null +++ b/hudi-hadoop-common/pom.xml @@ -0,0 +1,102 @@ + + + + + hudi + org.apache.hudi + 0.15.0-SNAPSHOT + + 4.0.0 + + hudi-hadoop-common + + + ${project.parent.basedir} + + + + + + src/main/resources + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + + test-jar + + test-compile + + + + false + + + + org.apache.rat + apache-rat-plugin + + + org.jacoco + jacoco-maven-plugin + + + + + + + org.apache.hudi + hudi-io + ${project.version} + + + + + org.apache.hadoop + hadoop-client + + + javax.servlet + * + + + provided + + + org.apache.hadoop + hadoop-hdfs + provided + + + + org.apache.hudi + hudi-tests-common + ${project.version} + test + + + diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/BoundedFsDataInputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/BoundedFsDataInputStream.java similarity index 81% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/BoundedFsDataInputStream.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/BoundedFsDataInputStream.java index 0f2e5909610a..68a28ab6989c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/BoundedFsDataInputStream.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/BoundedFsDataInputStream.java @@ -6,14 +6,18 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; diff --git a/hudi-common/src/main/java/org/apache/hudi/hadoop/CachingPath.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/CachingPath.java similarity index 93% rename from hudi-common/src/main/java/org/apache/hudi/hadoop/CachingPath.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/CachingPath.java index 698eabcd7967..f5e63736cc7c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/hadoop/CachingPath.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/CachingPath.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.hadoop; +package org.apache.hudi.hadoop.fs; import org.apache.hudi.exception.HoodieException; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java similarity index 85% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java index cd649a682876..164e9d2b0239 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/ConsistencyGuard.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/ConsistencyGuard.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.Path; diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java new file mode 100644 index 000000000000..d9abbd5c1643 --- /dev/null +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopFSUtils.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.hadoop.fs; + +import org.apache.hudi.exception.HoodieIOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.Map; + +/** + * Utility functions related to accessing the file storage on Hadoop. + */ +public class HadoopFSUtils { + private static final Logger LOG = LoggerFactory.getLogger(HadoopFSUtils.class); + private static final String HOODIE_ENV_PROPS_PREFIX = "HOODIE_ENV_"; + + public static Configuration prepareHadoopConf(Configuration conf) { + // look for all properties, prefixed to be picked up + for (Map.Entry prop : System.getenv().entrySet()) { + if (prop.getKey().startsWith(HOODIE_ENV_PROPS_PREFIX)) { + LOG.info("Picking up value for hoodie env var :" + prop.getKey()); + conf.set(prop.getKey().replace(HOODIE_ENV_PROPS_PREFIX, "").replaceAll("_DOT_", "."), prop.getValue()); + } + } + return conf; + } + + public static FileSystem getFs(String pathStr, Configuration conf) { + return getFs(new Path(pathStr), conf); + } + + public static FileSystem getFs(Path path, Configuration conf) { + FileSystem fs; + prepareHadoopConf(conf); + try { + fs = path.getFileSystem(conf); + } catch (IOException e) { + throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), e); + } + return fs; + } + + public static FileSystem getFs(String pathStr, Configuration conf, boolean localByDefault) { + if (localByDefault) { + return getFs(addSchemeIfLocalPath(pathStr), conf); + } + return getFs(pathStr, conf); + } + + public static Path addSchemeIfLocalPath(String path) { + Path providedPath = new Path(path); + File localFile = new File(path); + if (!providedPath.isAbsolute() && localFile.exists()) { + Path resolvedPath = new Path("file://" + localFile.getAbsolutePath()); + LOG.info("Resolving file " + path + " to be a local file."); + return resolvedPath; + } + LOG.info("Resolving file " + path + "to be a remote file."); + return providedPath; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieRetryWrapperFileSystem.java similarity index 97% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieRetryWrapperFileSystem.java index 68bbe0a0bc42..69ef3e9b25b6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieRetryWrapperFileSystem.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hudi.common.util.RetryHelper; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieSerializableFileStatus.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieSerializableFileStatus.java similarity index 90% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieSerializableFileStatus.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieSerializableFileStatus.java index 99c7e35935cd..d9b0d10163c4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieSerializableFileStatus.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieSerializableFileStatus.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java similarity index 97% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java index 0789ef4e27f0..326b24353cff 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java @@ -7,24 +7,24 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hudi.common.metrics.Registry; -import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -61,7 +61,7 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeoutException; -import static org.apache.hudi.common.fs.StorageSchemes.HDFS; +import static org.apache.hudi.storage.StorageSchemes.HDFS; /** * HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in the file system to @@ -73,6 +73,8 @@ public class HoodieWrapperFileSystem extends FileSystem { private static final String TMP_PATH_POSTFIX = ".tmp"; + private static final String METAFOLDER_NAME = ".hoodie"; + /** * Names for metrics. */ @@ -105,7 +107,7 @@ public interface CheckedFunction { } private static Registry getMetricRegistryForPath(Path p) { - return ((p != null) && (p.toString().contains(HoodieTableMetaClient.METAFOLDER_NAME))) + return ((p != null) && (p.toString().contains(METAFOLDER_NAME))) ? METRICS_REGISTRY_META : METRICS_REGISTRY_DATA; } @@ -142,7 +144,7 @@ public HoodieWrapperFileSystem(FileSystem fileSystem, ConsistencyGuard consisten public static Path convertToHoodiePath(Path file, Configuration conf) { try { - String scheme = FSUtils.getFs(file.toString(), conf).getScheme(); + String scheme = HadoopFSUtils.getFs(file.toString(), conf).getScheme(); return convertPathWithScheme(file, getHoodieScheme(scheme)); } catch (HoodieIOException e) { throw e; @@ -186,7 +188,7 @@ public void initialize(URI uri, Configuration conf) { } else { this.uri = uri; } - this.fileSystem = FSUtils.getFs(path.toString(), conf); + this.fileSystem = HadoopFSUtils.getFs(path.toString(), conf); // Do not need to explicitly initialize the default filesystem, its done already in the above // FileSystem.get // fileSystem.initialize(FileSystem.getDefaultUri(conf), conf); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java similarity index 71% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java index ef4d7a403530..acda6aefd1a8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/NoOpConsistencyGuard.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/NoOpConsistencyGuard.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.Path; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/SchemeAwareFSDataInputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SchemeAwareFSDataInputStream.java similarity index 75% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/SchemeAwareFSDataInputStream.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SchemeAwareFSDataInputStream.java index 8795bf19d356..d213ed9fee53 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/SchemeAwareFSDataInputStream.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SchemeAwareFSDataInputStream.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.FSDataInputStream; diff --git a/hudi-common/src/main/java/org/apache/hudi/hadoop/SerializablePath.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SerializablePath.java similarity index 78% rename from hudi-common/src/main/java/org/apache/hudi/hadoop/SerializablePath.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SerializablePath.java index 796600a7e838..c814a3ed969c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/hadoop/SerializablePath.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SerializablePath.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.hadoop; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.Path; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareFSDataOutputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java similarity index 86% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareFSDataOutputStream.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java index 361d418c2f7f..bcce7f2b917e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/SizeAwareFSDataOutputStream.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/SizeAwareFSDataOutputStream.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hudi.exception.HoodieException; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/TimedFSDataInputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/TimedFSDataInputStream.java similarity index 86% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/TimedFSDataInputStream.java rename to hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/TimedFSDataInputStream.java index eca8ec368b86..52c5c31f79d5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/TimedFSDataInputStream.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/TimedFSDataInputStream.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.hadoop.fs; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java index 1cc8bf91b25c..4a39b6548f9d 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadSnapshotReader.java @@ -18,7 +18,6 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; @@ -29,6 +28,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.ExternalSpillableMap; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.io.storage.HoodieFileReader; @@ -179,7 +179,7 @@ private static HoodieRealtimeFileSplit getRealtimeSplit(String tableBasePath, St private HoodieMergedLogRecordScanner getMergedLogRecordScanner() { return HoodieMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf)) + .withFileSystem(HadoopFSUtils.getFs(split.getPath().toString(), jobConf)) .withBasePath(tableBasePath) .withLogFilePaths(logFilePaths.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList())) .withReaderSchema(readerSchema) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java index 941b28fa7156..61933608e94c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java @@ -20,7 +20,6 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.config.HoodieCommonConfig; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieAvroRecordMerger; import org.apache.hudi.common.model.HoodieRecord; @@ -28,6 +27,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.utils.HiveAvroSerializer; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; @@ -83,7 +83,7 @@ private HoodieMergedLogRecordScanner getMergedLogRecordScanner() throws IOExcept // but can return records for completed commits > the commit we are trying to read (if using // readCommit() API) return HoodieMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf)) + .withFileSystem(HadoopFSUtils.getFs(split.getPath().toString(), jobConf)) .withBasePath(split.getBasePath()) .withLogFilePaths(split.getDeltaLogPaths()) .withReaderSchema(getLogScannerReaderSchema()) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java index 043122fbdf86..23d849593151 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java @@ -20,8 +20,8 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.CachingPath; import org.apache.hudi.hadoop.InputSplitUtils; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.InputSplitWithLocationInfo; diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java index a40519df92db..dd0ef5bf15d7 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java @@ -18,7 +18,6 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner; import org.apache.hudi.common.util.DefaultSizeEstimator; import org.apache.hudi.common.util.Functions; @@ -30,6 +29,7 @@ import org.apache.hudi.hadoop.RecordReaderValueIterator; import org.apache.hudi.hadoop.SafeParquetRecordReaderWrapper; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; import org.apache.avro.generic.GenericRecord; @@ -76,7 +76,7 @@ public RealtimeUnmergedRecordReader(RealtimeSplit split, JobConf job, HoodieUnMergedLogRecordScanner.Builder scannerBuilder = HoodieUnMergedLogRecordScanner.newBuilder() - .withFileSystem(FSUtils.getFs(split.getPath().toString(), this.jobConf)) + .withFileSystem(HadoopFSUtils.getFs(split.getPath().toString(), this.jobConf)) .withBasePath(split.getBasePath()) .withLogFilePaths(split.getDeltaLogPaths()) .withReaderSchema(getReaderSchema()) diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java index adee06cc20d9..718edeccf79a 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieMergeOnReadSnapshotReader.java @@ -58,7 +58,7 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.apache.hudi.common.fs.FSUtils.getFs; +import static org.apache.hudi.hadoop.fs.HadoopFSUtils.getFs; import static org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath; import static org.apache.hudi.hadoop.testutils.InputFormatTestUtil.writeDataBlockToLogFile; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index 0633be72453f..487225175a47 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -44,6 +44,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.RealtimeFileStatus; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; @@ -115,7 +116,7 @@ public void setUp() { hadoopConf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); baseJobConf = new JobConf(hadoopConf); baseJobConf.set(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1024 * 1024)); - fs = FSUtils.getFs(basePath.toUri().toString(), baseJobConf); + fs = HadoopFSUtils.getFs(basePath.toUri().toString(), baseJobConf); } @AfterEach diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java index d50915d26e25..fc4d68c72053 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.java @@ -21,7 +21,6 @@ import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -29,6 +28,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config; import org.apache.hudi.integ.testsuite.dag.DagUtils; import org.apache.hudi.integ.testsuite.dag.WorkflowDag; @@ -109,9 +109,9 @@ public HoodieTestSuiteJob(HoodieTestSuiteConfig cfg, JavaSparkContext jsc, boole this.cfg = cfg; this.jsc = jsc; this.stopJsc = stopJsc; - cfg.propsFilePath = FSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); + cfg.propsFilePath = HadoopFSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); this.sparkSession = SparkSession.builder().config(jsc.getConf()).enableHiveSupport().getOrCreate(); - this.fs = FSUtils.getFs(cfg.inputBasePath, jsc.hadoopConfiguration()); + this.fs = HadoopFSUtils.getFs(cfg.inputBasePath, jsc.hadoopConfiguration()); this.props = UtilHelpers.readConfig(fs.getConf(), new Path(cfg.propsFilePath), cfg.configs).getProps(); log.info("Creating workload generator with configs : {}", props.toString()); this.hiveConf = getDefaultHiveConf(jsc.hadoopConfiguration()); @@ -188,7 +188,7 @@ public WorkflowDag createWorkflowDag() throws IOException { WorkflowDag workflowDag = this.cfg.workloadYamlPath == null ? ((WorkflowDagGenerator) ReflectionUtils .loadClass((this.cfg).workloadDagGenerator)).build() : DagUtils.convertYamlPathToDag( - FSUtils.getFs(this.cfg.workloadYamlPath, jsc.hadoopConfiguration(), true), + HadoopFSUtils.getFs(this.cfg.workloadYamlPath, jsc.hadoopConfiguration(), true), this.cfg.workloadYamlPath); return workflowDag; } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java index 6094479bb6b3..a7a46c1d97a9 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/SparkDataSourceContinuousIngestTool.java @@ -21,7 +21,7 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.HoodieRepairTool; import org.apache.hudi.utilities.IdentitySplitter; import org.apache.hudi.utilities.UtilHelpers; @@ -76,7 +76,7 @@ public class SparkDataSourceContinuousIngestTool { public SparkDataSourceContinuousIngestTool(JavaSparkContext jsc, Config cfg) { if (cfg.propsFilePath != null) { - cfg.propsFilePath = FSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); + cfg.propsFilePath = HadoopFSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); } this.context = new HoodieSparkEngineContext(jsc); this.sparkSession = SparkSession.builder().config(jsc.getConf()).getOrCreate(); diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateAsyncOperations.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateAsyncOperations.java index 9c8dc4d82c77..5fc3666559e2 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateAsyncOperations.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateAsyncOperations.java @@ -19,13 +19,13 @@ package org.apache.hudi.integ.testsuite.dag.nodes; import org.apache.hudi.avro.model.HoodieCleanMetadata; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config; import org.apache.hudi.integ.testsuite.dag.ExecutionContext; @@ -58,7 +58,7 @@ public void execute(ExecutionContext executionContext, int curItrCount) throws E String basePath = executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath; int maxCommitsRetained = executionContext.getHoodieTestSuiteWriter().getWriteConfig().getCleanerCommitsRetained() + 1; - FileSystem fs = FSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, executionContext.getHoodieTestSuiteWriter().getConfiguration()); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setBasePath(executionContext.getHoodieTestSuiteWriter().getCfg().targetBasePath) .setConf(executionContext.getJsc().hadoopConfiguration()).build(); diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java index 260fa8822b48..e9ef3b714a74 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java @@ -18,9 +18,9 @@ package org.apache.hudi.integ.testsuite.generator; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.configuration.DFSDeltaConfig; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config; import org.apache.hudi.integ.testsuite.converter.Converter; @@ -91,7 +91,7 @@ public Pair> writeRecords(JavaRDD 1) { Path oldInputDir = new Path(deltaOutputConfig.getDeltaBasePath(), Integer.toString(batchId - 1)); try { - FileSystem fs = FSUtils.getFs(oldInputDir.toString(), deltaOutputConfig.getConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(oldInputDir.toString(), deltaOutputConfig.getConfiguration()); fs.delete(oldInputDir, true); } catch (IOException e) { log.error("Failed to delete older input data directory " + oldInputDir, e); diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java index ad6ef1046300..24005ef86353 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java @@ -29,8 +29,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import org.apache.hudi.common.fs.FSUtils; + import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; /** * This class helps to estimate the number of files to read a given number of total records. @@ -40,7 +41,7 @@ public abstract class DFSDeltaInputReader implements DeltaInputReader getFilePathsToRead(String basePath, PathFilter filter, long totalRecordsToRead) throws IOException { - FileSystem fs = FSUtils.getFs(basePath, new Configuration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, new Configuration()); // TODO : Sort list by file size and take the median file status to ensure fair calculation and change to remote // iterator List fileStatuses = Arrays.asList(fs.globStatus(new Path(basePath, "*/*"), filter)); diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java index 24181527ca63..fa072c95e7e9 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/writer/AvroFileDeltaInputWriter.java @@ -18,9 +18,9 @@ package org.apache.hudi.integ.testsuite.writer; -import java.io.IOException; -import java.io.OutputStream; -import java.util.UUID; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; + import org.apache.avro.Schema; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; @@ -30,11 +30,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.OutputStream; +import java.util.UUID; + /** * Implementation of {@link DeltaInputWriter} that writes avro records to the result file. */ diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala index dabe54d822ba..28c686165bb7 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/spark/sql/SparkSqlCreateTableNode.scala @@ -23,10 +23,12 @@ import org.apache.hadoop.fs.Path import org.apache.hudi.AvroConversionUtils import org.apache.hudi.client.WriteStatus import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config import org.apache.hudi.integ.testsuite.dag.ExecutionContext import org.apache.hudi.integ.testsuite.dag.nodes.DagNode import org.apache.hudi.integ.testsuite.utils.SparkSqlUtils + import org.apache.spark.rdd.RDD import org.slf4j.{Logger, LoggerFactory} @@ -72,7 +74,7 @@ class SparkSqlCreateTableNode(dagNodeConfig: Config) extends DagNode[RDD[WriteSt sparkSession.sql("drop table if exists " + targetTableName) if (config.isTableExternal) { LOG.info("Clean up " + targetBasePath) - val fs = FSUtils.getFs(targetBasePath, context.getJsc.hadoopConfiguration()) + val fs = HadoopFSUtils.getFs(targetBasePath, context.getJsc.hadoopConfiguration()) val targetPath = new Path(targetBasePath) if (fs.exists(targetPath)) { fs.delete(targetPath, true) diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java index 0c0e920305d5..70430328553f 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestDFSHoodieTestSuiteWriterAdapter.java @@ -19,7 +19,7 @@ package org.apache.hudi.integ.testsuite; import org.apache.hudi.common.config.SerializableConfiguration; -import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.configuration.DFSDeltaConfig; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig; import org.apache.hudi.integ.testsuite.generator.FlexibleSchemaRecordGenerationIterator; @@ -138,7 +138,7 @@ public void testDFSWorkloadSinkWithMultipleFilesFunctional() throws IOException FlexibleSchemaRecordGenerationIterator itr = new FlexibleSchemaRecordGenerationIterator(1000, schemaProvider.getSourceSchema().toString()); dfsDeltaWriterAdapter.write(itr); - FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()); FileStatus[] fileStatuses = fs.listStatus(new Path(basePath)); // Since maxFileSize was 10240L and we produced 1K records each close to 1K size, we should produce more than // 1 file diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestFileDeltaInputWriter.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestFileDeltaInputWriter.java index f2d582ca8063..4f99292b3fd2 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestFileDeltaInputWriter.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/TestFileDeltaInputWriter.java @@ -32,8 +32,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.fs.FSUtils; + import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.reader.SparkBasedReader; import org.apache.hudi.integ.testsuite.writer.AvroFileDeltaInputWriter; import org.apache.hudi.integ.testsuite.writer.DeltaInputWriter; @@ -96,7 +97,7 @@ public void testAvroFileSinkWriter() throws IOException { }); fileSinkWriter.close(); DeltaWriteStats deltaWriteStats = fileSinkWriter.getDeltaWriteStats(); - FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()); FileStatus[] fileStatuses = fs.listStatus(new Path(deltaWriteStats.getFilePath())); // Atleast 1 file was written assertEquals(1, fileStatuses.length); diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/reader/TestDFSAvroDeltaInputReader.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/reader/TestDFSAvroDeltaInputReader.java index 0bc1044fd4cd..089a9d9fb559 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/reader/TestDFSAvroDeltaInputReader.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/reader/TestDFSAvroDeltaInputReader.java @@ -26,8 +26,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.fs.FSUtils; + import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.integ.testsuite.utils.TestUtils; import org.apache.hudi.utilities.testutils.UtilitiesTestBase; import org.junit.jupiter.api.AfterAll; @@ -59,7 +60,7 @@ public void setup() throws Exception { @Test @Disabled public void testDFSSinkReader() throws IOException { - FileSystem fs = FSUtils.getFs(basePath, new Configuration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, new Configuration()); // Create 10 avro files with 10 records each TestUtils.createAvroFiles(jsc, sparkSession, basePath, 10, 10); FileStatus[] statuses = fs.globStatus(new Path(basePath + "/*/*.avro")); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/metrics/Counter.java b/hudi-io/src/main/java/org/apache/hudi/common/metrics/Counter.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/metrics/Counter.java rename to hudi-io/src/main/java/org/apache/hudi/common/metrics/Counter.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/metrics/LocalRegistry.java b/hudi-io/src/main/java/org/apache/hudi/common/metrics/LocalRegistry.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/metrics/LocalRegistry.java rename to hudi-io/src/main/java/org/apache/hudi/common/metrics/LocalRegistry.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/metrics/Metric.java b/hudi-io/src/main/java/org/apache/hudi/common/metrics/Metric.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/metrics/Metric.java rename to hudi-io/src/main/java/org/apache/hudi/common/metrics/Metric.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/metrics/Registry.java b/hudi-io/src/main/java/org/apache/hudi/common/metrics/Registry.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/metrics/Registry.java rename to hudi-io/src/main/java/org/apache/hudi/common/metrics/Registry.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/HoodieTimer.java b/hudi-io/src/main/java/org/apache/hudi/common/util/HoodieTimer.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/HoodieTimer.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/HoodieTimer.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/ReflectionUtils.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java b/hudi-io/src/main/java/org/apache/hudi/common/util/RetryHelper.java similarity index 92% rename from hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/RetryHelper.java index e63262d90238..26ef5b3bed7d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/RetryHelper.java @@ -7,13 +7,14 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.hudi.common.util; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java similarity index 93% rename from hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java index 5b95bc60312d..5143bd680b08 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/StringUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java @@ -7,13 +7,14 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.hudi.common.util; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ValidationUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/ValidationUtils.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/common/util/ValidationUtils.java rename to hudi-io/src/main/java/org/apache/hudi/common/util/ValidationUtils.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java similarity index 91% rename from hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java rename to hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java index d43259a412a2..30567a435bf0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/StorageSchemes.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java @@ -7,16 +7,17 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ -package org.apache.hudi.common.fs; +package org.apache.hudi.storage; import java.util.Arrays; diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java index a5e6b3a7afed..7239b7115d89 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectTransactionServices.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; @@ -34,6 +33,7 @@ import org.apache.hudi.connect.transaction.TransactionCoordinator; import org.apache.hudi.connect.utils.KafkaConnectUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.KeyGenerator; import org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory; import org.apache.hudi.sync.common.HoodieSyncConfig; @@ -161,7 +161,7 @@ private void syncMeta() { if (connectConfigs.isMetaSyncEnabled()) { Set syncClientToolClasses = new HashSet<>( Arrays.asList(connectConfigs.getMetaSyncClasses().split(","))); - FileSystem fs = FSUtils.getFs(tableBasePath, new Configuration()); + FileSystem fs = HadoopFSUtils.getFs(tableBasePath, new Configuration()); for (String impl : syncClientToolClasses) { // TODO kafka connect config needs to support setting base file format String baseFileFormat = connectConfigs.getStringOrDefault(HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala index 1685b9abf303..704b3751e784 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -31,14 +31,16 @@ import org.apache.hudi.common.util.ValidationUtils.checkState import org.apache.hudi.config.HoodieBootstrapConfig.DATA_QUERIES_ONLY import org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE import org.apache.hudi.exception.HoodieException +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.util.PathUtils + import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isUsingHiveCatalog import org.apache.spark.sql.hudi.streaming.{HoodieEarliestOffsetRangeLimit, HoodieLatestOffsetRangeLimit, HoodieSpecifiedOffsetRangeLimit, HoodieStreamSource} import org.apache.spark.sql.sources._ import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, SparkSession} +import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, SQLContext} import org.slf4j.LoggerFactory import scala.collection.JavaConversions.mapAsJavaMap @@ -87,7 +89,7 @@ class DefaultSource extends RelationProvider val readPaths = readPathsStr.map(p => p.split(",").toSeq).getOrElse(Seq()) val allPaths = path.map(p => Seq(p)).getOrElse(Seq()) ++ readPaths - val fs = FSUtils.getFs(allPaths.head, sqlContext.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(allPaths.head, sqlContext.sparkContext.hadoopConfiguration) val globPaths = if (path.exists(_.contains("*")) || readPaths.nonEmpty) { PathUtils.checkAndGlobPathIfNecessary(allPaths, fs) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala index f97e18079250..d2ba5a7a4bd4 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala @@ -17,12 +17,6 @@ package org.apache.hudi -import org.apache.avro.Schema -import org.apache.avro.generic.GenericRecord -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hadoop.hbase.io.hfile.CacheConfig -import org.apache.hadoop.mapred.JobConf import org.apache.hudi.AvroConversionUtils.getAvroSchemaWithDefaults import org.apache.hudi.HoodieBaseRelation._ import org.apache.hudi.HoodieConversionUtils.toScalaOption @@ -32,25 +26,33 @@ import org.apache.hudi.common.config.{ConfigProperty, HoodieMetadataConfig, Seri import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath import org.apache.hudi.common.model.{FileSlice, HoodieFileFormat, HoodieRecord} +import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.table.timeline.HoodieTimeline import org.apache.hudi.common.table.timeline.TimelineUtils.validateTimestampAsOf import org.apache.hudi.common.table.view.HoodieTableFileSystemView -import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.{ConfigUtils, StringUtils} import org.apache.hudi.common.util.StringUtils.isNullOrEmpty import org.apache.hudi.common.util.ValidationUtils.checkState -import org.apache.hudi.common.util.{ConfigUtils, StringUtils} import org.apache.hudi.config.HoodieBootstrapConfig.DATA_QUERIES_ONLY import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieException -import org.apache.hudi.hadoop.CachingPath +import org.apache.hudi.hadoop.fs.CachingPath +import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema} import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} -import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema} import org.apache.hudi.io.storage.HoodieAvroHFileReader import org.apache.hudi.metadata.HoodieTableMetadata + +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.hbase.io.hfile.CacheConfig +import org.apache.hadoop.mapred.JobConf import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Row, SparkSession, SQLContext} import org.apache.spark.sql.HoodieCatalystExpressionUtils.{convertToCatalystExpression, generateUnsafeProjection} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.Resolver @@ -63,9 +65,9 @@ import org.apache.spark.sql.execution.datasources.parquet.{LegacyHoodieParquetFi import org.apache.spark.sql.hudi.HoodieSqlCommonUtils import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{Row, SQLContext, SparkSession} import java.net.URI + import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/Iterators.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/Iterators.scala index 728251c9da94..3a86a2cc738c 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/Iterators.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/Iterators.scala @@ -32,16 +32,18 @@ import org.apache.hudi.common.engine.{EngineType, HoodieLocalEngineContext} import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.fs.FSUtils.{buildInlineConf, getRelativePartitionPath} import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType -import org.apache.hudi.common.model.{HoodieSparkRecord, _} +import org.apache.hudi.common.model._ import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner import org.apache.hudi.common.util.HoodieRecordUtils import org.apache.hudi.config.HoodiePayloadConfig import org.apache.hudi.hadoop.config.HoodieRealtimeConfig +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata} import org.apache.hudi.util.CachingIterator + import org.apache.spark.sql.HoodieCatalystExpressionUtils.generateUnsafeProjection import org.apache.spark.sql.HoodieInternalRowUtils import org.apache.spark.sql.catalyst.InternalRow @@ -49,6 +51,7 @@ import org.apache.spark.sql.catalyst.expressions.Projection import org.apache.spark.sql.types.StructType import java.io.Closeable + import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.collection.mutable @@ -343,7 +346,7 @@ object LogFileIterator extends SparkAdapterSupport { hadoopConf: Configuration, internalSchema: InternalSchema = InternalSchema.getEmptyInternalSchema): mutable.Map[String, HoodieRecord[_]] = { val tablePath = tableState.tablePath - val fs = FSUtils.getFs(tablePath, hadoopConf) + val fs = HadoopFSUtils.getFs(tablePath, hadoopConf) if (HoodieTableMetadata.isMetadataTable(tablePath)) { val metadataConfig = HoodieMetadataConfig.newBuilder() diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala index 6497c64d5ab8..56119e409a73 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala @@ -21,7 +21,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.{DFSPropertiesConfiguration, HoodieMetadataConfig, TypedProperties} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.table.timeline.HoodieActiveTimeline.parseDateFromInstantTime import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstantTimeGenerator, HoodieTimeline} @@ -29,6 +28,7 @@ import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.util.PartitionPathEncodeUtils import org.apache.hudi.exception.HoodieException import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, SparkAdapterSupport} +import org.apache.hudi.common.fs.FSUtils import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.Resolver diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala index 481fe2775f84..d827254a13c4 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala @@ -22,6 +22,8 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.util.ConfigUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} @@ -87,7 +89,7 @@ case class DropHoodieTableCommand( logInfo("Clean up " + basePath) val targetPath = new Path(basePath) val engineContext = new HoodieSparkEngineContext(sparkSession.sparkContext) - val fs = FSUtils.getFs(basePath, sparkSession.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, sparkSession.sparkContext.hadoopConfiguration) FSUtils.deleteDir(engineContext, fs, targetPath, sparkSession.sparkContext.defaultParallelism) } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala index fcf40bd2da09..17b919eb3c66 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala @@ -23,6 +23,8 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.exception.HoodieException +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable} @@ -67,7 +69,7 @@ case class TruncateHoodieTableCommand( if (partitionSpec.isEmpty) { val targetPath = new Path(basePath) val engineContext = new HoodieSparkEngineContext(sparkSession.sparkContext) - val fs = FSUtils.getFs(basePath, sparkSession.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, sparkSession.sparkContext.hadoopConfiguration) FSUtils.deleteDir(engineContext, fs, targetPath, sparkSession.sparkContext.defaultParallelism) // ReInit hoodie.properties diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java index 9783113117ce..0795acffc4d7 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.config.DFSPropertiesConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -38,6 +37,7 @@ import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.avro.Schema; @@ -125,7 +125,7 @@ public boolean isUpsert() { } public int dataImport(JavaSparkContext jsc) { - FileSystem fs = FSUtils.getFs(this.targetPath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(this.targetPath, jsc.hadoopConfiguration()); this.props = this.propsFilePath == null || this.propsFilePath.isEmpty() ? buildProperties(this.configs) : readConfig(fs.getConf(), new Path(this.propsFilePath), this.configs).getProps(true); LOG.info("Starting data import with configs : " + props.toString()); diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DedupeSparkJob.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DedupeSparkJob.scala index 65d07e28bb4f..9177474d7812 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DedupeSparkJob.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/DedupeSparkJob.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.hudi -import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hudi.common.fs.FSUtils +import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hudi.common.model.{HoodieBaseFile, HoodieRecord} import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.view.HoodieTableFileSystemView diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala index 31918ad080c6..99b70519de65 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala @@ -30,6 +30,7 @@ import org.apache.hudi.common.table.log.HoodieLogFormat import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline, TimelineMetadataUtils} import org.apache.hudi.exception.HoodieException + import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} @@ -38,6 +39,8 @@ import java.util import java.util.Collections import java.util.function.Supplier import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import scala.collection.JavaConverters._ import scala.util.control.Breaks.break @@ -89,7 +92,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L .toList.asJava // Archived instants are in the commit archive files - val statuses: Array[FileStatus] = FSUtils.getFs(basePath, jsc.hadoopConfiguration()).globStatus(archivePath) + val statuses: Array[FileStatus] = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()).globStatus(archivePath) val archivedStatuses = List(statuses: _*) .sortWith((f1, f2) => (f1.getModificationTime - f2.getModificationTime).toInt > 0).asJava @@ -112,7 +115,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L private def copyArchivedInstants(basePath: String, statuses: util.List[FileStatus], actionSet: util.Set[String], limit: Int, localFolder: String) = { import scala.collection.JavaConversions._ var copyCount = 0 - val fileSystem = FSUtils.getFs(basePath, jsc.hadoopConfiguration()) + val fileSystem = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()) for (fs <- statuses) { // read the archived file val reader = HoodieLogFormat.newReader(fileSystem, new HoodieLogFile(fs.getPath), HoodieArchivedMetaEntry.getClassSchema) @@ -176,7 +179,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L var copyCount = 0 if (instants.nonEmpty) { val timeline = metaClient.getActiveTimeline - val fileSystem = FSUtils.getFs(metaClient.getBasePath, jsc.hadoopConfiguration()) + val fileSystem = HadoopFSUtils.getFs(metaClient.getBasePath, jsc.hadoopConfiguration()) for (instant <- instants) { val localPath = localFolder + Path.SEPARATOR + instant.getFileName val data: Array[Byte] = instant.getAction match { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairAddpartitionmetaProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairAddpartitionmetaProcedure.scala index d636b7328b9c..2b05a134a804 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairAddpartitionmetaProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairAddpartitionmetaProcedure.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.hudi.command.procedures -import org.apache.hadoop.fs.Path import org.apache.hudi.common.fs.FSUtils +import org.apache.hadoop.fs.Path import org.apache.hudi.common.model.HoodiePartitionMetadata import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.spark.internal.Logging diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairDeduplicateProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairDeduplicateProcedure.scala index d4d22364fe8b..8de9c08faac1 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairDeduplicateProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairDeduplicateProcedure.scala @@ -19,12 +19,14 @@ package org.apache.spark.sql.hudi.command.procedures import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.exception.HoodieException +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} -import java.util.function.Supplier -import org.apache.spark.sql.hudi.{DeDupeType, DedupeSparkJob} +import java.util.function.Supplier +import org.apache.spark.sql.hudi.{DedupeSparkJob, DeDupeType} import scala.util.{Failure, Success, Try} @@ -61,7 +63,7 @@ class RepairDeduplicateProcedure extends BaseProcedure with ProcedureBuilder wit Try { val job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath, spark.sqlContext, - FSUtils.getFs(basePath, jsc.hadoopConfiguration), DeDupeType.withName(dedupeType)) + HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration), DeDupeType.withName(dedupeType)) job.fixDuplicates(dryRun) } match { case Success(_) => diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala index 51bafb5e201a..fe8efc99c789 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RepairOverwriteHoodiePropsProcedure.scala @@ -17,11 +17,14 @@ package org.apache.spark.sql.hudi.command.procedures +import org.apache.hudi.common.fs.FSUtils + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} @@ -30,6 +33,7 @@ import java.io.FileInputStream import java.util import java.util.Properties import java.util.function.Supplier + import scala.collection.JavaConversions._ import scala.collection.JavaConverters.asScalaIteratorConverter @@ -50,7 +54,7 @@ class RepairOverwriteHoodiePropsProcedure extends BaseProcedure with ProcedureBu def outputType: StructType = OUTPUT_TYPE def loadNewProps(filePath: String, props: Properties):Unit = { - val fs = FSUtils.getFs(filePath, new Configuration()) + val fs = HadoopFSUtils.getFs(filePath, new Configuration()) val fis = fs.open(new Path(filePath)) props.load(fis) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunBootstrapProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunBootstrapProcedure.scala index c2f18edaeeb2..00356e4b95a8 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunBootstrapProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RunBootstrapProcedure.scala @@ -21,11 +21,13 @@ import org.apache.hadoop.fs.Path import org.apache.hudi.cli.BootstrapExecutorUtils import org.apache.hudi.cli.HDFSParquetImporterUtils.{buildProperties, readConfig} import org.apache.hudi.common.config.TypedProperties -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.util.StringUtils import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig} import org.apache.hudi.keygen.constant.KeyGeneratorType import org.apache.hudi.{DataSourceWriteOptions, HoodieCLIUtils} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} @@ -33,6 +35,7 @@ import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} import java.util import java.util.Locale import java.util.function.Supplier + import scala.collection.JavaConverters._ class RunBootstrapProcedure extends BaseProcedure with ProcedureBuilder with Logging { private val PARAMETERS = Array[ProcedureParameter]( @@ -112,7 +115,7 @@ class RunBootstrapProcedure extends BaseProcedure with ProcedureBuilder with Log properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, rowKeyField) properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, partitionPathField) - val fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration) val cfg = new BootstrapExecutorUtils.Config() cfg.setTableName(tableName) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFileSystemViewProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFileSystemViewProcedure.scala index 27712195d9cd..f3dac3e53589 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFileSystemViewProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFileSystemViewProcedure.scala @@ -17,21 +17,22 @@ package org.apache.spark.sql.hudi.command.procedures -import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hudi.common.fs.{FSUtils, HoodieWrapperFileSystem} +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{FileSlice, HoodieLogFile} import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.{HoodieDefaultTimeline, HoodieInstant, HoodieTimeline} import org.apache.hudi.common.table.view.HoodieTableFileSystemView import org.apache.hudi.common.util -import org.apache.hudi.common.util.StringUtils + +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} import java.util.function.{Function, Supplier} import java.util.stream.Collectors + import scala.collection.JavaConversions -import scala.collection.JavaConverters.{asJavaIterableConverter, asJavaIteratorConverter, asScalaIteratorConverter} +import scala.collection.JavaConverters.asScalaIteratorConverter class ShowFileSystemViewProcedure(showLatest: Boolean) extends BaseProcedure with ProcedureBuilder { private val PARAMETERS_ALL: Array[ProcedureParameter] = Array[ProcedureParameter]( diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFsPathDetailProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFsPathDetailProcedure.scala index b3a3b0b700ce..33bbdff15e1a 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFsPathDetailProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowFsPathDetailProcedure.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.hudi.command.procedures -import org.apache.hadoop.fs.{ContentSummary, FileStatus, Path} import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + +import org.apache.hadoop.fs.{ContentSummary, FileStatus, Path} import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} @@ -55,7 +57,7 @@ class ShowFsPathDetailProcedure extends BaseProcedure with ProcedureBuilder { val sort = getArgValueOrDefault(args, PARAMETERS(2)).get.asInstanceOf[Boolean] val path: Path = new Path(srcPath) - val fs = FSUtils.getFs(path, jsc.hadoopConfiguration()) + val fs = HadoopFSUtils.getFs(path, jsc.hadoopConfiguration()) val status: Array[FileStatus] = if (isSub) fs.listStatus(path) else fs.globStatus(path) val rows: java.util.List[Row] = new java.util.ArrayList[Row]() diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowHoodieLogFileMetadataProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowHoodieLogFileMetadataProcedure.scala index d1da7cfed068..e2e5408cce17 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowHoodieLogFileMetadataProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowHoodieLogFileMetadataProcedure.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.hudi.command.procedures +import org.apache.hudi.common.fs.FSUtils import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.fs.Path -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieLogFile import org.apache.hudi.common.table.log.HoodieLogFormat import org.apache.hudi.common.table.log.block.HoodieLogBlock.{HeaderMetadataType, HoodieLogBlockType} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowInvalidParquetProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowInvalidParquetProcedure.scala index d87239675ed9..95164e0a54d0 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowInvalidParquetProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowInvalidParquetProcedure.scala @@ -21,6 +21,8 @@ import org.apache.hadoop.fs.Path import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.SerializableConfiguration import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS import org.apache.parquet.hadoop.ParquetFileReader import org.apache.spark.api.java.JavaRDD @@ -50,7 +52,7 @@ class ShowInvalidParquetProcedure extends BaseProcedure with ProcedureBuilder { val javaRdd: JavaRDD[String] = jsc.parallelize(partitionPaths, partitionPaths.size()) val serHadoopConf = new SerializableConfiguration(jsc.hadoopConfiguration()) javaRdd.rdd.map(part => { - val fs = FSUtils.getFs(new Path(srcPath), serHadoopConf.get()) + val fs = HadoopFSUtils.getFs(new Path(srcPath), serHadoopConf.get()) FSUtils.getAllDataFilesInPartition(fs, FSUtils.getPartitionPath(srcPath, part)) }).flatMap(_.toList) .filter(status => { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/StatsFileSizeProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/StatsFileSizeProcedure.scala index feff232c80d3..a9254c1b8272 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/StatsFileSizeProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/StatsFileSizeProcedure.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.hudi.command.procedures +import org.apache.hudi.common.fs.FSUtils import com.codahale.metrics.{Histogram, Snapshot, UniformReservoir} import org.apache.hadoop.fs.Path -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.util.ValidationUtils import org.apache.spark.sql.Row diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java index f20c743cf041..c3baf0f52354 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java @@ -52,6 +52,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.io.storage.HoodieAvroParquetReader; @@ -494,7 +495,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta private void verifyNoMarkerInTempFolder() throws IOException { String tempFolderPath = metaClient.getTempFolderPath(); - FileSystem fileSystem = FSUtils.getFs(tempFolderPath, jsc.hadoopConfiguration()); + FileSystem fileSystem = HadoopFSUtils.getFs(tempFolderPath, jsc.hadoopConfiguration()); assertEquals(0, fileSystem.listStatus(new Path(tempFolderPath)).length); } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index b6b881c2b70a..39d093b7ffc3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -25,7 +25,6 @@ import org.apache.hudi.QuickstartUtils.{convertToStringList, getQuickstartWriteC import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT, TIMESTAMP_OUTPUT_DATE_FORMAT, TIMESTAMP_TIMEZONE_FORMAT, TIMESTAMP_TYPE_FIELD} import org.apache.hudi.common.config.{HoodieCommonConfig, HoodieMetadataConfig} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.model.{HoodieRecord, WriteOperationType} import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline, TimelineUtils} @@ -46,6 +45,7 @@ import org.apache.hudi.metrics.{Metrics, MetricsReporterType} import org.apache.hudi.testutils.HoodieSparkClientTestBase import org.apache.hudi.util.JFunction import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, QuickstartUtils, ScalaAssertionSupport} +import org.apache.hudi.common.fs.FSUtils import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.hudi.HoodieSparkSessionExtension diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala index bed951238f16..0807c0f9ff4f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala @@ -22,7 +22,6 @@ package org.apache.hudi.functional import org.apache.hudi.client.validator.{SqlQueryEqualityPreCommitValidator, SqlQueryInequalityPreCommitValidator} import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT, TIMESTAMP_OUTPUT_DATE_FORMAT, TIMESTAMP_TYPE_FIELD} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.WriteOperationType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} @@ -34,6 +33,9 @@ import org.apache.hudi.keygen.{NonpartitionedKeyGenerator, TimestampBasedKeyGene import org.apache.hudi.testutils.SparkClientFunctionalTestHarness import org.apache.hudi.testutils.SparkClientFunctionalTestHarness.getSparkSqlConf import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, lit} @@ -92,7 +94,7 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness { options += TIMESTAMP_OUTPUT_DATE_FORMAT.key -> "yyyyMMdd" } val dataGen = new HoodieTestDataGenerator(0xDEED) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Insert Operation val records0 = recordsToStrings(dataGen.generateInserts("000", 100)).toList val inputDF0 = spark.read.json(spark.sparkContext.parallelize(records0, 2)) @@ -316,7 +318,7 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness { } val dataGen = new HoodieTestDataGenerator(0xDEED) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) val records = recordsToStrings(dataGen.generateInserts("001", 100)).toList // First commit, new partition, no existing table schema diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndexWithSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndexWithSQL.scala index 9c4099035b12..29da27b0c865 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndexWithSQL.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndexWithSQL.scala @@ -23,7 +23,6 @@ import org.apache.hudi.client.SparkRDDWriteClient import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.client.utils.MetadataConversionUtils import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieTableType, WriteOperationType} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.table.timeline.HoodieInstant @@ -33,6 +32,7 @@ import org.apache.hudi.index.HoodieIndex.IndexType.INMEMORY import org.apache.hudi.metadata.HoodieMetadataFileSystemView import org.apache.hudi.util.JavaConversions import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieFileIndex} +import org.apache.hudi.common.fs.FSUtils import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, Expression, GreaterThan, Literal} import org.apache.spark.sql.types.StringType diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala index c8445fefd075..6088d33a32fc 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala @@ -20,7 +20,6 @@ package org.apache.hudi.functional import org.apache.hudi.bootstrap.SparkParquetBootstrapDataProvider import org.apache.hudi.client.bootstrap.selector.{FullRecordBootstrapModeSelector, MetadataOnlyBootstrapModeSelector} import org.apache.hudi.common.config.HoodieStorageConfig -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.timeline.HoodieTimeline @@ -29,6 +28,8 @@ import org.apache.hudi.functional.TestDataSourceForBootstrap.{dropMetaCols, sort import org.apache.hudi.keygen.{NonpartitionedKeyGenerator, SimpleKeyGenerator} import org.apache.hudi.testutils.HoodieClientTestUtils import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieSparkRecordMerger} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.api.java.JavaSparkContext @@ -42,6 +43,7 @@ import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource} import java.time.Instant import java.util.Collections + import scala.collection.JavaConverters._ class TestDataSourceForBootstrap { @@ -89,7 +91,7 @@ class TestDataSourceForBootstrap { spark = SparkSession.builder.config(sparkConf).getOrCreate basePath = tempDir.toAbsolutePath.toString + "/base" srcPath = tempDir.toAbsolutePath.toString + "/src" - fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) } @AfterEach def tearDown(): Unit ={ diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceStorage.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceStorage.scala index a1b4f3e307e0..32b188aa7d03 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceStorage.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceStorage.scala @@ -20,7 +20,6 @@ package org.apache.hudi.functional import org.apache.hudi.common.config.HoodieMetadataConfig -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings @@ -29,6 +28,9 @@ import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.testutils.SparkClientFunctionalTestHarness import org.apache.hudi.testutils.SparkClientFunctionalTestHarness.getSparkSqlConf import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.apache.spark.sql.functions.{col, lit} @@ -71,7 +73,7 @@ class TestMORDataSourceStorage extends SparkClientFunctionalTestHarness { options += (DataSourceWriteOptions.PRECOMBINE_FIELD.key() -> preCombineField) } val dataGen = new HoodieTestDataGenerator(0xDEEF) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Bulk Insert Operation val records1 = recordsToStrings(dataGen.generateInserts("001", 100)).toList val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2)) @@ -147,7 +149,7 @@ class TestMORDataSourceStorage extends SparkClientFunctionalTestHarness { var options: Map[String, String] = commonOpts options += (DataSourceWriteOptions.PRECOMBINE_FIELD.key() -> preCombineField) val dataGen = new HoodieTestDataGenerator(0xDEEF) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Bulk Insert Operation val records1 = recordsToStrings(dataGen.generateInserts("001", 100)).toList val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2)) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala index 7b93f98b97ca..1e7dc3a5b854 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala @@ -20,7 +20,6 @@ package org.apache.hudi.functional import org.apache.hudi.common.config.HoodieMetadataConfig -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings @@ -28,6 +27,10 @@ import org.apache.hudi.config.{HoodieCompactionConfig, HoodieIndexConfig, Hoodie import org.apache.hudi.keygen.NonpartitionedKeyGenerator import org.apache.hudi.testutils.SparkClientFunctionalTestHarness import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + +import org.apache.spark.SparkConf import org.apache.spark.sql._ import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.params.ParameterizedTest @@ -71,7 +74,7 @@ class TestSparkDataSource extends SparkClientFunctionalTestHarness { // order of cols in inputDf and hudiDf differs slightly. so had to choose columns specifically to compare df directly. val colsToSelect = "_row_key, begin_lat, begin_lon, city_to_state.LA, current_date, current_ts, distance_in_meters, driver, end_lat, end_lon, fare.amount, fare.currency, partition, partition_path, rider, timestamp, weight, _hoodie_is_deleted" val dataGen = new HoodieTestDataGenerator(0xDEED) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Insert Operation val records0 = recordsToStrings(dataGen.generateInserts("000", 10)).toList val inputDf0 = spark.read.json(spark.sparkContext.parallelize(records0, parallelism)).cache @@ -232,7 +235,7 @@ class TestSparkDataSource extends SparkClientFunctionalTestHarness { // order of cols in inputDf and hudiDf differs slightly. so had to choose columns specifically to compare df directly. val colsToSelect = "_row_key, begin_lat, begin_lon, city_to_state.LA, current_date, current_ts, distance_in_meters, driver, end_lat, end_lon, fare.amount, fare.currency, partition, partition_path, rider, timestamp, weight, _hoodie_is_deleted" val dataGen = new HoodieTestDataGenerator(0xDEED) - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Insert Operation val records0 = recordsToStrings(dataGen.generateInserts("000", 10)).toList val inputDf0 = spark.read.json(spark.sparkContext.parallelize(records0, parallelism)).cache diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala index 220c6930c4f5..b554aa735ec8 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala @@ -22,7 +22,6 @@ package org.apache.hudi.functional import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL} import org.apache.hudi.HoodieDataSourceHelpers.{hasNewCommits, latestCommit, listCommitsSince} import org.apache.hudi.common.config.HoodieMetadataConfig -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.WriteOperationType.{BULK_INSERT, INSERT, UPSERT} import org.apache.hudi.common.model.{HoodieRecord, WriteOperationType} import org.apache.hudi.common.table.HoodieTableMetaClient @@ -31,6 +30,9 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings import org.apache.hudi.keygen.NonpartitionedKeyGenerator import org.apache.hudi.{DataSourceReadOptions, HoodieSparkUtils} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.sql import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase import org.apache.spark.sql.{Dataset, Row} @@ -38,6 +40,7 @@ import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.scalatest.Inspectors.forAll import java.io.File + import scala.collection.JavaConversions._ @SparkSQLCoreFlow @@ -85,7 +88,7 @@ class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase { val tableBasePath = basePath.getCanonicalPath + "/" + tableName val writeOptions = getWriteOptions(tableName, tableType, keyGenClass, indexType) createTable(tableName, keyGenClass, writeOptions, tableBasePath) - val fs = FSUtils.getFs(tableBasePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(tableBasePath, spark.sparkContext.hadoopConfiguration) val dataGen = new HoodieTestDataGenerator(HoodieTestDataGenerator.TRIP_NESTED_EXAMPLE_SCHEMA, 0xDEED) //Bulk insert first set of records @@ -431,7 +434,7 @@ class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase { val tableBasePath = basePath.getCanonicalPath + "/" + tableName val writeOptions = getWriteOptions(tableName, tableType, keyGenClass, indexType) createTable(tableName, keyGenClass, writeOptions, tableBasePath) - val fs = FSUtils.getFs(tableBasePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(tableBasePath, spark.sparkContext.hadoopConfiguration) //Insert Operation val dataGen = new HoodieTestDataGenerator(HoodieTestDataGenerator.TRIP_NESTED_EXAMPLE_SCHEMA, 0xDEED) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala index bc2a169779c5..b9628d05af14 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/HoodieSparkSqlTestBase.scala @@ -19,16 +19,18 @@ package org.apache.spark.sql.hudi import org.apache.hadoop.fs.Path import org.apache.hudi.HoodieSparkRecordMerger -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.config.HoodieStorageConfig +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieAvroRecordMerger import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.TimelineMetadataUtils import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.ExceptionUtil.getRootCause +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex import org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest + import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.checkMessageContains @@ -173,7 +175,7 @@ class HoodieSparkSqlTestBase extends FunSuite with BeforeAndAfterAll { protected def existsPath(filePath: String): Boolean = { val path = new Path(filePath) - val fs = FSUtils.getFs(filePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(filePath, spark.sparkContext.hadoopConfiguration) fs.exists(path) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala index 3f5dc3a1d64a..0781fc6af06f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDropTable.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.hudi -import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + +import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.SessionCatalog @@ -247,7 +249,7 @@ class TestDropTable extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val tablePath = s"${tmp.getCanonicalPath}/$tableName" - val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); + val filesystem = HadoopFSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); spark.sql( s""" |create table $tableName ( @@ -274,7 +276,7 @@ class TestDropTable extends HoodieSparkSqlTestBase { withTempDir { tmp => val tableName = generateTableName val tablePath = s"${tmp.getCanonicalPath}/$tableName" - val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); + val filesystem = HadoopFSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); spark.sql( s""" |create table $tableName ( @@ -345,7 +347,7 @@ class TestDropTable extends HoodieSparkSqlTestBase { val tablePath = new Path( spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location) - val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); + val filesystem = HadoopFSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration); assert(filesystem.exists(tablePath), s"Table path doesn't exists ($tablePath).") filesystem.delete(tablePath, true) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala index 80ee86ee6f21..90398f4689fa 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.hudi import org.apache.hudi.DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.{DataSourceReadOptions, HoodieDataSourceHelpers, HoodieSparkUtils, ScalaAssertionSupport} +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + import org.apache.spark.sql.internal.SQLConf class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSupport { @@ -1025,7 +1027,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo checkAnswer(s"select id, name, price, _ts from $targetTable")( Seq(1, "a1", 10, 1000) ) - val fs = FSUtils.getFs(targetBasePath, spark.sessionState.newHadoopConf()) + val fs = HadoopFSUtils.getFs(targetBasePath, spark.sessionState.newHadoopConf()) val firstCommitTime = HoodieDataSourceHelpers.latestCommit(fs, targetBasePath) // Second merge diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala index abe3858b03c5..595e9173cbeb 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql.hudi.procedure +import org.apache.hudi.common.fs.FSUtils + import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.timeline.HoodieActiveTimeline import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.util.StringUtils.getUTF8Bytes +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.testutils.HoodieClientTestUtils + import org.apache.parquet.avro.AvroParquetWriter import org.apache.parquet.hadoop.ParquetWriter import org.apache.spark.api.java.JavaSparkContext @@ -41,7 +44,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { test("Test Call hdfs_parquet_import Procedure with insert operation") { withTempDir { tmp => - val fs: FileSystem = FSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) + val fs: FileSystem = HadoopFSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) val tableName = generateTableName val tablePath = tmp.getCanonicalPath + Path.SEPARATOR + tableName val sourcePath = new Path(tmp.getCanonicalPath, "source") @@ -74,7 +77,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { test("Test Call hdfs_parquet_import Procedure with upsert operation") { withTempDir { tmp => - val fs: FileSystem = FSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) + val fs: FileSystem = HadoopFSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) val tableName = generateTableName val tablePath = tmp.getCanonicalPath + Path.SEPARATOR + tableName val sourcePath = new Path(tmp.getCanonicalPath, "source") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala index 7d3c269f8ad4..7126a614987e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestRepairsProcedure.scala @@ -27,7 +27,9 @@ import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.timeline.HoodieTimeline import org.apache.hudi.common.table.view.HoodieTableFileSystemView import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, SchemaTestUtil} +import org.apache.hudi.hadoop.fs.HadoopFSUtils import org.apache.hudi.testutils.HoodieSparkWriteableTestTable + import org.apache.spark.api.java.JavaSparkContext import org.junit.jupiter.api.Assertions.assertEquals @@ -35,6 +37,7 @@ import java.io.IOException import java.net.URL import java.nio.file.{Files, Paths} import java.util.Properties + import scala.collection.JavaConverters.asScalaIteratorConverter import scala.jdk.CollectionConverters.asScalaSetConverter @@ -110,7 +113,7 @@ class TestRepairsProcedure extends HoodieSparkProcedureTestBase { """.stripMargin) val filePath = s"""$tablePath/.hoodie/hoodie.properties""" - val fs = FSUtils.getFs(filePath, new Configuration()) + val fs = HadoopFSUtils.getFs(filePath, new Configuration()) val fis = fs.open(new Path(filePath)) val prevProps = new Properties prevProps.load(fis) @@ -554,7 +557,7 @@ class TestRepairsProcedure extends HoodieSparkProcedureTestBase { @throws[IOException] def createEmptyCleanRequestedFile(basePath: String, instantTime: String, configuration: Configuration): Unit = { val commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeRequestedCleanerFileName(instantTime)) - val fs = FSUtils.getFs(basePath, configuration) + val fs = HadoopFSUtils.getFs(basePath, configuration) val os = fs.create(commitFilePath, true) os.close() } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestShowInvalidParquetProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestShowInvalidParquetProcedure.scala index 4d0c9c7b3461..94b410dad26f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestShowInvalidParquetProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestShowInvalidParquetProcedure.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.hudi.procedure -import org.apache.hadoop.fs.Path import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.hadoop.fs.HadoopFSUtils + +import org.apache.hadoop.fs.Path class TestShowInvalidParquetProcedure extends HoodieSparkProcedureTestBase { test("Test Call show_invalid_parquet Procedure") { @@ -49,7 +51,7 @@ class TestShowInvalidParquetProcedure extends HoodieSparkProcedureTestBase { checkExceptionContain(s"""call show_invalid_parquet(limit => 10)""")( s"Argument: path is required") - val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) + val fs = HadoopFSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) val invalidPath1 = new Path(basePath, "ts=1000/1.parquet") val out1 = fs.create(invalidPath1) out1.write(1) diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java index b86ab6c6e8b1..f1f15d6df1cf 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HMSDDLExecutor.java @@ -19,13 +19,13 @@ package org.apache.hudi.hive.ddl; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HoodieHiveSyncException; import org.apache.hudi.hive.util.HivePartitionUtil; import org.apache.hudi.hive.util.HiveSchemaUtil; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hudi.sync.common.model.PartitionValueExtractor; import org.apache.hadoop.fs.Path; diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java index 1c4dcec592e7..5e2dee7f050c 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/QueryBasedDDLExecutor.java @@ -19,13 +19,13 @@ package org.apache.hudi.hive.ddl; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.fs.StorageSchemes; import org.apache.hudi.common.util.PartitionPathEncodeUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HoodieHiveSyncException; import org.apache.hudi.hive.util.HiveSchemaUtil; +import org.apache.hudi.storage.StorageSchemes; import org.apache.hudi.sync.common.model.PartitionValueExtractor; import org.apache.hadoop.fs.Path; diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java index 4c5fb01b9e75..2c2d77651cb8 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncClient.java @@ -27,7 +27,7 @@ import org.apache.hudi.common.table.timeline.TimelineUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.sync.common.model.Partition; import org.apache.hudi.sync.common.model.PartitionEvent; import org.apache.hudi.sync.common.model.PartitionValueExtractor; diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java index 80b2b1bdd352..534d6b5524be 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java @@ -24,11 +24,11 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import com.beust.jcommander.Parameter; @@ -222,7 +222,7 @@ public Configuration getHadoopConf() { } public FileSystem getHadoopFileSystem() { - return FSUtils.getFs(getString(META_SYNC_BASE_PATH), getHadoopConf()); + return HadoopFSUtils.getFs(getString(META_SYNC_BASE_PATH), getHadoopConf()); } public String getAbsoluteBasePath() { diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestSyncUtilHelpers.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestSyncUtilHelpers.java index 2e730493bb4f..02c6e035a3e1 100644 --- a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestSyncUtilHelpers.java +++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/util/TestSyncUtilHelpers.java @@ -18,8 +18,8 @@ package org.apache.hudi.sync.common.util; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.sync.common.HoodieSyncTool; import org.apache.hadoop.conf.Configuration; @@ -52,7 +52,7 @@ public class TestSyncUtilHelpers { @BeforeEach public void setUp() throws IOException { - fileSystem = FSUtils.getFs(BASE_PATH, new Configuration()); + fileSystem = HadoopFSUtils.getFs(BASE_PATH, new Configuration()); hadoopConf = fileSystem.getConf(); } diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java index a6691e8bb0ac..adfc734d1c55 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java @@ -23,10 +23,10 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.engine.HoodieLocalEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.table.view.FileSystemViewStorageType; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; @@ -66,7 +66,7 @@ public int getServerPort() { public TimelineService(HoodieEngineContext context, Configuration hadoopConf, Config timelineServerConf, FileSystem fileSystem, FileSystemViewManager globalFileSystemViewManager) throws IOException { - this.conf = FSUtils.prepareHadoopConf(hadoopConf); + this.conf = HadoopFSUtils.prepareHadoopConf(hadoopConf); this.timelineServerConf = timelineServerConf; this.serverPort = timelineServerConf.serverPort; this.context = context; @@ -432,10 +432,10 @@ public static void main(String[] args) throws Exception { System.exit(1); } - Configuration conf = FSUtils.prepareHadoopConf(new Configuration()); + Configuration conf = HadoopFSUtils.prepareHadoopConf(new Configuration()); FileSystemViewManager viewManager = buildFileSystemViewManager(cfg, new SerializableConfiguration(conf)); TimelineService service = new TimelineService( - new HoodieLocalEngineContext(FSUtils.prepareHadoopConf(new Configuration())), + new HoodieLocalEngineContext(HadoopFSUtils.prepareHadoopConf(new Configuration())), new Configuration(), cfg, FileSystem.get(new Configuration()), viewManager); service.run(); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java index 7ee5fa83ca2e..5ebb1a3bc775 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.HoodieJsonPayload; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -32,6 +31,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.streamer.HoodieStreamer; import com.beust.jcommander.IValueValidator; @@ -111,7 +111,7 @@ private boolean isUpsert() { } public int dataImport(JavaSparkContext jsc, int retry) { - this.fs = FSUtils.getFs(cfg.targetPath, jsc.hadoopConfiguration()); + this.fs = HadoopFSUtils.getFs(cfg.targetPath, jsc.hadoopConfiguration()); this.props = cfg.propsFilePath == null ? UtilHelpers.buildProperties(cfg.configs) : UtilHelpers.readConfig(fs.getConf(), new Path(cfg.propsFilePath), cfg.configs).getProps(true); LOG.info("Starting data import with configs : " + props.toString()); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java index d7642c46fd12..d296a65ceb4f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java @@ -22,9 +22,9 @@ import org.apache.hudi.client.CompactionAdminClient.RenameOpResult; import org.apache.hudi.client.CompactionAdminClient.ValidationOpResult; import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; @@ -62,7 +62,7 @@ public static void main(String[] args) throws Exception { public void run(JavaSparkContext jsc) throws Exception { HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath).build(); try (CompactionAdminClient admin = new CompactionAdminClient(new HoodieSparkEngineContext(jsc), cfg.basePath)) { - final FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); + final FileSystem fs = HadoopFSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); if (cfg.outputPath != null && fs.exists(new Path(cfg.outputPath))) { throw new IllegalStateException("Output File Path already exists"); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java index d3bcb5b52a82..82acce6a4eb5 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java @@ -21,7 +21,6 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; @@ -30,6 +29,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy; @@ -184,7 +184,7 @@ public static void main(String[] args) { } public int compact(int retry) { - this.fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); + this.fs = HadoopFSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); // need to do validate in case that users call compact() directly without setting cfg.runningMode validateRunningMode(cfg); LOG.info(cfg.toString()); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java index 04db656d492a..1695462a30ea 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java @@ -20,7 +20,6 @@ import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -30,6 +29,7 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HiveSyncConfigHolder; import org.apache.hudi.hive.HiveSyncTool; @@ -375,7 +375,7 @@ private void syncHive(HiveSyncConfig hiveSyncConfig) { + hiveSyncConfig.getStringOrDefault(HiveSyncConfigHolder.HIVE_URL) + ", basePath :" + cfg.basePath); LOG.info("Hive Sync Conf => " + hiveSyncConfig.toString()); - FileSystem fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); HiveConf hiveConf = new HiveConf(); if (!StringUtils.isNullOrEmpty(cfg.hiveHMSUris)) { hiveConf.set("hive.metastore.uris", cfg.hiveHMSUris); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index bb97e17a6d70..e8fbe611937e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -101,7 +101,7 @@ import static org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME; import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.hudi.hadoop.CachingPath.getPathWithoutSchemeAndAuthority; +import static org.apache.hudi.hadoop.fs.CachingPath.getPathWithoutSchemeAndAuthority; import static org.apache.hudi.metadata.HoodieTableMetadata.getMetadataTableBasePath; /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java index 70146ef55c8d..fd47c3f52a7b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java @@ -32,6 +32,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.table.repair.RepairUtils; @@ -151,7 +152,7 @@ public class HoodieRepairTool { public HoodieRepairTool(JavaSparkContext jsc, Config cfg) { if (cfg.propsFilePath != null) { - cfg.propsFilePath = FSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); + cfg.propsFilePath = HadoopFSUtils.addSchemeIfLocalPath(cfg.propsFilePath).toString(); } this.context = new HoodieSparkEngineContext(jsc); this.cfg = cfg; @@ -248,7 +249,7 @@ static boolean copyFiles( List allResults = context.parallelize(relativeFilePaths) .mapPartitions(iterator -> { List results = new ArrayList<>(); - FileSystem fs = FSUtils.getFs(destBasePath, conf.get()); + FileSystem fs = HadoopFSUtils.getFs(destBasePath, conf.get()); iterator.forEachRemaining(filePath -> { boolean success = false; Path sourcePath = new Path(sourceBasePath, filePath); @@ -284,7 +285,7 @@ static boolean copyFiles( */ static List listFilesFromBasePath( HoodieEngineContext context, String basePathStr, int expectedLevel, int parallelism) { - FileSystem fs = FSUtils.getFs(basePathStr, context.getHadoopConf().get()); + FileSystem fs = HadoopFSUtils.getFs(basePathStr, context.getHadoopConf().get()); Path basePath = new Path(basePathStr); return FSUtils.getFileStatusAtLevel( context, fs, basePath, expectedLevel, parallelism).stream() @@ -310,7 +311,7 @@ static boolean deleteFiles( SerializableConfiguration conf = context.getHadoopConf(); return context.parallelize(relativeFilePaths) .mapPartitions(iterator -> { - FileSystem fs = FSUtils.getFs(basePath, conf.get()); + FileSystem fs = HadoopFSUtils.getFs(basePath, conf.get()); List results = new ArrayList<>(); iterator.forEachRemaining(relativeFilePath -> { boolean success = false; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java index 08f2234fa9d9..2ecc5d4e066d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java @@ -32,6 +32,7 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; @@ -81,7 +82,7 @@ static class Config implements Serializable { public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir, final boolean shouldAssumeDatePartitioning, final boolean useFileListingFromMetadata) throws IOException { - FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(baseDir, jsc.hadoopConfiguration()); final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration()); final HoodieTableMetaClient tableMetadata = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir).build(); final BaseFileOnlyView fsView = new HoodieTableFileSystemView(tableMetadata, @@ -113,7 +114,7 @@ public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDi List> filesToCopy = context.flatMap(partitions, partition -> { // Only take latest version files <= latestCommit. - FileSystem fs1 = FSUtils.getFs(baseDir, serConf.newCopy()); + FileSystem fs1 = HadoopFSUtils.getFs(baseDir, serConf.newCopy()); List> filePaths = new ArrayList<>(); Stream dataFiles = fsView.getLatestBaseFilesBeforeOrOn(partition, latestCommitTimestamp); dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); @@ -132,7 +133,7 @@ public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDi String partition = tuple._1(); Path sourceFilePath = new Path(tuple._2()); Path toPartitionPath = FSUtils.getPartitionPath(outputDir, partition); - FileSystem ifs = FSUtils.getFs(baseDir, serConf.newCopy()); + FileSystem ifs = HadoopFSUtils.getFs(baseDir, serConf.newCopy()); if (!ifs.exists(toPartitionPath)) { ifs.mkdirs(toPartitionPath); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java index be6b06bbf909..683ba35aac62 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotExporter.java @@ -36,6 +36,7 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.exception.HoodieSnapshotExporterException; import com.beust.jcommander.IValueValidator; @@ -119,12 +120,12 @@ public static class Config implements Serializable { } public void export(JavaSparkContext jsc, Config cfg) throws IOException { - FileSystem outputFs = FSUtils.getFs(cfg.targetOutputPath, jsc.hadoopConfiguration()); + FileSystem outputFs = HadoopFSUtils.getFs(cfg.targetOutputPath, jsc.hadoopConfiguration()); if (outputFs.exists(new Path(cfg.targetOutputPath))) { throw new HoodieSnapshotExporterException("The target output path already exists."); } - FileSystem sourceFs = FSUtils.getFs(cfg.sourceBasePath, jsc.hadoopConfiguration()); + FileSystem sourceFs = HadoopFSUtils.getFs(cfg.sourceBasePath, jsc.hadoopConfiguration()); final String latestCommitTimestamp = getLatestCommitTimestamp(sourceFs, cfg) .orElseThrow(() -> { throw new HoodieSnapshotExporterException("No commits present. Nothing to snapshot."); @@ -210,7 +211,7 @@ private void exportAsHudi(JavaSparkContext jsc, FileSystem sourceFs, .map(f -> Pair.of(partition, f.getPath())) .collect(Collectors.toList()); // also need to copy over partition metadata - FileSystem fs = FSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); + FileSystem fs = HadoopFSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); Path partitionMetaFile = HoodiePartitionMetadata.getPartitionMetafilePath(fs, FSUtils.getPartitionPath(cfg.sourceBasePath, partition)).get(); if (fs.exists(partitionMetaFile)) { @@ -223,8 +224,8 @@ private void exportAsHudi(JavaSparkContext jsc, FileSystem sourceFs, String partition = partitionAndFile.getLeft(); Path sourceFilePath = new Path(partitionAndFile.getRight()); Path toPartitionPath = FSUtils.getPartitionPath(cfg.targetOutputPath, partition); - FileSystem executorSourceFs = FSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); - FileSystem executorOutputFs = FSUtils.getFs(cfg.targetOutputPath, serConf.newCopy()); + FileSystem executorSourceFs = HadoopFSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); + FileSystem executorOutputFs = HadoopFSUtils.getFs(cfg.targetOutputPath, serConf.newCopy()); if (!executorOutputFs.exists(toPartitionPath)) { executorOutputFs.mkdirs(toPartitionPath); @@ -254,8 +255,8 @@ private void exportAsHudi(JavaSparkContext jsc, FileSystem sourceFs, context.foreach(Arrays.asList(commitFilesToCopy), commitFile -> { Path targetFilePath = new Path(cfg.targetOutputPath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitFile.getPath().getName()); - FileSystem executorSourceFs = FSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); - FileSystem executorOutputFs = FSUtils.getFs(cfg.targetOutputPath, serConf.newCopy()); + FileSystem executorSourceFs = HadoopFSUtils.getFs(cfg.sourceBasePath, serConf.newCopy()); + FileSystem executorOutputFs = HadoopFSUtils.getFs(cfg.targetOutputPath, serConf.newCopy()); if (!executorOutputFs.exists(targetFilePath.getParent())) { executorOutputFs.mkdirs(targetFilePath.getParent()); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java index d26c82841913..4c37a5d3f9a3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieLocalEngineContext; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.view.FileSystemViewManager; @@ -33,6 +32,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.TableNotFoundException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.metadata.HoodieTableMetadata; import com.beust.jcommander.JCommander; @@ -357,7 +357,7 @@ private static boolean isMetadataEnabled(String basePath, JavaSparkContext jsc) private static List getFilePaths(String propsPath, Configuration hadoopConf) { List filePaths = new ArrayList<>(); - FileSystem fs = FSUtils.getFs( + FileSystem fs = HadoopFSUtils.getFs( propsPath, Option.ofNullable(hadoopConf).orElseGet(Configuration::new) ); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index 3490c0689656..d17fe76668ca 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -31,6 +31,7 @@ import org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView; import org.apache.hudi.common.table.view.SyncableFileSystemView; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.timeline.service.TimelineService; import org.apache.hudi.utilities.UtilHelpers; @@ -78,10 +79,10 @@ public TimelineServerPerf(Config cfg) throws IOException { useExternalTimelineServer = (cfg.serverHost != null); TimelineService.Config timelineServiceConf = cfg.getTimelineServerConfig(); this.timelineServer = new TimelineService( - new HoodieLocalEngineContext(FSUtils.prepareHadoopConf(new Configuration())), + new HoodieLocalEngineContext(HadoopFSUtils.prepareHadoopConf(new Configuration())), new Configuration(), timelineServiceConf, FileSystem.get(new Configuration()), TimelineService.buildFileSystemViewManager(timelineServiceConf, - new SerializableConfiguration(FSUtils.prepareHadoopConf(new Configuration())))); + new SerializableConfiguration(HadoopFSUtils.prepareHadoopConf(new Configuration())))); } private void setHostAddrFromSparkConf(SparkConf sparkConf) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 9dbf66325d7f..2b2e0dab7369 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -19,8 +19,8 @@ package org.apache.hudi.utilities.schema; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.config.FilebasedSchemaProviderConfig; import org.apache.hudi.utilities.exception.HoodieSchemaProviderException; import org.apache.hudi.utilities.sources.helpers.SanitizationUtils; @@ -61,7 +61,7 @@ public FilebasedSchemaProvider(TypedProperties props, JavaSparkContext jssc) { this.targetFile = getStringWithAltKeys(props, FilebasedSchemaProviderConfig.TARGET_SCHEMA_FILE, sourceFile); this.shouldSanitize = SanitizationUtils.shouldSanitize(props); this.invalidCharMask = SanitizationUtils.getInvalidCharMask(props); - this.fs = FSUtils.getFs(sourceFile, jssc.hadoopConfiguration(), true); + this.fs = HadoopFSUtils.getFs(sourceFile, jssc.hadoopConfiguration(), true); this.sourceSchema = parseSchema(this.sourceFile); if (containsConfigProperty(props, FilebasedSchemaProviderConfig.TARGET_SCHEMA_FILE)) { this.targetSchema = parseSchema(this.targetFile); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java index ee76e383a42b..b658154f1adf 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HiveIncrPullSource.java @@ -19,8 +19,8 @@ package org.apache.hudi.utilities.sources; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.HiveIncrementalPuller; import org.apache.hudi.utilities.config.HiveIncrPullSourceConfig; import org.apache.hudi.utilities.exception.HoodieReadFromSourceException; @@ -83,7 +83,7 @@ public HiveIncrPullSource(TypedProperties props, JavaSparkContext sparkContext, super(props, sparkContext, sparkSession, schemaProvider); checkRequiredConfigProperties(props, Collections.singletonList(HiveIncrPullSourceConfig.ROOT_INPUT_PATH)); this.incrPullRootPath = getStringWithAltKeys(props, HiveIncrPullSourceConfig.ROOT_INPUT_PATH); - this.fs = FSUtils.getFs(incrPullRootPath, sparkContext.hadoopConfiguration()); + this.fs = HadoopFSUtils.getFs(incrPullRootPath, sparkContext.hadoopConfiguration()); } /** diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlFileBasedSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlFileBasedSource.java index 96c27f784f82..a6a93a7d073b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlFileBasedSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlFileBasedSource.java @@ -19,10 +19,10 @@ package org.apache.hudi.utilities.sources; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hadoop.fs.FileSystem; @@ -80,7 +80,7 @@ public SqlFileBasedSource( protected Pair>, String> fetchNextBatch( Option lastCkptStr, long sourceLimit) { Dataset rows = null; - final FileSystem fs = FSUtils.getFs(sourceSqlFile, sparkContext.hadoopConfiguration(), true); + final FileSystem fs = HadoopFSUtils.getFs(sourceSqlFile, sparkContext.hadoopConfiguration(), true); try { final Scanner scanner = new Scanner(fs.open(new Path(sourceSqlFile))); scanner.useDelimiter(";"); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java index 4098448b7936..750d619258e0 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.java @@ -22,11 +22,11 @@ import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.config.CloudSourceConfig; import org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig; import org.apache.hudi.utilities.schema.SchemaProvider; @@ -142,7 +142,7 @@ private static Option getUrlForFile(Row row, String storageUrlSchemePref private static boolean checkIfFileExists(String storageUrlSchemePrefix, String bucket, String filePathUrl, Configuration configuration) { try { - FileSystem fs = FSUtils.getFs(storageUrlSchemePrefix + bucket, configuration); + FileSystem fs = HadoopFSUtils.getFs(storageUrlSchemePrefix + bucket, configuration); return fs.exists(new Path(filePathUrl)); } catch (IOException ioe) { String errMsg = String.format("Error while checking path exists for %s ", filePathUrl); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java index 2a486bef83cb..c323ab4a3f60 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/DFSPathSelector.java @@ -19,13 +19,13 @@ package org.apache.hudi.utilities.sources.helpers; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.config.DFSPathSelectorConfig; import org.apache.hadoop.conf.Configuration; @@ -72,7 +72,7 @@ public DFSPathSelector(TypedProperties props, Configuration hadoopConf) { checkRequiredConfigProperties( props, Collections.singletonList(DFSPathSelectorConfig.ROOT_INPUT_PATH)); this.props = props; - this.fs = FSUtils.getFs( + this.fs = HadoopFSUtils.getFs( getStringWithAltKeys(props, DFSPathSelectorConfig.ROOT_INPUT_PATH), hadoopConf); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java index 4a7134180fbb..d7e3bca49897 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieMultiTableStreamer.java @@ -21,13 +21,13 @@ import org.apache.hudi.client.utils.OperationConverter; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncTool; import org.apache.hudi.sync.common.HoodieSyncConfig; import org.apache.hudi.utilities.IdentitySplitter; @@ -86,7 +86,7 @@ public HoodieMultiTableStreamer(Config config, JavaSparkContext jssc) throws IOE String configFolder = config.configFolder; ValidationUtils.checkArgument(!config.filterDupes || config.operation != WriteOperationType.UPSERT, "'--filter-dupes' needs to be disabled when '--op' is 'UPSERT' to ensure updates are not missed."); - FileSystem fs = FSUtils.getFs(commonPropsFile, jssc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(commonPropsFile, jssc.hadoopConfiguration()); configFolder = configFolder.charAt(configFolder.length() - 1) == '/' ? configFolder.substring(0, configFolder.length() - 1) : configFolder; checkIfPropsFileAndConfigFolderExist(commonPropsFile, configFolder, fs); TypedProperties commonProperties = UtilHelpers.readConfig(fs.getConf(), new Path(commonPropsFile), new ArrayList()).getProps(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java index 11998f2cfacd..9ff666b049cc 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/HoodieStreamer.java @@ -34,7 +34,6 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.EngineProperty; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.WriteOperationType; @@ -56,6 +55,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieUpsertException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncTool; import org.apache.hudi.utilities.HiveIncrementalPuller; import org.apache.hudi.utilities.IdentitySplitter; @@ -130,12 +130,12 @@ public class HoodieStreamer implements Serializable { public static final String STREAMSYNC_POOL_NAME = "hoodiedeltasync"; public HoodieStreamer(Config cfg, JavaSparkContext jssc) throws IOException { - this(cfg, jssc, FSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()), + this(cfg, jssc, HadoopFSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()), jssc.hadoopConfiguration(), Option.empty()); } public HoodieStreamer(Config cfg, JavaSparkContext jssc, Option props) throws IOException { - this(cfg, jssc, FSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()), + this(cfg, jssc, HadoopFSUtils.getFs(cfg.targetBasePath, jssc.hadoopConfiguration()), jssc.hadoopConfiguration(), props); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java index 0fd7a41ab556..11a19b030fc5 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java @@ -24,7 +24,6 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -33,7 +32,8 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.hadoop.CachingPath; +import org.apache.hudi.hadoop.fs.CachingPath; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -139,7 +139,7 @@ private static Pair doSampleWrites(JavaSparkContext jsc, Option private static String getSampleWritesBasePath(JavaSparkContext jsc, HoodieWriteConfig writeConfig, String instantTime) throws IOException { Path basePath = new CachingPath(writeConfig.getBasePath(), SAMPLE_WRITES_FOLDER_PATH + Path.SEPARATOR + instantTime); - FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()); if (fs.exists(basePath)) { fs.delete(basePath, true); } @@ -159,7 +159,7 @@ private static long getAvgSizeFromSampleWrites(JavaSparkContext jsc, String samp } private static HoodieTableMetaClient getMetaClient(JavaSparkContext jsc, String basePath) { - FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()); return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build(); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index d030b08b7612..a55509eadc05 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -38,7 +38,6 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; @@ -66,6 +65,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieMetaSyncException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HiveSyncTool; import org.apache.hudi.internal.schema.InternalSchema; @@ -970,7 +970,7 @@ public void runMetaSync() { } if (cfg.enableMetaSync) { LOG.debug("[MetaSync] Starting sync"); - FileSystem fs = FSUtils.getFs(cfg.targetBasePath, hoodieSparkContext.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, hoodieSparkContext.hadoopConfiguration()); TypedProperties metaProps = new TypedProperties(); metaProps.putAll(props); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java index c760ec5397a2..6c3b10bd2647 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java @@ -19,7 +19,7 @@ package org.apache.hudi.utilities.transform; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.config.SqlTransformerConfig; import org.apache.hudi.utilities.exception.HoodieTransformException; import org.apache.hudi.utilities.exception.HoodieTransformExecutionException; @@ -77,7 +77,7 @@ public Dataset apply( "Missing required configuration : (" + SqlTransformerConfig.TRANSFORMER_SQL_FILE.key() + ")"); } - final FileSystem fs = FSUtils.getFs(sqlFile, jsc.hadoopConfiguration(), true); + final FileSystem fs = HadoopFSUtils.getFs(sqlFile, jsc.hadoopConfiguration(), true); // tmp table name doesn't like dashes final String tmpTable = TMP_TABLE.concat(UUID.randomUUID().toString().replace("-", "_")); LOG.info("Registering tmp table : " + tmpTable); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index 83307a912367..e05a0c0d05e4 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -31,7 +31,6 @@ import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -63,6 +62,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieIncrementalPathNotFoundException; import org.apache.hudi.exception.TableNotFoundException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hive.HiveSyncConfig; import org.apache.hudi.hive.HoodieHiveSyncClient; import org.apache.hudi.keygen.ComplexKeyGenerator; @@ -632,7 +632,7 @@ public void testSchemaEvolution(String tableType, boolean useUserProvidedSchema, // clean up and reinit UtilitiesTestBase.Helpers.deleteFileFromDfs(fs, tableBasePath); - UtilitiesTestBase.Helpers.deleteFileFromDfs(FSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()), basePath + "/" + PROPS_FILENAME_TEST_SOURCE); + UtilitiesTestBase.Helpers.deleteFileFromDfs(HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()), basePath + "/" + PROPS_FILENAME_TEST_SOURCE); writeCommonPropsToFile(fs, basePath); defaultSchemaProviderClassName = FilebasedSchemaProvider.class.getName(); } @@ -1593,7 +1593,7 @@ public void testPayloadClassUpdate() throws Exception { //now assert that hoodie.properties file now has updated payload class name Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; - FileSystem fs = FSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } @@ -1613,7 +1613,7 @@ public void testPartialPayloadClass() throws Exception { //now assert that hoodie.properties file now has updated payload class name Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; - FileSystem fs = FSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } @@ -1638,7 +1638,7 @@ public void testPayloadClassUpdateWithCOWTable() throws Exception { //now assert that hoodie.properties file does not have payload class prop since it is a COW table Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; - FileSystem fs = FSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); + FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java index 9d4ce71d8f25..453188a19b1e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.testutils.FunctionalTestHarness; import org.apache.hudi.utilities.HoodieSnapshotCopier; @@ -58,7 +59,7 @@ public void init() throws IOException { outputPath = rootPath + "/output"; final Configuration hadoopConf = HoodieTestUtils.getDefaultHadoopConf(); - fs = FSUtils.getFs(basePath, hadoopConf); + fs = HadoopFSUtils.getFs(basePath, hadoopConf); HoodieTestUtils.init(hadoopConf, basePath); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java index b6187e989d9e..53536f35e421 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotExporter.java @@ -20,7 +20,6 @@ import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.client.SparkRDDWriteClient; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -28,6 +27,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.testutils.SparkClientFunctionalTestHarness; import org.apache.hudi.utilities.HoodieSnapshotExporter; @@ -83,7 +83,7 @@ public void init() throws Exception { // Initialize test data dirs sourcePath = Paths.get(basePath(), "source").toString(); targetPath = Paths.get(basePath(), "target").toString(); - lfs = (LocalFileSystem) FSUtils.getFs(basePath(), jsc().hadoopConfiguration()); + lfs = (LocalFileSystem) HadoopFSUtils.getFs(basePath(), jsc().hadoopConfiguration()); HoodieTableMetaClient.withPropertyBuilder() .setTableType(HoodieTableType.COPY_ON_WRITE) diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java index 21154a970b0c..0919a8c31eda 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java @@ -19,9 +19,9 @@ package org.apache.hudi.utilities.sources.helpers; -import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.utilities.deltastreamer.TestSourceFormatAdapter; import org.apache.hudi.utilities.testutils.SanitizationTestUtils; @@ -124,7 +124,7 @@ public void testBadAvroSchemaDisabledTest() { @Test private String getJson(String path) { - FileSystem fs = FSUtils.getFs(path, jsc.hadoopConfiguration(), true); + FileSystem fs = HadoopFSUtils.getFs(path, jsc.hadoopConfiguration(), true); String schemaStr; try (FSDataInputStream in = fs.open(new Path(path))) { schemaStr = FileIOUtils.readAsUTFString(in); diff --git a/packaging/hudi-aws-bundle/pom.xml b/packaging/hudi-aws-bundle/pom.xml index 74c12c2bb945..3ed4b99d9f21 100644 --- a/packaging/hudi-aws-bundle/pom.xml +++ b/packaging/hudi-aws-bundle/pom.xml @@ -72,6 +72,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-hadoop-mr org.apache.hudi:hudi-sync-common org.apache.hudi:hudi-hive-sync diff --git a/packaging/hudi-datahub-sync-bundle/pom.xml b/packaging/hudi-datahub-sync-bundle/pom.xml index 34b931b316ec..95017e22e950 100644 --- a/packaging/hudi-datahub-sync-bundle/pom.xml +++ b/packaging/hudi-datahub-sync-bundle/pom.xml @@ -68,6 +68,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-sync-common org.apache.hudi:hudi-datahub-sync diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 1d15f1b1d99b..d00f6b654e13 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -75,6 +75,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-flink-client diff --git a/packaging/hudi-gcp-bundle/pom.xml b/packaging/hudi-gcp-bundle/pom.xml index 112f6f4c96d2..ad18eac5942e 100644 --- a/packaging/hudi-gcp-bundle/pom.xml +++ b/packaging/hudi-gcp-bundle/pom.xml @@ -92,6 +92,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr org.apache.hudi:hudi-sync-common diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 8c9dc5f9a157..62db2cae77e4 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -66,6 +66,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index 0567e3d7a3f6..b384870c0c99 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -66,6 +66,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr org.apache.hudi:hudi-sync-common diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index c0abd00e7ab3..01825a1ab993 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -72,6 +72,7 @@ commons-lang:commons-lang commons-pool:commons-pool + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-spark-client diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml index da9ecb0f2c41..d085e460a46f 100644 --- a/packaging/hudi-kafka-connect-bundle/pom.xml +++ b/packaging/hudi-kafka-connect-bundle/pom.xml @@ -72,6 +72,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-java-client diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index 2324cf32a058..a0eadc1fbd15 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -66,6 +66,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index 361e83013202..e0c7c1463653 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -69,6 +69,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-spark-client diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml index 4ef131174071..ff9a9712e090 100644 --- a/packaging/hudi-timeline-server-bundle/pom.xml +++ b/packaging/hudi-timeline-server-bundle/pom.xml @@ -158,6 +158,7 @@ Include hudi-timeline-server with javalin dependencies. hadoop deps are to be provided at runtime. see run_server.sh --> + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-timeline-service org.mortbay.jetty:jetty diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index 30e17b6deff7..97a6523f00ff 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -67,6 +67,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-java-client diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 0d01bace432e..3bac795c91b9 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -91,6 +91,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-spark-client diff --git a/packaging/hudi-utilities-slim-bundle/pom.xml b/packaging/hudi-utilities-slim-bundle/pom.xml index 3fce33ae6efd..1d2b338cb8f5 100644 --- a/packaging/hudi-utilities-slim-bundle/pom.xml +++ b/packaging/hudi-utilities-slim-bundle/pom.xml @@ -91,6 +91,7 @@ + org.apache.hudi:hudi-hadoop-common org.apache.hudi:hudi-common org.apache.hudi:hudi-client-common org.apache.hudi:hudi-utilities_${scala.binary.version} diff --git a/pom.xml b/pom.xml index 9f99be88feb3..ab51c9988f37 100644 --- a/pom.xml +++ b/pom.xml @@ -40,6 +40,7 @@ hudi-client hudi-aws hudi-gcp + hudi-hadoop-common hudi-hadoop-mr hudi-io hudi-spark-datasource From b5200bfed284c459bcb4629828d1afe4aa3902fa Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Mon, 29 Jan 2024 03:54:02 +0100 Subject: [PATCH 082/112] [HUDI-7351] Fix missing implementation for glue metastore schema retrieval (#10572) --- .../apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index 23f382435fdd..e038b9539a70 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -575,6 +575,14 @@ public Map getMetastoreSchema(String tableName) { } } + @Override + public List getMetastoreFieldSchemas(String tableName) { + Map schema = getMetastoreSchema(tableName); + return schema.entrySet().stream() + .map(f -> new FieldSchema(f.getKey(), f.getValue())) + .collect(Collectors.toList()); + } + @Override public boolean tableExists(String tableName) { GetTableRequest request = GetTableRequest.builder() From 005c7584958b75f954b321f4c4fa0b10430f5bfa Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 28 Jan 2024 21:27:16 -0800 Subject: [PATCH 083/112] [HUDI-7336] Introduce new HoodieStorage abstraction (#10567) This commit introduces `HoodieStorage` abstraction and Hudi's counterpart classes for Hadoop File System classes (`org.apache.hadoop.fs.`[`FileSystem`, `Path`, `PathFilter`, `FileStatus`]) to decouple Hudi's implementation from Hadoop classes, so it's much easier to plugin different file system implementation. --- hudi-hadoop-common/pom.xml | 8 + .../storage/hadoop/HoodieHadoopStorage.java | 201 ++++++++++ .../storage/TestHoodieHadoopStorage.java | 53 +++ .../org/apache/hudi/ApiMaturityLevel.java | 0 .../java/org/apache/hudi/PublicAPIClass.java | 0 .../java/org/apache/hudi/PublicAPIMethod.java | 0 .../java/org/apache/hudi/io/util/IOUtils.java | 16 + .../apache/hudi/storage/HoodieFileStatus.java | 120 ++++++ .../apache/hudi/storage/HoodieLocation.java | 262 +++++++++++++ .../hudi/storage/HoodieLocationFilter.java | 42 +++ .../apache/hudi/storage/HoodieStorage.java | 355 ++++++++++++++++++ .../hudi/io/storage/TestHoodieFileStatus.java | 102 +++++ .../hudi/io/storage/TestHoodieLocation.java | 192 ++++++++++ .../io/storage/TestHoodieLocationFilter.java | 73 ++++ .../io/storage/TestHoodieStorageBase.java | 353 +++++++++++++++++ 15 files changed, 1777 insertions(+) create mode 100644 hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java create mode 100644 hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/storage/TestHoodieHadoopStorage.java rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/ApiMaturityLevel.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/PublicAPIClass.java (100%) rename {hudi-common => hudi-io}/src/main/java/org/apache/hudi/PublicAPIMethod.java (100%) create mode 100644 hudi-io/src/main/java/org/apache/hudi/storage/HoodieFileStatus.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocationFilter.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieFileStatus.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocationFilter.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java diff --git a/hudi-hadoop-common/pom.xml b/hudi-hadoop-common/pom.xml index be5a3ab610d8..e4fbf2d94a99 100644 --- a/hudi-hadoop-common/pom.xml +++ b/hudi-hadoop-common/pom.xml @@ -98,5 +98,13 @@ ${project.version} test + + + org.apache.hudi + hudi-io + tests + ${project.version} + test + diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java new file mode 100644 index 000000000000..b863e97cba16 --- /dev/null +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.storage.hadoop; + +import org.apache.hudi.storage.HoodieFileStatus; +import org.apache.hudi.storage.HoodieLocation; +import org.apache.hudi.storage.HoodieLocationFilter; +import org.apache.hudi.storage.HoodieStorage; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Implementation of {@link HoodieStorage} using Hadoop's {@link FileSystem} + */ +public class HoodieHadoopStorage extends HoodieStorage { + private final FileSystem fs; + + public HoodieHadoopStorage(FileSystem fs) { + this.fs = fs; + } + + @Override + public String getScheme() { + return fs.getScheme(); + } + + @Override + public OutputStream create(HoodieLocation location, boolean overwrite) throws IOException { + return fs.create(convertHoodieLocationToPath(location), overwrite); + } + + @Override + public InputStream open(HoodieLocation location) throws IOException { + return fs.open(convertHoodieLocationToPath(location)); + } + + @Override + public OutputStream append(HoodieLocation location) throws IOException { + return fs.append(convertHoodieLocationToPath(location)); + } + + @Override + public boolean exists(HoodieLocation location) throws IOException { + return fs.exists(convertHoodieLocationToPath(location)); + } + + @Override + public HoodieFileStatus getFileStatus(HoodieLocation location) throws IOException { + return convertToHoodieFileStatus(fs.getFileStatus(convertHoodieLocationToPath(location))); + } + + @Override + public boolean createDirectory(HoodieLocation location) throws IOException { + return fs.mkdirs(convertHoodieLocationToPath(location)); + } + + @Override + public List listDirectEntries(HoodieLocation location) throws IOException { + return Arrays.stream(fs.listStatus(convertHoodieLocationToPath(location))) + .map(this::convertToHoodieFileStatus) + .collect(Collectors.toList()); + } + + @Override + public List listFiles(HoodieLocation location) throws IOException { + List result = new ArrayList<>(); + RemoteIterator iterator = fs.listFiles(convertHoodieLocationToPath(location), true); + while (iterator.hasNext()) { + result.add(convertToHoodieFileStatus(iterator.next())); + } + return result; + } + + @Override + public List listDirectEntries(List locationList) throws IOException { + return Arrays.stream(fs.listStatus(locationList.stream() + .map(this::convertHoodieLocationToPath) + .toArray(Path[]::new))) + .map(this::convertToHoodieFileStatus) + .collect(Collectors.toList()); + } + + @Override + public List listDirectEntries(HoodieLocation location, + HoodieLocationFilter filter) + throws IOException { + return Arrays.stream(fs.listStatus( + convertHoodieLocationToPath(location), path -> + filter.accept(convertPathToHoodieLocation(path)))) + .map(this::convertToHoodieFileStatus) + .collect(Collectors.toList()); + } + + @Override + public List globEntries(HoodieLocation locationPattern) + throws IOException { + return Arrays.stream(fs.globStatus(convertHoodieLocationToPath(locationPattern))) + .map(this::convertToHoodieFileStatus) + .collect(Collectors.toList()); + } + + @Override + public List globEntries(HoodieLocation locationPattern, HoodieLocationFilter filter) + throws IOException { + return Arrays.stream(fs.globStatus(convertHoodieLocationToPath(locationPattern), path -> + filter.accept(convertPathToHoodieLocation(path)))) + .map(this::convertToHoodieFileStatus) + .collect(Collectors.toList()); + } + + @Override + public boolean rename(HoodieLocation oldLocation, HoodieLocation newLocation) throws IOException { + return fs.rename(convertHoodieLocationToPath(oldLocation), convertHoodieLocationToPath(newLocation)); + } + + @Override + public boolean deleteDirectory(HoodieLocation location) throws IOException { + return fs.delete(convertHoodieLocationToPath(location), true); + } + + @Override + public boolean deleteFile(HoodieLocation location) throws IOException { + return fs.delete(convertHoodieLocationToPath(location), false); + } + + @Override + public HoodieLocation makeQualified(HoodieLocation location) { + return convertPathToHoodieLocation( + fs.makeQualified(convertHoodieLocationToPath(location))); + } + + @Override + public Object getFileSystem() { + return fs; + } + + @Override + public Object getConf() { + return fs.getConf(); + } + + @Override + public OutputStream create(HoodieLocation location) throws IOException { + return fs.create(convertHoodieLocationToPath(location)); + } + + @Override + public boolean createNewFile(HoodieLocation location) throws IOException { + return fs.createNewFile(convertHoodieLocationToPath(location)); + } + + private Path convertHoodieLocationToPath(HoodieLocation loc) { + return new Path(loc.toUri()); + } + + private HoodieLocation convertPathToHoodieLocation(Path path) { + return new HoodieLocation(path.toUri()); + } + + private HoodieFileStatus convertToHoodieFileStatus(FileStatus fileStatus) { + return new HoodieFileStatus( + convertPathToHoodieLocation(fileStatus.getPath()), + fileStatus.getLen(), + fileStatus.isDirectory(), + fileStatus.getModificationTime()); + } + + @Override + public void close() throws IOException { + fs.close(); + } +} diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/storage/TestHoodieHadoopStorage.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/storage/TestHoodieHadoopStorage.java new file mode 100644 index 000000000000..3eaf4135032d --- /dev/null +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/hadoop/storage/TestHoodieHadoopStorage.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.hadoop.storage; + +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.io.storage.TestHoodieStorageBase; +import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.hadoop.HoodieHadoopStorage; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +/** + * Tests {@link HoodieHadoopStorage}. + */ +public class TestHoodieHadoopStorage extends TestHoodieStorageBase { + private static final String CONF_KEY = "hudi.testing.key"; + private static final String CONF_VALUE = "value"; + + @Override + protected HoodieStorage getHoodieStorage(Object fs, Object conf) { + return new HoodieHadoopStorage((FileSystem) fs); + } + + @Override + protected Object getFileSystem(Object conf) { + return HadoopFSUtils.getFs(getTempDir(), (Configuration) conf, true); + } + + @Override + protected Object getConf() { + Configuration conf = new Configuration(); + conf.set(CONF_KEY, CONF_VALUE); + return conf; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/ApiMaturityLevel.java b/hudi-io/src/main/java/org/apache/hudi/ApiMaturityLevel.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/ApiMaturityLevel.java rename to hudi-io/src/main/java/org/apache/hudi/ApiMaturityLevel.java diff --git a/hudi-common/src/main/java/org/apache/hudi/PublicAPIClass.java b/hudi-io/src/main/java/org/apache/hudi/PublicAPIClass.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/PublicAPIClass.java rename to hudi-io/src/main/java/org/apache/hudi/PublicAPIClass.java diff --git a/hudi-common/src/main/java/org/apache/hudi/PublicAPIMethod.java b/hudi-io/src/main/java/org/apache/hudi/PublicAPIMethod.java similarity index 100% rename from hudi-common/src/main/java/org/apache/hudi/PublicAPIMethod.java rename to hudi-io/src/main/java/org/apache/hudi/PublicAPIMethod.java diff --git a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java index 5eeb21011cf0..96cc6df95cc8 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java @@ -19,8 +19,10 @@ package org.apache.hudi.io.util; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; /** * Util methods on I/O. @@ -249,4 +251,18 @@ public static int readFully(InputStream inputStream, } return totalBytesRead; } + + public static byte[] readAsByteArray(InputStream input, int outputSize) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(outputSize); + copy(input, bos); + return bos.toByteArray(); + } + + public static void copy(InputStream inputStream, OutputStream outputStream) throws IOException { + byte[] buffer = new byte[1024]; + int len; + while ((len = inputStream.read(buffer)) != -1) { + outputStream.write(buffer, 0, len); + } + } } diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieFileStatus.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieFileStatus.java new file mode 100644 index 000000000000..6f033c5bc954 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieFileStatus.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.storage; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; + +import java.io.Serializable; + +/** + * Represents the information of a directory or a file. + * The APIs are mainly based on {@code org.apache.hadoop.fs.FileStatus} class + * with simplification based on what Hudi needs. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public class HoodieFileStatus implements Serializable { + private final HoodieLocation location; + private final long length; + private final boolean isDirectory; + private final long modificationTime; + + public HoodieFileStatus(HoodieLocation location, + long length, + boolean isDirectory, + long modificationTime) { + this.location = location; + this.length = length; + this.isDirectory = isDirectory; + this.modificationTime = modificationTime; + } + + /** + * @return the location. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public HoodieLocation getLocation() { + return location; + } + + /** + * @return the length of a file in bytes. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public long getLength() { + return length; + } + + /** + * @return whether this is a file. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public boolean isFile() { + return !isDirectory; + } + + /** + * @return whether this is a directory. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public boolean isDirectory() { + return isDirectory; + } + + /** + * @return the modification of a file. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public long getModificationTime() { + return modificationTime; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HoodieFileStatus that = (HoodieFileStatus) o; + // PLEASE NOTE that here we follow the same contract hadoop's FileStatus provides, + // i.e., the equality is purely based on the location. + return getLocation().equals(that.getLocation()); + } + + @Override + public int hashCode() { + // PLEASE NOTE that here we follow the same contract hadoop's FileStatus provides, + // i.e., the hash code is purely based on the location. + return getLocation().hashCode(); + } + + @Override + public String toString() { + return "HoodieFileStatus{" + + "location=" + location + + ", length=" + length + + ", isDirectory=" + isDirectory + + ", modificationTime=" + modificationTime + + '}'; + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java new file mode 100644 index 000000000000..3b3a05dc9b42 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.storage; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; + +import java.io.Serializable; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Names a file or directory on storage. + * Location strings use slash (`/`) as the directory separator. + * The APIs are mainly based on {@code org.apache.hadoop.fs.Path} class. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public class HoodieLocation implements Comparable, Serializable { + public static final char SEPARATOR_CHAR = '/'; + public static final char COLON_CHAR = ':'; + public static final String SEPARATOR = "" + SEPARATOR_CHAR; + private final URI uri; + private transient volatile HoodieLocation cachedParent; + private transient volatile String cachedName; + private transient volatile String uriString; + + public HoodieLocation(URI uri) { + this.uri = uri.normalize(); + } + + public HoodieLocation(String path) { + try { + // This part of parsing is compatible with hadoop's Path + // and required for properly handling encoded path with URI + String scheme = null; + String authority = null; + + int start = 0; + + // Parse URI scheme, if any + int colon = path.indexOf(COLON_CHAR); + int slash = path.indexOf(SEPARATOR_CHAR); + if (colon != -1 + && ((slash == -1) || (colon < slash))) { + scheme = path.substring(0, colon); + start = colon + 1; + } + + // Parse URI authority, if any + if (path.startsWith("//", start) + && (path.length() - start > 2)) { + int nextSlash = path.indexOf(SEPARATOR_CHAR, start + 2); + int authEnd = nextSlash > 0 ? nextSlash : path.length(); + authority = path.substring(start + 2, authEnd); + start = authEnd; + } + + // URI path is the rest of the string -- query & fragment not supported + String uriPath = path.substring(start); + + this.uri = new URI(scheme, authority, normalize(uriPath, true), null, null).normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + public HoodieLocation(String parent, String child) { + this(new HoodieLocation(parent), child); + } + + public HoodieLocation(HoodieLocation parent, String child) { + URI parentUri = parent.toUri(); + String normalizedChild = normalize(child, false); + + if (normalizedChild.isEmpty()) { + this.uri = parentUri; + return; + } + + if (!child.contains(SEPARATOR)) { + this.cachedParent = parent; + } + String parentPathWithSeparator = parentUri.getPath(); + if (!parentPathWithSeparator.endsWith(SEPARATOR)) { + parentPathWithSeparator = parentPathWithSeparator + SEPARATOR; + } + try { + URI resolvedUri = new URI( + parentUri.getScheme(), + parentUri.getAuthority(), + parentPathWithSeparator, + null, + parentUri.getFragment()).resolve(normalizedChild); + this.uri = new URI( + parentUri.getScheme(), + parentUri.getAuthority(), + resolvedUri.getPath(), + null, + resolvedUri.getFragment()).normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public boolean isAbsolute() { + return uri.getPath().startsWith(SEPARATOR); + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public HoodieLocation getParent() { + // This value could be overwritten concurrently and that's okay, since + // {@code HoodieLocation} is immutable + if (cachedParent == null) { + String path = uri.getPath(); + int lastSlash = path.lastIndexOf(SEPARATOR_CHAR); + if (path.isEmpty() || path.equals(SEPARATOR)) { + throw new IllegalStateException("Cannot get parent location of a root location"); + } + String parentPath = lastSlash == -1 + ? "" : path.substring(0, lastSlash == 0 ? 1 : lastSlash); + try { + cachedParent = new HoodieLocation(new URI( + uri.getScheme(), uri.getAuthority(), parentPath, null, uri.getFragment())); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + return cachedParent; + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public String getName() { + // This value could be overwritten concurrently and that's okay, since + // {@code HoodieLocation} is immutable + if (cachedName == null) { + String path = uri.getPath(); + int slash = path.lastIndexOf(SEPARATOR); + cachedName = path.substring(slash + 1); + } + return cachedName; + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public HoodieLocation getLocationWithoutSchemeAndAuthority() { + try { + return new HoodieLocation( + new URI(null, null, uri.getPath(), uri.getQuery(), uri.getFragment())); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public int depth() { + String path = uri.getPath(); + int depth = 0; + int slash = path.length() == 1 && path.charAt(0) == SEPARATOR_CHAR ? -1 : 0; + while (slash != -1) { + depth++; + slash = path.indexOf(SEPARATOR_CHAR, slash + 1); + } + return depth; + } + + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public URI toUri() { + return uri; + } + + @Override + public String toString() { + // This value could be overwritten concurrently and that's okay, since + // {@code HoodieLocation} is immutable + if (uriString == null) { + // We can't use uri.toString(), which escapes everything, because we want + // illegal characters unescaped in the string, for glob processing, etc. + StringBuilder buffer = new StringBuilder(); + if (uri.getScheme() != null) { + buffer.append(uri.getScheme()) + .append(":"); + } + if (uri.getAuthority() != null) { + buffer.append("//") + .append(uri.getAuthority()); + } + if (uri.getPath() != null) { + String path = uri.getPath(); + buffer.append(path); + } + if (uri.getFragment() != null) { + buffer.append("#").append(uri.getFragment()); + } + uriString = buffer.toString(); + } + return uriString; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof HoodieLocation)) { + return false; + } + return this.uri.equals(((HoodieLocation) o).toUri()); + } + + @Override + public int hashCode() { + return uri.hashCode(); + } + + @Override + public int compareTo(HoodieLocation o) { + return this.uri.compareTo(o.uri); + } + + /** + * Normalizes the path by removing the trailing slashes (`/`). + * When {@code keepSingleSlash} is {@code true}, `/` as the path is not changed; + * otherwise ({@code false}), `/` becomes empty String after normalization. + * + * @param path {@link String} path to normalize. + * @param keepSingleSlash whether to keep `/` as the path. + * @return normalized path. + */ + private static String normalize(String path, boolean keepSingleSlash) { + int indexOfLastSlash = path.length() - 1; + while (indexOfLastSlash >= 0) { + if (path.charAt(indexOfLastSlash) != SEPARATOR_CHAR) { + break; + } + indexOfLastSlash--; + } + indexOfLastSlash++; + if (indexOfLastSlash == path.length()) { + return path; + } + if (keepSingleSlash && indexOfLastSlash == 0) { + // All slashes and we want to keep one slash + return SEPARATOR; + } + return path.substring(0, indexOfLastSlash); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocationFilter.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocationFilter.java new file mode 100644 index 000000000000..d33686c030c0 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocationFilter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.storage; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; + +import java.io.Serializable; + +/** + * Filter for {@link HoodieLocation} + * The APIs are mainly based on {@code org.apache.hadoop.fs.PathFilter} class. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public interface HoodieLocationFilter extends Serializable { + /** + * Tests whether the specified location should be included in a location list. + * + * @param location the location to be tested. + * @return {@code true} if and only if location should be included. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + boolean accept(HoodieLocation location); +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java new file mode 100644 index 000000000000..eea2c3ff692c --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.storage; + +import org.apache.hudi.ApiMaturityLevel; +import org.apache.hudi.PublicAPIClass; +import org.apache.hudi.PublicAPIMethod; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieIOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * Provides I/O APIs on files and directories on storage. + * The APIs are mainly based on {@code org.apache.hadoop.fs.FileSystem} class. + */ +@PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) +public abstract class HoodieStorage implements Closeable { + public static final Logger LOG = LoggerFactory.getLogger(HoodieStorage.class); + public static final String TMP_PATH_POSTFIX = ".tmp"; + + /** + * @return the scheme of the storage. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract String getScheme(); + + /** + * Creates an OutputStream at the indicated location. + * + * @param location the file to create. + * @param overwrite if a file with this name already exists, then if {@code true}, + * the file will be overwritten, and if {@code false} an exception will be thrown. + * @return the OutputStream to write to. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract OutputStream create(HoodieLocation location, boolean overwrite) throws IOException; + + /** + * Opens an InputStream at the indicated location. + * + * @param location the file to open. + * @return the InputStream to read from. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract InputStream open(HoodieLocation location) throws IOException; + + /** + * Appends to an existing file (optional operation). + * + * @param location the file to append. + * @return the OutputStream to write to. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract OutputStream append(HoodieLocation location) throws IOException; + + /** + * Checks if a location exists. + * + * @param location location to check. + * @return {@code true} if the location exists. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract boolean exists(HoodieLocation location) throws IOException; + + /** + * Returns a file status object that represents the location. + * + * @param location location to check. + * @return a {@link HoodieFileStatus} object. + * @throws FileNotFoundException when the path does not exist. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract HoodieFileStatus getFileStatus(HoodieLocation location) throws IOException; + + /** + * Creates the directory and non-existent parent directories. + * + * @param location location to create. + * @return {@code true} if the directory was created. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract boolean createDirectory(HoodieLocation location) throws IOException; + + /** + * Lists the statuses of the direct files/directories in the given location if the path is a directory. + * + * @param location given location. + * @return the statuses of the files/directories in the given location. + * @throws FileNotFoundException when the location does not exist. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract List listDirectEntries(HoodieLocation location) throws IOException; + + /** + * Lists the statuses of all files under the give location recursively. + * + * @param location given location. + * @return the statuses of the files under the given location. + * @throws FileNotFoundException when the location does not exist. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract List listFiles(HoodieLocation location) throws IOException; + + /** + * Lists the statuses of the direct files/directories in the given location + * and filters the results, if the path is a directory. + * + * @param location given location. + * @param filter filter to apply. + * @return the statuses of the files/directories in the given location. + * @throws FileNotFoundException when the location does not exist. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract List listDirectEntries(HoodieLocation location, + HoodieLocationFilter filter) throws IOException; + + /** + * Returns all the files that match the locationPattern and are not checksum files, + * and filters the results. + * + * @param locationPattern given pattern. + * @param filter filter to apply. + * @return the statuses of the files. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract List globEntries(HoodieLocation locationPattern, + HoodieLocationFilter filter) throws IOException; + + /** + * Renames the location from old to new. + * + * @param oldLocation source location. + * @param newLocation destination location. + * @return {@true} if rename is successful. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract boolean rename(HoodieLocation oldLocation, + HoodieLocation newLocation) throws IOException; + + /** + * Deletes a directory at location. + * + * @param location directory to delete. + * @return {@code true} if successful. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract boolean deleteDirectory(HoodieLocation location) throws IOException; + + /** + * Deletes a file at location. + * + * @param location file to delete. + * @return {@code true} if successful. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract boolean deleteFile(HoodieLocation location) throws IOException; + + /** + * Qualifies a path to one which uses this storage and, if relative, made absolute. + * + * @param location to qualify. + * @return Qualified location. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract HoodieLocation makeQualified(HoodieLocation location); + + /** + * @return the underlying file system instance if exists. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract Object getFileSystem(); + + /** + * @return the underlying configuration instance if exists. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract Object getConf(); + + /** + * Creates a new file with overwrite set to false. This ensures files are created + * only once and never rewritten, also, here we take care if the content is not + * empty, will first write the content to a temp file if {needCreateTempFile} is + * true, and then rename it back after the content is written. + * + * @param location file Path. + * @param content content to be stored. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public final void createImmutableFileInPath(HoodieLocation location, + Option content) throws IOException { + OutputStream fsout = null; + HoodieLocation tmpLocation = null; + + boolean needTempFile = needCreateTempFile(); + + try { + if (!content.isPresent()) { + fsout = create(location, false); + } + + if (content.isPresent() && needTempFile) { + HoodieLocation parent = location.getParent(); + tmpLocation = new HoodieLocation(parent, location.getName() + TMP_PATH_POSTFIX); + fsout = create(tmpLocation, false); + fsout.write(content.get()); + } + + if (content.isPresent() && !needTempFile) { + fsout = create(location, false); + fsout.write(content.get()); + } + } catch (IOException e) { + String errorMsg = "Failed to create file " + (tmpLocation != null ? tmpLocation : location); + throw new HoodieIOException(errorMsg, e); + } finally { + try { + if (null != fsout) { + fsout.close(); + } + } catch (IOException e) { + String errorMsg = "Failed to close file " + (needTempFile ? tmpLocation : location); + throw new HoodieIOException(errorMsg, e); + } + + boolean renameSuccess = false; + try { + if (null != tmpLocation) { + renameSuccess = rename(tmpLocation, location); + } + } catch (IOException e) { + throw new HoodieIOException( + "Failed to rename " + tmpLocation + " to the target " + location, + e); + } finally { + if (!renameSuccess && null != tmpLocation) { + try { + deleteFile(tmpLocation); + LOG.warn("Fail to rename " + tmpLocation + " to " + location + + ", target file exists: " + exists(location)); + } catch (IOException e) { + throw new HoodieIOException("Failed to delete tmp file " + tmpLocation, e); + } + } + } + } + } + + /** + * @return whether a temporary file needs to be created for immutability. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public final boolean needCreateTempFile() { + return StorageSchemes.HDFS.getScheme().equals(getScheme()); + } + + /** + * Create an OutputStream at the indicated location. + * The file is overwritten by default. + * + * @param location the file to create. + * @return the OutputStream to write to. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public OutputStream create(HoodieLocation location) throws IOException { + return create(location, true); + } + + /** + * Creates an empty new file at the indicated location. + * + * @param location the file to create. + * @return {@code true} if successfully created; {@code false} if already exists. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public boolean createNewFile(HoodieLocation location) throws IOException { + if (exists(location)) { + return false; + } else { + create(location, false).close(); + return true; + } + } + + /** + * Lists the statuses of the direct files/directories in the given list of locations, + * if the locations are directory. + * + * @param locationList given location list. + * @return the statuses of the files/directories in the given locations. + * @throws FileNotFoundException when the location does not exist. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public List listDirectEntries(List locationList) throws IOException { + List result = new ArrayList<>(); + for (HoodieLocation location : locationList) { + result.addAll(listDirectEntries(location)); + } + return result; + } + + /** + * Returns all the files that match the locationPattern and are not checksum files. + * + * @param locationPattern given pattern. + * @return the statuses of the files. + * @throws IOException IO error. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public List globEntries(HoodieLocation locationPattern) throws IOException { + return globEntries(locationPattern, e -> true); + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieFileStatus.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieFileStatus.java new file mode 100644 index 000000000000..903fc4b4e3ad --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieFileStatus.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.storage.HoodieFileStatus; +import org.apache.hudi.storage.HoodieLocation; + +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +/** + * Tests {@link HoodieFileStatus} + */ +public class TestHoodieFileStatus { + private static final Logger LOG = LoggerFactory.getLogger(TestHoodieFileStatus.class); + private static final long LENGTH = 100; + private static final long MODIFICATION_TIME = System.currentTimeMillis(); + private static final String PATH1 = "/abc/xyz1"; + private static final String PATH2 = "/abc/xyz2"; + private static final HoodieLocation LOCATION1 = new HoodieLocation(PATH1); + private static final HoodieLocation LOCATION2 = new HoodieLocation(PATH2); + + @Test + public void testConstructor() { + HoodieFileStatus fileStatus = new HoodieFileStatus(LOCATION1, LENGTH, false, MODIFICATION_TIME); + validateAccessors(fileStatus, PATH1, LENGTH, false, MODIFICATION_TIME); + fileStatus = new HoodieFileStatus(LOCATION2, -1, true, MODIFICATION_TIME + 2L); + validateAccessors(fileStatus, PATH2, -1, true, MODIFICATION_TIME + 2L); + } + + @Test + public void testSerializability() throws IOException, ClassNotFoundException { + HoodieFileStatus fileStatus = new HoodieFileStatus(LOCATION1, LENGTH, false, MODIFICATION_TIME); + try (ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(baos)) { + oos.writeObject(fileStatus); + try (ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + ObjectInputStream ois = new ObjectInputStream(bais)) { + HoodieFileStatus deserialized = (HoodieFileStatus) ois.readObject(); + validateAccessors(deserialized, PATH1, LENGTH, false, MODIFICATION_TIME); + } + } + } + + @Test + public void testEquals() { + HoodieFileStatus fileStatus1 = new HoodieFileStatus( + new HoodieLocation(PATH1), LENGTH, false, MODIFICATION_TIME); + HoodieFileStatus fileStatus2 = new HoodieFileStatus( + new HoodieLocation(PATH1), LENGTH + 2, false, MODIFICATION_TIME + 2L); + assertEquals(fileStatus1, fileStatus2); + } + + @Test + public void testNotEquals() { + HoodieFileStatus fileStatus1 = new HoodieFileStatus( + LOCATION1, LENGTH, false, MODIFICATION_TIME); + HoodieFileStatus fileStatus2 = new HoodieFileStatus( + LOCATION2, LENGTH, false, MODIFICATION_TIME + 2L); + assertFalse(fileStatus1.equals(fileStatus2)); + assertFalse(fileStatus2.equals(fileStatus1)); + } + + private void validateAccessors(HoodieFileStatus fileStatus, + String location, + long length, + boolean isDirectory, + long modificationTime) { + assertEquals(new HoodieLocation(location), fileStatus.getLocation()); + assertEquals(length, fileStatus.getLength()); + assertEquals(isDirectory, fileStatus.isDirectory()); + assertEquals(!isDirectory, fileStatus.isFile()); + assertEquals(modificationTime, fileStatus.getModificationTime()); + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java new file mode 100644 index 000000000000..4c765d2cc3f3 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.storage.HoodieLocation; + +import org.junit.jupiter.api.Test; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link HoodieLocation} + */ +public class TestHoodieLocation { + @Test + public void testToString() { + Arrays.stream( + new String[] { + "/", + "/foo", + "/foo/bar", + "foo", + "foo/bar", + "/foo/bar#boo", + "foo/bar#boo", + "file:/a/b/c", + "s3://a/b/c"}) + .forEach(this::toStringTest); + } + + @Test + public void testNormalize() throws URISyntaxException { + assertEquals("", new HoodieLocation(".").toString()); + assertEquals("..", new HoodieLocation("..").toString()); + assertEquals("/", new HoodieLocation("/").toString()); + assertEquals("/", new HoodieLocation("//").toString()); + assertEquals("/", new HoodieLocation("///").toString()); + assertEquals("//foo/", new HoodieLocation("//foo/").toString()); + assertEquals("//foo/", new HoodieLocation("//foo//").toString()); + assertEquals("//foo/bar", new HoodieLocation("//foo//bar").toString()); + assertEquals("/foo", new HoodieLocation("/foo/").toString()); + assertEquals("/foo", new HoodieLocation("/foo/").toString()); + assertEquals("foo", new HoodieLocation("foo/").toString()); + assertEquals("foo", new HoodieLocation("foo//").toString()); + assertEquals("foo/bar", new HoodieLocation("foo//bar").toString()); + assertEquals("file:/a/b/c", new HoodieLocation("file:///a/b/c").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation("s3://a/b/c", "d/e").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation("s3://a/b/c/", "d/e").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation("s3://a/b/c/", "d/e/").toString()); + assertEquals("s3://a/b/c", new HoodieLocation("s3://a/b/c/", "/").toString()); + assertEquals("s3://a/b/c", new HoodieLocation("s3://a/b/c/", "").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation(new HoodieLocation("s3://a/b/c"), "d/e").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation(new HoodieLocation("s3://a/b/c/"), "d/e").toString()); + assertEquals("s3://a/b/c/d/e", new HoodieLocation(new HoodieLocation("s3://a/b/c/"), "d/e/").toString()); + assertEquals("s3://a/b/c", new HoodieLocation(new HoodieLocation("s3://a/b/c/"), "/").toString()); + assertEquals("s3://a/b/c", new HoodieLocation(new HoodieLocation("s3://a/b/c/"), "").toString()); + assertEquals("hdfs://foo/foo2/bar/baz/", new HoodieLocation(new URI("hdfs://foo//foo2///bar/baz///")).toString()); + } + + @Test + public void testIsAbsolute() { + assertTrue(new HoodieLocation("/").isAbsolute()); + assertTrue(new HoodieLocation("/foo").isAbsolute()); + assertFalse(new HoodieLocation("foo").isAbsolute()); + assertFalse(new HoodieLocation("foo/bar").isAbsolute()); + assertFalse(new HoodieLocation(".").isAbsolute()); + } + + @Test + public void testGetParent() { + assertEquals(new HoodieLocation("/foo"), new HoodieLocation("/foo/bar").getParent()); + assertEquals(new HoodieLocation("foo"), new HoodieLocation("foo/bar").getParent()); + assertEquals(new HoodieLocation("/"), new HoodieLocation("/foo").getParent()); + assertEquals(new HoodieLocation("/foo/bar/x"), new HoodieLocation("/foo/bar", "x/y").getParent()); + assertEquals(new HoodieLocation("/foo/bar"), new HoodieLocation("/foo/bar/", "y").getParent()); + assertEquals(new HoodieLocation("/foo"), new HoodieLocation("/foo/bar/", "/").getParent()); + assertThrows(IllegalStateException.class, () -> new HoodieLocation("/").getParent()); + } + + @Test + public void testURI() throws URISyntaxException { + URI uri = new URI("file:///bar#baz"); + HoodieLocation location = new HoodieLocation(uri); + assertEquals(uri, new URI(location.toString())); + assertEquals("foo://bar/baz#boo", new HoodieLocation("foo://bar/", "/baz#boo").toString()); + assertEquals("foo://bar/baz/fud#boo", + new HoodieLocation(new HoodieLocation(new URI("foo://bar/baz#bud")), "fud#boo").toString()); + assertEquals("foo://bar/fud#boo", + new HoodieLocation(new HoodieLocation(new URI("foo://bar/baz#bud")), "/fud#boo").toString()); + } + + @Test + public void testPathToUriConversion() throws URISyntaxException { + assertEquals(new URI(null, null, "/foo?bar", null, null), + new HoodieLocation("/foo?bar").toUri()); + assertEquals(new URI(null, null, "/foo\"bar", null, null), + new HoodieLocation("/foo\"bar").toUri()); + assertEquals(new URI(null, null, "/foo bar", null, null), + new HoodieLocation("/foo bar").toUri()); + assertEquals("/foo?bar", new HoodieLocation("http://localhost/foo?bar").toUri().getPath()); + assertEquals("/foo", new URI("http://localhost/foo?bar").getPath()); + assertEquals((new URI("/foo;bar")).getPath(), new HoodieLocation("/foo;bar").toUri().getPath()); + assertEquals(new URI("/foo;bar"), new HoodieLocation("/foo;bar").toUri()); + assertEquals(new URI("/foo+bar"), new HoodieLocation("/foo+bar").toUri()); + assertEquals(new URI("/foo-bar"), new HoodieLocation("/foo-bar").toUri()); + assertEquals(new URI("/foo=bar"), new HoodieLocation("/foo=bar").toUri()); + assertEquals(new URI("/foo,bar"), new HoodieLocation("/foo,bar").toUri()); + } + + @Test + public void testGetName() { + assertEquals("", new HoodieLocation("/").getName()); + assertEquals("foo", new HoodieLocation("foo").getName()); + assertEquals("foo", new HoodieLocation("/foo").getName()); + assertEquals("foo", new HoodieLocation("/foo/").getName()); + assertEquals("bar", new HoodieLocation("/foo/bar").getName()); + assertEquals("bar", new HoodieLocation("hdfs://host/foo/bar").getName()); + assertEquals("bar", new HoodieLocation("hdfs://host", "foo/bar").getName()); + assertEquals("bar", new HoodieLocation("hdfs://host/foo/", "bar").getName()); + } + + @Test + public void testGetLocationWithoutSchemeAndAuthority() { + assertEquals( + new HoodieLocation("/foo/bar/boo"), + new HoodieLocation("/foo/bar/boo").getLocationWithoutSchemeAndAuthority()); + assertEquals( + new HoodieLocation("/foo/bar/boo"), + new HoodieLocation("file:///foo/bar/boo").getLocationWithoutSchemeAndAuthority()); + assertEquals( + new HoodieLocation("/bar/boo"), + new HoodieLocation("s3://foo/bar/boo").getLocationWithoutSchemeAndAuthority()); + } + + @Test + public void testDepth() throws URISyntaxException { + assertEquals(0, new HoodieLocation("/").depth()); + assertEquals(0, new HoodieLocation("///").depth()); + assertEquals(0, new HoodieLocation("//foo/").depth()); + assertEquals(1, new HoodieLocation("//foo//bar").depth()); + assertEquals(5, new HoodieLocation("/a/b/c/d/e").depth()); + assertEquals(4, new HoodieLocation("s3://a/b/c", "d/e").depth()); + assertEquals(2, new HoodieLocation("s3://a/b/c/", "").depth()); + assertEquals(4, new HoodieLocation(new HoodieLocation("s3://a/b/c"), "d/e").depth()); + } + + @Test + public void testEquals() { + assertEquals(new HoodieLocation("/foo"), new HoodieLocation("/foo")); + assertEquals(new HoodieLocation("/foo"), new HoodieLocation("/foo/")); + assertEquals(new HoodieLocation("/foo/bar"), new HoodieLocation("/foo//bar/")); + assertNotEquals(new HoodieLocation("/"), new HoodieLocation("/foo")); + } + + @Test + public void testCachedResults() { + HoodieLocation location = new HoodieLocation("s3://x/y/z/"); + assertSame(location.getParent(), location.getParent()); + assertSame(location.getName(), location.getName()); + assertSame(location.toString(), location.toString()); + } + + private void toStringTest(String pathString) { + assertEquals(pathString, new HoodieLocation(pathString).toString()); + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocationFilter.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocationFilter.java new file mode 100644 index 000000000000..2d66cc23f87e --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocationFilter.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.storage.HoodieLocation; +import org.apache.hudi.storage.HoodieLocationFilter; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Tests {@link HoodieLocationFilter} + */ +public class TestHoodieLocationFilter { + @Test + public void testFilter() { + HoodieLocation location1 = new HoodieLocation("/x/y/1"); + HoodieLocation location2 = new HoodieLocation("/x/y/2"); + HoodieLocation location3 = new HoodieLocation("/x/z/1"); + HoodieLocation location4 = new HoodieLocation("/x/z/2"); + + List locationList = Arrays.stream( + new HoodieLocation[] {location1, location2, location3, location4} + ).collect(Collectors.toList()); + + List expected = Arrays.stream( + new HoodieLocation[] {location1, location2} + ).collect(Collectors.toList()); + + assertEquals(expected.stream().sorted().collect(Collectors.toList()), + locationList.stream() + .filter(e -> new HoodieLocationFilter() { + @Override + public boolean accept(HoodieLocation location) { + return location.getParent().equals(new HoodieLocation("/x/y")); + } + }.accept(e)) + .sorted() + .collect(Collectors.toList())); + assertEquals(locationList, + locationList.stream() + .filter(e -> new HoodieLocationFilter() { + @Override + public boolean accept(HoodieLocation location) { + return true; + } + }.accept(e)) + .sorted() + .collect(Collectors.toList())); + } +} diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java new file mode 100644 index 000000000000..0424d22157d6 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.io.util.IOUtils; +import org.apache.hudi.storage.HoodieFileStatus; +import org.apache.hudi.storage.HoodieLocation; +import org.apache.hudi.storage.HoodieStorage; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Base class for testing different implementation of {@link HoodieStorage}. + */ +public abstract class TestHoodieStorageBase { + @TempDir + protected Path tempDir; + + protected static final String[] RELATIVE_FILE_PATHS = new String[] { + "w/1.file", "w/2.file", "x/1.file", "x/2.file", + "x/y/1.file", "x/y/2.file", "x/z/1.file", "x/z/2.file" + }; + private static final byte[] EMPTY_BYTES = new byte[] {}; + + /** + * @param fs file system instance. + * @param conf configuration instance. + * @return {@link HoodieStorage} instance based on the implementation for testing. + */ + protected abstract HoodieStorage getHoodieStorage(Object fs, Object conf); + + /** + * @param conf configuration instance. + * @return the underlying file system instance used if required. + */ + protected abstract Object getFileSystem(Object conf); + + /** + * @return configurations for the storage. + */ + protected abstract Object getConf(); + + @AfterEach + public void cleanUpTempDir() { + HoodieStorage storage = getHoodieStorage(); + try { + for (HoodieFileStatus status : storage.listDirectEntries(new HoodieLocation(getTempDir()))) { + HoodieLocation location = status.getLocation(); + if (status.isDirectory()) { + storage.deleteDirectory(location); + } else { + storage.deleteFile(location); + } + } + } catch (IOException e) { + // Silently fail + } + } + + @Test + public void testGetScheme() { + assertEquals("file", getHoodieStorage().getScheme()); + } + + @Test + public void testCreateWriteAndRead() throws IOException { + HoodieStorage storage = getHoodieStorage(); + + HoodieLocation location = new HoodieLocation(getTempDir(), "testCreateAppendAndRead/1.file"); + assertFalse(storage.exists(location)); + storage.create(location).close(); + validateFileStatus(storage, location, EMPTY_BYTES, false); + + byte[] data = new byte[] {2, 42, 49, (byte) 158, (byte) 233, 66, 9}; + + // By default, create overwrites the file + try (OutputStream stream = storage.create(location)) { + stream.write(data); + stream.flush(); + } + validateFileStatus(storage, location, data, false); + + assertThrows(IOException.class, () -> storage.create(location, false)); + validateFileStatus(storage, location, data, false); + + assertThrows(IOException.class, () -> storage.create(location, false)); + validateFileStatus(storage, location, data, false); + + HoodieLocation location2 = new HoodieLocation(getTempDir(), "testCreateAppendAndRead/2.file"); + assertFalse(storage.exists(location2)); + assertTrue(storage.createNewFile(location2)); + validateFileStatus(storage, location2, EMPTY_BYTES, false); + assertFalse(storage.createNewFile(location2)); + + HoodieLocation location3 = new HoodieLocation(getTempDir(), "testCreateAppendAndRead/3.file"); + assertFalse(storage.exists(location3)); + storage.createImmutableFileInPath(location3, Option.of(data)); + validateFileStatus(storage, location3, data, false); + + HoodieLocation location4 = new HoodieLocation(getTempDir(), "testCreateAppendAndRead/4"); + assertFalse(storage.exists(location4)); + assertTrue(storage.createDirectory(location4)); + validateFileStatus(storage, location4, EMPTY_BYTES, true); + assertTrue(storage.createDirectory(location4)); + } + + @Test + public void testListing() throws IOException { + HoodieStorage storage = getHoodieStorage(); + // Full list: + // w/1.file + // w/2.file + // x/1.file + // x/2.file + // x/y/1.file + // x/y/2.file + // x/z/1.file + // x/z/2.file + prepareFilesOnStorage(storage); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/2.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/y"), 0, true, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z"), 0, true, 0), + }).collect(Collectors.toList()), + storage.listDirectEntries(new HoodieLocation(getTempDir(), "x"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/2.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/y/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/y/2.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z/2.file"), 0, false, 0) + }).collect(Collectors.toList()), + storage.listFiles(new HoodieLocation(getTempDir(), "x"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/2.file"), 0, false, 0) + }).collect(Collectors.toList()), + storage.listDirectEntries( + new HoodieLocation(getTempDir(), "x"), e -> e.getName().contains("2"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "w/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "w/2.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z/2.file"), 0, false, 0) + }).collect(Collectors.toList()), + storage.listDirectEntries(Arrays.stream(new HoodieLocation[] { + new HoodieLocation(getTempDir(), "w"), + new HoodieLocation(getTempDir(), "x/z") + }).collect(Collectors.toList()))); + + assertThrows(FileNotFoundException.class, + () -> storage.listDirectEntries(new HoodieLocation(getTempDir(), "*"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/y/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/z/1.file"), 0, false, 0) + }).collect(Collectors.toList()), + storage.globEntries(new HoodieLocation(getTempDir(), "x/*/1.file"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/1.file"), 0, false, 0), + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/2.file"), 0, false, 0), + }).collect(Collectors.toList()), + storage.globEntries(new HoodieLocation(getTempDir(), "x/*.file"))); + + validateHoodieFileStatusList( + Arrays.stream(new HoodieFileStatus[] { + new HoodieFileStatus(new HoodieLocation(getTempDir(), "x/y/1.file"), 0, false, 0), + }).collect(Collectors.toList()), + storage.globEntries( + new HoodieLocation(getTempDir(), "x/*/*.file"), + e -> e.getParent().getName().equals("y") && e.getName().contains("1"))); + } + + @Test + public void testFileNotFound() throws IOException { + HoodieStorage storage = getHoodieStorage(); + + HoodieLocation fileLocation = new HoodieLocation(getTempDir(), "testFileNotFound/1.file"); + HoodieLocation dirLocation = new HoodieLocation(getTempDir(), "testFileNotFound/2"); + assertFalse(storage.exists(fileLocation)); + assertThrows(FileNotFoundException.class, () -> storage.open(fileLocation)); + assertThrows(FileNotFoundException.class, () -> storage.getFileStatus(fileLocation)); + assertThrows(FileNotFoundException.class, () -> storage.listDirectEntries(fileLocation)); + assertThrows(FileNotFoundException.class, () -> storage.listDirectEntries(dirLocation)); + assertThrows(FileNotFoundException.class, () -> storage.listDirectEntries(dirLocation, e -> true)); + assertThrows(FileNotFoundException.class, () -> storage.listDirectEntries( + Arrays.stream(new HoodieLocation[] {dirLocation}).collect(Collectors.toList()))); + } + + @Test + public void testRename() throws IOException { + HoodieStorage storage = getHoodieStorage(); + + HoodieLocation location = new HoodieLocation(getTempDir(), "testRename/1.file"); + assertFalse(storage.exists(location)); + storage.create(location).close(); + validateFileStatus(storage, location, EMPTY_BYTES, false); + + HoodieLocation newLocation = new HoodieLocation(getTempDir(), "testRename/1_renamed.file"); + assertTrue(storage.rename(location, newLocation)); + assertFalse(storage.exists(location)); + validateFileStatus(storage, newLocation, EMPTY_BYTES, false); + } + + @Test + public void testDelete() throws IOException { + HoodieStorage storage = getHoodieStorage(); + + HoodieLocation location = new HoodieLocation(getTempDir(), "testDelete/1.file"); + assertFalse(storage.exists(location)); + storage.create(location).close(); + assertTrue(storage.exists(location)); + + assertTrue(storage.deleteFile(location)); + assertFalse(storage.exists(location)); + assertFalse(storage.deleteFile(location)); + + HoodieLocation location2 = new HoodieLocation(getTempDir(), "testDelete/2"); + assertFalse(storage.exists(location2)); + assertTrue(storage.createDirectory(location2)); + assertTrue(storage.exists(location2)); + + assertTrue(storage.deleteDirectory(location2)); + assertFalse(storage.exists(location2)); + assertFalse(storage.deleteDirectory(location2)); + } + + @Test + public void testMakeQualified() { + HoodieStorage storage = getHoodieStorage(); + HoodieLocation location = new HoodieLocation("/tmp/testMakeQualified/1.file"); + assertEquals( + new HoodieLocation("file:/tmp/testMakeQualified/1.file"), + storage.makeQualified(location)); + } + + @Test + public void testGetFileSystem() { + Object conf = getConf(); + Object fs = getFileSystem(conf); + HoodieStorage storage = getHoodieStorage(fs, conf); + assertSame(fs, storage.getFileSystem()); + } + + protected String getTempDir() { + return "file:" + tempDir.toUri().getPath(); + } + + /** + * Prepares files on storage for testing. + * + * @storage {@link HoodieStorage} to use. + */ + private void prepareFilesOnStorage(HoodieStorage storage) throws IOException { + String dir = getTempDir(); + for (String relativePath : RELATIVE_FILE_PATHS) { + storage.create(new HoodieLocation(dir, relativePath)).close(); + } + } + + private HoodieStorage getHoodieStorage() { + Object conf = getConf(); + return getHoodieStorage(getFileSystem(conf), conf); + } + + private void validateFileStatus(HoodieStorage storage, + HoodieLocation location, + byte[] data, + boolean isDirectory) throws IOException { + assertTrue(storage.exists(location)); + HoodieFileStatus fileStatus = storage.getFileStatus(location); + assertEquals(location, fileStatus.getLocation()); + assertEquals(isDirectory, fileStatus.isDirectory()); + assertEquals(!isDirectory, fileStatus.isFile()); + if (!isDirectory) { + assertEquals(data.length, fileStatus.getLength()); + try (InputStream stream = storage.open(location)) { + assertArrayEquals(data, IOUtils.readAsByteArray(stream, data.length)); + } + } + assertTrue(fileStatus.getModificationTime() > 0); + } + + private void validateHoodieFileStatusList(List expected, + List actual) { + assertEquals(expected.size(), actual.size()); + List sortedExpected = expected.stream() + .sorted(Comparator.comparing(HoodieFileStatus::getLocation)) + .collect(Collectors.toList()); + List sortedActual = actual.stream() + .sorted(Comparator.comparing(HoodieFileStatus::getLocation)) + .collect(Collectors.toList()); + for (int i = 0; i < expected.size(); i++) { + // We cannot use HoodieFileStatus#equals as that only compares the location + assertEquals(sortedExpected.get(i).getLocation(), sortedActual.get(i).getLocation()); + assertEquals(sortedExpected.get(i).isDirectory(), sortedActual.get(i).isDirectory()); + assertEquals(sortedExpected.get(i).isFile(), sortedActual.get(i).isFile()); + if (sortedExpected.get(i).isFile()) { + assertEquals(sortedExpected.get(i).getLength(), sortedActual.get(i).getLength()); + } + assertTrue(sortedActual.get(i).getModificationTime() > 0); + } + } +} From e00e2d7e896ba4d75a5578ee69f4ce653e050008 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 28 Jan 2024 23:42:07 -0800 Subject: [PATCH 084/112] [HUDI-7342] Use BaseFileUtils to hide format-specific logic in HoodiePartitionMetadata (#10568) --- .../common/model/HoodiePartitionMetadata.java | 43 +------------------ .../hudi/common/util/BaseFileUtils.java | 15 +++++++ .../org/apache/hudi/common/util/OrcUtils.java | 18 ++++++++ .../apache/hudi/common/util/ParquetUtils.java | 23 ++++++++++ 4 files changed, 57 insertions(+), 42 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index ad5912ba8b9c..2b63433bef46 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -18,40 +18,26 @@ package org.apache.hudi.common.model; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.avro.HoodieAvroWriteSupport; -import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.BaseFileUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.avro.Schema; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.orc.OrcFile; -import org.apache.orc.Writer; -import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.apache.parquet.schema.Types; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; - /** * The metadata that goes into the meta file in each partition. */ @@ -152,34 +138,7 @@ private String getMetafileExtension() { */ private void writeMetafile(Path filePath) throws IOException { if (format.isPresent()) { - Schema schema = HoodieAvroUtils.getRecordKeySchema(); - - switch (format.get()) { - case PARQUET: - // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other - // parameters are not important. - MessageType type = Types.buildMessage().optional(PrimitiveTypeName.INT64).named("dummyint").named("dummy"); - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(type, schema, Option.empty(), new Properties()); - try (ParquetWriter writer = new ParquetWriter(filePath, writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) { - for (String key : props.stringPropertyNames()) { - writeSupport.addFooterMetadata(key, props.getProperty(key)); - } - } - break; - case ORC: - // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other - // parameters are not important. - OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(fs.getConf()).fileSystem(fs) - .setSchema(AvroOrcUtils.createOrcSchema(schema)); - try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) { - for (String key : props.stringPropertyNames()) { - writer.addUserMetadata(key, ByteBuffer.wrap(getUTF8Bytes(props.getProperty(key)))); - } - } - break; - default: - throw new HoodieException("Unsupported format for partition metafiles: " + format.get()); - } + BaseFileUtils.getInstance(format.get()).writeMetaFile(fs, filePath, props); } else { // Backwards compatible properties file format FSDataOutputStream os = fs.create(filePath, true); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java index d402f58a40a1..dd2eb7ad5c0f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java @@ -33,11 +33,14 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; /** @@ -216,4 +219,16 @@ public abstract Map readFooter(Configuration configuration, bool * @return The subclass's {@link HoodieFileFormat}. */ public abstract HoodieFileFormat getFormat(); + + /** + * Writes properties to the meta file. + * + * @param fs {@link FileSystem} instance. + * @param filePath file path to write to. + * @param props properties to write. + * @throws IOException upon write error. + */ + public abstract void writeMetaFile(FileSystem fs, + Path filePath, + Properties props) throws IOException; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index 66e9ab237fcc..0d3342626ae3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -32,6 +32,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -41,6 +42,7 @@ import org.apache.orc.Reader.Options; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; import java.io.IOException; import java.nio.ByteBuffer; @@ -50,10 +52,12 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.stream.Collectors; import static org.apache.hudi.common.util.BinaryUtil.toBytes; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Utility functions for ORC files. @@ -265,4 +269,18 @@ public long getRowCount(Configuration conf, Path orcFilePath) { throw new HoodieIOException("Unable to get row count for ORC file:" + orcFilePath, io); } } + + @Override + public void writeMetaFile(FileSystem fs, Path filePath, Properties props) throws IOException { + // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other + // parameters are not important. + Schema schema = HoodieAvroUtils.getRecordKeySchema(); + OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(fs.getConf()).fileSystem(fs) + .setSchema(AvroOrcUtils.createOrcSchema(schema)); + try (Writer writer = OrcFile.createWriter(filePath, writerOptions)) { + for (String key : props.stringPropertyNames()) { + writer.addUserMetadata(key, ByteBuffer.wrap(getUTF8Bytes(props.getProperty(key)))); + } + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index a1e51cd69d42..0a4c5691df31 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.HoodieAvroWriteSupport; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; @@ -32,6 +33,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroReadSupport; @@ -39,13 +41,16 @@ import org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Types; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,6 +64,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.function.Function; import java.util.stream.Collector; @@ -280,6 +286,23 @@ public long getRowCount(Configuration conf, Path parquetFilePath) { return rowCount; } + @Override + public void writeMetaFile(FileSystem fs, Path filePath, Properties props) throws IOException { + // Since we are only interested in saving metadata to the footer, the schema, blocksizes and other + // parameters are not important. + Schema schema = HoodieAvroUtils.getRecordKeySchema(); + MessageType type = Types.buildMessage() + .optional(PrimitiveType.PrimitiveTypeName.INT64).named("dummyint").named("dummy"); + HoodieAvroWriteSupport writeSupport = + new HoodieAvroWriteSupport(type, schema, Option.empty(), new Properties()); + try (ParquetWriter writer = new ParquetWriter( + filePath, writeSupport, CompressionCodecName.UNCOMPRESSED, 1024, 1024)) { + for (String key : props.stringPropertyNames()) { + writeSupport.addFooterMetadata(key, props.getProperty(key)); + } + } + } + static class RecordKeysFilterFunction implements Function { private final Set candidateKeys; From a05834462c4a9f0c9c80cef27f7a5d9d58f07bcb Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 18:15:46 -0800 Subject: [PATCH 085/112] [HUDI-7218] Integrate new HFile reader with file reader factory (#10330) --- .../apache/hudi/index/HoodieIndexUtils.java | 3 +- .../apache/hudi/io/HoodieAppendHandle.java | 4 +- .../org/apache/hudi/io/HoodieMergeHandle.java | 3 +- .../org/apache/hudi/io/HoodieReadHandle.java | 8 +- .../HoodieBackedTableMetadataWriter.java | 8 +- .../action/commit/HoodieMergeHelper.java | 4 +- .../GenericRecordValidationTestUtils.java | 26 +- .../run/strategy/JavaExecutionStrategy.java | 6 +- .../client/TestJavaHoodieBackedMetadata.java | 19 +- .../HoodieJavaClientTestHarness.java | 45 +- .../MultipleSparkJobExecutionStrategy.java | 6 +- .../SingleSparkJobExecutionStrategy.java | 5 +- .../storage/HoodieSparkFileReaderFactory.java | 5 +- .../ParquetBootstrapMetadataHandler.java | 2 +- .../functional/TestHoodieBackedMetadata.java | 19 +- .../TestHoodieBackedTableMetadata.java | 12 +- .../hudi/testutils/HoodieClientTestUtils.java | 46 +- .../org/apache/hudi/avro/HoodieAvroUtils.java | 14 +- .../hudi/common/bloom/BloomFilterFactory.java | 19 + .../HoodieDynamicBoundedBloomFilter.java | 30 +- .../hudi/common/bloom/SimpleBloomFilter.java | 27 +- .../bootstrap/index/HFileBootstrapIndex.java | 242 +++++++- .../hudi/common/config/ConfigGroups.java | 1 + .../common/config/HoodieReaderConfig.java | 39 ++ .../common/table/TableSchemaResolver.java | 12 +- .../common/table/log/HoodieLogFileReader.java | 9 +- .../table/log/block/HoodieDataBlock.java | 2 + .../table/log/block/HoodieHFileDataBlock.java | 48 +- .../log/block/HoodieParquetDataBlock.java | 4 +- .../hudi/common/util/Base64CodecUtil.java | 11 + .../apache/hudi/common/util/ConfigUtils.java | 65 +- .../storage/HoodieAvroFileReaderFactory.java | 30 +- .../storage/HoodieAvroFileWriterFactory.java | 6 +- .../HoodieAvroHFileReaderImplBase.java | 154 +++++ .../io/storage/HoodieAvroHFileWriter.java | 38 +- .../io/storage/HoodieFileReaderFactory.java | 68 ++- ...r.java => HoodieHBaseAvroHFileReader.java} | 144 +---- .../hudi/io/storage/HoodieHFileUtils.java | 3 +- .../storage/HoodieNativeAvroHFileReader.java | 559 ++++++++++++++++++ .../metadata/HoodieBackedTableMetadata.java | 3 +- .../hudi/metadata/HoodieMetadataPayload.java | 4 +- .../metadata/HoodieTableMetadataUtil.java | 27 +- ...estInLineFileSystemHFileInLiningBase.java} | 95 +-- ...tInLineFileSystemWithHBaseHFileReader.java | 124 ++++ .../TestInLineFileSystemWithHFileReader.java | 104 ++++ .../functional/TestHoodieLogFormat.java | 3 +- .../hudi/common/util/TestBase64CodecUtil.java | 5 + .../TestHoodieAvroFileReaderFactory.java | 10 +- .../TestHoodieHBaseHFileReaderWriter.java | 142 +++++ .../storage/TestHoodieHFileReaderWriter.java | 473 +-------------- .../TestHoodieHFileReaderWriterBase.java | 486 +++++++++++++++ .../io/storage/TestHoodieOrcReaderWriter.java | 4 +- .../storage/TestHoodieReaderWriterUtils.java | 2 +- .../sink/clustering/ClusteringOperator.java | 6 +- .../hudi/hadoop/HoodieHFileRecordReader.java | 22 +- .../HoodieRealtimeRecordReaderUtils.java | 5 +- .../hadoop/testutils/InputFormatTestUtil.java | 3 +- .../reader/DFSHoodieDatasetInputReader.java | 5 +- .../apache/hudi/common/util/FileIOUtils.java | 13 +- .../apache/hudi/common/util/StringUtils.java | 10 + .../org/apache/hudi/io/hfile/HFileCursor.java | 1 + .../hudi/io/hfile/HFileFileInfoBlock.java | 4 +- .../apache/hudi/io/hfile/HFileReaderImpl.java | 3 + .../org/apache/hudi/io/hfile/HFileUtils.java | 34 ++ .../java/org/apache/hudi/io/hfile/Key.java | 5 + .../java/org/apache/hudi/io/util/IOUtils.java | 12 + .../apache/hudi/io/hfile/TestHFileReader.java | 5 +- .../apache/hudi/io/util/TestHFileUtils.java | 44 ++ .../org/apache/hudi/HoodieBaseRelation.scala | 14 +- .../HoodieMetadataTableValidator.java | 22 +- pom.xml | 6 + 71 files changed, 2520 insertions(+), 922 deletions(-) create mode 100644 hudi-common/src/main/java/org/apache/hudi/common/config/HoodieReaderConfig.java create mode 100644 hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java rename hudi-common/src/main/java/org/apache/hudi/io/storage/{HoodieAvroHFileReader.java => HoodieHBaseAvroHFileReader.java} (81%) create mode 100644 hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java rename hudi-common/src/test/java/org/apache/hudi/common/fs/inline/{TestInLineFileSystemHFileInLining.java => TestInLineFileSystemHFileInLiningBase.java} (59%) create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java create mode 100644 hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java create mode 100644 hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java create mode 100644 hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/io/util/TestHFileUtils.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java index b6db316a3b67..890bffeb5a39 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java @@ -69,6 +69,7 @@ import java.util.TreeSet; import static java.util.stream.Collectors.toList; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.apache.hudi.table.action.commit.HoodieDeleteHelper.createDeleteRecord; /** @@ -185,7 +186,7 @@ public static List filterKeysFromFile(Path filePath, List candid ValidationUtils.checkArgument(FSUtils.isBaseFile(filePath)); List foundRecordKeys = new ArrayList<>(); try (HoodieFileReader fileReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) - .getFileReader(configuration, filePath)) { + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, configuration, filePath)) { // Load all rowKeys from the file, to double-confirm if (!candidateRecordKeys.isEmpty()) { HoodieTimer timer = HoodieTimer.start(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java index ca081fce60f1..5d9c5ac54962 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java @@ -19,6 +19,7 @@ package org.apache.hudi.io; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; @@ -666,7 +667,8 @@ private static HoodieLogBlock getBlock(HoodieWriteConfig writeConfig, return new HoodieAvroDataBlock(records, header, keyField); case HFILE_DATA_BLOCK: return new HoodieHFileDataBlock( - records, header, writeConfig.getHFileCompressionAlgorithm(), new Path(writeConfig.getBasePath())); + records, header, writeConfig.getHFileCompressionAlgorithm(), new Path(writeConfig.getBasePath()), + writeConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER)); case PARQUET_DATA_BLOCK: return new HoodieParquetDataBlock( records, diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index 21c0059474e8..4460e29c8a43 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -460,7 +460,8 @@ public void performMergeDataValidationCheck(WriteStatus writeStatus) { } long oldNumWrites = 0; - try (HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(this.recordMerger.getRecordType()).getFileReader(hoodieTable.getHadoopConf(), oldFilePath)) { + try (HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(this.recordMerger.getRecordType()) + .getFileReader(config, hoodieTable.getHadoopConf(), oldFilePath)) { oldNumWrites = reader.getTotalRecords(); } catch (IOException e) { throw new HoodieUpsertException("Failed to check for merge data validation", e); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java index 28e6c0e16794..5b7985ba9795 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java @@ -71,12 +71,12 @@ protected HoodieBaseFile getLatestBaseFile() { } protected HoodieFileReader createNewFileReader() throws IOException { - return HoodieFileReaderFactory.getReaderFactory(this.config.getRecordMerger().getRecordType()).getFileReader(hoodieTable.getHadoopConf(), - new Path(getLatestBaseFile().getPath())); + return HoodieFileReaderFactory.getReaderFactory(this.config.getRecordMerger().getRecordType()) + .getFileReader(config, hoodieTable.getHadoopConf(), new Path(getLatestBaseFile().getPath())); } protected HoodieFileReader createNewFileReader(HoodieBaseFile hoodieBaseFile) throws IOException { - return HoodieFileReaderFactory.getReaderFactory(this.config.getRecordMerger().getRecordType()).getFileReader(hoodieTable.getHadoopConf(), - new Path(hoodieBaseFile.getPath())); + return HoodieFileReaderFactory.getReaderFactory(this.config.getRecordMerger().getRecordType()) + .getFileReader(config, hoodieTable.getHadoopConf(), new Path(hoodieBaseFile.getPath())); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 2ad169d51261..e508e2d2b7eb 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -507,6 +507,7 @@ private Pair> initializeRecordIndexPartition() // Collect record keys from the files in parallel HoodieData records = readRecordKeysFromBaseFiles( engineContext, + dataWriteConfig, partitionBaseFilePairs, false, dataWriteConfig.getMetadataConfig().getRecordIndexMaxParallelism(), @@ -864,7 +865,8 @@ public void buildMetadataPartitions(HoodieEngineContext engineContext, List writeStatus, String instantTime) { processAndCommit(instantTime, () -> { Map> partitionToRecordMap = - HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, commitMetadata, instantTime, getRecordsGenerationParams()); + HoodieTableMetadataUtil.convertMetadataToRecords( + engineContext, dataWriteConfig, commitMetadata, instantTime, getRecordsGenerationParams()); // Updates for record index are created by parsing the WriteStatus which is a hudi-client object. Hence, we cannot yet move this code // to the HoodieTableMetadataUtil class in hudi-common. @@ -880,7 +882,8 @@ public void updateFromWriteStatuses(HoodieCommitMetadata commitMetadata, HoodieD public void update(HoodieCommitMetadata commitMetadata, HoodieData records, String instantTime) { processAndCommit(instantTime, () -> { Map> partitionToRecordMap = - HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, commitMetadata, instantTime, getRecordsGenerationParams()); + HoodieTableMetadataUtil.convertMetadataToRecords( + engineContext, dataWriteConfig, commitMetadata, instantTime, getRecordsGenerationParams()); HoodieData additionalUpdates = getRecordIndexAdditionalUpserts(records, commitMetadata); partitionToRecordMap.put(MetadataPartitionType.RECORD_INDEX, records.union(additionalUpdates)); return partitionToRecordMap; @@ -1421,6 +1424,7 @@ private HoodieData getRecordIndexReplacedRecords(HoodieReplaceComm return readRecordKeysFromBaseFiles( engineContext, + dataWriteConfig, partitionBaseFilePairs, true, dataWriteConfig.getMetadataConfig().getRecordIndexMaxParallelism(), diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java index c1523d564e48..7fba0463292a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/HoodieMergeHelper.java @@ -82,7 +82,7 @@ public void runMerge(HoodieTable table, HoodieRecord.HoodieRecordType recordType = table.getConfig().getRecordMerger().getRecordType(); HoodieFileReader baseFileReader = HoodieFileReaderFactory .getReaderFactory(recordType) - .getFileReader(hadoopConf, mergeHandle.getOldFilePath()); + .getFileReader(writeConfig, hadoopConf, mergeHandle.getOldFilePath()); HoodieFileReader bootstrapFileReader = null; Schema writerSchema = mergeHandle.getWriterSchemaWithMetaFields(); @@ -114,7 +114,7 @@ public void runMerge(HoodieTable table, Configuration bootstrapFileConfig = new Configuration(table.getHadoopConf()); bootstrapFileReader = HoodieFileReaderFactory.getReaderFactory(recordType).newBootstrapFileReader( baseFileReader, - HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(bootstrapFileConfig, bootstrapFilePath), + HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(writeConfig, bootstrapFileConfig, bootstrapFilePath), mergeHandle.getPartitionFields(), mergeHandle.getPartitionValues()); recordSchema = mergeHandle.getWriterSchemaWithMetaFields(); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/GenericRecordValidationTestUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/GenericRecordValidationTestUtils.java index 2196b6f0b630..a2949eb6eee1 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/GenericRecordValidationTestUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/GenericRecordValidationTestUtils.java @@ -19,33 +19,43 @@ package org.apache.hudi.testutils; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.model.HoodieFileFormat; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; +import java.io.IOException; import java.nio.file.Paths; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.apache.hudi.common.model.HoodieRecord.COMMIT_SEQNO_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.COMMIT_TIME_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.FILENAME_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.OPERATION_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.RECORD_KEY_METADATA_FIELD; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.HOODIE_CONSUME_COMMIT; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -126,8 +136,22 @@ public static Map getRecordsMap(HoodieWriteConfig config, .map(partitionPath -> Paths.get(config.getBasePath(), partitionPath).toString()) .collect(Collectors.toList()); return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat( - hadoopConf, fullPartitionPaths, config.getBasePath(), jobConf, true).stream() + hadoopConf, fullPartitionPaths, config.getBasePath(), jobConf, true).stream() .collect(Collectors.toMap(rec -> rec.get(RECORD_KEY_METADATA_FIELD).toString(), Function.identity())); } + public static Stream readHFile(Configuration conf, String[] paths) { + List valuesAsList = new LinkedList<>(); + for (String path : paths) { + try (HoodieAvroHFileReaderImplBase reader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, conf, new Path(path), HoodieFileFormat.HFILE)) { + valuesAsList.addAll(HoodieAvroHFileReaderImplBase.readAllRecords(reader) + .stream().map(e -> (GenericRecord) e).collect(Collectors.toList())); + } catch (IOException e) { + throw new HoodieException("Error reading HFile " + path, e); + } + } + return valuesAsList.stream(); + } } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java index 81786d88f8b0..f73238d02108 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java @@ -193,7 +193,8 @@ private List> readRecordsForGroupWithLogs(List> fileSliceReader = new HoodieFileSliceReader(baseFileReader, scanner, readerSchema, tableConfig.getPreCombineField(), writeConfig.getRecordMerger(), tableConfig.getProps(), @@ -221,7 +222,8 @@ private List> readRecordsForGroupWithLogs(List> readRecordsForGroupBaseFiles(List clusteringOps) { List> records = new ArrayList<>(); clusteringOps.forEach(clusteringOp -> { - try (HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath()))) { + try (HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType) + .getFileReader(getHoodieTable().getConfig(), getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath()))) { Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(getWriteConfig().getSchema())); Iterator recordIterator = baseFileReader.getRecordIterator(readerSchema); // NOTE: Record have to be cloned here to make sure if it holds low-level engine-specific diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index 1e09f7e093c4..2dc54cb75ad3 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -86,7 +86,8 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.index.HoodieIndex; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; @@ -110,7 +111,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.util.Time; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.MessageType; @@ -539,9 +539,10 @@ public void testVirtualKeysInBaseFiles() throws Exception { table.getHoodieView().sync(); List fileSlices = table.getSliceView().getLatestFileSlices("files").collect(Collectors.toList()); HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); - HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()), - new CacheConfig(context.getHadoopConf().get())); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); + HoodieAvroHFileReaderImplBase hoodieHFileReader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + writeConfig, context.getHadoopConf().get(), new Path(baseFile.getPath())); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (populateMetaFields) { assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); @@ -959,10 +960,10 @@ private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable tabl } final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); - HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), - new Path(baseFile.getPath()), - new CacheConfig(context.getHadoopConf().get())); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); + HoodieAvroHFileReaderImplBase hoodieHFileReader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + table.getConfig(), context.getHadoopConf().get(), new Path(baseFile.getPath())); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (enableMetaFields) { assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java index 48726efcd6b8..0fab5b811d14 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java @@ -17,7 +17,6 @@ package org.apache.hudi.testutils; -import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.HoodieJavaWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieJavaEngineContext; @@ -65,7 +64,6 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.JavaHoodieIndexFactory; -import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; import org.apache.hudi.metadata.HoodieTableMetadata; @@ -76,17 +74,12 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.utils.HoodieWriterClientTestHarness; -import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; -import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -98,7 +91,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Objects; @@ -109,9 +101,8 @@ import java.util.stream.Stream; import static org.apache.hudi.common.testutils.HoodieTestUtils.RAW_TRIPS_TEST_NAME; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; +import static org.apache.hudi.testutils.GenericRecordValidationTestUtils.readHFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertLinesMatch; @@ -978,7 +969,7 @@ public long countRecordsOptionallySince(String basePath, HoodieTimeline commitTi } }).count(); } else if (paths[0].endsWith(HoodieFileFormat.HFILE.getFileExtension())) { - Stream genericRecordStream = readHFile(paths); + Stream genericRecordStream = readHFile(context.getHadoopConf().get(), paths); if (lastCommitTimeOpt.isPresent()) { return genericRecordStream.filter(gr -> HoodieTimeline.compareTimestamps(lastCommitTimeOpt.get(), HoodieActiveTimeline.LESSER_THAN, gr.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString())) @@ -993,38 +984,6 @@ public long countRecordsOptionallySince(String basePath, HoodieTimeline commitTi } } - public Stream readHFile(String[] paths) { - // TODO: this should be ported to use HoodieStorageReader - List valuesAsList = new LinkedList<>(); - - FileSystem fs = HadoopFSUtils.getFs(paths[0], context.getHadoopConf().get()); - CacheConfig cacheConfig = new CacheConfig(fs.getConf()); - Schema schema = null; - for (String path : paths) { - try { - HFile.Reader reader = - HoodieHFileUtils.createHFileReader(fs, new Path(path), cacheConfig, fs.getConf()); - if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY)))); - } - HFileScanner scanner = reader.getScanner(false, false); - if (!scanner.seekTo()) { - // EOF reached - continue; - } - - do { - Cell c = scanner.getCell(); - byte[] value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); - valuesAsList.add(HoodieAvroUtils.bytesToAvro(value, schema)); - } while (scanner.next()); - } catch (IOException e) { - throw new HoodieException("Error reading hfile " + path + " as a dataframe", e); - } - } - return valuesAsList.stream(); - } - public HoodieWriteConfig.Builder getConfigBuilder(String schemaStr, HoodieIndex.IndexType indexType, HoodieFailedWritesCleaningPolicy cleaningPolicy) { HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder().withPath(basePath) diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java index 17400acfc050..b1fd74a6169d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java @@ -381,7 +381,8 @@ private HoodieData> readRecordsForGroupBaseFiles(JavaSparkContex private HoodieFileReader getBaseOrBootstrapFileReader(SerializableConfiguration hadoopConf, String bootstrapBasePath, Option partitionFields, ClusteringOperation clusteringOp) throws IOException { - HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(hadoopConf.get(), new Path(clusteringOp.getDataFilePath())); + HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType) + .getFileReader(writeConfig, hadoopConf.get(), new Path(clusteringOp.getDataFilePath())); // handle bootstrap path if (StringUtils.nonEmpty(clusteringOp.getBootstrapFilePath()) && StringUtils.nonEmpty(bootstrapBasePath)) { String bootstrapFilePath = clusteringOp.getBootstrapFilePath(); @@ -393,7 +394,8 @@ private HoodieFileReader getBaseOrBootstrapFileReader(SerializableConfiguration } baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType).newBootstrapFileReader( baseFileReader, - HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(hadoopConf.get(), new Path(bootstrapFilePath)), partitionFields, + HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader( + writeConfig, hadoopConf.get(), new Path(bootstrapFilePath)), partitionFields, partitionValues); } return baseFileReader; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java index 79c6c9062dd2..98c016dfaf56 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java @@ -32,9 +32,9 @@ import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; -import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieClusteringException; @@ -146,7 +146,8 @@ private Iterator> readRecordsForGroupBaseFiles(List> indexedRecords = () -> { try { - HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType).getFileReader(getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath())); + HoodieFileReader baseFileReader = HoodieFileReaderFactory.getReaderFactory(recordType) + .getFileReader(writeConfig, getHoodieTable().getHadoopConf(), new Path(clusteringOp.getDataFilePath())); Option keyGeneratorOp = writeConfig.populateMetaFields() ? Option.empty() : Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(writeConfig.getProps())); // NOTE: Record have to be cloned here to make sure if it holds low-level engine-specific diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java index de7810be8ae6..f981061ecc35 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileReaderFactory.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieIOException; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.spark.sql.internal.SQLConf; @@ -41,7 +42,9 @@ protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { return new HoodieSparkParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(Configuration conf, Path path) throws IOException { + protected HoodieFileReader newHFileFileReader(Configuration conf, + Path path, + Option schemaOption) throws IOException { throw new HoodieIOException("Not support read HFile"); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java index 2c3ddfdcda2c..80a7e6a86a79 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java @@ -80,7 +80,7 @@ protected void executeBootstrap(HoodieBootstrapHandle bootstrapHandl HoodieRecord.HoodieRecordType recordType = table.getConfig().getRecordMerger().getRecordType(); HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(recordType) - .getFileReader(table.getHadoopConf(), sourceFilePath); + .getFileReader(table.getConfig(), table.getHadoopConf(), sourceFilePath); HoodieExecutor executor = null; try { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index e9c9fb12bc1d..511c34eb656b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -89,7 +89,8 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.index.HoodieIndex; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; @@ -117,7 +118,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.util.Time; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.MessageType; @@ -811,9 +811,10 @@ public void testVirtualKeysInBaseFiles() throws Exception { table.getHoodieView().sync(); List fileSlices = table.getSliceView().getLatestFileSlices("files").collect(Collectors.toList()); HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); - HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), new Path(baseFile.getPath()), - new CacheConfig(context.getHadoopConf().get())); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); + HoodieAvroHFileReaderImplBase hoodieHFileReader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + table.getConfig(), context.getHadoopConf().get(), new Path(baseFile.getPath())); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (populateMetaFields) { assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); @@ -1340,10 +1341,10 @@ private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable tabl } final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); - HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), - new Path(baseFile.getPath()), - new CacheConfig(context.getHadoopConf().get())); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); + HoodieAvroHFileReaderImplBase hoodieHFileReader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + table.getConfig(), context.getHadoopConf().get(), new Path(baseFile.getPath())); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); records.forEach(entry -> { if (enableMetaFields) { assertNotNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java index 0d601d786b7f..1a268675ac75 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java @@ -37,7 +37,8 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.metadata.HoodieBackedTableMetadata; import org.apache.hudi.metadata.HoodieMetadataLogRecordReader; import org.apache.hudi.metadata.HoodieMetadataPayload; @@ -51,7 +52,6 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.schema.MessageType; import org.junit.jupiter.params.ParameterizedTest; @@ -407,10 +407,10 @@ private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable tabl } final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); - HoodieAvroHFileReader hoodieHFileReader = new HoodieAvroHFileReader(context.getHadoopConf().get(), - new Path(baseFile.getPath()), - new CacheConfig(context.getHadoopConf().get())); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); + HoodieAvroHFileReaderImplBase hoodieHFileReader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + table.getConfig(), context.getHadoopConf().get(), new Path(baseFile.getPath())); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); records.forEach(entry -> { assertNull(((GenericRecord) entry).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); final String keyInPayload = (String) ((GenericRecord) entry) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index ff9e73065460..b59b1ea8d670 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -19,7 +19,6 @@ package org.apache.hudi.testutils; import org.apache.hudi.HoodieSparkUtils; -import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.SparkRDDReadClient; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieBaseFile; @@ -39,19 +38,12 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hudi.timeline.service.TimelineService; -import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; -import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaSparkContext; @@ -66,13 +58,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.LinkedList; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; +import static org.apache.hudi.testutils.GenericRecordValidationTestUtils.readHFile; /** * Utility methods to aid testing inside the HoodieClient module. @@ -206,7 +196,7 @@ public static long countRecordsOptionallySince(JavaSparkContext jsc, String base return rows.count(); } } else if (paths[0].endsWith(HoodieFileFormat.HFILE.getFileExtension())) { - Stream genericRecordStream = readHFile(jsc, paths); + Stream genericRecordStream = readHFile(jsc.hadoopConfiguration(), paths); if (lastCommitTimeOpt.isPresent()) { return genericRecordStream.filter(gr -> HoodieTimeline.compareTimestamps(lastCommitTimeOpt.get(), HoodieActiveTimeline.LESSER_THAN, gr.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString())) @@ -271,38 +261,6 @@ public static Dataset read(JavaSparkContext jsc, String basePath, SQLContex } } - public static Stream readHFile(JavaSparkContext jsc, String[] paths) { - // TODO: this should be ported to use HoodieStorageReader - List valuesAsList = new LinkedList<>(); - - FileSystem fs = HadoopFSUtils.getFs(paths[0], jsc.hadoopConfiguration()); - CacheConfig cacheConfig = new CacheConfig(fs.getConf()); - Schema schema = null; - for (String path : paths) { - try { - HFile.Reader reader = - HoodieHFileUtils.createHFileReader(fs, new Path(path), cacheConfig, fs.getConf()); - if (schema == null) { - schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY)))); - } - HFileScanner scanner = reader.getScanner(false, false); - if (!scanner.seekTo()) { - // EOF reached - continue; - } - - do { - Cell c = scanner.getCell(); - byte[] value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); - valuesAsList.add(HoodieAvroUtils.bytesToAvro(value, schema)); - } while (scanner.next()); - } catch (IOException e) { - throw new HoodieException("Error reading hfile " + path + " as a dataframe", e); - } - } - return valuesAsList.stream(); - } - /** * Initializes timeline service based on the write config. * diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 12bf01736c7c..523f6dd742c4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -228,8 +228,18 @@ public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOEx /** * Convert serialized bytes back into avro record. */ - public static GenericRecord bytesToAvro(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException { - BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(bytes, BINARY_DECODER.get()); + public static GenericRecord bytesToAvro(byte[] bytes, Schema writerSchema, Schema readerSchema) + throws IOException { + return bytesToAvro(bytes, 0, bytes.length, writerSchema, readerSchema); + } + + /** + * Convert serialized bytes back into avro record. + */ + public static GenericRecord bytesToAvro(byte[] bytes, int offset, int length, Schema writerSchema, + Schema readerSchema) throws IOException { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder( + bytes, offset, length, BINARY_DECODER.get()); BINARY_DECODER.set(decoder); GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); return reader.read(null, decoder); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java index 68f1a6911bbd..5bee0ec51495 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/BloomFilterFactory.java @@ -20,6 +20,8 @@ import org.apache.hudi.common.util.hash.Hash; +import java.nio.ByteBuffer; + /** * A Factory class to generate different versions of {@link BloomFilter}. */ @@ -60,4 +62,21 @@ public static BloomFilter fromString(String serString, String bloomFilterTypeCod throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode); } } + + /** + * Generates {@link BloomFilter} from a {@link ByteBuffer}. + * + * @param byteBuffer {@link ByteBuffer} containing the serialized bloom filter. + * @param bloomFilterTypeCode bloom filter type code as string. + * @return the {@link BloomFilter} thus generated from the passed in {@link ByteBuffer}. + */ + public static BloomFilter fromByteBuffer(ByteBuffer byteBuffer, String bloomFilterTypeCode) { + if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) { + return new SimpleBloomFilter(byteBuffer); + } else if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.DYNAMIC_V0.name())) { + return new HoodieDynamicBoundedBloomFilter(byteBuffer); + } else { + throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode); + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java index 3825b6634bea..5a4381d2ab8e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java @@ -26,8 +26,10 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.hudi.io.util.IOUtils.getDataInputStream; /** * Hoodie's dynamic bloom bounded bloom filter. This is based largely on Hadoop's DynamicBloomFilter, but with a bound @@ -64,13 +66,24 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter { public HoodieDynamicBoundedBloomFilter(String serString) { // ignoring the type code for now, since we have just one version byte[] bytes = Base64CodecUtil.decode(serString); - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); - try { - internalDynamicBloomFilter = new InternalDynamicBloomFilter(); - internalDynamicBloomFilter.readFields(dis); - dis.close(); + try (DataInputStream stream = new DataInputStream(new ByteArrayInputStream(bytes))) { + extractAndSetInternalBloomFilter(stream); } catch (IOException e) { - throw new HoodieIndexException("Could not deserialize BloomFilter instance", e); + throw new HoodieIndexException("Could not deserialize BloomFilter from string", e); + } + } + + /** + * Creates {@link HoodieDynamicBoundedBloomFilter} from the given {@link ByteBuffer}. + * + * @param byteBuffer {@link ByteBuffer} containing the serialized bloom filter. + */ + public HoodieDynamicBoundedBloomFilter(ByteBuffer byteBuffer) { + // ignoring the type code for now, since we have just one version + try (DataInputStream stream = getDataInputStream(Base64CodecUtil.decode(byteBuffer))) { + extractAndSetInternalBloomFilter(stream); + } catch (IOException e) { + throw new HoodieIndexException("Could not deserialize BloomFilter from byte buffer", e); } } @@ -107,5 +120,10 @@ public String serializeToString() { public BloomFilterTypeCode getBloomFilterTypeCode() { return BloomFilterTypeCode.DYNAMIC_V0; } + + private void extractAndSetInternalBloomFilter(DataInputStream dis) throws IOException { + internalDynamicBloomFilter = new InternalDynamicBloomFilter(); + internalDynamicBloomFilter.readFields(dis); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java index 0183aedaf065..c7ada7a54fca 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/SimpleBloomFilter.java @@ -30,8 +30,10 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.apache.hudi.io.util.IOUtils.getDataInputStream; /** * A Simple Bloom filter implementation built on top of {@link InternalBloomFilter}. @@ -65,12 +67,24 @@ public SimpleBloomFilter(int numEntries, double errorRate, int hashType) { public SimpleBloomFilter(String serString) { this.filter = new InternalBloomFilter(); byte[] bytes = Base64CodecUtil.decode(serString); - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); - try { - this.filter.readFields(dis); - dis.close(); + try (DataInputStream stream = new DataInputStream(new ByteArrayInputStream(bytes))) { + extractAndSetInternalBloomFilter(stream); + } catch (IOException e) { + throw new HoodieIndexException("Could not deserialize BloomFilter from string", e); + } + } + + /** + * Creates {@link SimpleBloomFilter} from the given {@link ByteBuffer}. + * + * @param byteBuffer {@link ByteBuffer} containing the serialized bloom filter. + */ + public SimpleBloomFilter(ByteBuffer byteBuffer) { + this.filter = new InternalBloomFilter(); + try (DataInputStream stream = getDataInputStream(Base64CodecUtil.decode(byteBuffer))) { + extractAndSetInternalBloomFilter(stream); } catch (IOException e) { - throw new HoodieIndexException("Could not deserialize BloomFilter instance", e); + throw new HoodieIndexException("Could not deserialize BloomFilter from byte buffer", e); } } @@ -138,4 +152,7 @@ public BloomFilterTypeCode getBloomFilterTypeCode() { return BloomFilterTypeCode.SIMPLE; } + private void extractAndSetInternalBloomFilter(DataInputStream dis) throws IOException { + this.filter.readFields(dis); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 27314f150dc0..82905ff95aab 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -33,10 +33,16 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.hfile.HFileReader; +import org.apache.hudi.io.hfile.HFileReaderImpl; +import org.apache.hudi.io.hfile.Key; +import org.apache.hudi.io.hfile.UTF8StringKey; import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.io.util.IOUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.CellComparatorImpl; @@ -94,7 +100,8 @@ public class HFileBootstrapIndex extends BootstrapIndex { private static final String HFILE_CELL_KEY_SUFFIX_PART = "//LATEST_TIMESTAMP/Put/vlen"; // Additional Metadata written to HFiles. - public static final byte[] INDEX_INFO_KEY = Bytes.toBytes("INDEX_INFO"); + public static final String INDEX_INFO_KEY_STRING = "INDEX_INFO"; + public static final byte[] INDEX_INFO_KEY = Bytes.toBytes(INDEX_INFO_KEY_STRING); private final boolean isPresent; @@ -165,29 +172,6 @@ private static Path fileIdIndexPath(HoodieTableMetaClient metaClient) { HoodieFileFormat.HFILE.getFileExtension())); } - /** - * HFile stores cell key in the format example : "2020/03/18//LATEST_TIMESTAMP/Put/vlen=3692/seqid=0". - * This API returns only the user key part from it. - * @param cellKey HFIle Cell Key - * @return - */ - private static String getUserKeyFromCellKey(String cellKey) { - int hfileSuffixBeginIndex = cellKey.lastIndexOf(HFILE_CELL_KEY_SUFFIX_PART); - return cellKey.substring(0, hfileSuffixBeginIndex); - } - - /** - * Helper method to create HFile Reader. - * - * @param hFilePath File Path - * @param conf Configuration - * @param fileSystem File System - */ - private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) { - LOG.info("Opening HFile for reading :" + hFilePath); - return HoodieHFileUtils.createHFileReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), conf); - } - @Override public BootstrapIndex.IndexReader createReader() { return new HFileBootstrapIndexReader(metaClient); @@ -229,6 +213,190 @@ public static class HFileBootstrapIndexReader extends BootstrapIndex.IndexReader private final String indexByPartitionPath; private final String indexByFileIdPath; + // Index Readers + private transient HFileReader indexByPartitionReader; + private transient HFileReader indexByFileIdReader; + + // Bootstrap Index Info + private transient HoodieBootstrapIndexInfo bootstrapIndexInfo; + + public HFileBootstrapIndexReader(HoodieTableMetaClient metaClient) { + super(metaClient); + Path indexByPartitionPath = partitionIndexPath(metaClient); + Path indexByFilePath = fileIdIndexPath(metaClient); + this.indexByPartitionPath = indexByPartitionPath.toString(); + this.indexByFileIdPath = indexByFilePath.toString(); + initIndexInfo(); + this.bootstrapBasePath = bootstrapIndexInfo.getBootstrapBasePath(); + LOG.info("Loaded HFileBasedBootstrapIndex with source base path :" + bootstrapBasePath); + } + + /** + * Helper method to create native HFile Reader. + * + * @param hFilePath file path. + * @param fileSystem file system. + */ + private static HFileReader createReader(String hFilePath, FileSystem fileSystem) throws IOException { + LOG.info("Opening HFile for reading :" + hFilePath); + Path path = new Path(hFilePath); + long fileSize = fileSystem.getFileStatus(path).getLen(); + FSDataInputStream stream = fileSystem.open(path); + return new HFileReaderImpl(stream, fileSize); + } + + private synchronized void initIndexInfo() { + if (bootstrapIndexInfo == null) { + try { + bootstrapIndexInfo = fetchBootstrapIndexInfo(); + } catch (IOException ioe) { + throw new HoodieException(ioe.getMessage(), ioe); + } + } + } + + private HoodieBootstrapIndexInfo fetchBootstrapIndexInfo() throws IOException { + return TimelineMetadataUtils.deserializeAvroMetadata( + partitionIndexReader().getMetaInfo(new UTF8StringKey(INDEX_INFO_KEY_STRING)).get(), + HoodieBootstrapIndexInfo.class); + } + + private synchronized HFileReader partitionIndexReader() throws IOException { + if (indexByPartitionReader == null) { + LOG.info("Opening partition index :" + indexByPartitionPath); + this.indexByPartitionReader = createReader(indexByPartitionPath, metaClient.getFs()); + } + return indexByPartitionReader; + } + + private synchronized HFileReader fileIdIndexReader() throws IOException { + if (indexByFileIdReader == null) { + LOG.info("Opening fileId index :" + indexByFileIdPath); + this.indexByFileIdReader = createReader(indexByFileIdPath, metaClient.getFs()); + } + return indexByFileIdReader; + } + + @Override + public List getIndexedPartitionPaths() { + try { + return getAllKeys(partitionIndexReader(), HFileBootstrapIndex::getPartitionFromKey); + } catch (IOException e) { + throw new HoodieIOException("Unable to read indexed partition paths.", e); + } + } + + @Override + public List getIndexedFileGroupIds() { + try { + return getAllKeys(fileIdIndexReader(), HFileBootstrapIndex::getFileGroupFromKey); + } catch (IOException e) { + throw new HoodieIOException("Unable to read indexed file group IDs.", e); + } + } + + private List getAllKeys(HFileReader reader, Function converter) { + List keys = new ArrayList<>(); + try { + boolean available = reader.seekTo(); + while (available) { + keys.add(converter.apply(reader.getKeyValue().get().getKey().getContentInString())); + available = reader.next(); + } + } catch (IOException ioe) { + throw new HoodieIOException(ioe.getMessage(), ioe); + } + + return keys; + } + + @Override + public List getSourceFileMappingForPartition(String partition) { + try { + HFileReader reader = partitionIndexReader(); + Key lookupKey = new UTF8StringKey(getPartitionKey(partition)); + reader.seekTo(); + if (reader.seekTo(lookupKey) == HFileReader.SEEK_TO_FOUND) { + org.apache.hudi.io.hfile.KeyValue keyValue = reader.getKeyValue().get(); + byte[] valBytes = IOUtils.copy( + keyValue.getBytes(), keyValue.getValueOffset(), keyValue.getValueLength()); + HoodieBootstrapPartitionMetadata metadata = + TimelineMetadataUtils.deserializeAvroMetadata(valBytes, HoodieBootstrapPartitionMetadata.class); + return metadata.getFileIdToBootstrapFile().entrySet().stream() + .map(e -> new BootstrapFileMapping(bootstrapBasePath, metadata.getBootstrapPartitionPath(), + partition, e.getValue(), e.getKey())).collect(Collectors.toList()); + } else { + LOG.warn("No value found for partition key (" + partition + ")"); + return new ArrayList<>(); + } + } catch (IOException ioe) { + throw new HoodieIOException(ioe.getMessage(), ioe); + } + } + + @Override + public String getBootstrapBasePath() { + return bootstrapBasePath; + } + + @Override + public Map getSourceFileMappingForFileIds( + List ids) { + Map result = new HashMap<>(); + // Arrange input Keys in sorted order for 1 pass scan + List fileGroupIds = new ArrayList<>(ids); + Collections.sort(fileGroupIds); + try { + HFileReader reader = fileIdIndexReader(); + reader.seekTo(); + for (HoodieFileGroupId fileGroupId : fileGroupIds) { + Key lookupKey = new UTF8StringKey(getFileGroupKey(fileGroupId)); + if (reader.seekTo(lookupKey) == HFileReader.SEEK_TO_FOUND) { + org.apache.hudi.io.hfile.KeyValue keyValue = reader.getKeyValue().get(); + byte[] valBytes = IOUtils.copy( + keyValue.getBytes(), keyValue.getValueOffset(), keyValue.getValueLength()); + HoodieBootstrapFilePartitionInfo fileInfo = TimelineMetadataUtils.deserializeAvroMetadata(valBytes, + HoodieBootstrapFilePartitionInfo.class); + BootstrapFileMapping mapping = new BootstrapFileMapping(bootstrapBasePath, + fileInfo.getBootstrapPartitionPath(), fileInfo.getPartitionPath(), fileInfo.getBootstrapFileStatus(), + fileGroupId.getFileId()); + result.put(fileGroupId, mapping); + } + } + } catch (IOException ioe) { + throw new HoodieIOException(ioe.getMessage(), ioe); + } + return result; + } + + @Override + public void close() { + try { + if (indexByPartitionReader != null) { + indexByPartitionReader.close(); + indexByPartitionReader = null; + } + if (indexByFileIdReader != null) { + indexByFileIdReader.close(); + indexByFileIdReader = null; + } + } catch (IOException ioe) { + throw new HoodieIOException(ioe.getMessage(), ioe); + } + } + } + + /** + * HBase HFile reader based Index Reader. This is deprecated. + */ + public static class HBaseHFileBootstrapIndexReader extends BootstrapIndex.IndexReader { + + // Base Path of external files. + private final String bootstrapBasePath; + // Well Known Paths for indices + private final String indexByPartitionPath; + private final String indexByFileIdPath; + // Index Readers private transient HFile.Reader indexByPartitionReader; private transient HFile.Reader indexByFileIdReader; @@ -236,7 +404,7 @@ public static class HFileBootstrapIndexReader extends BootstrapIndex.IndexReader // Bootstrap Index Info private transient HoodieBootstrapIndexInfo bootstrapIndexInfo; - public HFileBootstrapIndexReader(HoodieTableMetaClient metaClient) { + public HBaseHFileBootstrapIndexReader(HoodieTableMetaClient metaClient) { super(metaClient); Path indexByPartitionPath = partitionIndexPath(metaClient); Path indexByFilePath = fileIdIndexPath(metaClient); @@ -247,6 +415,30 @@ public HFileBootstrapIndexReader(HoodieTableMetaClient metaClient) { LOG.info("Loaded HFileBasedBootstrapIndex with source base path :" + bootstrapBasePath); } + /** + * HFile stores cell key in the format example : "2020/03/18//LATEST_TIMESTAMP/Put/vlen=3692/seqid=0". + * This API returns only the user key part from it. + * + * @param cellKey HFIle Cell Key + * @return + */ + private static String getUserKeyFromCellKey(String cellKey) { + int hfileSuffixBeginIndex = cellKey.lastIndexOf(HFILE_CELL_KEY_SUFFIX_PART); + return cellKey.substring(0, hfileSuffixBeginIndex); + } + + /** + * Helper method to create HFile Reader. + * + * @param hFilePath File Path + * @param conf Configuration + * @param fileSystem File System + */ + private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) { + LOG.info("Opening HFile for reading :" + hFilePath); + return HoodieHFileUtils.createHFileReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), conf); + } + private void initIndexInfo() { synchronized (this) { if (null == bootstrapIndexInfo) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java index c79d3711c5a9..daba6f9203eb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java @@ -34,6 +34,7 @@ public enum Names { SPARK_DATASOURCE("Spark Datasource Configs"), FLINK_SQL("Flink Sql Configs"), WRITE_CLIENT("Write Client Configs"), + READER("Reader Configs"), META_SYNC("Metastore and Catalog Sync Configs"), METRICS("Metrics Configs"), RECORD_PAYLOAD("Record Payload Config"), diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieReaderConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieReaderConfig.java new file mode 100644 index 000000000000..1574ec18f47f --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieReaderConfig.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.config; + +import javax.annotation.concurrent.Immutable; + +/** + * Configurations for reading a file group + */ +@Immutable +@ConfigClassProperty(name = "Reader Configs", + groupName = ConfigGroups.Names.READER, + description = "Configurations that control file group reading.") +public class HoodieReaderConfig { + public static final ConfigProperty USE_NATIVE_HFILE_READER = ConfigProperty + .key("_hoodie.hfile.use.native.reader") + .defaultValue(true) + .markAdvanced() + .sinceVersion("0.15.0") + .withDocumentation("When enabled, the native HFile reader is used to read HFiles. This is an internal config."); + +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java index 02b1ef352515..86a71ae10754 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java @@ -41,8 +41,9 @@ import org.apache.hudi.internal.schema.InternalSchema; import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager; import org.apache.hudi.internal.schema.utils.SerDeHelper; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; import org.apache.hudi.io.storage.HoodieAvroOrcReader; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.util.Lazy; import org.apache.avro.JsonProperties; @@ -51,7 +52,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.hadoop.ParquetFileReader; @@ -73,6 +73,7 @@ import static org.apache.hudi.avro.AvroSchemaUtils.appendFieldsToSchema; import static org.apache.hudi.avro.AvroSchemaUtils.containsFieldInSchema; import static org.apache.hudi.avro.AvroSchemaUtils.createNullableSchema; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; /** * Helper class to read schema from data files and log files and to convert it between different formats. @@ -338,9 +339,10 @@ private MessageType readSchemaFromHFileBaseFile(Path hFilePath) throws IOExcepti LOG.info("Reading schema from " + hFilePath); FileSystem fs = metaClient.getRawFs(); - CacheConfig cacheConfig = new CacheConfig(fs.getConf()); - try (HoodieAvroHFileReader hFileReader = new HoodieAvroHFileReader(fs.getConf(), hFilePath, cacheConfig)) { - return convertAvroSchemaToParquet(hFileReader.getSchema()); + try (HoodieFileReader fileReader = + HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, fs.getConf(), hFilePath)) { + return convertAvroSchemaToParquet(fileReader.getSchema()); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 32177c82f9ea..27255c7b905e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -31,6 +31,7 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.CorruptedLogFileException; import org.apache.hudi.exception.HoodieIOException; @@ -61,6 +62,7 @@ import java.util.Map; import java.util.Objects; +import static org.apache.hudi.common.config.HoodieReaderConfig.USE_NATIVE_HFILE_READER; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -207,9 +209,10 @@ private HoodieLogBlock readBlock() throws IOException { case HFILE_DATA_BLOCK: checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); - - return new HoodieHFileDataBlock(() -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, - Option.ofNullable(readerSchema), header, footer, enableRecordLookups, logFile.getPath()); + return new HoodieHFileDataBlock( + () -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, + Option.ofNullable(readerSchema), header, footer, enableRecordLookups, logFile.getPath(), + ConfigUtils.getBooleanWithAltKeys(fs.getConf(), USE_NATIVE_HFILE_READER)); case PARQUET_DATA_BLOCK: checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java index 874f7ebab25a..64781bdb55b6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java @@ -20,6 +20,8 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; +import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; +import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockContentLocation; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 34d69eb2288b..6b06bc51b2f6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -19,17 +19,25 @@ package org.apache.hudi.common.table.log.block; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.fs.inline.InLineFSUtils; +import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; +import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; +import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockContentLocation; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.hudi.io.storage.HoodieHBaseAvroHFileReader; import org.apache.hudi.io.storage.HoodieHBaseKVComparator; import org.apache.avro.Schema; @@ -75,6 +83,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock { // This path is used for constructing HFile reader context, which should not be // interpreted as the actual file path for the HFile data blocks private final Path pathForReader; + private final HoodieConfig hFileReaderConfig; public HoodieHFileDataBlock(Supplier inputStreamSupplier, Option content, @@ -84,19 +93,24 @@ public HoodieHFileDataBlock(Supplier inputStreamSupplier, Map header, Map footer, boolean enablePointLookups, - Path pathForReader) { - super(content, inputStreamSupplier, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, HoodieAvroHFileReader.KEY_FIELD_NAME, enablePointLookups); + Path pathForReader, + boolean useNativeHFileReader) { + super(content, inputStreamSupplier, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, + header, footer, HoodieAvroHFileReaderImplBase.KEY_FIELD_NAME, enablePointLookups); this.compressionAlgorithm = Option.empty(); this.pathForReader = pathForReader; + this.hFileReaderConfig = getHFileReaderConfig(useNativeHFileReader); } public HoodieHFileDataBlock(List records, Map header, Compression.Algorithm compressionAlgorithm, - Path pathForReader) { - super(records, header, new HashMap<>(), HoodieAvroHFileReader.KEY_FIELD_NAME); + Path pathForReader, + boolean useNativeHFileReader) { + super(records, header, new HashMap<>(), HoodieHBaseAvroHFileReader.KEY_FIELD_NAME); this.compressionAlgorithm = Option.of(compressionAlgorithm); this.pathForReader = pathForReader; + this.hFileReaderConfig = getHFileReaderConfig(useNativeHFileReader); } @Override @@ -162,7 +176,8 @@ protected byte[] serializeRecords(List records) throws IOException } }); - writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.SCHEMA_KEY), getUTF8Bytes(getSchema().toString())); + writer.appendFileInfo( + getUTF8Bytes(HoodieAvroHFileReaderImplBase.SCHEMA_KEY), getUTF8Bytes(getSchema().toString())); writer.close(); ostream.flush(); @@ -178,8 +193,11 @@ protected ClosableIterator> deserializeRecords(byte[] conten Configuration hadoopConf = FSUtils.buildInlineConf(getBlockContentLocation().get().getHadoopConf()); FileSystem fs = HadoopFSUtils.getFs(pathForReader.toString(), hadoopConf); // Read the content - try (HoodieAvroHFileReader reader = new HoodieAvroHFileReader(hadoopConf, pathForReader, new CacheConfig(hadoopConf), - fs, content, Option.of(getSchemaFromHeader()))) { + try (HoodieFileReader reader = + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getContentReader( + + hFileReaderConfig, hadoopConf, pathForReader, HoodieFileFormat.HFILE, fs, content, + Option.of(getSchemaFromHeader()))) { return unsafeCast(reader.getRecordIterator(readerSchema)); } } @@ -199,9 +217,10 @@ protected ClosableIterator> lookupRecords(List sorte blockContentLoc.getContentPositionInLogFile(), blockContentLoc.getBlockSize()); - try (final HoodieAvroHFileReader reader = - new HoodieAvroHFileReader(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf), - Option.of(getSchemaFromHeader()))) { + try (final HoodieAvroHFileReaderImplBase reader = (HoodieAvroHFileReaderImplBase) + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + hFileReaderConfig, inlineConf, inlinePath, HoodieFileFormat.HFILE, + Option.of(getSchemaFromHeader()))) { // Get writer's schema from the header final ClosableIterator> recordIterator = fullKey ? reader.getRecordsByKeysIterator(sortedKeys, readerSchema) : reader.getRecordsByKeyPrefixIterator(sortedKeys, readerSchema); @@ -227,4 +246,11 @@ private void printRecord(String msg, byte[] bs, Schema schema) throws IOExceptio byte[] json = HoodieAvroUtils.avroToJson(record, true); LOG.error(String.format("%s: %s", msg, new String(json))); } + + private HoodieConfig getHFileReaderConfig(boolean useNativeHFileReader) { + HoodieConfig config = new HoodieConfig(); + config.setValue( + HoodieReaderConfig.USE_NATIVE_HFILE_READER, Boolean.toString(useNativeHFileReader)); + return config; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java index 9f4c989f0ef0..b026b85c3a3b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java @@ -52,6 +52,7 @@ import static org.apache.hudi.common.config.HoodieStorageConfig.PARQUET_MAX_FILE_SIZE; import static org.apache.hudi.common.config.HoodieStorageConfig.PARQUET_PAGE_SIZE; import static org.apache.hudi.common.model.HoodieFileFormat.PARQUET; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; /** * HoodieParquetDataBlock contains a list of records serialized using Parquet. @@ -158,7 +159,8 @@ protected ClosableIterator> readRecordsFromBlockPayload(Hood Schema writerSchema = new Schema.Parser().parse(this.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); - ClosableIterator> iterator = HoodieFileReaderFactory.getReaderFactory(type).getFileReader(inlineConf, inlineLogFilePath, PARQUET) + ClosableIterator> iterator = HoodieFileReaderFactory.getReaderFactory(type) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, inlineConf, inlineLogFilePath, PARQUET, Option.empty()) .getRecordIterator(writerSchema, readerSchema); return iterator; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java index 08ba298d2302..663a070620c4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.util; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Base64; @@ -38,6 +39,16 @@ public static byte[] decode(String encodedString) { return Base64.getDecoder().decode(getUTF8Bytes(encodedString)); } + /** + * Decodes data from the input {@link ByteBuffer} into using the encoding scheme. + * + * @param byteBuffer input data in byte buffer to be decoded. + * @return A newly-allocated {@link ByteBuffer} containing the decoded bytes. + */ + public static ByteBuffer decode(ByteBuffer byteBuffer) { + return Base64.getDecoder().decode(byteBuffer); + } + /** * Encodes all bytes from the specified byte array into String using StandardCharsets.UTF_8. * diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java index 2dad6f979462..39380f1de3b6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ConfigUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util; import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodiePayloadProps; import org.apache.hudi.common.table.HoodieTableConfig; @@ -37,6 +38,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.apache.hudi.common.config.HoodieReaderConfig.USE_NATIVE_HFILE_READER; + public class ConfigUtils { public static final String STREAMER_CONFIG_PREFIX = "hoodie.streamer."; @Deprecated @@ -56,6 +59,8 @@ public class ConfigUtils { */ public static final String TABLE_SERDE_PATH = "path"; + public static final HoodieConfig DEFAULT_HUDI_CONFIG_FOR_READER = new HoodieConfig(); + private static final Logger LOG = LoggerFactory.getLogger(ConfigUtils.class); /** @@ -274,11 +279,11 @@ public static void checkRequiredConfigProperties(TypedProperties props, * Gets the raw value for a {@link ConfigProperty} config from properties. The key and * alternative keys are used to fetch the config. * - * @param props Configs in {@link TypedProperties}. + * @param props Configs in {@link Properties}. * @param configProperty {@link ConfigProperty} config to fetch. * @return {@link Option} of value if the config exists; empty {@link Option} otherwise. */ - public static Option getRawValueWithAltKeys(TypedProperties props, + public static Option getRawValueWithAltKeys(Properties props, ConfigProperty configProperty) { if (props.containsKey(configProperty.key())) { return Option.ofNullable(props.get(configProperty.key())); @@ -294,6 +299,32 @@ public static Option getRawValueWithAltKeys(TypedProperties props, return Option.empty(); } + /** + * Gets the raw value for a {@link ConfigProperty} config from Hadoop configuration. The key and + * alternative keys are used to fetch the config. + * + * @param conf Configs in Hadoop {@link Configuration}. + * @param configProperty {@link ConfigProperty} config to fetch. + * @return {@link Option} of value if the config exists; empty {@link Option} otherwise. + */ + public static Option getRawValueWithAltKeys(Configuration conf, + ConfigProperty configProperty) { + String value = conf.get(configProperty.key()); + if (value != null) { + return Option.of(value); + } + for (String alternative : configProperty.getAlternatives()) { + String altValue = conf.get(alternative); + if (altValue != null) { + LOG.warn(String.format("The configuration key '%s' has been deprecated " + + "and may be removed in the future. Please use the new key '%s' instead.", + alternative, configProperty.key())); + return Option.of(altValue); + } + } + return Option.empty(); + } + /** * Gets the String value for a {@link ConfigProperty} config from properties. The key and * alternative keys are used to fetch the config. If the config is not found, an @@ -407,12 +438,12 @@ public static String getStringWithAltKeys(TypedProperties props, * alternative keys are used to fetch the config. The default value of {@link ConfigProperty} * config, if exists, is returned if the config is not found in the properties. * - * @param props Configs in {@link TypedProperties}. + * @param props Configs in {@link Properties}. * @param configProperty {@link ConfigProperty} config to fetch. * @return boolean value if the config exists; default boolean value if the config does not exist * and there is default value defined in the {@link ConfigProperty} config; {@code false} otherwise. */ - public static boolean getBooleanWithAltKeys(TypedProperties props, + public static boolean getBooleanWithAltKeys(Properties props, ConfigProperty configProperty) { Option rawValue = getRawValueWithAltKeys(props, configProperty); boolean defaultValue = configProperty.hasDefaultValue() @@ -420,6 +451,24 @@ public static boolean getBooleanWithAltKeys(TypedProperties props, return rawValue.map(v -> Boolean.parseBoolean(v.toString())).orElse(defaultValue); } + /** + * Gets the boolean value for a {@link ConfigProperty} config from Hadoop configuration. The key and + * alternative keys are used to fetch the config. The default value of {@link ConfigProperty} + * config, if exists, is returned if the config is not found in the configuration. + * + * @param conf Configs in Hadoop {@link Configuration}. + * @param configProperty {@link ConfigProperty} config to fetch. + * @return boolean value if the config exists; default boolean value if the config does not exist + * and there is default value defined in the {@link ConfigProperty} config; {@code false} otherwise. + */ + public static boolean getBooleanWithAltKeys(Configuration conf, + ConfigProperty configProperty) { + Option rawValue = getRawValueWithAltKeys(conf, configProperty); + boolean defaultValue = configProperty.hasDefaultValue() + ? Boolean.parseBoolean(configProperty.defaultValue().toString()) : false; + return rawValue.map(Boolean::parseBoolean).orElse(defaultValue); + } + /** * Gets the integer value for a {@link ConfigProperty} config from properties. The key and * alternative keys are used to fetch the config. The default value of {@link ConfigProperty} @@ -498,4 +547,12 @@ public static Set getAllConfigKeys(List> configPr return keys.stream(); }).collect(Collectors.toSet()); } + + public static HoodieConfig getReaderConfigs(Configuration conf) { + HoodieConfig config = new HoodieConfig(); + config.setAll(DEFAULT_HUDI_CONFIG_FOR_READER.getProps()); + config.setValue(USE_NATIVE_HFILE_READER, + Boolean.toString(ConfigUtils.getBooleanWithAltKeys(conf, USE_NATIVE_HFILE_READER))); + return config; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java index 8edb0dd9f560..0a511d10b031 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileReaderFactory.java @@ -20,21 +20,45 @@ import org.apache.hudi.common.util.Option; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import java.io.IOException; public class HoodieAvroFileReaderFactory extends HoodieFileReaderFactory { - protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { return new HoodieAvroParquetReader(conf, path); } - protected HoodieFileReader newHFileFileReader(Configuration conf, Path path) throws IOException { + protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + Configuration conf, + Path path, + Option schemaOption) throws IOException { + if (useNativeHFileReader) { + return new HoodieNativeAvroHFileReader(conf, path, schemaOption); + } + CacheConfig cacheConfig = new CacheConfig(conf); + if (schemaOption.isPresent()) { + return new HoodieHBaseAvroHFileReader(conf, path, cacheConfig, path.getFileSystem(conf), schemaOption); + } + return new HoodieHBaseAvroHFileReader(conf, path, cacheConfig); + } + + protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + Configuration conf, + Path path, + FileSystem fs, + byte[] content, + Option schemaOption) + throws IOException { + if (useNativeHFileReader) { + return new HoodieNativeAvroHFileReader(conf, content, schemaOption); + } CacheConfig cacheConfig = new CacheConfig(conf); - return new HoodieAvroHFileReader(conf, path, cacheConfig); + return new HoodieHBaseAvroHFileReader(conf, path, cacheConfig, fs, content, schemaOption); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java index 471ab149fa58..2aac99ab9647 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroFileWriterFactory.java @@ -86,9 +86,11 @@ protected HoodieFileWriter newHFileFileWriter( TaskContextSupplier taskContextSupplier) throws IOException { BloomFilter filter = createBloomFilter(config); HoodieHFileConfig hfileConfig = new HoodieHFileConfig(conf, - Compression.Algorithm.valueOf(config.getString(HoodieStorageConfig.HFILE_COMPRESSION_ALGORITHM_NAME)), + Compression.Algorithm.valueOf( + config.getString(HoodieStorageConfig.HFILE_COMPRESSION_ALGORITHM_NAME)), config.getInt(HoodieStorageConfig.HFILE_BLOCK_SIZE), - config.getLong(HoodieStorageConfig.HFILE_MAX_FILE_SIZE), HoodieAvroHFileReader.KEY_FIELD_NAME, + config.getLong(HoodieStorageConfig.HFILE_MAX_FILE_SIZE), + HoodieAvroHFileReaderImplBase.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); return new HoodieAvroHFileWriter(instantTime, path, hfileConfig, schema, taskContextSupplier, config.getBoolean(HoodieTableConfig.POPULATE_META_FIELDS)); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java new file mode 100644 index 000000000000..60e17c47aa3c --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; + +public abstract class HoodieAvroHFileReaderImplBase extends HoodieAvroFileReaderBase + implements HoodieSeekingFileReader { + // TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling + public static final String SCHEMA_KEY = "schema"; + public static final String KEY_BLOOM_FILTER_META_BLOCK = "bloomFilter"; + public static final String KEY_BLOOM_FILTER_TYPE_CODE = "bloomFilterTypeCode"; + + public static final String KEY_FIELD_NAME = "key"; + public static final String KEY_MIN_RECORD = "minRecordKey"; + public static final String KEY_MAX_RECORD = "maxRecordKey"; + + /** + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema + */ + public static List readAllRecords(HoodieAvroFileReaderBase reader) + throws IOException { + Schema schema = reader.getSchema(); + return toStream(reader.getIndexedRecordIterator(schema)) + .collect(Collectors.toList()); + } + + /** + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema and filtering keys. + */ + public static List readRecords(HoodieAvroHFileReaderImplBase reader, + List keys) throws IOException { + return readRecords(reader, keys, reader.getSchema()); + } + + /** + * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY + *

    + * Reads all the records with given schema and filtering keys. + */ + public static List readRecords(HoodieAvroHFileReaderImplBase reader, + List keys, + Schema schema) throws IOException { + Collections.sort(keys); + return toStream(reader.getIndexedRecordsByKeysIterator(keys, schema)) + .collect(Collectors.toList()); + } + + public abstract ClosableIterator getIndexedRecordsByKeysIterator(List keys, + Schema readerSchema) + throws IOException; + + public abstract ClosableIterator getIndexedRecordsByKeyPrefixIterator( + List sortedKeyPrefixes, Schema readerSchema) throws IOException; + + protected static GenericRecord deserialize(final byte[] keyBytes, + final byte[] valueBytes, + Schema writerSchema, + Schema readerSchema) throws IOException { + return deserialize( + keyBytes, 0, keyBytes.length, valueBytes, 0, valueBytes.length, writerSchema, readerSchema); + } + + protected static GenericRecord deserialize(final byte[] keyBytes, int keyOffset, int keyLength, + final byte[] valueBytes, int valueOffset, int valueLength, + Schema writerSchema, + Schema readerSchema) throws IOException { + GenericRecord record = HoodieAvroUtils.bytesToAvro( + valueBytes, valueOffset, valueLength, writerSchema, readerSchema); + + getKeySchema(readerSchema).ifPresent(keyFieldSchema -> { + final Object keyObject = record.get(keyFieldSchema.pos()); + if (keyObject != null && keyObject.toString().isEmpty()) { + record.put(keyFieldSchema.pos(), getStringFromUTF8Bytes(keyBytes, keyOffset, keyLength)); + } + }); + + return record; + } + + private static Option getKeySchema(Schema schema) { + return Option.ofNullable(schema.getField(KEY_FIELD_NAME)); + } + + static class SeekableByteArrayInputStream extends ByteBufferBackedInputStream + implements Seekable, PositionedReadable { + public SeekableByteArrayInputStream(byte[] buf) { + super(buf); + } + + @Override + public long getPos() throws IOException { + return getPosition(); + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) throws IOException { + return copyFrom(position, buffer, offset, length); + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + read(position, buffer, 0, buffer.length); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { + read(position, buffer, offset, length); + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java index b274abdbc2c7..a769828b78ec 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileWriter.java @@ -103,16 +103,19 @@ public HoodieAvroHFileWriter(String instantTime, Path file, HoodieHFileConfig hf .withCellComparator(hfileConfig.getHFileComparator()) .build(); - conf.set(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, String.valueOf(hfileConfig.shouldPrefetchBlocksOnOpen())); + conf.set(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, + String.valueOf(hfileConfig.shouldPrefetchBlocksOnOpen())); conf.set(HColumnDescriptor.CACHE_DATA_IN_L1, String.valueOf(hfileConfig.shouldCacheDataInL1())); - conf.set(DROP_BEHIND_CACHE_COMPACTION_KEY, String.valueOf(hfileConfig.shouldDropBehindCacheCompaction())); + conf.set(DROP_BEHIND_CACHE_COMPACTION_KEY, + String.valueOf(hfileConfig.shouldDropBehindCacheCompaction())); CacheConfig cacheConfig = new CacheConfig(conf); this.writer = HFile.getWriterFactory(conf, cacheConfig) .withPath(this.fs, this.file) .withFileContext(context) .create(); - writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.SCHEMA_KEY), getUTF8Bytes(schema.toString())); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReaderImplBase.SCHEMA_KEY), + getUTF8Bytes(schema.toString())); this.prevRecordKey = ""; } @@ -179,20 +182,23 @@ public void close() throws IOException { if (maxRecordKey == null) { maxRecordKey = ""; } - writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_MIN_RECORD), getUTF8Bytes(minRecordKey)); - writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_MAX_RECORD), getUTF8Bytes(maxRecordKey)); - writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReader.KEY_BLOOM_FILTER_TYPE_CODE), + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReaderImplBase.KEY_MIN_RECORD), + getUTF8Bytes(minRecordKey)); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReaderImplBase.KEY_MAX_RECORD), + getUTF8Bytes(maxRecordKey)); + writer.appendFileInfo(getUTF8Bytes(HoodieAvroHFileReaderImplBase.KEY_BLOOM_FILTER_TYPE_CODE), getUTF8Bytes(bloomFilter.getBloomFilterTypeCode().toString())); - writer.appendMetaBlock(HoodieAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() { - @Override - public void write(DataOutput out) throws IOException { - out.write(getUTF8Bytes(bloomFilter.serializeToString())); - } - - @Override - public void readFields(DataInput in) throws IOException { - } - }); + writer.appendMetaBlock(HoodieAvroHFileReaderImplBase.KEY_BLOOM_FILTER_META_BLOCK, + new Writable() { + @Override + public void write(DataOutput out) throws IOException { + out.write(getUTF8Bytes(bloomFilter.serializeToString())); + } + + @Override + public void readFields(DataInput in) throws IOException { + } + }); } writer.close(); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java index 5fe797f9797f..f4b4bedc468b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReaderFactory.java @@ -18,6 +18,8 @@ package org.apache.hudi.io.storage; +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; @@ -25,7 +27,9 @@ import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.exception.HoodieException; +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.io.IOException; @@ -45,7 +49,8 @@ public static HoodieFileReaderFactory getReaderFactory(HoodieRecord.HoodieRecord return new HoodieAvroFileReaderFactory(); case SPARK: try { - Class clazz = ReflectionUtils.getClass("org.apache.hudi.io.storage.HoodieSparkFileReaderFactory"); + Class clazz = + ReflectionUtils.getClass("org.apache.hudi.io.storage.HoodieSparkFileReaderFactory"); return (HoodieFileReaderFactory) clazz.newInstance(); } catch (IllegalArgumentException | IllegalAccessException | InstantiationException e) { throw new HoodieException("Unable to create hoodie spark file writer factory", e); @@ -55,29 +60,71 @@ public static HoodieFileReaderFactory getReaderFactory(HoodieRecord.HoodieRecord } } - public HoodieFileReader getFileReader(Configuration conf, Path path) throws IOException { + public HoodieFileReader getFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path) throws IOException { final String extension = FSUtils.getFileExtension(path.toString()); if (PARQUET.getFileExtension().equals(extension)) { - return newParquetFileReader(conf, path); + return getFileReader(hoodieConfig, conf, path, PARQUET, Option.empty()); } if (HFILE.getFileExtension().equals(extension)) { - return newHFileFileReader(conf, path); + return getFileReader(hoodieConfig, conf, path, HFILE, Option.empty()); } if (ORC.getFileExtension().equals(extension)) { - return newOrcFileReader(conf, path); + return getFileReader(hoodieConfig, conf, path, ORC, Option.empty()); } throw new UnsupportedOperationException(extension + " format not supported yet."); } - public HoodieFileReader getFileReader(Configuration conf, Path path, HoodieFileFormat format) throws IOException { - return this.newParquetFileReader(conf, path); + public HoodieFileReader getFileReader(HoodieConfig hoodieConfig, Configuration conf, Path path, HoodieFileFormat format) + throws IOException { + return getFileReader(hoodieConfig, conf, path, format, Option.empty()); + } + + public HoodieFileReader getFileReader(HoodieConfig hoodieConfig, + Configuration conf, Path path, HoodieFileFormat format, + Option schemaOption) throws IOException { + switch (format) { + case PARQUET: + return this.newParquetFileReader(conf, path); + case HFILE: + boolean useNativeHFileReader = + hoodieConfig.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); + return newHFileFileReader(useNativeHFileReader, conf, path, schemaOption); + case ORC: + return newOrcFileReader(conf, path); + default: + throw new UnsupportedOperationException(format + " format not supported yet."); + } + } + + public HoodieFileReader getContentReader(HoodieConfig config, + Configuration conf, Path path, HoodieFileFormat format, + FileSystem fs, byte[] content, + Option schemaOption) throws IOException { + switch (format) { + case HFILE: + boolean useNativeHFileReader = + config.getBooleanOrDefault(HoodieReaderConfig.USE_NATIVE_HFILE_READER); + return newHFileFileReader(useNativeHFileReader, conf, path, fs, content, schemaOption); + default: + throw new UnsupportedOperationException(format + " format not supported yet."); + } } protected HoodieFileReader newParquetFileReader(Configuration conf, Path path) { throw new UnsupportedOperationException(); } - protected HoodieFileReader newHFileFileReader(Configuration conf, Path path) throws IOException { + protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + Configuration conf, Path path, + Option schemaOption) throws IOException { + throw new UnsupportedOperationException(); + } + + protected HoodieFileReader newHFileFileReader(boolean useNativeHFileReader, + Configuration conf, Path path, + FileSystem fs, + byte[] content, Option schemaOption) + throws IOException { throw new UnsupportedOperationException(); } @@ -85,7 +132,10 @@ protected HoodieFileReader newOrcFileReader(Configuration conf, Path path) { throw new UnsupportedOperationException(); } - public HoodieFileReader newBootstrapFileReader(HoodieFileReader skeletonFileReader, HoodieFileReader dataFileReader, Option partitionFields, Object[] partitionValues) { + public HoodieFileReader newBootstrapFileReader(HoodieFileReader skeletonFileReader, + HoodieFileReader dataFileReader, + Option partitionFields, + Object[] partitionValues) { throw new UnsupportedOperationException(); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseAvroHFileReader.java similarity index 81% rename from hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseAvroHFileReader.java index 6f6b3485c210..88b7d65b723c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseAvroHFileReader.java @@ -18,7 +18,6 @@ package org.apache.hudi.io.storage; -import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; @@ -27,7 +26,6 @@ import org.apache.hudi.common.util.VisibleForTesting; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.collection.CloseableMappingIterator; -import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; @@ -39,8 +37,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PositionedReadable; -import org.apache.hadoop.fs.Seekable; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; @@ -61,7 +57,6 @@ import java.util.TreeSet; import java.util.stream.Collectors; -import static org.apache.hudi.common.util.CollectionUtils.toStream; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.TypeUtils.unsafeCast; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -71,18 +66,8 @@ *

    * {@link HoodieFileReader} implementation allowing to read from {@link HFile}. */ -public class HoodieAvroHFileReader extends HoodieAvroFileReaderBase implements HoodieSeekingFileReader { - - // TODO HoodieHFileReader right now tightly coupled to MT, we should break that coupling - public static final String SCHEMA_KEY = "schema"; - public static final String KEY_BLOOM_FILTER_META_BLOCK = "bloomFilter"; - public static final String KEY_BLOOM_FILTER_TYPE_CODE = "bloomFilterTypeCode"; - - public static final String KEY_FIELD_NAME = "key"; - public static final String KEY_MIN_RECORD = "minRecordKey"; - public static final String KEY_MAX_RECORD = "maxRecordKey"; - - private static final Logger LOG = LoggerFactory.getLogger(HoodieAvroHFileReader.class); +public class HoodieHBaseAvroHFileReader extends HoodieAvroHFileReaderImplBase { + private static final Logger LOG = LoggerFactory.getLogger(HoodieHBaseAvroHFileReader.class); private final Path path; private final FileSystem fs; @@ -102,23 +87,31 @@ public class HoodieAvroHFileReader extends HoodieAvroFileReaderBase implements H private final Object sharedLock = new Object(); - public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) throws IOException { + public HoodieHBaseAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig) + throws IOException { this(path, HadoopFSUtils.getFs(path.toString(), hadoopConf), hadoopConf, cacheConfig, Option.empty()); } - public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs, Option schemaOpt) throws IOException { + public HoodieHBaseAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, + FileSystem fs, Option schemaOpt) throws IOException { this(path, fs, hadoopConf, cacheConfig, schemaOpt); } - public HoodieAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, FileSystem fs, byte[] content, Option schemaOpt) throws IOException { + public HoodieHBaseAvroHFileReader(Configuration hadoopConf, Path path, CacheConfig cacheConfig, + FileSystem fs, byte[] content, Option schemaOpt) + throws IOException { this(path, fs, hadoopConf, cacheConfig, schemaOpt, Option.of(content)); } - public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration hadoopConf, CacheConfig config, Option schemaOpt) throws IOException { + public HoodieHBaseAvroHFileReader(Path path, FileSystem fs, Configuration hadoopConf, + CacheConfig config, Option schemaOpt) + throws IOException { this(path, fs, hadoopConf, config, schemaOpt, Option.empty()); } - public HoodieAvroHFileReader(Path path, FileSystem fs, Configuration hadoopConf, CacheConfig config, Option schemaOpt, Option content) throws IOException { + public HoodieHBaseAvroHFileReader(Path path, FileSystem fs, Configuration hadoopConf, + CacheConfig config, Option schemaOpt, + Option content) throws IOException { this.path = path; this.fs = fs; this.hadoopConf = hadoopConf; @@ -224,7 +217,8 @@ protected ClosableIterator getIndexedRecordIterator(Schema reader } @VisibleForTesting - protected ClosableIterator getIndexedRecordsByKeysIterator(List keys, Schema readerSchema) throws IOException { + public ClosableIterator getIndexedRecordsByKeysIterator(List keys, + Schema readerSchema) throws IOException { // We're caching blocks for this scanner to minimize amount of traffic // to the underlying storage as we fetched (potentially) sparsely distributed // keys @@ -234,7 +228,7 @@ protected ClosableIterator getIndexedRecordsByKeysIterator(List getIndexedRecordsByKeyPrefixIterator(List sortedKeyPrefixes, Schema readerSchema) throws IOException { + public ClosableIterator getIndexedRecordsByKeyPrefixIterator(List sortedKeyPrefixes, Schema readerSchema) throws IOException { // We're caching blocks for this scanner to minimize amount of traffic // to the underlying storage as we fetched (potentially) sparsely distributed // keys @@ -409,34 +403,8 @@ private static Option fetchRecordByKeyInternal(HFileScanner scann private static GenericRecord getRecordFromCell(Cell cell, Schema writerSchema, Schema readerSchema) throws IOException { final byte[] keyBytes = copyKeyFromCell(cell); final byte[] valueBytes = copyValueFromCell(cell); - return deserialize(keyBytes, valueBytes, writerSchema, readerSchema); - } - - private static GenericRecord deserializeUnchecked(final byte[] keyBytes, - final byte[] valueBytes, - Schema writerSchema, - Schema readerSchema) { - try { - return deserialize(keyBytes, valueBytes, writerSchema, readerSchema); - } catch (IOException e) { - throw new HoodieIOException("Failed to deserialize payload", e); - } - } - - private static GenericRecord deserialize(final byte[] keyBytes, - final byte[] valueBytes, - Schema writerSchema, - Schema readerSchema) throws IOException { - GenericRecord record = HoodieAvroUtils.bytesToAvro(valueBytes, writerSchema, readerSchema); - - getKeySchema(readerSchema).ifPresent(keyFieldSchema -> { - final Object keyObject = record.get(keyFieldSchema.pos()); - if (keyObject != null && keyObject.toString().isEmpty()) { - record.put(keyFieldSchema.pos(), new String(keyBytes)); - } - }); - - return record; + return deserialize( + keyBytes, 0, keyBytes.length, valueBytes, 0, valueBytes.length, writerSchema, readerSchema); } private static Schema fetchSchema(HFile.Reader reader) { @@ -452,40 +420,6 @@ private static byte[] copyValueFromCell(Cell c) { return Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); } - /** - * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY - *

    - * Reads all the records with given schema - */ - public static List readAllRecords(HoodieAvroHFileReader reader) throws IOException { - Schema schema = reader.getSchema(); - return toStream(reader.getIndexedRecordIterator(schema)) - .collect(Collectors.toList()); - } - - /** - * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY - *

    - * Reads all the records with given schema and filtering keys. - */ - public static List readRecords(HoodieAvroHFileReader reader, - List keys) throws IOException { - return readRecords(reader, keys, reader.getSchema()); - } - - /** - * NOTE: THIS SHOULD ONLY BE USED FOR TESTING, RECORDS ARE MATERIALIZED EAGERLY - *

    - * Reads all the records with given schema and filtering keys. - */ - public static List readRecords(HoodieAvroHFileReader reader, - List keys, - Schema schema) throws IOException { - Collections.sort(keys); - return toStream(reader.getIndexedRecordsByKeysIterator(keys, schema)) - .collect(Collectors.toList()); - } - private static HFileScanner getHFileScanner(HFile.Reader reader, boolean cacheBlocks) { return getHFileScanner(reader, cacheBlocks, true); } @@ -504,10 +438,6 @@ private static HFileScanner getHFileScanner(HFile.Reader reader, boolean cacheBl } } - private static Option getKeySchema(Schema schema) { - return Option.ofNullable(schema.getField(KEY_FIELD_NAME)); - } - private static class RecordByKeyPrefixIterator implements ClosableIterator { private final Iterator sortedKeyPrefixesIterator; private Iterator recordsIterator; @@ -674,7 +604,8 @@ private static class RecordIterator implements ClosableIterator { private IndexedRecord next = null; private boolean eof = false; - RecordIterator(HFile.Reader reader, HFileScanner scanner, Schema writerSchema, Schema readerSchema) { + RecordIterator(HFile.Reader reader, HFileScanner scanner, Schema writerSchema, + Schema readerSchema) { this.reader = reader; this.scanner = scanner; this.writerSchema = writerSchema; @@ -729,35 +660,4 @@ public void close() { } } } - - static class SeekableByteArrayInputStream extends ByteBufferBackedInputStream implements Seekable, PositionedReadable { - public SeekableByteArrayInputStream(byte[] buf) { - super(buf); - } - - @Override - public long getPos() throws IOException { - return getPosition(); - } - - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) throws IOException { - return copyFrom(position, buffer, offset, length); - } - - @Override - public void readFully(long position, byte[] buffer) throws IOException { - read(position, buffer, 0, buffer.length); - } - - @Override - public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { - read(position, buffer, offset, length); - } - } } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java index 3dc60fc84a71..eb874634fcc0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileUtils.java @@ -76,7 +76,8 @@ public static HFile.Reader createHFileReader( // Avoid loading default configs, from the FS, since this configuration is mostly // used as a stub to initialize HFile reader Configuration conf = new Configuration(false); - HoodieAvroHFileReader.SeekableByteArrayInputStream bis = new HoodieAvroHFileReader.SeekableByteArrayInputStream(content); + HoodieHBaseAvroHFileReader.SeekableByteArrayInputStream bis = + new HoodieHBaseAvroHFileReader.SeekableByteArrayInputStream(content); FSDataInputStream fsdis = new FSDataInputStream(bis); FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis); ReaderContext context = new ReaderContextBuilder() diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java new file mode 100644 index 000000000000..a2ba9b6e1ab7 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java @@ -0,0 +1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.bloom.BloomFilterFactory; +import org.apache.hudi.common.model.HoodieAvroIndexedRecord; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.common.util.collection.CloseableMappingIterator; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.io.hfile.HFileReader; +import org.apache.hudi.io.hfile.HFileReaderImpl; +import org.apache.hudi.io.hfile.KeyValue; +import org.apache.hudi.io.hfile.UTF8StringKey; +import org.apache.hudi.util.Lazy; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.TypeUtils.unsafeCast; +import static org.apache.hudi.io.hfile.HFileUtils.isPrefixOfKey; + +/** + * An implementation of {@link HoodieAvroHFileReaderImplBase} using native {@link HFileReader}. + */ +public class HoodieNativeAvroHFileReader extends HoodieAvroHFileReaderImplBase { + private static final Logger LOG = LoggerFactory.getLogger(HoodieNativeAvroHFileReader.class); + + private final Configuration conf; + private final Option path; + private final Option bytesContent; + private Option sharedHFileReader; + private final Lazy schema; + + public HoodieNativeAvroHFileReader(Configuration conf, Path path, Option schemaOption) { + this.conf = conf; + this.path = Option.of(path); + this.bytesContent = Option.empty(); + this.sharedHFileReader = Option.empty(); + this.schema = schemaOption.map(Lazy::eagerly) + .orElseGet(() -> Lazy.lazily(() -> fetchSchema(getSharedHFileReader()))); + } + + public HoodieNativeAvroHFileReader(Configuration conf, byte[] content, Option schemaOption) { + this.conf = conf; + this.path = Option.empty(); + this.bytesContent = Option.of(content); + this.sharedHFileReader = Option.empty(); + this.schema = schemaOption.map(Lazy::eagerly) + .orElseGet(() -> Lazy.lazily(() -> fetchSchema(getSharedHFileReader()))); + } + + @Override + public ClosableIterator getIndexedRecordIterator(Schema readerSchema, + Schema requestedSchema) + throws IOException { + if (!Objects.equals(readerSchema, requestedSchema)) { + throw new UnsupportedOperationException( + "Schema projections are not supported in HFile reader"); + } + + HFileReader reader = newHFileReader(); + return new RecordIterator(reader, getSchema(), readerSchema); + } + + @Override + public String[] readMinMaxRecordKeys() { + HFileReader reader = getSharedHFileReader(); + try { + return new String[] { + getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MIN_RECORD)).get()), + getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MAX_RECORD)).get())}; + } catch (IOException e) { + throw new HoodieIOException("Cannot read min and max record keys from HFile.", e); + } + } + + @Override + public BloomFilter readBloomFilter() { + try { + HFileReader reader = getSharedHFileReader(); + ByteBuffer byteBuffer = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK).get(); + return BloomFilterFactory.fromByteBuffer(byteBuffer, + getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_BLOOM_FILTER_TYPE_CODE)).get())); + } catch (IOException e) { + throw new HoodieException("Could not read bloom filter from " + path, e); + } + } + + @Override + public Set filterRowKeys(Set candidateRowKeys) { + try (HFileReader reader = newHFileReader()) { + reader.seekTo(); + // candidateRowKeys must be sorted + return new TreeSet<>(candidateRowKeys).stream() + .filter(k -> { + try { + return reader.seekTo(new UTF8StringKey(k)) == HFileReader.SEEK_TO_FOUND; + } catch (IOException e) { + LOG.error("Failed to check key availability: " + k); + return false; + } + }) + .collect(Collectors.toSet()); + } catch (IOException e) { + throw new HoodieIOException("Unable to filter row keys in HFiles", e); + } + } + + @Override + public ClosableIterator getRecordKeyIterator() throws IOException { + HFileReader reader = newHFileReader(); + return new ClosableIterator() { + @Override + public boolean hasNext() { + try { + return reader.next(); + } catch (IOException e) { + throw new HoodieException("Error while scanning for keys", e); + } + } + + @Override + public String next() { + try { + return reader.getKeyValue().get().getKey().getContentInString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + throw new HoodieIOException("Error closing the HFile reader", e); + } + } + }; + } + + @Override + public Schema getSchema() { + return schema.get(); + } + + @Override + public void close() { + try { + if (sharedHFileReader.isPresent()) { + sharedHFileReader.get().close(); + } + } catch (IOException e) { + throw new HoodieIOException("Error closing the HFile reader", e); + } + } + + @Override + public long getTotalRecords() { + return getSharedHFileReader().getNumKeyValueEntries(); + } + + @Override + public ClosableIterator> getRecordsByKeysIterator( + List sortedKeys, Schema schema) throws IOException { + HFileReader reader = newHFileReader(); + ClosableIterator iterator = + new RecordByKeyIterator(reader, sortedKeys, getSchema(), schema); + return new CloseableMappingIterator<>( + iterator, data -> unsafeCast(new HoodieAvroIndexedRecord(data))); + } + + @Override + public ClosableIterator> getRecordsByKeyPrefixIterator( + List sortedKeyPrefixes, Schema schema) throws IOException { + HFileReader reader = newHFileReader(); + ClosableIterator iterator = + new RecordByKeyPrefixIterator(reader, sortedKeyPrefixes, getSchema(), schema); + return new CloseableMappingIterator<>( + iterator, data -> unsafeCast(new HoodieAvroIndexedRecord(data))); + } + + private static Schema fetchSchema(HFileReader reader) { + try { + return new Schema.Parser().parse( + getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(SCHEMA_KEY)).get())); + } catch (IOException e) { + throw new HoodieIOException("Unable to read schema from HFile", e); + } + } + + private static GenericRecord getRecordFromKeyValue(KeyValue keyValue, + Schema writerSchema, + Schema readerSchema) throws IOException { + byte[] bytes = keyValue.getBytes(); + return deserialize( + bytes, keyValue.getKeyContentOffset(), keyValue.getKeyContentLength(), + bytes, keyValue.getValueOffset(), keyValue.getValueLength(), + writerSchema, + readerSchema); + } + + private synchronized HFileReader getSharedHFileReader() { + try { + if (!sharedHFileReader.isPresent()) { + sharedHFileReader = Option.of(newHFileReader()); + } + return sharedHFileReader.get(); + } catch (IOException e) { + throw new HoodieIOException("Unable to construct HFile reader", e); + } + } + + private HFileReader newHFileReader() throws IOException { + FSDataInputStream inputStream; + long fileSize; + if (path.isPresent()) { + FileSystem fs = HadoopFSUtils.getFs(path.get(), conf); + fileSize = fs.getFileStatus(path.get()).getLen(); + inputStream = fs.open(path.get()); + } else { + fileSize = bytesContent.get().length; + inputStream = new FSDataInputStream(new SeekableByteArrayInputStream(bytesContent.get())); + } + return new HFileReaderImpl(inputStream, fileSize); + } + + public ClosableIterator getIndexedRecordsByKeysIterator(List sortedKeys, + Schema readerSchema) + throws IOException { + HFileReader reader = newHFileReader(); + return new RecordByKeyIterator(reader, sortedKeys, getSchema(), schema.get()); + } + + @Override + public ClosableIterator getIndexedRecordsByKeyPrefixIterator( + List sortedKeyPrefixes, Schema readerSchema) throws IOException { + HFileReader reader = newHFileReader(); + return new RecordByKeyPrefixIterator(reader, sortedKeyPrefixes, getSchema(), readerSchema); + } + + private static class RecordIterator implements ClosableIterator { + private final HFileReader reader; + + private final Schema writerSchema; + private final Schema readerSchema; + + private IndexedRecord next = null; + private boolean eof = false; + + RecordIterator(HFileReader reader, Schema writerSchema, Schema readerSchema) { + this.reader = reader; + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + // NOTE: This is required for idempotency + if (eof) { + return false; + } + + if (next != null) { + return true; + } + + boolean hasRecords; + if (!reader.isSeeked()) { + hasRecords = reader.seekTo(); + } else { + hasRecords = reader.next(); + } + + if (!hasRecords) { + eof = true; + return false; + } + + this.next = getRecordFromKeyValue(reader.getKeyValue().get(), writerSchema, readerSchema); + return true; + } catch (IOException io) { + throw new HoodieIOException("unable to read next record from hfile ", io); + } + } + + @Override + public IndexedRecord next() { + IndexedRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + throw new HoodieIOException("Error closing the HFile reader", e); + } + } + } + + private static class RecordByKeyIterator implements ClosableIterator { + private final Iterator sortedKeyIterator; + + private final HFileReader reader; + + private final Schema readerSchema; + private final Schema writerSchema; + + private IndexedRecord next = null; + + RecordByKeyIterator(HFileReader reader, List sortedKeys, Schema writerSchema, + Schema readerSchema) throws IOException { + this.sortedKeyIterator = sortedKeys.iterator(); + this.reader = reader; + this.reader.seekTo(); // position at the beginning of the file + + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + // NOTE: This is required for idempotency + if (next != null) { + return true; + } + + while (sortedKeyIterator.hasNext()) { + UTF8StringKey key = new UTF8StringKey(sortedKeyIterator.next()); + if (reader.seekTo(key) == HFileReader.SEEK_TO_FOUND) { + // Key is found + KeyValue keyValue = reader.getKeyValue().get(); + next = deserialize( + key.getBytes(), key.getContentOffset(), key.getContentLength(), + keyValue.getBytes(), keyValue.getValueOffset(), keyValue.getValueLength(), + writerSchema, readerSchema); + return true; + } + } + return false; + } catch (IOException e) { + throw new HoodieIOException("Unable to read next record from HFile ", e); + } + } + + @Override + public IndexedRecord next() { + IndexedRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + throw new HoodieIOException("Error closing the HFile reader", e); + } + } + } + + private static class RecordByKeyPrefixIterator implements ClosableIterator { + private final Iterator sortedKeyPrefixesIterator; + private Iterator recordsIterator; + + private final HFileReader reader; + + private final Schema writerSchema; + private final Schema readerSchema; + + private IndexedRecord next = null; + private boolean isFirstKeyPrefix = true; + + RecordByKeyPrefixIterator(HFileReader reader, List sortedKeyPrefixes, + Schema writerSchema, Schema readerSchema) throws IOException { + this.sortedKeyPrefixesIterator = sortedKeyPrefixes.iterator(); + this.reader = reader; + this.reader.seekTo(); // position at the beginning of the file + + this.writerSchema = writerSchema; + this.readerSchema = readerSchema; + } + + @Override + public boolean hasNext() { + try { + while (true) { + // NOTE: This is required for idempotency + if (next != null) { + return true; + } else if (recordsIterator != null && recordsIterator.hasNext()) { + next = recordsIterator.next(); + return true; + } else if (sortedKeyPrefixesIterator.hasNext()) { + recordsIterator = getRecordByKeyPrefixIteratorInternal( + reader, isFirstKeyPrefix, sortedKeyPrefixesIterator.next(), writerSchema, readerSchema); + isFirstKeyPrefix = false; + } else { + return false; + } + } + } catch (IOException e) { + throw new HoodieIOException("Unable to read next record from HFile", e); + } + } + + @Override + public IndexedRecord next() { + IndexedRecord next = this.next; + this.next = null; + return next; + } + + @Override + public void close() { + try { + reader.close(); + } catch (IOException e) { + throw new HoodieIOException("Error closing the HFile reader and scanner", e); + } + } + + private static Iterator getRecordByKeyPrefixIteratorInternal(HFileReader reader, + boolean isFirstKeyPrefix, + String keyPrefix, + Schema writerSchema, + Schema readerSchema) + throws IOException { + UTF8StringKey lookUpKeyPrefix = new UTF8StringKey(keyPrefix); + if (!isFirstKeyPrefix) { + // For the subsequent key prefixes after the first, do special handling to + // avoid potential backward seeks. + Option keyValue = reader.getKeyValue(); + if (!keyValue.isPresent()) { + return Collections.emptyIterator(); + } + if (!isPrefixOfKey(lookUpKeyPrefix, keyValue.get().getKey())) { + // If the key at current cursor does not start with the lookup prefix. + if (lookUpKeyPrefix.compareTo(keyValue.get().getKey()) < 0) { + // Prefix is less than the current key, no key found for the prefix. + return Collections.emptyIterator(); + } else { + // Prefix is greater than the current key. Call seekTo to move the cursor. + int val = reader.seekTo(lookUpKeyPrefix); + if (val >= 1) { + // Try moving to next entry, matching the prefix key; if we're at the EOF, + // `next()` will return false + if (!reader.next()) { + return Collections.emptyIterator(); + } + } + } + } + // If the key current cursor starts with the lookup prefix, + // do not call seekTo. Continue with reading the keys with the prefix. + } else { + // For the first key prefix, directly do seekTo. + int val = reader.seekTo(lookUpKeyPrefix); + if (val >= 1) { + // Try moving to next entry, matching the prefix key; if we're at the EOF, + // `next()` will return false + if (!reader.next()) { + return Collections.emptyIterator(); + } + } + } + + class KeyPrefixIterator implements Iterator { + private IndexedRecord next = null; + private boolean eof = false; + + @Override + public boolean hasNext() { + if (next != null) { + return true; + } else if (eof) { + return false; + } + + // Extract the byte value before releasing the lock since we cannot hold on to the returned cell afterwards + try { + KeyValue keyValue = reader.getKeyValue().get(); + // Check whether we're still reading records corresponding to the key-prefix + if (!isPrefixOfKey(lookUpKeyPrefix, keyValue.getKey())) { + return false; + } + byte[] bytes = keyValue.getBytes(); + next = + deserialize( + bytes, keyValue.getKeyContentOffset(), keyValue.getKeyContentLength(), + bytes, keyValue.getValueOffset(), keyValue.getValueLength(), + writerSchema, readerSchema); + // In case scanner is not able to advance, it means we reached EOF + eof = !reader.next(); + } catch (IOException e) { + throw new HoodieIOException("Failed to deserialize payload", e); + } + + return true; + } + + @Override + public IndexedRecord next() { + IndexedRecord next = this.next; + this.next = null; + return next; + } + } + + return new KeyPrefixIterator(); + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index a1dd3959f79e..86406b5963e2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -73,6 +73,7 @@ import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FULL_SCAN_LOG_FILES; import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS; import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FILES; @@ -446,7 +447,7 @@ private Pair, Long> getBaseFileReader(FileSlice slice if (basefile.isPresent()) { String baseFilePath = basefile.get().getPath(); baseFileReader = (HoodieSeekingFileReader) HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) - .getFileReader(getHadoopConf(), new Path(baseFilePath)); + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, getHadoopConf(), new Path(baseFilePath)); baseFileOpenMs = timer.endTimer(); LOG.info(String.format("Opened metadata base file from %s at instant %s in %d ms", baseFilePath, basefile.get().getCommitTime(), baseFileOpenMs)); diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index a814a2fe2121..82400b711650 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -38,7 +38,7 @@ import org.apache.hudi.common.util.hash.PartitionIndexID; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.hadoop.fs.CachingPath; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; +import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; import org.apache.hudi.util.Lazy; import org.apache.avro.Schema; @@ -112,7 +112,7 @@ public class HoodieMetadataPayload implements HoodieRecordPayload> convertMetadataToRecords( - HoodieEngineContext context, HoodieCommitMetadata commitMetadata, String instantTime, - MetadataRecordsGenerationParams recordsGenerationParams) { + HoodieEngineContext context, HoodieConfig hoodieConfig, HoodieCommitMetadata commitMetadata, + String instantTime, MetadataRecordsGenerationParams recordsGenerationParams) { final Map> partitionToRecordsMap = new HashMap<>(); final HoodieData filesPartitionRecordsRDD = context.parallelize( convertMetadataToFilesPartitionRecords(commitMetadata, instantTime), 1); partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD); if (recordsGenerationParams.getEnabledPartitionTypes().contains(MetadataPartitionType.BLOOM_FILTERS)) { - final HoodieData metadataBloomFilterRecords = convertMetadataToBloomFilterRecords(context, commitMetadata, instantTime, recordsGenerationParams); + final HoodieData metadataBloomFilterRecords = convertMetadataToBloomFilterRecords( + context, hoodieConfig, commitMetadata, instantTime, recordsGenerationParams); partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecords); } @@ -431,7 +435,7 @@ private static List getPartitionsAdded(HoodieCommitMetadata commitMetada * @return HoodieData of metadata table records */ public static HoodieData convertMetadataToBloomFilterRecords( - HoodieEngineContext context, HoodieCommitMetadata commitMetadata, + HoodieEngineContext context, HoodieConfig hoodieConfig, HoodieCommitMetadata commitMetadata, String instantTime, MetadataRecordsGenerationParams recordsGenerationParams) { final List allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream() .flatMap(entry -> entry.stream()).collect(Collectors.toList()); @@ -463,7 +467,8 @@ public static HoodieData convertMetadataToBloomFilterRecords( final Path writeFilePath = new Path(recordsGenerationParams.getDataMetaClient().getBasePath(), pathWithPartition); try (HoodieFileReader fileReader = - HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(recordsGenerationParams.getDataMetaClient().getHadoopConf(), writeFilePath)) { + HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + hoodieConfig, recordsGenerationParams.getDataMetaClient().getHadoopConf(), writeFilePath)) { try { final BloomFilter fileBloomFilter = fileReader.readBloomFilter(); if (fileBloomFilter == null) { @@ -893,7 +898,9 @@ public static HoodieData convertFilesToColumnStatsRecords(HoodieEn } private static ByteBuffer readBloomFilter(Configuration conf, Path filePath) throws IOException { - try (HoodieFileReader fileReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(conf, filePath)) { + HoodieConfig hoodieConfig = getReaderConfigs(conf); + try (HoodieFileReader fileReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(hoodieConfig, conf, filePath)) { final BloomFilter fileBloomFilter = fileReader.readBloomFilter(); if (fileBloomFilter == null) { return null; @@ -1728,6 +1735,7 @@ public static HoodieRecordGlobalLocation getLocationFromRecordIndexInfo( */ @Deprecated public static HoodieData readRecordKeysFromBaseFiles(HoodieEngineContext engineContext, + HoodieConfig config, List> partitionBaseFilePairs, boolean forDelete, int recordIndexMaxParallelism, @@ -1748,7 +1756,8 @@ public static HoodieData readRecordKeysFromBaseFiles(HoodieEngineC final String fileId = baseFile.getFileId(); final String instantTime = baseFile.getCommitTime(); - HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO).getFileReader(configuration.get(), dataFilePath); + HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) + .getFileReader(config, configuration.get(), dataFilePath); ClosableIterator recordKeyIterator = reader.getRecordKeyIterator(); return new ClosableIterator() { @@ -1842,7 +1851,9 @@ public HoodieRecord next() { final String fileId = baseFile.getFileId(); final String instantTime = baseFile.getCommitTime(); - HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO).getFileReader(configuration.get(), dataFilePath); + HoodieConfig hoodieConfig = getReaderConfigs(configuration.get()); + HoodieFileReader reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) + .getFileReader(hoodieConfig, configuration.get(), dataFilePath); ClosableIterator recordKeyIterator = reader.getRecordKeyIterator(); return new ClosableIterator() { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java similarity index 59% rename from hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java rename to hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java index cd3bdd1cddbb..9adc01c1ec8c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java @@ -19,28 +19,22 @@ package org.apache.hudi.common.fs.inline; import org.apache.hudi.common.testutils.FileSystemTestUtils; -import org.apache.hudi.io.storage.HoodieHFileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; -import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.util.Bytes; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.UUID; @@ -50,31 +44,33 @@ import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getRandomOuterInMemPath; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; /** * Tests {@link InLineFileSystem} to inline HFile. */ -public class TestInLineFileSystemHFileInLining { +public abstract class TestInLineFileSystemHFileInLiningBase { - private static final String LOCAL_FORMATTER = "%010d"; - private static final String VALUE_PREFIX = "value"; + protected static final String LOCAL_FORMATTER = "%010d"; + protected static final String VALUE_PREFIX = "value"; private static final int MIN_BLOCK_BYTES = 1024; private final Configuration inMemoryConf; private final Configuration inlineConf; private final int maxRows = 100 + RANDOM.nextInt(1000); private Path generatedPath; - public TestInLineFileSystemHFileInLining() { + public TestInLineFileSystemHFileInLiningBase() { inMemoryConf = new Configuration(); inMemoryConf.set("fs." + InMemoryFileSystem.SCHEME + ".impl", InMemoryFileSystem.class.getName()); inlineConf = new Configuration(); inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); } + protected abstract void validateHFileReading(InLineFileSystem inlineFileSystem, + Configuration conf, + Configuration inlineConf, + Path inlinePath, + int maxRows) throws IOException; + @AfterEach public void teardown() throws IOException { if (generatedPath != null) { @@ -114,42 +110,13 @@ public void testSimpleInlineFileSystem() throws IOException { InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf); FSDataInputStream fin = inlineFileSystem.open(inlinePath); - HFile.Reader reader = - HoodieHFileUtils.createHFileReader(inlineFileSystem, inlinePath, cacheConf, inlineConf); - // Get a scanner that caches and that does not use pread. - HFileScanner scanner = reader.getScanner(true, false); - // Align scanner at start of the file. - scanner.seekTo(); - readAllRecords(scanner); - - Set rowIdsToSearch = getRandomValidRowIds(10); - for (int rowId : rowIdsToSearch) { - KeyValue keyValue = new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId)); - assertEquals(0, scanner.seekTo(keyValue), - "location lookup failed"); - // read the key and see if it matches - Cell cell = scanner.getCell(); - byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); - byte[] expectedKey = Arrays.copyOfRange(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowOffset() + keyValue.getRowLength()); - assertArrayEquals(expectedKey, key, "seeked key does not match"); - scanner.seekTo(keyValue); - ByteBuffer val1 = scanner.getValue(); - scanner.seekTo(keyValue); - ByteBuffer val2 = scanner.getValue(); - assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2)); - } + validateHFileReading(inlineFileSystem, inMemoryConf, inlineConf, inlinePath, maxRows); - int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; - for (int rowId : invalidRowIds) { - assertNotEquals(0, scanner.seekTo(new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId))), - "location lookup should have failed"); - } - reader.close(); fin.close(); outerPath.getFileSystem(inMemoryConf).delete(outerPath, true); } - private Set getRandomValidRowIds(int count) { + protected Set getRandomValidRowIds(int count) { Set rowIds = new HashSet<>(); while (rowIds.size() < count) { int index = RANDOM.nextInt(maxRows); @@ -160,12 +127,6 @@ private Set getRandomValidRowIds(int count) { return rowIds; } - private byte[] getSomeKey(int rowId) { - KeyValue kv = new KeyValue(getUTF8Bytes(String.format(LOCAL_FORMATTER, rowId)), - Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); - return kv.getKey(); - } - private FSDataOutputStream createFSOutput(Path name, Configuration conf) throws IOException { return name.getFileSystem(conf).create(name); } @@ -186,38 +147,6 @@ private void writeSomeRecords(HFile.Writer writer) } } - private void readAllRecords(HFileScanner scanner) throws IOException { - readAndCheckbytes(scanner, 0, maxRows); - } - - // read the records and check - private void readAndCheckbytes(HFileScanner scanner, int start, int n) - throws IOException { - int i = start; - for (; i < (start + n); i++) { - Cell cell = scanner.getCell(); - byte[] key = Arrays.copyOfRange( - cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); - byte[] val = Arrays.copyOfRange( - cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); - String keyStr = String.format(LOCAL_FORMATTER, i); - String valStr = VALUE_PREFIX + keyStr; - KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"), - Bytes.toBytes("qual"), Bytes.toBytes(valStr)); - byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey(); - byte[] expectedKeyBytes = Arrays.copyOfRange( - kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()); - assertArrayEquals(expectedKeyBytes, keyBytes, - "bytes for keys do not match " + keyStr + " " + Bytes.toString(key)); - assertArrayEquals(Bytes.toBytes(valStr), val, - "bytes for vals do not match " + valStr + " " + Bytes.toString(val)); - if (!scanner.next()) { - break; - } - } - assertEquals(i, start + n - 1); - } - private long generateOuterFile(Path outerPath, byte[] inlineBytes) throws IOException { FSDataOutputStream wrappedOut = outerPath.getFileSystem(inMemoryConf).create(outerPath, true); // write random bytes diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java new file mode 100644 index 000000000000..26fb8e34961b --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.fs.inline; + +import org.apache.hudi.io.storage.HoodieHFileUtils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Set; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +/** + * Tests {@link InLineFileSystem} with HBase HFile reader. + */ +public class TestInLineFileSystemWithHBaseHFileReader extends TestInLineFileSystemHFileInLiningBase { + @Override + protected void validateHFileReading(InLineFileSystem inlineFileSystem, + Configuration conf, + Configuration inlineConf, + Path inlinePath, + int maxRows) throws IOException { + try (HFile.Reader reader = + HoodieHFileUtils.createHFileReader(inlineFileSystem, inlinePath, new CacheConfig(conf), inlineConf)) { + // Get a scanner that caches and that does not use pread. + HFileScanner scanner = reader.getScanner(true, false); + // Align scanner at start of the file. + scanner.seekTo(); + readAllRecords(scanner, maxRows); + + Set rowIdsToSearch = getRandomValidRowIds(10); + for (int rowId : rowIdsToSearch) { + KeyValue keyValue = new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId)); + assertEquals(0, scanner.seekTo(keyValue), + "location lookup failed"); + // read the key and see if it matches + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + byte[] expectedKey = Arrays.copyOfRange(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowOffset() + keyValue.getRowLength()); + assertArrayEquals(expectedKey, key, "seeked key does not match"); + scanner.seekTo(keyValue); + ByteBuffer val1 = scanner.getValue(); + scanner.seekTo(keyValue); + ByteBuffer val2 = scanner.getValue(); + assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2)); + } + + int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; + for (int rowId : invalidRowIds) { + assertNotEquals(0, scanner.seekTo(new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId))), + "location lookup should have failed"); + } + } + } + + private byte[] getSomeKey(int rowId) { + KeyValue kv = new KeyValue(getUTF8Bytes(String.format(LOCAL_FORMATTER, rowId)), + Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); + return kv.getKey(); + } + + private void readAllRecords(HFileScanner scanner, int maxRows) throws IOException { + readAndCheckbytes(scanner, 0, maxRows); + } + + // read the records and check + private void readAndCheckbytes(HFileScanner scanner, int start, int n) + throws IOException { + int i = start; + for (; i < (start + n); i++) { + Cell cell = scanner.getCell(); + byte[] key = Arrays.copyOfRange( + cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + byte[] val = Arrays.copyOfRange( + cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); + String keyStr = String.format(LOCAL_FORMATTER, i); + String valStr = VALUE_PREFIX + keyStr; + KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"), + Bytes.toBytes("qual"), Bytes.toBytes(valStr)); + byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey(); + byte[] expectedKeyBytes = Arrays.copyOfRange( + kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()); + assertArrayEquals(expectedKeyBytes, keyBytes, + "bytes for keys do not match " + keyStr + " " + Bytes.toString(key)); + assertArrayEquals(Bytes.toBytes(valStr), val, + "bytes for vals do not match " + valStr + " " + Bytes.toString(val)); + if (!scanner.next()) { + break; + } + } + assertEquals(i, start + n - 1); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java new file mode 100644 index 000000000000..36240054037c --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHFileReader.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.fs.inline; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.io.hfile.HFileReader; +import org.apache.hudi.io.hfile.HFileReaderImpl; +import org.apache.hudi.io.hfile.Key; +import org.apache.hudi.io.hfile.KeyValue; +import org.apache.hudi.io.hfile.UTF8StringKey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.hudi.io.hfile.HFileUtils.getValue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link InLineFileSystem} with native HFile reader. + */ +public class TestInLineFileSystemWithHFileReader extends TestInLineFileSystemHFileInLiningBase { + @Override + protected void validateHFileReading(InLineFileSystem inlineFileSystem, + Configuration conf, + Configuration inlineConf, + Path inlinePath, + int maxRows) throws IOException { + long fileSize = inlineFileSystem.getFileStatus(inlinePath).getLen(); + try (FSDataInputStream fin = inlineFileSystem.open(inlinePath)) { + try (HFileReader reader = new HFileReaderImpl(fin, fileSize)) { + // Align scanner at start of the file. + reader.seekTo(); + readAllRecords(reader, maxRows); + + reader.seekTo(); + List rowIdsToSearch = getRandomValidRowIds(10) + .stream().sorted().collect(Collectors.toList()); + for (int rowId : rowIdsToSearch) { + Key lookupKey = getKey(rowId); + assertEquals(0, reader.seekTo(lookupKey), "location lookup failed"); + // read the key and see if it matches + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + assertEquals(lookupKey, keyValue.get().getKey(), "seeked key does not match"); + reader.seekTo(lookupKey); + String val1 = getValue(reader.getKeyValue().get()); + reader.seekTo(lookupKey); + String val2 = getValue(reader.getKeyValue().get()); + assertEquals(val1, val2); + } + + reader.seekTo(); + int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; + for (int rowId : invalidRowIds) { + assertNotEquals(0, reader.seekTo(getKey(rowId)), + "location lookup should have failed"); + } + } + } + } + + private Key getKey(int rowId) { + return new UTF8StringKey(String.format(LOCAL_FORMATTER, rowId)); + } + + private void readAllRecords(HFileReader reader, int maxRows) throws IOException { + for (int i = 0; i < maxRows; i++) { + Option keyValue = reader.getKeyValue(); + assertTrue(keyValue.isPresent()); + String key = keyValue.get().getKey().getContentInString(); + String value = getValue(keyValue.get()); + String expectedKeyStr = String.format(LOCAL_FORMATTER, i); + String expectedValStr = VALUE_PREFIX + expectedKeyStr; + + assertEquals(expectedKeyStr, key, "keys do not match " + expectedKeyStr + " " + key); + assertEquals(expectedValStr, value, "values do not match " + expectedValStr + " " + value); + assertEquals(i != maxRows - 1, reader.next()); + } + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index ccab16771133..54c0dd53ed22 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.functional; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.DeleteRecord; import org.apache.hudi.common.model.HoodieArchivedLogFile; @@ -2814,7 +2815,7 @@ private static HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, Li case AVRO_DATA_BLOCK: return new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); case HFILE_DATA_BLOCK: - return new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ, pathForReader); + return new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ, pathForReader, HoodieReaderConfig.USE_NATIVE_HFILE_READER.defaultValue()); case PARQUET_DATA_BLOCK: return new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP, 0.1, true); default: diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java index 6648a0292dff..d1010ae75877 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestBase64CodecUtil.java @@ -20,6 +20,7 @@ import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; import java.util.UUID; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; @@ -43,7 +44,11 @@ public void testCodec() { String encodeData = Base64CodecUtil.encode(originalData); byte[] decodeData = Base64CodecUtil.decode(encodeData); + ByteBuffer encodedByteBuffer = ByteBuffer.wrap(getUTF8Bytes(encodeData)); + ByteBuffer decodeByteBuffer = Base64CodecUtil.decode(encodedByteBuffer); + assertArrayEquals(originalData, decodeData); + assertArrayEquals(originalData, decodeByteBuffer.array()); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java index c306bab384b0..dce26779b712 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieAvroFileReaderFactory.java @@ -27,6 +27,7 @@ import java.io.IOException; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -42,19 +43,22 @@ public void testGetFileReader() throws IOException { // parquet file format. final Configuration hadoopConf = new Configuration(); final Path parquetPath = new Path("/partition/path/f1_1-0-1_000.parquet"); - HoodieFileReader parquetReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(hadoopConf, parquetPath); + HoodieFileReader parquetReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, hadoopConf, parquetPath); assertTrue(parquetReader instanceof HoodieAvroParquetReader); // log file format. final Path logPath = new Path("/partition/path/f.b51192a8-574b-4a85-b246-bcfec03ac8bf_100.log.2_1-0-1"); final Throwable thrown = assertThrows(UnsupportedOperationException.class, () -> { - HoodieFileReader logWriter = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(hadoopConf, logPath); + HoodieFileReader logWriter = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, hadoopConf, logPath); }, "should fail since log storage reader is not supported yet."); assertTrue(thrown.getMessage().contains("format not supported yet.")); // Orc file format. final Path orcPath = new Path("/partition/path/f1_1-0-1_000.orc"); - HoodieFileReader orcReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(hadoopConf, orcPath); + HoodieFileReader orcReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, hadoopConf, orcPath); assertTrue(orcReader instanceof HoodieAvroOrcReader); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java new file mode 100644 index 000000000000..85514a6b56e2 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHBaseHFileReaderWriter.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CellComparatorImpl; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.io.hfile.TestHFileReader.KEY_CREATOR; +import static org.apache.hudi.io.hfile.TestHFileReader.VALUE_CREATOR; +import static org.apache.hudi.io.storage.TestHoodieReaderWriterUtils.writeHFileForTesting; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestHoodieHBaseHFileReaderWriter extends TestHoodieHFileReaderWriterBase { + @Override + protected HoodieAvroFileReader createReader( + Configuration conf) throws Exception { + CacheConfig cacheConfig = new CacheConfig(conf); + return new HoodieHBaseAvroHFileReader(conf, getFilePath(), cacheConfig, + getFilePath().getFileSystem(conf), Option.empty()); + } + + @Override + protected HoodieAvroHFileReaderImplBase createHFileReader(Configuration conf, + byte[] content) throws IOException { + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); + return new HoodieHBaseAvroHFileReader( + conf, new Path(DUMMY_BASE_PATH), new CacheConfig(conf), fs, content, Option.empty()); + } + + @Override + protected void verifyHFileReader(byte[] content, + String hfileName, + boolean mayUseDefaultComparator, + Class expectedComparatorClazz, + int count) throws IOException { + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); + try (HFile.Reader reader = + HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content)) { + // HFile version is 3 + assertEquals(3, reader.getTrailer().getMajorVersion()); + if (mayUseDefaultComparator && hfileName.contains("hudi_0_9")) { + // Pre Hudi 0.10, the default comparator is used for metadata table HFiles + // For bootstrap index HFiles, the custom comparator is always used + assertEquals(CellComparatorImpl.class, reader.getComparator().getClass()); + } else { + assertEquals(expectedComparatorClazz, reader.getComparator().getClass()); + } + assertEquals(count, reader.getEntries()); + } + } + + @Test + public void testReaderGetRecordIteratorByKeysWithBackwardSeek() throws Exception { + writeFileWithSimpleSchema(); + try (HoodieAvroHFileReaderImplBase hfileReader = + (HoodieAvroHFileReaderImplBase) createReader(new Configuration())) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + List allRecords = toStream(hfileReader.getRecordIterator()) + .map(r -> (GenericRecord) r.getData()).collect(Collectors.toList()); + // Filter for "key00001, key05, key24, key16, key31, key61". Valid entries should be matched. + // Even though key16 exists, it's a backward seek not in order. So, will not return the matched entry. + List expectedKey1s = allRecords.stream().filter(entry -> ( + (entry.get("_row_key").toString()).contains("key05") + || (entry.get("_row_key").toString()).contains("key24") + || (entry.get("_row_key").toString()).contains("key31"))).collect(Collectors.toList()); + Iterator iterator = + hfileReader.getIndexedRecordsByKeysIterator( + Arrays.asList("key00001", "key05", "key24", "key16", "key31", "key61"), + avroSchema); + List recordsByKeys = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey1s, recordsByKeys); + } + } + + @Disabled("This is used for generating testing HFile only") + @ParameterizedTest + @CsvSource({ + "512,GZ,20000,true", "16,GZ,20000,true", + "64,NONE,5000,true", "16,NONE,5000,true", + "16,GZ,200,false" + }) + void generateHFileForTesting(int blockSizeKB, + String compressionCodec, + int numEntries, + boolean uniqueKeys) throws IOException { + writeHFileForTesting( + String.format("/tmp/hudi_1_0_hbase_2_4_9_%sKB_%s_%s.hfile", + blockSizeKB, compressionCodec, numEntries), + blockSizeKB * 1024, + Compression.Algorithm.valueOf(compressionCodec), + numEntries, + KEY_CREATOR, + VALUE_CREATOR, + uniqueKeys); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index 22cca7004d56..e782dd7f28cb 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -18,481 +18,70 @@ package org.apache.hudi.io.storage; -import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; -import org.apache.hudi.common.config.HoodieStorageConfig; -import org.apache.hudi.common.engine.TaskContextSupplier; -import org.apache.hudi.common.model.EmptyHoodieRecordPayload; -import org.apache.hudi.common.model.HoodieAvroRecord; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.table.HoodieTableConfig; -import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.CellComparatorImpl; -import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.MethodSource; -import org.junit.jupiter.params.provider.ValueSource; -import org.mockito.Mockito; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashSet; import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; -import java.util.TreeMap; -import java.util.function.Supplier; import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; import java.util.stream.StreamSupport; -import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; -import static org.apache.hudi.common.util.CollectionUtils.toStream; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; -import static org.apache.hudi.io.hfile.TestHFileReader.BOOTSTRAP_INDEX_HFILE_SUFFIX; -import static org.apache.hudi.io.hfile.TestHFileReader.COMPLEX_SCHEMA_HFILE_SUFFIX; -import static org.apache.hudi.io.hfile.TestHFileReader.KEY_CREATOR; -import static org.apache.hudi.io.hfile.TestHFileReader.SIMPLE_SCHEMA_HFILE_SUFFIX; -import static org.apache.hudi.io.hfile.TestHFileReader.VALUE_CREATOR; -import static org.apache.hudi.io.hfile.TestHFileReader.readHFileFromResources; -import static org.apache.hudi.io.storage.HoodieAvroHFileReader.SCHEMA_KEY; -import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.mockito.Mockito.when; +import static org.junit.jupiter.api.Assertions.assertThrows; -public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase { - private static final String DUMMY_BASE_PATH = "dummy_base_path"; - // Number of records in HFile fixtures for compatibility tests - private static final int NUM_RECORDS_FIXTURE = 50; - - @Override - protected Path getFilePath() { - return new Path(tempDir.toString() + "/f1_1-0-1_000.hfile"); - } - - @Override - protected HoodieAvroHFileWriter createWriter( - Schema avroSchema, boolean populateMetaFields) throws Exception { - String instantTime = "000"; - Configuration conf = new Configuration(); - Properties props = new Properties(); - props.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), Boolean.toString(populateMetaFields)); - TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); - Supplier partitionSupplier = Mockito.mock(Supplier.class); - when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier); - when(partitionSupplier.get()).thenReturn(10); - - return (HoodieAvroHFileWriter)HoodieFileWriterFactory.getFileWriter( - instantTime, getFilePath(), conf, HoodieStorageConfig.newBuilder().fromProperties(props).build(), avroSchema, mockTaskContextSupplier, HoodieRecord.HoodieRecordType.AVRO); - } +public class TestHoodieHFileReaderWriter extends TestHoodieHFileReaderWriterBase { @Override protected HoodieAvroFileReader createReader( Configuration conf) throws Exception { - CacheConfig cacheConfig = new CacheConfig(conf); - return new HoodieAvroHFileReader(conf, getFilePath(), cacheConfig, getFilePath().getFileSystem(conf), Option.empty()); + return new HoodieNativeAvroHFileReader(conf, getFilePath(), Option.empty()); } @Override - protected void verifyMetadata(Configuration conf) throws IOException { - FileSystem fs = getFilePath().getFileSystem(conf); - HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf); - assertEquals(HFILE_COMPARATOR.getClass(), hfileReader.getComparator().getClass()); - assertEquals(NUM_RECORDS, hfileReader.getEntries()); + protected HoodieAvroHFileReaderImplBase createHFileReader(Configuration conf, + byte[] content) throws IOException { + return new HoodieNativeAvroHFileReader(conf, content, Option.empty()); } @Override - protected void verifySchema(Configuration conf, String schemaPath) throws IOException { - FileSystem fs = getFilePath().getFileSystem(conf); - HFile.Reader hfileReader = HoodieHFileUtils.createHFileReader(fs, getFilePath(), new CacheConfig(conf), conf); - assertEquals(getSchemaFromResource(TestHoodieHFileReaderWriter.class, schemaPath), - new Schema.Parser().parse(new String(hfileReader.getHFileInfo().get(getUTF8Bytes(SCHEMA_KEY))))); - } - - private static Stream populateMetaFieldsAndTestAvroWithMeta() { - return Arrays.stream(new Boolean[][] { - {true, true}, - {false, true}, - {true, false}, - {false, false} - }).map(Arguments::of); - } - - @ParameterizedTest - @MethodSource("populateMetaFieldsAndTestAvroWithMeta") - public void testWriteReadHFileWithMetaFields(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception { - Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc"); - HoodieAvroHFileWriter writer = createWriter(avroSchema, populateMetaFields); - List keys = new ArrayList<>(); - Map recordMap = new TreeMap<>(); - for (int i = 0; i < 100; i++) { - GenericRecord record = new GenericData.Record(avroSchema); - String key = String.format("%s%04d", "key", i); - record.put("_row_key", key); - keys.add(key); - record.put("time", Integer.toString(RANDOM.nextInt())); - record.put("number", i); - if (testAvroWithMeta) { - // payload does not matter. GenericRecord passed in is what matters - writer.writeAvroWithMetadata(new HoodieAvroRecord(new HoodieKey((String) record.get("_row_key"), - Integer.toString((Integer) record.get("number"))), new EmptyHoodieRecordPayload()).getKey(), record); - // only HoodieKey will be looked up from the 2nd arg(HoodieRecord). - } else { - writer.writeAvro(key, record); - } - recordMap.put(key, record); + protected void verifyHFileReader(byte[] content, + String hfileName, + boolean mayUseDefaultComparator, + Class expectedComparatorClazz, + int count) throws IOException { + try (HoodieAvroHFileReaderImplBase hfileReader = createHFileReader(new Configuration(), content)) { + assertEquals(count, hfileReader.getTotalRecords()); } - writer.close(); - - Configuration conf = new Configuration(); - HoodieAvroHFileReader hoodieHFileReader = (HoodieAvroHFileReader) createReader(conf); - List records = HoodieAvroHFileReader.readAllRecords(hoodieHFileReader); - assertEquals(new ArrayList<>(recordMap.values()), records); - - hoodieHFileReader.close(); - - for (int i = 0; i < 2; i++) { - int randomRowstoFetch = 5 + RANDOM.nextInt(10); - Set rowsToFetch = getRandomKeys(randomRowstoFetch, keys); - - List rowsList = new ArrayList<>(rowsToFetch); - Collections.sort(rowsList); - - List expectedRecords = rowsList.stream().map(recordMap::get).collect(Collectors.toList()); - - hoodieHFileReader = (HoodieAvroHFileReader) createReader(conf); - List result = HoodieAvroHFileReader.readRecords(hoodieHFileReader, rowsList).stream().map(r -> (GenericRecord)r).collect(Collectors.toList()); - - assertEquals(expectedRecords, result); - - result.forEach(entry -> { - if (populateMetaFields && testAvroWithMeta) { - assertNotNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); - } else { - assertNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); - } - }); - hoodieHFileReader.close(); - } - } - - @Disabled("Disable the test with evolved schema for HFile since it's not supported") - @ParameterizedTest - @Override - public void testWriteReadWithEvolvedSchema(String evolvedSchemaPath) throws Exception { - // TODO(HUDI-3683): fix the schema evolution for HFile - } - - @Test - public void testReadHFileFormatRecords() throws Exception { - writeFileWithSimpleSchema(); - FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); - byte[] content = FileIOUtils.readAsByteArray( - fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen()); - // Reading byte array in HFile format, without actual file path - Configuration hadoopConf = fs.getConf(); - HoodieAvroHFileReader hfileReader = - new HoodieAvroHFileReader(hadoopConf, new Path(DUMMY_BASE_PATH), new CacheConfig(hadoopConf), fs, content, Option.empty()); - Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); - assertEquals(NUM_RECORDS, hfileReader.getTotalRecords()); - verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); } @Test - public void testReaderGetRecordIterator() throws Exception { + public void testReaderGetRecordIteratorByKeysWithBackwardSeek() throws Exception { writeFileWithSimpleSchema(); - HoodieAvroHFileReader hfileReader = - (HoodieAvroHFileReader) createReader(new Configuration()); - List keys = - IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20)) - .mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList()); - Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); - Iterator> iterator = hfileReader.getRecordsByKeysIterator(keys, avroSchema); - - List expectedIds = - IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20)) - .boxed().collect(Collectors.toList()); - int index = 0; - while (iterator.hasNext()) { - GenericRecord record = (GenericRecord) iterator.next().getData(); - String key = "key" + String.format("%02d", expectedIds.get(index)); - assertEquals(key, record.get("_row_key").toString()); - assertEquals(Integer.toString(expectedIds.get(index)), record.get("time").toString()); - assertEquals(expectedIds.get(index), record.get("number")); - index++; - } - } - - @Test - public void testReaderGetRecordIteratorByKeys() throws Exception { - writeFileWithSimpleSchema(); - HoodieAvroHFileReader hfileReader = - (HoodieAvroHFileReader) createReader(new Configuration()); - - Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); - - List keys = Collections.singletonList("key"); - Iterator iterator = - hfileReader.getIndexedRecordsByKeysIterator(keys, avroSchema); - - List recordsByKeys = toStream(iterator).map(r -> (GenericRecord) r).collect(Collectors.toList()); - - List allRecords = toStream(hfileReader.getRecordIterator()) - .map(r -> (GenericRecord) r.getData()).collect(Collectors.toList()); - - // no entries should match since this is exact match. - assertEquals(Collections.emptyList(), recordsByKeys); - - // filter for "key00001, key05, key12, key24, key16, key2, key31, key49, key61, key50". Valid entries should be matched. - // key00001 should not match. - // even though key16 exists, its not in the sorted order of keys passed in. So, will not return the matched entry. - // key2 : we don't have an exact match - // key61 is greater than max key. - // again, by the time we reach key50, cursor is at EOF. So no entries will be returned. - List expectedKey1s = allRecords.stream().filter(entry -> ( - (entry.get("_row_key").toString()).contains("key05") - || (entry.get("_row_key").toString()).contains("key12") - || (entry.get("_row_key").toString()).contains("key24") - || (entry.get("_row_key").toString()).contains("key31") - || (entry.get("_row_key").toString()).contains("key49"))).collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeysIterator(Arrays.asList("key00001", "key05", "key12", "key24", "key16", "key31", "key49","key61","key50"), avroSchema); - recordsByKeys = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord) r) - .collect(Collectors.toList()); - assertEquals(expectedKey1s, recordsByKeys); - } - - @Test - public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception { - writeFileWithSimpleSchema(); - HoodieAvroHFileReader hfileReader = - (HoodieAvroHFileReader) createReader(new Configuration()); - - Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); - - List keyPrefixes = Collections.singletonList("key"); - Iterator iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(keyPrefixes, avroSchema); - - List recordsByPrefix = toStream(iterator).map(r -> (GenericRecord)r).collect(Collectors.toList()); - - List allRecords = toStream(hfileReader.getRecordIterator()) - .map(r -> (GenericRecord) r.getData()).collect(Collectors.toList()); - - assertEquals(allRecords, recordsByPrefix); - - // filter for "key1" : entries from key10 to key19 should be matched - List expectedKey1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")).collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key1"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(expectedKey1s, recordsByPrefix); - - // exact match - List expectedKey25 = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key25")).collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key25"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(expectedKey25, recordsByPrefix); - - // no match. key prefix is beyond entries in file. - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key99"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(Collections.emptyList(), recordsByPrefix); - - // no match. but keyPrefix is in between the entries found in file. - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key1234"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(Collections.emptyList(), recordsByPrefix); - - // filter for "key1", "key30" and "key60" : entries from 'key10 to key19' and 'key30' should be matched. - List expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1") - || (entry.get("_row_key").toString()).contains("key30")).collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key1", "key30","key6"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(expectedKey50and1s, recordsByPrefix); - - // filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched. - List expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0") - || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key0", "key50"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - assertEquals(expectedKey50and0s, recordsByPrefix); - - // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched. - List expectedKey1sand0s = allRecords.stream() - .filter(entry -> (entry.get("_row_key").toString()).contains("key1") || (entry.get("_row_key").toString()).contains("key0")) - .collect(Collectors.toList()); - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key0", "key1"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - Collections.sort(recordsByPrefix, new Comparator() { - @Override - public int compare(GenericRecord o1, GenericRecord o2) { - return o1.get("_row_key").toString().compareTo(o2.get("_row_key").toString()); - } - }); - assertEquals(expectedKey1sand0s, recordsByPrefix); - - // We expect the keys to be looked up in sorted order. If not, matching entries may not be returned. - // key1 should have matching entries, but not key0. - iterator = - hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema); - recordsByPrefix = - StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(r -> (GenericRecord)r) - .collect(Collectors.toList()); - Collections.sort(recordsByPrefix, new Comparator() { - @Override - public int compare(GenericRecord o1, GenericRecord o2) { - return o1.get("_row_key").toString().compareTo(o2.get("_row_key").toString()); - } - }); - assertEquals(expectedKey1s, recordsByPrefix); - } - - @ParameterizedTest - @ValueSource(strings = { - "/hfile/hudi_0_9_hbase_1_2_3", "/hfile/hudi_0_10_hbase_1_2_3", "/hfile/hudi_0_11_hbase_2_4_9"}) - public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException { - // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadPrimitiveRecord() - // using different Hudi releases - String simpleHFile = hfilePrefix + SIMPLE_SCHEMA_HFILE_SUFFIX; - // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadComplexRecord() - // using different Hudi releases - String complexHFile = hfilePrefix + COMPLEX_SCHEMA_HFILE_SUFFIX; - // This fixture is generated from TestBootstrapIndex#testBootstrapIndex() - // using different Hudi releases. The file is copied from .hoodie/.aux/.bootstrap/.partitions/ - String bootstrapIndexFile = hfilePrefix + BOOTSTRAP_INDEX_HFILE_SUFFIX; - - FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); - byte[] content = readHFileFromResources(simpleHFile); - verifyHFileReader( - HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), - hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); - - Configuration hadoopConf = fs.getConf(); - HoodieAvroHFileReader hfileReader = - new HoodieAvroHFileReader(hadoopConf, new Path(DUMMY_BASE_PATH), new CacheConfig(hadoopConf), fs, content, Option.empty()); - Schema avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); - assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); - verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); - - content = readHFileFromResources(complexHFile); - verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), - hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); - hfileReader = - new HoodieAvroHFileReader(hadoopConf, new Path(DUMMY_BASE_PATH), new CacheConfig(hadoopConf), fs, content, - Option.empty()); - avroSchema = getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc"); - assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); - verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); - - content = readHFileFromResources(bootstrapIndexFile); - verifyHFileReader(HoodieHFileUtils.createHFileReader(fs, new Path(DUMMY_BASE_PATH), content), - hfilePrefix, false, HFileBootstrapIndex.HoodieKVComparator.class, 4); - } - - @Disabled("This is used for generating testing HFile only") - @ParameterizedTest - @CsvSource({ - "512,GZ,20000,true", "16,GZ,20000,true", - "64,NONE,5000,true", "16,NONE,5000,true", - "16,GZ,200,false" - }) - void generateHFileForTesting(int blockSizeKB, - String compressionCodec, - int numEntries, - boolean uniqueKeys) throws IOException { - TestHoodieReaderWriterUtils.writeHFileForTesting( - String.format("/tmp/hudi_1_0_hbase_2_4_9_%sKB_%s_%s.hfile", - blockSizeKB, compressionCodec, numEntries), - blockSizeKB * 1024, - Compression.Algorithm.valueOf(compressionCodec), - numEntries, - KEY_CREATOR, - VALUE_CREATOR, - uniqueKeys); - } - - private Set getRandomKeys(int count, List keys) { - Set rowKeys = new HashSet<>(); - int totalKeys = keys.size(); - while (rowKeys.size() < count) { - int index = RANDOM.nextInt(totalKeys); - if (!rowKeys.contains(index)) { - rowKeys.add(keys.get(index)); - } - } - return rowKeys; - } - - private void verifyHFileReader( - HFile.Reader reader, String hfileName, boolean mayUseDefaultComparator, - Class clazz, int count) { - // HFile version is 3 - assertEquals(3, reader.getTrailer().getMajorVersion()); - if (mayUseDefaultComparator && hfileName.contains("hudi_0_9")) { - // Pre Hudi 0.10, the default comparator is used for metadata table HFiles - // For bootstrap index HFiles, the custom comparator is always used - assertEquals(CellComparatorImpl.class, reader.getComparator().getClass()); - } else { - assertEquals(clazz, reader.getComparator().getClass()); + try (HoodieAvroHFileReaderImplBase hfileReader = + (HoodieAvroHFileReaderImplBase) createReader(new Configuration())) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + // Filter for "key00001, key05, key24, key16, key31, key61". + // Even though key16 exists, it's a backward seek not in order. + // Our native HFile reader does not allow backward seek, and throws an exception + // Note that backward seek is not expected to happen in production code + Iterator iterator = + hfileReader.getIndexedRecordsByKeysIterator( + Arrays.asList("key00001", "key05", "key24", "key16", "key31", "key61"), + avroSchema); + assertThrows( + IllegalStateException.class, + () -> StreamSupport.stream( + Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .collect(Collectors.toList())); } - assertEquals(count, reader.getEntries()); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java new file mode 100644 index 000000000000..100d4df878f8 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriterBase.java @@ -0,0 +1,486 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.engine.TaskContextSupplier; +import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.hadoop.fs.HadoopFSUtils; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.TreeMap; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; +import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.apache.hudi.common.util.CollectionUtils.toStream; +import static org.apache.hudi.io.hfile.TestHFileReader.BOOTSTRAP_INDEX_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.COMPLEX_SCHEMA_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.SIMPLE_SCHEMA_HFILE_SUFFIX; +import static org.apache.hudi.io.hfile.TestHFileReader.readHFileFromResources; +import static org.apache.hudi.io.storage.HoodieHFileConfig.HFILE_COMPARATOR; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.when; + +/** + * Abstract class for testing HFile reader implementation. + */ +public abstract class TestHoodieHFileReaderWriterBase extends TestHoodieReaderWriterBase { + protected static final String DUMMY_BASE_PATH = "dummy_base_path"; + // Number of records in HFile fixtures for compatibility tests + protected static final int NUM_RECORDS_FIXTURE = 50; + + protected abstract HoodieAvroHFileReaderImplBase createHFileReader(Configuration conf, + byte[] content) throws IOException; + + protected abstract void verifyHFileReader(byte[] content, + String hfileName, + boolean mayUseDefaultComparator, + Class expectedComparatorClazz, + int count) throws IOException; + + protected static Stream populateMetaFieldsAndTestAvroWithMeta() { + return Arrays.stream(new Boolean[][] { + {true, true}, + {false, true}, + {true, false}, + {false, false} + }).map(Arguments::of); + } + + @Override + protected HoodieAvroHFileWriter createWriter( + Schema avroSchema, boolean populateMetaFields) throws Exception { + String instantTime = "000"; + Configuration conf = new Configuration(); + Properties props = new Properties(); + props.setProperty(HoodieTableConfig.POPULATE_META_FIELDS.key(), Boolean.toString(populateMetaFields)); + TaskContextSupplier mockTaskContextSupplier = Mockito.mock(TaskContextSupplier.class); + Supplier partitionSupplier = Mockito.mock(Supplier.class); + when(mockTaskContextSupplier.getPartitionIdSupplier()).thenReturn(partitionSupplier); + when(partitionSupplier.get()).thenReturn(10); + + return (HoodieAvroHFileWriter) HoodieFileWriterFactory.getFileWriter( + instantTime, getFilePath(), conf, HoodieStorageConfig.newBuilder().fromProperties(props).build(), avroSchema, + mockTaskContextSupplier, HoodieRecord.HoodieRecordType.AVRO); + } + + @Override + protected Path getFilePath() { + return new Path(tempDir.toString() + "/f1_1-0-1_000.hfile"); + } + + @Override + protected void verifyMetadata(Configuration conf) throws IOException { + try (HoodieAvroFileReader reader = createReader(conf)) { + assertEquals(NUM_RECORDS, reader.getTotalRecords()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + protected void verifySchema(Configuration conf, String schemaPath) throws IOException { + try (HoodieAvroFileReader reader = createReader(conf)) { + assertEquals( + getSchemaFromResource(TestHoodieHBaseHFileReaderWriter.class, schemaPath), + reader.getSchema()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @ParameterizedTest + @MethodSource("populateMetaFieldsAndTestAvroWithMeta") + public void testWriteReadHFileWithMetaFields(boolean populateMetaFields, boolean testAvroWithMeta) throws Exception { + Schema avroSchema = getSchemaFromResource(TestHoodieOrcReaderWriter.class, "/exampleSchemaWithMetaFields.avsc"); + HoodieAvroHFileWriter writer = createWriter(avroSchema, populateMetaFields); + List keys = new ArrayList<>(); + Map recordMap = new TreeMap<>(); + for (int i = 0; i < 100; i++) { + GenericRecord record = new GenericData.Record(avroSchema); + String key = String.format("%s%04d", "key", i); + record.put("_row_key", key); + keys.add(key); + record.put("time", Integer.toString(RANDOM.nextInt())); + record.put("number", i); + if (testAvroWithMeta) { + // payload does not matter. GenericRecord passed in is what matters + writer.writeAvroWithMetadata( + new HoodieAvroRecord(new HoodieKey((String) record.get("_row_key"), + Integer.toString((Integer) record.get("number"))), + new EmptyHoodieRecordPayload()).getKey(), record); + // only HoodieKey will be looked up from the 2nd arg(HoodieRecord). + } else { + writer.writeAvro(key, record); + } + recordMap.put(key, record); + } + writer.close(); + + Configuration conf = new Configuration(); + HoodieAvroHFileReaderImplBase hoodieHFileReader = + (HoodieAvroHFileReaderImplBase) createReader(conf); + List records = HoodieAvroHFileReaderImplBase.readAllRecords(hoodieHFileReader); + assertEquals(new ArrayList<>(recordMap.values()), records); + + hoodieHFileReader.close(); + + for (int i = 0; i < 2; i++) { + int randomRowstoFetch = 5 + RANDOM.nextInt(10); + Set rowsToFetch = getRandomKeys(randomRowstoFetch, keys); + + List rowsList = new ArrayList<>(rowsToFetch); + Collections.sort(rowsList); + + List expectedRecords = + rowsList.stream().map(recordMap::get).collect(Collectors.toList()); + + hoodieHFileReader = (HoodieAvroHFileReaderImplBase) createReader(conf); + List result = + HoodieAvroHFileReaderImplBase.readRecords(hoodieHFileReader, rowsList).stream() + .map(r -> (GenericRecord) r).collect(Collectors.toList()); + + assertEquals(expectedRecords, result); + + result.forEach(entry -> { + if (populateMetaFields && testAvroWithMeta) { + assertNotNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + } else { + assertNull(entry.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + } + }); + hoodieHFileReader.close(); + } + } + + @Disabled("Disable the test with evolved schema for HFile since it's not supported") + @ParameterizedTest + @Override + public void testWriteReadWithEvolvedSchema(String evolvedSchemaPath) throws Exception { + // TODO(HUDI-3683): fix the schema evolution for HFile + } + + @Test + public void testReadHFileFormatRecords() throws Exception { + writeFileWithSimpleSchema(); + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); + byte[] content = FileIOUtils.readAsByteArray( + fs.open(getFilePath()), (int) fs.getFileStatus(getFilePath()).getLen()); + // Reading byte array in HFile format, without actual file path + Configuration hadoopConf = fs.getConf(); + try (HoodieAvroHFileReaderImplBase hfileReader = createHFileReader(hadoopConf, content)) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + assertEquals(NUM_RECORDS, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + } + } + + @Test + public void testReaderGetRecordIterator() throws Exception { + writeFileWithSimpleSchema(); + try (HoodieAvroHFileReaderImplBase hfileReader = + (HoodieAvroHFileReaderImplBase) createReader(new Configuration())) { + List keys = + IntStream.concat(IntStream.range(40, NUM_RECORDS * 2), IntStream.range(10, 20)) + .mapToObj(i -> "key" + String.format("%02d", i)).collect(Collectors.toList()); + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + Iterator> iterator = + hfileReader.getRecordsByKeysIterator(keys, avroSchema); + + List expectedIds = + IntStream.concat(IntStream.range(40, NUM_RECORDS), IntStream.range(10, 20)) + .boxed().collect(Collectors.toList()); + int index = 0; + while (iterator.hasNext()) { + GenericRecord record = (GenericRecord) iterator.next().getData(); + String key = "key" + String.format("%02d", expectedIds.get(index)); + assertEquals(key, record.get("_row_key").toString()); + assertEquals(Integer.toString(expectedIds.get(index)), record.get("time").toString()); + assertEquals(expectedIds.get(index), record.get("number")); + index++; + } + } + } + + @Test + public void testReaderGetRecordIteratorByKeys() throws Exception { + writeFileWithSimpleSchema(); + try (HoodieAvroHFileReaderImplBase hfileReader = + (HoodieAvroHFileReaderImplBase) createReader(new Configuration())) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + + List keys = Collections.singletonList("key"); + Iterator iterator = + hfileReader.getIndexedRecordsByKeysIterator(keys, avroSchema); + + List recordsByKeys = + toStream(iterator).map(r -> (GenericRecord) r).collect(Collectors.toList()); + + List allRecords = toStream(hfileReader.getRecordIterator()) + .map(r -> (GenericRecord) r.getData()).collect(Collectors.toList()); + + // no entries should match since this is exact match. + assertEquals(Collections.emptyList(), recordsByKeys); + + // filter for "key00001, key05, key12, key24, key2, key31, key49, key61, key50". Valid entries should be matched. + // key00001 should not match. + // key2 : we don't have an exact match + // key61 is greater than max key. + // again, by the time we reach key50, cursor is at EOF. So no entries will be returned. + List expectedKey1s = allRecords.stream().filter(entry -> ( + (entry.get("_row_key").toString()).contains("key05") + || (entry.get("_row_key").toString()).contains("key12") + || (entry.get("_row_key").toString()).contains("key24") + || (entry.get("_row_key").toString()).contains("key31") + || (entry.get("_row_key").toString()).contains("key49"))).collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeysIterator( + Arrays.asList("key00001", "key05", "key12", "key24", "key31", "key49", "key61", "key50"), + avroSchema); + recordsByKeys = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey1s, recordsByKeys); + } + } + + @Test + public void testReaderGetRecordIteratorByKeyPrefixes() throws Exception { + writeFileWithSimpleSchema(); + try (HoodieAvroHFileReaderImplBase hfileReader = + (HoodieAvroHFileReaderImplBase) createReader(new Configuration())) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + + List keyPrefixes = Collections.singletonList("key"); + Iterator iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(keyPrefixes, avroSchema); + + List recordsByPrefix = + toStream(iterator).map(r -> (GenericRecord) r).collect(Collectors.toList()); + + List allRecords = toStream(hfileReader.getRecordIterator()) + .map(r -> (GenericRecord) r.getData()).collect(Collectors.toList()); + + assertEquals(allRecords, recordsByPrefix); + + // filter for "key1" : entries from key10 to key19 should be matched + List expectedKey1s = + allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")) + .collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key1"), + avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), + false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey1s, recordsByPrefix); + + // exact match + List expectedKey25 = + allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key25")) + .collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key25"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey25, recordsByPrefix); + + // no match. key prefix is beyond entries in file. + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key99"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(Collections.emptyList(), recordsByPrefix); + + // no match. but keyPrefix is in between the entries found in file. + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Collections.singletonList("key1234"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(Collections.emptyList(), recordsByPrefix); + + // filter for "key1", "key30" and "key60" : entries from 'key10 to key19' and 'key30' should be matched. + List expectedKey50and1s = + allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1") + || (entry.get("_row_key").toString()).contains("key30")).collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key1", "key30", "key6"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey50and1s, recordsByPrefix); + + // filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched. + List expectedKey50and0s = + allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0") + || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key0", "key50"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + assertEquals(expectedKey50and0s, recordsByPrefix); + + // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched. + List expectedKey1sand0s = allRecords.stream() + .filter(entry -> (entry.get("_row_key").toString()).contains("key1") + || (entry.get("_row_key").toString()).contains("key0")) + .collect(Collectors.toList()); + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key0", "key1"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + Collections.sort(recordsByPrefix, new Comparator() { + @Override + public int compare(GenericRecord o1, GenericRecord o2) { + return o1.get("_row_key").toString().compareTo(o2.get("_row_key").toString()); + } + }); + assertEquals(expectedKey1sand0s, recordsByPrefix); + + // We expect the keys to be looked up in sorted order. If not, matching entries may not be returned. + // key1 should have matching entries, but not key0. + iterator = + hfileReader.getIndexedRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema); + recordsByPrefix = + StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(r -> (GenericRecord) r) + .collect(Collectors.toList()); + Collections.sort(recordsByPrefix, new Comparator() { + @Override + public int compare(GenericRecord o1, GenericRecord o2) { + return o1.get("_row_key").toString().compareTo(o2.get("_row_key").toString()); + } + }); + assertEquals(expectedKey1s, recordsByPrefix); + } + } + + @ParameterizedTest + @ValueSource(strings = { + "/hfile/hudi_0_9_hbase_1_2_3", "/hfile/hudi_0_10_hbase_1_2_3", "/hfile/hudi_0_11_hbase_2_4_9"}) + public void testHoodieHFileCompatibility(String hfilePrefix) throws IOException { + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadPrimitiveRecord() + // using different Hudi releases + String simpleHFile = hfilePrefix + SIMPLE_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestHoodieReaderWriterBase#testWriteReadComplexRecord() + // using different Hudi releases + String complexHFile = hfilePrefix + COMPLEX_SCHEMA_HFILE_SUFFIX; + // This fixture is generated from TestBootstrapIndex#testBootstrapIndex() + // using different Hudi releases. The file is copied from .hoodie/.aux/.bootstrap/.partitions/ + String bootstrapIndexFile = hfilePrefix + BOOTSTRAP_INDEX_HFILE_SUFFIX; + + FileSystem fs = HadoopFSUtils.getFs(getFilePath().toString(), new Configuration()); + byte[] content = readHFileFromResources(simpleHFile); + verifyHFileReader( + content, hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); + + Configuration hadoopConf = fs.getConf(); + try (HoodieAvroHFileReaderImplBase hfileReader = createHFileReader(hadoopConf, content)) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchema.avsc"); + assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + } + + content = readHFileFromResources(complexHFile); + verifyHFileReader( + content, hfilePrefix, true, HFILE_COMPARATOR.getClass(), NUM_RECORDS_FIXTURE); + try (HoodieAvroHFileReaderImplBase hfileReader = createHFileReader(hadoopConf, content)) { + Schema avroSchema = + getSchemaFromResource(TestHoodieReaderWriterBase.class, "/exampleSchemaWithUDT.avsc"); + assertEquals(NUM_RECORDS_FIXTURE, hfileReader.getTotalRecords()); + verifySimpleRecords(hfileReader.getRecordIterator(avroSchema)); + } + + content = readHFileFromResources(bootstrapIndexFile); + verifyHFileReader( + content, hfilePrefix, false, HFileBootstrapIndex.HoodieKVComparator.class, 4); + } + + private Set getRandomKeys(int count, List keys) { + Set rowKeys = new HashSet<>(); + int totalKeys = keys.size(); + while (rowKeys.size() < count) { + int index = RANDOM.nextInt(totalKeys); + if (!rowKeys.contains(index)) { + rowKeys.add(keys.get(index)); + } + } + return rowKeys; + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java index 98614be25c3e..e2d199498c1d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieOrcReaderWriter.java @@ -38,6 +38,7 @@ import java.util.function.Supplier; import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.apache.hudi.io.storage.HoodieOrcConfig.AVRO_SCHEMA_METADATA_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -70,7 +71,8 @@ protected HoodieAvroOrcWriter createWriter( @Override protected HoodieAvroFileReader createReader( Configuration conf) throws Exception { - return (HoodieAvroFileReader) HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(conf, getFilePath()); + return (HoodieAvroFileReader) HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, conf, getFilePath()); } @Override diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java index 6a5f3cd46b76..a0ec0dfdb89c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHoodieReaderWriterUtils.java @@ -74,7 +74,7 @@ static void writeHFileForTesting(String fileLocation, } } writer.appendFileInfo(getUTF8Bytes(CUSTOM_META_KEY), getUTF8Bytes(CUSTOM_META_VALUE)); - writer.appendMetaBlock(HoodieAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() { + writer.appendMetaBlock(HoodieNativeAvroHFileReader.KEY_BLOOM_FILTER_META_BLOCK, new Writable() { @Override public void write(DataOutput out) throws IOException { out.write(getUTF8Bytes(DUMMY_BLOOM_FILTER)); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java index 6aa5dd9acbac..ecfc26a10dc7 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/clustering/ClusteringOperator.java @@ -273,7 +273,8 @@ private Iterator readRecordsForGroupWithLogs(List try { Option baseFileReader = StringUtils.isNullOrEmpty(clusteringOp.getDataFilePath()) ? Option.empty() - : Option.of(HoodieFileReaderFactory.getReaderFactory(table.getConfig().getRecordMerger().getRecordType()).getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath()))); + : Option.of(HoodieFileReaderFactory.getReaderFactory(table.getConfig().getRecordMerger().getRecordType()) + .getFileReader(table.getConfig(), table.getHadoopConf(), new Path(clusteringOp.getDataFilePath()))); HoodieMergedLogRecordScanner scanner = HoodieMergedLogRecordScanner.newBuilder() .withFileSystem(table.getMetaClient().getFs()) .withBasePath(table.getMetaClient().getBasePath()) @@ -321,7 +322,8 @@ private Iterator readRecordsForGroupBaseFiles(List Iterable indexedRecords = () -> { try { HoodieFileReaderFactory fileReaderFactory = HoodieFileReaderFactory.getReaderFactory(table.getConfig().getRecordMerger().getRecordType()); - HoodieAvroFileReader fileReader = (HoodieAvroFileReader) fileReaderFactory.getFileReader(table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())); + HoodieAvroFileReader fileReader = (HoodieAvroFileReader) fileReaderFactory + .getFileReader(table.getConfig(), table.getHadoopConf(), new Path(clusteringOp.getDataFilePath())); return new CloseableMappingIterator<>(fileReader.getRecordIterator(readerSchema), HoodieRecord::getData); } catch (IOException e) { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java index 2fda963f8de6..44b8b57b46dd 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileRecordReader.java @@ -18,11 +18,19 @@ package org.apache.hudi.hadoop; +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.model.HoodieFileFormat; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; + import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -30,25 +38,25 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.util.collection.ClosableIterator; -import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; -import org.apache.hudi.io.storage.HoodieAvroHFileReader; import java.io.IOException; +import static org.apache.hudi.common.util.ConfigUtils.getReaderConfigs; + public class HoodieHFileRecordReader implements RecordReader { private long count = 0; private ArrayWritable valueObj; - private HoodieAvroHFileReader reader; + private HoodieFileReader reader; private ClosableIterator> recordIterator; private Schema schema; public HoodieHFileRecordReader(Configuration conf, InputSplit split, JobConf job) throws IOException { FileSplit fileSplit = (FileSplit) split; Path path = fileSplit.getPath(); - reader = new HoodieAvroHFileReader(conf, path, new CacheConfig(conf)); + HoodieConfig hoodieConfig = getReaderConfigs(conf); + reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) + .getFileReader(hoodieConfig, conf, path, HoodieFileFormat.HFILE, Option.empty()); schema = reader.getSchema(); valueObj = new ArrayWritable(Writable.class, new Writable[schema.getFields().size()]); diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java index a6d1cf66acb8..539bc21eb88b 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.hadoop.utils; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; @@ -64,6 +65,7 @@ import static org.apache.hudi.avro.AvroSchemaUtils.appendFieldsToSchema; import static org.apache.hudi.avro.AvroSchemaUtils.createNullableSchema; +import static org.apache.hudi.common.util.ConfigUtils.getReaderConfigs; public class HoodieRealtimeRecordReaderUtils { private static final Logger LOG = LoggerFactory.getLogger(HoodieRealtimeRecordReaderUtils.class); @@ -303,7 +305,8 @@ public static Schema addPartitionFields(Schema schema, List partitioning } public static HoodieFileReader getBaseFileReader(Path path, JobConf conf) throws IOException { - return HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO).getFileReader(conf, path); + HoodieConfig hoodieConfig = getReaderConfigs(conf); + return HoodieFileReaderFactory.getReaderFactory(HoodieRecord.HoodieRecordType.AVRO).getFileReader(hoodieConfig, conf, path); } private static Schema appendNullSchemaFields(Schema schema, List newFieldNames) { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java index 4207e3bf1138..d5f8fa38b5e1 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java @@ -19,6 +19,7 @@ package org.apache.hudi.hadoop.testutils; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroIndexedRecord; import org.apache.hudi.common.model.HoodieFileFormat; @@ -398,7 +399,7 @@ public static HoodieLogFormat.Writer writeDataBlockToLogFile(File partitionDir, List hoodieRecords = records.stream().map(HoodieAvroIndexedRecord::new).collect(Collectors.toList()); if (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) { dataBlock = new HoodieHFileDataBlock( - hoodieRecords, header, Compression.Algorithm.GZ, writer.getLogFile().getPath()); + hoodieRecords, header, Compression.Algorithm.GZ, writer.getLogFile().getPath(), HoodieReaderConfig.USE_NATIVE_HFILE_READER.defaultValue()); } else if (logBlockType == HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK) { dataBlock = new HoodieParquetDataBlock(hoodieRecords, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP, 0.1, true); } else { diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java index a2716d0e73a3..02d534d5b98f 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java @@ -72,6 +72,7 @@ import static java.util.Map.Entry.comparingByValue; import static java.util.stream.Collectors.toMap; +import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; /** * This class helps to generate updates from an already existing hoodie dataset. It supports generating updates in across partitions, files and records. @@ -271,8 +272,8 @@ private Iterator readColumnarOrLogFiles(FileSlice fileSlice) thro if (fileSlice.getBaseFile().isPresent()) { // Read the base files using the latest writer schema. Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); - HoodieAvroFileReader reader = TypeUtils.unsafeCast(HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(metaClient.getHadoopConf(), - new Path(fileSlice.getBaseFile().get().getPath()))); + HoodieAvroFileReader reader = TypeUtils.unsafeCast(HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader( + DEFAULT_HUDI_CONFIG_FOR_READER, metaClient.getHadoopConf(), new Path(fileSlice.getBaseFile().get().getPath()))); return new CloseableMappingIterator<>(reader.getRecordIterator(schema), HoodieRecord::getData); } else { // If there is no data file, fall back to reading log files diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java index 6b357c6c46c3..25470d47d43e 100644 --- a/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java @@ -7,13 +7,14 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.hudi.common.util; diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java index 5143bd680b08..f033127d82e9 100644 --- a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java @@ -109,6 +109,16 @@ public static byte[] getUTF8Bytes(String str) { return str.getBytes(StandardCharsets.UTF_8); } + public static String getStringFromUTF8Bytes(byte[] bytes) { + return getStringFromUTF8Bytes(bytes, 0, bytes.length); + } + + public static String getStringFromUTF8Bytes(byte[] bytes, + int offset, + int length) { + return new String(bytes, offset, length, StandardCharsets.UTF_8); + } + public static boolean isNullOrEmpty(String str) { return str == null || str.length() == 0; } diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java index 100ae4b5ce5b..b5921b8a4198 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileCursor.java @@ -71,6 +71,7 @@ public void setKeyValue(KeyValue keyValue) { public void setEof() { this.eof = true; + this.keyValue = Option.empty(); } public void unsetEof() { diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java index 7b3518bd2b27..95288c3885e5 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java @@ -27,6 +27,8 @@ import java.util.HashMap; import java.util.Map; +import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; + /** * Represents a {@link HFileBlockType#FILE_INFO} block. */ @@ -46,7 +48,7 @@ public HFileInfo readFileInfo() throws IOException { byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength) != 0) { throw new IOException( "Unexpected Protobuf magic at the beginning of the HFileFileInfoBlock: " - + new String(byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength)); + + getStringFromUTF8Bytes(byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength)); } ByteArrayInputStream inputStream = new ByteArrayInputStream( byteBuff, diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java index b792ba6eb321..87dafc9d8869 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileReaderImpl.java @@ -241,6 +241,9 @@ public boolean isSeeked() { @Override public void close() throws IOException { + currentDataBlockEntry = Option.empty(); + currentDataBlock = Option.empty(); + cursor.setEof(); stream.close(); } diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java index 8f100c351755..796baa4481dc 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java @@ -26,6 +26,8 @@ import java.util.HashMap; import java.util.Map; +import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; + /** * Util methods for reading and writing HFile. */ @@ -73,6 +75,38 @@ public static int compareKeys(Key key1, Key key2) { key2.getBytes(), key2.getContentOffset(), key2.getContentLength()); } + /** + * @param prefix the prefix to check + * @param key key to check + * @return whether the key starts with the prefix. + */ + public static boolean isPrefixOfKey(Key prefix, Key key) { + int prefixLength = prefix.getContentLength(); + int keyLength = key.getLength(); + if (prefixLength > keyLength) { + return false; + } + + byte[] prefixBytes = prefix.getBytes(); + byte[] keyBytes = key.getBytes(); + for (int i = 0; i < prefixLength; i++) { + if (prefixBytes[prefix.getContentOffset() + i] != keyBytes[key.getContentOffset() + i]) { + return false; + } + } + return true; + } + + /** + * Gets the value in String. + * + * @param kv {@link KeyValue} instance. + * @return the String with UTF-8 decoding. + */ + public static String getValue(KeyValue kv) { + return getStringFromUTF8Bytes(kv.getBytes(), kv.getValueOffset(), kv.getValueLength()); + } + /** * The ID mapping cannot change or else that breaks all existing HFiles out there, * even the ones that are not compressed! (They use the NONE algorithm) diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java index 5c00e43ab16f..1f4f35ac3498 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java @@ -21,6 +21,7 @@ import org.apache.hudi.io.util.IOUtils; +import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16; import static org.apache.hudi.io.hfile.HFileUtils.compareKeys; import static org.apache.hudi.io.util.IOUtils.readShort; @@ -64,6 +65,10 @@ public int getContentLength() { return readShort(bytes, getOffset()); } + public String getContentInString() { + return getStringFromUTF8Bytes(getBytes(), getContentOffset(), getContentLength()); + } + @Override public int hashCode() { // Only consider key content for hash code diff --git a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java index 96cc6df95cc8..8017c0eb96f5 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java @@ -19,10 +19,13 @@ package org.apache.hudi.io.util; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; /** * Util methods on I/O. @@ -265,4 +268,13 @@ public static void copy(InputStream inputStream, OutputStream outputStream) thro outputStream.write(buffer, 0, len); } } + + /** + * @param byteBuffer {@link ByteBuffer} containing the bytes. + * @return {@link DataInputStream} based on the byte buffer. + */ + public static DataInputStream getDataInputStream(ByteBuffer byteBuffer) { + return new DataInputStream(new ByteArrayInputStream( + byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit() - byteBuffer.arrayOffset())); + } } diff --git a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java index e0ee96261390..d9a1969c75d4 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java @@ -44,6 +44,7 @@ import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_EOF; import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND; import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_IN_RANGE; +import static org.apache.hudi.io.hfile.HFileUtils.getValue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -571,10 +572,6 @@ private static void verifyHFileSeekToReads(HFileReader reader, } } - private static String getValue(KeyValue kv) { - return new String(kv.getBytes(), kv.getValueOffset(), kv.getValueLength()); - } - static class KeyLookUpInfo { private final String lookUpKey; private final int expectedSeekToResult; diff --git a/hudi-io/src/test/java/org/apache/hudi/io/util/TestHFileUtils.java b/hudi-io/src/test/java/org/apache/hudi/io/util/TestHFileUtils.java new file mode 100644 index 000000000000..e28fab8195e3 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/io/util/TestHFileUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io.util; + +import org.apache.hudi.io.hfile.UTF8StringKey; + +import org.junit.jupiter.api.Test; + +import static org.apache.hudi.io.hfile.HFileUtils.isPrefixOfKey; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link TestHFileUtils}. + */ +public class TestHFileUtils { + @Test + public void testIsPrefixOfKey() { + assertTrue(isPrefixOfKey(new UTF8StringKey(""), new UTF8StringKey(""))); + assertTrue(isPrefixOfKey(new UTF8StringKey(""), new UTF8StringKey("abcdefg"))); + assertTrue(isPrefixOfKey(new UTF8StringKey("abc"), new UTF8StringKey("abcdefg"))); + assertTrue(isPrefixOfKey(new UTF8StringKey("abcdefg"), new UTF8StringKey("abcdefg"))); + assertFalse(isPrefixOfKey(new UTF8StringKey("abd"), new UTF8StringKey("abcdefg"))); + assertFalse(isPrefixOfKey(new UTF8StringKey("b"), new UTF8StringKey("abcdefg"))); + assertFalse(isPrefixOfKey(new UTF8StringKey("abcdefgh"), new UTF8StringKey("abcdefg"))); + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala index d2ba5a7a4bd4..32afe8c1182b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala @@ -22,10 +22,13 @@ import org.apache.hudi.HoodieBaseRelation._ import org.apache.hudi.HoodieConversionUtils.toScalaOption import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.client.utils.SparkInternalSchemaConverter -import org.apache.hudi.common.config.{ConfigProperty, HoodieMetadataConfig, SerializableConfiguration} +import org.apache.hudi.common.config.{ConfigProperty, HoodieConfig, HoodieMetadataConfig, SerializableConfiguration} +import org.apache.hudi.common.config.HoodieReaderConfig.USE_NATIVE_HFILE_READER import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath import org.apache.hudi.common.model.{FileSlice, HoodieFileFormat, HoodieRecord} +import org.apache.hudi.common.model.HoodieFileFormat.HFILE +import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.table.timeline.HoodieTimeline import org.apache.hudi.common.table.timeline.TimelineUtils.validateTimestampAsOf @@ -40,14 +43,13 @@ import org.apache.hudi.hadoop.fs.CachingPath import org.apache.hudi.internal.schema.{HoodieSchemaException, InternalSchema} import org.apache.hudi.internal.schema.convert.AvroInternalSchemaConverter import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper} -import org.apache.hudi.io.storage.HoodieAvroHFileReader +import org.apache.hudi.io.storage.HoodieFileReaderFactory import org.apache.hudi.metadata.HoodieTableMetadata import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hadoop.hbase.io.hfile.CacheConfig import org.apache.hadoop.mapred.JobConf import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex import org.apache.spark.internal.Logging @@ -754,7 +756,11 @@ object HoodieBaseRelation extends SparkAdapterSupport { partitionedFile => { val hadoopConf = hadoopConfBroadcast.value.get() val filePath = sparkAdapter.getSparkPartitionedFileUtils.getPathFromPartitionedFile(partitionedFile) - val reader = new HoodieAvroHFileReader(hadoopConf, filePath, new CacheConfig(hadoopConf)) + val hoodieConfig = new HoodieConfig() + hoodieConfig.setValue(USE_NATIVE_HFILE_READER, + options.getOrElse(USE_NATIVE_HFILE_READER.key(), USE_NATIVE_HFILE_READER.defaultValue().toString)) + val reader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(hoodieConfig, hadoopConf, filePath, HFILE) val requiredRowSchema = requiredDataSchema.structTypeSchema // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index e8fbe611937e..f8607c42237d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -22,6 +22,7 @@ import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -47,6 +48,7 @@ import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.CleanerUtils; +import org.apache.hudi.common.util.ConfigUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ParquetUtils; @@ -87,6 +89,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Properties; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; @@ -95,6 +98,7 @@ import scala.Tuple2; +import static org.apache.hudi.common.config.HoodieReaderConfig.USE_NATIVE_HFILE_READER; import static org.apache.hudi.common.model.HoodieRecord.FILENAME_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.PARTITION_PATH_METADATA_FIELD; import static org.apache.hudi.common.model.HoodieRecord.RECORD_KEY_METADATA_FIELD; @@ -491,9 +495,9 @@ public boolean doMetadataTableValidation() { } try (HoodieMetadataValidationContext metadataTableBasedContext = - new HoodieMetadataValidationContext(engineContext, cfg, metaClient, true); + new HoodieMetadataValidationContext(engineContext, props, metaClient, true, cfg.assumeDatePartitioning); HoodieMetadataValidationContext fsBasedContext = - new HoodieMetadataValidationContext(engineContext, cfg, metaClient, false)) { + new HoodieMetadataValidationContext(engineContext, props, metaClient, false, cfg.assumeDatePartitioning)) { Set finalBaseFilesForCleaning = baseFilesForCleaning; List> result = new ArrayList<>( engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> { @@ -1267,6 +1271,7 @@ private static class HoodieMetadataValidationContext implements AutoCloseable, S private static final Logger LOG = LoggerFactory.getLogger(HoodieMetadataValidationContext.class); + private final Properties props; private final HoodieTableMetaClient metaClient; private final HoodieTableFileSystemView fileSystemView; private final HoodieTableMetadata tableMetadata; @@ -1274,8 +1279,9 @@ private static class HoodieMetadataValidationContext implements AutoCloseable, S private List allColumnNameList; public HoodieMetadataValidationContext( - HoodieEngineContext engineContext, Config cfg, HoodieTableMetaClient metaClient, - boolean enableMetadataTable) { + HoodieEngineContext engineContext, Properties props, HoodieTableMetaClient metaClient, + boolean enableMetadataTable, boolean assumeDatePartitioning) { + this.props = props; this.metaClient = metaClient; this.enableMetadataTable = enableMetadataTable; HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder() @@ -1283,7 +1289,7 @@ public HoodieMetadataValidationContext( .withMetadataIndexBloomFilter(enableMetadataTable) .withMetadataIndexColumnStats(enableMetadataTable) .withEnableRecordIndex(enableMetadataTable) - .withAssumeDatePartitioning(cfg.assumeDatePartitioning) + .withAssumeDatePartitioning(assumeDatePartitioning) .build(); this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, metaClient, metadataConfig); @@ -1378,7 +1384,11 @@ private List getAllColumnNames() { private Option readBloomFilterFromFile(String partitionPath, String filename) { Path path = new Path(FSUtils.getPartitionPath(metaClient.getBasePathV2(), partitionPath), filename); BloomFilter bloomFilter; - try (HoodieFileReader fileReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO).getFileReader(metaClient.getHadoopConf(), path)) { + HoodieConfig hoodieConfig = new HoodieConfig(); + hoodieConfig.setValue(USE_NATIVE_HFILE_READER, + Boolean.toString(ConfigUtils.getBooleanWithAltKeys(props, USE_NATIVE_HFILE_READER))); + try (HoodieFileReader fileReader = HoodieFileReaderFactory.getReaderFactory(HoodieRecordType.AVRO) + .getFileReader(hoodieConfig, metaClient.getHadoopConf(), path)) { bloomFilter = fileReader.readBloomFilter(); if (bloomFilter == null) { LOG.error("Failed to read bloom filter for " + path); diff --git a/pom.xml b/pom.xml index ab51c9988f37..7d87df764fbe 100644 --- a/pom.xml +++ b/pom.xml @@ -477,6 +477,8 @@ org.apache.htrace:htrace-core4 com.fasterxml.jackson.module:jackson-module-afterburner + + com.google.protobuf:protobuf-java @@ -577,6 +579,10 @@ org.apache.hudi.com.fasterxml.jackson.module + + com.google.protobuf. + org.apache.hudi.com.google.protobuf. + From 8fda1515875893f06dca1afde67accedd0cf678c Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Mon, 29 Jan 2024 09:24:48 -0800 Subject: [PATCH 086/112] [HUDI-6902] Disable a flaky test (#10551) --- .../apache/hudi/utils/HoodieWriterClientTestHarness.java | 4 +--- .../apache/hudi/client/TestJavaHoodieBackedMetadata.java | 3 +-- .../apache/hudi/testutils/HoodieJavaClientTestHarness.java | 6 ++---- .../hudi/client/functional/TestConsistentBucketIndex.java | 5 +---- .../TestDataValidationCheckForLogCompactionActions.java | 4 +--- .../hudi/client/functional/TestHoodieBackedMetadata.java | 4 +--- .../org/apache/hudi/client/functional/TestHoodieIndex.java | 7 +++---- .../apache/hudi/io/TestHoodieKeyLocationFetchHandle.java | 5 +---- .../hudi/table/action/cluster/ClusteringTestUtils.java | 3 +-- .../hudi/table/action/compact/CompactionTestBase.java | 5 +---- .../rollback/TestMergeOnReadRollbackActionExecutor.java | 4 ++-- .../java/org/apache/hudi/functional/TestBootstrap.java | 2 ++ .../java/org/apache/hudi/functional/TestOrcBootstrap.java | 2 ++ .../functional/TestSparkConsistentBucketClustering.java | 5 +---- .../hudi/functional/TestSparkSortAndSizeClustering.java | 5 +---- 15 files changed, 21 insertions(+), 43 deletions(-) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/HoodieWriterClientTestHarness.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/HoodieWriterClientTestHarness.java index 28173acd3aeb..bf7a3e33bf07 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/HoodieWriterClientTestHarness.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/HoodieWriterClientTestHarness.java @@ -27,7 +27,6 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.RawTripTestPayload; @@ -160,8 +159,7 @@ public HoodieWriteConfig.Builder getConfigBuilder(String schemaStr, HoodieIndex. .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()) .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withRemoteServerPort(timelineServicePort) - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withRemoteServerPort(timelineServicePort).build()); if (StringUtils.nonEmpty(schemaStr)) { builder.withSchema(schemaStr); } diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index 2dc54cb75ad3..636eb7e7a342 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -63,7 +63,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.testutils.FileCreateUtils; @@ -2487,7 +2486,7 @@ public HoodieWriteConfig.Builder getConfigBuilder(String schemaStr, HoodieIndex. .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()) .withEmbeddedTimelineServerEnabled(false).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .build()); } @Test diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java index 0fab5b811d14..3819ac365dc7 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java @@ -45,7 +45,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.table.view.SyncableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView; @@ -133,7 +132,7 @@ public static void tearDownAll() throws IOException { @BeforeEach protected void initResources() throws IOException { basePath = tempDir.resolve("java_client_tests" + System.currentTimeMillis()).toAbsolutePath().toUri().getPath(); - hadoopConf = new Configuration(); + hadoopConf = new Configuration(false); taskContextSupplier = new TestJavaTaskContextSupplier(); context = new HoodieJavaEngineContext(hadoopConf, taskContextSupplier); initFileSystem(basePath, hadoopConf); @@ -999,8 +998,7 @@ public HoodieWriteConfig.Builder getConfigBuilder(String schemaStr, HoodieIndex. .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()) .withEmbeddedTimelineServerEnabled(false).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withRemoteServerPort(timelineServicePort) - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withRemoteServerPort(timelineServicePort).build()); if (StringUtils.nonEmpty(schemaStr)) { builder.withSchema(schemaStr); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestConsistentBucketIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestConsistentBucketIndex.java index b23259c12645..efab3975d72b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestConsistentBucketIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestConsistentBucketIndex.java @@ -27,8 +27,6 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.Option; @@ -309,7 +307,6 @@ public HoodieWriteConfig.Builder getConfigBuilder() { .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024).parquetMaxFileSize(1024 * 1024).build()) .forTable("test-trip-table") - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestDataValidationCheckForLogCompactionActions.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestDataValidationCheckForLogCompactionActions.java index 635f1c651ac6..d72e45b023d4 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestDataValidationCheckForLogCompactionActions.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestDataValidationCheckForLogCompactionActions.java @@ -32,7 +32,6 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.RawTripTestPayload; @@ -407,8 +406,7 @@ private HoodieWriteConfig.Builder getConfigBuilderForSecondTable(String tableNam .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()) .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withRemoteServerPort(timelineServicePort) - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()) + .withRemoteServerPort(timelineServicePort).build()) .withProperties(properties); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 511c34eb656b..3370cfd6410d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -65,7 +65,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.testutils.FileCreateUtils; @@ -3125,8 +3124,7 @@ public HoodieWriteConfig.Builder getConfigBuilder(String schemaStr, HoodieIndex. .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()) .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withRemoteServerPort(timelineServicePort) - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()) + .withRemoteServerPort(timelineServicePort).build()) .withProperties(properties); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java index 4b45fa460759..44cc394df148 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java @@ -35,8 +35,6 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.RawTripTestPayload; @@ -63,6 +61,7 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -403,6 +402,7 @@ private static Stream regularIndexTypeParams() { return Stream.of(data).map(Arguments::of); } + @Disabled("HUDI-7353") @ParameterizedTest @MethodSource("regularIndexTypeParams") public void testTagLocationAndFetchRecordLocations(IndexType indexType, boolean populateMetaFields, boolean enableMetadataIndex) throws Exception { @@ -645,8 +645,7 @@ public HoodieWriteConfig.Builder getConfigBuilder() { .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024).parquetMaxFileSize(1024 * 1024).build()) .forTable("test-trip-table") - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } private JavaPairRDD>> getRecordLocations(JavaRDD keyRDD, HoodieTable hoodieTable) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieKeyLocationFetchHandle.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieKeyLocationFetchHandle.java index 3e2620c1e4b3..756f37481572 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieKeyLocationFetchHandle.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieKeyLocationFetchHandle.java @@ -26,8 +26,6 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; @@ -173,7 +171,6 @@ public HoodieWriteConfig.Builder getConfigBuilder() { .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024).parquetMaxFileSize(1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().build()) - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/ClusteringTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/ClusteringTestUtils.java index fb0d00853129..94687069e885 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/ClusteringTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/ClusteringTestUtils.java @@ -29,7 +29,6 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.CompactionTestUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.ClusteringUtils; @@ -97,7 +96,7 @@ public static HoodieWriteConfig getClusteringConfig(String basePath, String sche .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server .withRemoteServerPort(timelineServicePort) - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()) + .build()) .withClusteringConfig(clusteringConfig) .withPreCommitValidatorConfig(HoodiePreCommitValidatorConfig.newBuilder() .withPreCommitValidator(SqlQueryEqualityPreCommitValidator.class.getName()) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java index 551533bb894c..5596b433d4f4 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/CompactionTestBase.java @@ -32,8 +32,6 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; @@ -80,8 +78,7 @@ protected HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) { .hfileMaxFileSize(1024 * 1024 * 1024).parquetMaxFileSize(1024 * 1024 * 1024).orcMaxFileSize(1024 * 1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } /** diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java index f0f2a5e651ab..426f7e489d42 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java @@ -33,7 +33,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.table.view.SyncableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.StringUtils; @@ -249,7 +248,8 @@ public void testRollbackForCanIndexLogFile() throws IOException { .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()) .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()).withRollbackUsingMarkers(false).withAutoCommit(false).build(); + .build()) + .withRollbackUsingMarkers(false).withAutoCommit(false).build(); //1. prepare data new HoodieTestDataGenerator().writePartitionMetadata(fs, new String[] {DEFAULT_FIRST_PARTITION_PATH}, basePath); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java index c3baf0f52354..ca2472590169 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java @@ -85,6 +85,7 @@ import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -115,6 +116,7 @@ /** * Tests Bootstrap Client functionality. */ +@Disabled("HUDI-7353") @Tag("functional") public class TestBootstrap extends HoodieSparkClientTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java index 54857e78eb74..8ee712599533 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java @@ -78,6 +78,7 @@ import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -105,6 +106,7 @@ /** * Tests Bootstrap Client functionality. */ +@Disabled("HUDI-7353") @Tag("functional") public class TestOrcBootstrap extends HoodieSparkClientTestBase { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkConsistentBucketClustering.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkConsistentBucketClustering.java index c965cf5b078f..8d321204aa62 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkConsistentBucketClustering.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkConsistentBucketClustering.java @@ -31,8 +31,6 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.Option; @@ -360,8 +358,7 @@ public HoodieWriteConfig.Builder getConfigBuilder() { .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024).parquetMaxFileSize(1024 * 1024).build()) .forTable("test-trip-table") - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } private static Stream configParams() { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkSortAndSizeClustering.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkSortAndSizeClustering.java index 1898a276a9f6..fee3ecadda65 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkSortAndSizeClustering.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkSortAndSizeClustering.java @@ -28,8 +28,6 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; -import org.apache.hudi.common.table.view.FileSystemViewStorageType; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.ClusteringUtils; @@ -162,7 +160,6 @@ public HoodieWriteConfig.Builder getConfigBuilder() { .withParallelism(2, 2) .withWriteStatusClass(MetadataMergeWriteStatus.class) .forTable("clustering-table") - .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() - .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()); + .withEmbeddedTimelineServerEnabled(true); } } From 90ca4f02aede7fe9d34f776d5a00c70e8eff18c1 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 19:40:34 -0800 Subject: [PATCH 087/112] [HUDI-7346] Remove usage of org.apache.hadoop.hbase.util.Bytes (#10574) --- .../hudi/cli/commands/TestTableCommand.java | 4 +- .../index/hbase/SparkHoodieHBaseIndex.java | 33 ++++---- .../hbase/TestSparkHoodieHBaseIndex.java | 4 +- .../hudi/avro/GenericAvroSerializer.java | 4 +- .../bootstrap/index/HFileBootstrapIndex.java | 13 ++- .../common/model/HoodieCommitMetadata.java | 7 +- .../HoodieConsistentHashingMetadata.java | 4 +- .../model/HoodieReplaceCommitMetadata.java | 5 +- .../debezium/PostgresDebeziumAvroPayload.java | 5 +- .../common/table/log/HoodieLogFileReader.java | 4 +- .../table/log/block/HoodieAvroDataBlock.java | 4 +- .../hudi/common/util/Base64CodecUtil.java | 4 +- .../apache/hudi/common/util/hash/HashID.java | 6 +- ...FileBasedInternalSchemaStorageManager.java | 4 +- .../HoodieAvroHFileReaderImplBase.java | 4 +- .../storage/HoodieNativeAvroHFileReader.java | 10 +-- ...TestInLineFileSystemHFileInLiningBase.java | 6 +- ...tInLineFileSystemWithHBaseHFileReader.java | 17 ++-- .../TestPostgresDebeziumAvroPayload.java | 6 +- .../apache/hudi/hadoop/InputSplitUtils.java | 4 +- .../apache/hudi/common/util/StringUtils.java | 16 ++-- .../hudi/io/hfile/HFileFileInfoBlock.java | 4 +- .../org/apache/hudi/io/hfile/HFileUtils.java | 4 +- .../java/org/apache/hudi/io/hfile/Key.java | 4 +- .../java/org/apache/hudi/io/util/IOUtils.java | 81 +++++++++++++++++++ .../org/apache/hudi/io/util/TestIOUtils.java | 28 +++++++ .../store/TestRelationalDBBasedStore.java | 9 ++- .../hudi/cli/HDFSParquetImporterUtils.java | 5 +- .../helpers/TestProtoConversionUtil.java | 4 +- 29 files changed, 212 insertions(+), 91 deletions(-) diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java index c1c44f625188..2eed406c6697 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java @@ -46,7 +46,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; @@ -55,6 +54,7 @@ import java.util.Map; import static org.apache.hudi.common.table.HoodieTableMetaClient.METAFOLDER_NAME; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -285,6 +285,6 @@ private String getFileContent(String fileToReadStr) throws IOException { byte[] data = new byte[(int) fileToRead.length()]; fis.read(data); fis.close(); - return new String(data, StandardCharsets.UTF_8); + return fromUTF8Bytes(data); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java index 43af6dda0d4a..097e3decc2fb 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java @@ -61,7 +61,6 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.security.UserGroupInformation; import org.apache.spark.Partitioner; import org.apache.spark.SparkConf; @@ -96,6 +95,8 @@ import static org.apache.hadoop.hbase.security.SecurityConstants.REGIONSERVER_KRB_PRINCIPAL; import static org.apache.hadoop.hbase.security.User.HBASE_SECURITY_AUTHORIZATION_CONF_KEY; import static org.apache.hadoop.hbase.security.User.HBASE_SECURITY_CONF_KEY; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Hoodie Index implementation backed by HBase. @@ -107,10 +108,10 @@ public class SparkHoodieHBaseIndex extends HoodieIndex { public static final String DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME = "spark.dynamicAllocation.maxExecutors"; - private static final byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s"); - private static final byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts"); - private static final byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name"); - private static final byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path"); + private static final byte[] SYSTEM_COLUMN_FAMILY = getUTF8Bytes("_s"); + private static final byte[] COMMIT_TS_COLUMN = getUTF8Bytes("commit_ts"); + private static final byte[] FILE_NAME_COLUMN = getUTF8Bytes("file_name"); + private static final byte[] PARTITION_PATH_COLUMN = getUTF8Bytes("partition_path"); private static final Logger LOG = LoggerFactory.getLogger(SparkHoodieHBaseIndex.class); private static Connection hbaseConnection = null; @@ -217,7 +218,7 @@ public void close() { } private Get generateStatement(String key) throws IOException { - return new Get(Bytes.toBytes(getHBaseKey(key))).readVersions(1).addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN) + return new Get(getUTF8Bytes(getHBaseKey(key))).readVersions(1).addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN) .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN).addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN); } @@ -272,10 +273,10 @@ private Function2>, Iterator, Iterator> updateL // This is an update, no need to update index continue; } - Put put = new Put(Bytes.toBytes(getHBaseKey(recordDelegate.getRecordKey()))); - put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, Bytes.toBytes(loc.get().getInstantTime())); - put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, Bytes.toBytes(loc.get().getFileId())); - put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, Bytes.toBytes(recordDelegate.getPartitionPath())); + Put put = new Put(getUTF8Bytes(getHBaseKey(recordDelegate.getRecordKey()))); + put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, getUTF8Bytes(loc.get().getInstantTime())); + put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, getUTF8Bytes(loc.get().getFileId())); + put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, getUTF8Bytes(recordDelegate.getPartitionPath())); mutations.add(put); } else { // Delete existing index for a deleted record - Delete delete = new Delete(Bytes.toBytes(getHBaseKey(recordDelegate.getRecordKey()))); + Delete delete = new Delete(getUTF8Bytes(getHBaseKey(recordDelegate.getRecordKey()))); mutations.add(delete); } } @@ -616,7 +617,7 @@ public boolean rollbackCommit(String instantTime) { while (scannerIterator.hasNext()) { Result result = scannerIterator.next(); currentVersionResults.add(result); - statements.add(generateStatement(Bytes.toString(result.getRow()), 0L, rollbackTime - 1)); + statements.add(generateStatement(fromUTF8Bytes(result.getRow()), 0L, rollbackTime - 1)); if (scannerIterator.hasNext() && statements.size() < multiGetBatchSize) { continue; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java index 4b0666934cf4..6e6177626005 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java @@ -57,7 +57,6 @@ import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -85,6 +84,7 @@ import static org.apache.hadoop.hbase.HConstants.ZOOKEEPER_CLIENT_PORT; import static org.apache.hadoop.hbase.HConstants.ZOOKEEPER_QUORUM; import static org.apache.hadoop.hbase.HConstants.ZOOKEEPER_ZNODE_PARENT; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -125,7 +125,7 @@ public static void init() throws Exception { utility = new HBaseTestingUtility(hbaseConfig); utility.startMiniCluster(); hbaseConfig = utility.getConnection().getConfiguration(); - utility.createTable(TableName.valueOf(TABLE_NAME), Bytes.toBytes("_s"),2); + utility.createTable(TableName.valueOf(TABLE_NAME), getUTF8Bytes("_s"), 2); } @AfterAll diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java b/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java index ec747d662d88..c1eee68d81c4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/GenericAvroSerializer.java @@ -35,9 +35,9 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.HashMap; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; @@ -81,7 +81,7 @@ private Schema getSchema(byte[] schemaBytes) { if (schemaCache.containsKey(schemaByteBuffer)) { return schemaCache.get(schemaByteBuffer); } else { - String schema = new String(schemaBytes, StandardCharsets.UTF_8); + String schema = fromUTF8Bytes(schemaBytes); Schema parsedSchema = new Schema.Parser().parse(schema); schemaCache.put(schemaByteBuffer, parsedSchema); return parsedSchema; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index 82905ff95aab..b8df453d4032 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -54,7 +54,6 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.io.hfile.HFileScanner; -import org.apache.hadoop.hbase.util.Bytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -101,7 +100,7 @@ public class HFileBootstrapIndex extends BootstrapIndex { // Additional Metadata written to HFiles. public static final String INDEX_INFO_KEY_STRING = "INDEX_INFO"; - public static final byte[] INDEX_INFO_KEY = Bytes.toBytes(INDEX_INFO_KEY_STRING); + public static final byte[] INDEX_INFO_KEY = getUTF8Bytes(INDEX_INFO_KEY_STRING); private final boolean isPresent; @@ -515,11 +514,11 @@ private List getAllKeys(HFileScanner scanner, Function convert @Override public List getSourceFileMappingForPartition(String partition) { try (HFileScanner scanner = partitionIndexReader().getScanner(true, false)) { - KeyValue keyValue = new KeyValue(Bytes.toBytes(getPartitionKey(partition)), new byte[0], new byte[0], + KeyValue keyValue = new KeyValue(getUTF8Bytes(getPartitionKey(partition)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); if (scanner.seekTo(keyValue) == 0) { ByteBuffer readValue = scanner.getValue(); - byte[] valBytes = Bytes.toBytes(readValue); + byte[] valBytes = IOUtils.toBytes(readValue); HoodieBootstrapPartitionMetadata metadata = TimelineMetadataUtils.deserializeAvroMetadata(valBytes, HoodieBootstrapPartitionMetadata.class); return metadata.getFileIdToBootstrapFile().entrySet().stream() @@ -548,11 +547,11 @@ public Map getSourceFileMappingForFileI Collections.sort(fileGroupIds); try (HFileScanner scanner = fileIdIndexReader().getScanner(true, false)) { for (HoodieFileGroupId fileGroupId : fileGroupIds) { - KeyValue keyValue = new KeyValue(Bytes.toBytes(getFileGroupKey(fileGroupId)), new byte[0], new byte[0], + KeyValue keyValue = new KeyValue(getUTF8Bytes(getFileGroupKey(fileGroupId)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); if (scanner.seekTo(keyValue) == 0) { ByteBuffer readValue = scanner.getValue(); - byte[] valBytes = Bytes.toBytes(readValue); + byte[] valBytes = IOUtils.toBytes(readValue); HoodieBootstrapFilePartitionInfo fileInfo = TimelineMetadataUtils.deserializeAvroMetadata(valBytes, HoodieBootstrapFilePartitionInfo.class); BootstrapFileMapping mapping = new BootstrapFileMapping(bootstrapBasePath, @@ -641,7 +640,7 @@ private void writeNextPartition(String partitionPath, String bootstrapPartitionP Option bytes = TimelineMetadataUtils.serializeAvroMetadata(bootstrapPartitionMetadata, HoodieBootstrapPartitionMetadata.class); if (bytes.isPresent()) { indexByPartitionWriter - .append(new KeyValue(Bytes.toBytes(getPartitionKey(partitionPath)), new byte[0], new byte[0], + .append(new KeyValue(getUTF8Bytes(getPartitionKey(partitionPath)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, bytes.get())); numPartitionKeysAdded++; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java index 4d3596ccc271..3fd2fb7fa7fe 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java @@ -36,7 +36,6 @@ import java.io.IOException; import java.io.Serializable; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -46,6 +45,8 @@ import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; + /** * All the metadata that gets stored along with a commit. */ @@ -246,7 +247,7 @@ public static T fromJsonString(String jsonStr, Class clazz) throws Except // TODO: refactor this method to avoid doing the json tree walking (HUDI-4822). public static Option>> getFileSliceForFileGroupFromDeltaCommit( byte[] bytes, HoodieFileGroupId fileGroupId) { - String jsonStr = new String(bytes, StandardCharsets.UTF_8); + String jsonStr = fromUTF8Bytes(bytes); if (jsonStr.isEmpty()) { return Option.empty(); } @@ -510,7 +511,7 @@ public int hashCode() { public static T fromBytes(byte[] bytes, Class clazz) throws IOException { try { - return fromJsonString(new String(bytes, StandardCharsets.UTF_8), clazz); + return fromJsonString(fromUTF8Bytes(bytes), clazz); } catch (Exception e) { throw new IOException("unable to read commit metadata", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java index f7964de5f514..bd1692c738df 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieConsistentHashingMetadata.java @@ -31,11 +31,11 @@ import java.io.IOException; import java.io.Serializable; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.UUID; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** @@ -159,7 +159,7 @@ public byte[] toBytes() throws IOException { public static HoodieConsistentHashingMetadata fromBytes(byte[] bytes) throws IOException { try { - return fromJsonString(new String(bytes, StandardCharsets.UTF_8), HoodieConsistentHashingMetadata.class); + return fromJsonString(fromUTF8Bytes(bytes), HoodieConsistentHashingMetadata.class); } catch (Exception e) { throw new IOException("unable to read hashing metadata", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieReplaceCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieReplaceCommitMetadata.java index 0a945e0c6ee6..f3c19f6f8dc4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieReplaceCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieReplaceCommitMetadata.java @@ -25,12 +25,13 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; + /** * All the metadata that gets stored along with a commit. */ @@ -116,7 +117,7 @@ public int hashCode() { public static T fromBytes(byte[] bytes, Class clazz) throws IOException { try { - return fromJsonString(new String(bytes, StandardCharsets.UTF_8), clazz); + return fromJsonString(fromUTF8Bytes(bytes), clazz); } catch (Exception e) { throw new IOException("unable to read commit metadata", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java index 424f51eb1391..71534197e2b1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java @@ -30,10 +30,11 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Properties; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; + /** * Provides support for seamlessly applying changes captured via Debezium for PostgresDB. *

    @@ -141,7 +142,7 @@ private boolean containsBytesToastedValues(IndexedRecord incomingRecord, Schema. || (field.schema().getType() == Schema.Type.UNION && field.schema().getTypes().stream().anyMatch(s -> s.getType() == Schema.Type.BYTES))) // Check length first as an optimization && ((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array().length == DEBEZIUM_TOASTED_VALUE.length() - && DEBEZIUM_TOASTED_VALUE.equals(new String(((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array(), StandardCharsets.UTF_8))); + && DEBEZIUM_TOASTED_VALUE.equals(fromUTF8Bytes(((ByteBuffer) ((GenericData.Record) incomingRecord).get(field.name())).array()))); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 27255c7b905e..2df30e7e8fce 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -40,6 +40,7 @@ import org.apache.hudi.hadoop.fs.SchemeAwareFSDataInputStream; import org.apache.hudi.hadoop.fs.TimedFSDataInputStream; import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.io.util.IOUtils; import org.apache.hudi.storage.StorageSchemes; import org.apache.avro.Schema; @@ -49,7 +50,6 @@ import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.util.Bytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -332,7 +332,7 @@ private long scanForNextAvailableBlockOffset() throws IOException { } catch (EOFException e) { eof = true; } - long pos = Bytes.indexOf(dataBuf, HoodieLogFormat.MAGIC); + long pos = IOUtils.indexOf(dataBuf, HoodieLogFormat.MAGIC); if (pos >= 0) { return currentPos + pos; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java index 852deecbfa97..a38f6fcaa985 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java @@ -50,7 +50,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -63,6 +62,7 @@ import java.util.zip.InflaterInputStream; import static org.apache.hudi.avro.HoodieAvroUtils.recordNeedsRewriteForExtendedAvroTypePromotion; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -296,7 +296,7 @@ private static String decompress(byte[] bytes) { while ((len = in.read(buffer)) > 0) { baos.write(buffer, 0, len); } - return new String(baos.toByteArray(), StandardCharsets.UTF_8); + return fromUTF8Bytes(baos.toByteArray()); } catch (IOException e) { throw new HoodieIOException("IOException while decompressing text", e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java index 663a070620c4..641b27cc8142 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Base64CodecUtil.java @@ -19,9 +19,9 @@ package org.apache.hudi.common.util; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Base64; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** @@ -56,7 +56,7 @@ public static ByteBuffer decode(ByteBuffer byteBuffer) { * @return base64 encoded data */ public static String encode(byte[] data) { - return new String(Base64.getEncoder().encode(data), StandardCharsets.UTF_8); + return fromUTF8Bytes(Base64.getEncoder().encode(data)); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java index 2a87396005cf..4df8c3852892 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HashID.java @@ -20,11 +20,11 @@ package org.apache.hudi.common.util.hash; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.util.IOUtils; import net.jpountz.xxhash.XXHash32; import net.jpountz.xxhash.XXHash64; import net.jpountz.xxhash.XXHashFactory; -import org.apache.hadoop.hbase.util.Bytes; import java.io.Serializable; import java.security.MessageDigest; @@ -122,10 +122,10 @@ private static byte[] getXXHash(final byte[] message, final Size bits) { switch (bits) { case BITS_32: XXHash32 hash32 = factory.hash32(); - return Bytes.toBytes(hash32.hash(message, 0, message.length, HASH_SEED)); + return IOUtils.toBytes(hash32.hash(message, 0, message.length, HASH_SEED)); case BITS_64: XXHash64 hash64 = factory.hash64(); - return Bytes.toBytes(hash64.hash(message, 0, message.length, HASH_SEED)); + return IOUtils.toBytes(hash64.hash(message, 0, message.length, HASH_SEED)); default: throw new HoodieIOException("XX" + bits + " hash is unsupported!"); } diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java index ea251aec0fd5..c5fb1f716542 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java @@ -38,7 +38,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -46,6 +45,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.table.timeline.HoodieTimeline.SCHEMA_COMMIT_ACTION; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** @@ -155,7 +155,7 @@ public String getHistorySchemaStrByGivenValidCommits(List validCommits) try (FSDataInputStream is = fs.open(latestFilePath)) { content = FileIOUtils.readAsByteArray(is); LOG.info(String.format("read history schema success from file : %s", latestFilePath)); - return new String(content, StandardCharsets.UTF_8); + return fromUTF8Bytes(content); } catch (IOException e) { throw new HoodieIOException("Could not read history schema from " + latestFilePath, e); } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java index 60e17c47aa3c..5e1a260e1589 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroHFileReaderImplBase.java @@ -36,7 +36,7 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.util.CollectionUtils.toStream; -import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; public abstract class HoodieAvroHFileReaderImplBase extends HoodieAvroFileReaderBase implements HoodieSeekingFileReader { @@ -109,7 +109,7 @@ protected static GenericRecord deserialize(final byte[] keyBytes, int keyOffset, getKeySchema(readerSchema).ifPresent(keyFieldSchema -> { final Object keyObject = record.get(keyFieldSchema.pos()); if (keyObject != null && keyObject.toString().isEmpty()) { - record.put(keyFieldSchema.pos(), getStringFromUTF8Bytes(keyBytes, keyOffset, keyLength)); + record.put(keyFieldSchema.pos(), fromUTF8Bytes(keyBytes, keyOffset, keyLength)); } }); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java index a2ba9b6e1ab7..5c22ba18de2f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java @@ -55,7 +55,7 @@ import java.util.TreeSet; import java.util.stream.Collectors; -import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.TypeUtils.unsafeCast; import static org.apache.hudi.io.hfile.HFileUtils.isPrefixOfKey; @@ -107,8 +107,8 @@ public String[] readMinMaxRecordKeys() { HFileReader reader = getSharedHFileReader(); try { return new String[] { - getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MIN_RECORD)).get()), - getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MAX_RECORD)).get())}; + fromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MIN_RECORD)).get()), + fromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_MAX_RECORD)).get())}; } catch (IOException e) { throw new HoodieIOException("Cannot read min and max record keys from HFile.", e); } @@ -120,7 +120,7 @@ public BloomFilter readBloomFilter() { HFileReader reader = getSharedHFileReader(); ByteBuffer byteBuffer = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK).get(); return BloomFilterFactory.fromByteBuffer(byteBuffer, - getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_BLOOM_FILTER_TYPE_CODE)).get())); + fromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(KEY_BLOOM_FILTER_TYPE_CODE)).get())); } catch (IOException e) { throw new HoodieException("Could not read bloom filter from " + path, e); } @@ -223,7 +223,7 @@ public ClosableIterator> getRecordsByKeyPrefixIterat private static Schema fetchSchema(HFileReader reader) { try { return new Schema.Parser().parse( - getStringFromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(SCHEMA_KEY)).get())); + fromUTF8Bytes(reader.getMetaInfo(new UTF8StringKey(SCHEMA_KEY)).get())); } catch (IOException e) { throw new HoodieIOException("Unable to read schema from HFile", e); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java index 9adc01c1ec8c..090d47aacc7c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLiningBase.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; -import org.apache.hadoop.hbase.util.Bytes; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -44,6 +43,7 @@ import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile; import static org.apache.hudi.common.testutils.FileSystemTestUtils.getRandomOuterInMemPath; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; /** * Tests {@link InLineFileSystem} to inline HFile. @@ -141,8 +141,8 @@ private void writeSomeRecords(HFile.Writer writer) KeyValue kv; for (int i = 0; i < (maxRows); i++) { String key = String.format(LOCAL_FORMATTER, i); - kv = new KeyValue(Bytes.toBytes(key), Bytes.toBytes("family"), Bytes.toBytes("qual"), - Bytes.toBytes(VALUE_PREFIX + key)); + kv = new KeyValue(getUTF8Bytes(key), getUTF8Bytes("family"), getUTF8Bytes("qual"), + getUTF8Bytes(VALUE_PREFIX + key)); writer.append(kv); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java index 26fb8e34961b..0f3617f27193 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemWithHBaseHFileReader.java @@ -20,6 +20,7 @@ package org.apache.hudi.common.fs.inline; import org.apache.hudi.io.storage.HoodieHFileUtils; +import org.apache.hudi.io.util.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -29,13 +30,13 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; -import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Set; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -73,7 +74,7 @@ protected void validateHFileReading(InLineFileSystem inlineFileSystem, ByteBuffer val1 = scanner.getValue(); scanner.seekTo(keyValue); ByteBuffer val2 = scanner.getValue(); - assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2)); + assertArrayEquals(IOUtils.toBytes(val1), IOUtils.toBytes(val2)); } int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000}; @@ -86,7 +87,7 @@ protected void validateHFileReading(InLineFileSystem inlineFileSystem, private byte[] getSomeKey(int rowId) { KeyValue kv = new KeyValue(getUTF8Bytes(String.format(LOCAL_FORMATTER, rowId)), - Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); + getUTF8Bytes("family"), getUTF8Bytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put); return kv.getKey(); } @@ -106,15 +107,15 @@ private void readAndCheckbytes(HFileScanner scanner, int start, int n) cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); String keyStr = String.format(LOCAL_FORMATTER, i); String valStr = VALUE_PREFIX + keyStr; - KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"), - Bytes.toBytes("qual"), Bytes.toBytes(valStr)); + KeyValue kv = new KeyValue(getUTF8Bytes(keyStr), getUTF8Bytes("family"), + getUTF8Bytes("qual"), getUTF8Bytes(valStr)); byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey(); byte[] expectedKeyBytes = Arrays.copyOfRange( kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()); assertArrayEquals(expectedKeyBytes, keyBytes, - "bytes for keys do not match " + keyStr + " " + Bytes.toString(key)); - assertArrayEquals(Bytes.toBytes(valStr), val, - "bytes for vals do not match " + valStr + " " + Bytes.toString(val)); + "bytes for keys do not match " + keyStr + " " + fromUTF8Bytes(key)); + assertArrayEquals(getUTF8Bytes(valStr), val, + "bytes for vals do not match " + valStr + " " + fromUTF8Bytes(val)); if (!scanner.next()) { break; } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java index 945a0d764066..6cdabd3066b2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/debezium/TestPostgresDebeziumAvroPayload.java @@ -38,11 +38,11 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Objects; import java.util.Properties; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -199,11 +199,11 @@ public void testMergeWithToastedValues() throws IOException { .combineAndGetUpdateValue(oldVal, avroSchema).get(); assertEquals("valid string value", outputRecord.get("string_col")); - assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_col")).array(), StandardCharsets.UTF_8)); + assertEquals("valid byte value", fromUTF8Bytes(((ByteBuffer) outputRecord.get("byte_col")).array())); assertNull(outputRecord.get("string_null_col_1")); assertNull(outputRecord.get("byte_null_col_1")); assertEquals("valid string value", ((Utf8) outputRecord.get("string_null_col_2")).toString()); - assertEquals("valid byte value", new String(((ByteBuffer) outputRecord.get("byte_null_col_2")).array(), StandardCharsets.UTF_8)); + assertEquals("valid byte value", fromUTF8Bytes(((ByteBuffer) outputRecord.get("byte_null_col_2")).array())); } private GenericRecord createRecord(int primaryKeyValue, @Nullable Operation op, @Nullable Long lsnValue) { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java index 9739135ae409..7531bb2ea5d6 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputSplitUtils.java @@ -21,8 +21,8 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.nio.charset.StandardCharsets; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; public class InputSplitUtils { @@ -36,7 +36,7 @@ public static void writeString(String str, DataOutput out) throws IOException { public static String readString(DataInput in) throws IOException { byte[] bytes = new byte[in.readInt()]; in.readFully(bytes); - return new String(bytes, StandardCharsets.UTF_8); + return fromUTF8Bytes(bytes); } public static void writeBoolean(Boolean valueToWrite, DataOutput out) throws IOException { diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java index f033127d82e9..f73615a16a40 100644 --- a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java @@ -109,13 +109,19 @@ public static byte[] getUTF8Bytes(String str) { return str.getBytes(StandardCharsets.UTF_8); } - public static String getStringFromUTF8Bytes(byte[] bytes) { - return getStringFromUTF8Bytes(bytes, 0, bytes.length); + public static String fromUTF8Bytes(byte[] bytes) { + return fromUTF8Bytes(bytes, 0, bytes.length); } - public static String getStringFromUTF8Bytes(byte[] bytes, - int offset, - int length) { + public static String fromUTF8Bytes(byte[] bytes, + int offset, + int length) { + if (bytes == null) { + return null; + } + if (length == 0) { + return ""; + } return new String(bytes, offset, length, StandardCharsets.UTF_8); } diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java index 95288c3885e5..e0b93201924d 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileFileInfoBlock.java @@ -27,7 +27,7 @@ import java.util.HashMap; import java.util.Map; -import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; /** * Represents a {@link HFileBlockType#FILE_INFO} block. @@ -48,7 +48,7 @@ public HFileInfo readFileInfo() throws IOException { byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength) != 0) { throw new IOException( "Unexpected Protobuf magic at the beginning of the HFileFileInfoBlock: " - + getStringFromUTF8Bytes(byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength)); + + fromUTF8Bytes(byteBuff, startOffsetInBuff + HFILEBLOCK_HEADER_SIZE, pbMagicLength)); } ByteArrayInputStream inputStream = new ByteArrayInputStream( byteBuff, diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java index 796baa4481dc..bd3568d0b2d4 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/HFileUtils.java @@ -26,7 +26,7 @@ import java.util.HashMap; import java.util.Map; -import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; /** * Util methods for reading and writing HFile. @@ -104,7 +104,7 @@ public static boolean isPrefixOfKey(Key prefix, Key key) { * @return the String with UTF-8 decoding. */ public static String getValue(KeyValue kv) { - return getStringFromUTF8Bytes(kv.getBytes(), kv.getValueOffset(), kv.getValueLength()); + return fromUTF8Bytes(kv.getBytes(), kv.getValueOffset(), kv.getValueLength()); } /** diff --git a/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java index 1f4f35ac3498..fdeba3d61546 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/hfile/Key.java @@ -21,7 +21,7 @@ import org.apache.hudi.io.util.IOUtils; -import static org.apache.hudi.common.util.StringUtils.getStringFromUTF8Bytes; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT16; import static org.apache.hudi.io.hfile.HFileUtils.compareKeys; import static org.apache.hudi.io.util.IOUtils.readShort; @@ -66,7 +66,7 @@ public int getContentLength() { } public String getContentInString() { - return getStringFromUTF8Bytes(getBytes(), getContentOffset(), getContentLength()); + return fromUTF8Bytes(getBytes(), getContentOffset(), getContentLength()); } @Override diff --git a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java index 8017c0eb96f5..3fd5930add46 100644 --- a/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/io/util/IOUtils.java @@ -201,6 +201,35 @@ public static int compareTo(byte[] bytes1, int offset1, int length1, return length1 - length2; } + /** + * Returns the start position of the first occurrence of the specified {@code + * target} within {@code array}, or {@code -1} if there is no such occurrence. + * + *

    More formally, returns the lowest index {@code i} such that the range + * [i, i + target.length) in {@code array} contains exactly the same elements + * as {@code target}. + * + * @param array the array to search for the sequence {@code target}. + * @param target the array to search for as a sub-sequence of {@code array}. + * @return the start position if found; {@code -1} if there is no such occurrence. + */ + public static int indexOf(byte[] array, byte[] target) { + if (target.length == 0) { + return 0; + } + + outer: + for (int i = 0; i < array.length - target.length + 1; i++) { + for (int j = 0; j < target.length; j++) { + if (array[i + j] != target[j]) { + continue outer; + } + } + return i; + } + return -1; + } + /** * @param bytes input byte array. * @param offset offset to start reading. @@ -215,6 +244,38 @@ public static String bytesToString(byte[] bytes, int offset, int length) { return sb.toString(); } + /** + * Converts an int value to a byte array using big-endian. + * + * @param val value to convert. + * @return the byte array. + */ + public static byte[] toBytes(int val) { + byte[] b = new byte[4]; + for (int i = 3; i > 0; i--) { + b[i] = (byte) val; + val >>>= 8; + } + b[0] = (byte) val; + return b; + } + + /** + * Converts a long value to a byte array using big-endian. + * + * @param val value to convert. + * @return the byte array. + */ + public static byte[] toBytes(long val) { + byte[] b = new byte[8]; + for (int i = 7; i > 0; i--) { + b[i] = (byte) val; + val >>>= 8; + } + b[0] = (byte) val; + return b; + } + /** * @param bytes byte array to hash. * @param offset offset to start hashing. @@ -277,4 +338,24 @@ public static DataInputStream getDataInputStream(ByteBuffer byteBuffer) { return new DataInputStream(new ByteArrayInputStream( byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit() - byteBuffer.arrayOffset())); } + + /** + * Returns a new byte array, copied from the given {@code buf}, from the index 0 (inclusive) + * to the limit (exclusive), regardless of the current position. + * The position and the other index parameters are not changed. + * + * @param buf a byte buffer. + * @return the byte array. + */ + public static byte[] toBytes(ByteBuffer buf) { + ByteBuffer dup = buf.duplicate(); + dup.position(0); + return readBytes(dup); + } + + private static byte[] readBytes(ByteBuffer buf) { + byte[] result = new byte[buf.remaining()]; + buf.get(result); + return result; + } } diff --git a/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java b/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java index 07d4055549be..bc20d47a860b 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/util/TestIOUtils.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.stream.Stream; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -107,4 +108,31 @@ public void testByteArrayCompareTo() { assertEquals(-155, IOUtils.compareTo(bytes1, 1, 4, bytes2, 0, 5)); assertEquals(22, IOUtils.compareTo(bytes1, 4, 2, bytes2, 2, 4)); } + + @Test + public void testIndexOf() { + byte[] array = new byte[] {(byte) 0x9b, 0, 0x18, 0x65, 0x2e, (byte) 0xf3}; + assertEquals(0, IOUtils.indexOf(array, new byte[] {})); + assertEquals(0, IOUtils.indexOf(array, new byte[] {(byte) 0x9b, 0})); + assertEquals(2, IOUtils.indexOf(array, new byte[] {0x18, 0x65, 0x2e})); + assertEquals(4, IOUtils.indexOf(array, new byte[] {0x2e, (byte) 0xf3})); + assertEquals(-1, IOUtils.indexOf(array, new byte[] {0x2e, (byte) 0xf3, 0x31})); + assertEquals(-1, IOUtils.indexOf(array, new byte[] {0x31})); + } + + @Test + public void testToBytes() { + assertArrayEquals(new byte[] {0, 0, 0, 20}, IOUtils.toBytes(20)); + assertArrayEquals(new byte[] {0x02, (byte) 0x93, (byte) 0xed, (byte) 0x88}, IOUtils.toBytes(43249032)); + assertArrayEquals(new byte[] {0x19, (byte) 0x99, (byte) 0x9a, 0x61}, IOUtils.toBytes(Integer.MAX_VALUE / 5 + 200)); + assertArrayEquals(new byte[] {(byte) 0x7f, (byte) 0xff, (byte) 0xff, (byte) 0xff}, IOUtils.toBytes(Integer.MAX_VALUE)); + assertArrayEquals(new byte[] {0, 0, 0, 0, 0, 0, 0, 20}, IOUtils.toBytes(20L)); + assertArrayEquals(new byte[] {0, 0, 0, 0, 0x49, 0x52, 0x45, 0x32}, IOUtils.toBytes(1230128434L)); + assertArrayEquals( + new byte[] {0x19, (byte) 0x99, (byte) 0x99, (byte) 0x99, (byte) 0x99, (byte) 0x99, (byte) 0x9a, 0x61}, + IOUtils.toBytes(Long.MAX_VALUE / 5 + 200)); + assertArrayEquals( + new byte[] {(byte) 0x7f, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff}, + IOUtils.toBytes(Long.MAX_VALUE)); + } } diff --git a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/test/java/org/apache/hudi/metaserver/store/TestRelationalDBBasedStore.java b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/test/java/org/apache/hudi/metaserver/store/TestRelationalDBBasedStore.java index 8f13498f41be..11312efea926 100644 --- a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/test/java/org/apache/hudi/metaserver/store/TestRelationalDBBasedStore.java +++ b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/src/test/java/org/apache/hudi/metaserver/store/TestRelationalDBBasedStore.java @@ -24,20 +24,21 @@ import org.apache.hudi.metaserver.thrift.THoodieInstant; import org.apache.hudi.metaserver.thrift.TState; import org.apache.hudi.metaserver.thrift.Table; + import org.apache.thrift.TException; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * Unit tests on metadata store base on relation database of hoodie meta server. @@ -100,8 +101,8 @@ private void testTimelineRelatedAPIs() throws MetaserverStorageException { assertTrue(store.scanInstants(tableId, Arrays.asList(TState.REQUESTED, TState.INFLIGHT), -1).isEmpty()); // instant meta CRUD - byte[] requestedMeta = "requested".getBytes(StandardCharsets.UTF_8); - byte[] inflightMeta = "inflight".getBytes(StandardCharsets.UTF_8); + byte[] requestedMeta = getUTF8Bytes("requested"); + byte[] inflightMeta = getUTF8Bytes("inflight"); store.saveInstantMetadata(tableId, requested, requestedMeta); store.saveInstantMetadata(tableId, inflight, inflightMeta); assertTrue(store.deleteInstantMetadata(tableId, requested)); diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java index 0795acffc4d7..ab8e3820ce1e 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/cli/HDFSParquetImporterUtils.java @@ -61,7 +61,6 @@ import java.io.Serializable; import java.io.StringReader; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -71,6 +70,8 @@ import scala.Tuple2; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; + /** * Loads data from Parquet Sources. */ @@ -306,7 +307,7 @@ public static String parseSchema(FileSystem fs, String schemaFile) throws Except try (FSDataInputStream inputStream = fs.open(p)) { inputStream.readFully(0, buf.array(), 0, buf.array().length); } - return new String(buf.array(), StandardCharsets.UTF_8); + return fromUTF8Bytes(buf.array()); } public static int handleErrors(JavaSparkContext jsc, String instantTime, JavaRDD writeResponse) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java index 6fe7d9aeafb9..f4e4cf65ae80 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestProtoConversionUtil.java @@ -57,7 +57,6 @@ import java.io.UncheckedIOException; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -67,6 +66,7 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.utilities.sources.helpers.ProtoConversionUtil.toUnsignedBigInteger; @@ -578,6 +578,6 @@ private static List convertMapToList(final Schema protoSch private static String randomString(int size) { byte[] bytes = new byte[size]; RANDOM.nextBytes(bytes); - return new String(bytes, StandardCharsets.UTF_8); + return fromUTF8Bytes(bytes); } } From 97ce21539d48438770ecbfdc6c49aeb2d665b82f Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 19:40:44 -0800 Subject: [PATCH 088/112] [HUDI-7343] Replace Path.SEPARATOR with HoodieLocation.SEPARATOR (#10570) --- .../hudi/cli/commands/ExportCommand.java | 5 +-- .../commands/TestHoodieLogFileCommand.java | 3 +- .../hudi/cli/commands/TestTableCommand.java | 5 +-- .../cli/integ/ITTestBootstrapCommand.java | 9 ++--- .../integ/ITTestHDFSParquetImportCommand.java | 5 +-- .../hudi/cli/integ/ITTestMarkersCommand.java | 5 +-- .../cli/integ/ITTestSavepointsCommand.java | 3 +- .../hudi/client/heartbeat/HeartbeatUtils.java | 3 +- .../heartbeat/HoodieHeartbeatClient.java | 6 ++-- .../lock/FileSystemBasedLockProvider.java | 7 ++-- .../client/TestJavaHoodieBackedMetadata.java | 9 ++--- .../client/TestHoodieClientMultiWriter.java | 3 +- .../functional/TestHoodieBackedMetadata.java | 19 +++++----- .../DirectMarkerBasedDetectionStrategy.java | 3 +- .../hudi/common/fs/inline/InLineFSUtils.java | 12 ++++--- .../heartbeat/HoodieHeartbeatUtils.java | 4 ++- .../common/table/HoodieTableMetaClient.java | 36 ++++++++++--------- .../metadata/AbstractHoodieTableMetadata.java | 9 +++-- .../hudi/metadata/HoodieMetadataPayload.java | 3 +- .../hudi/metadata/HoodieTableMetadata.java | 11 +++--- .../fs/TestHoodieWrapperFileSystem.java | 3 +- .../apache/hudi/sink/meta/CkpMetadata.java | 4 ++- .../org/apache/hudi/source/FileIndex.java | 3 +- .../table/catalog/TableOptionProperties.java | 3 +- .../hudi/table/format/FilePathUtils.java | 5 +-- .../java/org/apache/hudi/util/ClientIds.java | 3 +- .../hudi/util/ViewStorageProperties.java | 3 +- .../hudi/sink/ITTestDataStreamWrite.java | 3 +- .../sink/bucket/ITTestBucketStreamWrite.java | 3 +- .../apache/hudi/sink/utils/TestWriteBase.java | 4 ++- .../java/org/apache/hudi/utils/TestUtils.java | 3 +- .../hadoop/utils/HoodieInputFormatUtils.java | 3 +- .../hudi/hadoop/TestInputPathHandler.java | 13 +++---- .../procedures/ExportInstantsProcedure.scala | 16 ++++----- .../hudi/testutils/DataSourceTestUtils.java | 9 ++--- .../org/apache/hudi/TestHoodieFileIndex.scala | 19 +++++----- .../procedure/TestBootstrapProcedure.scala | 25 ++++++------- .../TestHdfsParquetImportProcedure.scala | 5 +-- .../analysis/HoodieSpark32PlusAnalysis.scala | 9 ++--- .../hudi/hive/testutils/HiveTestService.java | 4 +-- ...erBasedEarlyConflictDetectionRunnable.java | 3 +- .../streamer/SparkSampleWritesUtils.java | 3 +- 42 files changed, 176 insertions(+), 130 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java index 40e7154b5f99..b196c62d0fba 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java @@ -44,6 +44,7 @@ import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -168,7 +169,7 @@ private int copyArchivedInstants(List statuses, Set actionSe LOG.error("Could not load metadata for action " + action + " at instant time " + instantTime); continue; } - final String outPath = localFolder + Path.SEPARATOR + instantTime + "." + action; + final String outPath = localFolder + HoodieLocation.SEPARATOR + instantTime + "." + action; writeToFile(outPath, HoodieAvroUtils.avroToJson(metadata, true)); } } @@ -190,7 +191,7 @@ private int copyNonArchivedInstants(List instants, int limit, Str final HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); final HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); for (HoodieInstant instant : instants) { - String localPath = localFolder + Path.SEPARATOR + instant.getFileName(); + String localPath = localFolder + HoodieLocation.SEPARATOR + instant.getFileName(); byte[] data = null; switch (instant.getAction()) { diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java index ff3898d9d65a..8c433d842a1f 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java @@ -44,6 +44,7 @@ import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieMemoryConfig; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; @@ -201,7 +202,7 @@ public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedExc // write to path '2015/03/16'. Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); - partitionPath = tablePath + Path.SEPARATOR + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH; + partitionPath = tablePath + HoodieLocation.SEPARATOR + HoodieTestCommitMetadataGenerator.DEFAULT_SECOND_PARTITION_PATH; Files.createDirectories(Paths.get(partitionPath)); HoodieLogFormat.Writer writer = null; diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java index 2eed406c6697..22d108241c6c 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestTableCommand.java @@ -32,6 +32,7 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.storage.HoodieLocation; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileSystem; @@ -146,7 +147,7 @@ public void testCreateWithSpecifiedValues() { assertTrue(ShellEvaluationResultUtil.isSuccess(result)); assertEquals("Metadata for table " + tableName + " loaded", result.toString()); HoodieTableMetaClient client = HoodieCLI.getTableMetaClient(); - assertEquals(metaPath + Path.SEPARATOR + "archive", client.getArchivePath()); + assertEquals(metaPath + HoodieLocation.SEPARATOR + "archive", client.getArchivePath()); assertEquals(tablePath, client.getBasePath()); assertEquals(metaPath, client.getMetaPath()); assertEquals(HoodieTableType.MERGE_ON_READ, client.getTableType()); @@ -185,7 +186,7 @@ public void testRefresh() throws IOException { private void testRefreshCommand(String command) throws IOException { // clean table matedata FileSystem fs = FileSystem.get(hadoopConf()); - fs.delete(new Path(tablePath + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME), true); + fs.delete(new Path(tablePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME), true); // Create table assertTrue(prepareTable()); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java index f22ce1bbaf52..4e7a9c68a1e8 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestBootstrapCommand.java @@ -18,7 +18,6 @@ package org.apache.hudi.cli.integ; -import org.apache.hadoop.fs.Path; import org.apache.hudi.cli.HoodieCLI; import org.apache.hudi.cli.HoodiePrintHelper; import org.apache.hudi.cli.commands.TableCommand; @@ -27,6 +26,8 @@ import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.functional.TestBootstrap; +import org.apache.hudi.storage.HoodieLocation; + import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.jupiter.api.BeforeEach; @@ -64,8 +65,8 @@ public class ITTestBootstrapCommand extends HoodieCLIIntegrationTestBase { public void init() { String srcName = "source"; tableName = "test-table"; - sourcePath = basePath + Path.SEPARATOR + srcName; - tablePath = basePath + Path.SEPARATOR + tableName; + sourcePath = basePath + HoodieLocation.SEPARATOR + srcName; + tablePath = basePath + HoodieLocation.SEPARATOR + tableName; // generate test data partitions = Arrays.asList("2018", "2019", "2020"); @@ -73,7 +74,7 @@ public void init() { for (int i = 0; i < partitions.size(); i++) { Dataset df = TestBootstrap.generateTestRawTripDataset(timestamp, i * NUM_OF_RECORDS, i * NUM_OF_RECORDS + NUM_OF_RECORDS, null, jsc, sqlContext); - df.write().parquet(sourcePath + Path.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)); + df.write().parquet(sourcePath + HoodieLocation.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java index 930f6b0064c4..5f19bca25792 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.utilities.HDFSParquetImporter; import org.apache.hudi.utilities.functional.TestHDFSParquetImporter; @@ -76,7 +77,7 @@ public class ITTestHDFSParquetImportCommand extends HoodieCLIIntegrationTestBase @BeforeEach public void init() throws IOException, ParseException { tableName = "test_table"; - tablePath = basePath + Path.SEPARATOR + tableName; + tablePath = basePath + HoodieLocation.SEPARATOR + tableName; sourcePath = new Path(basePath, "source"); targetPath = new Path(tablePath); schemaFile = new Path(basePath, "file.schema").toString(); @@ -108,7 +109,7 @@ public void testConvertWithInsert() throws IOException { () -> assertEquals("Table imported to hoodie format", result.toString())); // Check hudi table exist - String metaPath = targetPath + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME; + String metaPath = targetPath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME; assertTrue(Files.exists(Paths.get(metaPath)), "Hoodie table not exist."); // Load meta data diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java index 5aacfd82de04..194c0b498895 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java @@ -18,7 +18,6 @@ package org.apache.hudi.cli.integ; -import org.apache.hadoop.fs.Path; import org.apache.hudi.cli.commands.TableCommand; import org.apache.hudi.cli.testutils.HoodieCLIIntegrationTestBase; import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil; @@ -26,6 +25,8 @@ import org.apache.hudi.common.model.IOType; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.FileCreateUtils; +import org.apache.hudi.storage.HoodieLocation; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -53,7 +54,7 @@ public class ITTestMarkersCommand extends HoodieCLIIntegrationTestBase { @BeforeEach public void init() throws IOException { String tableName = "test_table"; - tablePath = basePath + Path.SEPARATOR + tableName; + tablePath = basePath + HoodieLocation.SEPARATOR + tableName; // Create table and connect new TableCommand().createTable( diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java index f74d3c0adfe9..3aebd6a483ff 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java @@ -33,6 +33,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.BeforeEach; @@ -65,7 +66,7 @@ public class ITTestSavepointsCommand extends HoodieCLIIntegrationTestBase { @BeforeEach public void init() throws IOException { String tableName = "test_table"; - tablePath = basePath + Path.SEPARATOR + tableName; + tablePath = basePath + HoodieLocation.SEPARATOR + tableName; // Create table and connect new TableCommand().createTable( diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HeartbeatUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HeartbeatUtils.java index 7c2642da250c..40e08275b29e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HeartbeatUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HeartbeatUtils.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.fs.FileSystem; @@ -51,7 +52,7 @@ public static boolean deleteHeartbeatFile(FileSystem fs, String basePath, String boolean deleted = false; try { String heartbeatFolderPath = HoodieTableMetaClient.getHeartbeatFolderPath(basePath); - deleted = fs.delete(new Path(heartbeatFolderPath + Path.SEPARATOR + instantTime), false); + deleted = fs.delete(new Path(heartbeatFolderPath + HoodieLocation.SEPARATOR + instantTime), false); if (!deleted) { LOG.error("Failed to delete heartbeat for instant " + instantTime); } else { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HoodieHeartbeatClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HoodieHeartbeatClient.java index 93656aa29461..bb08ae997d99 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HoodieHeartbeatClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HoodieHeartbeatClient.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieHeartbeatException; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -226,7 +227,8 @@ private void stopHeartbeatTimer(Heartbeat heartbeat) { } public static Boolean heartbeatExists(FileSystem fs, String basePath, String instantTime) throws IOException { - Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + Path.SEPARATOR + instantTime); + Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + + HoodieLocation.SEPARATOR + instantTime); return fs.exists(heartbeatFilePath); } @@ -253,7 +255,7 @@ private void updateHeartbeat(String instantTime) throws HoodieHeartbeatException try { Long newHeartbeatTime = System.currentTimeMillis(); OutputStream outputStream = - this.fs.create(new Path(heartbeatFolderPath + Path.SEPARATOR + instantTime), true); + this.fs.create(new Path(heartbeatFolderPath + HoodieLocation.SEPARATOR + instantTime), true); outputStream.close(); Heartbeat heartbeat = instantToHeartbeatMap.get(instantTime); if (heartbeat.getLastHeartbeatTime() != null && isHeartbeatExpired(instantTime)) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java index 52e8e0285b41..39c004192456 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java @@ -33,6 +33,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieLockException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; @@ -77,10 +78,10 @@ public FileSystemBasedLockProvider(final LockConfiguration lockConfiguration, fi String lockDirectory = lockConfiguration.getConfig().getString(FILESYSTEM_LOCK_PATH_PROP_KEY, null); if (StringUtils.isNullOrEmpty(lockDirectory)) { lockDirectory = lockConfiguration.getConfig().getString(HoodieWriteConfig.BASE_PATH.key()) - + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME; + + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME; } this.lockTimeoutMinutes = lockConfiguration.getConfig().getInteger(FILESYSTEM_LOCK_EXPIRE_PROP_KEY); - this.lockFile = new Path(lockDirectory + Path.SEPARATOR + LOCK_FILE_NAME); + this.lockFile = new Path(lockDirectory + HoodieLocation.SEPARATOR + LOCK_FILE_NAME); this.lockInfo = new LockInfo(); this.sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); this.fs = HadoopFSUtils.getFs(this.lockFile.toString(), configuration); @@ -220,6 +221,6 @@ public static TypedProperties getLockConfig(String tablePath) { *

    IMPORTANT: this path should be shared especially when there is engine cooperation. */ private static String defaultLockPath(String tablePath) { - return tablePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME; + return tablePath + HoodieLocation.SEPARATOR + AUXILIARYFOLDER_NAME; } } diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index 636eb7e7a342..9e4afc55c55f 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -98,6 +98,7 @@ import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.metadata.JavaHoodieBackedTableMetadataWriter; import org.apache.hudi.metadata.MetadataPartitionType; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.HoodieJavaTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; @@ -1230,7 +1231,7 @@ public void testFailedBootstrap() throws Exception { // remove the MDT partition from dataset to simulate failed bootstrap Properties updateProperties = new Properties(); updateProperties.setProperty(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), ""); - HoodieTableConfig.update(fs, new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME), + HoodieTableConfig.update(fs, new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME), updateProperties); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -2173,7 +2174,7 @@ public void testRollbackOfPartiallyFailedCommitWithNewPartitions() throws Except // There is no way to simulate failed commit on the main dataset, hence we simply delete the completed // instant so that only the inflight is left over. String commitInstantFileName = HoodieTimeline.makeCommitFileName(newCommitTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME, commitInstantFileName), false)); } @@ -2273,7 +2274,7 @@ public void testErrorCases() throws Exception { // There is no way to simulate failed commit on the main dataset, hence we simply delete the completed // instant so that only the inflight is left over. String commitInstantFileName = HoodieTimeline.makeCommitFileName(newCommitTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME, commitInstantFileName), false)); } @@ -2415,7 +2416,7 @@ public void testRepeatedActionWithSameInstantTime() throws Exception { // To simulate failed clean on the main dataset, we will delete the completed clean instant String cleanInstantFileName = HoodieTimeline.makeCleanerFileName(cleanInstantTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME, cleanInstantFileName), false)); assertEquals(metaClient.reloadActiveTimeline().getCleanerTimeline().filterInflights().countInstants(), 1); assertEquals(metaClient.reloadActiveTimeline().getCleanerTimeline().filterCompletedInstants().countInstants(), 0); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java index 584542fd13f2..a7d1bc7f0142 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java @@ -48,6 +48,7 @@ import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieWriteConflictException; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.marker.SimpleDirectMarkerBasedDetectionStrategy; import org.apache.hudi.table.marker.SimpleTransactionDirectMarkerBasedDetectionStrategy; @@ -256,7 +257,7 @@ private void testHoodieClientBasicMultiWriterWithEarlyConflictDetection(String t HoodieWriteConfig config4 = HoodieWriteConfig.newBuilder().withProperties(writeConfig.getProps()).withHeartbeatIntervalInMs(heartBeatIntervalForCommit4).build(); final SparkRDDWriteClient client4 = getHoodieWriteClient(config4); - Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + Path.SEPARATOR + nextCommitTime3); + Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + HoodieLocation.SEPARATOR + nextCommitTime3); fs.create(heartbeatFilePath, true); // Wait for heart beat expired for failed commitTime3 "003" diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 3370cfd6410d..872f7ac2bc38 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -101,6 +101,7 @@ import org.apache.hudi.metadata.HoodieTableMetadataUtil; import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; @@ -1635,7 +1636,7 @@ public void testFailedBootstrap() throws Exception { // remove the MDT partition from dataset to simulate failed bootstrap Properties updateProperties = new Properties(); updateProperties.setProperty(HoodieTableConfig.TABLE_METADATA_PARTITIONS.key(), ""); - HoodieTableConfig.update(fs, new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME), + HoodieTableConfig.update(fs, new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME), updateProperties); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -2628,7 +2629,7 @@ public void testRollbackOfPartiallyFailedCommitWithNewPartitions() throws Except // There is no way to simulate failed commit on the main dataset, hence we simply delete the completed // instant so that only the inflight is left over. String commitInstantFileName = HoodieTimeline.makeCommitFileName(newCommitTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME, commitInstantFileName), false)); } @@ -2680,9 +2681,9 @@ public void testRollbackPendingCommitWithRecordIndex(boolean performUpsert) thro // metadata table partitions are rebootstrapped. metadataWriter.dropMetadataPartitions(Arrays.asList(MetadataPartitionType.RECORD_INDEX, FILES)); assertFalse(fs.exists(new Path(getMetadataTableBasePath(basePath) - + Path.SEPARATOR + FILES.getPartitionPath()))); + + HoodieLocation.SEPARATOR + FILES.getPartitionPath()))); assertFalse(fs.exists(new Path(getMetadataTableBasePath(basePath) - + Path.SEPARATOR + MetadataPartitionType.RECORD_INDEX.getPartitionPath()))); + + HoodieLocation.SEPARATOR + MetadataPartitionType.RECORD_INDEX.getPartitionPath()))); metaClient = HoodieTableMetaClient.reload(metaClient); // Insert/upsert third batch of records @@ -2699,14 +2700,14 @@ public void testRollbackPendingCommitWithRecordIndex(boolean performUpsert) thro writeStatuses = client.insert(jsc.parallelize(records, 1), commitTime).collect(); } assertNoWriteErrors(writeStatuses); - assertTrue(fs.exists(new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME))); + assertTrue(fs.exists(new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME))); metaClient = HoodieTableMetaClient.reload(metaClient); assertFalse(metaClient.getActiveTimeline().filterCompletedInstants().filterCompletedInstants().findInstantsAfterOrEquals(commitTime, 1).empty()); assertTrue(fs.exists(new Path(getMetadataTableBasePath(basePath) - + Path.SEPARATOR + FILES.getPartitionPath()))); + + HoodieLocation.SEPARATOR + FILES.getPartitionPath()))); assertTrue(fs.exists(new Path(getMetadataTableBasePath(basePath) - + Path.SEPARATOR + MetadataPartitionType.RECORD_INDEX.getPartitionPath()))); + + HoodieLocation.SEPARATOR + MetadataPartitionType.RECORD_INDEX.getPartitionPath()))); } /** @@ -2847,7 +2848,7 @@ public void testErrorCases() throws Exception { // There is no way to simulate failed commit on the main dataset, hence we simply delete the completed // instant so that only the inflight is left over. String commitInstantFileName = HoodieTimeline.makeCommitFileName(newCommitTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + METAFOLDER_NAME, commitInstantFileName), false)); } @@ -3052,7 +3053,7 @@ public void testRepeatedActionWithSameInstantTime() throws Exception { // To simulate failed clean on the main dataset, we will delete the completed clean instant String cleanInstantFileName = HoodieTimeline.makeCleanerFileName(cleanInstantTime); - assertTrue(fs.delete(new Path(basePath + Path.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME, + assertTrue(fs.delete(new Path(basePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METAFOLDER_NAME, cleanInstantFileName), false)); assertEquals(metaClient.reloadActiveTimeline().getCleanerTimeline().filterInflights().countInstants(), 1); assertEquals(metaClient.reloadActiveTimeline().getCleanerTimeline().filterCompletedInstants().countInstants(), 0); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java b/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java index 1f3f4f2536d8..ea08456d16e3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/conflict/detection/DirectMarkerBasedDetectionStrategy.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -77,7 +78,7 @@ public DirectMarkerBasedDetectionStrategy(HoodieWrapperFileSystem fs, String par * @throws IOException upon errors. */ public boolean checkMarkerConflict(String basePath, long maxAllowableHeartbeatIntervalInMs) throws IOException { - String tempFolderPath = basePath + Path.SEPARATOR + HoodieTableMetaClient.TEMPFOLDER_NAME; + String tempFolderPath = basePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.TEMPFOLDER_NAME; List candidateInstants = MarkerUtils.getCandidateInstants(activeTimeline, Arrays.stream(fs.listStatus(new Path(tempFolderPath))).map(FileStatus::getPath).collect(Collectors.toList()), instantTime, maxAllowableHeartbeatIntervalInMs, fs, basePath); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java index 6031f29d907d..06a96542585c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java @@ -18,6 +18,8 @@ package org.apache.hudi.common.fs.inline; +import org.apache.hudi.storage.HoodieLocation; + import org.apache.hadoop.fs.Path; import java.io.File; @@ -33,8 +35,7 @@ public class InLineFSUtils { private static final String START_OFFSET_STR = "start_offset"; private static final String LENGTH_STR = "length"; - private static final String PATH_SEPARATOR = "/"; - private static final String SCHEME_SEPARATOR = ":"; + private static final String SCHEME_SEPARATOR = "" + HoodieLocation.COLON_CHAR; private static final String EQUALS_STR = "="; private static final String LOCAL_FILESYSTEM_SCHEME = "file"; @@ -54,8 +55,9 @@ public class InLineFSUtils { public static Path getInlineFilePath(Path outerPath, String origScheme, long inLineStartOffset, long inLineLength) { final String subPath = new File(outerPath.toString().substring(outerPath.toString().indexOf(":") + 1)).getPath(); return new Path( - InLineFileSystem.SCHEME + SCHEME_SEPARATOR + PATH_SEPARATOR + subPath + PATH_SEPARATOR + origScheme - + PATH_SEPARATOR + "?" + START_OFFSET_STR + EQUALS_STR + inLineStartOffset + InLineFileSystem.SCHEME + SCHEME_SEPARATOR + + HoodieLocation.SEPARATOR + subPath + HoodieLocation.SEPARATOR + origScheme + + HoodieLocation.SEPARATOR + "?" + START_OFFSET_STR + EQUALS_STR + inLineStartOffset + "&" + LENGTH_STR + EQUALS_STR + inLineLength ); } @@ -84,7 +86,7 @@ public static Path getOuterFilePathFromInlinePath(Path inlineFSPath) { final String pathExceptScheme = basePath.toString().substring(basePath.toString().indexOf(SCHEME_SEPARATOR) + 1); final String fullPath = outerFileScheme + SCHEME_SEPARATOR - + (outerFileScheme.equals(LOCAL_FILESYSTEM_SCHEME) ? PATH_SEPARATOR : "") + + (outerFileScheme.equals(LOCAL_FILESYSTEM_SCHEME) ? HoodieLocation.SEPARATOR : "") + pathExceptScheme; return new Path(fullPath); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/heartbeat/HoodieHeartbeatUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/heartbeat/HoodieHeartbeatUtils.java index 223d46e416f3..f7af86f79542 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/heartbeat/HoodieHeartbeatUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/heartbeat/HoodieHeartbeatUtils.java @@ -20,6 +20,7 @@ package org.apache.hudi.common.heartbeat; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -44,7 +45,8 @@ public class HoodieHeartbeatUtils { * @throws IOException */ public static Long getLastHeartbeatTime(FileSystem fs, String basePath, String instantTime) throws IOException { - Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + Path.SEPARATOR + instantTime); + Path heartbeatFilePath = new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + + HoodieLocation.SEPARATOR + instantTime); if (fs.exists(heartbeatFilePath)) { return fs.getFileStatus(heartbeatFilePath).getModificationTime(); } else { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index 1d9f38a1d263..2054f689e85a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -47,6 +47,7 @@ import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.hadoop.fs.SerializablePath; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -87,17 +88,18 @@ public class HoodieTableMetaClient implements Serializable { private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(HoodieTableMetaClient.class); public static final String METAFOLDER_NAME = ".hoodie"; - public static final String TEMPFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".temp"; - public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".aux"; - public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + Path.SEPARATOR + ".bootstrap"; - public static final String SAMPLE_WRITES_FOLDER_PATH = AUXILIARYFOLDER_NAME + Path.SEPARATOR + ".sample_writes"; - public static final String HEARTBEAT_FOLDER_NAME = METAFOLDER_NAME + Path.SEPARATOR + ".heartbeat"; - public static final String METADATA_TABLE_FOLDER_PATH = METAFOLDER_NAME + Path.SEPARATOR + "metadata"; - public static final String HASHING_METADATA_FOLDER_NAME = ".bucket_index" + Path.SEPARATOR + "consistent_hashing_metadata"; + public static final String TEMPFOLDER_NAME = METAFOLDER_NAME + HoodieLocation.SEPARATOR + ".temp"; + public static final String AUXILIARYFOLDER_NAME = METAFOLDER_NAME + HoodieLocation.SEPARATOR + ".aux"; + public static final String BOOTSTRAP_INDEX_ROOT_FOLDER_PATH = AUXILIARYFOLDER_NAME + HoodieLocation.SEPARATOR + ".bootstrap"; + public static final String SAMPLE_WRITES_FOLDER_PATH = AUXILIARYFOLDER_NAME + HoodieLocation.SEPARATOR + ".sample_writes"; + public static final String HEARTBEAT_FOLDER_NAME = METAFOLDER_NAME + HoodieLocation.SEPARATOR + ".heartbeat"; + public static final String METADATA_TABLE_FOLDER_PATH = METAFOLDER_NAME + HoodieLocation.SEPARATOR + "metadata"; + public static final String HASHING_METADATA_FOLDER_NAME = + ".bucket_index" + HoodieLocation.SEPARATOR + "consistent_hashing_metadata"; public static final String BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH - + Path.SEPARATOR + ".partitions"; - public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + Path.SEPARATOR - + ".fileids"; + + HoodieLocation.SEPARATOR + ".partitions"; + public static final String BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH = + BOOTSTRAP_INDEX_ROOT_FOLDER_PATH + HoodieLocation.SEPARATOR + ".fileids"; public static final String SCHEMA_FOLDER_NAME = ".schema"; @@ -240,7 +242,7 @@ public String getHashingMetadataPath() { * @return Temp Folder path */ public String getTempFolderPath() { - return basePath + Path.SEPARATOR + TEMPFOLDER_NAME; + return basePath + HoodieLocation.SEPARATOR + TEMPFOLDER_NAME; } /** @@ -250,35 +252,35 @@ public String getTempFolderPath() { * @return */ public String getMarkerFolderPath(String instantTs) { - return String.format("%s%s%s", getTempFolderPath(), Path.SEPARATOR, instantTs); + return String.format("%s%s%s", getTempFolderPath(), HoodieLocation.SEPARATOR, instantTs); } /** * @return Auxiliary Meta path */ public String getMetaAuxiliaryPath() { - return basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME; + return basePath + HoodieLocation.SEPARATOR + AUXILIARYFOLDER_NAME; } /** * @return Heartbeat folder path. */ public static String getHeartbeatFolderPath(String basePath) { - return String.format("%s%s%s", basePath, Path.SEPARATOR, HEARTBEAT_FOLDER_NAME); + return String.format("%s%s%s", basePath, HoodieLocation.SEPARATOR, HEARTBEAT_FOLDER_NAME); } /** * @return Bootstrap Index By Partition Folder */ public String getBootstrapIndexByPartitionFolderPath() { - return basePath + Path.SEPARATOR + BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH; + return basePath + HoodieLocation.SEPARATOR + BOOTSTRAP_INDEX_BY_PARTITION_FOLDER_PATH; } /** * @return Bootstrap Index By Hudi File Id Folder */ public String getBootstrapIndexByFileIdFolderNameFolderPath() { - return basePath + Path.SEPARATOR + BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH; + return basePath + HoodieLocation.SEPARATOR + BOOTSTRAP_INDEX_BY_FILE_ID_FOLDER_PATH; } /** @@ -286,7 +288,7 @@ public String getBootstrapIndexByFileIdFolderNameFolderPath() { */ public String getArchivePath() { String archiveFolder = tableConfig.getArchivelogFolder(); - return getMetaPath() + Path.SEPARATOR + archiveFolder; + return getMetaPath() + HoodieLocation.SEPARATOR + archiveFolder; } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java index e84c646cb504..96d93d01bf5a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/AbstractHoodieTableMetadata.java @@ -27,8 +27,7 @@ import org.apache.hudi.hadoop.fs.SerializablePath; import org.apache.hudi.internal.schema.Type; import org.apache.hudi.internal.schema.Types; - -import org.apache.hadoop.fs.Path; +import org.apache.hudi.storage.HoodieLocation; import java.util.Collections; import java.util.List; @@ -58,14 +57,14 @@ protected static int getPathPartitionLevel(Types.RecordType partitionFields, Str int level = 1; for (int i = 1; i < path.length() - 1; i++) { - if (path.charAt(i) == Path.SEPARATOR_CHAR) { + if (path.charAt(i) == HoodieLocation.SEPARATOR_CHAR) { level++; } } - if (path.startsWith(Path.SEPARATOR)) { + if (path.startsWith(HoodieLocation.SEPARATOR)) { level--; } - if (path.endsWith(Path.SEPARATOR)) { + if (path.endsWith(HoodieLocation.SEPARATOR)) { level--; } return level; diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index 82400b711650..38da2e58844f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -39,6 +39,7 @@ import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.util.Lazy; import org.apache.avro.Schema; @@ -360,7 +361,7 @@ public static HoodieRecord createBloomFilterMetadataRecor final String bloomFilterType, final ByteBuffer bloomFilter, final boolean isDeleted) { - checkArgument(!baseFileName.contains(Path.SEPARATOR) + checkArgument(!baseFileName.contains(HoodieLocation.SEPARATOR) && FSUtils.isBaseFile(new Path(baseFileName)), "Invalid base file '" + baseFileName + "' for MetaIndexBloomFilter!"); final String bloomFilterIndexKey = getBloomFilterRecordKey(partitionName, baseFileName); diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java index 0ba197a5c68a..ba40f269a0f4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java @@ -30,11 +30,12 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.expression.Expression; +import org.apache.hudi.internal.schema.Types; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hudi.expression.Expression; -import org.apache.hudi.internal.schema.Types; import java.io.IOException; import java.io.Serializable; @@ -68,7 +69,7 @@ public interface HoodieTableMetadata extends Serializable, AutoCloseable { * Return the base-path of the Metadata Table for the given Dataset identified by base-path */ static String getMetadataTableBasePath(String dataTableBasePath) { - return dataTableBasePath + Path.SEPARATOR + HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH; + return dataTableBasePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH; } /** @@ -93,7 +94,7 @@ static String getDataTableBasePathFromMetadataTable(String metadataTableBasePath * @param metadataTableBasePath The base path of the metadata table */ static String getDatasetBasePath(String metadataTableBasePath) { - int endPos = metadataTableBasePath.lastIndexOf(Path.SEPARATOR + HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH); + int endPos = metadataTableBasePath.lastIndexOf(HoodieLocation.SEPARATOR + HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH); checkState(endPos != -1, metadataTableBasePath + " should be base path of the metadata table"); return metadataTableBasePath.substring(0, endPos); } @@ -107,7 +108,7 @@ static boolean isMetadataTable(String basePath) { if (basePath == null || basePath.isEmpty()) { return false; } - if (basePath.endsWith(Path.SEPARATOR)) { + if (basePath.endsWith(HoodieLocation.SEPARATOR)) { basePath = basePath.substring(0, basePath.length() - 1); } return basePath.endsWith(HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java index 15887cb80e27..dc9fdf367409 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestHoodieWrapperFileSystem.java @@ -24,6 +24,7 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -70,7 +71,7 @@ public static void cleanUp() { public void testCreateImmutableFileInPath() throws IOException { HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(HadoopFSUtils.getFs(basePath, new Configuration()), new NoOpConsistencyGuard()); String testContent = "test content"; - Path testFile = new Path(basePath + Path.SEPARATOR + "clean.00000001"); + Path testFile = new Path(basePath + HoodieLocation.SEPARATOR + "clean.00000001"); // create same commit twice fs.createImmutableFileInPath(testFile, Option.of(getUTF8Bytes(testContent))); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java index c182528344c1..73065a5247d0 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/meta/CkpMetadata.java @@ -26,6 +26,7 @@ import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -225,7 +226,8 @@ public static CkpMetadata getInstance(FileSystem fs, String basePath, String uni protected static String ckpMetaPath(String basePath, String uniqueId) { // .hoodie/.aux/ckp_meta - String metaPath = basePath + Path.SEPARATOR + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + Path.SEPARATOR + CKP_META; + String metaPath = basePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + + HoodieLocation.SEPARATOR + CKP_META; return StringUtils.isNullOrEmpty(uniqueId) ? metaPath : metaPath + "_" + uniqueId; } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java index 2ddf10ef1719..68c2a05fccd4 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/FileIndex.java @@ -29,6 +29,7 @@ import org.apache.hudi.source.prune.PartitionPruners; import org.apache.hudi.source.prune.PrimaryKeyPruners; import org.apache.hudi.source.stats.ColumnStatsIndices; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.util.DataTypeUtils; import org.apache.hudi.util.StreamerUtil; @@ -120,7 +121,7 @@ public List> getPartitions( } List> partitions = new ArrayList<>(); for (String partitionPath : partitionPaths) { - String[] paths = partitionPath.split(Path.SEPARATOR); + String[] paths = partitionPath.split(HoodieLocation.SEPARATOR); Map partitionMapping = new LinkedHashMap<>(); if (hivePartition) { Arrays.stream(paths).forEach(p -> { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java index 6844a4136e2c..2dc8f618b1f7 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java @@ -25,6 +25,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.sync.common.util.SparkDataSourceTableUtils; import org.apache.hudi.util.AvroSchemaConverter; @@ -137,7 +138,7 @@ public static Map loadFromProperties(String basePath, Configurat } private static Path getPropertiesFilePath(String basePath) { - String auxPath = basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME; + String auxPath = basePath + HoodieLocation.SEPARATOR + AUXILIARYFOLDER_NAME; return new Path(auxPath, FILE_NAME); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java index 826b96f617fc..78467abe9dc0 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java @@ -20,6 +20,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.util.DataTypeUtils; import org.apache.flink.api.java.tuple.Tuple2; @@ -98,7 +99,7 @@ public static String generatePartitionPath( int i = 0; for (Map.Entry e : partitionKVs.entrySet()) { if (i > 0) { - suffixBuf.append(Path.SEPARATOR); + suffixBuf.append(HoodieLocation.SEPARATOR); } if (hivePartition) { suffixBuf.append(escapePathName(e.getKey())); @@ -108,7 +109,7 @@ public static String generatePartitionPath( i++; } if (sepSuffix) { - suffixBuf.append(Path.SEPARATOR); + suffixBuf.append(HoodieLocation.SEPARATOR); } return suffixBuf.toString(); } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java index 2fb8bd893072..82350a3b85bc 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ClientIds.java @@ -24,6 +24,7 @@ import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieHeartbeatException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -148,7 +149,7 @@ public static boolean isHeartbeatExpired(FileSystem fs, Path path, long timeoutT // Utilities // ------------------------------------------------------------------------- private String getHeartbeatFolderPath(String basePath) { - return basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME + Path.SEPARATOR + HEARTBEAT_FOLDER_NAME; + return basePath + HoodieLocation.SEPARATOR + AUXILIARYFOLDER_NAME + HoodieLocation.SEPARATOR + HEARTBEAT_FOLDER_NAME; } private Path getHeartbeatFilePath(String basePath, String uniqueId) { diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java index 7eea95369907..8e328aee4d29 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java @@ -24,6 +24,7 @@ import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.flink.configuration.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -82,7 +83,7 @@ public static FileSystemViewStorageConfig loadFromProperties(String basePath, Co } private static Path getPropertiesFilePath(String basePath, String uniqueId) { - String auxPath = basePath + Path.SEPARATOR + AUXILIARYFOLDER_NAME; + String auxPath = basePath + HoodieLocation.SEPARATOR + AUXILIARYFOLDER_NAME; String fileName = StringUtils.isNullOrEmpty(uniqueId) ? FILE_NAME : FILE_NAME + "_" + uniqueId; return new Path(auxPath, fileName); } diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java index 954ca6593c36..8995d0247bc9 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/ITTestDataStreamWrite.java @@ -28,6 +28,7 @@ import org.apache.hudi.sink.transform.ChainedTransformer; import org.apache.hudi.sink.transform.Transformer; import org.apache.hudi.sink.utils.Pipelines; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.catalog.HoodieCatalog; import org.apache.hudi.table.catalog.TableOptionProperties; import org.apache.hudi.util.AvroSchemaConverter; @@ -440,7 +441,7 @@ public void testHoodiePipelineBuilderSourceWithSchemaSet() throws Exception { // create table dir final String dbName = DEFAULT_DATABASE.defaultValue(); final String tableName = "t1"; - File testTable = new File(tempFile, dbName + Path.SEPARATOR + tableName); + File testTable = new File(tempFile, dbName + HoodieLocation.SEPARATOR + tableName); testTable.mkdir(); Configuration conf = TestConfigurations.getDefaultConf(testTable.toURI().toString()); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java index 0978b1cc4e64..d0b365049803 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/bucket/ITTestBucketStreamWrite.java @@ -28,6 +28,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex.IndexType; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.FlinkMiniCluster; import org.apache.hudi.utils.TestConfigurations; @@ -110,7 +111,7 @@ private static void doDeleteCommit(String tablePath, boolean isCow) throws Excep // delete successful commit to simulate an unsuccessful write FileSystem fs = metaClient.getFs(); - Path path = new Path(metaClient.getMetaPath() + Path.SEPARATOR + filename); + Path path = new Path(metaClient.getMetaPath() + HoodieLocation.SEPARATOR + filename); fs.delete(path); // marker types are different for COW and MOR diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java index d385846be057..7d6fb1abfd9f 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java @@ -31,6 +31,7 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.sink.event.WriteMetadataEvent; import org.apache.hudi.sink.meta.CkpMetadata; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.TestData; import org.apache.hudi.utils.TestUtils; @@ -459,7 +460,8 @@ public TestHarness rollbackLastCompleteInstantToInflight() throws Exception { HoodieActiveTimeline.deleteInstantFile(metaClient.getFs(), metaClient.getMetaPath(), lastCompletedInstant.get()); // refresh the heartbeat in case it is timed out. OutputStream outputStream = - metaClient.getFs().create(new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + Path.SEPARATOR + this.lastComplete), true); + metaClient.getFs().create(new Path(HoodieTableMetaClient.getHeartbeatFolderPath(basePath) + + HoodieLocation.SEPARATOR + this.lastComplete), true); outputStream.close(); this.lastPending = this.lastComplete; this.lastComplete = lastCompleteInstant(); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java index 5fa78e3647f7..2a90e2b031e4 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestUtils.java @@ -29,6 +29,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.HadoopConfigurations; import org.apache.hudi.source.StreamReadMonitoringFunction; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.table.format.mor.MergeOnReadInputSplit; import org.apache.hudi.util.StreamerUtil; @@ -105,7 +106,7 @@ public static String getNthArchivedInstant(String basePath, int n) { public static String getSplitPartitionPath(MergeOnReadInputSplit split) { assertTrue(split.getLogPaths().isPresent()); final String logPath = split.getLogPaths().get().get(0); - String[] paths = logPath.split(Path.SEPARATOR); + String[] paths = logPath.split(HoodieLocation.SEPARATOR); return paths[paths.length - 2]; } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java index 80e1186776f8..505acccee873 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java @@ -43,6 +43,7 @@ import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit; import org.apache.hudi.hadoop.realtime.HoodieRealtimePath; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -244,7 +245,7 @@ public static Option getAffectedPartitions(List commitsTo return Option.empty(); } String incrementalInputPaths = partitionsToList.stream() - .map(s -> StringUtils.isNullOrEmpty(s) ? tableMetaClient.getBasePath() : tableMetaClient.getBasePath() + Path.SEPARATOR + s) + .map(s -> StringUtils.isNullOrEmpty(s) ? tableMetaClient.getBasePath() : tableMetaClient.getBasePath() + HoodieLocation.SEPARATOR + s) .filter(s -> { /* * Ensure to return only results from the original input path that has incremental changes diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java index 561851c8e2b8..b88b58f1ad98 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestInputPathHandler.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.testutils.minicluster.HdfsTestService; import org.apache.hudi.hadoop.utils.HoodieHiveUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -167,12 +168,12 @@ static HoodieTableMetaClient initTableType(Configuration hadoopConf, String base static List generatePartitions(DistributedFileSystem dfs, String basePath) throws IOException { List paths = new ArrayList<>(); - paths.add(new Path(basePath + Path.SEPARATOR + "2019/05/21")); - paths.add(new Path(basePath + Path.SEPARATOR + "2019/05/22")); - paths.add(new Path(basePath + Path.SEPARATOR + "2019/05/23")); - paths.add(new Path(basePath + Path.SEPARATOR + "2019/05/24")); - paths.add(new Path(basePath + Path.SEPARATOR + "2019/05/25")); - for (Path path: paths) { + paths.add(new Path(basePath + HoodieLocation.SEPARATOR + "2019/05/21")); + paths.add(new Path(basePath + HoodieLocation.SEPARATOR + "2019/05/22")); + paths.add(new Path(basePath + HoodieLocation.SEPARATOR + "2019/05/23")); + paths.add(new Path(basePath + HoodieLocation.SEPARATOR + "2019/05/24")); + paths.add(new Path(basePath + HoodieLocation.SEPARATOR + "2019/05/25")); + for (Path path : paths) { dfs.mkdirs(path); } return paths; diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala index 99b70519de65..5f5279714a89 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala @@ -17,20 +17,22 @@ package org.apache.spark.sql.hudi.command.procedures -import org.apache.avro.generic.GenericRecord -import org.apache.avro.specific.SpecificData -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hudi.HoodieCLIUtils import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.avro.model.HoodieArchivedMetaEntry -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieLogFile +import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.log.HoodieLogFormat import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline, TimelineMetadataUtils} import org.apache.hudi.exception.HoodieException +import org.apache.hudi.hadoop.fs.HadoopFSUtils +import org.apache.hudi.storage.HoodieLocation +import org.apache.avro.generic.GenericRecord +import org.apache.avro.specific.SpecificData +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} @@ -38,8 +40,6 @@ import java.io.File import java.util import java.util.Collections import java.util.function.Supplier -import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType -import org.apache.hudi.hadoop.fs.HadoopFSUtils import scala.collection.JavaConverters._ import scala.util.control.Breaks.break @@ -158,7 +158,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L null } val instantTime = archiveEntryRecord.get("commitTime").toString - val outPath = localFolder + Path.SEPARATOR + instantTime + "." + action + val outPath = localFolder + HoodieLocation.SEPARATOR + instantTime + "." + action if (metadata != null) writeToFile(fileSystem, outPath, HoodieAvroUtils.avroToJson(metadata, true)) if ( { copyCount += 1; @@ -181,7 +181,7 @@ class ExportInstantsProcedure extends BaseProcedure with ProcedureBuilder with L val timeline = metaClient.getActiveTimeline val fileSystem = HadoopFSUtils.getFs(metaClient.getBasePath, jsc.hadoopConfiguration()) for (instant <- instants) { - val localPath = localFolder + Path.SEPARATOR + instant.getFileName + val localPath = localFolder + HoodieLocation.SEPARATOR + instant.getFileName val data: Array[Byte] = instant.getAction match { case HoodieTimeline.CLEAN_ACTION => val metadata = TimelineMetadataUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(instant).get) diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java index 4a93245dc8d2..ed9aebaad66f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/testutils/DataSourceTestUtils.java @@ -18,16 +18,17 @@ package org.apache.hudi.testutils; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.FileIOUtils; + +import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.testutils.HoodieTestDataGenerator; -import org.apache.hudi.common.util.FileIOUtils; -import org.apache.avro.Schema; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala index 803702addb48..df07c72f0907 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala @@ -17,45 +17,48 @@ package org.apache.hudi -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path import org.apache.hudi.DataSourceReadOptions.{FILE_INDEX_LISTING_MODE_EAGER, FILE_INDEX_LISTING_MODE_LAZY, QUERY_TYPE, QUERY_TYPE_SNAPSHOT_OPT_VAL} import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.HoodieConversionUtils.toJavaOption import org.apache.hudi.HoodieFileIndex.DataSkippingFailureMode import org.apache.hudi.client.HoodieJavaWriteClient import org.apache.hudi.client.common.HoodieJavaEngineContext -import org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT, TIMESTAMP_OUTPUT_DATE_FORMAT, TIMESTAMP_TYPE_FIELD} import org.apache.hudi.common.config.{HoodieMetadataConfig, HoodieStorageConfig} +import org.apache.hudi.common.config.TimestampKeyGeneratorConfig.{TIMESTAMP_INPUT_DATE_FORMAT, TIMESTAMP_OUTPUT_DATE_FORMAT, TIMESTAMP_TYPE_FIELD} import org.apache.hudi.common.engine.EngineType import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.{HoodieBaseFile, HoodieRecord, HoodieTableType} -import org.apache.hudi.common.table.view.HoodieTableFileSystemView import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} +import org.apache.hudi.common.table.view.HoodieTableFileSystemView +import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings -import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.util.PartitionPathEncodeUtils import org.apache.hudi.common.util.StringUtils.isNullOrEmpty import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieException import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.TimestampType import org.apache.hudi.metadata.HoodieTableMetadata +import org.apache.hudi.storage.HoodieLocation import org.apache.hudi.testutils.HoodieSparkClientTestBase import org.apache.hudi.util.JFunction + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, EqualTo, GreaterThanOrEqual, LessThan, Literal} import org.apache.spark.sql.execution.datasources.{NoopCache, PartitionDirectory} import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.types.{IntegerType, StringType} -import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.api.{BeforeEach, Test} +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{Arguments, CsvSource, MethodSource, ValueSource} import java.util.Properties import java.util.function.Consumer + import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import scala.util.Random @@ -813,9 +816,9 @@ class TestHoodieFileIndex extends HoodieSparkClientTestBase with ScalaAssertionS if (hiveStylePartitioning) { partitionNames.zip(partitionValues).map { case (name, value) => s"$name=$value" - }.mkString(Path.SEPARATOR) + }.mkString(HoodieLocation.SEPARATOR) } else { - partitionValues.mkString(Path.SEPARATOR) + partitionValues.mkString(HoodieLocation.SEPARATOR) } } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestBootstrapProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestBootstrapProcedure.scala index a8ac9b5e3176..fc45509190cc 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestBootstrapProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestBootstrapProcedure.scala @@ -17,11 +17,12 @@ package org.apache.spark.sql.hudi.procedure -import org.apache.hadoop.fs.Path import org.apache.hudi.common.model.HoodieTableType import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.functional.TestBootstrap import org.apache.hudi.keygen.constant.KeyGeneratorOptions +import org.apache.hudi.storage.HoodieLocation + import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql.{Dataset, Row} @@ -40,8 +41,8 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val basePath = s"${tmp.getCanonicalPath}" val srcName: String = "source" - val sourcePath = basePath + Path.SEPARATOR + srcName - val tablePath = basePath + Path.SEPARATOR + tableName + val sourcePath = basePath + HoodieLocation.SEPARATOR + srcName + val tablePath = basePath + HoodieLocation.SEPARATOR + tableName val jsc = new JavaSparkContext(spark.sparkContext) // generate test data @@ -49,7 +50,7 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val timestamp: Long = Instant.now.toEpochMilli for (i <- 0 until partitions.size) { val df: Dataset[Row] = TestBootstrap.generateTestRawTripDataset(timestamp, i * NUM_OF_RECORDS, i * NUM_OF_RECORDS + NUM_OF_RECORDS, null, jsc, spark.sqlContext) - df.write.parquet(sourcePath + Path.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) + df.write.parquet(sourcePath + HoodieLocation.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) } spark.sql("set hoodie.bootstrap.parallelism = 20") @@ -105,8 +106,8 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val basePath = s"${tmp.getCanonicalPath}" val srcName: String = "source" - val sourcePath = basePath + Path.SEPARATOR + srcName - val tablePath = basePath + Path.SEPARATOR + tableName + val sourcePath = basePath + HoodieLocation.SEPARATOR + srcName + val tablePath = basePath + HoodieLocation.SEPARATOR + tableName val jsc = new JavaSparkContext(spark.sparkContext) // generate test data @@ -114,7 +115,7 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val timestamp: Long = Instant.now.toEpochMilli for (i <- 0 until partitions.size) { val df: Dataset[Row] = TestBootstrap.generateTestRawTripDataset(timestamp, i * NUM_OF_RECORDS, i * NUM_OF_RECORDS + NUM_OF_RECORDS, null, jsc, spark.sqlContext) - df.write.parquet(sourcePath + Path.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) + df.write.parquet(sourcePath + HoodieLocation.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) } spark.sql("set hoodie.bootstrap.parallelism = 20") @@ -171,8 +172,8 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val basePath = s"${tmp.getCanonicalPath}" val srcName: String = "source" - val sourcePath = basePath + Path.SEPARATOR + srcName - val tablePath = basePath + Path.SEPARATOR + tableName + val sourcePath = basePath + HoodieLocation.SEPARATOR + srcName + val tablePath = basePath + HoodieLocation.SEPARATOR + tableName val jsc = new JavaSparkContext(spark.sparkContext) // generate test data @@ -227,8 +228,8 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val basePath = s"${tmp.getCanonicalPath}" val srcName: String = "source" - val sourcePath = basePath + Path.SEPARATOR + srcName - val tablePath = basePath + Path.SEPARATOR + tableName + val sourcePath = basePath + HoodieLocation.SEPARATOR + srcName + val tablePath = basePath + HoodieLocation.SEPARATOR + tableName val jsc = new JavaSparkContext(spark.sparkContext) // generate test data @@ -236,7 +237,7 @@ class TestBootstrapProcedure extends HoodieSparkProcedureTestBase { val timestamp: Long = Instant.now.toEpochMilli for (i <- 0 until partitions.size) { val df: Dataset[Row] = TestBootstrap.generateTestRawTripDataset(timestamp, i * NUM_OF_RECORDS, i * NUM_OF_RECORDS + NUM_OF_RECORDS, null, jsc, spark.sqlContext) - df.write.parquet(sourcePath + Path.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) + df.write.parquet(sourcePath + HoodieLocation.SEPARATOR + PARTITION_FIELD + "=" + partitions.get(i)) } spark.sql("set hoodie.bootstrap.parallelism = 20") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala index 595e9173cbeb..9ca3ff0719be 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.util.StringUtils.getUTF8Bytes import org.apache.hudi.hadoop.fs.HadoopFSUtils +import org.apache.hudi.storage.HoodieLocation import org.apache.hudi.testutils.HoodieClientTestUtils import org.apache.parquet.avro.AvroParquetWriter @@ -46,7 +47,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { withTempDir { tmp => val fs: FileSystem = HadoopFSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) val tableName = generateTableName - val tablePath = tmp.getCanonicalPath + Path.SEPARATOR + tableName + val tablePath = tmp.getCanonicalPath + HoodieLocation.SEPARATOR + tableName val sourcePath = new Path(tmp.getCanonicalPath, "source") val targetPath = new Path(tablePath) val schemaFile = new Path(tmp.getCanonicalPath, "file.schema").toString @@ -79,7 +80,7 @@ class TestHdfsParquetImportProcedure extends HoodieSparkProcedureTestBase { withTempDir { tmp => val fs: FileSystem = HadoopFSUtils.getFs(tmp.getCanonicalPath, spark.sparkContext.hadoopConfiguration) val tableName = generateTableName - val tablePath = tmp.getCanonicalPath + Path.SEPARATOR + tableName + val tablePath = tmp.getCanonicalPath + HoodieLocation.SEPARATOR + tableName val sourcePath = new Path(tmp.getCanonicalPath, "source") val targetPath = new Path(tablePath) val schemaFile = new Path(tmp.getCanonicalPath, "file.schema").toString diff --git a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala index d603f2c13d6f..0166ce9b9529 100644 --- a/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark32PlusAnalysis.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.hudi.analysis -import org.apache.hadoop.fs.Path import org.apache.hudi.{DataSourceReadOptions, DefaultSource, SparkAdapterSupport} +import org.apache.hudi.storage.HoodieLocation + +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.HoodieSpark3CatalystPlanUtils.MatchResolvedTable import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.resolveExpressionByPlanChildren import org.apache.spark.sql.catalyst.analysis.{AnalysisErrorAt, EliminateSubqueryAliases, NamedRelation, UnresolvedAttribute, UnresolvedPartitionSpec} @@ -29,14 +31,13 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.connector.catalog.{Table, V1Table} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.isMetaField import org.apache.spark.sql.hudi.ProvidesHoodieConfig import org.apache.spark.sql.hudi.analysis.HoodieSpark32PlusAnalysis.{HoodieV1OrV2Table, ResolvesToHudiTable} import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table import org.apache.spark.sql.hudi.command.{AlterHoodieTableDropPartitionCommand, ShowHoodieTablePartitionsCommand, TruncateHoodieTableCommand} -import org.apache.spark.sql.{AnalysisException, SQLContext, SparkSession} /** * NOTE: PLEASE READ CAREFULLY @@ -91,7 +92,7 @@ case class HoodieSpark32PlusResolveReferences(spark: SparkSession) extends Rule[ case HoodieTableChanges(args) => val (tablePath, opts) = HoodieTableChangesOptionsParser.parseOptions(args, HoodieTableChanges.FUNC_NAME) val hoodieDataSource = new DefaultSource - if (tablePath.contains(Path.SEPARATOR)) { + if (tablePath.contains(HoodieLocation.SEPARATOR)) { // the first param is table path val relation = hoodieDataSource.createRelation(spark.sqlContext, opts ++ Map("path" -> tablePath)) LogicalRelation(relation) diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java index ad1918eabf8b..29d144005306 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestService.java @@ -20,9 +20,9 @@ import org.apache.hudi.common.testutils.NetworkTestUtils; import org.apache.hudi.common.util.FileIOUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.HiveMetaStore; @@ -220,7 +220,7 @@ private void resetSystemProperties() { } private static String getHiveLocation(String baseLocation) { - return baseLocation + Path.SEPARATOR + "hive"; + return baseLocation + HoodieLocation.SEPARATOR + "hive"; } private HiveServer2 startHiveServer(HiveConf serverConf) { diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerBasedEarlyConflictDetectionRunnable.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerBasedEarlyConflictDetectionRunnable.java index 5cc3d431d300..931bd421b39e 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerBasedEarlyConflictDetectionRunnable.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerBasedEarlyConflictDetectionRunnable.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.MarkerUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hudi.timeline.service.handlers.MarkerHandler; import org.apache.hadoop.conf.Configuration; @@ -87,7 +88,7 @@ public void run() { // and the markers from the requests pending processing. currentInstantAllMarkers.addAll(markerHandler.getAllMarkers(markerDir)); currentInstantAllMarkers.addAll(pendingMarkers); - Path tempPath = new Path(basePath + Path.SEPARATOR + HoodieTableMetaClient.TEMPFOLDER_NAME); + Path tempPath = new Path(basePath + HoodieLocation.SEPARATOR + HoodieTableMetaClient.TEMPFOLDER_NAME); List instants = MarkerUtils.getAllMarkerDir(tempPath, fs); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java index 11a19b030fc5..d4fc5e8053a6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/SparkSampleWritesUtils.java @@ -34,6 +34,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.hadoop.fs.CachingPath; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.storage.HoodieLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -138,7 +139,7 @@ private static Pair doSampleWrites(JavaSparkContext jsc, Option } private static String getSampleWritesBasePath(JavaSparkContext jsc, HoodieWriteConfig writeConfig, String instantTime) throws IOException { - Path basePath = new CachingPath(writeConfig.getBasePath(), SAMPLE_WRITES_FOLDER_PATH + Path.SEPARATOR + instantTime); + Path basePath = new CachingPath(writeConfig.getBasePath(), SAMPLE_WRITES_FOLDER_PATH + HoodieLocation.SEPARATOR + instantTime); FileSystem fs = HadoopFSUtils.getFs(basePath, jsc.hadoopConfiguration()); if (fs.exists(basePath)) { fs.delete(basePath, true); From 4d49fa4acff9b840febd019978b70622cd4d5bea Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 30 Jan 2024 22:11:35 -0800 Subject: [PATCH 089/112] [HUDI-7345] Remove usage of org.apache.hadoop.util.VersionUtil (#10571) --- .../org/apache/hudi/avro/HoodieAvroUtils.java | 5 +- .../hudi/common/util/ComparableVersion.java | 402 ++++++++++++++++++ .../apache/hudi/common/util/StringUtils.java | 108 ++++- .../hudi/common/util/TestStringUtils.java | 134 ++++++ 4 files changed, 643 insertions(+), 6 deletions(-) create mode 100644 hudi-io/src/main/java/org/apache/hudi/common/util/ComparableVersion.java create mode 100644 hudi-io/src/test/java/org/apache/hudi/common/util/TestStringUtils.java diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 523f6dd742c4..208f376ea019 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -67,7 +67,6 @@ import org.apache.avro.io.JsonEncoder; import org.apache.avro.specific.SpecificRecordBase; import org.apache.avro.util.Utf8; -import org.apache.hadoop.util.VersionUtil; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -1312,11 +1311,11 @@ public static GenericRecord rewriteRecordDeep(GenericRecord oldRecord, Schema ne } public static boolean gteqAvro1_9() { - return VersionUtil.compareVersions(AVRO_VERSION, "1.9") >= 0; + return StringUtils.compareVersions(AVRO_VERSION, "1.9") >= 0; } public static boolean gteqAvro1_10() { - return VersionUtil.compareVersions(AVRO_VERSION, "1.10") >= 0; + return StringUtils.compareVersions(AVRO_VERSION, "1.10") >= 0; } /** diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/ComparableVersion.java b/hudi-io/src/main/java/org/apache/hudi/common/util/ComparableVersion.java new file mode 100644 index 000000000000..467c39b4ee69 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/ComparableVersion.java @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Locale; +import java.util.Properties; +import java.util.Stack; + +/** + * Generic implementation of version comparison. + * + *

    Features: + *

      + *
    • mixing of '-' (dash) and '.' (dot) separators,
    • + *
    • transition between characters and digits also constitutes a separator: + * 1.0alpha1 => [1, 0, alpha, 1]
    • + *
    • unlimited number of version components,
    • + *
    • version components in the text can be digits or strings,
    • + *
    • strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering. + * Well-known qualifiers (case insensitive) are:
        + *
      • alpha or a
      • + *
      • beta or b
      • + *
      • milestone or m
      • + *
      • rc or cr
      • + *
      • snapshot
      • + *
      • (the empty string) or ga or final
      • + *
      • sp
      • + *
      + * Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive), + *
    • + *
    • a dash usually precedes a qualifier, and is always less important than something preceded with a dot.
    • + *

    + * + * @see "Versioning" on Maven Wiki + * This class is copied from {@code org.apache.hadoop.util.ComparableVersion} to avoid Hadoop dependency. + */ +public class ComparableVersion + implements Comparable { + private String value; + + private String canonical; + + private ComparableVersion.ListItem items; + + private interface Item { + int INTEGER_ITEM = 0; + int STRING_ITEM = 1; + int LIST_ITEM = 2; + + int compareTo(ComparableVersion.Item item); + + int getType(); + + boolean isNull(); + } + + /** + * Represents a numeric item in the version item list. + */ + private static class IntegerItem + implements ComparableVersion.Item { + private static final BigInteger BIG_INTEGER_ZERO = new BigInteger("0"); + + private final BigInteger value; + + public static final ComparableVersion.IntegerItem ZERO = new ComparableVersion.IntegerItem(); + + private IntegerItem() { + this.value = BIG_INTEGER_ZERO; + } + + public IntegerItem(String str) { + this.value = new BigInteger(str); + } + + public int getType() { + return INTEGER_ITEM; + } + + public boolean isNull() { + return BIG_INTEGER_ZERO.equals(value); + } + + public int compareTo(ComparableVersion.Item item) { + if (item == null) { + return BIG_INTEGER_ZERO.equals(value) ? 0 : 1; // 1.0 == 1, 1.1 > 1 + } + + switch (item.getType()) { + case INTEGER_ITEM: + return value.compareTo(((ComparableVersion.IntegerItem) item).value); + + case STRING_ITEM: + return 1; // 1.1 > 1-sp + + case LIST_ITEM: + return 1; // 1.1 > 1-1 + + default: + throw new RuntimeException("invalid item: " + item.getClass()); + } + } + + public String toString() { + return value.toString(); + } + } + + /** + * Represents a string in the version item list, usually a qualifier. + */ + private static class StringItem + implements ComparableVersion.Item { + private static final String[] QUALIFIERS = {"alpha", "beta", "milestone", "rc", "snapshot", "", "sp"}; + + private static final List QUALIFIER_LIST = Arrays.asList(QUALIFIERS); + + private static final Properties ALIASES = new Properties(); + + static { + ALIASES.put("ga", ""); + ALIASES.put("final", ""); + ALIASES.put("cr", "rc"); + } + + /** + * A comparable value for the empty-string qualifier. This one is used to determine if a given qualifier makes + * the version older than one without a qualifier, or more recent. + */ + private static final String RELEASE_VERSION_INDEX = String.valueOf(QUALIFIER_LIST.indexOf("")); + + private String value; + + public StringItem(String value, boolean followedByDigit) { + if (followedByDigit && value.length() == 1) { + // a1 = alpha-1, b1 = beta-1, m1 = milestone-1 + switch (value.charAt(0)) { + case 'a': + value = "alpha"; + break; + case 'b': + value = "beta"; + break; + case 'm': + value = "milestone"; + break; + default: + break; + } + } + this.value = ALIASES.getProperty(value, value); + } + + public int getType() { + return STRING_ITEM; + } + + public boolean isNull() { + return (comparableQualifier(value).compareTo(RELEASE_VERSION_INDEX) == 0); + } + + /** + * Returns a comparable value for a qualifier. + *

    + * This method takes into account the ordering of known qualifiers then unknown qualifiers with lexical ordering. + *

    + * just returning an Integer with the index here is faster, but requires a lot of if/then/else to check for -1 + * or QUALIFIERS.size and then resort to lexical ordering. Most comparisons are decided by the first character, + * so this is still fast. If more characters are needed then it requires a lexical sort anyway. + * + * @param qualifier + * @return an equivalent value that can be used with lexical comparison + */ + public static String comparableQualifier(String qualifier) { + int i = QUALIFIER_LIST.indexOf(qualifier); + + return i == -1 ? (QUALIFIER_LIST.size() + "-" + qualifier) : String.valueOf(i); + } + + public int compareTo(ComparableVersion.Item item) { + if (item == null) { + // 1-rc < 1, 1-ga > 1 + return comparableQualifier(value).compareTo(RELEASE_VERSION_INDEX); + } + switch (item.getType()) { + case INTEGER_ITEM: + return -1; // 1.any < 1.1 ? + + case STRING_ITEM: + return comparableQualifier(value).compareTo(comparableQualifier(((ComparableVersion.StringItem) item).value)); + + case LIST_ITEM: + return -1; // 1.any < 1-1 + + default: + throw new RuntimeException("invalid item: " + item.getClass()); + } + } + + public String toString() { + return value; + } + } + + /** + * Represents a version list item. This class is used both for the global item list and for sub-lists (which start + * with '-(number)' in the version specification). + */ + private static class ListItem + extends ArrayList + implements ComparableVersion.Item { + public int getType() { + return LIST_ITEM; + } + + public boolean isNull() { + return (size() == 0); + } + + void normalize() { + for (ListIterator iterator = listIterator(size()); iterator.hasPrevious(); ) { + ComparableVersion.Item item = iterator.previous(); + if (item.isNull()) { + iterator.remove(); // remove null trailing items: 0, "", empty list + } else { + break; + } + } + } + + public int compareTo(ComparableVersion.Item item) { + if (item == null) { + if (size() == 0) { + return 0; // 1-0 = 1- (normalize) = 1 + } + ComparableVersion.Item first = get(0); + return first.compareTo(null); + } + switch (item.getType()) { + case INTEGER_ITEM: + return -1; // 1-1 < 1.0.x + + case STRING_ITEM: + return 1; // 1-1 > 1-sp + + case LIST_ITEM: + Iterator left = iterator(); + Iterator right = ((ComparableVersion.ListItem) item).iterator(); + + while (left.hasNext() || right.hasNext()) { + ComparableVersion.Item l = left.hasNext() ? left.next() : null; + ComparableVersion.Item r = right.hasNext() ? right.next() : null; + + // if this is shorter, then invert the compare and mul with -1 + int result = l == null ? -1 * r.compareTo(l) : l.compareTo(r); + + if (result != 0) { + return result; + } + } + + return 0; + + default: + throw new RuntimeException("invalid item: " + item.getClass()); + } + } + + public String toString() { + StringBuilder buffer = new StringBuilder("("); + for (Iterator iter = iterator(); iter.hasNext(); ) { + buffer.append(iter.next()); + if (iter.hasNext()) { + buffer.append(','); + } + } + buffer.append(')'); + return buffer.toString(); + } + } + + public ComparableVersion(String version) { + parseVersion(version); + } + + public final void parseVersion(String version) { + this.value = version; + + items = new ComparableVersion.ListItem(); + + version = version.toLowerCase(Locale.ENGLISH); + + ComparableVersion.ListItem list = items; + + Stack stack = new Stack(); + stack.push(list); + + boolean isDigit = false; + + int startIndex = 0; + + for (int i = 0; i < version.length(); i++) { + char c = version.charAt(i); + + if (c == '.') { + if (i == startIndex) { + list.add(ComparableVersion.IntegerItem.ZERO); + } else { + list.add(parseItem(isDigit, version.substring(startIndex, i))); + } + startIndex = i + 1; + } else if (c == '-') { + if (i == startIndex) { + list.add(ComparableVersion.IntegerItem.ZERO); + } else { + list.add(parseItem(isDigit, version.substring(startIndex, i))); + } + startIndex = i + 1; + + if (isDigit) { + list.normalize(); // 1.0-* = 1-* + + if ((i + 1 < version.length()) && Character.isDigit(version.charAt(i + 1))) { + // new ListItem only if previous were digits and new char is a digit, + // ie need to differentiate only 1.1 from 1-1 + list.add(list = new ComparableVersion.ListItem()); + + stack.push(list); + } + } + } else if (Character.isDigit(c)) { + if (!isDigit && i > startIndex) { + list.add(new ComparableVersion.StringItem(version.substring(startIndex, i), true)); + startIndex = i; + } + + isDigit = true; + } else { + if (isDigit && i > startIndex) { + list.add(parseItem(true, version.substring(startIndex, i))); + startIndex = i; + } + + isDigit = false; + } + } + + if (version.length() > startIndex) { + list.add(parseItem(isDigit, version.substring(startIndex))); + } + + while (!stack.isEmpty()) { + list = (ComparableVersion.ListItem) stack.pop(); + list.normalize(); + } + + canonical = items.toString(); + } + + private static ComparableVersion.Item parseItem(boolean isDigit, String buf) { + return isDigit ? new ComparableVersion.IntegerItem(buf) : new ComparableVersion.StringItem(buf, false); + } + + public int compareTo(ComparableVersion o) { + return items.compareTo(o.items); + } + + public String toString() { + return value; + } + + public boolean equals(Object o) { + return (o instanceof ComparableVersion) && canonical.equals(((ComparableVersion) o).canonical); + } + + public int hashCode() { + return canonical.hashCode(); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java index f73615a16a40..eb8f19987484 100644 --- a/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/StringUtils.java @@ -33,8 +33,10 @@ */ public class StringUtils { - public static final char[] HEX_CHAR = new char[]{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + public static final char[] HEX_CHAR = new char[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; public static final String EMPTY_STRING = ""; + // Represents a failed index search + public static final int INDEX_NOT_FOUND = -1; /** *

    @@ -66,7 +68,7 @@ public static String join(final String[] array, final String separator) { if (array == null) { return null; } - return org.apache.hadoop.util.StringUtils.join(separator, array); + return String.join(separator, array); } /** @@ -85,7 +87,7 @@ public static String join(final List list, final String separator) { if (list == null || list.size() == 0) { return null; } - return org.apache.hadoop.util.StringUtils.join(separator, list.toArray(new String[0])); + return String.join(separator, list.toArray(new String[0])); } public static String toHexString(byte[] bytes) { @@ -200,4 +202,104 @@ public static String truncate(String str, int headLength, int tailLength) { return head + "..." + tail; } + + /** + * Compares two version name strings using maven's ComparableVersion class. + * + * @param version1 the first version to compare + * @param version2 the second version to compare + * @return a negative integer if version1 precedes version2, a positive + * integer if version2 precedes version1, and 0 if and only if the two + * versions are equal. + */ + public static int compareVersions(String version1, String version2) { + ComparableVersion v1 = new ComparableVersion(version1); + ComparableVersion v2 = new ComparableVersion(version2); + return v1.compareTo(v2); + } + + /** + * Replaces all occurrences of a String within another String. + * + *

    A null reference passed to this method is a no-op.

    + * + *
    +   * StringUtils.replace(null, *, *)        = null
    +   * StringUtils.replace("", *, *)          = ""
    +   * StringUtils.replace("any", null, *)    = "any"
    +   * StringUtils.replace("any", *, null)    = "any"
    +   * StringUtils.replace("any", "", *)      = "any"
    +   * StringUtils.replace("aba", "a", null)  = "aba"
    +   * StringUtils.replace("aba", "a", "")    = "b"
    +   * StringUtils.replace("aba", "a", "z")   = "zbz"
    +   * 
    + *

    + * This method is copied from hadoop StringUtils. + * + * @param text text to search and replace in, may be null + * @param searchString the String to search for, may be null + * @param replacement the String to replace it with, may be null + * @return the text with any replacements processed, + * null if null String input + * @see #replace(String text, String searchString, String replacement, int max) + */ + public static String replace(String text, String searchString, String replacement) { + return replace(text, searchString, replacement, -1); + } + + /** + * Replaces a String with another String inside a larger String, + * for the first max values of the search String. + * + *

    A null reference passed to this method is a no-op.

    + * + *
    +   * StringUtils.replace(null, *, *, *)         = null
    +   * StringUtils.replace("", *, *, *)           = ""
    +   * StringUtils.replace("any", null, *, *)     = "any"
    +   * StringUtils.replace("any", *, null, *)     = "any"
    +   * StringUtils.replace("any", "", *, *)       = "any"
    +   * StringUtils.replace("any", *, *, 0)        = "any"
    +   * StringUtils.replace("abaa", "a", null, -1) = "abaa"
    +   * StringUtils.replace("abaa", "a", "", -1)   = "b"
    +   * StringUtils.replace("abaa", "a", "z", 0)   = "abaa"
    +   * StringUtils.replace("abaa", "a", "z", 1)   = "zbaa"
    +   * StringUtils.replace("abaa", "a", "z", 2)   = "zbza"
    +   * StringUtils.replace("abaa", "a", "z", -1)  = "zbzz"
    +   * 
    + *

    + * This method is copied from hadoop StringUtils. + * + * @param text text to search and replace in, may be null + * @param searchString the String to search for, may be null + * @param replacement the String to replace it with, may be null + * @param max maximum number of values to replace, or -1 if no maximum + * @return the text with any replacements processed, + * null if null String input + */ + public static String replace(String text, String searchString, String replacement, int max) { + if (isNullOrEmpty(text) || isNullOrEmpty(searchString) || replacement == null || max == 0) { + return text; + } + int start = 0; + int end = text.indexOf(searchString, start); + if (end == INDEX_NOT_FOUND) { + return text; + } + int replLength = searchString.length(); + int increase = replacement.length() - replLength; + increase = (increase < 0 ? 0 : increase); + increase *= (max < 0 ? 16 : (max > 64 ? 64 : max)); + StringBuilder buf = new StringBuilder(text.length() + increase); + while (end != INDEX_NOT_FOUND) { + buf.append(text.substring(start, end)).append(replacement); + start = end + replLength; + if (--max == 0) { + break; + } + end = text.indexOf(searchString, start); + } + buf.append(text.substring(start)); + return buf.toString(); + } } diff --git a/hudi-io/src/test/java/org/apache/hudi/common/util/TestStringUtils.java b/hudi-io/src/test/java/org/apache/hudi/common/util/TestStringUtils.java new file mode 100644 index 000000000000..a4bee6bc6be7 --- /dev/null +++ b/hudi-io/src/test/java/org/apache/hudi/common/util/TestStringUtils.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util; + +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests {@link StringUtils}. + */ +public class TestStringUtils { + + private static final String[] STRINGS = {"This", "is", "a", "test"}; + + @Test + public void testStringJoinWithDelim() { + String joinedString = StringUtils.joinUsingDelim("-", STRINGS); + assertEquals(STRINGS.length, joinedString.split("-").length); + } + + @Test + public void testStringJoin() { + assertNotEquals(null, StringUtils.join("")); + assertNotEquals(null, StringUtils.join(STRINGS)); + } + + @Test + public void testStringJoinWithJavaImpl() { + assertNull(StringUtils.join(",", null)); + assertEquals("", String.join(",", Collections.singletonList(""))); + assertEquals(",", String.join(",", Arrays.asList("", ""))); + assertEquals("a,", String.join(",", Arrays.asList("a", ""))); + } + + @Test + public void testStringNullToEmpty() { + String str = "This is a test"; + assertEquals(str, StringUtils.nullToEmpty(str)); + assertEquals("", StringUtils.nullToEmpty(null)); + } + + @Test + public void testStringObjToString() { + assertNull(StringUtils.objToString(null)); + assertEquals("Test String", StringUtils.objToString("Test String")); + + // assert byte buffer + ByteBuffer byteBuffer1 = ByteBuffer.wrap(getUTF8Bytes("1234")); + ByteBuffer byteBuffer2 = ByteBuffer.wrap(getUTF8Bytes("5678")); + // assert equal because ByteBuffer has overwritten the toString to return a summary string + assertEquals(byteBuffer1.toString(), byteBuffer2.toString()); + // assert not equal + assertNotEquals(StringUtils.objToString(byteBuffer1), StringUtils.objToString(byteBuffer2)); + } + + @Test + public void testStringEmptyToNull() { + assertNull(StringUtils.emptyToNull("")); + assertEquals("Test String", StringUtils.emptyToNull("Test String")); + } + + @Test + public void testStringNullOrEmpty() { + assertTrue(StringUtils.isNullOrEmpty(null)); + assertTrue(StringUtils.isNullOrEmpty("")); + assertNotEquals(null, StringUtils.isNullOrEmpty("this is not empty")); + assertTrue(StringUtils.isNullOrEmpty("")); + } + + @Test + public void testSplit() { + assertEquals(new ArrayList<>(), StringUtils.split(null, ",")); + assertEquals(new ArrayList<>(), StringUtils.split("", ",")); + assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b, c", ",")); + assertEquals(Arrays.asList("a", "b", "c"), StringUtils.split("a,b,, c ", ",")); + } + + @Test + public void testHexString() { + String str = "abcd"; + assertEquals(StringUtils.toHexString(getUTF8Bytes(str)), toHexString(getUTF8Bytes(str))); + } + + private static String toHexString(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } + + @Test + public void testTruncate() { + assertNull(StringUtils.truncate(null, 10, 10)); + assertEquals("http://use...ons/latest", StringUtils.truncate("http://username:password@myregistry.com:5000/versions/latest", 10, 10)); + assertEquals("http://abc.com", StringUtils.truncate("http://abc.com", 10, 10)); + } + + @Test + public void testCompareVersions() { + assertTrue(StringUtils.compareVersions("1.10", "1.9") > 0); + assertTrue(StringUtils.compareVersions("1.9", "1.10") < 0); + assertTrue(StringUtils.compareVersions("1.100.1", "1.10") > 0); + assertTrue(StringUtils.compareVersions("1.10.1", "1.10") > 0); + assertTrue(StringUtils.compareVersions("1.10", "1.10") == 0); + } +} From bcfcd9f89392373d3f809c30b9f1cc7ea4acfa5a Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 19:56:55 -0800 Subject: [PATCH 090/112] [HUDI-7344] Use Java Stream instead of FSDataStream when possible (#10573) --- .../hudi/cli/commands/CompactionCommand.java | 8 +++--- .../commands/TestUpgradeDowngradeCommand.java | 12 ++++---- .../integ/ITTestHDFSParquetImportCommand.java | 4 +-- .../HoodieTestCommitMetadataGenerator.java | 6 ++-- .../lock/FileSystemBasedLockProvider.java | 6 ++-- .../bucket/ConsistentBucketIndexUtils.java | 8 +++--- .../hudi/HoodieTestCommitGenerator.java | 6 ++-- .../client/TestJavaHoodieBackedMetadata.java | 4 +-- .../functional/TestHoodieBackedMetadata.java | 4 +-- .../org/apache/hudi/table/TestCleaner.java | 4 +-- .../TestTimelineServerBasedWriteMarkers.java | 8 +++--- .../table/upgrade/TestUpgradeDowngrade.java | 12 ++++---- .../common/model/HoodiePartitionMetadata.java | 11 ++++---- .../hudi/common/table/HoodieTableConfig.java | 22 +++++++-------- .../table/timeline/HoodieActiveTimeline.java | 4 +-- .../hudi/common/util/InternalSchemaCache.java | 4 +-- .../apache/hudi/common/util/MarkerUtils.java | 28 +++++++++---------- ...FileBasedInternalSchemaStorageManager.java | 4 +-- .../common/table/TestHoodieTableConfig.java | 10 +++---- .../testutils/HoodieTestDataGenerator.java | 7 +++-- .../table/catalog/TableOptionProperties.java | 8 +++--- .../hudi/util/ViewStorageProperties.java | 8 +++--- .../hadoop/fs/HoodieWrapperFileSystem.java | 17 +++++------ .../apache/hudi/common/util/FileIOUtils.java | 24 ++++++++-------- .../hudi/hive/testutils/HiveTestCluster.java | 3 +- .../hudi/hive/testutils/HiveTestUtil.java | 8 +++--- .../sync/common/util/ManifestFileWriter.java | 4 +-- .../handlers/marker/MarkerDirState.java | 10 +++---- .../utilities/HoodieCompactionAdminTool.java | 8 +++--- .../utilities/perf/TimelineServerPerf.java | 4 +-- .../schema/FilebasedSchemaProvider.java | 4 +-- .../hudi/utilities/sources/JdbcSource.java | 4 +-- .../TestHoodieDeltaStreamer.java | 8 +++--- .../functional/TestHDFSParquetImporter.java | 4 +-- .../helpers/TestSanitizationUtils.java | 4 +-- 35 files changed, 144 insertions(+), 146 deletions(-) diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java index c9cebb1b227f..a32387b4c778 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java @@ -45,7 +45,6 @@ import org.apache.hudi.table.action.compact.OperationResult; import org.apache.hudi.utilities.UtilHelpers; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.launcher.SparkLauncher; @@ -57,6 +56,7 @@ import org.springframework.shell.standard.ShellOption; import java.io.IOException; +import java.io.InputStream; import java.io.ObjectInputStream; import java.util.ArrayList; import java.util.HashMap; @@ -437,15 +437,15 @@ private static String getTmpSerializerFile() { private T deSerializeOperationResult(String inputP, FileSystem fs) throws Exception { Path inputPath = new Path(inputP); - FSDataInputStream fsDataInputStream = fs.open(inputPath); - ObjectInputStream in = new ObjectInputStream(fsDataInputStream); + InputStream inputStream = fs.open(inputPath); + ObjectInputStream in = new ObjectInputStream(inputStream); try { T result = (T) in.readObject(); LOG.info("Result : " + result); return result; } finally { in.close(); - fsDataInputStream.close(); + inputStream.close(); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java index 4d1a0ec3fb74..237a9f1985be 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java @@ -33,8 +33,6 @@ import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.testutils.HoodieClientTestUtils; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -45,6 +43,8 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.Arrays; import java.util.stream.Stream; @@ -115,7 +115,7 @@ private static Stream testArgsForUpgradeDowngradeCommand() { public void testUpgradeDowngradeCommand(HoodieTableVersion fromVersion, HoodieTableVersion toVersion) throws Exception { // Start with hoodie.table.version to 5 metaClient.getTableConfig().setTableVersion(HoodieTableVersion.FIVE); - try (FSDataOutputStream os = metaClient.getFs().create(new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE), true)) { + try (OutputStream os = metaClient.getFs().create(new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE), true)) { metaClient.getTableConfig().getProps().store(os, ""); } metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); @@ -163,10 +163,10 @@ private void verifyTableVersion(HoodieTableVersion expectedVersion) throws IOExc private void assertTableVersionFromPropertyFile(HoodieTableVersion expectedVersion) throws IOException { Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE); // Load the properties and verify - FSDataInputStream fsDataInputStream = metaClient.getFs().open(propertyFile); + InputStream inputStream = metaClient.getFs().open(propertyFile); HoodieConfig config = new HoodieConfig(); - config.getProps().load(fsDataInputStream); - fsDataInputStream.close(); + config.getProps().load(inputStream); + inputStream.close(); assertEquals(Integer.toString(expectedVersion.versionCode()), config.getString(HoodieTableConfig.VERSION)); } } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java index 5f19bca25792..34becfa0de32 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestHDFSParquetImportCommand.java @@ -33,7 +33,6 @@ import org.apache.hudi.utilities.functional.TestHDFSParquetImporter.HoodieTripModel; import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -45,6 +44,7 @@ import org.springframework.shell.Shell; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.text.ParseException; @@ -83,7 +83,7 @@ public void init() throws IOException, ParseException { schemaFile = new Path(basePath, "file.schema").toString(); // create schema file - try (FSDataOutputStream schemaFileOS = fs.create(new Path(schemaFile))) { + try (OutputStream schemaFileOS = fs.create(new Path(schemaFile))) { schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index 1ade400414b9..0a11ca3aaaf0 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -29,10 +29,10 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import java.io.IOException; +import java.io.OutputStream; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -114,8 +114,8 @@ public static void createCommitFileWithMetadata(String basePath, String commitTi static void createFileWithMetadata(String basePath, Configuration configuration, String name, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + name); - try (FSDataOutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { - os.writeBytes(new String(getUTF8Bytes(content))); + try (OutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { + os.write(getUTF8Bytes(content)); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java index 39c004192456..3cd3cefe750b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java @@ -37,7 +37,6 @@ import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -45,6 +44,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.io.Serializable; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -181,9 +181,9 @@ public void initLockInfo() { } public void reloadCurrentOwnerLockInfo() { - try (FSDataInputStream fis = fs.open(this.lockFile)) { + try (InputStream is = fs.open(this.lockFile)) { if (fs.exists(this.lockFile)) { - this.currentOwnerLockInfo = FileIOUtils.readAsUTFString(fis); + this.currentOwnerLockInfo = FileIOUtils.readAsUTFString(is); } else { this.currentOwnerLockInfo = ""; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java index 3bf40d1f1388..5b4d5cfba457 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java @@ -32,7 +32,6 @@ import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.table.HoodieTable; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; @@ -40,6 +39,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -184,10 +184,10 @@ public static boolean saveMetadata(HoodieTable table, HoodieConsistentHashingMet HoodieWrapperFileSystem fs = table.getMetaClient().getFs(); Path dir = FSUtils.getPartitionPath(table.getMetaClient().getHashingMetadataPath(), metadata.getPartitionPath()); Path fullPath = new Path(dir, metadata.getFilename()); - try (FSDataOutputStream fsOut = fs.create(fullPath, overwrite)) { + try (OutputStream out = fs.create(fullPath, overwrite)) { byte[] bytes = metadata.toBytes(); - fsOut.write(bytes); - fsOut.close(); + out.write(bytes); + out.close(); return true; } catch (IOException e) { LOG.warn("Failed to update bucket metadata: " + metadata, e); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java index 04f975ebe52d..9c86cdeee811 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/HoodieTestCommitGenerator.java @@ -31,13 +31,13 @@ import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -163,8 +163,8 @@ public static void createCommitFileWithMetadata( String basePath, Configuration configuration, String filename, String content) throws IOException { Path commitFilePath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + filename); - try (FSDataOutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { - os.writeBytes(new String(getUTF8Bytes(content))); + try (OutputStream os = HadoopFSUtils.getFs(basePath, configuration).create(commitFilePath, true)) { + os.write(getUTF8Bytes(content)); } } diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java index 9e4afc55c55f..c484db90547f 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/client/TestJavaHoodieBackedMetadata.java @@ -108,7 +108,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Time; @@ -126,6 +125,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -2853,7 +2853,7 @@ private void changeTableVersion(HoodieTableVersion version) throws IOException { metaClient = HoodieTableMetaClient.reload(metaClient); metaClient.getTableConfig().setTableVersion(version); Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE); - try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) { + try (OutputStream os = metaClient.getFs().create(propertyFile)) { metaClient.getTableConfig().getProps().store(os, ""); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index 872f7ac2bc38..dc563ec00630 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -114,7 +114,6 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -136,6 +135,7 @@ import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -3581,7 +3581,7 @@ private void changeTableVersion(HoodieTableVersion version) throws IOException { metaClient = HoodieTableMetaClient.reload(metaClient); metaClient.getTableConfig().setTableVersion(version); Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE); - try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) { + try (OutputStream os = metaClient.getFs().create(propertyFile)) { metaClient.getTableConfig().getProps().store(os, ""); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java index 8003c28c2ff0..4e6916188914 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -78,7 +78,6 @@ import org.apache.hudi.table.action.clean.CleanPlanner; import org.apache.hudi.testutils.HoodieCleanerTestBase; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.Test; @@ -86,6 +85,7 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.IOException; +import java.io.OutputStream; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; @@ -1019,7 +1019,7 @@ public void testCleanPreviousCorruptedCleanFiles() throws IOException { for (String f : cleanerFileNames) { Path commitFile = new Path(Paths .get(metaClient.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME, f).toString()); - try (FSDataOutputStream os = metaClient.getFs().create(commitFile, true)) { + try (OutputStream os = metaClient.getFs().create(commitFile, true)) { // Write empty clean metadata os.write(new byte[0]); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java index 17bc372a14f9..b27f40e2addd 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestTimelineServerBasedWriteMarkers.java @@ -34,7 +34,6 @@ import org.apache.hudi.timeline.service.TimelineService; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; @@ -43,6 +42,7 @@ import java.io.Closeable; import java.io.IOException; +import java.io.InputStream; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; @@ -110,10 +110,10 @@ void verifyMarkersInFileSystem(boolean isTablePartitioned) throws IOException { // Verifies the marker type file Path markerTypeFilePath = new Path(markerFolderPath, MarkerUtils.MARKER_TYPE_FILENAME); assertTrue(MarkerUtils.doesMarkerTypeFileExist(fs, markerFolderPath.toString())); - FSDataInputStream fsDataInputStream = fs.open(markerTypeFilePath); + InputStream inputStream = fs.open(markerTypeFilePath); assertEquals(MarkerType.TIMELINE_SERVER_BASED.toString(), - FileIOUtils.readAsUTFString(fsDataInputStream)); - closeQuietly(fsDataInputStream); + FileIOUtils.readAsUTFString(inputStream)); + closeQuietly(inputStream); } /** diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java index 10bd153c90f3..111b2141e285 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java @@ -53,8 +53,6 @@ import org.apache.hudi.testutils.HoodieClientTestBase; import org.apache.hudi.testutils.HoodieClientTestUtils; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -71,6 +69,8 @@ import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; @@ -847,7 +847,7 @@ private Pair, List> twoUpsertCommitDataWithTwoP private void prepForDowngradeFromVersion(HoodieTableVersion fromVersion) throws IOException { metaClient.getTableConfig().setTableVersion(fromVersion); Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE); - try (FSDataOutputStream os = metaClient.getFs().create(propertyFile)) { + try (OutputStream os = metaClient.getFs().create(propertyFile)) { metaClient.getTableConfig().getProps().store(os, ""); } } @@ -880,10 +880,10 @@ private void assertTableVersion( assertEquals(expectedVersion.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode()); Path propertyFile = new Path(metaClient.getMetaPath() + "/" + HoodieTableConfig.HOODIE_PROPERTIES_FILE); // Load the properties and verify - FSDataInputStream fsDataInputStream = metaClient.getFs().open(propertyFile); + InputStream inputStream = metaClient.getFs().open(propertyFile); HoodieConfig config = new HoodieConfig(); - config.getProps().load(fsDataInputStream); - fsDataInputStream.close(); + config.getProps().load(inputStream); + inputStream.close(); assertEquals(Integer.toString(expectedVersion.versionCode()), config.getString(HoodieTableConfig.VERSION)); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java index 2b63433bef46..bbf505c8670f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodiePartitionMetadata.java @@ -24,14 +24,14 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.List; import java.util.Map; import java.util.Properties; @@ -141,10 +141,9 @@ private void writeMetafile(Path filePath) throws IOException { BaseFileUtils.getInstance(format.get()).writeMetaFile(fs, filePath, props); } else { // Backwards compatible properties file format - FSDataOutputStream os = fs.create(filePath, true); + OutputStream os = fs.create(filePath, true); props.store(os, "partition metadata"); - os.hsync(); - os.hflush(); + os.flush(); os.close(); } } @@ -169,7 +168,7 @@ public void readFromFS() throws IOException { private boolean readTextFormatMetaFile() { // Properties file format Path metafilePath = textFormatMetaFilePath(partitionPath); - try (FSDataInputStream is = fs.open(metafilePath)) { + try (InputStream is = fs.open(metafilePath)) { props.load(is); format = Option.empty(); return true; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index d94206d4c5cf..dc40f7d65d81 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -43,14 +43,14 @@ import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.avro.Schema; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.time.Instant; import java.util.Arrays; import java.util.HashSet; @@ -289,7 +289,7 @@ public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName } if (needStore) { // FIXME(vc): wonder if this can be removed. Need to look into history. - try (FSDataOutputStream outputStream = fs.create(propertyPath)) { + try (OutputStream outputStream = fs.create(propertyPath)) { storeProperties(props, outputStream); } } @@ -312,7 +312,7 @@ private static Properties getOrderedPropertiesWithTableChecksum(Properties props * @return return the table checksum * @throws IOException */ - private static String storeProperties(Properties props, FSDataOutputStream outputStream) throws IOException { + private static String storeProperties(Properties props, OutputStream outputStream) throws IOException { final String checksum; if (isValidChecksum(props)) { checksum = props.getProperty(TABLE_CHECKSUM.key()); @@ -347,7 +347,7 @@ private static TypedProperties fetchConfigs(FileSystem fs, String metaPath) thro while (readRetryCount++ < MAX_READ_RETRIES) { for (Path path : Arrays.asList(cfgPath, backupCfgPath)) { // Read the properties and validate that it is a valid file - try (FSDataInputStream is = fs.open(path)) { + try (InputStream is = fs.open(path)) { props.clear(); props.load(is); found = true; @@ -385,8 +385,8 @@ public static void recover(FileSystem fs, Path metadataFolder) throws IOExceptio static void recoverIfNeeded(FileSystem fs, Path cfgPath, Path backupCfgPath) throws IOException { if (!fs.exists(cfgPath)) { // copy over from backup - try (FSDataInputStream in = fs.open(backupCfgPath); - FSDataOutputStream out = fs.create(cfgPath, false)) { + try (InputStream in = fs.open(backupCfgPath); + OutputStream out = fs.create(cfgPath, false)) { FileIOUtils.copy(in, out); } } @@ -413,7 +413,7 @@ private static void modify(FileSystem fs, Path metadataFolder, Properties modify TypedProperties props = fetchConfigs(fs, metadataFolder.toString()); // 2. backup the existing properties. - try (FSDataOutputStream out = fs.create(backupCfgPath, false)) { + try (OutputStream out = fs.create(backupCfgPath, false)) { storeProperties(props, out); } @@ -422,13 +422,13 @@ private static void modify(FileSystem fs, Path metadataFolder, Properties modify // 4. Upsert and save back. String checksum; - try (FSDataOutputStream out = fs.create(cfgPath, true)) { + try (OutputStream out = fs.create(cfgPath, true)) { modifyFn.accept(props, modifyProps); checksum = storeProperties(props, out); } // 4. verify and remove backup. - try (FSDataInputStream in = fs.open(cfgPath)) { + try (InputStream in = fs.open(cfgPath)) { props.clear(); props.load(in); if (!props.containsKey(TABLE_CHECKSUM.key()) || !props.getProperty(TABLE_CHECKSUM.key()).equals(checksum)) { @@ -470,7 +470,7 @@ public static void create(FileSystem fs, Path metadataFolder, Properties propert } HoodieConfig hoodieConfig = new HoodieConfig(properties); Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE); - try (FSDataOutputStream outputStream = fs.create(propertyPath)) { + try (OutputStream outputStream = fs.create(propertyPath)) { if (!hoodieConfig.contains(NAME)) { throw new IllegalArgumentException(NAME.key() + " property needs to be specified"); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java index 7ba5205c5fc2..90fabdc94f89 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java @@ -30,7 +30,6 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -38,6 +37,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.io.Serializable; import java.text.ParseException; import java.util.Arrays; @@ -799,7 +799,7 @@ protected void createFileInMetaPath(String filename, Option content, boo } protected Option readDataFromPath(Path detailPath) { - try (FSDataInputStream is = metaClient.getFs().open(detailPath)) { + try (InputStream is = metaClient.getFs().open(detailPath)) { return Option.of(FileIOUtils.readAsByteArray(is)); } catch (IOException e) { throw new HoodieIOException("Could not read commit details from " + detailPath, e); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java index c11a2cfd4bb8..7864d0d26155 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/InternalSchemaCache.java @@ -36,13 +36,13 @@ import com.github.benmanes.caffeine.cache.Caffeine; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; import java.util.List; import java.util.Set; @@ -193,7 +193,7 @@ public static InternalSchema getInternalSchemaByVersionId(long versionId, String if (candidateCommitFile != null) { try { byte[] data; - try (FSDataInputStream is = fs.open(candidateCommitFile)) { + try (InputStream is = fs.open(candidateCommitFile)) { data = FileIOUtils.readAsByteArray(is); } catch (IOException e) { throw e; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/MarkerUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/MarkerUtils.java index 73ad7e7dfc78..4ad6b874bc62 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/MarkerUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/MarkerUtils.java @@ -30,8 +30,6 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -40,6 +38,8 @@ import java.io.BufferedWriter; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -111,14 +111,14 @@ public static boolean doesMarkerTypeFileExist(FileSystem fileSystem, String mark */ public static Option readMarkerType(FileSystem fileSystem, String markerDir) { Path markerTypeFilePath = new Path(markerDir, MARKER_TYPE_FILENAME); - FSDataInputStream fsDataInputStream = null; + InputStream inputStream = null; Option content = Option.empty(); try { if (!doesMarkerTypeFileExist(fileSystem, markerDir)) { return Option.empty(); } - fsDataInputStream = fileSystem.open(markerTypeFilePath); - String markerType = FileIOUtils.readAsUTFString(fsDataInputStream); + inputStream = fileSystem.open(markerTypeFilePath); + String markerType = FileIOUtils.readAsUTFString(inputStream); if (StringUtils.isNullOrEmpty(markerType)) { return Option.empty(); } @@ -127,7 +127,7 @@ public static Option readMarkerType(FileSystem fileSystem, String ma throw new HoodieIOException("Cannot read marker type file " + markerTypeFilePath.toString() + "; " + e.getMessage(), e); } finally { - closeQuietly(fsDataInputStream); + closeQuietly(inputStream); } return content; } @@ -141,18 +141,18 @@ public static Option readMarkerType(FileSystem fileSystem, String ma */ public static void writeMarkerTypeToFile(MarkerType markerType, FileSystem fileSystem, String markerDir) { Path markerTypeFilePath = new Path(markerDir, MARKER_TYPE_FILENAME); - FSDataOutputStream fsDataOutputStream = null; + OutputStream outputStream = null; BufferedWriter bufferedWriter = null; try { - fsDataOutputStream = fileSystem.create(markerTypeFilePath, false); - bufferedWriter = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + outputStream = fileSystem.create(markerTypeFilePath, false); + bufferedWriter = new BufferedWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)); bufferedWriter.write(markerType.toString()); } catch (IOException e) { throw new HoodieException("Failed to create marker type file " + markerTypeFilePath.toString() + "; " + e.getMessage(), e); } finally { closeQuietly(bufferedWriter); - closeQuietly(fsDataOutputStream); + closeQuietly(outputStream); } } @@ -224,13 +224,13 @@ public static Set readMarkersFromFile(Path markersFilePath, Serializable * @return Markers in a {@code Set} of String. */ public static Set readMarkersFromFile(Path markersFilePath, SerializableConfiguration conf, boolean ignoreException) { - FSDataInputStream fsDataInputStream = null; + InputStream inputStream = null; Set markers = new HashSet<>(); try { LOG.debug("Read marker file: " + markersFilePath); FileSystem fs = markersFilePath.getFileSystem(conf.get()); - fsDataInputStream = fs.open(markersFilePath); - markers = new HashSet<>(FileIOUtils.readAsUTFStringLines(fsDataInputStream)); + inputStream = fs.open(markersFilePath); + markers = new HashSet<>(FileIOUtils.readAsUTFStringLines(inputStream)); } catch (IOException e) { String errorMessage = "Failed to read MARKERS file " + markersFilePath; if (ignoreException) { @@ -239,7 +239,7 @@ public static Set readMarkersFromFile(Path markersFilePath, Serializable throw new HoodieIOException(errorMessage, e); } } finally { - closeQuietly(fsDataInputStream); + closeQuietly(inputStream); } return markers; } diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java index c5fb1f716542..f67c0b3f943e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/io/FileBasedInternalSchemaStorageManager.java @@ -31,13 +31,13 @@ import org.apache.hudi.internal.schema.utils.SerDeHelper; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -152,7 +152,7 @@ public String getHistorySchemaStrByGivenValidCommits(List validCommits) if (!validaSchemaFiles.isEmpty()) { Path latestFilePath = new Path(baseSchemaPath, validaSchemaFiles.get(validaSchemaFiles.size() - 1)); byte[] content; - try (FSDataInputStream is = fs.open(latestFilePath)) { + try (InputStream is = fs.open(latestFilePath)) { content = FileIOUtils.readAsByteArray(is); LOG.info(String.format("read history schema success from file : %s", latestFilePath)); return fromUTF8Bytes(content); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java index 81928457b2f1..fc9ca493e777 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java @@ -23,7 +23,6 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.AfterEach; @@ -33,6 +32,7 @@ import org.junit.jupiter.params.provider.ValueSource; import java.io.IOException; +import java.io.OutputStream; import java.util.Properties; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -120,7 +120,7 @@ public void testReadsWhenPropsFileDoesNotExist() throws IOException { public void testReadsWithUpdateFailures() throws IOException { HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null, null); fs.delete(cfgPath, false); - try (FSDataOutputStream out = fs.create(backupCfgPath)) { + try (OutputStream out = fs.create(backupCfgPath)) { config.getProps().store(out, ""); } @@ -137,7 +137,7 @@ public void testUpdateRecovery(boolean shouldPropsFileExist) throws IOException if (!shouldPropsFileExist) { fs.delete(cfgPath, false); } - try (FSDataOutputStream out = fs.create(backupCfgPath)) { + try (OutputStream out = fs.create(backupCfgPath)) { config.getProps().store(out, ""); } @@ -160,13 +160,13 @@ public void testReadRetry() throws IOException { // Should return backup config if hoodie.properties is corrupted Properties props = new Properties(); - try (FSDataOutputStream out = fs.create(cfgPath)) { + try (OutputStream out = fs.create(cfgPath)) { props.store(out, "No checksum in file so is invalid"); } new HoodieTableConfig(fs, metaPath.toString(), null, null); // Should throw exception if both hoodie.properties and backup are corrupted - try (FSDataOutputStream out = fs.create(backupCfgPath)) { + try (OutputStream out = fs.create(backupCfgPath)) { props.store(out, "No checksum in file so is invalid"); } assertThrows(IllegalArgumentException.class, () -> new HoodieTableConfig(fs, metaPath.toString(), null, null)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java index 3434680117a9..5e467e84bfb0 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java @@ -55,6 +55,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.OutputStream; import java.io.Serializable; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; @@ -534,7 +535,7 @@ private static void createMetadataFile(String f, String basePath, Configuration private static void createMetadataFile(String f, String basePath, Configuration configuration, byte[] content) { Path commitFile = new Path( basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + f); - FSDataOutputStream os = null; + OutputStream os = null; try { FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); os = fs.create(commitFile, true); @@ -587,7 +588,7 @@ public static void createEmptyCleanRequestedFile(String basePath, String instant private static void createEmptyFile(String basePath, Path filePath, Configuration configuration) throws IOException { FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - FSDataOutputStream os = fs.create(filePath, true); + OutputStream os = fs.create(filePath, true); os.close(); } @@ -603,7 +604,7 @@ public static void createCompactionAuxiliaryMetadata(String basePath, HoodieInst Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName()); FileSystem fs = HadoopFSUtils.getFs(basePath, configuration); - try (FSDataOutputStream os = fs.create(commitFile, true)) { + try (OutputStream os = fs.create(commitFile, true)) { HoodieCompactionPlan workload = HoodieCompactionPlan.newBuilder().setVersion(1).build(); // Write empty commit metadata os.write(TimelineMetadataUtils.serializeCompactionPlan(workload).get()); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java index 2dc8f618b1f7..12eb251f6536 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/catalog/TableOptionProperties.java @@ -34,8 +34,6 @@ import org.apache.flink.table.types.logical.RowType; import org.apache.flink.table.types.logical.VarCharType; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.api.Table; @@ -44,6 +42,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -107,7 +107,7 @@ public static void createProperties(String basePath, Map options) throws IOException { Path propertiesFilePath = getPropertiesFilePath(basePath); FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); - try (FSDataOutputStream outputStream = fs.create(propertiesFilePath)) { + try (OutputStream outputStream = fs.create(propertiesFilePath)) { Properties properties = new Properties(); properties.putAll(options); properties.store(outputStream, @@ -125,7 +125,7 @@ public static Map loadFromProperties(String basePath, Configurat Properties props = new Properties(); FileSystem fs = HadoopFSUtils.getFs(basePath, hadoopConf); - try (FSDataInputStream inputStream = fs.open(propertiesFilePath)) { + try (InputStream inputStream = fs.open(propertiesFilePath)) { props.load(inputStream); for (final String name : props.stringPropertyNames()) { options.put(name, props.getProperty(name)); diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java index 8e328aee4d29..1c13e2024151 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/util/ViewStorageProperties.java @@ -27,14 +27,14 @@ import org.apache.hudi.storage.HoodieLocation; import org.apache.flink.configuration.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.Date; import java.util.Properties; @@ -58,7 +58,7 @@ public static void createProperties( Path propertyPath = getPropertiesFilePath(basePath, flinkConf.getString(FlinkOptions.WRITE_CLIENT_ID)); FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(flinkConf)); fs.delete(propertyPath, false); - try (FSDataOutputStream outputStream = fs.create(propertyPath)) { + try (OutputStream outputStream = fs.create(propertyPath)) { config.getProps().store(outputStream, "Filesystem view storage properties saved on " + new Date(System.currentTimeMillis())); } @@ -73,7 +73,7 @@ public static FileSystemViewStorageConfig loadFromProperties(String basePath, Co FileSystem fs = HadoopFSUtils.getFs(basePath, HadoopConfigurations.getHadoopConf(conf)); Properties props = new Properties(); try { - try (FSDataInputStream inputStream = fs.open(propertyPath)) { + try (InputStream inputStream = fs.open(propertyPath)) { props.load(inputStream); } return FileSystemViewStorageConfig.newBuilder().fromProperties(props).build(); diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java index 326b24353cff..cdb11572fcd6 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HoodieWrapperFileSystem.java @@ -52,6 +52,7 @@ import org.apache.hadoop.util.Progressable; import java.io.IOException; +import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.EnumSet; @@ -1019,34 +1020,34 @@ protected boolean needCreateTempFile() { */ public void createImmutableFileInPath(Path fullPath, Option content) throws HoodieIOException { - FSDataOutputStream fsout = null; + OutputStream out = null; Path tmpPath = null; boolean needTempFile = needCreateTempFile(); try { if (!content.isPresent()) { - fsout = fileSystem.create(fullPath, false); + out = fileSystem.create(fullPath, false); } if (content.isPresent() && needTempFile) { Path parent = fullPath.getParent(); tmpPath = new Path(parent, fullPath.getName() + TMP_PATH_POSTFIX); - fsout = fileSystem.create(tmpPath, false); - fsout.write(content.get()); + out = fileSystem.create(tmpPath, false); + out.write(content.get()); } if (content.isPresent() && !needTempFile) { - fsout = fileSystem.create(fullPath, false); - fsout.write(content.get()); + out = fileSystem.create(fullPath, false); + out.write(content.get()); } } catch (IOException e) { String errorMsg = "Failed to create file " + (tmpPath != null ? tmpPath : fullPath); throw new HoodieIOException(errorMsg, e); } finally { try { - if (null != fsout) { - fsout.close(); + if (null != out) { + out.close(); } } catch (IOException e) { String errorMsg = "Failed to close file " + (needTempFile ? tmpPath : fullPath); diff --git a/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java b/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java index 25470d47d43e..37c573a173c9 100644 --- a/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java +++ b/hudi-io/src/main/java/org/apache/hudi/common/util/FileIOUtils.java @@ -21,8 +21,6 @@ import org.apache.hudi.exception.HoodieIOException; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -116,18 +114,18 @@ public static void copy(InputStream inputStream, OutputStream outputStream) thro public static void copy( FileSystem fileSystem, org.apache.hadoop.fs.Path sourceFilePath, org.apache.hadoop.fs.Path destFilePath) { - FSDataInputStream fsDataInputStream = null; - FSDataOutputStream fsDataOutputStream = null; + InputStream inputStream = null; + OutputStream outputStream = null; try { - fsDataInputStream = fileSystem.open(sourceFilePath); - fsDataOutputStream = fileSystem.create(destFilePath, false); - copy(fsDataInputStream, fsDataOutputStream); + inputStream = fileSystem.open(sourceFilePath); + outputStream = fileSystem.create(destFilePath, false); + copy(inputStream, outputStream); } catch (IOException e) { throw new HoodieIOException(String.format("Cannot copy from %s to %s", sourceFilePath.toString(), destFilePath.toString()), e); } finally { - closeQuietly(fsDataInputStream); - closeQuietly(fsDataOutputStream); + closeQuietly(inputStream); + closeQuietly(outputStream); } } @@ -176,9 +174,9 @@ public static void createFileInPath(FileSystem fileSystem, org.apache.hadoop.fs. } if (content.isPresent()) { - FSDataOutputStream fsout = fileSystem.create(fullPath, true); - fsout.write(content.get()); - fsout.close(); + OutputStream out = fileSystem.create(fullPath, true); + out.write(content.get()); + out.close(); } } catch (IOException e) { LOG.warn("Failed to create file " + fullPath, e); @@ -193,7 +191,7 @@ public static void createFileInPath(FileSystem fileSystem, org.apache.hadoop.fs. } public static Option readDataFromPath(FileSystem fileSystem, org.apache.hadoop.fs.Path detailPath, boolean ignoreIOE) { - try (FSDataInputStream is = fileSystem.open(detailPath)) { + try (InputStream is = fileSystem.open(detailPath)) { return Option.of(FileIOUtils.readAsByteArray(is)); } catch (IOException e) { LOG.warn("Could not read commit details from " + detailPath, e); diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java index 239816c3179e..3d2b0c32f60f 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestCluster.java @@ -38,7 +38,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -174,7 +173,7 @@ private void createCommitFile(HoodieCommitMetadata commitMetadata, String commit byte[] bytes = getUTF8Bytes(commitMetadata.toJsonString()); Path fullPath = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime)); - FSDataOutputStream fsout = dfsCluster.getFileSystem().create(fullPath, true); + OutputStream fsout = dfsCluster.getFileSystem().create(fullPath, true); fsout.write(bytes); fsout.close(); } diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java index 78d3185e6ae8..321ab130e85a 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java @@ -57,7 +57,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -74,6 +73,7 @@ import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.net.URISyntaxException; import java.nio.file.Files; import java.time.Instant; @@ -587,9 +587,9 @@ private static void createDeltaCommitFile(HoodieCommitMetadata deltaCommitMetada private static void createMetaFile(String basePath, String fileName, byte[] bytes) throws IOException { Path fullPath = new Path(basePath + "/" + METAFOLDER_NAME + "/" + fileName); - FSDataOutputStream fsout = fileSystem.create(fullPath, true); - fsout.write(bytes); - fsout.close(); + OutputStream out = fileSystem.create(fullPath, true); + out.write(bytes); + out.close(); } public static Set getCreatedTablesSet() { diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java index a5181972fb84..dd210537d4a7 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/util/ManifestFileWriter.java @@ -28,12 +28,12 @@ import org.apache.hudi.metadata.HoodieMetadataFileSystemView; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedWriter; +import java.io.OutputStream; import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.util.List; @@ -71,7 +71,7 @@ public synchronized void writeManifestFile(boolean useAbsolutePath) { LOG.info("Writing base file names to manifest file: " + baseFiles.size()); } final Path manifestFilePath = getManifestFilePath(useAbsolutePath); - try (FSDataOutputStream outputStream = metaClient.getFs().create(manifestFilePath, true); + try (OutputStream outputStream = metaClient.getFs().create(manifestFilePath, true); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8))) { for (String f : baseFiles) { writer.write(f); diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerDirState.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerDirState.java index 05551dc42dde..5202ef2d05ed 100644 --- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerDirState.java +++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerDirState.java @@ -33,7 +33,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.module.afterburner.AfterburnerModule; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.StringUtils; @@ -42,6 +41,7 @@ import java.io.BufferedWriter; import java.io.IOException; +import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Serializable; import java.nio.charset.StandardCharsets; @@ -365,17 +365,17 @@ private void flushMarkersToFile(int markerFileIndex) { LOG.debug("Write to " + markerDirPath + "/" + MARKERS_FILENAME_PREFIX + markerFileIndex); HoodieTimer timer = HoodieTimer.start(); Path markersFilePath = new Path(markerDirPath, MARKERS_FILENAME_PREFIX + markerFileIndex); - FSDataOutputStream fsDataOutputStream = null; + OutputStream outputStream = null; BufferedWriter bufferedWriter = null; try { - fsDataOutputStream = fileSystem.create(markersFilePath); - bufferedWriter = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); + outputStream = fileSystem.create(markersFilePath); + bufferedWriter = new BufferedWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)); bufferedWriter.write(fileMarkersMap.get(markerFileIndex).toString()); } catch (IOException e) { throw new HoodieIOException("Failed to overwrite marker file " + markersFilePath, e); } finally { closeQuietly(bufferedWriter); - closeQuietly(fsDataOutputStream); + closeQuietly(outputStream); } LOG.debug(markersFilePath.toString() + " written in " + timer.endTimer() + " ms"); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java index d296a65ceb4f..8806ce46ea35 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactionAdminTool.java @@ -28,12 +28,12 @@ import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; import java.io.ObjectOutputStream; +import java.io.OutputStream; import java.io.Serializable; import java.util.List; @@ -107,11 +107,11 @@ public void run(JavaSparkContext jsc) throws Exception { private void serializeOperationResult(FileSystem fs, T result) throws Exception { if ((cfg.outputPath != null) && (result != null)) { Path outputPath = new Path(cfg.outputPath); - FSDataOutputStream fsout = fs.create(outputPath, true); - ObjectOutputStream out = new ObjectOutputStream(fsout); + OutputStream stream = fs.create(outputPath, true); + ObjectOutputStream out = new ObjectOutputStream(stream); out.writeObject(result); out.close(); - fsout.close(); + stream.close(); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index d17fe76668ca..c3e3b4b99fd8 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -41,7 +41,6 @@ import com.codahale.metrics.Snapshot; import com.codahale.metrics.UniformReservoir; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.SparkConf; @@ -50,6 +49,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.OutputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; @@ -189,7 +189,7 @@ private static class Dumper implements Serializable { private final Path dumpPath; private final FileSystem fileSystem; - private FSDataOutputStream outputStream; + private OutputStream outputStream; public Dumper(FileSystem fs, Path dumpPath) { this.dumpPath = dumpPath; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java index 2b2e0dab7369..e4d2bf58e43e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/FilebasedSchemaProvider.java @@ -26,12 +26,12 @@ import org.apache.hudi.utilities.sources.helpers.SanitizationUtils; import org.apache.avro.Schema; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; import java.io.IOException; +import java.io.InputStream; import java.util.Collections; import static org.apache.hudi.common.util.ConfigUtils.checkRequiredConfigProperties; @@ -88,7 +88,7 @@ public Schema getTargetSchema() { private static Schema readAvroSchemaFromFile(String schemaPath, FileSystem fs, boolean sanitizeSchema, String invalidCharMask) { String schemaStr; - try (FSDataInputStream in = fs.open(new Path(schemaPath))) { + try (InputStream in = fs.open(new Path(schemaPath))) { schemaStr = FileIOUtils.readAsUTFString(in); } catch (IOException ioe) { throw new HoodieSchemaProviderException(String.format("Error reading schema from file %s", schemaPath), ioe); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JdbcSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JdbcSource.java index 0efc737623a1..f1845dac34aa 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JdbcSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JdbcSource.java @@ -29,7 +29,6 @@ import org.apache.hudi.utilities.exception.HoodieReadFromSourceException; import org.apache.hudi.utilities.schema.SchemaProvider; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; @@ -45,6 +44,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.InputStream; import java.net.URI; import java.util.Arrays; import java.util.Collections; @@ -84,7 +84,7 @@ private static DataFrameReader validatePropsAndGetDataFrameReader(final SparkSes final TypedProperties properties) throws HoodieException { DataFrameReader dataFrameReader; - FSDataInputStream passwordFileStream = null; + InputStream passwordFileStream = null; try { dataFrameReader = session.read().format("jdbc"); dataFrameReader = dataFrameReader.option( diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java index e05a0c0d05e4..16a523d5ac1f 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestHoodieDeltaStreamer.java @@ -99,7 +99,6 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -131,6 +130,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InputStream; import java.sql.Connection; import java.sql.DriverManager; import java.time.Instant; @@ -1594,7 +1594,7 @@ public void testPayloadClassUpdate() throws Exception { Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); - try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { + try (InputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } @@ -1614,7 +1614,7 @@ public void testPartialPayloadClass() throws Exception { Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); - try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { + try (InputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } assertEquals(new HoodieConfig(props).getString(HoodieTableConfig.PAYLOAD_CLASS_NAME), PartialUpdateAvroPayload.class.getName()); @@ -1639,7 +1639,7 @@ public void testPayloadClassUpdateWithCOWTable() throws Exception { Properties props = new Properties(); String metaPath = dataSetBasePath + "/.hoodie/hoodie.properties"; FileSystem fs = HadoopFSUtils.getFs(cfg.targetBasePath, jsc.hadoopConfiguration()); - try (FSDataInputStream inputStream = fs.open(new Path(metaPath))) { + try (InputStream inputStream = fs.open(new Path(metaPath))) { props.load(inputStream); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java index dca7d8a7ce13..bd67ec267c9b 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java @@ -27,7 +27,6 @@ import org.apache.hudi.utilities.HDFSParquetImporter; import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -43,6 +42,7 @@ import org.junit.jupiter.api.Test; import java.io.IOException; +import java.io.OutputStream; import java.io.Serializable; import java.text.ParseException; import java.util.ArrayList; @@ -272,7 +272,7 @@ public List createUpsertRecords(Path srcFolder) throws ParseExcep } private void createSchemaFile(String schemaFile) throws IOException { - FSDataOutputStream schemaFileOS = dfs().create(new Path(schemaFile)); + OutputStream schemaFileOS = dfs().create(new Path(schemaFile)); schemaFileOS.write(getUTF8Bytes(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); schemaFileOS.close(); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java index 0919a8c31eda..1a660ac71353 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.java @@ -27,7 +27,6 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; @@ -43,6 +42,7 @@ import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; +import java.io.InputStream; import java.util.stream.Stream; import static org.apache.hudi.utilities.testutils.SanitizationTestUtils.generateProperFormattedSchema; @@ -126,7 +126,7 @@ public void testBadAvroSchemaDisabledTest() { private String getJson(String path) { FileSystem fs = HadoopFSUtils.getFs(path, jsc.hadoopConfiguration(), true); String schemaStr; - try (FSDataInputStream in = fs.open(new Path(path))) { + try (InputStream in = fs.open(new Path(path))) { schemaStr = FileIOUtils.readAsUTFString(in); } catch (IOException e) { throw new HoodieIOException("can't read schema file", e); From e38c731f247916bb21ca41ff9d89bfdab149139b Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 26 Feb 2024 20:44:08 -0800 Subject: [PATCH 091/112] [HUDI-7347] Introduce SeekableDataInputStream for random access (#10575) --- .../common/table/log/HoodieLogFileReader.java | 36 +++++++++---- .../table/log/block/HoodieAvroDataBlock.java | 4 +- .../table/log/block/HoodieCDCDataBlock.java | 4 +- .../table/log/block/HoodieCommandBlock.java | 5 +- .../table/log/block/HoodieCorruptBlock.java | 5 +- .../table/log/block/HoodieDataBlock.java | 4 +- .../table/log/block/HoodieDeleteBlock.java | 11 +++- .../table/log/block/HoodieHFileDataBlock.java | 4 +- .../table/log/block/HoodieLogBlock.java | 16 +++--- .../log/block/HoodieParquetDataBlock.java | 4 +- .../fs/HadoopSeekableDataInputStream.java | 48 +++++++++++++++++ .../hudi/io/SeekableDataInputStream.java | 53 +++++++++++++++++++ 12 files changed, 156 insertions(+), 38 deletions(-) create mode 100644 hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopSeekableDataInputStream.java create mode 100644 hudi-io/src/main/java/org/apache/hudi/io/SeekableDataInputStream.java diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index 2df30e7e8fce..c7289106f482 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -37,9 +37,11 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.hadoop.fs.BoundedFsDataInputStream; +import org.apache.hudi.hadoop.fs.HadoopSeekableDataInputStream; import org.apache.hudi.hadoop.fs.SchemeAwareFSDataInputStream; import org.apache.hudi.hadoop.fs.TimedFSDataInputStream; import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.util.IOUtils; import org.apache.hudi.storage.StorageSchemes; @@ -90,7 +92,7 @@ public class HoodieLogFileReader implements HoodieLogFormat.Reader { private final boolean reverseReader; private final boolean enableRecordLookups; private boolean closed = false; - private FSDataInputStream inputStream; + private SeekableDataInputStream inputStream; public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, boolean readBlockLazily) throws IOException { @@ -120,7 +122,7 @@ public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSc Path updatedPath = FSUtils.makeQualified(fs, logFile.getPath()); this.logFile = updatedPath.equals(logFile.getPath()) ? logFile : new HoodieLogFile(updatedPath, logFile.getFileSize()); this.bufferSize = bufferSize; - this.inputStream = getFSDataInputStream(fs, this.logFile, bufferSize); + this.inputStream = getDataInputStream(fs, this.logFile, bufferSize); this.readerSchema = readerSchema; this.readBlockLazily = readBlockLazily; this.reverseReader = reverseReader; @@ -202,7 +204,7 @@ private HoodieLogBlock readBlock() throws IOException { if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) { return HoodieAvroDataBlock.getBlock(content.get(), readerSchema, internalSchema); } else { - return new HoodieAvroDataBlock(() -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, + return new HoodieAvroDataBlock(() -> getDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, getTargetReaderSchemaForBlock(), header, footer, keyField); } @@ -210,7 +212,7 @@ private HoodieLogBlock readBlock() throws IOException { checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); return new HoodieHFileDataBlock( - () -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, + () -> getDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, enableRecordLookups, logFile.getPath(), ConfigUtils.getBooleanWithAltKeys(fs.getConf(), USE_NATIVE_HFILE_READER)); @@ -218,17 +220,17 @@ private HoodieLogBlock readBlock() throws IOException { checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); - return new HoodieParquetDataBlock(() -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, + return new HoodieParquetDataBlock(() -> getDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, getTargetReaderSchemaForBlock(), header, footer, keyField); case DELETE_BLOCK: - return new HoodieDeleteBlock(content, () -> getFSDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), header, footer); + return new HoodieDeleteBlock(content, () -> getDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), header, footer); case COMMAND_BLOCK: - return new HoodieCommandBlock(content, () -> getFSDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), header, footer); + return new HoodieCommandBlock(content, () -> getDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), header, footer); case CDC_DATA_BLOCK: - return new HoodieCDCDataBlock(() -> getFSDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, readerSchema, header, keyField); + return new HoodieCDCDataBlock(() -> getDataInputStream(fs, this.logFile, bufferSize), content, readBlockLazily, logBlockContentLoc, readerSchema, header, keyField); default: throw new HoodieNotSupportedException("Unsupported Block " + blockType); @@ -270,7 +272,7 @@ private HoodieLogBlock createCorruptBlock(long blockStartPos) throws IOException Option corruptedBytes = HoodieLogBlock.tryReadContent(inputStream, corruptedBlockSize, readBlockLazily); HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, corruptedBlockSize, nextBlockOffset); - return new HoodieCorruptBlock(corruptedBytes, () -> getFSDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), new HashMap<>(), new HashMap<>()); + return new HoodieCorruptBlock(corruptedBytes, () -> getDataInputStream(fs, this.logFile, bufferSize), readBlockLazily, Option.of(logBlockContentLoc), new HashMap<>(), new HashMap<>()); } private boolean isBlockCorrupted(int blocksize) throws IOException { @@ -474,9 +476,23 @@ public void remove() { throw new UnsupportedOperationException("Remove not supported for HoodieLogFileReader"); } + /** + * Fetch the right {@link SeekableDataInputStream} to be used by wrapping with required input streams. + * + * @param fs instance of {@link FileSystem} in use. + * @param bufferSize buffer size to be used. + * @return the right {@link SeekableDataInputStream} as required. + */ + private static SeekableDataInputStream getDataInputStream(FileSystem fs, + HoodieLogFile logFile, + int bufferSize) { + return new HadoopSeekableDataInputStream(getFSDataInputStream(fs, logFile, bufferSize)); + } + /** * Fetch the right {@link FSDataInputStream} to be used by wrapping with required input streams. - * @param fs instance of {@link FileSystem} in use. + * + * @param fs instance of {@link FileSystem} in use. * @param bufferSize buffer size to be used. * @return the right {@link FSDataInputStream} as required. */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java index a38f6fcaa985..620e123059b1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.util.collection.CloseableMappingIterator; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.internal.schema.InternalSchema; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; @@ -39,7 +40,6 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.apache.hadoop.fs.FSDataInputStream; import javax.annotation.Nonnull; @@ -74,7 +74,7 @@ public class HoodieAvroDataBlock extends HoodieDataBlock { private final ThreadLocal encoderCache = new ThreadLocal<>(); - public HoodieAvroDataBlock(Supplier inputStreamSupplier, + public HoodieAvroDataBlock(Supplier inputStreamSupplier, Option content, boolean readBlockLazily, HoodieLogBlockContentLocation logBlockContentLocation, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCDCDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCDCDataBlock.java index 8f2cd8c64478..44140b5b6af8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCDCDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCDCDataBlock.java @@ -20,9 +20,9 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.avro.Schema; -import org.apache.hadoop.fs.FSDataInputStream; import java.util.HashMap; import java.util.List; @@ -35,7 +35,7 @@ public class HoodieCDCDataBlock extends HoodieAvroDataBlock { public HoodieCDCDataBlock( - Supplier inputStreamSupplier, + Supplier inputStreamSupplier, Option content, boolean readBlockLazily, HoodieLogBlockContentLocation logBlockContentLocation, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java index ed5338344ad8..deeb903cd180 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java @@ -19,8 +19,7 @@ package org.apache.hudi.common.table.log.block; import org.apache.hudi.common.util.Option; - -import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hudi.io.SeekableDataInputStream; import java.util.HashMap; import java.util.Map; @@ -44,7 +43,7 @@ public HoodieCommandBlock(Map header) { this(Option.empty(), null, false, Option.empty(), header, new HashMap<>()); } - public HoodieCommandBlock(Option content, Supplier inputStreamSupplier, boolean readBlockLazily, + public HoodieCommandBlock(Option content, Supplier inputStreamSupplier, boolean readBlockLazily, Option blockContentLocation, Map header, Map footer) { super(header, footer, blockContentLocation, content, inputStreamSupplier, readBlockLazily); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java index 928ae780ee62..19d704c25952 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java @@ -19,8 +19,7 @@ package org.apache.hudi.common.table.log.block; import org.apache.hudi.common.util.Option; - -import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hudi.io.SeekableDataInputStream; import java.io.IOException; import java.util.Map; @@ -32,7 +31,7 @@ */ public class HoodieCorruptBlock extends HoodieLogBlock { - public HoodieCorruptBlock(Option corruptedBytes, Supplier inputStreamSupplier, boolean readBlockLazily, + public HoodieCorruptBlock(Option corruptedBytes, Supplier inputStreamSupplier, boolean readBlockLazily, Option blockContentLocation, Map header, Map footer) { super(header, footer, blockContentLocation, corruptedBytes, inputStreamSupplier, readBlockLazily); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java index 64781bdb55b6..22dfdd4e7ea1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java @@ -25,9 +25,9 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.avro.Schema; -import org.apache.hadoop.fs.FSDataInputStream; import java.io.IOException; import java.util.HashSet; @@ -88,7 +88,7 @@ public HoodieDataBlock(List records, * NOTE: This ctor is used on the write-path (ie when records ought to be written into the log) */ protected HoodieDataBlock(Option content, - Supplier inputStreamSupplier, + Supplier inputStreamSupplier, boolean readBlockLazily, Option blockContentLocation, Option readerSchema, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java index 1f92c21e0416..1639b835ab6d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SerializationUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.util.Lazy; import org.apache.avro.io.BinaryDecoder; @@ -36,7 +37,6 @@ import org.apache.avro.io.EncoderFactory; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; -import org.apache.hadoop.fs.FSDataInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -73,9 +73,16 @@ public HoodieDeleteBlock(DeleteRecord[] recordsToDelete, Map content, Supplier inputStreamSupplier, boolean readBlockLazily, + public HoodieDeleteBlock(Option content, Supplier inputStreamSupplier, boolean readBlockLazily, Option blockContentLocation, Map header, Map footer) { + // Setting `shouldWriteRecordPositions` to false as this constructor is only used by the reader + this(content, inputStreamSupplier, readBlockLazily, blockContentLocation, header, footer, false); + } + + HoodieDeleteBlock(Option content, Supplier inputStreamSupplier, boolean readBlockLazily, + Option blockContentLocation, Map header, + Map footer, boolean shouldWriteRecordPositions) { super(header, footer, blockContentLocation, content, inputStreamSupplier, readBlockLazily); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 6b06bc51b2f6..eeed39358725 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -34,6 +34,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; @@ -44,7 +45,6 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -85,7 +85,7 @@ public class HoodieHFileDataBlock extends HoodieDataBlock { private final Path pathForReader; private final HoodieConfig hFileReaderConfig; - public HoodieHFileDataBlock(Supplier inputStreamSupplier, + public HoodieHFileDataBlock(Supplier inputStreamSupplier, Option content, boolean readBlockLazily, HoodieLogBlockContentLocation logBlockContentLocation, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java index 0cf37c851057..a062ab33f2a7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java @@ -25,16 +25,15 @@ import org.apache.hudi.common.util.TypeUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.roaringbitmap.longlong.Roaring64NavigableMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.EOFException; import java.io.IOException; @@ -65,10 +64,7 @@ public abstract class HoodieLogBlock { private final Option blockContentLocation; // data for a specific block private Option content; - // TODO : change this to just InputStream so this works for any FileSystem - // create handlers to return specific type of inputstream based on FS - // input stream corresponding to the log file where this logBlock belongs - private final Supplier inputStreamSupplier; + private final Supplier inputStreamSupplier; // Toggle flag, whether to read blocks lazily (I/O intensive) or not (Memory intensive) protected boolean readBlockLazily; @@ -77,7 +73,7 @@ public HoodieLogBlock( @Nonnull Map logBlockFooter, @Nonnull Option blockContentLocation, @Nonnull Option content, - @Nullable Supplier inputStreamSupplier, + @Nullable Supplier inputStreamSupplier, boolean readBlockLazily) { this.logBlockHeader = logBlockHeader; this.logBlockFooter = logBlockFooter; @@ -248,7 +244,7 @@ public static byte[] getLogMetadataBytes(Map metadat /** * Convert bytes to LogMetadata, follow the same order as {@link HoodieLogBlock#getLogMetadataBytes}. */ - public static Map getLogMetadata(DataInputStream dis) throws IOException { + public static Map getLogMetadata(SeekableDataInputStream dis) throws IOException { Map metadata = new HashMap<>(); // 1. Read the metadata written out @@ -272,7 +268,7 @@ public static Map getLogMetadata(DataInputStream dis * Read or Skip block content of a log block in the log file. Depends on lazy reading enabled in * {@link HoodieMergedLogRecordScanner} */ - public static Option tryReadContent(FSDataInputStream inputStream, Integer contentLength, boolean readLazily) + public static Option tryReadContent(SeekableDataInputStream inputStream, Integer contentLength, boolean readLazily) throws IOException { if (readLazily) { // Seek to the end of the content block @@ -294,7 +290,7 @@ protected void inflate() throws HoodieIOException { checkState(!content.isPresent(), "Block has already been inflated"); checkState(inputStreamSupplier != null, "Block should have input-stream provided"); - try (FSDataInputStream inputStream = inputStreamSupplier.get()) { + try (SeekableDataInputStream inputStream = inputStreamSupplier.get()) { content = Option.of(new byte[(int) this.getBlockContentLocation().get().getBlockSize()]); inputStream.seek(this.getBlockContentLocation().get().getContentPositionInLogFile()); inputStream.readFully(content.get(), 0, content.get().length); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java index b026b85c3a3b..92c08bf1153d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java @@ -26,13 +26,13 @@ import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.util.collection.ClosableIterator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.io.SeekableDataInputStream; import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.io.storage.HoodieFileWriter; import org.apache.hudi.io.storage.HoodieFileWriterFactory; import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetWriter; @@ -63,7 +63,7 @@ public class HoodieParquetDataBlock extends HoodieDataBlock { private final Option expectedCompressionRatio; private final Option useDictionaryEncoding; - public HoodieParquetDataBlock(Supplier inputStreamSupplier, + public HoodieParquetDataBlock(Supplier inputStreamSupplier, Option content, boolean readBlockLazily, HoodieLogBlockContentLocation logBlockContentLocation, diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopSeekableDataInputStream.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopSeekableDataInputStream.java new file mode 100644 index 000000000000..ae10ca0ac3f6 --- /dev/null +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/hadoop/fs/HadoopSeekableDataInputStream.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.hadoop.fs; + +import org.apache.hudi.io.SeekableDataInputStream; + +import org.apache.hadoop.fs.FSDataInputStream; + +import java.io.IOException; + +/** + * An implementation of {@link SeekableDataInputStream} based on Hadoop's {@link FSDataInputStream} + */ +public class HadoopSeekableDataInputStream extends SeekableDataInputStream { + private final FSDataInputStream stream; + + public HadoopSeekableDataInputStream(FSDataInputStream stream) { + super(stream); + this.stream = stream; + } + + @Override + public long getPos() throws IOException { + return stream.getPos(); + } + + @Override + public void seek(long pos) throws IOException { + stream.seek(pos); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/io/SeekableDataInputStream.java b/hudi-io/src/main/java/org/apache/hudi/io/SeekableDataInputStream.java new file mode 100644 index 000000000000..c76fd3be32d9 --- /dev/null +++ b/hudi-io/src/main/java/org/apache/hudi/io/SeekableDataInputStream.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.io; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A {@link InputStream} that supports random access by allowing to seek to + * an arbitrary position within the stream and read the content. + */ +public abstract class SeekableDataInputStream extends DataInputStream { + /** + * Creates a DataInputStream that uses the specified + * underlying InputStream. + * + * @param in the specified input stream + */ + public SeekableDataInputStream(InputStream in) { + super(in); + } + + /** + * @return current position of the stream. The next read() will be from that location. + */ + public abstract long getPos() throws IOException; + + /** + * Seeks to a position within the stream. + * + * @param pos target position to seek to. + * @throws IOException upon error. + */ + public abstract void seek(long pos) throws IOException; +} From aef157a504664fc5bc493f031e3926eb3e8465b7 Mon Sep 17 00:00:00 2001 From: wang guo <57866042+1574720406qq@users.noreply.github.com> Date: Thu, 1 Feb 2024 09:10:16 +0800 Subject: [PATCH 092/112] [MINOR] Add serialVersionUID to HoodieRecord class (#10592) --- .../src/main/java/org/apache/hudi/common/model/HoodieRecord.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java index f62ddfe77433..c220fac720d8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java @@ -47,6 +47,7 @@ */ public abstract class HoodieRecord implements HoodieRecordCompatibilityInterface, KryoSerializable, Serializable { + private static final long serialVersionUID = 3015229555587559252L; public static final String COMMIT_TIME_METADATA_FIELD = HoodieMetadataField.COMMIT_TIME_METADATA_FIELD.getFieldName(); public static final String COMMIT_SEQNO_METADATA_FIELD = HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD.getFieldName(); public static final String RECORD_KEY_METADATA_FIELD = HoodieMetadataField.RECORD_KEY_METADATA_FIELD.getFieldName(); From 104fa7daa215126227636e2e978b1ce312bea4ed Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:18:41 -0800 Subject: [PATCH 093/112] [HUDI-6902] Fix a test about timestamp format (#10606) --- .../apache/hudi/hadoop/TestHoodieParquetInputFormat.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index 37ec5cef24f5..f824753b6bbb 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -66,12 +66,14 @@ import java.io.IOException; import java.nio.file.Paths; import java.sql.Timestamp; +import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.ArrayList; import java.util.Collections; +import java.util.Date; import java.util.List; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; @@ -813,7 +815,11 @@ public void testHoodieParquetInputFormatReadTimeType() throws IOException { Instant.ofEpochMilli(testTimestampLong), ZoneOffset.UTC); assertEquals(Timestamp.valueOf(localDateTime).toString(), String.valueOf(writable.get()[0])); } else { - assertEquals(new Timestamp(testTimestampLong).toString(), String.valueOf(writable.get()[0])); + Date date = new Date(); + date.setTime(testTimestampLong); + assertEquals( + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(date), + String.valueOf(writable.get()[0])); } // test long assertEquals(testTimestampLong * 1000, ((LongWritable) writable.get()[1]).get()); From cb2d94b31146b12c97cb048698c42ccaaeff41dd Mon Sep 17 00:00:00 2001 From: Aditya Goenka <63430370+ad1happy2go@users.noreply.github.com> Date: Sat, 3 Feb 2024 03:59:58 +0530 Subject: [PATCH 094/112] [HUDI-6868] Support extracting passwords from credential store for Hive Sync (#10577) Co-authored-by: Danny Chan --- .../org/apache/hudi/HoodieSparkSqlWriter.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 5c6f5b451cdf..eea93e426fba 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -21,6 +21,8 @@ import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.shims.ShimLoader import org.apache.hudi.AutoRecordKeyGenerationUtils.mayBeValidateParamsForAutoGenerationOfRecordKeys import org.apache.hudi.AvroConversionUtils.{convertAvroSchemaToStructType, convertStructTypeToAvroSchema, getAvroRecordNameAndNamespace} import org.apache.hudi.DataSourceOptionsHelper.fetchMissingWriteConfigsFromTableConfig @@ -1000,7 +1002,19 @@ class HoodieSparkSqlWriterInternal { properties.put(HiveSyncConfigHolder.HIVE_SYNC_SCHEMA_STRING_LENGTH_THRESHOLD.key, spark.sessionState.conf.getConf(StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD).toString) properties.put(HoodieSyncConfig.META_SYNC_SPARK_VERSION.key, SPARK_VERSION) properties.put(HoodieSyncConfig.META_SYNC_USE_FILE_LISTING_FROM_METADATA.key, hoodieConfig.getBoolean(HoodieMetadataConfig.ENABLE)) - + if ((fs.getConf.get(HiveConf.ConfVars.METASTOREPWD.varname) == null || fs.getConf.get(HiveConf.ConfVars.METASTOREPWD.varname).isEmpty) && + (properties.get(HiveSyncConfigHolder.HIVE_PASS.key()) == null || properties.get(HiveSyncConfigHolder.HIVE_PASS.key()).toString.isEmpty)){ + try { + val passwd = ShimLoader.getHadoopShims.getPassword(spark.sparkContext.hadoopConfiguration, HiveConf.ConfVars.METASTOREPWD.varname) + if (passwd != null && !passwd.isEmpty) { + fs.getConf.set(HiveConf.ConfVars.METASTOREPWD.varname, passwd) + properties.put(HiveSyncConfigHolder.HIVE_PASS.key(), passwd) + } + } catch { + case e: Exception => + log.info("Exception while trying to get Meta Sync password from hadoop credential store", e) + } + } // Collect exceptions in list because we want all sync to run. Then we can throw val failedMetaSyncs = new mutable.HashMap[String,HoodieException]() syncClientToolClassSet.foreach(impl => { From fa6e499efc47e3055b492e8ceb497b59d4fc3fc8 Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Fri, 2 Feb 2024 20:37:41 -0800 Subject: [PATCH 095/112] [Hudi-6902] Fix the timestamp format in hive test (#10610) --- .../apache/hudi/hadoop/TestHoodieParquetInputFormat.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index f824753b6bbb..6b4b4fad8fdc 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -46,6 +46,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; @@ -817,9 +818,9 @@ public void testHoodieParquetInputFormatReadTimeType() throws IOException { } else { Date date = new Date(); date.setTime(testTimestampLong); - assertEquals( - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(date), - String.valueOf(writable.get()[0])); + Timestamp actualTime = ((TimestampWritable) writable.get()[0]).getTimestamp(); + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + assertEquals(dateFormat.format(date), dateFormat.format(actualTime)); } // test long assertEquals(testTimestampLong * 1000, ((LongWritable) writable.get()[1]).get()); From 4a0429297fc891be13f80d646677b3e561e0b6cd Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Sat, 3 Feb 2024 14:40:18 -0500 Subject: [PATCH 096/112] [HUDI-7284] Fix bad method name getLastPendingClusterCommit to getLastPendingClusterInstant (#10613) Co-authored-by: Jonathan Vexler <=> --- .../hudi/common/table/timeline/HoodieDefaultTimeline.java | 2 +- .../apache/hudi/common/table/timeline/HoodieTimeline.java | 2 +- .../org/apache/hudi/common/util/TestClusteringUtils.java | 6 +++--- .../java/org/apache/hudi/utilities/streamer/StreamSync.java | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java index 046ef8e7591d..e3c468919fe9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java @@ -514,7 +514,7 @@ public Option getLastClusterCommit() { } @Override - public Option getLastPendingClusterCommit() { + public Option getLastPendingClusterInstant() { return Option.fromJavaOptional(filterPendingReplaceTimeline() .getReverseOrderedInstants() .filter(i -> ClusteringUtils.isPendingClusteringInstant(this, i)).findFirst()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java index 43c70cbc0003..11979a2c9e88 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java @@ -405,7 +405,7 @@ public interface HoodieTimeline extends Serializable { * get the most recent pending cluster commit if present * */ - public Option getLastPendingClusterCommit(); + public Option getLastPendingClusterInstant(); /** * Read the completed instant details. diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java index 244ee1dba3ae..5f2f050a17a9 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestClusteringUtils.java @@ -104,20 +104,20 @@ public void testClusteringPlanMultipleInstants() throws Exception { validateClusteringInstant(fileIds1, partitionPath1, clusterTime1, fileGroupToInstantMap); validateClusteringInstant(fileIds2, partitionPath1, clusterTime, fileGroupToInstantMap); validateClusteringInstant(fileIds3, partitionPath1, clusterTime, fileGroupToInstantMap); - Option lastPendingClustering = metaClient.getActiveTimeline().getLastPendingClusterCommit(); + Option lastPendingClustering = metaClient.getActiveTimeline().getLastPendingClusterInstant(); assertTrue(lastPendingClustering.isPresent()); assertEquals("2", lastPendingClustering.get().getTimestamp()); //check that it still gets picked if it is inflight HoodieInstant inflight = metaClient.getActiveTimeline().transitionReplaceRequestedToInflight(lastPendingClustering.get(), Option.empty()); assertEquals(HoodieInstant.State.INFLIGHT, inflight.getState()); - lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterCommit(); + lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterInstant(); assertEquals("2", lastPendingClustering.get().getTimestamp()); //now that it is complete, the first instant should be picked HoodieInstant complete = metaClient.getActiveTimeline().transitionReplaceInflightToComplete(inflight, Option.empty()); assertEquals(HoodieInstant.State.COMPLETED, complete.getState()); - lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterCommit(); + lastPendingClustering = metaClient.reloadActiveTimeline().getLastPendingClusterInstant(); assertEquals("1", lastPendingClustering.get().getTimestamp()); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java index a55509eadc05..ce8d5f80af35 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/streamer/StreamSync.java @@ -454,7 +454,7 @@ public Pair, JavaRDD> syncOnce() throws IOException private Option getLastPendingClusteringInstant(Option commitTimelineOpt) { if (commitTimelineOpt.isPresent()) { - Option pendingClusteringInstant = commitTimelineOpt.get().getLastPendingClusterCommit(); + Option pendingClusteringInstant = commitTimelineOpt.get().getLastPendingClusterInstant(); return pendingClusteringInstant.isPresent() ? Option.of(pendingClusteringInstant.get().getTimestamp()) : Option.empty(); } return Option.empty(); From 692f0d1c22303e823784ba82c7437b15226b3436 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Mon, 5 Feb 2024 00:32:38 +0100 Subject: [PATCH 097/112] [HUDI-7351] Implement partition pushdown for glue (#10604) --- hudi-aws/pom.xml | 16 ++ .../aws/sync/AWSGlueCatalogSyncClient.java | 70 +++++++-- .../aws/sync/util/GlueFilterGenVisitor.java | 32 ++++ .../util/GluePartitionFilterGenerator.java | 29 ++++ .../apache/hudi/config/HoodieAWSConfig.java | 14 ++ .../aws/sync/ITTestGluePartitionPushdown.java | 133 ++++++++++++++++ .../aws/sync/TestGluePartitionPushdown.java | 143 ++++++++++++++++++ .../org/apache/hudi/hive/HiveSyncTool.java | 5 +- .../hudi/hive/HoodieHiveSyncClient.java | 6 + .../hudi/hive/util/FilterGenVisitor.java | 2 +- .../hive/util/PartitionFilterGenerator.java | 14 +- .../util/TestPartitionFilterGenerator.java | 26 ++-- .../sync/common/HoodieMetaSyncOperations.java | 7 + pom.xml | 2 + 14 files changed, 460 insertions(+), 39 deletions(-) create mode 100644 hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GlueFilterGenVisitor.java create mode 100644 hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GluePartitionFilterGenerator.java create mode 100644 hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java create mode 100644 hudi-aws/src/test/java/org/apache/hudi/aws/sync/TestGluePartitionPushdown.java diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index 9768a4f56235..57aaf22216c5 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -31,6 +31,7 @@ 1.15.0 + latest @@ -255,6 +256,21 @@ + + motoserver/moto:${moto.version} + it-aws + + + ${moto.port}:${moto.port} + + + + ${moto.endpoint}/moto-api/ + + + + + diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index e038b9539a70..ab48080be1e7 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -18,6 +18,7 @@ package org.apache.hudi.aws.sync; +import org.apache.hudi.aws.sync.util.GluePartitionFilterGenerator; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.util.CollectionUtils; @@ -28,7 +29,9 @@ import org.apache.hudi.sync.common.model.FieldSchema; import org.apache.hudi.sync.common.model.Partition; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.glue.GlueAsyncClient; +import software.amazon.awssdk.services.glue.GlueAsyncClientBuilder; import software.amazon.awssdk.services.glue.model.AlreadyExistsException; import software.amazon.awssdk.services.glue.model.BatchCreatePartitionRequest; import software.amazon.awssdk.services.glue.model.BatchCreatePartitionResponse; @@ -66,6 +69,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.net.URI; +import java.net.URISyntaxException; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; @@ -84,6 +89,8 @@ import static org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.META_SYNC_PARTITION_INDEX_FIELDS_ENABLE; +import static org.apache.hudi.config.HoodieAWSConfig.AWS_GLUE_ENDPOINT; +import static org.apache.hudi.config.HoodieAWSConfig.AWS_GLUE_REGION; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE; import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType; @@ -103,7 +110,7 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { private static final Logger LOG = LoggerFactory.getLogger(AWSGlueCatalogSyncClient.class); private static final int MAX_PARTITIONS_PER_REQUEST = 100; private static final int MAX_DELETE_PARTITIONS_PER_REQUEST = 25; - private final GlueAsyncClient awsGlue; + protected final GlueAsyncClient awsGlue; private static final String GLUE_PARTITION_INDEX_ENABLE = "partition_filtering.enabled"; private static final int PARTITION_INDEX_MAX_NUMBER = 3; /** @@ -118,7 +125,16 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { public AWSGlueCatalogSyncClient(HiveSyncConfig config) { super(config); - this.awsGlue = GlueAsyncClient.builder().build(); + try { + GlueAsyncClientBuilder awsGlueBuilder = GlueAsyncClient.builder(); + awsGlueBuilder = config.getString(AWS_GLUE_ENDPOINT) == null ? awsGlueBuilder : + awsGlueBuilder.endpointOverride(new URI(config.getString(AWS_GLUE_ENDPOINT))); + awsGlueBuilder = config.getString(AWS_GLUE_REGION) == null ? awsGlueBuilder : + awsGlueBuilder.region(Region.of(config.getString(AWS_GLUE_REGION))); + this.awsGlue = awsGlueBuilder.build(); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } this.databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME); this.skipTableArchive = config.getBooleanOrDefault(GlueCatalogSyncClientConfig.GLUE_SKIP_TABLE_ARCHIVE); this.enableMetadataTable = Boolean.toString(config.getBoolean(GLUE_METADATA_FILE_LISTING)).toUpperCase(); @@ -127,25 +143,42 @@ public AWSGlueCatalogSyncClient(HiveSyncConfig config) { @Override public List getAllPartitions(String tableName) { try { - List partitions = new ArrayList<>(); - String nextToken = null; - do { - GetPartitionsResponse result = awsGlue.getPartitions(GetPartitionsRequest.builder() - .databaseName(databaseName) - .tableName(tableName) - .nextToken(nextToken) - .build()).get(); - partitions.addAll(result.partitions().stream() - .map(p -> new Partition(p.values(), p.storageDescriptor().location())) - .collect(Collectors.toList())); - nextToken = result.nextToken(); - } while (nextToken != null); - return partitions; + return getPartitions(GetPartitionsRequest.builder() + .databaseName(databaseName) + .tableName(tableName)); } catch (Exception e) { throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(databaseName, tableName), e); } } + @Override + public List getPartitionsByFilter(String tableName, String filter) { + try { + return getPartitions(GetPartitionsRequest.builder() + .databaseName(databaseName) + .tableName(tableName) + .expression(filter)); + } catch (Exception e) { + throw new HoodieGlueSyncException("Failed to get partitions for table " + tableId(databaseName, tableName) + " from expression: " + filter, e); + } + } + + private List getPartitions(GetPartitionsRequest.Builder partitionRequestBuilder) throws InterruptedException, ExecutionException { + List partitions = new ArrayList<>(); + String nextToken = null; + do { + GetPartitionsResponse result = awsGlue.getPartitions(partitionRequestBuilder + .excludeColumnSchema(true) + .nextToken(nextToken) + .build()).get(); + partitions.addAll(result.partitions().stream() + .map(p -> new Partition(p.values(), p.storageDescriptor().location())) + .collect(Collectors.toList())); + nextToken = result.nextToken(); + } while (nextToken != null); + return partitions; + } + @Override public void addPartitionsToTable(String tableName, List partitionsToAdd) { if (partitionsToAdd.isEmpty()) { @@ -697,6 +730,11 @@ public void deleteLastReplicatedTimeStamp(String tableName) { throw new UnsupportedOperationException("Not supported: `deleteLastReplicatedTimeStamp`"); } + @Override + public String generatePushDownFilter(List writtenPartitions, List partitionFields) { + return new GluePartitionFilterGenerator().generatePushDownFilter(writtenPartitions, partitionFields, (HiveSyncConfig) config); + } + private List getColumnsFromSchema(Map mapSchema) { List cols = new ArrayList<>(); for (String key : mapSchema.keySet()) { diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GlueFilterGenVisitor.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GlueFilterGenVisitor.java new file mode 100644 index 000000000000..859e01032103 --- /dev/null +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GlueFilterGenVisitor.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync.util; + +import org.apache.hudi.hive.util.FilterGenVisitor; + +public class GlueFilterGenVisitor extends FilterGenVisitor { + + @Override + protected String quoteStringLiteral(String value) { + // Glue uses jSQLParser. + // https://jsqlparser.github.io/JSqlParser/usage.html#define-the-parser-features + return "'" + (value.contains("'") ? value.replaceAll("'", "''") : value) + "'"; + } + +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GluePartitionFilterGenerator.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GluePartitionFilterGenerator.java new file mode 100644 index 000000000000..c9a8605a2270 --- /dev/null +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/util/GluePartitionFilterGenerator.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync.util; + +import org.apache.hudi.expression.Expression; +import org.apache.hudi.hive.util.PartitionFilterGenerator; + +public class GluePartitionFilterGenerator extends PartitionFilterGenerator { + + protected String generateFilterString(Expression filter) { + return filter.accept(new GlueFilterGenVisitor()); + } +} diff --git a/hudi-aws/src/main/java/org/apache/hudi/config/HoodieAWSConfig.java b/hudi-aws/src/main/java/org/apache/hudi/config/HoodieAWSConfig.java index 45d6878fa3df..8eb76573d0e1 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/config/HoodieAWSConfig.java +++ b/hudi-aws/src/main/java/org/apache/hudi/config/HoodieAWSConfig.java @@ -69,6 +69,20 @@ public class HoodieAWSConfig extends HoodieConfig { .sinceVersion("0.10.0") .withDocumentation("AWS session token"); + public static final ConfigProperty AWS_GLUE_ENDPOINT = ConfigProperty + .key("hoodie.aws.glue.endpoint") + .noDefaultValue() + .markAdvanced() + .sinceVersion("0.14.2") + .withDocumentation("Aws glue endpoint"); + + public static final ConfigProperty AWS_GLUE_REGION = ConfigProperty + .key("hoodie.aws.glue.region") + .noDefaultValue() + .markAdvanced() + .sinceVersion("0.14.2") + .withDocumentation("Aws glue endpoint"); + private HoodieAWSConfig() { super(); } diff --git a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java new file mode 100644 index 000000000000..940fbfb0bf3f --- /dev/null +++ b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.config.HoodieAWSConfig; +import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.sync.common.model.FieldSchema; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.glue.model.Column; +import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; +import software.amazon.awssdk.services.glue.model.CreatePartitionRequest; +import software.amazon.awssdk.services.glue.model.CreateTableRequest; +import software.amazon.awssdk.services.glue.model.DatabaseInput; +import software.amazon.awssdk.services.glue.model.DeleteDatabaseRequest; +import software.amazon.awssdk.services.glue.model.DeleteTableRequest; +import software.amazon.awssdk.services.glue.model.PartitionInput; +import software.amazon.awssdk.services.glue.model.SerDeInfo; +import software.amazon.awssdk.services.glue.model.StorageDescriptor; +import software.amazon.awssdk.services.glue.model.TableInput; + +import java.io.IOException; +import java.nio.file.Files; +import java.time.Instant; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; + +public class ITTestGluePartitionPushdown { + + private static final String MOTO_ENDPOINT = "http://localhost:5000"; + private static final String DB_NAME = "db_name"; + private static final String TABLE_NAME = "tbl_name"; + private String basePath = Files.createTempDirectory("hivesynctest" + Instant.now().toEpochMilli()).toUri().toString(); + private String tablePath = basePath + "/" + TABLE_NAME; + private TypedProperties hiveSyncProps; + private AWSGlueCatalogSyncClient glueSync; + private FileSystem fileSystem; + private Column[] partitionsColumn = {Column.builder().name("part1").type("int").build(), Column.builder().name("part2").type("string").build()}; + List partitionsFieldSchema = Arrays.asList(new FieldSchema("part1", "int"), new FieldSchema("part2", "string")); + + public ITTestGluePartitionPushdown() throws IOException {} + + @BeforeEach + public void setUp() throws Exception { + hiveSyncProps = new TypedProperties(); + hiveSyncProps.setProperty(HoodieAWSConfig.AWS_ACCESS_KEY.key(), "dummy"); + hiveSyncProps.setProperty(HoodieAWSConfig.AWS_SECRET_KEY.key(), "dummy"); + hiveSyncProps.setProperty(HoodieAWSConfig.AWS_SESSION_TOKEN.key(), "dummy"); + hiveSyncProps.setProperty(HoodieAWSConfig.AWS_GLUE_ENDPOINT.key(), MOTO_ENDPOINT); + hiveSyncProps.setProperty(HoodieAWSConfig.AWS_GLUE_REGION.key(), "eu-west-1"); + hiveSyncProps.setProperty(META_SYNC_BASE_PATH.key(), tablePath); + hiveSyncProps.setProperty(META_SYNC_DATABASE_NAME.key(), DB_NAME); + + HiveSyncConfig hiveSyncConfig = new HiveSyncConfig(hiveSyncProps, new Configuration()); + fileSystem = hiveSyncConfig.getHadoopFileSystem(); + fileSystem.mkdirs(new Path(tablePath)); + Configuration configuration = new Configuration(); + HoodieTableMetaClient.withPropertyBuilder() + .setTableType(HoodieTableType.COPY_ON_WRITE) + .setTableName(TABLE_NAME) + .setPayloadClass(HoodieAvroPayload.class) + .initTable(configuration, tablePath); + + glueSync = new AWSGlueCatalogSyncClient(new HiveSyncConfig(hiveSyncProps)); + glueSync.awsGlue.createDatabase(CreateDatabaseRequest.builder().databaseInput(DatabaseInput.builder().name(DB_NAME).build()).build()).get(); + + glueSync.awsGlue.createTable(CreateTableRequest.builder().databaseName(DB_NAME) + .tableInput(TableInput.builder().name(TABLE_NAME).partitionKeys( + partitionsColumn) + .storageDescriptor( + StorageDescriptor.builder() + .serdeInfo(SerDeInfo.builder().serializationLibrary("").build()) + .location(tablePath) + .columns( + Column.builder().name("col1").type("string").build() + ) + .build()) + .build()).build()).get(); + } + + @AfterEach + public void teardown() throws Exception { + glueSync.awsGlue.deleteTable(DeleteTableRequest.builder().databaseName(DB_NAME).name(TABLE_NAME).build()).get(); + glueSync.awsGlue.deleteDatabase(DeleteDatabaseRequest.builder().name(DB_NAME).build()).get(); + fileSystem.delete(new Path(tablePath), true); + } + + @Test + public void testEmptyPartitionShouldReturnEmpty() { + Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, + glueSync.generatePushDownFilter(Arrays.asList("1/bar"), partitionsFieldSchema)).size()); + } + + @Test + public void testPresentPartitionShouldReturnIt() throws ExecutionException, InterruptedException { + glueSync.awsGlue.createPartition(CreatePartitionRequest.builder().databaseName(DB_NAME).tableName(TABLE_NAME) + .partitionInput(PartitionInput.builder() + .storageDescriptor(StorageDescriptor.builder().columns(partitionsColumn).build()) + .values("1", "b'ar").build()).build()).get(); + + Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, + glueSync.generatePushDownFilter(Arrays.asList("1/b'ar", "2/foo", "1/b''ar"), partitionsFieldSchema)).size()); + } +} diff --git a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/TestGluePartitionPushdown.java b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/TestGluePartitionPushdown.java new file mode 100644 index 000000000000..d0fe7bf2922d --- /dev/null +++ b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/TestGluePartitionPushdown.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.aws.sync; + +import org.apache.hudi.aws.sync.util.GluePartitionFilterGenerator; +import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.sync.common.model.FieldSchema; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import static org.apache.hudi.hive.HiveSyncConfig.HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestGluePartitionPushdown { + + GluePartitionFilterGenerator partitionFilterGenerator = new GluePartitionFilterGenerator(); + @Test + public void testPushDownFilters() { + Properties props = new Properties(); + HiveSyncConfig config = new HiveSyncConfig(props); + List partitionFieldSchemas = new ArrayList<>(4); + partitionFieldSchemas.add(new FieldSchema("date", "date")); + partitionFieldSchemas.add(new FieldSchema("year", "string")); + partitionFieldSchemas.add(new FieldSchema("month", "int")); + partitionFieldSchemas.add(new FieldSchema("day", "bigint")); + + List writtenPartitions = new ArrayList<>(); + writtenPartitions.add("2022-09-01/2022/9/1"); + assertEquals("(((date = 2022-09-01 AND year = '2022') AND month = 9) AND day = 1)", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + writtenPartitions.add("2022-09-02/2022/9/2"); + assertEquals( + "((((date = 2022-09-01 AND year = '2022') AND month = 9) AND day = 1) OR (((date = 2022-09-02 AND year = '2022') AND month = 9) AND day = 2))", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + // If there are incompatible types to convert as filters inside partition + partitionFieldSchemas.clear(); + writtenPartitions.clear(); + partitionFieldSchemas.add(new FieldSchema("date", "date")); + partitionFieldSchemas.add(new FieldSchema("finished", "boolean")); + + writtenPartitions.add("2022-09-01/true"); + assertEquals("date = 2022-09-01", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + writtenPartitions.add("2022-09-02/true"); + assertEquals("(date = 2022-09-01 OR date = 2022-09-02)", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + // If no compatible types matched to convert as filters + partitionFieldSchemas.clear(); + writtenPartitions.clear(); + partitionFieldSchemas.add(new FieldSchema("finished", "boolean")); + + writtenPartitions.add("true"); + assertEquals("", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + writtenPartitions.add("false"); + assertEquals("", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + // If no compatible types matched to convert as filters + partitionFieldSchemas.clear(); + writtenPartitions.clear(); + partitionFieldSchemas.add(new FieldSchema("status", "string")); + writtenPartitions.add("to_be_'escaped"); + assertEquals("status = 'to_be_''escaped'", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + } + + @Test + public void testPushDownFilterIfExceedLimit() { + Properties props = new Properties(); + props.put(HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE.key(), "0"); + HiveSyncConfig config = new HiveSyncConfig(props); + List partitionFieldSchemas = new ArrayList<>(4); + partitionFieldSchemas.add(new FieldSchema("date", "date")); + partitionFieldSchemas.add(new FieldSchema("year", "string")); + partitionFieldSchemas.add(new FieldSchema("month", "int")); + partitionFieldSchemas.add(new FieldSchema("day", "bigint")); + + List writtenPartitions = new ArrayList<>(); + writtenPartitions.add("2022-09-01/2022/9/1"); + + assertEquals("(((date = 2022-09-01 AND year = '2022') AND month = 9) AND day = 1)", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + writtenPartitions.add("2022-09-02/2022/9/2"); + writtenPartitions.add("2022-09-03/2022/9/2"); + writtenPartitions.add("2022-09-04/2022/9/2"); + assertEquals( + "((((date >= 2022-09-01 AND date <= 2022-09-04) AND year = '2022') AND month = 9) AND (day >= 1 AND day <= 2))", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + // If there are incompatible types to convert as filters inside partition + partitionFieldSchemas.clear(); + writtenPartitions.clear(); + partitionFieldSchemas.add(new FieldSchema("date", "date")); + partitionFieldSchemas.add(new FieldSchema("finished", "boolean")); + + writtenPartitions.add("2022-09-01/true"); + assertEquals("date = 2022-09-01", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + writtenPartitions.add("2022-09-02/true"); + writtenPartitions.add("2022-09-03/false"); + writtenPartitions.add("2022-09-04/false"); + assertEquals("(date >= 2022-09-01 AND date <= 2022-09-04)", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + + // If no compatible types matched to convert as filters + partitionFieldSchemas.clear(); + writtenPartitions.clear(); + partitionFieldSchemas.add(new FieldSchema("finished", "boolean")); + + writtenPartitions.add("true"); + assertEquals("", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + writtenPartitions.add("false"); + writtenPartitions.add("false"); + writtenPartitions.add("false"); + assertEquals("", + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + } +} diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index 9d44bbdc07ef..b0fb3098c107 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.InvalidTableException; -import org.apache.hudi.hive.util.PartitionFilterGenerator; import org.apache.hudi.sync.common.HoodieSyncClient; import org.apache.hudi.sync.common.HoodieSyncTool; import org.apache.hudi.sync.common.model.FieldSchema; @@ -40,6 +39,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Properties; @@ -390,10 +390,11 @@ private List getTablePartitions(String tableName, List writte List partitionFields = syncClient.getMetastoreFieldSchemas(tableName) .stream() .filter(f -> partitionKeys.contains(f.getName())) + .sorted(Comparator.comparing(f -> partitionKeys.indexOf(f.getName()))) .collect(Collectors.toList()); return syncClient.getPartitionsByFilter(tableName, - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFields, config)); + syncClient.generatePushDownFilter(writtenPartitions, partitionFields)); } /** diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java index 32ad873a83d3..757d60285856 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java @@ -34,6 +34,7 @@ import org.apache.hudi.hive.ddl.HiveSyncMode; import org.apache.hudi.hive.ddl.JDBCExecutor; import org.apache.hudi.hive.util.IMetaStoreClientUtil; +import org.apache.hudi.hive.util.PartitionFilterGenerator; import org.apache.hudi.sync.common.HoodieSyncClient; import org.apache.hudi.sync.common.model.FieldSchema; import org.apache.hudi.sync.common.model.Partition; @@ -228,6 +229,11 @@ public List getPartitionsByFilter(String tableName, String filter) { } } + @Override + public String generatePushDownFilter(List writtenPartitions, List partitionFields) { + return new PartitionFilterGenerator().generatePushDownFilter(writtenPartitions, partitionFields, config); + } + @Override public void createTable(String tableName, MessageType storageSchema, String inputFormatClass, String outputFormatClass, String serdeClass, diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/FilterGenVisitor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/FilterGenVisitor.java index f42b157727c3..d0bc5d9e05bf 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/FilterGenVisitor.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/FilterGenVisitor.java @@ -33,7 +33,7 @@ private String makeBinaryOperatorString(String left, Expression.Operator operat return String.format("%s %s %s", left, operator.sqlOperator, right); } - private String quoteStringLiteral(String value) { + protected String quoteStringLiteral(String value) { if (!value.contains("\"")) { return "\"" + value + "\""; } else if (!value.contains("'")) { diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java index 9ff22d2d5dc8..55354818598d 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/util/PartitionFilterGenerator.java @@ -59,7 +59,7 @@ public class PartitionFilterGenerator { private static final String UNSUPPORTED_TYPE_ERROR = "The value type: %s doesn't support to " + "be pushed down to HMS, acceptable types: " + String.join(",", SUPPORT_TYPES); - private static Literal buildLiteralExpression(String fieldValue, String fieldType) { + private Literal buildLiteralExpression(String fieldValue, String fieldType) { switch (fieldType.toLowerCase(Locale.ROOT)) { case HiveSchemaUtil.INT_TYPE_NAME: return new Literal<>(Integer.parseInt(fieldValue), Types.IntType.get()); @@ -85,7 +85,7 @@ private static Literal buildLiteralExpression(String fieldValue, String fieldTyp * Or(And(Equal(Attribute(date), Literal(2022-09-01)), Equal(Attribute(hour), Literal(12))), * And(Equal(Attribute(date), Literal(2022-09-02)), Equal(Attribute(hour), Literal(13)))) */ - private static Expression buildPartitionExpression(List partitions, List partitionFields) { + private Expression buildPartitionExpression(List partitions, List partitionFields) { return partitions.stream().map(partition -> { List partitionValues = partition.getValues(); Expression root = null; @@ -114,7 +114,7 @@ private static Expression buildPartitionExpression(List partitions, L * Extract partition values from the {@param partitions}, and binding to * corresponding partition fieldSchemas. */ - private static List> extractFieldValues(List partitions, List partitionFields) { + private List> extractFieldValues(List partitions, List partitionFields) { return IntStream.range(0, partitionFields.size()) .mapToObj(i -> { Set values = new HashSet(); @@ -126,7 +126,7 @@ private static List> extractFieldValues(List { + private class ValueComparator implements Comparator { private final String valueType; public ValueComparator(String type) { @@ -163,7 +163,7 @@ public int compare(String s1, String s2) { * * This method can reduce the Expression tree level a lot if each field has too many values. */ - private static Expression buildMinMaxPartitionExpression(List partitions, List partitionFields) { + private Expression buildMinMaxPartitionExpression(List partitions, List partitionFields) { return extractFieldValues(partitions, partitionFields).stream().map(fieldWithValues -> { FieldSchema fieldSchema = fieldWithValues.getKey(); @@ -198,7 +198,7 @@ private static Expression buildMinMaxPartitionExpression(List partiti }); } - public static String generatePushDownFilter(List writtenPartitions, List partitionFields, HiveSyncConfig config) { + public String generatePushDownFilter(List writtenPartitions, List partitionFields, HiveSyncConfig config) { PartitionValueExtractor partitionValueExtractor = ReflectionUtils .loadClass(config.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS)); @@ -228,7 +228,7 @@ public static String generatePushDownFilter(List writtenPartitions, List return ""; } - private static String generateFilterString(Expression filter) { + protected String generateFilterString(Expression filter) { return filter.accept(new FilterGenVisitor()); } } diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/util/TestPartitionFilterGenerator.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/util/TestPartitionFilterGenerator.java index 7488709aca65..a142020c6863 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/util/TestPartitionFilterGenerator.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/util/TestPartitionFilterGenerator.java @@ -31,6 +31,7 @@ public class TestPartitionFilterGenerator { + PartitionFilterGenerator partitionFilterGenerator = new PartitionFilterGenerator(); @Test public void testPushDownFilters() { Properties props = new Properties(); @@ -43,14 +44,13 @@ public void testPushDownFilters() { List writtenPartitions = new ArrayList<>(); writtenPartitions.add("2022-09-01/2022/9/1"); - assertEquals("(((date = 2022-09-01 AND year = \"2022\") AND month = 9) AND day = 1)", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("2022-09-02/2022/9/2"); assertEquals( "((((date = 2022-09-01 AND year = \"2022\") AND month = 9) AND day = 1) OR (((date = 2022-09-02 AND year = \"2022\") AND month = 9) AND day = 2))", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); // If there are incompatible types to convert as filters inside partition partitionFieldSchemas.clear(); @@ -60,10 +60,10 @@ public void testPushDownFilters() { writtenPartitions.add("2022-09-01/true"); assertEquals("date = 2022-09-01", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("2022-09-02/true"); assertEquals("(date = 2022-09-01 OR date = 2022-09-02)", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); // If no compatible types matched to convert as filters partitionFieldSchemas.clear(); @@ -72,10 +72,10 @@ public void testPushDownFilters() { writtenPartitions.add("true"); assertEquals("", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("false"); assertEquals("", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); } @Test @@ -93,14 +93,14 @@ public void testPushDownFilterIfExceedLimit() { writtenPartitions.add("2022-09-01/2022/9/1"); assertEquals("(((date = 2022-09-01 AND year = \"2022\") AND month = 9) AND day = 1)", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("2022-09-02/2022/9/2"); writtenPartitions.add("2022-09-03/2022/9/2"); writtenPartitions.add("2022-09-04/2022/9/2"); assertEquals( "((((date >= 2022-09-01 AND date <= 2022-09-04) AND year = \"2022\") AND month = 9) AND (day >= 1 AND day <= 2))", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); // If there are incompatible types to convert as filters inside partition partitionFieldSchemas.clear(); @@ -110,12 +110,12 @@ public void testPushDownFilterIfExceedLimit() { writtenPartitions.add("2022-09-01/true"); assertEquals("date = 2022-09-01", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("2022-09-02/true"); writtenPartitions.add("2022-09-03/false"); writtenPartitions.add("2022-09-04/false"); assertEquals("(date >= 2022-09-01 AND date <= 2022-09-04)", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); // If no compatible types matched to convert as filters partitionFieldSchemas.clear(); @@ -124,11 +124,11 @@ public void testPushDownFilterIfExceedLimit() { writtenPartitions.add("true"); assertEquals("", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); writtenPartitions.add("false"); writtenPartitions.add("false"); writtenPartitions.add("false"); assertEquals("", - PartitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); + partitionFilterGenerator.generatePushDownFilter(writtenPartitions, partitionFieldSchemas, config)); } } diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java index 87af1d16d75c..b1acaf143961 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieMetaSyncOperations.java @@ -233,4 +233,11 @@ default void updateLastReplicatedTimeStamp(String tableName, String timeStamp) { default void deleteLastReplicatedTimeStamp(String tableName) { } + + /** + * Generates a push down filter string to retrieve existing partitions + */ + default String generatePushDownFilter(List writtenPartitions, List partitionFields) { + throw new UnsupportedOperationException(); + } } diff --git a/pom.xml b/pom.xml index 7d87df764fbe..5e3ec3b870fe 100644 --- a/pom.xml +++ b/pom.xml @@ -227,6 +227,8 @@ hadoop2-2.2.7 8000 http://localhost:${dynamodb-local.port} + 5000 + http://localhost:${moto.port} 2.7.3 2.1.1 1.1.8.3 From 18f10ba2b4fdf6bf6d8843c9ad8b161b8a9fc2c5 Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:14:43 -0800 Subject: [PATCH 098/112] [HUDI-7375] Disable a flaky test method (#10627) Which is caused by issues from underlying MiniHDFS. --- .../org/apache/hudi/common/functional/TestHoodieLogFormat.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index 54c0dd53ed22..0b3bcc812ae0 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -80,6 +80,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.io.TempDir; @@ -1903,6 +1904,7 @@ public void testAvroLogRecordReaderWithInsertsDeleteAndRollback(ExternalSpillabl 0, 0, Option.empty()); } + @Disabled("HUDI-7375") @ParameterizedTest @MethodSource("testArguments") public void testLogReaderWithDifferentVersionsOfDeleteBlocks(ExternalSpillableMap.DiskMapType diskMapType, From b8b88cfdd66b6c40256e683006f2ae6b8c6fa08e Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 5 Feb 2024 17:31:35 -0800 Subject: [PATCH 099/112] [HUDI-7366] Fix HoodieLocation with encoded paths (#10602) --- .../java/org/apache/hudi/storage/HoodieLocation.java | 3 ++- .../apache/hudi/io/storage/TestHoodieLocation.java | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java index 3b3a05dc9b42..2073548b7d10 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java @@ -108,7 +108,8 @@ public HoodieLocation(HoodieLocation parent, String child) { parentUri.getAuthority(), parentPathWithSeparator, null, - parentUri.getFragment()).resolve(normalizedChild); + parentUri.getFragment()) + .resolve(new URI(null, null, normalizedChild, null, null)); this.uri = new URI( parentUri.getScheme(), parentUri.getAuthority(), diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java index 4c765d2cc3f3..7c3af8741ba0 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java @@ -115,6 +115,18 @@ public void testURI() throws URISyntaxException { new HoodieLocation(new HoodieLocation(new URI("foo://bar/baz#bud")), "/fud#boo").toString()); } + @Test + public void testEncoded() { + // encoded character like `%2F` should be kept as is + assertEquals(new HoodieLocation("s3://foo/bar/1%2F2%2F3"), new HoodieLocation("s3://foo/bar", "1%2F2%2F3")); + assertEquals("s3://foo/bar/1%2F2%2F3", new HoodieLocation("s3://foo/bar", "1%2F2%2F3").toString()); + assertEquals(new HoodieLocation("s3://foo/bar/1%2F2%2F3"), + new HoodieLocation(new HoodieLocation("s3://foo/bar"), "1%2F2%2F3")); + assertEquals("s3://foo/bar/1%2F2%2F3", + new HoodieLocation(new HoodieLocation("s3://foo/bar"), "1%2F2%2F3").toString()); + assertEquals("s3://foo/bar/1%2F2%2F3", new HoodieLocation("s3://foo/bar/1%2F2%2F3").toString()); + } + @Test public void testPathToUriConversion() throws URISyntaxException { assertEquals(new URI(null, null, "/foo?bar", null, null), From d17ae75aed331bd0959172af464dc9fd478eff17 Mon Sep 17 00:00:00 2001 From: Shawn Chang <42792772+CTTY@users.noreply.github.com> Date: Mon, 5 Feb 2024 19:43:50 -0800 Subject: [PATCH 100/112] [HUDI-7338] Bump HBase, Pulsar, Jetty version (#10223) Co-authored-by: Shawn Chang --- pom.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 5e3ec3b870fe..903d3a58714a 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,7 @@ ${fasterxml.spark3.version} 2.0.0 2.8.0 - 2.8.1 + 3.0.2 ${pulsar.spark.scala12.version} 2.4.5 3.1.1.4 @@ -189,9 +189,9 @@ log4j2-surefire.properties 0.12.0 4.6.7 - 9.4.48.v20220622 + 9.4.53.v20231009 3.1.0-incubating - 2.4.9 + 2.4.13 1.4.199 3.1.2 false @@ -476,6 +476,7 @@ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous org.apache.hbase.thirdparty:hbase-shaded-netty org.apache.hbase.thirdparty:hbase-shaded-protobuf + org.apache.hbase.thirdparty:hbase-unsafe org.apache.htrace:htrace-core4 com.fasterxml.jackson.module:jackson-module-afterburner From 51a364c4de4bfc521ca095069e79068b8ef29a30 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 6 Feb 2024 16:22:22 -0800 Subject: [PATCH 101/112] [HUDI-7367] Add makeQualified APIs (#10607) * [HUDI-7367] Add makeQualified APIs * Fix checkstyle --- .../org/apache/hudi/common/fs/FSUtils.java | 13 ++++++ .../apache/hudi/common/fs/TestFSUtils.java | 21 +++++++++ .../storage/hadoop/HoodieHadoopStorage.java | 6 +++ .../apache/hudi/storage/HoodieLocation.java | 45 +++++++++++++++++++ .../apache/hudi/storage/HoodieStorage.java | 9 ++++ .../hudi/io/storage/TestHoodieLocation.java | 15 +++++++ .../io/storage/TestHoodieStorageBase.java | 7 +++ 7 files changed, 116 insertions(+) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index e3d4a43fe592..7d0b6b88bc7a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -40,6 +40,8 @@ import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.storage.HoodieLocation; +import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StorageSchemes; import org.apache.hadoop.conf.Configuration; @@ -120,6 +122,17 @@ public static Path makeQualified(FileSystem fs, Path path) { return path.makeQualified(fs.getUri(), fs.getWorkingDirectory()); } + /** + * Makes location qualified with {@link HoodieStorage}'s URI. + * + * @param storage instance of {@link HoodieStorage}. + * @param location to be qualified. + * @return qualified location, prefixed with the URI of the target HoodieStorage object provided. + */ + public static HoodieLocation makeQualified(HoodieStorage storage, HoodieLocation location) { + return location.makeQualified(storage.getUri()); + } + /** * A write token uniquely identifies an attempt at one of the IOHandle operations (Merge/Create/Append). */ diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index 14ba96c01f46..a004c5f2b80e 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -31,6 +31,11 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; +import org.apache.hudi.hadoop.fs.HoodieWrapperFileSystem; +import org.apache.hudi.hadoop.fs.NoOpConsistencyGuard; +import org.apache.hudi.storage.HoodieLocation; +import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.hadoop.HoodieHadoopStorage; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -548,6 +553,22 @@ public void testGetFileStatusAtLevel() throws IOException { .collect(Collectors.toSet())); } + @Test + public void testMakeQualified() { + FileSystem fs = HadoopFSUtils.getFs("file:///a/b/c", new Configuration()); + FileSystem wrapperFs = new HoodieWrapperFileSystem(fs, new NoOpConsistencyGuard()); + HoodieStorage storage = new HoodieHadoopStorage(fs); + HoodieStorage wrapperStorage = new HoodieHadoopStorage(wrapperFs); + assertEquals(new HoodieLocation("file:///x/y"), + FSUtils.makeQualified(storage, new HoodieLocation("/x/y"))); + assertEquals(new HoodieLocation("file:///x/y"), + FSUtils.makeQualified(wrapperStorage, new HoodieLocation("/x/y"))); + assertEquals(new HoodieLocation("s3://x/y"), + FSUtils.makeQualified(storage, new HoodieLocation("s3://x/y"))); + assertEquals(new HoodieLocation("s3://x/y"), + FSUtils.makeQualified(wrapperStorage, new HoodieLocation("s3://x/y"))); + } + private Path getHoodieTempDir() { return new Path(baseUri.toString(), ".hoodie/.temp"); } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java index b863e97cba16..c11531aca4b2 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/storage/hadoop/HoodieHadoopStorage.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -53,6 +54,11 @@ public String getScheme() { return fs.getScheme(); } + @Override + public URI getUri() { + return fs.getUri(); + } + @Override public OutputStream create(HoodieLocation location, boolean overwrite) throws IOException { return fs.create(convertHoodieLocationToPath(location), overwrite); diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java index 2073548b7d10..8b51bd07ff94 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieLocation.java @@ -187,6 +187,51 @@ public URI toUri() { return uri; } + /** + * Returns a qualified location object. + * + * @param defaultUri if this location is missing the scheme or authority + * components, borrow them from this URI. + * @return this location if it contains a scheme and authority, or + * a new path that includes a path and authority and is fully qualified. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public HoodieLocation makeQualified(URI defaultUri) { + if (!isAbsolute()) { + throw new IllegalStateException("Only an absolute path can be made qualified"); + } + HoodieLocation location = this; + URI locationUri = location.toUri(); + + String scheme = locationUri.getScheme(); + String authority = locationUri.getAuthority(); + String fragment = locationUri.getFragment(); + + if (scheme != null && (authority != null || defaultUri.getAuthority() == null)) { + return location; + } + + if (scheme == null) { + scheme = defaultUri.getScheme(); + } + + if (authority == null) { + authority = defaultUri.getAuthority(); + if (authority == null) { + authority = ""; + } + } + + URI newUri; + try { + newUri = new URI(scheme, authority, + normalize(locationUri.getPath(), true), null, fragment); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + return new HoodieLocation(newUri); + } + @Override public String toString() { // This value could be overwritten concurrently and that's okay, since diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java index eea2c3ff692c..75d7dc28defd 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/HoodieStorage.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URI; import java.util.ArrayList; import java.util.List; @@ -51,6 +52,14 @@ public abstract class HoodieStorage implements Closeable { @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public abstract String getScheme(); + /** + * Returns a URI which identifies this HoodieStorage. + * + * @return the URI of this storage instance. + */ + @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) + public abstract URI getUri(); + /** * Creates an OutputStream at the indicated location. * diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java index 7c3af8741ba0..caee807a1f60 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieLocation.java @@ -182,6 +182,21 @@ public void testDepth() throws URISyntaxException { assertEquals(4, new HoodieLocation(new HoodieLocation("s3://a/b/c"), "d/e").depth()); } + @Test + public void testMakeQualified() throws URISyntaxException { + URI defaultUri = new URI("hdfs://host1/dir1"); + assertEquals(new HoodieLocation("hdfs://host1/a/b/c"), + new HoodieLocation("/a/b/c").makeQualified(defaultUri)); + assertEquals(new HoodieLocation("hdfs://host2/a/b/c"), + new HoodieLocation("hdfs://host2/a/b/c").makeQualified(defaultUri)); + assertEquals(new HoodieLocation("hdfs://host1/a/b/c"), + new HoodieLocation("hdfs:/a/b/c").makeQualified(defaultUri)); + assertEquals(new HoodieLocation("s3://a/b/c"), + new HoodieLocation("s3://a/b/c/").makeQualified(defaultUri)); + assertThrows(IllegalStateException.class, + () -> new HoodieLocation("a").makeQualified(defaultUri)); + } + @Test public void testEquals() { assertEquals(new HoodieLocation("/foo"), new HoodieLocation("/foo")); diff --git a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java index 0424d22157d6..6c7fc2f4dd5b 100644 --- a/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java +++ b/hudi-io/src/test/java/org/apache/hudi/io/storage/TestHoodieStorageBase.java @@ -33,6 +33,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Path; import java.util.Arrays; import java.util.Comparator; @@ -99,6 +101,11 @@ public void testGetScheme() { assertEquals("file", getHoodieStorage().getScheme()); } + @Test + public void testGetUri() throws URISyntaxException { + assertEquals(new URI("file:///"), getHoodieStorage().getUri()); + } + @Test public void testCreateWriteAndRead() throws IOException { HoodieStorage storage = getHoodieStorage(); From 66ac9ff92e58dbc89ee4bdc9d621816ac3d97795 Mon Sep 17 00:00:00 2001 From: Nicolas Paris Date: Thu, 8 Feb 2024 04:35:30 +0100 Subject: [PATCH 102/112] [HUDI-7351] Handle case when glue expression larger than 2048 limit (#10623) --- hudi-aws/pom.xml | 2 +- .../aws/sync/AWSGlueCatalogSyncClient.java | 9 ++++- .../aws/sync/ITTestGluePartitionPushdown.java | 36 ++++++++++++++++--- .../org/apache/hudi/hive/HiveSyncConfig.java | 3 +- 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml index 57aaf22216c5..8a86c641db8f 100644 --- a/hudi-aws/pom.xml +++ b/hudi-aws/pom.xml @@ -31,7 +31,7 @@ 1.15.0 - latest + 5.0.1 diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index ab48080be1e7..f215617ef1c7 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -113,6 +113,7 @@ public class AWSGlueCatalogSyncClient extends HoodieSyncClient { protected final GlueAsyncClient awsGlue; private static final String GLUE_PARTITION_INDEX_ENABLE = "partition_filtering.enabled"; private static final int PARTITION_INDEX_MAX_NUMBER = 3; + private static final int GLUE_EXPRESSION_MAX_CHARS = 2048; /** * athena v2/v3 table property * see https://docs.aws.amazon.com/athena/latest/ug/querying-hudi.html @@ -154,10 +155,16 @@ public List getAllPartitions(String tableName) { @Override public List getPartitionsByFilter(String tableName, String filter) { try { - return getPartitions(GetPartitionsRequest.builder() + if (filter.length() <= GLUE_EXPRESSION_MAX_CHARS) { + LOG.info("Pushdown filters: {}", filter); + return getPartitions(GetPartitionsRequest.builder() .databaseName(databaseName) .tableName(tableName) .expression(filter)); + } else { + LOG.warn("Falling back to listing all partition since expression filter length > {}", GLUE_EXPRESSION_MAX_CHARS); + return getAllPartitions(tableName); + } } catch (Exception e) { throw new HoodieGlueSyncException("Failed to get partitions for table " + tableId(databaseName, tableName) + " from expression: " + filter, e); } diff --git a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java index 940fbfb0bf3f..b0aa34bdfce1 100644 --- a/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java +++ b/hudi-aws/src/test/java/org/apache/hudi/aws/sync/ITTestGluePartitionPushdown.java @@ -47,10 +47,12 @@ import java.io.IOException; import java.nio.file.Files; import java.time.Instant; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.ExecutionException; +import static org.apache.hudi.hive.HiveSyncConfig.HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; @@ -114,6 +116,13 @@ public void teardown() throws Exception { fileSystem.delete(new Path(tablePath), true); } + private void createPartitions(String...partitions) throws ExecutionException, InterruptedException { + glueSync.awsGlue.createPartition(CreatePartitionRequest.builder().databaseName(DB_NAME).tableName(TABLE_NAME) + .partitionInput(PartitionInput.builder() + .storageDescriptor(StorageDescriptor.builder().columns(partitionsColumn).build()) + .values(partitions).build()).build()).get(); + } + @Test public void testEmptyPartitionShouldReturnEmpty() { Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, @@ -122,12 +131,29 @@ public void testEmptyPartitionShouldReturnEmpty() { @Test public void testPresentPartitionShouldReturnIt() throws ExecutionException, InterruptedException { - glueSync.awsGlue.createPartition(CreatePartitionRequest.builder().databaseName(DB_NAME).tableName(TABLE_NAME) - .partitionInput(PartitionInput.builder() - .storageDescriptor(StorageDescriptor.builder().columns(partitionsColumn).build()) - .values("1", "b'ar").build()).build()).get(); - + createPartitions("1", "b'ar"); Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, glueSync.generatePushDownFilter(Arrays.asList("1/b'ar", "2/foo", "1/b''ar"), partitionsFieldSchema)).size()); } + + @Test + public void testPresentPartitionShouldReturnAllWhenExpressionFilterLengthTooLong() throws ExecutionException, InterruptedException { + createPartitions("1", "b'ar"); + + // this will generate an expression larger than GLUE_EXPRESSION_MAX_CHARS + List tooLargePartitionPredicate = new ArrayList<>(); + for (int i = 0; i < 500; i++) { + tooLargePartitionPredicate.add(i + "/foo"); + } + Assertions.assertEquals(1, glueSync.getPartitionsByFilter(TABLE_NAME, + glueSync.generatePushDownFilter(tooLargePartitionPredicate, partitionsFieldSchema)).size(), + "Should fallback to listing all existing partitions"); + + // now set the pushdown max size to a low value to transform the expression in lower/upper bound + hiveSyncProps.setProperty(HIVE_SYNC_FILTER_PUSHDOWN_MAX_SIZE.key(), "10"); + glueSync = new AWSGlueCatalogSyncClient(new HiveSyncConfig(hiveSyncProps)); + Assertions.assertEquals(0, glueSync.getPartitionsByFilter(TABLE_NAME, + glueSync.generatePushDownFilter(tooLargePartitionPredicate, partitionsFieldSchema)).size(), + "No partitions should match"); + } } diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index 73f25b1615fc..331c8906bc55 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -85,7 +85,8 @@ public class HiveSyncConfig extends HoodieSyncConfig { .defaultValue(1000) .markAdvanced() .withDocumentation("Max size limit to push down partition filters, if the estimate push down " - + "filters exceed this size, will directly try to fetch all partitions"); + + "filters exceed this size, will directly try to fetch all partitions between the min/max." + + "In case of glue metastore, this value should be reduced because it has a filter length limit."); public static String getBucketSpec(String bucketCols, int bucketNum) { return "CLUSTERED BY (" + bucketCols + " INTO " + bucketNum + " BUCKETS"; From e03a88c2778a994b8c5b6d2a8f9b7971e130cbb6 Mon Sep 17 00:00:00 2001 From: voonhous Date: Thu, 8 Feb 2024 11:41:48 +0800 Subject: [PATCH 103/112] [HUDI-7392] Fix connection leak causing lingering CLOSE_WAIT (#10636) --- .../hudi/index/bucket/ConsistentBucketIndexUtils.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java index 5b4d5cfba457..d22e4b21a5ec 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/ConsistentBucketIndexUtils.java @@ -39,6 +39,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; @@ -220,11 +221,11 @@ private static void createCommitMarker(HoodieTable table, Path fileStatus, Path * @return HoodieConsistentHashingMetadata object */ private static Option loadMetadataFromGivenFile(HoodieTable table, FileStatus metaFile) { - try { - if (metaFile == null) { - return Option.empty(); - } - byte[] content = FileIOUtils.readAsByteArray(table.getMetaClient().getFs().open(metaFile.getPath())); + if (metaFile == null) { + return Option.empty(); + } + try (InputStream is = table.getMetaClient().getFs().open(metaFile.getPath())) { + byte[] content = FileIOUtils.readAsByteArray(is); return Option.of(HoodieConsistentHashingMetadata.fromBytes(content)); } catch (FileNotFoundException e) { return Option.empty(); From 99114975a2519093382274bb6e05e98eb5ce8c24 Mon Sep 17 00:00:00 2001 From: xuzifu666 <1206332514@qq.com> Date: Thu, 8 Feb 2024 11:43:23 +0800 Subject: [PATCH 104/112] [HUDI-7387] Serializable Class need contains serialVersionUID to keep compatibility in upgrade (#10633) --- .../src/main/java/org/apache/hudi/client/BaseHoodieClient.java | 1 + .../java/org/apache/hudi/table/action/BaseActionExecutor.java | 1 + .../apache/hudi/table/action/rollback/BaseRollbackHelper.java | 1 + .../src/main/java/org/apache/hudi/schema/SchemaProvider.java | 2 ++ .../org/apache/hudi/utilities/HoodieDataTableValidator.java | 1 + .../org/apache/hudi/utilities/HoodieDropPartitionsTool.java | 1 + .../org/apache/hudi/utilities/HoodieMetadataTableValidator.java | 1 + .../java/org/apache/hudi/utilities/HoodieSnapshotCopier.java | 1 + .../org/apache/hudi/utilities/HoodieWithTimelineServer.java | 1 + .../src/main/java/org/apache/hudi/utilities/TableSizeStats.java | 1 + 10 files changed, 11 insertions(+) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java index 73bafa691d8a..8980f9044211 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java @@ -61,6 +61,7 @@ public abstract class BaseHoodieClient implements Serializable, AutoCloseable { private static final Logger LOG = LoggerFactory.getLogger(BaseHoodieClient.class); + private static final long serialVersionUID = 1L; protected final transient FileSystem fs; protected final transient HoodieEngineContext context; protected final transient Configuration hadoopConf; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java index 13d43040dd8a..c0683946b9bb 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java @@ -36,6 +36,7 @@ public abstract class BaseActionExecutor implements Serializable { + private static final long serialVersionUID = 1L; protected final transient HoodieEngineContext context; protected final transient Configuration hadoopConf; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java index a95b3a3dc5c3..94473e98d79c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/BaseRollbackHelper.java @@ -56,6 +56,7 @@ */ public class BaseRollbackHelper implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(BaseRollbackHelper.class); protected static final String EMPTY_STRING = ""; diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java index 5def413b5029..eba4e51861dc 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/schema/SchemaProvider.java @@ -27,6 +27,8 @@ */ public abstract class SchemaProvider implements Serializable { + private static final long serialVersionUID = 1L; + public abstract Schema getSourceSchema(); public Schema getTargetSchema() { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java index 3f1a19421ac6..ec5387ac894f 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java @@ -95,6 +95,7 @@ */ public class HoodieDataTableValidator implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(HoodieDataTableValidator.class); // Spark context diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java index 1695462a30ea..ba214452356a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDropPartitionsTool.java @@ -103,6 +103,7 @@ */ public class HoodieDropPartitionsTool implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(HoodieDropPartitionsTool.class); // Spark context private final transient JavaSparkContext jsc; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index f8607c42237d..7a536da61986 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -164,6 +164,7 @@ */ public class HoodieMetadataTableValidator implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(HoodieMetadataTableValidator.class); // Spark context diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java index 2ecc5d4e066d..77528599563e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieSnapshotCopier.java @@ -62,6 +62,7 @@ */ public class HoodieSnapshotCopier implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(HoodieSnapshotCopier.class); static class Config implements Serializable { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java index 326f56a628e0..e2c23b151532 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieWithTimelineServer.java @@ -43,6 +43,7 @@ public class HoodieWithTimelineServer implements Serializable { + private static final long serialVersionUID = 1L; private final Config cfg; public HoodieWithTimelineServer(Config cfg) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java index 4c37a5d3f9a3..813a9fa7f045 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/TableSizeStats.java @@ -95,6 +95,7 @@ */ public class TableSizeStats implements Serializable { + private static final long serialVersionUID = 1L; private static final Logger LOG = LoggerFactory.getLogger(TableSizeStats.class); // Date formatter for parsing partition dates (example: 2023/5/5/ or 2023-5-5). From 32fe3b6f542800cc6500762c75743236ac58d688 Mon Sep 17 00:00:00 2001 From: lxliyou001 <47881938+lxliyou001@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:16:32 +0800 Subject: [PATCH 105/112] [MINOR] fix typo (#10634) --- .../main/java/org/apache/hudi/common/bloom/InternalFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java index 6b2e46ee0777..e23255bb4b61 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/InternalFilter.java @@ -192,7 +192,7 @@ public void write(DataOutput out) throws IOException { *

    For efficiency, implementations should attempt to re-use storage in the * existing object where possible.

    * - * @param in DataInput to deseriablize this object from. + * @param in DataInput to deserialize this object from. * @throws IOException */ public void readFields(DataInput in) throws IOException { From 8436febed98d14f0d7a2bd0a83a3796364040a37 Mon Sep 17 00:00:00 2001 From: voonhous Date: Fri, 9 Feb 2024 03:05:29 +0800 Subject: [PATCH 106/112] [HUDI-7394] Fix run script of hudi-timeline-server-bundle (#10640) --- packaging/hudi-timeline-server-bundle/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml index ff9a9712e090..f906305e0e86 100644 --- a/packaging/hudi-timeline-server-bundle/pom.xml +++ b/packaging/hudi-timeline-server-bundle/pom.xml @@ -76,6 +76,13 @@ rocksdbjni + + + org.apache.avro + avro + compile + + org.apache.hadoop @@ -192,6 +199,7 @@ commons-io:commons-io log4j:log4j org.openjdk.jol:jol-core + org.apache.avro:avro @@ -207,6 +215,10 @@ com.fasterxml.jackson. org.apache.hudi.com.fasterxml.jackson. + + org.apache.avro. + org.apache.hudi.org.apache.avro. + From 09f3fb5cefb354190eec94b763afccdebaba7d86 Mon Sep 17 00:00:00 2001 From: Jon Vexler Date: Thu, 8 Feb 2024 16:30:13 -0500 Subject: [PATCH 107/112] [HUDI-7373] revert config hoodie.write.handle.missing.cols.with.lossless.type.promotion (#10611) --------- Co-authored-by: Jonathan Vexler <=> --- .../org/apache/hudi/common/config/HoodieCommonConfig.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java index 97b2462e3eff..65fded08e521 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieCommonConfig.java @@ -77,8 +77,8 @@ public class HoodieCommonConfig extends HoodieConfig { .key("hoodie.write.set.null.for.missing.columns") .defaultValue("false") .markAdvanced() - .withAlternatives("hoodie.write.set.null.for.missing.columns") - .withDocumentation("When a non-nullable column is missing from incoming batch during a write operation, the write " + .sinceVersion("0.14.1") + .withDocumentation("When a nullable column is missing from incoming batch during a write operation, the write " + " operation will fail schema compatibility check. Set this option to true will make the missing " + " column be filled with null values to successfully complete the write operation."); From a0ebac84d5ec90876f78708fcca0361e1fc0b674 Mon Sep 17 00:00:00 2001 From: Lin Liu <141371752+linliu-code@users.noreply.github.com> Date: Sat, 10 Feb 2024 11:33:03 -0800 Subject: [PATCH 108/112] [HUDI-6902] Containerize the Azure CI (#10512) * [HUDI-6902] Containerize the Azure tests * remove warning message --- .github/workflows/bot.yml | 56 +++-- Dockerfile | 31 +++ azure-pipelines-20230430.yml | 194 +++++++++--------- .../hudi-metaserver-server/pom.xml | 49 +++-- pom.xml | 1 + 5 files changed, 192 insertions(+), 139 deletions(-) create mode 100644 Dockerfile diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index b7a08d4a9a02..6c80b621cbcd 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -20,6 +20,11 @@ on: branches: - master - 'release-*' + +concurrency: + group: ${{ github.ref }} + cancel-in-progress: ${{ !contains(github.ref, 'master') }} + env: MVN_ARGS: -e -ntp -B -V -Dgpg.skip -Djacoco.skip -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5 SPARK_COMMON_MODULES: hudi-spark-datasource/hudi-spark,hudi-spark-datasource/hudi-spark-common @@ -35,6 +40,7 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Check Binary Files run: ./scripts/release/validate_source_binary_files.sh - name: Check Copyright @@ -86,12 +92,13 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Build Project env: SCALA_PROFILE: ${{ matrix.scalaProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} run: - mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" + mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,$SPARK_COMMON_MODULES,$SPARK_MODULES" - name: Quickstart Test env: SCALA_PROFILE: ${{ matrix.scalaProfile }} @@ -105,7 +112,7 @@ jobs: SPARK_MODULES: ${{ matrix.sparkModules }} if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI run: - mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS + mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - name: FT - Spark env: SCALA_PROFILE: ${{ matrix.scalaProfile }} @@ -143,7 +150,7 @@ jobs: SPARK_PROFILE: ${{ matrix.sparkProfile }} FLINK_PROFILE: ${{ matrix.flinkProfile }} run: - ./mvnw clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -DskipTests=true -Phudi-platform-service $MVN_ARGS -am -pl hudi-hadoop-mr,hudi-client/hudi-java-client + ./mvnw clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -DskipTests=true -Phudi-platform-service -Pthrift-gen-source-with-script $MVN_ARGS -am -pl hudi-hadoop-mr,hudi-client/hudi-java-client - name: UT - hudi-hadoop-mr and hudi-client/hudi-java-client env: SCALA_PROFILE: ${{ matrix.scalaProfile }} @@ -175,6 +182,7 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Build Project env: SCALA_PROFILE: ${{ matrix.scalaProfile }} @@ -187,6 +195,7 @@ jobs: java-version: '17' distribution: 'adopt' architecture: x64 + cache: maven - name: Quickstart Test env: SCALA_PROFILE: ${{ matrix.scalaProfile }} @@ -228,12 +237,13 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Build Project env: SCALA_PROFILE: 'scala-2.12' FLINK_PROFILE: ${{ matrix.flinkProfile }} run: - mvn clean install -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-examples/hudi-examples-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS + mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-examples/hudi-examples-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS - name: Quickstart Test env: SCALA_PROFILE: 'scala-2.12' @@ -246,7 +256,7 @@ jobs: FLINK_PROFILE: ${{ matrix.flinkProfile }} if: ${{ endsWith(env.FLINK_PROFILE, '1.18') }} run: | - mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS + mvn clean install -T 2 -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS docker-java17-test: @@ -269,6 +279,7 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: UT/FT - Docker Test - OpenJDK 17 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} @@ -291,19 +302,13 @@ jobs: - flinkProfile: 'flink1.18' sparkProfile: 'spark3.4' sparkRuntime: 'spark3.4.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.2' - flinkProfile: 'flink1.17' sparkProfile: 'spark3.3' sparkRuntime: 'spark3.3.2' - flinkProfile: 'flink1.16' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.2' - - flinkProfile: 'flink1.15' sparkProfile: 'spark3.3' sparkRuntime: 'spark3.3.1' - - flinkProfile: 'flink1.14' + - flinkProfile: 'flink1.15' sparkProfile: 'spark3.2' sparkRuntime: 'spark3.2.3' - flinkProfile: 'flink1.14' @@ -323,16 +328,17 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Build Project env: FLINK_PROFILE: ${{ matrix.flinkProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }} SCALA_PROFILE: 'scala-2.12' run: | - mvn clean package -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DdeployArtifacts=true -DskipTests=true $MVN_ARGS + mvn clean package -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DdeployArtifacts=true -DskipTests=true $MVN_ARGS -Phudi-platform-service -Pthrift-gen-source-with-script # TODO remove the sudo below. It's a needed workaround as detailed in HUDI-5708. sudo chown -R "$USER:$(id -g -n)" hudi-platform-service/hudi-metaserver/target/generated-sources - mvn clean package -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -DdeployArtifacts=true -DskipTests=true $MVN_ARGS -pl packaging/hudi-flink-bundle -am -Davro.version=1.10.0 + mvn clean package -T 2 -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -DdeployArtifacts=true -DskipTests=true $MVN_ARGS -pl packaging/hudi-flink-bundle -am -Davro.version=1.10.0 -Phudi-platform-service -Pthrift-gen-source-with-script - name: IT - Bundle Validation - OpenJDK 8 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} @@ -371,18 +377,30 @@ jobs: strategy: matrix: include: - - flinkProfile: 'flink1.16' + - flinkProfile: 'flink1.18' sparkProfile: 'spark3' + sparkRuntime: 'spark3.5.0' + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.5' + sparkRuntime: 'spark3.5.0' + - flinkProfile: 'flink1.18' + sparkProfile: 'spark3.4' + sparkRuntime: 'spark3.4.0' + - flinkProfile: 'flink1.17' + sparkProfile: 'spark3.3' sparkRuntime: 'spark3.3.2' - - flinkProfile: 'flink1.15' + - flinkProfile: 'flink1.16' sparkProfile: 'spark3.3' sparkRuntime: 'spark3.3.1' - - flinkProfile: 'flink1.14' + - flinkProfile: 'flink1.15' sparkProfile: 'spark3.2' sparkRuntime: 'spark3.2.3' - flinkProfile: 'flink1.14' sparkProfile: 'spark3.1' sparkRuntime: 'spark3.1.3' + - flinkProfile: 'flink1.14' + sparkProfile: 'spark3.0' + sparkRuntime: 'spark3.0.2' - flinkProfile: 'flink1.14' sparkProfile: 'spark' sparkRuntime: 'spark2.4.8' @@ -397,6 +415,7 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: IT - Bundle Validation - OpenJDK 8 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} @@ -436,12 +455,13 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 + cache: maven - name: Build Project env: SPARK_PROFILE: ${{ matrix.sparkProfile }} SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11' run: - mvn clean install $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipTests=true $MVN_ARGS + mvn clean install -T 2 $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipTests=true $MVN_ARGS - name: 'UT integ-test' env: SPARK_PROFILE: ${{ matrix.sparkProfile }} diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..f8d038771435 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Use a home made image as the base, which includes: +# utuntu:latest +# git +# thrift +# maven +# java8 +# Use an official Ubuntu base image +FROM apachehudi/hudi-ci-bundle-validation-base:azure_ci_test_base_new + +CMD ["java", "-version"] + +# Set the working directory to /app +WORKDIR /hudi + +# Copy git repo into the working directory +COPY . /hudi \ No newline at end of file diff --git a/azure-pipelines-20230430.yml b/azure-pipelines-20230430.yml index e834d5f75217..559686a2292f 100644 --- a/azure-pipelines-20230430.yml +++ b/azure-pipelines-20230430.yml @@ -41,6 +41,7 @@ parameters: type: object default: - 'hudi-client/hudi-spark-client' + - 'hudi-spark-datasource/hudi-spark' - name: job3UTModules type: object default: @@ -92,11 +93,12 @@ parameters: - '!hudi-flink-datasource/hudi-flink1.16.x' - '!hudi-flink-datasource/hudi-flink1.17.x' - '!hudi-flink-datasource/hudi-flink1.18.x' + - '!hudi-spark-datasource/hudi-spark' variables: BUILD_PROFILES: '-Dscala-2.12 -Dspark3.2 -Dflink1.18' PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp -B -V -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn' - MVN_OPTS_INSTALL: '-Phudi-platform-service -DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS) -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5' + MVN_OPTS_INSTALL: '-DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS) -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=5' MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)' JOB1_MODULES: ${{ join(',',parameters.job1Modules) }} JOB2_MODULES: ${{ join(',',parameters.job2Modules) }} @@ -106,128 +108,120 @@ variables: stages: - stage: test + variables: + - name: DOCKER_BUILDKIT + value: 1 jobs: - job: UT_FT_1 displayName: UT FT common & flink & UT client/spark-client timeoutInMinutes: '150' steps: - - task: Maven@4 - displayName: maven install + - task: Docker@2 + displayName: "login to docker" inputs: - mavenPomFile: 'pom.xml' - goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - - task: Maven@4 - displayName: UT common flink client/spark-client + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_MODULES),hudi-client/hudi-spark-client - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - task: Maven@4 - displayName: FT common flink + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "UT FT common flink client/spark-client" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB1_MODULES) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - script: | - grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 - displayName: Top 100 long-running testcases + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) + && mvn test $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB1_MODULES),hudi-client/hudi-spark-client + && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB1_MODULES) + && grep \"testcase\" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'\"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100" - job: UT_FT_2 - displayName: FT client/spark-client + displayName: FT client/spark-client & hudi-spark-datasource/hudi-spark timeoutInMinutes: '150' steps: - - task: Maven@4 - displayName: maven install + - task: Docker@2 + displayName: "login to docker" inputs: - mavenPomFile: 'pom.xml' - goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - - task: Maven@4 - displayName: FT client/spark-client + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB2_MODULES) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - script: | - grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 - displayName: Top 100 long-running testcases + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "FT client/spark-client & hudi-spark-datasource/hudi-spark" + inputs: + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) + && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB2_MODULES) + && grep \"testcase\" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'\"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100" - job: UT_FT_3 displayName: UT spark-datasource timeoutInMinutes: '240' steps: - - task: Maven@4 - displayName: maven install + - task: Docker@2 + displayName: "login to docker" + inputs: + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" inputs: - mavenPomFile: 'pom.xml' - goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - - task: Maven@4 - displayName: UT spark-datasource + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "UT spark-datasource" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB3_MODULES) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - script: | - grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 - displayName: Top 100 long-running testcases + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) && mvn test $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB3_MODULES) + && grep \"testcase\" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'\"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100" - job: UT_FT_4 displayName: UT FT other modules timeoutInMinutes: '240' steps: - - task: Maven@4 - displayName: maven install + - task: Docker@2 + displayName: "login to docker hub" inputs: - mavenPomFile: 'pom.xml' - goals: 'clean install' - options: $(MVN_OPTS_INSTALL) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - - task: Maven@4 - displayName: UT other modules + command: "login" + containerRegistry: "apachehudi-docker-hub" + - task: Docker@2 + displayName: "load repo into image" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB4_UT_MODULES) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - task: Maven@4 - displayName: FT other modules + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'build' + Dockerfile: '**/Dockerfile' + ImageName: $(Build.BuildId) + - task: Docker@2 + displayName: "UT FT other modules" inputs: - mavenPomFile: 'pom.xml' - goals: 'test' - options: $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB4_FT_MODULES) - publishJUnitResults: true - testResultsFiles: '**/surefire-reports/TEST-*.xml' - jdkVersionOption: '1.8' - mavenOptions: '-Xmx4g' - - script: | - grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100 - displayName: Top 100 long-running testcases + containerRegistry: 'apachehudi-docker-hub' + repository: 'apachehudi/hudi-ci-bundle-validation-base' + command: 'run' + arguments: > + -i docker.io/apachehudi/hudi-ci-bundle-validation-base:$(Build.BuildId) + /bin/bash -c "mvn clean install $(MVN_OPTS_INSTALL) -Phudi-platform-service -Pthrift-gen-source + && mvn test $(MVN_OPTS_TEST) -Punit-tests -pl $(JOB4_UT_MODULES) + && mvn test $(MVN_OPTS_TEST) -Pfunctional-tests -pl $(JOB4_UT_MODULES) + && grep \"testcase\" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'\"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100" \ No newline at end of file diff --git a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml index 10ac5be853a0..5df5a2346d9b 100644 --- a/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml +++ b/hudi-platform-service/hudi-metaserver/hudi-metaserver-server/pom.xml @@ -92,6 +92,34 @@ + + thrift-gen-source-with-script + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + thrift-install-and-generate-source + generate-sources + + exec + + + + + ${project.parent.basedir}/src/main/thrift/bin/thrift_binary.sh + + ${thrift.install.env} + + false + + + + + m1-mac @@ -108,27 +136,6 @@ - - org.codehaus.mojo - exec-maven-plugin - 1.6.0 - - - thrift-install-and-generate-source - generate-sources - - exec - - - - - ${project.parent.basedir}/src/main/thrift/bin/thrift_binary.sh - - ${thrift.install.env} - - false - - org.jacoco jacoco-maven-plugin diff --git a/pom.xml b/pom.xml index 903d3a58714a..0a02a1589204 100644 --- a/pom.xml +++ b/pom.xml @@ -232,6 +232,7 @@ 2.7.3 2.1.1 1.1.8.3 + /usr/local/bin/thrift From 40cc538398d7495556d3468e2d7fd2a32b4de01a Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 26 May 2024 18:07:06 -0700 Subject: [PATCH 109/112] [HUDI-7707] Enable bundle validation on Java 8 and 11 --- .github/workflows/bot.yml | 8 +++++--- packaging/bundle-validation/ci_run.sh | 9 +++++---- packaging/bundle-validation/validate.sh | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 6c80b621cbcd..82e012caea1c 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -342,21 +342,23 @@ jobs: - name: IT - Bundle Validation - OpenJDK 8 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_RUNTIME: ${{ matrix.sparkRuntime }} SCALA_PROFILE: 'scala-2.12' if: ${{ env.SPARK_PROFILE >= 'spark3' }} # Only run validation on Spark 3 run: | HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk8 + ./packaging/bundle-validation/ci_run.sh hudi_docker_java8 $HUDI_VERSION openjdk8 - name: IT - Bundle Validation - OpenJDK 11 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} + SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_RUNTIME: ${{ matrix.sparkRuntime }} SCALA_PROFILE: 'scala-2.12' if: ${{ env.SPARK_PROFILE >= 'spark3' }} # Only run validation on Spark 3 run: | HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk11 + ./packaging/bundle-validation/ci_run.sh hudi_docker_java11 $HUDI_VERSION openjdk11 - name: IT - Bundle Validation - OpenJDK 17 env: FLINK_PROFILE: ${{ matrix.flinkProfile }} @@ -366,7 +368,7 @@ jobs: if: ${{ env.SPARK_PROFILE >= 'spark3.3' }} # Only Spark 3.3 and above support Java 17 run: | HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk17 + ./packaging/bundle-validation/ci_run.sh hudi_docker_java17 $HUDI_VERSION openjdk17 validate-release-candidate-bundles: if: false diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index 6b80ab7078d8..56ce62d3a649 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -27,9 +27,10 @@ # This is to run by GitHub Actions CI tasks from the project root directory # and it contains the CI environment-specific variables. -HUDI_VERSION=$1 -JAVA_RUNTIME_VERSION=$2 -STAGING_REPO_NUM=$3 +CONTAINER_NAME=$1 +HUDI_VERSION=$2 +JAVA_RUNTIME_VERSION=$3 +STAGING_REPO_NUM=$4 echo "HUDI_VERSION: $HUDI_VERSION JAVA_RUNTIME_VERSION: $JAVA_RUNTIME_VERSION" echo "SPARK_RUNTIME: $SPARK_RUNTIME SPARK_PROFILE (optional): $SPARK_PROFILE" @@ -210,7 +211,7 @@ docker build \ . # run validation script in docker -docker run --name hudi_docker \ +docker run --name $CONTAINER_NAME \ -v ${GITHUB_WORKSPACE}:/opt/bundle-validation/docker-test \ -v $TMP_JARS_DIR:/opt/bundle-validation/jars \ -v $TMP_DATA_DIR:/opt/bundle-validation/data \ diff --git a/packaging/bundle-validation/validate.sh b/packaging/bundle-validation/validate.sh index 75d4227c74a3..b7c2643a780e 100755 --- a/packaging/bundle-validation/validate.sh +++ b/packaging/bundle-validation/validate.sh @@ -295,7 +295,7 @@ if [ "$?" -ne 0 ]; then fi echo "::warning::validate.sh done validating utilities slim bundle" -if [[ ${JAVA_RUNTIME_VERSION} == 'openjdk8' ]]; then +if [[ ${JAVA_RUNTIME_VERSION} == 'openjdk8' && ! "${FLINK_HOME}" == *"1.18"* ]]; then echo "::warning::validate.sh validating flink bundle" test_flink_bundle if [ "$?" -ne 0 ]; then From 287e4f3fdc1f27e958539079510b37e8ad2e038a Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 26 May 2024 21:43:17 -0700 Subject: [PATCH 110/112] Bundle validation only --- .github/workflows/bot.yml | 316 -------------------------------------- 1 file changed, 316 deletions(-) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 82e012caea1c..8ba091b79621 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -51,174 +51,6 @@ jobs: - name: RAT check run: ./scripts/release/validate_source_rat.sh - test-spark: - runs-on: ubuntu-latest - strategy: - matrix: - include: - - scalaProfile: "scala-2.11" - sparkProfile: "spark2.4" - sparkModules: "hudi-spark-datasource/hudi-spark2" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.0" - sparkModules: "hudi-spark-datasource/hudi-spark3.0.x" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.1" - sparkModules: "hudi-spark-datasource/hudi-spark3.1.x" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.2" - sparkModules: "hudi-spark-datasource/hudi-spark3.2.x" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.3" - sparkModules: "hudi-spark-datasource/hudi-spark3.3.x" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.4" - sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" - - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.5" - sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: Build Project - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - run: - mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,$SPARK_COMMON_MODULES,$SPARK_MODULES" - - name: Quickstart Test - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - run: - mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS - - name: UT - Common & Spark - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_MODULES: ${{ matrix.sparkModules }} - if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI - run: - mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - - name: FT - Spark - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_MODULES: ${{ matrix.sparkModules }} - if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI - run: - mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - - test-hudi-hadoop-mr-and-hudi-java-client: - runs-on: ubuntu-latest - timeout-minutes: 40 - strategy: - matrix: - include: - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.2" - flinkProfile: "flink1.18" - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: Generate Maven Wrapper - run: - mvn -N io.takari:maven:wrapper - - name: Build Project - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - FLINK_PROFILE: ${{ matrix.flinkProfile }} - run: - ./mvnw clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -DskipTests=true -Phudi-platform-service -Pthrift-gen-source-with-script $MVN_ARGS -am -pl hudi-hadoop-mr,hudi-client/hudi-java-client - - name: UT - hudi-hadoop-mr and hudi-client/hudi-java-client - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - FLINK_PROFILE: ${{ matrix.flinkProfile }} - run: - ./mvnw test -Punit-tests -fae -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"FLINK_PROFILE" -pl hudi-hadoop-mr,hudi-client/hudi-java-client $MVN_ARGS - - test-spark-java17: - runs-on: ubuntu-latest - strategy: - matrix: - include: - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.3" - sparkModules: "hudi-spark-datasource/hudi-spark3.3.x" - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.4" - sparkModules: "hudi-spark-datasource/hudi-spark3.4.x" - - scalaProfile: "scala-2.12" - sparkProfile: "spark3.5" - sparkModules: "hudi-spark-datasource/hudi-spark3.5.x" - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: Build Project - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - run: - mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: Quickstart Test - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - run: - mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS - - name: UT - Common & Spark - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_MODULES: ${{ matrix.sparkModules }} - if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI - run: - mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - - name: FT - Spark - env: - SCALA_PROFILE: ${{ matrix.scalaProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_MODULES: ${{ matrix.sparkModules }} - if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 as it's covered by Azure CI - run: - mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS - test-flink: runs-on: ubuntu-latest strategy: @@ -259,38 +91,6 @@ jobs: mvn clean install -T 2 -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink -am -Davro.version=1.10.0 -DskipTests=true $MVN_ARGS mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$FLINK_PROFILE" -pl hudi-flink-datasource/hudi-flink $MVN_ARGS - docker-java17-test: - runs-on: ubuntu-latest - strategy: - matrix: - include: - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.5' - sparkRuntime: 'spark3.5.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.4' - sparkRuntime: 'spark3.4.0' - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: UT/FT - Docker Test - OpenJDK 17 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - SCALA_PROFILE: 'scala-2.12' - if: ${{ env.SPARK_PROFILE >= 'spark3.4' }} # Only support Spark 3.4 for now - run: | - HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) - ./packaging/bundle-validation/run_docker_java17.sh - validate-bundles: runs-on: ubuntu-latest strategy: @@ -369,119 +169,3 @@ jobs: run: | HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) ./packaging/bundle-validation/ci_run.sh hudi_docker_java17 $HUDI_VERSION openjdk17 - - validate-release-candidate-bundles: - if: false - runs-on: ubuntu-latest - env: - HUDI_VERSION: 0.13.1-rcx - STAGING_REPO_NUM: 1123 - strategy: - matrix: - include: - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3' - sparkRuntime: 'spark3.5.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.5' - sparkRuntime: 'spark3.5.0' - - flinkProfile: 'flink1.18' - sparkProfile: 'spark3.4' - sparkRuntime: 'spark3.4.0' - - flinkProfile: 'flink1.17' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.2' - - flinkProfile: 'flink1.16' - sparkProfile: 'spark3.3' - sparkRuntime: 'spark3.3.1' - - flinkProfile: 'flink1.15' - sparkProfile: 'spark3.2' - sparkRuntime: 'spark3.2.3' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark3.1' - sparkRuntime: 'spark3.1.3' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark3.0' - sparkRuntime: 'spark3.0.2' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark' - sparkRuntime: 'spark2.4.8' - - flinkProfile: 'flink1.14' - sparkProfile: 'spark2.4' - sparkRuntime: 'spark2.4.8' - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: IT - Bundle Validation - OpenJDK 8 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk8 $STAGING_REPO_NUM - - name: IT - Bundle Validation - OpenJDK 11 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - if: ${{ startsWith(env.SPARK_PROFILE, 'spark3') }} # Only Spark 3.x supports Java 11 as of now - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk11 $STAGING_REPO_NUM - - name: IT - Bundle Validation - OpenJDK 17 - env: - FLINK_PROFILE: ${{ matrix.flinkProfile }} - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_RUNTIME: ${{ matrix.sparkRuntime }} - if: ${{ endsWith(env.SPARK_PROFILE, '3.3') }} # Only Spark 3.3 supports Java 17 as of now - run: | - ./packaging/bundle-validation/ci_run.sh $HUDI_VERSION openjdk17 $STAGING_REPO_NUM - - integration-tests: - runs-on: ubuntu-latest - strategy: - matrix: - include: - - sparkProfile: 'spark2.4' - sparkArchive: 'spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz' - steps: - - uses: actions/checkout@v3 - - name: Set up JDK 8 - uses: actions/setup-java@v3 - with: - java-version: '8' - distribution: 'adopt' - architecture: x64 - cache: maven - - name: Build Project - env: - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11' - run: - mvn clean install -T 2 $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipTests=true $MVN_ARGS - - name: 'UT integ-test' - env: - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11' - run: - mvn test $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -DskipUTs=false -DskipITs=true -pl hudi-integ-test $MVN_ARGS - - name: 'IT' - env: - SPARK_PROFILE: ${{ matrix.sparkProfile }} - SPARK_ARCHIVE: ${{ matrix.sparkArchive }} - SCALA_PROFILE: '-Dscala-2.11 -Dscala.binary.version=2.11' - run: | - echo "Downloading $SPARK_ARCHIVE" - curl https://archive.apache.org/dist/spark/$SPARK_ARCHIVE --create-dirs -o $GITHUB_WORKSPACE/$SPARK_ARCHIVE - tar -xvf $GITHUB_WORKSPACE/$SPARK_ARCHIVE -C $GITHUB_WORKSPACE/ - mkdir /tmp/spark-events/ - SPARK_ARCHIVE_BASENAME=$(basename $SPARK_ARCHIVE) - export SPARK_HOME=$GITHUB_WORKSPACE/${SPARK_ARCHIVE_BASENAME%.*} - rm -f $GITHUB_WORKSPACE/$SPARK_ARCHIVE - docker system prune --all --force - mvn verify $SCALA_PROFILE -D"$SPARK_PROFILE" -Pintegration-tests -pl !hudi-flink-datasource/hudi-flink $MVN_ARGS From e880168678ce63dd9558de01b5adedd0d21599b5 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Sun, 26 May 2024 22:14:28 -0700 Subject: [PATCH 111/112] Fix ci_run.sh --- packaging/bundle-validation/ci_run.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packaging/bundle-validation/ci_run.sh b/packaging/bundle-validation/ci_run.sh index 56ce62d3a649..8fb2fd916677 100755 --- a/packaging/bundle-validation/ci_run.sh +++ b/packaging/bundle-validation/ci_run.sh @@ -121,7 +121,8 @@ fi TMP_JARS_DIR=/tmp/jars/$(date +%s) mkdir -p $TMP_JARS_DIR -if [[ "$HUDI_VERSION" == *"SNAPSHOT" ]]; then +if [[ -z "$STAGING_REPO_NUM" ]]; then + echo 'Adding built bundle jars for validation' cp ${GITHUB_WORKSPACE}/packaging/hudi-flink-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-hadoop-mr-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ cp ${GITHUB_WORKSPACE}/packaging/hudi-kafka-connect-bundle/target/hudi-*-$HUDI_VERSION.jar $TMP_JARS_DIR/ @@ -145,6 +146,10 @@ else HUDI_SPARK_BUNDLE_NAME=hudi-spark2.4-bundle_2.11 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.11 HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.11 + elif [[ ${SPARK_PROFILE} == 'spark3.0' ]]; then + HUDI_SPARK_BUNDLE_NAME=hudi-spark3.0-bundle_2.12 + HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 + HUDI_UTILITIES_SLIM_BUNDLE_NAME=hudi-utilities-slim-bundle_2.12 elif [[ ${SPARK_PROFILE} == 'spark3.1' ]]; then HUDI_SPARK_BUNDLE_NAME=hudi-spark3.1-bundle_2.12 HUDI_UTILITIES_BUNDLE_NAME=hudi-utilities-bundle_2.12 From b36297b0c652483e749f07295ff3de8815cbe9a1 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Mon, 27 May 2024 01:30:02 -0700 Subject: [PATCH 112/112] Remove maven cache --- .github/workflows/bot.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index 8ba091b79621..4f0f38d31d94 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -128,7 +128,6 @@ jobs: java-version: '8' distribution: 'adopt' architecture: x64 - cache: maven - name: Build Project env: FLINK_PROFILE: ${{ matrix.flinkProfile }}