diff --git a/confluent-kafka-plugins/pom.xml b/confluent-kafka-plugins/pom.xml
index 8fd5d47..683513f 100644
--- a/confluent-kafka-plugins/pom.xml
+++ b/confluent-kafka-plugins/pom.xml
@@ -56,6 +56,13 @@
org.apache.avro
avro
${avro.version}
+
+
+
+ com.thoughtworks.paranamer
+ paranamer
+
+
io.confluent
@@ -69,7 +76,7 @@
org.apache.kafka
- kafka_2.11
+ kafka_2.12
${kafka10.version}
@@ -84,16 +91,16 @@
org.apache.spark
- spark-streaming-kafka-0-10_2.11
- ${spark2.version}
+ spark-streaming-kafka-0-10_2.12
+ ${spark3.version}
org.apache.kafka
- kafka_2.11
+ kafka_2.12
org.apache.spark
- spark-tags_2.11
+ spark-tags_2.12
net.jpountz.lz4
@@ -103,8 +110,8 @@
org.apache.spark
- spark-mllib_2.11
- ${spark2.version}
+ spark-mllib_2.12
+ ${spark3.version}
provided
@@ -115,14 +122,14 @@
org.apache.spark
- spark-streaming_2.11
- ${spark2.version}
+ spark-streaming_2.12
+ ${spark3.version}
provided
org.apache.spark
- spark-core_2.11
- ${spark2.version}
+ spark-core_2.12
+ ${spark3.version}
provided
@@ -173,19 +180,19 @@
io.cdap.cdap
- cdap-spark-core2_2.11
+ cdap-spark-core3_2.12
${cdap.version}
test
io.cdap.cdap
- cdap-data-pipeline2_2.11
+ cdap-data-pipeline3_2.12
${cdap.version}
test
io.cdap.cdap
- cdap-data-streams2_2.11
+ cdap-data-streams3_2.12
${cdap.version}
test
@@ -221,19 +228,44 @@
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 3.3.1
+
+
+ compile
+
+ compile
+
+ compile
+
+
+ test-compile
+
+ testCompile
+
+ test-compile
+
+
+ process-resources
+
+ compile
+
+
+
+
org.apache.felix
maven-bundle-plugin
- 3.3.0
+ 3.5.1
<_exportcontents>
io.cdap.plugin.confluent.*;
org.apache.spark.streaming.kafka010.*;
- org.apache.kafka.common.*;
- org.apache.kafka.common.serialization.*;
+ org.apache.kafka.*;
io.confluent.kafka.serializers.*;
- org.apache.kafka.clients.*;
*;inline=false;scope=compile
true
@@ -255,8 +287,8 @@
1.1.0
- system:cdap-data-pipeline[6.1.0-SNAPSHOT,7.0.0-SNAPSHOT)
- system:cdap-data-streams[6.1.0-SNAPSHOT,7.0.0-SNAPSHOT)
+ system:cdap-data-pipeline[6.8.0,7.0.0-SNAPSHOT)
+ system:cdap-data-streams[6.8.0,7.0.0-SNAPSHOT)
diff --git a/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSource.java b/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSource.java
index 8f4aed6..86068ec 100644
--- a/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSource.java
+++ b/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSource.java
@@ -27,11 +27,14 @@
import io.cdap.cdap.etl.api.StageConfigurer;
import io.cdap.cdap.etl.api.streaming.StreamingContext;
import io.cdap.cdap.etl.api.streaming.StreamingSource;
+import io.cdap.cdap.etl.api.streaming.StreamingStateHandler;
import io.cdap.plugin.common.Constants;
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaMetadata;
import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
import org.apache.spark.streaming.api.java.JavaDStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
@@ -45,7 +48,9 @@
@Plugin(type = StreamingSource.PLUGIN_TYPE)
@Name(ConfluentStreamingSource.PLUGIN_NAME)
@Description("Confluent Kafka streaming source.")
-public class ConfluentStreamingSource extends StreamingSource {
+public class ConfluentStreamingSource extends StreamingSource implements StreamingStateHandler {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ConfluentStreamingSource.class);
public static final String PLUGIN_NAME = "Confluent";
private final ConfluentStreamingSourceConfig conf;
@@ -79,6 +84,7 @@ public JavaDStream getStream(StreamingContext context) throws
collector.getOrThrowException();
context.registerLineage(conf.referenceName);
+
return ConfluentStreamingSourceUtil.getStructuredRecordJavaDStream(context, conf, outputSchema, collector);
}
diff --git a/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSourceUtil.java b/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSourceUtil.java
index ccfcd9e..0f97063 100644
--- a/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSourceUtil.java
+++ b/confluent-kafka-plugins/src/main/java/io/cdap/plugin/confluent/streaming/source/ConfluentStreamingSourceUtil.java
@@ -20,6 +20,7 @@
import com.google.common.base.Strings;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
+import com.google.gson.Gson;
import io.cdap.cdap.api.data.format.FormatSpecification;
import io.cdap.cdap.api.data.format.RecordFormat;
import io.cdap.cdap.api.data.format.StructuredRecord;
@@ -28,7 +29,9 @@
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.streaming.StreamingContext;
import io.cdap.cdap.format.RecordFormats;
+import io.cdap.plugin.batch.source.KafkaPartitionOffsets;
import io.cdap.plugin.confluent.common.KafkaHelpers;
+import io.cdap.plugin.confluent.source.ConfluentDStream;
import io.cdap.plugin.format.avro.AvroToStructuredTransformer;
import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import org.apache.avro.generic.GenericRecord;
@@ -46,24 +49,35 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.kafka010.OffsetRange;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Properties;
import java.util.Set;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
import javax.annotation.Nonnull;
/**
@@ -74,48 +88,124 @@
*/
final class ConfluentStreamingSourceUtil {
private static final Logger LOG = LoggerFactory.getLogger(ConfluentStreamingSourceUtil.class);
+ private static final Gson gson = new Gson();
private ConfluentStreamingSourceUtil() {
// no-op
}
/**
- * Returns {@link JavaDStream} for {@link ConfluentStreamingSource}.
- * @param context streaming context
+ * Returns {@link JavaInputDStream} for {@link ConfluentStreamingSource}.
+ *
+ * @param context streaming context
* @param conf kafka conf
- * @param outputSchema source output schema
* @param collector failure collector
+ * @param stateSupplier state supplier
*/
- static JavaDStream getStructuredRecordJavaDStream(
- StreamingContext context, ConfluentStreamingSourceConfig conf, Schema outputSchema, FailureCollector collector) {
+ static JavaInputDStream> getConsumerRecordJavaDStream(
+ StreamingContext context, ConfluentStreamingSourceConfig conf, FailureCollector collector,
+ Supplier