-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add GCP Pub/Sub Source Connector and FromJson SMT
The Google Cloud Pub/Sub Source Connector can be used to ingest data from a topic/subscription. The connector determines which GCP subscription to listen on from the following configuration properties: - `gcp.pubsub.project.id` - `gcp.pubsub.topic.id` - `gcp.pubsub.subscription.id` The GCP private key credentials are loaded from a file on the Docker container file system (the key file can be downloaded from the GCP console): - `gcp.pubsub.credentials.path` Alternatively, the JSON file contents can be inlined: - `gcp.pubsub.credentials.json` The full list of connector configuration properties can be found at - https://docs.confluent.io/kafka-connectors/gcp-pubsub/current/configuration_options.html#pubsub-source-connector-config --- The scenario covered by this connector expects a JSON string to be published to the GCP topic. To convert that JSON object to a Kafka Connect Struct, the `FromJson` SMT is used: - `com.github.jcustenborder.kafka.connect.json.FromJson$Value` The `FromJson` SMT requires a JSON schema so that it can map the JSON properties to the Struct fields. This is done by referencing a JSON file in the Docker container file system: - `"transforms.fromJson.json.schema.location": "Url"` - `"transforms.fromJson.json.schema.url": "file:///schemas/gcp-pubsub-FromJson-schema.json"` This JSON schema can also be inlined: - `"transforms.fromJson.json.schema.location": "Inline"` - `transforms.fromJson.json.schema.inline` The connector stores the Pub/Sub message payload in the `MessageData` field, which is extracted with the `ExtractField$Value` SMT before `FromJson` is applied. --- The target Kafka topic has a JSON schema applied to the value subject to ensure only valid data is produced to the topic. We use this converter to perform the JSON schema validation: - `"value.converter": "io.confluent.connect.json.JsonSchemaConverter"` If the JSON object in the value of the record does not conform to the JSON schema, the record will be rejected and an error with the JSON validation details will be logged. 
--- We use the `ValueToKey` and `ExtractField$Key` SMTs to extract the `OrderNumber` field from the record value and use it as the Kafka record key: - https://docs.confluent.io/platform/current/connect/transforms/valuetokey.html See more at: - https://www.confluent.io/hub/confluentinc/kafka-connect-gcp-pubsub - https://github.com/jcustenborder/kafka-connect-json-schema - https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-json-schema/transformations/FromJson.html
- Loading branch information
Showing
6 changed files
with
158 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
connect-connector-configs/gcp-pubsub-source-connector.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"name": "gcp-pubsub-source-connector", | ||
"config": { | ||
"connector.class": "io.confluent.connect.gcp.pubsub.PubSubSourceConnector", | ||
"tasks.max": "1", | ||
"confluent.topic.bootstrap.servers": "broker:29092", | ||
"gcp.pubsub.project.id": "be-mygwtproject", | ||
"gcp.pubsub.topic.id": "kafka-connect-topic", | ||
"gcp.pubsub.subscription.id": "kafka-connect-topic-sub", | ||
"gcp.pubsub.credentials.path": "/credentials/gcp-pubsub-credentials.json", | ||
"gcp.pubsub.data.format": "utf_8", | ||
"kafka.topic": "pub-sub-topic", | ||
"key.converter": "org.apache.kafka.connect.storage.StringConverter", | ||
"value.converter": "io.confluent.connect.json.JsonSchemaConverter", | ||
"value.converter.schemas.enable": "false", | ||
"value.converter.schema.registry.url": "http://schema-registry:8081", | ||
"value.converter.auto.register.schemas": "false", | ||
"value.converter.use.latest.version": "true", | ||
"value.converter.latest.compatibility.strict": "false", | ||
"value.converter.json.fail.invalid.schema": "true", | ||
"transforms": "extractValue,fromJson,valueToKey,extractKey", | ||
"transforms.extractValue.type": "org.apache.kafka.connect.transforms.ExtractField$Value", | ||
"transforms.extractValue.field": "MessageData", | ||
"transforms.fromJson.type": "com.github.jcustenborder.kafka.connect.json.FromJson$Value", | ||
"transforms.fromJson.json.schema.location": "Url", | ||
"transforms.fromJson.json.schema.url": "file:///schemas/gcp-pubsub-FromJson-schema.json", | ||
"transforms.valueToKey.type": "org.apache.kafka.connect.transforms.ValueToKey", | ||
"transforms.valueToKey.fields": "OrderNumber", | ||
"transforms.extractKey.type": "org.apache.kafka.connect.transforms.ExtractField$Key", | ||
"transforms.extractKey.field": "OrderNumber", | ||
"errors.tolerance": "all", | ||
"errors.log.include.messages": "true", | ||
"errors.log.enable": "true" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"type": "service_account", | ||
"project_id": "", | ||
"private_key_id": "", | ||
"private_key": "", | ||
"client_email": "", | ||
"client_id": "", | ||
"auth_uri": "", | ||
"token_uri": "", | ||
"auth_provider_x509_cert_url": "", | ||
"client_x509_cert_url": "", | ||
"universe_domain": "" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-07/schema#", | ||
"title": "PubSubData", | ||
"type": "object", | ||
"properties": { | ||
"OrderNumber": { | ||
"type": "string" | ||
}, | ||
"SiteId": { | ||
"type": "string" | ||
}, | ||
"Locale": { | ||
"type": "string" | ||
}, | ||
"CreatedTime": { | ||
"type": "string" | ||
}, | ||
"AddressId": { | ||
"type": "integer" | ||
}, | ||
"OrderValue": { | ||
"type": "number" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{ | ||
"$schema": "http://json-schema.org/draft-07/schema#", | ||
"description": "JSON Data sourced from GCP Pub Sub", | ||
"title": "pub-sub-data", | ||
"type": "object", | ||
"properties": { | ||
"AddressId": { | ||
"type": "integer" | ||
}, | ||
"CreatedTime": { | ||
"maxLength": 26, | ||
"type": "string" | ||
}, | ||
"Locale": { | ||
"type": "string", | ||
"minLength": 5, | ||
"maxLength": 5 | ||
}, | ||
"OrderNumber": { | ||
"maxLength": 36, | ||
"type": "string" | ||
}, | ||
"OrderValue": { | ||
"type": "number", | ||
"minimum": 10 | ||
}, | ||
"SiteId": { | ||
"maxLength": 10, | ||
"type": "string" | ||
} | ||
} | ||
} |