Skip to content

Commit f46bb1b

Browse files
committed
Leverage common service module (closes #6)
1 parent 354a75a commit f46bb1b

File tree

6 files changed

+83
-157
lines changed

6 files changed

+83
-157
lines changed

README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ To start loading "enriched" data into Postgres:
2929
```hcl
3030
module "enriched_topic" {
3131
source = "snowplow-devops/pubsub-topic/google"
32-
version = "0.1.0"
32+
version = "0.3.0"
3333
3434
name = "enriched-topic"
3535
}
3636
3737
module "pipeline_db" {
3838
source = "snowplow-devops/cloud-sql/google"
39-
version = "0.1.0"
39+
version = "0.3.0"
4040
4141
name = "pipeline-db"
4242
@@ -95,7 +95,7 @@ To load the "bad" data instead:
9595
```hcl
9696
module "bad_1_topic" {
9797
source = "snowplow-devops/pubsub-topic/google"
98-
version = "0.1.0"
98+
version = "0.3.0"
9999
100100
name = "bad-1-topic"
101101
}
@@ -146,7 +146,7 @@ module "postgres_loader_bad" {
146146

147147
| Name | Version |
148148
|------|---------|
149-
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 0.15 |
149+
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0.0 |
150150
| <a name="requirement_google"></a> [google](#requirement\_google) | >= 3.44.0 |
151151

152152
## Providers
@@ -159,24 +159,22 @@ module "postgres_loader_bad" {
159159

160160
| Name | Source | Version |
161161
|------|--------|---------|
162-
| <a name="module_telemetry"></a> [telemetry](#module\_telemetry) | snowplow-devops/telemetry/snowplow | 0.2.0 |
162+
| <a name="module_service"></a> [service](#module\_service) | snowplow-devops/service-ce/google | 0.1.0 |
163+
| <a name="module_telemetry"></a> [telemetry](#module\_telemetry) | snowplow-devops/telemetry/snowplow | 0.5.0 |
163164

164165
## Resources
165166

166167
| Name | Type |
167168
|------|------|
168169
| [google_compute_firewall.egress](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource |
169170
| [google_compute_firewall.ingress_ssh](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource |
170-
| [google_compute_instance_template.tpl](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_template) | resource |
171-
| [google_compute_region_instance_group_manager.grp](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_region_instance_group_manager) | resource |
172171
| [google_project_iam_member.sa_cloud_sql_client](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
173172
| [google_project_iam_member.sa_logging_log_writer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
174173
| [google_project_iam_member.sa_pubsub_publisher](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
175174
| [google_project_iam_member.sa_pubsub_subscriber](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
176175
| [google_project_iam_member.sa_pubsub_viewer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
177176
| [google_pubsub_subscription.in](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/pubsub_subscription) | resource |
178177
| [google_service_account.sa](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account) | resource |
179-
| [google_compute_image.ubuntu_20_04](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source |
180178

181179
## Inputs
182180

@@ -189,10 +187,11 @@ module "postgres_loader_bad" {
189187
| <a name="input_in_topic_name"></a> [in\_topic\_name](#input\_in\_topic\_name) | The name of the input pubsub topic that the loader will pull data from | `string` | n/a | yes |
190188
| <a name="input_name"></a> [name](#input\_name) | A name which will be prepended to the resources created | `string` | n/a | yes |
191189
| <a name="input_network"></a> [network](#input\_network) | The name of the network to deploy within | `string` | n/a | yes |
192-
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | The id of the project in which this resource is created | `string` | n/a | yes |
190+
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | The project ID in which the stack is being deployed | `string` | n/a | yes |
193191
| <a name="input_purpose"></a> [purpose](#input\_purpose) | The type of data the loader will be pulling which can be one of ENRICHED\_EVENTS or JSON (Note: JSON can be used for loading bad rows) | `string` | n/a | yes |
194192
| <a name="input_region"></a> [region](#input\_region) | The name of the region to deploy within | `string` | n/a | yes |
195193
| <a name="input_schema_name"></a> [schema\_name](#input\_schema\_name) | The database schema to load data into (e.g. atomic \| atomic\_bad) | `string` | n/a | yes |
194+
| <a name="input_app_version"></a> [app\_version](#input\_app\_version) | App version to use. This variable facilitates dev flow; the module may not work with anything other than the default value. | `string` | `"0.3.1"` | no |
196195
| <a name="input_associate_public_ip_address"></a> [associate\_public\_ip\_address](#input\_associate\_public\_ip\_address) | Whether to assign a public ip address to this instance; if false this instance must be behind a Cloud NAT to connect to the internet | `bool` | `true` | no |
197196
| <a name="input_custom_iglu_resolvers"></a> [custom\_iglu\_resolvers](#input\_custom\_iglu\_resolvers) | The custom Iglu Resolvers that will be used by the loader to resolve and validate events | <pre>list(object({<br> name = string<br> priority = number<br> uri = string<br> api_key = string<br> vendor_prefixes = list(string)<br> }))</pre> | `[]` | no |
198197
| <a name="input_db_host"></a> [db\_host](#input\_db\_host) | The hostname of the database to connect to (Note: if db\_instance\_name is non-empty this setting is ignored) | `string` | `""` | no |
@@ -201,6 +200,7 @@ module "postgres_loader_bad" {
201200
| <a name="input_default_iglu_resolvers"></a> [default\_iglu\_resolvers](#input\_default\_iglu\_resolvers) | The default Iglu Resolvers that will be used by the loader to resolve and validate events | <pre>list(object({<br> name = string<br> priority = number<br> uri = string<br> api_key = string<br> vendor_prefixes = list(string)<br> }))</pre> | <pre>[<br> {<br> "api_key": "",<br> "name": "Iglu Central",<br> "priority": 10,<br> "uri": "http://iglucentral.com",<br> "vendor_prefixes": []<br> },<br> {<br> "api_key": "",<br> "name": "Iglu Central - Mirror 01",<br> "priority": 20,<br> "uri": "http://mirror01.iglucentral.com",<br> "vendor_prefixes": []<br> }<br>]</pre> | no |
202201
| <a name="input_gcp_logs_enabled"></a> [gcp\_logs\_enabled](#input\_gcp\_logs\_enabled) | Whether application logs should be reported to GCP Logging | `bool` | `true` | no |
203202
| <a name="input_in_max_concurrent_checkpoints"></a> [in\_max\_concurrent\_checkpoints](#input\_in\_max\_concurrent\_checkpoints) | The maximum number of concurrent effects for the topic checkpointing system - essentially how many concurrent acks we will make to PubSub | `number` | `100` | no |
203+
| <a name="input_java_opts"></a> [java\_opts](#input\_java\_opts) | Custom Java options (passed to the loader container as JDK\_JAVA\_OPTIONS) | `string` | `"-XX:InitialRAMPercentage=75 -XX:MaxRAMPercentage=75"` | no |
204204
| <a name="input_labels"></a> [labels](#input\_labels) | The labels to append to this resource | `map(string)` | `{}` | no |
205205
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | The machine type to use | `string` | `"e2-small"` | no |
206206
| <a name="input_ssh_block_project_keys"></a> [ssh\_block\_project\_keys](#input\_ssh\_block\_project\_keys) | Whether to block project wide SSH keys | `bool` | `true` | no |

main.tf

Lines changed: 48 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ locals {
33
module_version = "0.2.1"
44

55
app_name = "snowplow-postgres-loader"
6-
app_version = "0.3.1"
6+
app_version = var.app_version
77

88
local_labels = {
99
name = var.name
@@ -21,7 +21,7 @@ locals {
2121

2222
module "telemetry" {
2323
source = "snowplow-devops/telemetry/snowplow"
24-
version = "0.2.0"
24+
version = "0.5.0"
2525

2626
count = var.telemetry_enabled ? 1 : 0
2727

@@ -34,11 +34,6 @@ module "telemetry" {
3434
module_version = local.module_version
3535
}
3636

37-
data "google_compute_image" "ubuntu_20_04" {
38-
family = "ubuntu-2004-lts"
39-
project = "ubuntu-os-cloud"
40-
}
41-
4237
# --- IAM: Service Account setup
4338

4439
resource "google_service_account" "sa" {
@@ -47,28 +42,33 @@ resource "google_service_account" "sa" {
4742
}
4843

4944
resource "google_project_iam_member" "sa_pubsub_viewer" {
50-
role = "roles/pubsub.viewer"
51-
member = "serviceAccount:${google_service_account.sa.email}"
45+
project = var.project_id
46+
role = "roles/pubsub.viewer"
47+
member = "serviceAccount:${google_service_account.sa.email}"
5248
}
5349

5450
resource "google_project_iam_member" "sa_pubsub_subscriber" {
55-
role = "roles/pubsub.subscriber"
56-
member = "serviceAccount:${google_service_account.sa.email}"
51+
project = var.project_id
52+
role = "roles/pubsub.subscriber"
53+
member = "serviceAccount:${google_service_account.sa.email}"
5754
}
5855

5956
resource "google_project_iam_member" "sa_pubsub_publisher" {
60-
role = "roles/pubsub.publisher"
61-
member = "serviceAccount:${google_service_account.sa.email}"
57+
project = var.project_id
58+
role = "roles/pubsub.publisher"
59+
member = "serviceAccount:${google_service_account.sa.email}"
6260
}
6361

6462
resource "google_project_iam_member" "sa_logging_log_writer" {
65-
role = "roles/logging.logWriter"
66-
member = "serviceAccount:${google_service_account.sa.email}"
63+
project = var.project_id
64+
role = "roles/logging.logWriter"
65+
member = "serviceAccount:${google_service_account.sa.email}"
6766
}
6867

6968
resource "google_project_iam_member" "sa_cloud_sql_client" {
70-
role = "roles/cloudsql.client"
71-
member = "serviceAccount:${google_service_account.sa.email}"
69+
project = var.project_id
70+
role = "roles/cloudsql.client"
71+
member = "serviceAccount:${google_service_account.sa.email}"
7272
}
7373

7474
# --- CE: Firewall rules
@@ -181,112 +181,45 @@ locals {
181181
})
182182

183183
startup_script = templatefile("${path.module}/templates/startup-script.sh.tmpl", {
184-
config = local.config
185-
iglu_resolver = local.iglu_resolver
186-
version = local.app_version
187-
db_host = local.db_host
188-
db_port = var.db_port
189-
db_name = var.db_name
190-
db_username = var.db_username
191-
db_password = var.db_password
192-
schema_name = var.schema_name
184+
config_b64 = base64encode(local.config)
185+
iglu_resolver_b64 = base64encode(local.iglu_resolver)
186+
version = local.app_version
187+
db_host = local.db_host
188+
db_port = var.db_port
189+
db_name = var.db_name
190+
db_username = var.db_username
191+
db_password = var.db_password
192+
schema_name = var.schema_name
193193

194194
db_instance_name = var.db_instance_name
195195
cloud_sql_proxy_enabled = var.db_instance_name != ""
196196

197197
telemetry_script = join("", module.telemetry.*.gcp_ubuntu_20_04_user_data)
198198

199199
gcp_logs_enabled = var.gcp_logs_enabled
200-
})
201-
202-
ssh_keys_metadata = <<EOF
203-
%{for v in var.ssh_key_pairs~}
204-
${v.user_name}:${v.public_key}
205-
%{endfor~}
206-
EOF
207-
}
208-
209-
resource "google_compute_instance_template" "tpl" {
210-
name_prefix = "${var.name}-"
211-
description = "This template is used to create Postgres Loader instances"
212-
213-
instance_description = var.name
214-
machine_type = var.machine_type
215-
216-
scheduling {
217-
automatic_restart = true
218-
on_host_maintenance = "MIGRATE"
219-
}
220-
221-
disk {
222-
source_image = var.ubuntu_20_04_source_image == "" ? data.google_compute_image.ubuntu_20_04.self_link : var.ubuntu_20_04_source_image
223-
auto_delete = true
224-
boot = true
225-
disk_type = "pd-standard"
226-
disk_size_gb = 10
227-
}
228-
229-
# Note: Only one of either network or subnetwork can be supplied
230-
# https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance_template#network_interface
231-
network_interface {
232-
network = var.subnetwork == "" ? var.network : ""
233-
subnetwork = var.subnetwork
234-
235-
dynamic "access_config" {
236-
for_each = var.associate_public_ip_address ? [1] : []
237-
238-
content {
239-
network_tier = "PREMIUM"
240-
}
241-
}
242-
}
243-
244-
service_account {
245-
email = google_service_account.sa.email
246-
scopes = ["cloud-platform"]
247-
}
248200

249-
metadata_startup_script = local.startup_script
250-
251-
metadata = {
252-
block-project-ssh-keys = var.ssh_block_project_keys
253-
254-
ssh-keys = local.ssh_keys_metadata
255-
}
256-
257-
tags = [var.name]
258-
259-
labels = local.labels
260-
261-
lifecycle {
262-
create_before_destroy = true
263-
}
201+
java_opts = var.java_opts
202+
})
264203
}
265204

266-
resource "google_compute_region_instance_group_manager" "grp" {
267-
name = "${var.name}-grp"
268-
269-
base_instance_name = var.name
270-
region = var.region
271-
272-
target_size = var.target_size
273-
274-
version {
275-
name = "${local.app_name}-${local.app_version}"
276-
instance_template = google_compute_instance_template.tpl.self_link
277-
}
278-
279-
update_policy {
280-
type = "PROACTIVE"
281-
minimal_action = "REPLACE"
282-
max_unavailable_fixed = 3
283-
}
284-
285-
wait_for_instances = true
286-
287-
timeouts {
288-
create = "20m"
289-
update = "20m"
290-
delete = "30m"
291-
}
205+
module "service" {
206+
source = "snowplow-devops/service-ce/google"
207+
version = "0.1.0"
208+
209+
user_supplied_script = local.startup_script
210+
name = var.name
211+
instance_group_version_name = "${local.app_name}-${local.app_version}"
212+
labels = local.labels
213+
214+
region = var.region
215+
network = var.network
216+
subnetwork = var.subnetwork
217+
218+
ubuntu_20_04_source_image = var.ubuntu_20_04_source_image
219+
machine_type = var.machine_type
220+
target_size = var.target_size
221+
ssh_block_project_keys = var.ssh_block_project_keys
222+
ssh_key_pairs = var.ssh_key_pairs
223+
service_account_email = google_service_account.sa.email
224+
associate_public_ip_address = var.associate_public_ip_address
292225
}

outputs.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
output "manager_id" {
2-
value = google_compute_region_instance_group_manager.grp.id
2+
value = module.service.manager_id
33
description = "Identifier for the instance group manager"
44
}
55

66
output "manager_self_link" {
7-
value = google_compute_region_instance_group_manager.grp.self_link
7+
value = module.service.manager_self_link
88
description = "The URL for the instance group manager"
99
}
1010

1111
output "instance_group_url" {
12-
value = google_compute_region_instance_group_manager.grp.instance_group
12+
value = module.service.instance_group_url
1313
description = "The full URL of the instance group created by the manager"
1414
}

templates/startup-script.sh.tmpl

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,13 @@
1-
#!/bin/bash
2-
set -e -x
3-
4-
# -----------------------------------------------------------------------------
5-
# BASE INSTALL
6-
# -----------------------------------------------------------------------------
7-
81
readonly CONFIG_DIR=/opt/snowplow/config
92

10-
function install_base_packages() {
11-
sudo apt install wget curl unzip -y
12-
}
13-
14-
function install_docker_ce() {
15-
sudo apt install docker.io -y
16-
sudo systemctl enable --now docker
17-
}
18-
19-
sudo apt update -y
20-
21-
install_base_packages
22-
install_docker_ce
23-
243
sudo mkdir -p $${CONFIG_DIR}
25-
sudo cat << EOF > $${CONFIG_DIR}/postgres_loader.json
26-
${config}
4+
5+
sudo base64 --decode << EOF > $${CONFIG_DIR}/postgres_loader.json
6+
${config_b64}
277
EOF
288

29-
sudo cat << EOF > $${CONFIG_DIR}/iglu_resolver.json
30-
${iglu_resolver}
9+
sudo base64 --decode << EOF > $${CONFIG_DIR}/iglu_resolver.json
10+
${iglu_resolver_b64}
3111
EOF
3212

3313
# Create the schema to load data into
@@ -57,7 +37,7 @@ sudo docker run \
5737
%{ if gcp_logs_enabled ~}
5838
--log-driver gcplogs \
5939
%{ endif ~}
60-
-v $${CONFIG_DIR}:/snowplow/config \
40+
--mount type=bind,source=$${CONFIG_DIR},target=/snowplow/config \
6141
-e 'PGUSER=${db_username}' \
6242
-e 'PGPASSWORD=${db_password}' \
6343
postgres:13 \
@@ -69,14 +49,15 @@ sudo docker run \
6949
--name postgres_loader \
7050
--restart always \
7151
--network host \
52+
--memory=$(get_application_memory_mb)m \
7253
%{ if gcp_logs_enabled ~}
7354
--log-driver gcplogs \
7455
%{ else ~}
7556
--log-opt max-size=10m \
7657
--log-opt max-file=5 \
7758
%{ endif ~}
78-
-v $${CONFIG_DIR}:/snowplow/config \
79-
-e 'JAVA_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=info' \
59+
--mount type=bind,source=$${CONFIG_DIR},target=/snowplow/config \
60+
--env JDK_JAVA_OPTIONS='${java_opts}' \
8061
snowplow/snowplow-postgres-loader:${version} \
8162
--config /snowplow/config/postgres_loader.json \
8263
--resolver /snowplow/config/iglu_resolver.json

0 commit comments

Comments
 (0)