diff --git a/.circleci/config.yml b/.circleci/config.yml
index e69de29..9d883d6 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -0,0 +1,105 @@
+defaults: &defaults
+  machine: true
+  environment:
+    GRUNTWORK_INSTALLER_VERSION: v0.0.21
+    TERRATEST_LOG_PARSER_VERSION: v0.13.13
+    MODULE_CI_VERSION: v0.13.3
+    TERRAFORM_VERSION: 0.11.8
+    TERRAGRUNT_VERSION: NONE
+    PACKER_VERSION: NONE
+    GOLANG_VERSION: 1.11.2
+    K8S_VERSION: v1.10.0
+    KUBECONFIG: /home/circleci/.kube/config
+
+install_gruntwork_utils: &install_gruntwork_utils
+  name: install gruntwork utils
+  command: |
+    curl -Ls https://raw.githubusercontent.com/gruntwork-io/gruntwork-installer/master/bootstrap-gruntwork-installer.sh | bash /dev/stdin --version "${GRUNTWORK_INSTALLER_VERSION}"
+    gruntwork-install --module-name "gruntwork-module-circleci-helpers" --repo "https://github.com/gruntwork-io/module-ci" --tag "${MODULE_CI_VERSION}"
+    gruntwork-install --binary-name "terratest_log_parser" --repo "https://github.com/gruntwork-io/terratest" --tag "${TERRATEST_LOG_PARSER_VERSION}"
+    configure-environment-for-gruntwork-module \
+      --circle-ci-2-machine-executor \
+      --terraform-version ${TERRAFORM_VERSION} \
+      --terragrunt-version ${TERRAGRUNT_VERSION} \
+      --packer-version ${PACKER_VERSION} \
+      --use-go-dep \
+      --go-version ${GOLANG_VERSION} \
+      --go-src-path test
+
+version: 2
+jobs:
+  build:
+    <<: *defaults
+    steps:
+      - checkout
+      - restore_cache:
+          keys:
+            - dep-v1-{{ checksum "test/Gopkg.lock" }}
+
+      # Install gruntwork utilities
+      - run:
+          <<: *install_gruntwork_utils
+
+      - save_cache:
+          key: dep-v1-{{ checksum "test/Gopkg.lock" }}
+          paths:
+            - ./test/vendor
+
+      # Fail the build if the pre-commit hooks don't pass. Note: if you run pre-commit install locally, these hooks
+      # will execute automatically every time before you commit, ensuring the build never fails at this step!
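+      # To reproduce these checks on your own machine (assuming pre-commit is installed), you can run:
+      #   pre-commit install          # installs the hooks to run automatically on every commit
+      #   pre-commit run --all-files  # runs all configured hooks against the whole repo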
+      - run: pip install pre-commit==1.11.2
+      - run: pre-commit install
+      - run: pre-commit run --all-files
+
+      - persist_to_workspace:
+          root: /home/circleci
+          paths:
+            - project
+            - terraform
+            - packer
+
+  test:
+    <<: *defaults
+    steps:
+      - attach_workspace:
+          at: /home/circleci
+      - checkout
+      - run: echo 'export PATH=$HOME/terraform:$HOME/packer:$PATH' >> $BASH_ENV
+      - run:
+          <<: *install_gruntwork_utils
+      - run:
+          name: update gcloud
+          command: |
+            sudo apt-get remove -y google-cloud-sdk
+            sudo /opt/google-cloud-sdk/bin/gcloud --quiet components update
+            sudo /opt/google-cloud-sdk/bin/gcloud --quiet components update beta kubectl
+      - run:
+          name: run tests
+          command: |
+            mkdir -p /tmp/logs
+            # required for gcloud and kubectl to authenticate correctly
+            echo $GCLOUD_SERVICE_KEY | gcloud auth activate-service-account --key-file=-
+            gcloud --quiet config set project ${GOOGLE_PROJECT_ID}
+            gcloud --quiet config set compute/zone ${GOOGLE_COMPUTE_ZONE}
+            # required for terraform and terratest to authenticate correctly
+            echo $GCLOUD_SERVICE_KEY > /tmp/gcloud.json
+            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcloud.json"
+            # run the tests
+            run-go-tests --path test --timeout 60m | tee /tmp/logs/all.log
+          no_output_timeout: 3600s
+      - run:
+          command: terratest_log_parser --testlog /tmp/logs/all.log --outputdir /tmp/logs
+          when: always
+      - store_artifacts:
+          path: /tmp/logs
+      - store_test_results:
+          path: /tmp/logs
+
+workflows:
+  version: 2
+  build-and-test:
+    jobs:
+      - build
+      - test:
+          requires:
+            - build
diff --git a/.gitignore b/.gitignore
index 40c6dd5..16fc95e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,5 +21,8 @@ out/
 # Go best practices dictate that libraries should not include the vendor directory
 vendor
 
-# Ignore test data
-.test_data/
+# Folder used by Terratest to store temporary test data
+.test-data
+
+# Mock user-data log file
+mock-user-data.log
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..dd17d91
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+  - repo: https://github.com/gruntwork-io/pre-commit
+    sha: v0.0.2
+    hooks:
+      - id: terraform-fmt
+
diff --git a/examples/gke-regional-private-cluster/README.md b/examples/gke-regional-private-cluster/README.md
new file mode 100644
index 0000000..397533f
--- /dev/null
+++ b/examples/gke-regional-private-cluster/README.md
@@ -0,0 +1,49 @@
+# GKE Regional Private Cluster
+
+This example creates a Regional Private GKE Cluster.
+
+Regional GKE Clusters are high-availability clusters where the cluster master is
+spread across multiple GCP zones. During a zonal outage, the Kubernetes control
+plane and a subset of your nodes will still be available, provided that at
+least one of the zones your cluster runs in remains available.
+
+Regional control planes are accessible even during upgrades.
+
+By default, regional clusters will create nodes across 3 zones in a region. If
+you're interested in how nodes are distributed in regional clusters, read the
+GCP docs about [balancing across zones](https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-autoscaler#balancing_across_zones).
+
+Nodes in a private cluster are only granted private IP addresses; as part of a
+defense-in-depth strategy, they're not accessible from the public internet. A
+private cluster can use a GCP HTTP(S) or Network load balancer to accept public
+traffic, or an internal load balancer to accept traffic from within your VPC
+network.
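+
+For example, a Kubernetes `Service` can request an internal load balancer via
+the GCP-documented `cloud.google.com/load-balancer-type` annotation. Below is a
+minimal sketch using the Terraform `kubernetes` provider; the provider
+configuration and the `app = "example"` selector are placeholder assumptions,
+not part of this module:
+
+```hcl
+resource "kubernetes_service" "internal_example" {
+  metadata {
+    name = "internal-example"
+
+    # This annotation asks GCP for an internal (VPC-only) load balancer
+    annotations = {
+      "cloud.google.com/load-balancer-type" = "Internal"
+    }
+  }
+
+  spec {
+    # Placeholder selector; match the labels on your own pods
+    selector = {
+      app = "example"
+    }
+
+    port {
+      port        = 80
+      target_port = 8080
+    }
+
+    type = "LoadBalancer"
+  }
+}
+```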
+
+Private clusters use [Private Google Access](https://cloud.google.com/vpc/docs/private-access-options)
+to access Google APIs such as Stackdriver, and to pull container images from
+Google Container Registry. To use other APIs and services over the internet, you
+can use a [`gke-regional-public-cluster`](../gke-regional-public-cluster).
+Private clusters are recommended for running most apps and services.
+
+## Limitations
+
+No region shares GPU types across all of its zones; to use GPUs, you will need
+to explicitly specify the zones your cluster runs in.
+
+Node Pools cannot be created in zones without a cluster master; you can update
+the zones of your cluster master provided the new zones are within the region
+your cluster is present in.
+
+Currently, you cannot use a proxy to reach the cluster master of a regional
+cluster through its private IP address.
+
+## How do you run these examples?
+
+1. Install [Terraform](https://www.terraform.io/).
+1. Make sure you have Python installed (version 2.x) and in your `PATH`.
+1. Open `variables.tf`, and fill in any required variables that don't have a
+   default.
+1. Run `terraform get`.
+1. Run `terraform plan`.
+1. If the plan looks good, run `terraform apply`.
diff --git a/examples/gke-regional-public-cluster/README.md b/examples/gke-regional-public-cluster/README.md
new file mode 100644
index 0000000..c70f507
--- /dev/null
+++ b/examples/gke-regional-public-cluster/README.md
@@ -0,0 +1,37 @@
+# GKE Regional Public Cluster
+
+This example creates a Regional Public GKE Cluster.
+
+Regional GKE Clusters are high-availability clusters where the cluster master is
+spread across multiple GCP zones. During a zonal outage, the Kubernetes control
+plane and a subset of your nodes will still be available, provided that at
+least one of the zones your cluster runs in remains available.
+
+Regional control planes are accessible even during upgrades.
+
+By default, regional clusters will create nodes across 3 zones in a region. If
+you're interested in how nodes are distributed in regional clusters, read the
+GCP docs about [balancing across zones](https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-autoscaler#balancing_across_zones).
+
+Nodes in a public cluster are accessible from the public internet; consider
+using a private cluster such as [`gke-regional-private-cluster`](../gke-regional-private-cluster)
+to limit access to/from your nodes. Private clusters are recommended for running
+most apps and services.
+
+## Limitations
+
+No region shares GPU types across all of its zones; to use GPUs, you will need
+to explicitly specify the zones your cluster runs in.
+
+Node Pools cannot be created in zones without a cluster master; you can update
+the zones of your cluster master provided the new zones are within the region
+your cluster is present in.
+
+## How do you run these examples?
+
+1. Install [Terraform](https://learn.hashicorp.com/terraform/getting-started/install.html) v0.10.3 or later.
+1. Open `variables.tf`, and fill in any required variables that don't have a
+   default (see the example below).
+1. Run `terraform get`.
+1. Run `terraform plan`.
+1. If the plan looks good, run `terraform apply`.
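+
+For example, you could supply the required variables in a `terraform.tfvars`
+file like the following (both values are placeholders; use your own project ID
+and region):
+
+```hcl
+# terraform.tfvars
+project = "my-gcp-project-id"
+region  = "europe-west1"
+```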
diff --git a/examples/gke-regional-public-cluster/main.tf b/examples/gke-regional-public-cluster/main.tf
new file mode 100644
index 0000000..e50124c
--- /dev/null
+++ b/examples/gke-regional-public-cluster/main.tf
@@ -0,0 +1,101 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# DEPLOY A GKE REGIONAL PUBLIC CLUSTER IN GOOGLE CLOUD
+# This is an example of how to use the gke-cluster module to deploy a regional public Kubernetes cluster in GCP with a
+# Load Balancer in front of it.
+# ---------------------------------------------------------------------------------------------------------------------
+
+provider "google-beta" {
+  project = "${var.project}"
+  region  = "${var.region}"
+}
+
+# Use Terraform 0.10.x so that we can take advantage of Terraform GCP functionality as a separate provider via
+# https://github.com/terraform-providers/terraform-provider-google
+terraform {
+  required_version = ">= 0.10.3"
+}
+
+module "gke_cluster" {
+  # When using these modules in your own templates, you will need to use a Git URL with a ref attribute that pins you
+  # to a specific version of the modules, such as the following example:
+  # source = "git::git@github.com:gruntwork-io/gke-cluster.git//modules/gke-cluster?ref=v0.0.1"
+  source = "../../modules/gke-cluster"
+
+  name = "${var.cluster_name}"
+
+  project    = "${var.project}"
+  region     = "${var.region}"
+  network    = "${google_compute_network.main.name}"
+  subnetwork = "${google_compute_subnetwork.main.name}"
+}
+
+# Node Pool
+
+// Node Pool Resource
+resource "google_container_node_pool" "node_pool" {
+  provider = "google-beta"
+
+  name    = "main-pool"
+  project = "${var.project}"
+  region  = "${var.region}"
+  cluster = "${module.gke_cluster.name}"
+
+  initial_node_count = "1"
+
+  autoscaling {
+    min_node_count = "1"
+    max_node_count = "5"
+  }
+
+  management {
+    auto_repair  = "true"
+    auto_upgrade = "true"
+  }
+
+  node_config {
+    image_type   = "COS"
+    machine_type = "n1-standard-1"
+
+    labels = {
+      all-pools-example = "true"
+    }
+
+    tags         = ["main-pool-example"]
+    disk_size_gb = "30"
+    disk_type    = "pd-standard"
+    preemptible  = false
+
+    oauth_scopes = [
+      "https://www.googleapis.com/auth/cloud-platform",
+    ]
+  }
+
+  lifecycle {
+    ignore_changes = ["initial_node_count"]
+  }
+
+  timeouts {
+    create = "30m"
+    update = "30m"
+    delete = "30m"
+  }
+}
+
+# TODO(rileykarson): Add proper VPC network config once we've made a VPC module
+resource "random_string" "suffix" {
+  length  = 4
+  special = false
+  upper   = false
+}
+
+resource "google_compute_network" "main" {
+  name                    = "${var.cluster_name}-network-${random_string.suffix.result}"
+  auto_create_subnetworks = "false"
+}
+
+resource "google_compute_subnetwork" "main" {
+  name          = "${var.cluster_name}-subnetwork-${random_string.suffix.result}"
+  ip_cidr_range = "10.0.0.0/17"
+  region        = "${var.region}"
+  network       = "${google_compute_network.main.self_link}"
+}
diff --git a/examples/gke-regional-public-cluster/outputs.tf b/examples/gke-regional-public-cluster/outputs.tf
new file mode 100644
index 0000000..51f473b
--- /dev/null
+++ b/examples/gke-regional-public-cluster/outputs.tf
@@ -0,0 +1,22 @@
+output "cluster_endpoint" {
+  description = "The IP address of the cluster master."
+  sensitive   = true
+  value       = "${module.gke_cluster.endpoint}"
+}
+
+output "client_certificate" {
+  description = "Public certificate used by clients to authenticate to the cluster endpoint."
+ value = "${module.gke_cluster.client_certificate}" +} + +output "client_key" { + description = "Private key used by clients to authenticate to the cluster endpoint." + sensitive = true + value = "${module.gke_cluster.client_key}" +} + +output "cluster_ca_certificate" { + description = "The public certificate that is the root of trust for the cluster." + sensitive = true + value = "${module.gke_cluster.cluster_ca_certificate}" +} diff --git a/examples/gke-regional-public-cluster/variables.tf b/examples/gke-regional-public-cluster/variables.tf new file mode 100644 index 0000000..d9beee1 --- /dev/null +++ b/examples/gke-regional-public-cluster/variables.tf @@ -0,0 +1,22 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# These parameters must be supplied when consuming this module. +# --------------------------------------------------------------------------------------------------------------------- + +variable "project" { + description = "The name of the GCP Project where all resources will be launched." +} + +variable "region" { + description = "The Region in which all GCP resources will be launched." +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_name" { + description = "The name of the Kubernetes cluster." + default = "example-cluster" +} diff --git a/examples/zonal-gke-cluster/README.md b/examples/gke-zonal-private-cluster/README.md similarity index 100% rename from examples/zonal-gke-cluster/README.md rename to examples/gke-zonal-private-cluster/README.md diff --git a/examples/gke-zonal-public-cluster/README.md b/examples/gke-zonal-public-cluster/README.md new file mode 100644 index 0000000..1333ed7 --- /dev/null +++ b/examples/gke-zonal-public-cluster/README.md @@ -0,0 +1 @@ +TODO diff --git a/examples/regional-gke-cluster/TODO.md b/examples/regional-gke-cluster/TODO.md deleted file mode 100644 index e69de29..0000000 diff --git a/main.tf b/main.tf index e69de29..8b13789 100644 --- a/main.tf +++ b/main.tf @@ -0,0 +1 @@ + diff --git a/modules/gke-cluster/README.md b/modules/gke-cluster/README.md index e69de29..3584f31 100644 --- a/modules/gke-cluster/README.md +++ b/modules/gke-cluster/README.md @@ -0,0 +1,71 @@ +# GKE Cluster Module + +The GKE Cluster module is used to administer the [cluster master](https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-architecture) +for a [Google Kubernetes Engine (GKE) Cluster](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-admin-overview). + +The cluster master is the "control plane" of the cluster; for example, it runs +the Kubernetes API used by `kubectl`. Worker machines are configured by +attaching [GKE node pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools) +to the cluster module. + +## How do you use this module? + +* See the [root README](/README.md) for instructions on using Terraform modules. +* See the [examples](/examples) folder for example usage. +* See [variables.tf](./variables.tf) for all the variables you can set on this module. +* See [outputs.tf](./outputs.tf) for all the variables that are outputed by this module. + +## What is a GKE Cluster? 
+
+## What is a GKE Cluster?
+
+The GKE Cluster, or "cluster master", runs the Kubernetes control plane
+processes, including the Kubernetes API server, scheduler, and core resource
+controllers.
+
+The master is the unified endpoint for your cluster; it's the "hub" through
+which all other components, such as nodes, interact. Users can interact with the
+cluster via Kubernetes API calls, such as by using `kubectl`. The GKE cluster
+is responsible for running workloads on nodes, as well as scaling/upgrading
+nodes.
+
+## How do I attach worker machines using a GKE node pool?
+
+A "[node](https://kubernetes.io/docs/concepts/architecture/nodes/)" is
+a worker machine in Kubernetes; in GKE, nodes are provisioned as
+[Google Compute Engine VM instances](https://cloud.google.com/compute/docs/instances/).
+
+[GKE Node Pools](https://cloud.google.com/kubernetes-engine/docs/concepts/node-pools)
+are groups of nodes that share the same configuration, defined as a [NodeConfig](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/NodeConfig).
+Node pools also control the autoscaling of their nodes; autoscaling is
+configured inline, alongside the node config definition. A GKE Cluster can have
+multiple node pools defined.
+
+Node pools are configured directly with the
+[`google_container_node_pool`](https://www.terraform.io/docs/providers/google/r/container_node_pool.html)
+Terraform resource by providing a reference to the cluster you configured with
+this module as the `cluster` field, as in the sketch below.
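+
+For example (a sketch, closely following the node pool in the
+[`gke-regional-public-cluster`](/examples/gke-regional-public-cluster) example;
+all sizing values are illustrative):
+
+```hcl
+resource "google_container_node_pool" "node_pool" {
+  provider = "google-beta"
+
+  name    = "main-pool"
+  project = "${var.project}"
+  region  = "${var.region}"
+
+  # Reference the cluster created by this module
+  cluster = "${module.gke_cluster.name}"
+
+  initial_node_count = "1"
+
+  autoscaling {
+    min_node_count = "1"
+    max_node_count = "5"
+  }
+
+  node_config {
+    image_type   = "COS"
+    machine_type = "n1-standard-1"
+  }
+}
+```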
+
+## What VPC network will this cluster use?
+
+You must explicitly specify the network and subnetwork of your GKE cluster using
+the `network` and `subnetwork` fields; this module will not implicitly use the
+`default` network with an automatically generated subnetwork.
+
+The modules in the Gruntwork [`terraform-google-network`](https://github.com/gruntwork-io/terraform-google-network)
+repo are a useful tool for configuring your VPC network and subnetworks in GCP.
+
+## What IAM roles does this module configure? (unimplemented)
+
+Given a service account, this module will enable the following IAM roles:
+
+* roles/compute.viewer
+* roles/container.clusterAdmin
+* roles/container.developer
+* roles/iam.serviceAccountUser
+
+## What services does this module enable on my project? (unimplemented)
+
+This module will ensure the following services are active on your project:
+
+* Compute Engine API - compute.googleapis.com
+* Kubernetes Engine API - container.googleapis.com
diff --git a/modules/gke-cluster/main.tf b/modules/gke-cluster/main.tf
new file mode 100644
index 0000000..1366bfa
--- /dev/null
+++ b/modules/gke-cluster/main.tf
@@ -0,0 +1,97 @@
+resource "google_container_cluster" "cluster" {
+  name        = "${var.name}"
+  description = "${var.description}"
+
+  project    = "${var.project}"
+  region     = "${var.region}"
+  network    = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}"
+  subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}"
+
+  logging_service    = "${var.logging_service}"
+  monitoring_service = "${var.monitoring_service}"
+  min_master_version = "${local.kubernetes_version}"
+
+  # We want to make a cluster with no node pools, and manage them all with the
+  # fine-grained google_container_node_pool resource. The API requires a node
+  # pool or an initial count to be defined; that initial count creates the
+  # "default node pool" with that number of nodes.
+  #
+  # So, we need to set an initial_node_count of 1. This will make a default node
+  # pool with server-defined defaults that Terraform will immediately delete as
+  # part of Create. This leaves us in our desired state: a cluster master with
+  # no node pools.
+  remove_default_node_pool = true
+
+  initial_node_count = 1
+
+  addons_config {
+    http_load_balancing {
+      disabled = "${var.http_load_balancing ? 0 : 1}"
+    }
+
+    horizontal_pod_autoscaling {
+      disabled = "${var.horizontal_pod_autoscaling ? 0 : 1}"
+    }
+
+    kubernetes_dashboard {
+      disabled = "${var.enable_kubernetes_dashboard ? 0 : 1}"
+    }
+
+    network_policy_config {
+      disabled = "${var.enable_network_policy ? 0 : 1}"
+    }
+  }
+
+  network_policy {
+    enabled = "${var.enable_network_policy}"
+
+    # Tigera (Calico Felix) is the only provider
+    provider = "CALICO"
+  }
+
+  master_auth {
+    username = "${var.basic_auth_username}"
+    password = "${var.basic_auth_password}"
+
+    client_certificate_config {
+      issue_client_certificate = "${var.enable_kubernetes_dashboard}"
+    }
+  }
+
+  master_authorized_networks_config = "${var.master_authorized_networks_config}"
+
+  maintenance_policy {
+    daily_maintenance_window {
+      start_time = "${var.maintenance_start_time}"
+    }
+  }
+
+  # Version 2.0.0 of the Google provider will set the default timeouts to these values.
+  timeouts {
+    create = "30m"
+    update = "30m"
+    delete = "30m"
+  }
+}
+
+locals {
+  kubernetes_version = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_node_version}"
+  network_project    = "${var.network_project != "" ? var.network_project : var.project}"
+}
+
+data "google_compute_network" "gke_network" {
+  name    = "${var.network}"
+  project = "${local.network_project}"
+}
+
+data "google_compute_subnetwork" "gke_subnetwork" {
+  name    = "${var.subnetwork}"
+  region  = "${var.region}"
+  project = "${local.network_project}"
+}
+
+// Get available master versions in our region to determine the latest version
+data "google_container_engine_versions" "region" {
+  region  = "${var.region}"
+  project = "${var.project}"
+}
diff --git a/modules/gke-cluster/outputs.tf b/modules/gke-cluster/outputs.tf
new file mode 100644
index 0000000..78e9734
--- /dev/null
+++ b/modules/gke-cluster/outputs.tf
@@ -0,0 +1,34 @@
+output "name" {
+  # This may seem redundant with the `name` input, but it serves an important
+  # purpose: Terraform won't establish a dependency graph without an output to interpolate on.
+  description = "The name of the cluster master. This output is used for interpolation with node pools and other modules."
+
+  value = "${google_container_cluster.cluster.name}"
+}
+
+output "master_version" {
+  description = "The Kubernetes master version."
+  value       = "${google_container_cluster.cluster.master_version}"
+}
+
+output "endpoint" {
+  description = "The IP address of the cluster master."
+  sensitive   = true
+  value       = "${google_container_cluster.cluster.endpoint}"
+}
+
+# The following outputs allow authentication and connectivity to the GKE Cluster.
+output "client_certificate" {
+  description = "Public certificate used by clients to authenticate to the cluster endpoint."
+  value       = "${base64decode(google_container_cluster.cluster.master_auth.0.client_certificate)}"
+}
+
+output "client_key" {
+  description = "Private key used by clients to authenticate to the cluster endpoint."
+  value       = "${base64decode(google_container_cluster.cluster.master_auth.0.client_key)}"
+}
+
+output "cluster_ca_certificate" {
+  description = "The public certificate that is the root of trust for the cluster."
+  value       = "${base64decode(google_container_cluster.cluster.master_auth.0.cluster_ca_certificate)}"
+}
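+
+# For reference, a consumer of this module could wire these outputs into the
+# Terraform kubernetes provider to talk to the cluster. A sketch (the argument
+# names are the kubernetes provider's; the module name is hypothetical):
+#
+#   provider "kubernetes" {
+#     host                   = "${module.gke_cluster.endpoint}"
+#     client_certificate     = "${module.gke_cluster.client_certificate}"
+#     client_key             = "${module.gke_cluster.client_key}"
+#     cluster_ca_certificate = "${module.gke_cluster.cluster_ca_certificate}"
+#   }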
+ value = "${base64decode(google_container_cluster.cluster.master_auth.0.client_key)}" +} + +output "cluster_ca_certificate" { + description = "The public certificate that is the root of trust for the cluster." + value = "${base64decode(google_container_cluster.cluster.master_auth.0.cluster_ca_certificate)}" +} diff --git a/modules/gke-cluster/variables.tf b/modules/gke-cluster/variables.tf new file mode 100644 index 0000000..463be08 --- /dev/null +++ b/modules/gke-cluster/variables.tf @@ -0,0 +1,145 @@ +# --------------------------------------------------------------------------------------------------------------------- +# REQUIRED PARAMETERS +# These variables are expected to be passed in by the operator +# --------------------------------------------------------------------------------------------------------------------- + +variable "project" { + description = "The project ID to host the cluster in" +} + +variable "region" { + description = "The region to host the cluster in" +} + +variable "name" { + description = "The name of the cluster" +} + +variable "network" { + description = "The VPC network to host the cluster in" +} + +variable "subnetwork" { + description = "The subnetwork to host the cluster in" +} + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# Generally, these values won't need to be changed. +# --------------------------------------------------------------------------------------------------------------------- + +variable "description" { + description = "The description of the cluster" + default = "" +} + +variable "kubernetes_version" { + description = "The Kubernetes version of the masters. If set to 'latest' it will pull latest available version in the selected region." + default = "latest" +} + +variable "logging_service" { + description = "The logging service that the cluster should write logs to. Available options include logging.googleapis.com, logging.googleapis.com/kubernetes (beta), and none" + default = "logging.googleapis.com" +} + +variable "monitoring_service" { + description = "The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none" + default = "monitoring.googleapis.com" +} + +variable "horizontal_pod_autoscaling" { + description = "Whether to enable the horizontal pod autoscaling addon" + default = true +} + +variable "http_load_balancing" { + description = "Whether to enable the http (L7) load balancing addon" + default = true +} + +// TODO(robmorgan): Are we using these values below? We should understand them more fully before adding them to configs. + +variable "network_project" { + description = "The project ID of the shared VPC's host (for shared vpc support)" + default = "" +} + +variable "master_authorized_networks_config" { + type = "list" + + description = <