diff --git a/isotope/example-config.toml b/isotope/example-config.toml index 689096753a..45f416a777 100644 --- a/isotope/example-config.toml +++ b/isotope/example-config.toml @@ -9,7 +9,7 @@ environments = [ ] [cluster] -project_id = "istio-200620" +project_id = "" # # Cluster will be created with one n1-standard-1 node per zone in the default node-pool. # Only one of them will be running Prometheus. In total, the cluster will have # 1 Prometheus node per zone, 1 client node per zone, and N service-graph nodes per zone. @@ -19,10 +19,10 @@ name = "isotope-cluster-2" # would be in the first zone you specify but the nodes would be # spread around the zones. zones = ["us-central1-a", "us-central1-b", "us-central1-c"] -version = "1.12.7-gke.25" +version = "1.27.3-gke.100" [istio] -archive_url = "https://github.com/istio/istio/releases/download/1.1.3/istio-1.1.3-linux.tar.gz" +archive_url = "https://github.com/istio/istio/releases/download/1.20.0/istio-1.20.0-linux-amd64.tar.gz" [server] machine_type = "n1-standard-1" diff --git a/isotope/runner/README.md b/isotope/runner/README.md new file mode 100644 index 0000000000..3d71f93513 --- /dev/null +++ b/isotope/runner/README.md @@ -0,0 +1,22 @@ +# Runner + +This subdirectory contains the Python3 _module_ for automating topology +tests. The executable "main" for this is at "../run_tests.py". 
+ +## Pseudocode + +```txt +read configuration +create cluster +add prometheus +for each topology: + convert topology to Kubernetes YAML + for each environment (none, istio, sidecars only, etc.): + update Prometheus labels + deploy environment + deploy topology + run load test + delete topology + delete environment +delete cluster +``` diff --git a/isotope/runner/__init__.py b/isotope/runner/__init__.py new file mode 100644 index 0000000000..99e483edc5 --- /dev/null +++ b/isotope/runner/__init__.py @@ -0,0 +1,34 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for automating topology testing. + +The pseudo-code for the intended calls for this is: + +``` +read configuration +create cluster +add prometheus +for each topology: + convert topology to Kubernetes YAML + for each environment (none, istio, sidecars only, etc.): + update Prometheus labels + deploy environment + deploy topology + run load test + delete topology + delete environment +delete cluster +``` +""" diff --git a/isotope/runner/cluster.py b/isotope/runner/cluster.py new file mode 100644 index 0000000000..597bf8c6f9 --- /dev/null +++ b/isotope/runner/cluster.py @@ -0,0 +1,169 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Set up for GKE clusters and Prometheus monitoring.""" + +import logging +import os + +from . import consts, prometheus, resources, sh, wait +from typing import List + + +def set_up_if_not_exists( + project_id: str, name: str, zones: List[str], version: str, + service_graph_machine_type: str, service_graph_disk_size_gb: int, + service_graph_num_nodes: int, client_machine_type: str, + client_disk_size_gb: int) -> None: + sh.run_gcloud(['config', 'set', 'project', project_id], check=True) + zone = zones[0] + + # TODO: This is the default tabular output. Filter the input to just the + # names of the existing clusters. + output = sh.run_gcloud( + ['container', 'clusters', 'list', '--zone', zone], check=True).stdout + # TODO: Also check if the cluster is normal (e.g. not being deleted). + if name + " " in output: + logging.debug('%s already exists; bypassing creation', name) + else: + logging.debug('%s does not exist yet; creating...', name) + set_up(project_id, name, zones, version, service_graph_machine_type, + service_graph_disk_size_gb, service_graph_num_nodes, + client_machine_type, client_disk_size_gb) + + +def set_up(project_id: str, name: str, zones: List[str], version: str, + service_graph_machine_type: str, service_graph_disk_size_gb: int, + service_graph_num_nodes: int, client_machine_type: str, + client_disk_size_gb: int, deploy_prometheus=False) -> None: + """Creates and sets up a GKE cluster. + + Args: + project_id: full ID for the cluster's GCP project + name: name of the GKE cluster + zone: GCE zone (e.g. 
"us-central1-a") + version: GKE version (e.g. "1.9.7-gke.3") + service_graph_machine_type: GCE type of service machines + service_graph_disk_size_gb: disk size of service machines in gigabytes + service_graph_num_nodes: number of machines in the service graph pool + client_machine_type: GCE type of client machine + client_disk_size_gb: disk size of client machine in gigabytes + """ + sh.run_gcloud(['config', 'set', 'project', project_id], check=True) + + _create_cluster(name, zones, version, 'n1-standard-4', 16, 1) + _create_cluster_role_binding() + + if deploy_prometheus: + _create_persistent_volume() + _initialize_helm() + _helm_add_prometheus_operator() + prometheus.apply( + intermediate_file_path=resources.PROMETHEUS_VALUES_GEN_YAML_PATH) + + _create_service_graph_node_pool(service_graph_num_nodes, + service_graph_machine_type, + service_graph_disk_size_gb, + zones[0]) + _create_client_node_pool(client_machine_type, client_disk_size_gb, zones[0]) + + +def _create_cluster(name: str, zones: List[str], version: str, machine_type: str, + disk_size_gb: int, num_nodes: int) -> None: + logging.info('creating cluster "%s"', name) + node_locations = ','.join(zones) + zone = zones[0] + + sh.run_gcloud( + [ + 'container', 'clusters', 'create', name, '--zone', zone, + '--node-locations', node_locations, '--cluster-version', version, + '--machine-type', machine_type, '--disk-size', + str(disk_size_gb), '--num-nodes', + str(num_nodes) + ], + check=True) + sh.run_gcloud(['config', 'set', 'container/cluster', name], check=True) + sh.run_gcloud( + ['container', 'clusters', 'get-credentials', '--zone', zone, name], + check=True) + + +def _create_service_graph_node_pool(num_nodes: int, machine_type: str, + disk_size_gb: int, zone: str) -> None: + logging.info('creating service graph node-pool') + _create_node_pool(consts.SERVICE_GRAPH_NODE_POOL_NAME, num_nodes, + machine_type, disk_size_gb, zone) + + +def _create_client_node_pool(machine_type: str, disk_size_gb: int, + zone: str) 
-> None: + logging.info('creating client node-pool') + _create_node_pool(consts.CLIENT_NODE_POOL_NAME, 1, machine_type, + disk_size_gb, zone) + + +def _create_node_pool(name: str, num_nodes: int, machine_type: str, + disk_size_gb: int, zone: str) -> None: + sh.run_gcloud( + [ + 'container', 'node-pools', 'create', name, '--machine-type', + machine_type, '--num-nodes', + str(num_nodes), '--disk-size', + str(disk_size_gb), '--zone', + zone + ], + check=True) + + +def _create_cluster_role_binding() -> None: + logging.info('creating cluster-admin-binding') + proc = sh.run_gcloud(['config', 'get-value', 'account'], check=True) + account = proc.stdout + sh.run_kubectl( + [ + 'create', 'clusterrolebinding', 'cluster-admin-binding', + '--clusterrole', 'cluster-admin', '--user', account + ], + check=True) + + +def _create_persistent_volume() -> None: + logging.info('creating persistent volume') + sh.run_kubectl( + ['apply', '-f', resources.PERSISTENT_VOLUME_YAML_PATH], check=True) + + +def _initialize_helm() -> None: + logging.info('initializing Helm') + sh.run_kubectl( + ['create', '-f', resources.HELM_SERVICE_ACCOUNT_YAML_PATH], check=True) + sh.run_with_k8s_api( + ['helm', 'init', '--service-account', 'tiller', '--wait'], check=True) + sh.run_with_k8s_api( + [ + 'helm', 'repo', 'add', 'coreos', + 'https://s3-eu-west-1.amazonaws.com/coreos-charts/stable' + ], + check=True) + + +def _helm_add_prometheus_operator() -> None: + logging.info('installing coreos/prometheus-operator') + sh.run_with_k8s_api( + [ + 'helm', 'install', 'coreos/prometheus-operator', '--name', + 'prometheus-operator', '--namespace', consts.MONITORING_NAMESPACE + ], + check=True) diff --git a/isotope/runner/config.py b/isotope/runner/config.py new file mode 100644 index 0000000000..0b19c38c71 --- /dev/null +++ b/isotope/runner/config.py @@ -0,0 +1,126 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Read runner configuration from a dict or TOML.""" + +from typing import Any, Dict, List, Optional + +import toml + + +class RunnerConfig: + """Represents the intermediary between a config file""" + + def __init__(self, topology_paths: List[str], environments: List[str], + istio_archive_url: str, cluster_project_id: str, + cluster_name: str, cluster_zones: List[str], + cluster_version: str, server_machine_type: str, + server_disk_size_gb: int, server_num_nodes: int, + server_image: str, client_machine_type: str, + client_disk_size_gb: int, client_image: str, + client_qps: Optional[int], client_duration: str, + client_num_conc_conns: int) -> None: + self.topology_paths = topology_paths + self.environments = environments + self.istio_archive_url = istio_archive_url + self.cluster_project_id = cluster_project_id + self.cluster_name = cluster_name + self.cluster_zones = cluster_zones + self.cluster_version = cluster_version + self.server_machine_type = server_machine_type + self.server_disk_size_gb = server_disk_size_gb + self.server_num_nodes = server_num_nodes + self.server_image = server_image + self.client_machine_type = client_machine_type + self.client_disk_size_gb = client_disk_size_gb + self.client_image = client_image + self.client_qps = client_qps + self.client_duration = client_duration + self.client_num_conc_conns = client_num_conc_conns + + def labels(self) -> Dict[str, str]: + """Returns the static labels for Prometheus for this configuration.""" + return { + 'istio_archive_url': self.istio_archive_url, + 'cluster_version': 
self.cluster_version, + 'cluster_zones': self.cluster_zones, + 'server_machine_type': self.server_machine_type, + 'server_disk_size_gb': str(self.server_disk_size_gb), + 'server_num_nodes': str(self.server_num_nodes), + 'server_image': self.server_image, + 'client_machine_type': self.client_machine_type, + 'client_disk_size_gb': str(self.client_disk_size_gb), + 'client_image': self.client_image, + 'client_qps': str(self.client_qps), + 'client_duration': self.client_duration, + 'client_num_concurrent_connections': + str(self.client_num_conc_conns), + } + + +def from_dict(d: Dict[str, Any]) -> RunnerConfig: + topology_paths = d.get('topology_paths', []) + environments = d.get('environments', []) + + istio = d['istio'] + istio_archive_url = istio['archive_url'] + + cluster = d['cluster'] + cluster_project_id = cluster['project_id'] + cluster_name = cluster['name'] + cluster_zones = cluster['zones'] + cluster_version = cluster['version'] + + server = d['server'] + server_machine_type = server['machine_type'] + server_disk_size_gb = server['disk_size_gb'] + server_num_nodes = server['num_nodes'] + server_image = server['image'] + + client = d['client'] + client_machine_type = client['machine_type'] + client_disk_size_gb = client['disk_size_gb'] + client_image = client['image'] + client_qps = client['qps'] + if client_qps == 'max': + client_qps = None + else: + # Must coerce into integer, otherwise not a valid QPS. 
+ client_qps = int(client_qps) + client_duration = client['duration'] + client_num_conc_conns = client['num_concurrent_connections'] + + return RunnerConfig( + topology_paths=topology_paths, + environments=environments, + istio_archive_url=istio_archive_url, + cluster_project_id=cluster_project_id, + cluster_name=cluster_name, + cluster_zones=cluster_zones, + cluster_version=cluster_version, + server_machine_type=server_machine_type, + server_disk_size_gb=server_disk_size_gb, + server_image=server_image, + server_num_nodes=server_num_nodes, + client_machine_type=client_machine_type, + client_disk_size_gb=client_disk_size_gb, + client_image=client_image, + client_qps=client_qps, + client_duration=client_duration, + client_num_conc_conns=client_num_conc_conns) + + +def from_toml_file(path: str) -> RunnerConfig: + d = toml.load(path) + return from_dict(d) diff --git a/isotope/runner/consts.py b/isotope/runner/consts.py new file mode 100644 index 0000000000..215fa6b21c --- /dev/null +++ b/isotope/runner/consts.py @@ -0,0 +1,35 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Common constants used throughout the runner module.""" + +import datetime + +DEFAULT_NAMESPACE = 'default' +MONITORING_NAMESPACE = 'monitoring' +ISTIO_NAMESPACE = 'istio-system' +SERVICE_GRAPH_NAMESPACE = 'default' + +DEFAULT_NODE_POOL_NAME = 'default-pool' +SERVICE_GRAPH_NODE_POOL_NAME = 'service-graph-pool' +CLIENT_NODE_POOL_NAME = 'client-pool' +CLIENT_NAME = 'client' +CLIENT_PORT = 8080 +SERVICE_GRAPH_SERVICE_SELECTOR = 'role=service' +SERVICE_PORT = 8080 +ISTIO_INGRESS_GATEWAY_PORT = 80 + +PROMETHEUS_SCRAPE_INTERVAL = datetime.timedelta(seconds=30) + +ISTIO_TELEMETRY_PORT = 42422 diff --git a/isotope/runner/entrypoint.py b/isotope/runner/entrypoint.py new file mode 100644 index 0000000000..f860d29a98 --- /dev/null +++ b/isotope/runner/entrypoint.py @@ -0,0 +1,42 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Read topology YAML and extract information about the entrypoint service.""" + +import yaml + +from . 
import consts + + +def extract_name(topology_path: str) -> str: + """Returns the name of the entrypoint service in the topology.""" + with open(topology_path, 'r') as f: + topology = yaml.load(f, Loader=yaml.FullLoader) + + services = topology['services'] + entrypoint_services = [svc for svc in services if svc.get('isEntrypoint')] + if len(entrypoint_services) != 1: + raise ValueError( + 'topology at {} should only have one entrypoint'.format( + topology_path)) + entrypoint_name = entrypoint_services[0]['name'] + return entrypoint_name + + +def extract_url(topology_path: str) -> str: + """Returns the in-cluster URL to access the service graph's entrypoint.""" + entrypoint_name = extract_name(topology_path) + url = 'http://{}.{}.svc.cluster.local:{}'.format( + entrypoint_name, consts.SERVICE_GRAPH_NAMESPACE, consts.SERVICE_PORT) + return url diff --git a/isotope/runner/entrypoint_test.py b/isotope/runner/entrypoint_test.py new file mode 100644 index 0000000000..ac8e0f54a5 --- /dev/null +++ b/isotope/runner/entrypoint_test.py @@ -0,0 +1,61 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import textwrap +from unittest import mock + +import pytest + +from . 
import entrypoint + + +def test_extract_url_should_return_service_with_is_entrypoint() -> None: + contents = textwrap.dedent("""\ + services: + - name: b + - name: a + isEntrypoint: true + """) + + expected = 'http://a.service-graph.svc.cluster.local:8080' + with mock.patch('builtins.open', mock.mock_open(read_data=contents)): + actual = entrypoint.extract_url('fake-file.yaml') + + assert expected == actual + + +def test_extract_url_should_fail_with_no_entrypoints() -> None: + contents = textwrap.dedent("""\ + services: + - name: b + - name: a + """) + + with mock.patch('builtins.open', mock.mock_open(read_data=contents)): + with pytest.raises(ValueError): + entrypoint.extract_url('fake-file.yaml') + + +def test_extract_url_should_fail_with_multiple_entrypoints() -> None: + contents = textwrap.dedent("""\ + services: + - name: b + isEntrypoint: true + - name: a + isEntrypoint: true + """) + + with mock.patch('builtins.open', mock.mock_open(read_data=contents)): + with pytest.raises(ValueError): + entrypoint.extract_url('fake-file.yaml') diff --git a/isotope/runner/istio.py b/isotope/runner/istio.py new file mode 100644 index 0000000000..a201ff13c1 --- /dev/null +++ b/isotope/runner/istio.py @@ -0,0 +1,203 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Functions for manipulating the Istio environment.""" +from __future__ import print_function + +import contextlib +import logging +import os +import tarfile +import tempfile +from typing import Any, Dict, Generator + +import yaml +import time + +from . import consts, kubectl, resources, sh, wait + +DAILY_BUILD_URL = "https://storage.googleapis.com/istio-prerelease/daily-build" + + +def convert_archive(archive_url: str) -> str: + """Convert symbolic archive into archive url + + """ + if archive_url.startswith("http"): + return archive_url + + full_name = "{}-09-15".format(archive_url) + + return "{daily}/{full_name}/istio-{full_name}-linux-amd64.tar.gz".format( + daily=DAILY_BUILD_URL, full_name=full_name) + + +def set_up(entrypoint_service_name: str, entrypoint_service_namespace: str, + archive_url: str, values: str) -> None: + """Installs Istio from the archive URL. + + This downloads and extracts the archive in a temporary directory, then + installs the resources via `istioctl install` and `kubectl apply`. 
+ """ + archive_url = convert_archive(archive_url) + + print(("Using archive_url", archive_url)) + + with tempfile.TemporaryDirectory() as tmp_dir_path: + archive_path = os.path.join(tmp_dir_path, 'istio.tar.gz') + _download(archive_url, archive_path) + + extracted_dir_path = os.path.join(tmp_dir_path, 'istio') + extracted_istio_path = _extract(archive_path, extracted_dir_path) + + istioctl_path = os.path.join(extracted_istio_path, 'bin', + 'istioctl') + + chart_path = os.path.join(extracted_istio_path, 'install', + 'kubernetes', 'helm', 'istio') + _install_istio(istioctl_path) + + _create_ingress_rules(entrypoint_service_name, + entrypoint_service_namespace) + + +def get_ingress_gateway_url() -> str: + ip = wait.until_output([ + 'kubectl', '--namespace', consts.ISTIO_NAMESPACE, 'get', 'service', + 'istio-ingressgateway', '-o', + 'jsonpath={.status.loadBalancer.ingress[0].ip}' + ]) + return 'http://{}:{}'.format(ip, consts.ISTIO_INGRESS_GATEWAY_PORT) + + +def _download(archive_url: str, path: str) -> None: + logging.info('downloading %s', archive_url) + sh.run(['curl', '-L', '--output', path, archive_url]) + + +def _extract(archive_path: str, extracted_dir_path: str) -> str: + """Extracts the .tar.gz at archive_path to extracted_dir_path. 
+ + Args: + archive_path: path to a .tar.gz archive file, containing a single + directory when extracted + extracted_dir_path: the destination in which to extract the contents + of the archive + + Returns: + the path to the single directory the archive contains + """ + with tarfile.open(archive_path) as tar: + tar.extractall(path=extracted_dir_path) + extracted_items = os.listdir(extracted_dir_path) + if len(extracted_items) != 1: + raise ValueError( + 'archive at {} did not contain a single directory'.format( + archive_path)) + return os.path.join(extracted_dir_path, extracted_items[0]) + + +def _install_istio(istioctl_path) -> None: + logging.info('Running istioctl install') + install = sh.run([istioctl_path, + 'install', + '--set', + 'profile=default', + '--skip-confirmation']) + + +@contextlib.contextmanager +def _work_dir(path: str) -> Generator[None, None, None]: + prev_path = os.getcwd() + if not os.path.exists(path): + os.makedirs(path) + os.chdir(path) + yield + os.chdir(prev_path) + + +def _create_ingress_rules(entrypoint_service_name: str, + entrypoint_service_namespace: str) -> None: + logging.info('creating istio ingress rules') + ingress_yaml = _get_ingress_yaml(entrypoint_service_name, + entrypoint_service_namespace) + kubectl.apply_text( + ingress_yaml, intermediate_file_path=resources.ISTIO_INGRESS_YAML_PATH) + + +def _get_ingress_yaml(entrypoint_service_name: str, + entrypoint_service_namespace: str) -> str: + gateway = _get_gateway_dict() + virtual_service = _get_virtual_service_dict(entrypoint_service_name, + entrypoint_service_namespace) + return yaml.dump_all([gateway, virtual_service], default_flow_style=False) + + +def _get_gateway_dict() -> Dict[str, Any]: + return { + 'apiVersion': 'networking.istio.io/v1alpha3', + 'kind': 'Gateway', + 'metadata': { + 'name': 'entrypoint-gateway', + }, + 'spec': { + 'selector': { + 'istio': 'ingressgateway', + }, + 'servers': [{ + 'hosts': ['*'], + 'port': { + 'name': 'http', + 'number': 
consts.ISTIO_INGRESS_GATEWAY_PORT, + 'protocol': 'HTTP', + }, + }], + }, + } + + +def _get_virtual_service_dict( + entrypoint_service_name: str, + entrypoint_service_namespace: str) -> Dict[str, Any]: + return { + 'apiVersion': 'networking.istio.io/v1alpha3', + 'kind': 'VirtualService', + 'metadata': { + 'name': 'entrypoint', + }, + 'spec': { + 'hosts': ['*'], + 'gateways': ['entrypoint-gateway'], + 'http': [{ + 'route': [{ + 'destination': { + 'host': + '{}.{}.svc.cluster.local'.format( + entrypoint_service_name, + entrypoint_service_namespace), + 'port': { + 'number': consts.SERVICE_PORT, + }, + }, + }], + }], + }, + } + + +def tear_down() -> None: + """Deletes the Istio resources and namespace.""" + sh.run_kubectl(['delete', '-f', resources.ISTIO_GEN_YAML_PATH]) + sh.run_kubectl(['delete', 'namespace', consts.ISTIO_NAMESPACE]) + wait.until_namespace_is_deleted(consts.SERVICE_GRAPH_NAMESPACE) diff --git a/isotope/runner/kubectl.py b/isotope/runner/kubectl.py new file mode 100644 index 0000000000..725a28bc9e --- /dev/null +++ b/isotope/runner/kubectl.py @@ -0,0 +1,124 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstractions for common calls to kubectl.""" + +import contextlib +import logging +import socket +import subprocess +import tempfile +import time +from typing import Any, Dict, Generator, List + +import yaml + +from . 
import sh + + +@contextlib.contextmanager +def manifest(path: str, cleanup=False) -> Generator[None, None, None]: + """Runs `kubectl apply -f path` on entry and opposing delete on exit.""" + try: + apply_file(path) + yield + finally: + if cleanup: + delete_file(path) + + +def apply_file(path: str) -> None: + sh.run_kubectl(['apply', '-f', path], check=True) + + +def delete_file(path: str) -> None: + sh.run_kubectl(['delete', '-f', path]) + + +def apply_dicts(dicts: List[Dict[str, Any]], + intermediate_file_path: str = None) -> None: + yaml_str = yaml.dump_all(dicts) + apply_text(yaml_str, intermediate_file_path=intermediate_file_path) + + +def apply_text(json_or_yaml: str, intermediate_file_path: str = None) -> None: + """Creates/updates resources described in either JSON or YAML string. + + Uses `kubectl apply -f FILE`. + + Args: + json_or_yaml: contains either the JSON or YAML manifest of the + resource(s) to apply; applied through an intermediate file + intermediate_file_path: if set, defines the file to write to (useful + for debugging); otherwise, uses a temporary file + """ + if intermediate_file_path is None: + opener = tempfile.NamedTemporaryFile(mode='w+') + else: + opener = open(intermediate_file_path, 'w+') + + with opener as f: + f.write(json_or_yaml) + f.flush() + apply_file(f.name) + + +@contextlib.contextmanager +def port_forward(label_key: str, label_value: str, target_port: int, + namespace: str) -> Generator[int, None, None]: + """Port forwards the first pod matching label, yielding the open port.""" + # TODO: Catch error if label matches zero pods. 
+ pod_name = sh.run_kubectl( + [ + 'get', 'pod', '-l{}={}'.format(label_key, label_value), + '-o=jsonpath={.items[0].metadata.name}', '--namespace', namespace + ], + check=True).stdout + local_port = _get_open_port() + proc = subprocess.Popen( + [ + 'kubectl', '--namespace', namespace, 'port-forward', pod_name, + '{}:{}'.format(local_port, target_port) + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + try: + # proc.communicate waits until the process terminates or timeout. + stdout, stderr_bytes = proc.communicate(timeout=1) + + # If proc terminates after 1 second, assume that an error occurred. + stderr = stderr_bytes.decode('utf-8') if stderr_bytes else '' + info = ': {}'.format(stderr) if stderr else '' + msg = 'could not port-forward to {}:{} on local port {}{}'.format( + pod_name, target_port, local_port, info) + raise RuntimeError(msg) + except subprocess.TimeoutExpired: + # If proc is still running after 1 second, check that it is + # forwarding...and assume that proc will + # continue port forwarding until termination, as expected. + pass + + yield local_port + + proc.terminate() + + +# Adapted from +# https://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python. +def _get_open_port() -> int: + sock = socket.socket() + sock.bind(('', 0)) + _, port = sock.getsockname() + return port diff --git a/isotope/runner/md5.py b/isotope/runner/md5.py new file mode 100644 index 0000000000..70b977acee --- /dev/null +++ b/isotope/runner/md5.py @@ -0,0 +1,24 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib + + +def hex(path: str) -> str: + """Returns the hex of the MD5 hash of the file at path's contents.""" + hash_md5 = hashlib.md5() + with open(path, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + hash_md5.update(chunk) + return hash_md5.hexdigest() diff --git a/isotope/runner/mesh.py b/isotope/runner/mesh.py new file mode 100644 index 0000000000..99ab7e9e75 --- /dev/null +++ b/isotope/runner/mesh.py @@ -0,0 +1,101 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Definitions for abstracting a mesh environment underlying a topology. + +Any new environments should add an entry to the if-else chain in `for_state` +and define three functions: + +1. set_up: creates the necessary Kubernetes resources +2. get_ingress_url: returns the URL to access the ingress or entrypoint +3. tear_down: deletes the previously created resources + +These three functions make it simple for use in a with-statement. +""" +from __future__ import print_function + +import contextlib +from typing import Callable, Generator + +from . 
class Environment:
    """Bundles functions to set up, tear down, and interface with a mesh.

    Attributes:
        name: environment identifier (e.g. 'none' or 'istio')
        set_up: creates the Kubernetes resources for the mesh
        tear_down: deletes the resources created by set_up
        get_ingress_url: returns the URL of the mesh's entrypoint
    """

    def __init__(self, name: str, set_up: Callable[[], None],
                 tear_down: Callable[[], None],
                 get_ingress_url: Callable[[], str]) -> None:
        self.name = name
        # Stored as plain attributes; callers invoke them directly or
        # through the context() helper below.
        self.set_up = set_up
        self.tear_down = tear_down
        self.get_ingress_url = get_ingress_url

    @contextlib.contextmanager
    def context(self) -> Generator[str, None, None]:
        """Context manager: set_up on entry, yield ingress URL, tear_down on exit.

        Note: set_up is deliberately inside the try block, so tear_down
        runs even if set_up itself (or the with-body) raises.
        """
        try:
            self.set_up()
            yield self.get_ingress_url()
        finally:
            self.tear_down()
def test_context_should_call_functions():
    """Environment.context must set_up, yield the ingress URL, then tear_down."""
    set_up = mock.MagicMock()
    tear_down = mock.MagicMock()
    ingress_url = 'http://example.com'
    get_ingress_url = mock.MagicMock(return_value=ingress_url)
    env = mesh.Environment('', set_up, tear_down, get_ingress_url)
    with env.context() as url:
        # Inside the with-body: set-up already happened, tear-down not yet.
        set_up.assert_called_once_with()
        get_ingress_url.assert_called_once_with()
        assert url == ingress_url
    tear_down.assert_called_once_with()
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines run which follows the testing pipeline after cluster creation.""" + +import contextlib +import logging +import os +import time +import re +from typing import Dict, Generator, Optional + +import requests + +from . import consts, entrypoint, istio, kubectl, md5, mesh, prometheus, \ + resources, sh, wait + +_REPO_ROOT = os.path.join(os.getcwd(), + os.path.dirname(os.path.dirname(__file__))) +_MAIN_GO_PATH = os.path.join(_REPO_ROOT, 'convert', 'main.go') + + +def _parse_namespace_from_topology_line(line: str) -> str: + pattern = r"\s*namespace: (.+)" + match = re.search(pattern, line) + if match: + return match.group(1) + return None + + +def _kubectl_create_namespaces(namespaces: list[str]) -> bool: + for ns in namespaces: + gen = sh.run( + [ + 'kubectl', 'create', 'ns', ns + ], + check=False) + + +def run(topology_path: str, env: mesh.Environment, service_image: str, + client_image: str, istio_archive_url: str, test_qps: Optional[int], + test_duration: str, test_num_concurrent_connections: int, + static_labels: Dict[str, str], deploy_prometheus=False) -> None: + """Runs a load test on the topology in topology_path with the environment. + + Args: + topology_path: the path to the file containing the topology + env: the pre-existing mesh environment for the topology (i.e. Istio) + service_image: the Docker image to represent each node in the topology + client_image: the Docker image which can run a load test (i.e. 
Fortio) + istio_archive_url: URL to access a released tar.gz archive via an + HTTP GET request + test_qps: the target QPS for the client; None = max + test_duration: the duration for the client to run + test_num_concurrent_connections: the number of simultaneous connections + for the client to make + static_labels: labels to add to each Prometheus monitor + """ + + manifest_path = _gen_yaml(topology_path, service_image, + test_num_concurrent_connections, client_image, + env.name) + + # Parsing out the namespaces that will be used in the service graph. + # Currently do nothing with this except use them to scope the + # pod health checks. + service_graph_namespaces = ["default"] + with open(topology_path, 'r') as f: + for line in f: + namespace = _parse_namespace_from_topology_line(line) + if namespace: + service_graph_namespaces.append(namespace) + logging.info('topology file contains the following service-graph namespaces: "%s"', service_graph_namespaces) + + # TODO: this should not be necessary, but the namespaces are not part of convert output. 
+ _kubectl_create_namespaces(service_graph_namespaces) + + topology_name = _get_basename_no_ext(topology_path) + labels = { + 'environment': env.name, + 'topology_name': topology_name, + 'topology_hash': md5.hex(topology_path), + **static_labels, + } + if deploy_prometheus: + prometheus.apply( + labels, + intermediate_file_path=resources.PROMETHEUS_VALUES_GEN_YAML_PATH) + + with env.context() as ingress_url: + logging.info('starting test with environment "%s"', env.name) + result_output_path = '{}_{}.json'.format(topology_name, env.name) + + _test_service_graph(manifest_path, result_output_path, ingress_url, + test_qps, test_duration, + test_num_concurrent_connections, + service_graph_namespaces) + + +def _get_basename_no_ext(path: str) -> str: + basename = os.path.basename(path) + return os.path.splitext(basename)[0] + + +def _gen_yaml(topology_path: str, service_image: str, + max_idle_connections_per_host: int, client_image: str, + env_name: str) -> str: + """Converts topology_path to Kubernetes manifests. + + The neighboring Go command in convert/ handles this operation. + + Args: + topology_path: the path containing the topology YAML + service_image: the Docker image to represent each node in the topology; + passed to the Go command + client_image: the Docker image which can run a load test (i.e. Fortio); + passed to the Go command + env_name: the environment name (i.e. 
"NONE" or "ISTIO") + """ + logging.info('generating Kubernetes manifests from %s', topology_path) + service_graph_node_selector = _get_gke_node_selector( + consts.SERVICE_GRAPH_NODE_POOL_NAME) + client_node_selector = _get_gke_node_selector(consts.CLIENT_NODE_POOL_NAME) + gen = sh.run( + [ + 'go', 'run', _MAIN_GO_PATH, 'kubernetes', '--service-image', + service_image, '--service-max-idle-connections-per-host', + str(max_idle_connections_per_host), '--client-image', client_image, + "--environment-name", env_name, + '--service-node-selector', service_graph_node_selector, + '--client-node-selector', client_node_selector, + topology_path, + ], + check=True) + with open(resources.SERVICE_GRAPH_GEN_YAML_PATH, 'w') as f: + f.write(gen.stdout) + + return resources.SERVICE_GRAPH_GEN_YAML_PATH + + +def _get_gke_node_selector(node_pool_name: str) -> str: + return 'cloud.google.com/gke-nodepool={}'.format(node_pool_name) + + +def _test_service_graph(yaml_path: str, test_result_output_path: str, + test_target_url: str, test_qps: Optional[int], + test_duration: str, + test_num_concurrent_connections: int, + service_graph_namespaces: list[str]) -> None: + """Deploys the service graph at yaml_path and runs a load test on it.""" + # TODO: extract to env.context, with entrypoint hostname as the ingress URL + with kubectl.manifest(yaml_path): + wait.until_service_graph_is_ready(service_graph_namespaces) + + _run_load_test(test_result_output_path, test_target_url, test_qps, + test_duration, test_num_concurrent_connections) + + wait.until_prometheus_has_scraped() + + +def _run_load_test(result_output_path: str, test_target_url: str, + test_qps: Optional[int], test_duration: str, + test_num_concurrent_connections: int) -> None: + """Sends an HTTP request to the client; expecting a JSON response. 
def _http_get_json(url: str) -> str:
    """Sends HTTP GET requests to url until one succeeds; returns body text.

    Retries indefinitely on connection or HTTP errors. The original loop
    retried with no delay, busy-looping against a down endpoint; a short
    sleep between attempts fixes that.

    Args:
        url: the URL to GET; the response body is expected to be JSON

    Returns:
        The response body as text (not parsed).
    """
    response = None
    while response is None:
        try:
            logging.info('sending request: %s', url)
            response = requests.get(url)
        except (requests.ConnectionError, requests.HTTPError) as e:
            logging.error('%s; retrying request to %s', e, url)
            # Back off briefly instead of hammering the endpoint.
            time.sleep(5)
    return response.text
def apply(labels: Dict[str, str] = None,
          intermediate_file_path: str = None) -> None:
    """Creates or updates Prometheus values to add labels to all metrics.

    Fixes the original's mutable default argument (labels={}) and its
    docstring, which described a nonexistent `json_or_yaml` parameter.

    Args:
        labels: labels to attach to every scraped metric via
            metricRelabelings; None means no extra labels
        intermediate_file_path: if set, defines the file to write the
            generated values YAML to (useful for debugging); otherwise,
            uses a temporary file
    """
    logging.info('applying Prometheus configuration')

    config = _get_values({} if labels is None else labels)
    values_yaml = yaml.dump(config, default_flow_style=False)

    if intermediate_file_path is None:
        opener = tempfile.NamedTemporaryFile(mode='w+')
    else:
        opener = open(intermediate_file_path, 'w+')

    with opener as f:
        f.write(values_yaml)
        # Flush so helm sees the full contents while the file is open.
        f.flush()

        _apply_prometheus_values(f.name)
wait until Prometheus is actually updated. + time.sleep(5 * 60) + + +def _install_prometheus(values_path: str) -> None: + logging.debug('installing coreos/kube-prometheus') + sh.run_with_k8s_api( + [ + 'helm', 'install', 'coreos/kube-prometheus', '--name', + _HELM_RELEASE_NAME, '--namespace', consts.MONITORING_NAMESPACE, + '--values', values_path + ], + check=True) + wait.until_stateful_sets_are_ready(consts.MONITORING_NAMESPACE) + + +def _get_values(labels: Dict[str, str]) -> Dict[str, Any]: + return { + 'deployAlertManager': False, + 'deployExporterNode': True, + 'deployGrafana': True, + 'deployKubeControllerManager': True, + 'deployKubeDNS': True, + 'deployKubeEtcd': True, + 'deployKubelets': True, + 'deployKubeScheduler': True, + 'deployKubeState': True, + 'exporter-kubelets': { + # Must be false for GKE. + 'https': False, + }, + 'prometheus': _get_prometheus_config(labels) + } + + +def _get_prometheus_config(labels: Dict[str, str]) -> Dict[str, Any]: + metric_relabelings = _get_metric_relabelings(labels) + return { + 'serviceMonitors': [ + _get_service_monitor('service-graph-monitor', 8080, + consts.SERVICE_GRAPH_NAMESPACE, + {'app': 'service-graph'}, metric_relabelings), + _get_service_monitor('client-monitor', 42422, + consts.DEFAULT_NAMESPACE, {'app': 'client'}, + metric_relabelings), + _get_service_monitor('istio-mixer-monitor', 42422, + consts.ISTIO_NAMESPACE, {'istio': 'mixer'}, + metric_relabelings), + ], + 'storageSpec': + _get_storage_spec(), + } + + +def _get_service_monitor( + name: str, port: int, namespace: str, match_labels: Dict[str, str], + metric_relabelings: List[Dict[str, Any]]) -> Dict[str, Any]: + return { + 'name': + name, + 'endpoints': [{ + 'targetPort': port, + 'metricRelabelings': metric_relabelings, + }], + 'namespaceSelector': { + 'matchNames': [namespace], + }, + 'selector': { + 'matchLabels': match_labels, + }, + } + + +def _get_metric_relabelings(labels: Dict[str, str]) -> List[Dict[str, Any]]: + return [{ + 'targetLabel': key, + 
'replacement': value, + } for key, value in labels.items()] + + +def _get_storage_spec() -> Dict[str, Any]: + return { + 'volumeClaimTemplate': { + 'spec': { + 'accessModes': ['ReadWriteOnce'], + 'resources': { + 'requests': { + 'storage': '10G', + }, + }, + 'volumeName': 'prometheus-persistent-volume', + 'storageClassName': '', + }, + }, + } diff --git a/isotope/runner/prometheus_test.py b/isotope/runner/prometheus_test.py new file mode 100644 index 0000000000..59e62ccde5 --- /dev/null +++ b/isotope/runner/prometheus_test.py @@ -0,0 +1,126 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import yaml + +from . 
import prometheus + + +def test_values_should_return_correct_yaml(): + expected = { + 'prometheus': { + 'storageSpec': { + 'volumeClaimTemplate': { + 'spec': { + 'accessModes': ['ReadWriteOnce'], + 'resources': { + 'requests': { + 'storage': '10G' + } + }, + 'storageClassName': '', + 'volumeName': 'prometheus-persistent-volume' + } + } + }, + 'serviceMonitors': [{ + 'endpoints': [{ + 'metricRelabelings': [{ + 'replacement': 'tjberry', + 'targetLabel': 'user' + }, { + 'replacement': 'stuff', + 'targetLabel': 'custom' + }], + 'targetPort': + 8080 + }], + 'namespaceSelector': { + 'matchNames': ['service-graph'] + }, + 'selector': { + 'matchLabels': { + 'app': 'service-graph' + } + }, + 'name': + 'service-graph-monitor' + }, { + 'endpoints': [{ + 'metricRelabelings': [{ + 'replacement': 'tjberry', + 'targetLabel': 'user' + }, { + 'replacement': 'stuff', + 'targetLabel': 'custom' + }], + 'targetPort': + 42422 + }], + 'namespaceSelector': { + 'matchNames': ['default'] + }, + 'selector': { + 'matchLabels': { + 'app': 'client' + } + }, + 'name': + 'client-monitor' + }, { + 'endpoints': [{ + 'metricRelabelings': [{ + 'replacement': 'tjberry', + 'targetLabel': 'user' + }, { + 'replacement': 'stuff', + 'targetLabel': 'custom' + }], + 'targetPort': + 42422 + }], + 'namespaceSelector': { + 'matchNames': ['istio-system'] + }, + 'selector': { + 'matchLabels': { + 'istio': 'mixer' + } + }, + 'name': + 'istio-mixer-monitor' + }] + }, + 'deployExporterNode': True, + 'deployAlertManager': False, + 'deployKubeState': True, + 'deployKubeDNS': True, + 'deployKubeScheduler': True, + 'deployKubelets': True, + 'deployGrafana': True, + 'deployKubeControllerManager': True, + 'deployKubeEtcd': True, + 'exporter-kubelets': { + 'https': False + } + } + + labels = { + 'user': 'tjberry', + 'custom': 'stuff', + } + + actual = prometheus._get_values(labels) + + assert expected == actual diff --git a/isotope/runner/requirements.txt b/isotope/runner/requirements.txt new file mode 100644 index 
0000000000..737e80c436 --- /dev/null +++ b/isotope/runner/requirements.txt @@ -0,0 +1,4 @@ +pytest +pyyaml +requests +toml diff --git a/isotope/runner/resources/.gitignore b/isotope/runner/resources/.gitignore new file mode 100644 index 0000000000..84168613a1 --- /dev/null +++ b/isotope/runner/resources/.gitignore @@ -0,0 +1 @@ +*.gen.yaml diff --git a/isotope/runner/resources/__init__.py b/isotope/runner/resources/__init__.py new file mode 100644 index 0000000000..68f92b8509 --- /dev/null +++ b/isotope/runner/resources/__init__.py @@ -0,0 +1,33 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Defines constant paths for static and generated YAML.""" + +import os + +_RESOURCES_DIR = os.path.realpath( + os.path.join(os.getcwd(), os.path.dirname(__file__))) + +HELM_SERVICE_ACCOUNT_YAML_PATH = os.path.join(_RESOURCES_DIR, + 'helm-service-account.yaml') +PERSISTENT_VOLUME_YAML_PATH = os.path.join(_RESOURCES_DIR, + 'persistent-volume.yaml') + +PROMETHEUS_VALUES_GEN_YAML_PATH = os.path.join(_RESOURCES_DIR, + 'values-prometheus.gen.yaml') +SERVICE_GRAPH_GEN_YAML_PATH = os.path.join(_RESOURCES_DIR, + 'service-graph.gen.yaml') +ISTIO_GEN_YAML_PATH = os.path.join(_RESOURCES_DIR, 'istio.gen.yaml') +ISTIO_INGRESS_YAML_PATH = os.path.join(_RESOURCES_DIR, + 'istio-ingress.gen.yaml') diff --git a/isotope/runner/resources/helm-service-account.yaml b/isotope/runner/resources/helm-service-account.yaml new file mode 100644 index 0000000000..0e1b0838d0 --- /dev/null +++ b/isotope/runner/resources/helm-service-account.yaml @@ -0,0 +1,21 @@ +# Create a service account for Helm and grant the cluster admin role. +# It is assumed that helm should be installed with this service account +# (tiller). 
+apiVersion: v1 +kind: ServiceAccount +metadata: + name: tiller + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: tiller +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: tiller + namespace: kube-system diff --git a/isotope/runner/resources/persistent-volume.yaml b/isotope/runner/resources/persistent-volume.yaml new file mode 100644 index 0000000000..fb067e6384 --- /dev/null +++ b/isotope/runner/resources/persistent-volume.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: prometheus-persistent-volume +spec: + storageClassName: "" + capacity: + storage: 1000G + accessModes: + - ReadWriteOnce + gcePersistentDisk: + pdName: prometheus-pd + fsType: ext4 diff --git a/isotope/runner/sh.py b/isotope/runner/sh.py new file mode 100644 index 0000000000..7282bccab9 --- /dev/null +++ b/isotope/runner/sh.py @@ -0,0 +1,93 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Abstractions for common shell calls.""" + +import contextlib +import logging +import subprocess +from typing import Dict, List, Union + +from . 
def run_with_k8s_api(args: List[str],
                     check=False) -> subprocess.CompletedProcess:
    """Ensures the command succeeds against a responsive Kubernetes API.

    Retries the command while its stderr indicates connection refusal,
    waiting between attempts until `kubectl version` succeeds (i.e. the
    API server is reachable again).

    Args:
        args: the full command line, command first (e.g. ['kubectl', ...])
        check: if True, raise CalledProcessError on non-zero exit

    Returns:
        The completed process of the final attempt, stdout/stderr decoded.
    """
    proc = run(args)

    # Retry while the error is because of connection refusal.
    # NOTE(review): matching on 'getsockopt' is brittle — newer client
    # versions may word connection errors differently; confirm.
    while 'getsockopt' in proc.stderr:
        logging.debug('Kubernetes connection failed; retrying...')
        # Wait until `kubectl version` completes, indicating the
        # Kubernetes API is responsive.
        wait.until(
            lambda: run_kubectl(['version']).returncode == 0,
            retry_interval_seconds=5)
        proc = run(args)

    if check and proc.returncode != 0:
        logging.error('%s\n%s\n%s', proc, proc.stdout, proc.stderr)
        raise subprocess.CalledProcessError(
            proc.returncode, proc.args, output=proc.stdout, stderr=proc.stderr)

    return proc
+ """ + logging.debug('%s', args) + + try: + proc = subprocess.run( + args, + check=check, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + except subprocess.CalledProcessError as e: + _decode(e) + if check: + logging.error('%s\n%s\n%s', e, e.stdout, e.stderr) + raise e + + _decode(proc) + return proc + + +def _decode( + proc: Union[subprocess.CompletedProcess, subprocess.CalledProcessError] +) -> None: + if proc.stdout is not None: + proc.stdout = proc.stdout.decode('utf-8').strip() + if proc.stderr is not None: + proc.stderr = proc.stderr.decode('utf-8').strip() diff --git a/isotope/runner/test.heap b/isotope/runner/test.heap new file mode 100644 index 0000000000..e69de29bb2 diff --git a/isotope/runner/wait.py b/isotope/runner/wait.py new file mode 100644 index 0000000000..2d086dfb38 --- /dev/null +++ b/isotope/runner/wait.py @@ -0,0 +1,122 @@ +# Copyright Istio Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Functions which block until certain conditions.""" + +import collections +import datetime +import logging +import subprocess +import time +from typing import Callable, List + +from . 
RETRY_INTERVAL = datetime.timedelta(seconds=5)


def until(predicate: Callable[[], bool],
          retry_interval_seconds: int = RETRY_INTERVAL.seconds) -> None:
    """Blocks, polling `predicate` until it returns True.

    Sleeps `retry_interval_seconds` after each False result; returns as
    soon as the predicate succeeds (it is re-evaluated on every pass).
    """
    while True:
        if predicate():
            return
        time.sleep(retry_interval_seconds)
def _service_graph_is_ready(service_graph_namespaces: list[str]) -> bool:
    """Returns True iff no matching pod reports a Ready condition of False.

    Queries each namespace for pods matching the service-graph selector and
    inspects their Ready conditions via jsonpath (one "True"/"False" token
    per pod).

    NOTE(review): a namespace with zero matching pods yields empty output,
    which counts as ready. That may be intended (the always-included
    "default" namespace can legitimately hold no service-graph pods), but
    it also means "pods not created yet" looks ready — confirm.
    """
    for ns in service_graph_namespaces:
        proc = sh.run_kubectl(
            [
                '--namespace', ns, 'get', 'pods',
                '--selector', consts.SERVICE_GRAPH_SERVICE_SELECTOR, '-o',
                'jsonpath={.items[*].status.conditions[?(@.type=="Ready")].status}'
            ],
            check=True)
        out = proc.stdout
        # Any pod whose Ready status is "False" makes the graph not ready.
        if ('False' in out):
            return False
    return True