Skip to content

Commit

Permalink
Change API from 'Google API Client' to 'Google Cloud Python Client'
Browse files Browse the repository at this point in the history
According to the 'Google API Client' GH page:

```
This library is considered complete and is in maintenance mode. This means
that we will address critical bugs and security issues but will not add any
new features.

This library is officially supported by Google. However, the maintainers of
this repository recommend using Cloud Client Libraries for Python, where
possible, for new code development.
```

So change the code accordingly to adapt it to 'Google Cloud Python Client'.

Signed-off-by: Pablo Méndez Hernández <[email protected]>
  • Loading branch information
pablomh committed Oct 24, 2024
1 parent 366322c commit b8521ff
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 124 deletions.
235 changes: 113 additions & 122 deletions krkn/scenario_plugins/node_actions/gcp_node_scenarios.py
Original file line number Diff line number Diff line change
@@ -1,185 +1,177 @@
import os
import sys
import time
import logging
import json
import google.auth
import krkn.scenario_plugins.node_actions.common_node_functions as nodeaction
from krkn.scenario_plugins.node_actions.abstract_node_scenarios import (
abstract_node_scenarios,
)
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
from google.cloud import compute_v1
from krkn_lib.k8s import KrknKubernetes


class GCP:
def __init__(self):
try:
gapp_creds = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
with open(gapp_creds, "r") as f:
f_str = f.read()
self.project = json.loads(f_str)["project_id"]
# self.project = runcommand.invoke("gcloud config get-value project").split("/n")[0].strip()
logging.info("project " + str(self.project) + "!")
credentials = GoogleCredentials.get_application_default()
self.client = discovery.build(
"compute", "v1", credentials=credentials, cache_discovery=False
)

_, self.project_id = google.auth.default()
self.instance_client = compute_v1.InstancesClient()
except Exception as e:
logging.error("Error on setting up GCP connection: " + str(e))

raise e

# Get the instance ID of the node
# Get the instance and zone name of the node
def get_instance_id(self, node):
zone_request = self.client.zones().list(project=self.project)
while zone_request is not None:
zone_response = zone_request.execute()
for zone in zone_response["items"]:
instances_request = self.client.instances().list(
project=self.project, zone=zone["name"]
)
while instances_request is not None:
instance_response = instances_request.execute()
if "items" in instance_response.keys():
for instance in instance_response["items"]:
if instance["name"] in node:
return instance["name"], zone["name"]
instances_request = self.client.zones().list_next(
previous_request=instances_request,
previous_response=instance_response,
)
zone_request = self.client.zones().list_next(
previous_request=zone_request, previous_response=zone_response
)
request = compute_v1.AggregatedListInstancesRequest(
project = self.project_id
)
agg_list = self.instance_client.aggregated_list(request=request)
for zone, response in agg_list:
if response.instances:
for instance in response.instances:
if instance.name in node:
zone_name = zone.split("/")[-1]
return instance.name, zone_name
logging.info("no instances ")

# Start the node instance
def start_instances(self, zone, instance_id):
def start_instances(self, zone, instance_name):
try:
self.client.instances().start(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " started")
request = compute_v1.StartInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
self.instance_client.start(request=request)
logging.info("vm name " + str(instance_name) + " started")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

# Stop the node instance
def stop_instances(self, zone, instance_id):
def stop_instances(self, zone, instance_name):
try:
self.client.instances().stop(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " stopped")
request = compute_v1.StopInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
self.instance_client.stop(request=request)
logging.info("vm name " + str(instance_name) + " stopped")
except Exception as e:
logging.error(
"Failed to stop node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

# Start the node instance
def suspend_instances(self, zone, instance_id):
# Suspend the node instance
def suspend_instances(self, zone, instance_name):
try:
self.client.instances().suspend(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " suspended")
request = compute_v1.SuspendInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
self.instance_client.suspend(request=request)
logging.info("vm name " + str(instance_name) + " suspended")
except Exception as e:
logging.error(
"Failed to suspend node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

# Terminate the node instance
def terminate_instances(self, zone, instance_id):
def terminate_instances(self, zone, instance_name):
try:
self.client.instances().delete(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " terminated")
request = compute_v1.DeleteInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
self.instance_client.delete(request=request)
logging.info("vm name " + str(instance_name) + " terminated")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
"Failed to terminate node instance %s. Encountered following "
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

# Reboot the node instance
def reboot_instances(self, zone, instance_id):
def reboot_instances(self, zone, instance_name):
try:
self.client.instances().reset(
project=self.project, zone=zone, instance=instance_id
).execute()
logging.info("vm name " + str(instance_id) + " rebooted")
request = compute_v1.ResetInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
self.instance_client.reset(request=request)
logging.info("vm name " + str(instance_name) + " rebooted")
except Exception as e:
logging.error(
"Failed to start node instance %s. Encountered following "
"exception: %s." % (instance_id, e)
"Failed to reboot node instance %s. Encountered following "
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

# Get instance status
def get_instance_status(self, zone, instance_id, expected_status, timeout):
# statuses: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING,
def get_instance_status(self, zone, instance_name, expected_status, timeout):
# states: PROVISIONING, STAGING, RUNNING, STOPPING, SUSPENDING, SUSPENDED, REPAIRING,
# and TERMINATED.
i = 0
sleeper = 5
while i <= timeout:
instStatus = (
self.client.instances()
.get(project=self.project, zone=zone, instance=instance_id)
.execute()
)
logging.info("Status of vm " + str(instStatus["status"]))
if instStatus["status"] == expected_status:
try:
request = compute_v1.GetInstanceRequest(
instance=instance_name,
project=self.project_id,
zone=zone,
)
instance_status = self.instance_client.get(request=request).status
logging.info("Status of vm " + instance_status)
except Exception as e:
logging.error(
"Failed to reboot node instance %s. Encountered following "
"exception: %s." % (instance_name, e)
)

raise RuntimeError()

if instance_status == expected_status:
return True
time.sleep(sleeper)
i += sleeper
logging.error(
"Status of %s was not %s in %s seconds"
% (instance_id, expected_status, timeout)
% (instance_name, expected_status, timeout)
)
return False

# Wait until the node instance is suspended
def wait_until_suspended(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "SUSPENDED", timeout)
def wait_until_suspended(self, zone, instance_name, timeout):
return self.get_instance_status(zone, instance_name, "SUSPENDED", timeout)

# Wait until the node instance is running
def wait_until_running(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "RUNNING", timeout)
def wait_until_running(self, zone, instance_name, timeout):
return self.get_instance_status(zone, instance_name, "RUNNING", timeout)

# Wait until the node instance is stopped
def wait_until_stopped(self, zone, instance_id, timeout):
return self.get_instance_status(zone, instance_id, "TERMINATED", timeout)
def wait_until_stopped(self, zone, instance_name, timeout):
# In GCP, the next state after STOPPING is TERMINATED
return self.get_instance_status(zone, instance_name, "TERMINATED", timeout)

# Wait until the node instance is terminated
def wait_until_terminated(self, zone, instance_id, timeout):
try:
i = 0
sleeper = 5
while i <= timeout:
instStatus = (
self.client.instances()
.get(project=self.project, zone=zone, instance=instance_id)
.execute()
)
logging.info("Status of vm " + str(instStatus["status"]))
time.sleep(sleeper)
except Exception as e:
logging.info("here " + str(e))
return True
def wait_until_terminated(self, zone, instance_name, timeout):
return self.get_instance_status(zone, instance_name, "TERMINATED", timeout)


# krkn_lib
Expand All @@ -193,15 +185,15 @@ def node_start_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
try:
logging.info("Starting node_start_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance_name, zone = self.gcp.get_instance_id(node)
logging.info(
"Starting the node %s with instance ID: %s " % (node, instance_id)
"Starting the node %s with instance ID: %s " % (node, instance_name)
)
self.gcp.start_instances(zone, instance_id)
self.gcp.wait_until_running(zone, instance_id, timeout)
self.gcp.start_instances(zone, instance_name)
self.gcp.wait_until_running(zone, instance_name, timeout)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
logging.info(
"Node with instance ID: %s is in running state" % instance_id
"Node with instance ID: %s is in running state" % instance_name
)
logging.info("node_start_scenario has been successfully injected!")
except Exception as e:
Expand All @@ -219,14 +211,14 @@ def node_stop_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
try:
logging.info("Starting node_stop_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance_name, zone = self.gcp.get_instance_id(node)
logging.info(
"Stopping the node %s with instance ID: %s " % (node, instance_id)
"Stopping the node %s with instance ID: %s " % (node, instance_name)
)
self.gcp.stop_instances(zone, instance_id)
self.gcp.wait_until_stopped(zone, instance_id, timeout)
self.gcp.stop_instances(zone, instance_name)
self.gcp.wait_until_stopped(zone, instance_name, timeout)
logging.info(
"Node with instance ID: %s is in stopped state" % instance_id
"Node with instance ID: %s is in stopped state" % instance_name
)
nodeaction.wait_for_unknown_status(node, timeout, self.kubecli)
except Exception as e:
Expand All @@ -243,21 +235,21 @@ def node_termination_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
try:
logging.info("Starting node_termination_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance_name, zone = self.gcp.get_instance_id(node)
logging.info(
"Terminating the node %s with instance ID: %s "
% (node, instance_id)
% (node, instance_name)
)
self.gcp.terminate_instances(zone, instance_id)
self.gcp.wait_until_terminated(zone, instance_id, timeout)
self.gcp.terminate_instances(zone, instance_name)
self.gcp.wait_until_terminated(zone, instance_name, timeout)
for _ in range(timeout):
if node not in self.kubecli.list_nodes():
break
time.sleep(1)
if node in self.kubecli.list_nodes():
raise RuntimeError("Node could not be terminated")
logging.info(
"Node with instance ID: %s has been terminated" % instance_id
"Node with instance ID: %s has been terminated" % instance_name
)
logging.info("node_termination_scenario has been successfuly injected!")
except Exception as e:
Expand All @@ -267,22 +259,21 @@ def node_termination_scenario(self, instance_kill_count, node, timeout):
)
logging.error("node_termination_scenario injection failed!")


raise e
raise RuntimeError()

# Node scenario to reboot the node
def node_reboot_scenario(self, instance_kill_count, node, timeout):
for _ in range(instance_kill_count):
try:
logging.info("Starting node_reboot_scenario injection")
instance_id, zone = self.gcp.get_instance_id(node)
instance_name, zone = self.gcp.get_instance_id(node)
logging.info(
"Rebooting the node %s with instance ID: %s " % (node, instance_id)
"Rebooting the node %s with instance ID: %s " % (node, instance_name)
)
self.gcp.reboot_instances(zone, instance_id)
self.gcp.reboot_instances(zone, instance_name)
nodeaction.wait_for_ready_status(node, timeout, self.kubecli)
logging.info(
"Node with instance ID: %s has been rebooted" % instance_id
"Node with instance ID: %s has been rebooted" % instance_name
)
logging.info("node_reboot_scenario has been successfuly injected!")
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ coverage==7.4.1
datetime==5.4
docker==7.0.0
gitpython==3.1.41
google-api-python-client==2.116.0
google-auth==2.35.0
google-cloud-compute==1.19.2
ibm_cloud_sdk_core==3.18.0
ibm_vpc==0.20.0
jinja2==3.1.4
krkn-lib==4.0.3
lxml==5.1.0
kubernetes==28.1.0
numpy==1.26.4
oauth2client==4.1.3
pandas==2.2.0
openshift-client==1.0.21
paramiko==3.4.0
Expand Down

0 comments on commit b8521ff

Please sign in to comment.