Skip to content

Commit

Permalink
Added the ability to run ROSA clusters with spot instance workers
Browse files Browse the repository at this point in the history
  • Loading branch information
athiruma committed Aug 11, 2023
1 parent 967454d commit 0279cb3
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 6 deletions.
17 changes: 17 additions & 0 deletions dags/openshift_nightlies/config/install/rosa/ovn-small-spot.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
    "aws_profile": "",
    "aws_access_key_id": "",
    "aws_secret_access_key": "",
    "aws_authentication_method": "sts",
    "rosa_environment": "staging",
    "rosa_cli_version": "container",
    "ocm_environment": "stage",
    "managed_channel_group": "nightly",
    "managed_ocp_version": "latest",
    "openshift_worker_count": 24,
    "openshift_network_type": "OVNKubernetes",
    "openshift_worker_instance_type": "m5.2xlarge",
    "machineset_metadata_label_prefix": "machine.openshift.io",
    "openshift_workload_node_instance_type": null,
    "enable_spot_workers": true
}
5 changes: 5 additions & 0 deletions dags/openshift_nightlies/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ platforms:
config:
install: rosa/ovn-small.json
benchmarks: small-control-plane-mgs.json
- name: sts-ovn-small-spot-cp
schedule: "0 12 * * 3"
config:
install: rosa/ovn-small-spot.json
benchmarks: small-control-plane-mgs.json
- name: sts-ovn-medium-cp
schedule: "5 12 * * 1"
config:
Expand Down
46 changes: 41 additions & 5 deletions dags/openshift_nightlies/scripts/install/rosa.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,17 @@ _wait_for_nodes_ready(){
export KUBECONFIG=./kubeconfig
ALL_READY_ITERATIONS=0
ITERATIONS=0
# Node count is number of workers + 3 infra
NODES_COUNT=$(($2+3))
if [ "$3" == "rosa-spots=true" ]; then
if [ "$SPOT_POOL_READY" == "true" ]; then
# Node count is number of workers pool + 3 infra
NODES_COUNT=$(($2+3))
else
NODES_COUNT=$2
fi
else
# Node count is number of workers + 3 infra
NODES_COUNT=$(($2+3))
fi
# 30 seconds per node, waiting for all nodes ready to finalize
while [ ${ITERATIONS} -le $((${NODES_COUNT}*5)) ] ; do
NODES_READY_COUNT=$(oc get nodes -l $3 | grep " Ready " | wc -l)
Expand Down Expand Up @@ -86,7 +95,16 @@ _wait_for_cluster_ready(){
echo "Set end time of prom scrape"
export END_TIME=$(date +"%s")
START_TIMER=$(date +%s)
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker"
if [ "$ENABLE_SPOT_WORKERS" == "true" ]; then
if [ "$SPOT_POOL_READY" == "true" ]; then
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker"
else
DEFAULT_WORKER_NODES=3
_wait_for_nodes_ready $1 $DEFAULT_WORKER_NODES "node-role.kubernetes.io/worker"
fi
else
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker"
fi
CURRENT_TIMER=$(date +%s)
# Time since rosa cluster is ready until all nodes are ready
DURATION=$(($CURRENT_TIMER - $START_TIMER))
Expand Down Expand Up @@ -129,6 +147,8 @@ setup(){
export MANAGED_CHANNEL_GROUP=$(cat ${json_file} | jq -r .managed_channel_group)
export CLUSTER_NAME=$(cat ${json_file} | jq -r .openshift_cluster_name)
export COMPUTE_WORKERS_NUMBER=$(cat ${json_file} | jq -r .openshift_worker_count)
export COMPUTE_WORKERS_TYPE=$(cat ${json_file} | jq -r .openshift_worker_instance_type)
export ENABLE_SPOT_WORKERS=$(cat ${json_file} | jq -r .enable_spot_workers)
export NETWORK_TYPE=$(cat ${json_file} | jq -r .openshift_network_type)
export ES_SERVER=$(cat ${json_file} | jq -r .es_server)
export UUID=$(uuidgen)
Expand Down Expand Up @@ -167,16 +187,31 @@ setup(){
return 0
}

#######################################
# Create an extra ROSA machine pool backed by AWS spot instances and wait
# for its nodes to become ready.
#
# Only the workers beyond the first 3 are requested from the spot pool; the
# first 3 are assumed to come from the cluster's default worker pool
# (NOTE(review): confirm against the cluster creation parameters).
# Nothing is created when spot workers are disabled, when the requested
# worker count is not a plain non-negative integer, or when it does not
# exceed 3 (a zero or negative replica count would otherwise be passed
# to rosa).
#
# Globals:
#   ENABLE_SPOT_WORKERS    (read)    "true" enables the spot pool
#   COMPUTE_WORKERS_NUMBER (read)    total number of workers requested
#   COMPUTE_WORKERS_TYPE   (read)    instance type of the spot workers
#   CLUSTER_NAME           (read)    cluster that receives the pool
#   SPOT_POOL_READY        (written) exported as "true" after the wait
# Arguments: none
# Returns:   0 always
#######################################
_create_spot_worker_pool(){
  local default_workers=3
  local spot_replicas

  if [ "${ENABLE_SPOT_WORKERS}" != "true" ]; then
    return 0
  fi

  # jq -r yields "null" for a missing key; refuse anything that is not a
  # plain integer instead of feeding it to the arithmetic below.
  if ! [[ "${COMPUTE_WORKERS_NUMBER}" =~ ^[0-9]+$ ]]; then
    printf 'WARN: invalid worker count "%s", skipping spot pool creation\n' \
      "${COMPUTE_WORKERS_NUMBER}" >&2
    return 0
  fi

  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  spot_replicas=$(( 10#${COMPUTE_WORKERS_NUMBER} - default_workers ))
  if (( spot_replicas <= 0 )); then
    # Nothing left to place on spot instances.
    return 0
  fi

  rosa create machinepool -c "${CLUSTER_NAME}" \
    --name="${CLUSTER_NAME}-spot-pool" \
    --replicas="${spot_replicas}" \
    --instance-type="${COMPUTE_WORKERS_TYPE}" \
    --labels="rosa-spots=true" \
    --use-spot-instances
  _wait_for_nodes_ready "${CLUSTER_NAME}" "${spot_replicas}" "rosa-spots=true"
  export SPOT_POOL_READY=true
  return 0
}

#######################################
# Create the ROSA cluster described by ${json_file}, then run postinstall.
#
# When ENABLE_SPOT_WORKERS is "true" the cluster is created without an
# explicit --replicas value, the function waits for the cluster to be
# ready, and the remaining workers are added via _create_spot_worker_pool.
# Otherwise the cluster is created directly with COMPUTE_WORKERS_NUMBER
# replicas.
#
# Globals:
#   json_file                 (read)    path to the install configuration
#   AWS_AUTHENTICATION_METHOD (read)    "sts" adds the STS install flags
#   ENABLE_SPOT_WORKERS       (read)    "true" selects the spot flow
#   GITHUB_USERNAME, CLUSTER_NAME, ROSA_VERSION, MANAGED_CHANNEL_GROUP,
#   COMPUTE_WORKERS_NUMBER, NETWORK_TYPE (read)
#   COMPUTE_WORKERS_TYPE, CLUSTER_AUTOSCALE, OIDC_CONFIG,
#   INSTALLATION_PARAMS       (written, exported)
# Arguments: none
# Returns:   0 always
#######################################
install(){
  # Assign first and export afterwards so a failing jq is not hidden behind
  # the exit status of `export`.
  COMPUTE_WORKERS_TYPE=$(jq -r .openshift_worker_instance_type "${json_file}")
  CLUSTER_AUTOSCALE=$(jq -r .cluster_autoscale "${json_file}")
  OIDC_CONFIG=$(jq -r .oidc_config "${json_file}")
  export COMPUTE_WORKERS_TYPE CLUSTER_AUTOSCALE OIDC_CONFIG

  export INSTALLATION_PARAMS=""
  # Quoted so an empty/unset method does not break the test expression.
  if [ "${AWS_AUTHENTICATION_METHOD}" == "sts" ]; then
    INSTALLATION_PARAMS="${INSTALLATION_PARAMS} --sts -m auto --yes"
  fi
  INSTALLATION_PARAMS="${INSTALLATION_PARAMS} --multi-az" # Multi AZ is default on hosted-cp cluster

  # Build the command line once; only --replicas differs between the flows.
  local -a cluster_args=(
    --tags="User:${GITHUB_USERNAME}"
    --cluster-name "${CLUSTER_NAME}"
    --version "${ROSA_VERSION}"
    --channel-group="${MANAGED_CHANNEL_GROUP}"
    --compute-machine-type "${COMPUTE_WORKERS_TYPE}"
  )
  if [ "${ENABLE_SPOT_WORKERS}" != "true" ]; then
    cluster_args+=(--replicas "${COMPUTE_WORKERS_NUMBER}")
  fi
  cluster_args+=(--network-type "${NETWORK_TYPE}")
  # INSTALLATION_PARAMS is a space-separated flag string, so it is split on
  # purpose here.
  # shellcheck disable=SC2206
  cluster_args+=(${INSTALLATION_PARAMS})

  rosa create cluster "${cluster_args[@]}"

  if [ "${ENABLE_SPOT_WORKERS}" == "true" ]; then
    # The spot pool can only be attached once the cluster itself is ready.
    _wait_for_cluster_ready "${CLUSTER_NAME}"
    _create_spot_worker_pool
  fi

  postinstall
  return 0
}
Expand Down Expand Up @@ -277,6 +312,7 @@ if [[ "$operation" == "install" ]]; then
index_metadata
elif [ "${CLUSTER_STATUS}" == "ready" ] ; then
printf "INFO: Cluster ${CLUSTER_NAME} already installed and ready, reusing..."
_create_spot_worker_pool
postinstall
elif [ "${CLUSTER_STATUS}" == "error" ] ; then
printf "INFO: Cluster ${CLUSTER_NAME} errored, cleaning them now..."
Expand Down
3 changes: 2 additions & 1 deletion dags/openshift_nightlies/tasks/install/rosa/defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@
"ocm_cli_fork": "https://github.com/openshift-online/ocm-cli",
"ocm_cli_version": "container",
"rosa_hcp": "false",
"aws_region": "us-west-2"
"aws_region": "us-west-2",
"enable_spot_workers": false
}

0 comments on commit 0279cb3

Please sign in to comment.