Skip to content

Commit

Permalink
[patch] multiple quick winner fixes (#966)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrercm committed Aug 18, 2023
1 parent 6b9d5ae commit 3ebec8c
Show file tree
Hide file tree
Showing 23 changed files with 70 additions and 67 deletions.
4 changes: 4 additions & 0 deletions docs/playbooks/oneclick-visualinspection.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ All timings are estimates, see the individual pages for each of these playbooks
- `MAS_CONFIG_DIR` Directory where generated config files will be saved (you may also provide pre-generated config files here)
- `IBM_ENTITLEMENT_KEY` Your IBM Entitlement key to access the IBM Container Registry

## Optional environment variables
- `MAS_APP_SETTINGS_VISUALINSPECTION_STORAGE_CLASS` Defines a custom file storage class for the Visual Inspection application. If none is provided, a default storage class is selected automatically based on what is available in your cluster, e.g. `ibmc-file-gold` for IBM Cloud or `azurefiles-premium` for Azure clusters.
- `MAS_APP_SETTINGS_VISUALINSPECTION_STORAGE_SIZE` Defines the persistent storage size for the Visual Inspection application. If not provided, the default is `100Gi`.

## Usage
```bash
export MAS_INSTANCE_ID=inst1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ mas_app_ws_fqn: "{{ lookup('env','MAS_APP_WS_FQN') | default('manageworkspaces.a
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: ManageWorkspace

mas_app_cfg_timeout: 480 # 8 minutes before we give up and fall back into the retry loop
mas_app_cfg_retries: 50 # 8 mins each loop * 50 loops =~ 400 minutes / 6 2/3 hours (Manage is really slow to set up)
mas_app_cfg_timeout: "{{ lookup('env', 'MAS_APP_CFG_TIMEOUT') | default(480, true)}}" # 8 minutes before we give up and fall back into the retry loop
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(50, true)}}" # 8 mins each loop * 50 loops =~ 400 minutes / 6 2/3 hours (Manage is really slow to set up)
11 changes: 11 additions & 0 deletions ibm/mas_devops/roles/cp4d_service/tasks/wait/wait-wd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,17 @@
- cpd_cr_wait_lookup.resources[0].status.watsonDiscoveryStatus != "Completed"
include_tasks: wait-wd-detectimagepullbackoff.yml

# Work around intermittent Watson Discovery (WD) install failures: on a few
# occasions the wd-discovery-ranker-rest pod fails to start because other WD
# pods are not running properly. While the CR lookup returned exactly one
# resource whose watsonDiscoveryStatus is not yet "Completed", delete every pod
# whose name contains "wd-" in the hope that they all come back up and running.
#
# `-o name` emits one "pod/<name>" per line (no table header to parse) and
# `xargs -r` skips the delete entirely when nothing matches, so an empty match
# does not run `oc delete` without a pod name and fail the task.
- name: "wait-wd : Restart all wd pods to prevent intermitent failures with wd-discovery-ranker-rest"
  when:
    - cpd_cr_wait_lookup.resources is defined
    - cpd_cr_wait_lookup.resources | length == 1
    - cpd_cr_wait_lookup.resources[0].status is defined
    - cpd_cr_wait_lookup.resources[0].status.watsonDiscoveryStatus is defined
    - cpd_cr_wait_lookup.resources[0].status.watsonDiscoveryStatus != "Completed"
  shell: >-
    oc get pods -n {{ cpd_instance_namespace }} -o name
    | grep wd-
    | xargs -r oc delete -n {{ cpd_instance_namespace }}

# 4. Wait for CP4D service CR to be ready
# -----------------------------------------------------------------------------
Expand Down
11 changes: 9 additions & 2 deletions ibm/mas_devops/roles/gencfg_workspace/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
---

# 1. Check for undefined properties that do not have a default
# -----------------------------------------------------------------------------
- name: "Assert that mas_instance_id has been provided"
Expand All @@ -17,11 +16,19 @@
that: mas_workspace_name is defined and mas_workspace_name != ""
fail_msg: "mas_workspace_name property has not been set"

# Validate the workspace ID format before it is used to build the name of the
# generated workspace file. The regex requires a leading lowercase letter
# followed by 2-11 lowercase letters or digits, i.e. 3-12 characters in total,
# which is what the failure messages below spell out for the user.
- name: "Fail if mas_workspace_id does not meet requirements"
assert:
that: mas_workspace_id is match ('^[a-z][a-z0-9]{2,11}$')
fail_msg:
- "mas_workspace_id does not meet requirements"
- "Must be 3-12 characters long"
- "Must only use lowercase letters and numbers"
- "Must start with a lowercase letter"

# 2. Generate Workspace for MAS
# -----------------------------------------------------------------------------
- name: Copy Workspace to filesytem
ansible.builtin.template:
src: workspace.yml.j2
dest: "{{ mas_config_dir }}/workspace-{{ mas_workspace_id }}.yml"
mode: '664'
mode: "664"
20 changes: 7 additions & 13 deletions ibm/mas_devops/roles/install_operator/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,30 @@
---

# 1. Create namespace we will deploy to
# -----------------------------------------------------------------------------
- name: "Create namespace"
kubernetes.core.k8s:
api_version: v1
kind: Namespace
name: '{{ namespace }}'

name: "{{ namespace }}"

- name: Add custom labels to namespace
when: custom_labels is defined and custom_labels | length > 0
kubernetes.core.k8s:
state: patched
kind: Namespace
name: '{{ namespace }}'
name: "{{ namespace }}"
definition: "{{ lookup('template', 'templates/custom_labels.json.j2') }}"


# 2. Create an image pull secret for the pre-release catalog
# -----------------------------------------------------------------------------
- name: "Debug Entitlement Secret Creation"
debug:
msg:
- "Target Namespace ....................... {{ namespace }}"
- "Artifactory Username ................... {{ artifactory_username | default('<undefined>', true) }}"
- "Artifactory Password ................... {{ artifactory_token | default('<undefined>', true) }}"
- "Artifactory Password ................... {{ '************' if (artifactory_token is defined) else '<undefined>' }}"
- "ICR Username ........................... {{ icr_username | default('<undefined>', true) }}"
- "ICR Password ........................... {{ icr_password | default('<undefined>', true) }}"
- "ICR Password ........................... {{ '************' if (icr_password is defined) else '<undefined>' }}"

- name: "Create ibm-entitlement secret"
kubernetes.core.k8s:
Expand All @@ -37,11 +34,10 @@
type: kubernetes.io/dockerconfigjson
metadata:
name: ibm-entitlement
namespace: '{{ namespace }}'
namespace: "{{ namespace }}"
data:
.dockerconfigjson: "{{ lookup('template', 'templates/ibm-entitlement-with-artifactory.json.j2') | to_json | b64encode }}"


# 3. Patch the default service account for pre-release build access
# -----------------------------------------------------------------------------
# If we don't do this then we won't be able to pull the operator image from
Expand All @@ -56,11 +52,10 @@
kind: ServiceAccount
metadata:
name: default
namespace: '{{ namespace }}'
namespace: "{{ namespace }}"
imagePullSecrets:
- name: ibm-entitlement


# 4. Create the operator group that will scope the operator
# -----------------------------------------------------------------------------
- name: "Create operator group"
Expand All @@ -69,7 +64,6 @@
wait: yes
wait_timeout: 60 # subsequent tasks will fail if the group isn't fully created


# 5. Create the subscription for the operator
# -----------------------------------------------------------------------------
- name: "Create subscription"
Expand All @@ -78,5 +72,5 @@
wait: yes
wait_timeout: 300
wait_condition:
type: 'CatalogSourcesUnhealthy'
type: "CatalogSourcesUnhealthy"
status: "False"
3 changes: 2 additions & 1 deletion ibm/mas_devops/roles/kafka/README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
kafka
=====

This role provides support to install a Kafka Cluster using [Red Hat AMQ Streams](https://www.redhat.com/en/resources/amq-streams-datasheet), IBM Event Streams or AWS MSK and generate configuration that can be directly applied to Maximo Application Suite.
This role provides support to install a Kafka Cluster using [Red Hat AMQ Streams](https://www.redhat.com/en/resources/amq-streams-datasheet), [IBM Event Streams](https://www.ibm.com/cloud/event-streams) or [AWS MSK](https://aws.amazon.com/msk/) and generate configuration that can be directly applied to Maximo Application Suite.

> The Red Hat AMQ streams component is a massively scalable, distributed, and high-performance data streaming platform based on the Apache Kafka project. It offers a distributed backbone that allows microservices and other applications to share data with high throughput and low latency.
>
> As more applications move to Kubernetes and Red Hat OpenShift, it is increasingly important to be able to run the communication infrastructure on the same platform. Red Hat OpenShift, as a highly scalable platform, is a natural fit for messaging technologies such as Kafka. The AMQ streams component makes running and managing Apache Kafka OpenShift native through the use of powerful operators that simplify the deployment, configuration, management, and use of Apache Kafka on Red Hat OpenShift.
>
> The AMQ streams component is part of the Red Hat AMQ family, which also includes the AMQ broker, a longtime innovation leader in Java™ Message Service (JMS) and polyglot messaging, as well as the AMQ interconnect router, a wide-area, peer-to-peer messaging solution.
**Note:** The MAS license does not include entitlement for AMQ streams. The MAS Devops Collection supports this Kafka deployment as an example only.

!!! tip
The role will generate a yaml file containing the definition of a Secret and KafkaCfg resource that can be used to configure the deployed cluster as the MAS system Kafka.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,6 @@
namespace: "{{ mongodb_namespace }}"
register: mongodb_crt_lookup

- name: "community : install : Create MAS MongoCfg (debug)"
debug:
msg: "{{ lookup('template', 'templates/community/suite_mongocfg.yml.j2') }}"
vars:
mongodb_ca_pem: "{{ mongodb_ca_lookup.resources[0].data['ca.crt'] }}"
mongodb_admin_password: "{{ admin_password_lookup.resources[0].data.password | b64decode }}"

- name: "community : install : Create MAS MongoCfg"
when:
- mas_instance_id is defined
Expand Down
2 changes: 1 addition & 1 deletion ibm/mas_devops/roles/ocp_config/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ ocp_ingress_server_timeout: "{{ lookup('env', 'OCP_INGRESS_SERVER_TIMEOUT') | de

# Ingress Controller Settings
# -----------------------------------------------------------------------------
ocp_operatorhub_disable_redhat_sources: "{{ lookup('env', 'OCP_OPERATORHUB_DISABLE_REDHAT_SOURCES') | default('30s', true) }}"
ocp_operatorhub_disable_redhat_sources: "{{ lookup('env', 'OCP_OPERATORHUB_DISABLE_REDHAT_SOURCES') | default('False', true) | bool }}"
6 changes: 0 additions & 6 deletions ibm/mas_devops/roles/ocp_provision/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ supported_cluster_types:
- rosa
- ipi


# GPU support (limited)
# -----------------------------------------------------------------------------
# Flag to add GPU worker node to cluster. Currently only set up for ROKS clusters
Expand All @@ -26,7 +25,6 @@ gpu_workers: "{{ lookup('env', 'GPU_WORKERS') | default('1', true) }}"
# The name of the gpu worker pool.
gpu_workerpool_name: "{{ lookup('env', 'GPU_WORKERPOOL_NAME') | default('gpu', true) }}"


# ROKS
# -----------------------------------------------------------------------------
ibmcloud_endpoint: "{{ lookup('env', 'IBMCLOUD_ENDPOINT') | default('https://cloud.ibm.com', true) }}"
Expand All @@ -38,7 +36,6 @@ roks_flavor: "{{ lookup('env', 'ROKS_FLAVOR') | default('b3c.16x64.300gb', true)
roks_workers: "{{ lookup('env', 'ROKS_WORKERS') | default('3', true) }}"
roks_flags: "{{ lookup('env', 'ROKS_FLAGS') | default('', true) }}"


# FYRE
# -----------------------------------------------------------------------------
fyre_username: "{{ lookup('env', 'FYRE_USERNAME') }}"
Expand All @@ -54,14 +51,12 @@ fyre_worker_count: "{{ lookup('env', 'FYRE_WORKER_COUNT') | default('3', true) }
fyre_worker_memory: "{{ lookup('env', 'FYRE_WORKER_MEMORY') | default('16', true) }}"
fyre_worker_cpu: "{{ lookup('env', 'FYRE_WORKER_CPU') | default('64', true) }}"


# ROSA
# -----------------------------------------------------------------------------
rosa_token: "{{ lookup('env', 'ROSA_TOKEN') }}"
rosa_cluster_admin_password: "{{ lookup('env', 'ROSA_CLUSTER_ADMIN_PASSWORD') }}"
rosa_compute_nodes: "{{ lookup('env', 'ROSA_COMPUTE_NODES') | default('3', true) }}"


# AWS
# -----------------------------------------------------------------------------
aws_access_key_id: "{{ lookup('env', 'AWS_ACCESS_KEY_ID') }}"
Expand Down Expand Up @@ -93,4 +88,3 @@ ipi_config_dir: "{{ ipi_dir }}/config/{{ cluster_name }}"

ocp_installer_dir: "{{ ipi_dir }}/installer/{{ ocp_version }}"
ocp_installer_exe: "{{ ipi_dir }}/installer/{{ ocp_version }}/openshift-install"

1 change: 1 addition & 0 deletions ibm/mas_devops/roles/sls/tasks/install/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
password: "{{ mongocfg[0].data.password | b64decode}}"
hosts: "{{ mongocfg[1].spec.config.hosts }}"
certificates: "{{ lookup('template', 'templates/mongo-certificates.yml.j2') }}"
no_log: true
when: mongocfg is defined and mongocfg[0] is defined and mongocfg[0].data is defined

- name: Create the Mongo Secret for SLS
Expand Down
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/assist.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
mas_app_ws_fqn: assistworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: AssistWorkspace
mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
mas_app_ws_fqn: healthworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: HealthWorkspace
mas_app_cfg_retries: 50
mas_app_cfg_delay: 480
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(480, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(50, true)}}"

mas_app_settings_aio_flag: "{{ lookup('env', 'MAS_APP_SETTINGS_AIO_FLAG') | default('true', true)}}"
mas_app_settings_db2_schema: "{{ lookup('env', 'MAS_APP_SETTINGS_DB2_SCHEMA') | default('maximo', true)}}"
Expand Down
5 changes: 2 additions & 3 deletions ibm/mas_devops/roles/suite_app_config/vars/hputilities.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@
mas_app_ws_fqn: hputilitiesworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: HPUtilitiesWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/iot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
mas_app_ws_fqn: iotworkspaces.iot.ibm.com
mas_app_ws_apiversion: iot.ibm.com/v1
mas_app_ws_kind: IoTWorkspace
mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/manage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: ManageWorkspace

# Depending on the number of components being used the time to configure Manage can grow significantly
mas_app_cfg_retries: 60 # ~8 hours
mas_app_cfg_delay: 480 # ~8 minutes
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(480, true)}}" # ~8 minutes
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(60, true)}}" # ~8 hours

mas_app_settings_aio_flag: "{{ lookup('env', 'MAS_APP_SETTINGS_AIO_FLAG') | default('true', true) | bool }}"
mas_app_settings_db2_schema: "{{ lookup('env', 'MAS_APP_SETTINGS_DB2_SCHEMA') | default('maximo', true)}}"
Expand Down
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/monitor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: monitorworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: MonitorWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/mso.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: msoworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: MSOWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/optimizer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: optimizerworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: OptimizerWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/predict.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: predictworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: PredictWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
4 changes: 2 additions & 2 deletions ibm/mas_devops/roles/suite_app_config/vars/safety.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: safetyworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: SafetyWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ mas_app_ws_fqn: visualinspectionappworkspaces.apps.mas.ibm.com
mas_app_ws_apiversion: apps.mas.ibm.com/v1
mas_app_ws_kind: VisualInspectionAppWorkspace

mas_app_cfg_delay: 120
mas_app_cfg_retries: 30
mas_app_cfg_delay: "{{ lookup('env', 'MAS_APP_CFG_DELAY') | default(120, true)}}"
mas_app_cfg_retries: "{{ lookup('env', 'MAS_APP_CFG_RETRIES') | default(30, true)}}"
Loading

0 comments on commit 3ebec8c

Please sign in to comment.