Skip to content

Commit 459b509

Browse files
committed
describe the steps to provision ARM64 clusters
1 parent dc65c90 commit 459b509

File tree

6 files changed

+285
-63
lines changed

6 files changed

+285
-63
lines changed
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
## Install an OCP cluster with ARM64 architecture on Oracle Cloud Infrastructure (OCI) with CCM
2+
3+
Install an OCP cluster in OCI using the Platform External type and the OCI Cloud Controller Manager (CCM).
4+
5+
## Prerequisites
6+
7+
- okd-installer Collection with [OCI dependencies installed](./oci-prerequisites.md):
8+
- Compartment used to launch the cluster, created and exported to the variable `${OCI_COMPARTMENT_ID}`
9+
- Compartment where the DNS zone is placed, exported to the variable `${OCI_COMPARTMENT_ID_DNS}`
10+
- Compartment used to store the RHCOS image exported to variable `${OCI_COMPARTMENT_ID_IMAGE}`
11+
12+
Example:
13+
14+
```bash
15+
cat <<EOF > ~/.oci/env
16+
# Compartment where the cluster will be installed
17+
OCI_COMPARTMENT_ID="<CHANGE_ME:ocid1.compartment.oc1.UUID>"
18+
19+
# Compartment where the DNS Zone is created (base domain)
20+
OCI_COMPARTMENT_ID_DNS="<CHANGE_ME:ocid1.compartment.oc1.UUID>"
21+
22+
# Compartment where the OS Image will be created
23+
OCI_COMPARTMENT_ID_IMAGE="<CHANGE_ME:ocid1.compartment.oc1.UUID>"
24+
EOF
25+
source ~/.oci/env
26+
```
27+
28+
## Setup with Platform External type and CCM
29+
30+
Create the vars file for okd-installer collection:
31+
32+
```bash
33+
# MCO patch without revendor (w/o disabling FG)
34+
CLUSTER_NAME=oci-e414rc2arm1usash1
35+
VARS_FILE=./vars-oci-ha_${CLUSTER_NAME}.yaml
36+
37+
cat <<EOF > ${VARS_FILE}
38+
provider: oci
39+
cluster_name: ${CLUSTER_NAME}
40+
config_cluster_region: us-ashburn-1
41+
42+
cluster_profile: ha
43+
destroy_bootstrap: no
44+
45+
#config_base_domain: splat-oci.devcluster.openshift.com
46+
config_base_domain: us-ashburn-1.splat-oci.devcluster.openshift.com
47+
48+
config_ssh_key: "$(cat ~/.ssh/openshift-dev.pub)"
49+
config_pull_secret_file: "${HOME}/.openshift/pull-secret-latest.json"
50+
51+
config_cluster_version: 4.14.0-rc.2
52+
version: 4.14.0-rc.2
53+
54+
config_platform: external
55+
config_platform_spec: '{"platformName":"oci"}'
56+
57+
oci_ccm_namespace: oci-cloud-controller-manager
58+
oci_compartment_id: ${OCI_COMPARTMENT_ID}
59+
oci_compartment_id_dns: ${OCI_COMPARTMENT_ID_DNS}
60+
oci_compartment_id_image: ${OCI_COMPARTMENT_ID_IMAGE}
61+
62+
# Available manifest patches (run after the 'create manifest' stage)
63+
config_patches:
64+
- rm-capi-machines
65+
- mc_varlibetcd
66+
- mc-kubelet-providerid
67+
- deploy-oci-ccm
68+
#- deploy-oci-csi
69+
70+
# MachineConfig to set the Kubelet environment. Will use this script to discover the ProviderID
71+
cfg_patch_kubelet_providerid_script: |
72+
PROVIDERID=\$(curl -H "Authorization: Bearer Oracle" -sL http://169.254.169.254/opc/v2/instance/ | jq -r .id);
73+
74+
# spread nodes between "AZs"
75+
oci_availability_domains:
76+
- gzqB:US-ASHBURN-AD-1
77+
- gzqB:US-ASHBURN-AD-2
78+
- gzqB:US-ASHBURN-AD-3
79+
80+
oci_fault_domains:
81+
- FAULT-DOMAIN-1
82+
- FAULT-DOMAIN-2
83+
- FAULT-DOMAIN-3
84+
85+
# OCI config for ARM64
86+
config_default_architecture: arm64
87+
compute_shape: "VM.Standard.A1.Flex"
88+
controlplane_shape: "VM.Standard.A1.Flex"
89+
bootstrap_instance: "VM.Standard.A1.Flex"
90+
91+
# Define the OS Image mirror
92+
os_mirror: yes
93+
os_mirror_from: stream_artifacts
94+
os_mirror_stream:
95+
architecture: aarch64
96+
artifact: openstack
97+
format: qcow2.gz
98+
99+
os_mirror_to_provider: oci
100+
os_mirror_to_oci:
101+
compartment_id: ${OCI_COMPARTMENT_ID_IMAGE}
102+
bucket: rhcos-images
103+
image_type: QCOW2
104+
# not supported yet, must be added for arm64
105+
# https://oci-ansible-collection.readthedocs.io/en/latest/collections/oracle/oci/oci_compute_image_shape_compatibility_entry_module.html#ansible-collections-oracle-oci-oci-compute-image-shape-compatibility-entry-module
106+
compatibility_shapes:
107+
- name: VM.Standard.A1.Flex
108+
memory_constraints:
109+
min_in_gbs: 4
110+
max_in_gbs: 128
111+
ocpu_constraints:
112+
min: 2
113+
max: 32
114+
EOF
115+
```
116+
117+
## Install the cluster
118+
119+
```bash
120+
ansible-playbook mtulio.okd_installer.create_all \
121+
-e cert_max_retries=30 \
122+
-e cert_wait_interval_sec=60 \
123+
-e @$VARS_FILE
124+
```
125+
126+
### Approve certificates
127+
128+
Export `KUBECONFIG`:
129+
130+
```bash
131+
export KUBECONFIG=$HOME/.ansible/okd-installer/clusters/${CLUSTER_NAME}/auth/kubeconfig
132+
```
133+
134+
Check and Approve the certificates:
135+
```bash
136+
oc get csr \
137+
-o go-template='{{range .items}}{{if not .status}}{{.metadata.name}}{{"\n"}}{{end}}{{end}}' \
138+
| xargs oc adm certificate approve
139+
```
140+
141+
Check that the nodes have joined the cluster:
142+
143+
```bash
144+
oc get nodes
145+
```
146+
147+
## Testing
148+
149+
Set up the test environment (internal registry, labeling and tainting a worker node, etc.):
150+
151+
```bash
152+
test_node=$(oc get nodes -l node-role.kubernetes.io/worker='' -o jsonpath='{.items[0].metadata.name}')
153+
oc label node $test_node node-role.kubernetes.io/tests=""
154+
oc adm taint node $test_node node-role.kubernetes.io/tests="":NoSchedule
155+
```
156+
157+
Run the tests:
158+
159+
```bash
160+
./opct run -w &&\
161+
./opct retrieve &&\
162+
./opct report *.tar.gz --save-to /tmp/results --server-skip
163+
```
164+
165+
## Destroy the cluster
166+
167+
```bash
168+
ansible-playbook mtulio.okd_installer.destroy_cluster -e @$VARS_FILE
169+
```

docs/guides/OCI/installing-quickly-external.md

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,11 @@ Install an OCP cluster in OCI with Platform External as an option and OCI Cloud
55
## Prerequisites
66

77
- okd-installer Collection with [OCI dependencies installed](./oci-prerequisites.md):
8-
- Compartments used to create the cluster created and exported to variable `${}`
9-
- DNS Zone place the DNS zone and exported to variable `${}`
10-
- Compartment used to store the RHCOS image exported to variable `${}`
8+
- Compartment used to launch the cluster, created and exported to the variable `${OCI_COMPARTMENT_ID}`
9+
- Compartment where the DNS zone is placed, exported to the variable `${OCI_COMPARTMENT_ID_DNS}`
10+
- Compartment used to store the RHCOS image exported to variable `${OCI_COMPARTMENT_ID_IMAGE}`
1111

12-
## Setup with Platform External type and CCM
13-
14-
Create the vars file for okd-installer collection:
12+
Example:
1513

1614
```bash
1715
cat <<EOF > ~/.oci/env
@@ -25,9 +23,15 @@ OCI_COMPARTMENT_ID_DNS="<CHANGE_ME:ocid1.compartment.oc1.UUID>"
2523
OCI_COMPARTMENT_ID_IMAGE="<CHANGE_ME:ocid1.compartment.oc1.UUID>"
2624
EOF
2725
source ~/.oci/env
26+
```
2827

28+
## Setup with Platform External type and CCM
29+
30+
Create the vars file for okd-installer collection:
31+
32+
```bash
2933
# MCO patch without revendor (w/o disabling FG)
30-
CLUSTER_NAME=oci-e414rc0
34+
CLUSTER_NAME=oci-e414rc2
3135
VARS_FILE=./vars-oci-ha_${CLUSTER_NAME}.yaml
3236

3337
cat <<EOF > ${VARS_FILE}
@@ -43,51 +47,58 @@ cluster_profile: ha
4347
destroy_bootstrap: no
4448
4549
config_base_domain: splat-oci.devcluster.openshift.com
50+
4651
config_ssh_key: "$(cat ~/.ssh/openshift-dev.pub)"
4752
config_pull_secret_file: "${HOME}/.openshift/pull-secret-latest.json"
4853
49-
config_cluster_version: 4.14.0-rc.0
50-
version: 4.14.0-rc.0
51-
52-
# Define the OS Image mirror
53-
os_mirror: yes
54-
os_mirror_from: stream_artifacts
55-
os_mirror_stream:
56-
architecture: x86_64
57-
artifact: openstack
58-
format: qcow2.gz
59-
60-
os_mirror_to_provider: oci
61-
os_mirror_to_oci:
62-
compartment_id: ${OCI_COMPARTMENT_ID_IMAGE}
63-
bucket: rhcos-images
64-
image_type: QCOW2
65-
66-
EOF
67-
68-
69-
# Platform External setup only
70-
cat <<EOF >> ${VARS_FILE}
54+
config_cluster_version: 4.14.0-rc.2
55+
version: 4.14.0-rc.2
7156
57+
# Platform External setup
7258
config_platform: external
7359
config_platform_spec: '{"platformName":"oci"}'
7460
7561
# Available manifest patches (run after the 'create manifest' stage)
7662
config_patches:
7763
- rm-capi-machines
64+
- mc_varlibetcd
7865
- mc-kubelet-providerid
7966
- deploy-oci-ccm
80-
- deploy-oci-csi
67+
#- deploy-oci-csi
8168
8269
# MachineConfig to set the Kubelet environment. Will use this script to discover the ProviderID
8370
cfg_patch_kubelet_providerid_script: |
8471
PROVIDERID=\$(curl -H "Authorization: Bearer Oracle" -sL http://169.254.169.254/opc/v2/instance/ | jq -r .id);
8572
8673
oci_ccm_namespace: oci-cloud-controller-manager
8774
75+
# Define the OS Image mirror
76+
os_mirror: yes
77+
os_mirror_from: stream_artifacts
78+
os_mirror_stream:
79+
architecture: x86_64
80+
artifact: openstack
81+
format: qcow2.gz
82+
83+
os_mirror_to_provider: oci
84+
os_mirror_to_oci:
85+
compartment_id: ${OCI_COMPARTMENT_ID_IMAGE}
86+
bucket: rhcos-images
87+
image_type: QCOW2
88+
89+
# Experimental: increase the boot volume performance
90+
# controlplane_source_details:
91+
# source_type: image
92+
# boot_volume_size_in_gbs: 1200
93+
# boot_volume_vpus_per_gb: 120
94+
95+
# Mount control plane as a second volume
96+
# cfg_patch_mc_varlibetcd:
97+
# device_path: /dev/sdb
8898
EOF
8999
```
90100

101+
91102
## Install the cluster
92103

93104
```bash
@@ -99,12 +110,25 @@ ansible-playbook mtulio.okd_installer.create_all \
99110

100111
### Approve certificates
101112

113+
Export `KUBECONFIG`:
114+
115+
```bash
116+
export KUBECONFIG=$HOME/.ansible/okd-installer/clusters/${CLUSTER_NAME}/auth/kubeconfig
117+
```
118+
119+
Check and Approve the certificates:
102120
```bash
103121
oc get csr \
104122
-o go-template='{{range .items}}{{if not .status}}{{.metadata.name}}{{"\n"}}{{end}}{{end}}' \
105123
| xargs oc adm certificate approve
106124
```
107125

126+
Check that the nodes have joined the cluster:
127+
128+
```bash
129+
oc get nodes
130+
```
131+
108132
## Testing
109133

110134
Set up the test environment (internal registry, labeling and tainting a worker node, etc.):
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
DISTRIBUTION="ocp"
22
RELEASE_REPO="quay.io/openshift-release-dev/ocp-release"
3-
VERSION="4.14.0-rc.0"
3+
VERSION="4.14.0-rc.2"
44
RELEASE_VERSION="${VERSION}-x86_64"
55
PULL_SECRET_FILE="${HOME}/.openshift/pull-secret-latest.json"

playbooks/vars/oci/profiles/ha/node-bootstrap.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ _cluster_prefix: "{{ cluster_state.infra_id }}"
55
bootstrap_bucket: "{{ _cluster_prefix }}-infra"
66

77
# Vars used on Machine/Compute Stack
8-
_instance_type: "{{ bootstrap_instance | d('m6i.xlarge') }}"
8+
_instance_type: "{{ bootstrap_instance | d('VM.Standard.E4.Flex') }}"
99
_instance_profile: "{{ cluster_state.compute.iam_profile_bootstrap }}"
1010
# _image_id: "{{ custom_image_id | d(cluster_state.compute.image_id) }}"
1111
_image_id: "{{ custom_image_id }}"
@@ -16,6 +16,8 @@ _machine_suffix: ''
1616
## User Data template
1717
userdata_config_source: "{{ bootstrap_bucket_signed_url }}"
1818

19+
default_availability_domain: "gzqB:US-ASHBURN-AD-1"
20+
1921
## Common vars used in the Stack vars
2022
# _common:
2123
# prefix: "{{ _cluster_prefix }}-bootstrap"
@@ -72,10 +74,10 @@ compute_resources:
7274
region: "{{ config_cluster_region }}"
7375
#freeform_tags: {'Department': 'Finance'}
7476
#defined_tags: {'Operations': {'CostCenter': 'US'}}
75-
availability_domain: "gzqB:US-SANJOSE-1-AD-1"
77+
availability_domain: "{{ default_availability_domain }}"
7678
# platform_config:
7779
# type: AMD_VM
78-
shape: "VM.Standard.E4.Flex"
80+
shape: "{{ _instance_type }}"
7981
shape_config:
8082
ocpus: 4
8183
memory_in_gbs: 16

0 commit comments

Comments
 (0)