Skip to content

Commit 4e5c3b7

Browse files
committed
[Build] Mitigate the impact of out-of-sync repo mirrors in Rocky and RHEL by reducing the cache expiration to 300 seconds and refreshing it every time the installation command fails.
1 parent 48044c2 commit 4e5c3b7

File tree

8 files changed

+73
-7
lines changed

8 files changed

+73
-7
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ This file is used to list changes made in each version of the AWS ParallelCluste
55

66
3.15.0
77
------
8+
**CHANGES**
9+
- Mitigate the risk of transient build-image failures in RHEL and Rocky caused by out-of-sync repo mirrors,
10+
by refreshing the local cache at every failed attempt.
811

912
3.14.1
1013
------

cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_redhat8.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
end
1818

1919
use 'partial/_install_packages_common.rb'
20-
use 'partial/_install_packages_rhel_amazon.rb'
20+
use 'partial/_install_packages_rhel_rocky.rb'
2121

2222
def default_packages
2323
# environment-modules required by EFA, Intel MPI and ARM PL

cookbooks/aws-parallelcluster-platform/resources/install_packages/install_packages_rocky8.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
end
1818

1919
use 'partial/_install_packages_common.rb'
20-
use 'partial/_install_packages_rhel_amazon.rb'
20+
use 'partial/_install_packages_rhel_rocky.rb'
2121

2222
def default_packages
2323
# environment-modules required by EFA, Intel MPI and ARM PL
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# frozen_string_literal: true
2+
3+
#
4+
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License").
7+
# You may not use this file except in compliance with the License.
8+
# A copy of the License is located at
9+
#
10+
# http://aws.amazon.com/apache2.0/
11+
#
12+
# or in the "LICENSE.txt" file accompanying this file.
13+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
14+
# See the License for the specific language governing permissions and limitations under the License.
15+
16+
# Installs the packages listed in new_resource.packages with dnf.
#
# The local repo metadata is refreshed before every attempt, so that a mirror
# which was out of sync on one attempt can be picked up correctly on the next.
# Up to 10 attempts are made, 5 seconds apart.
#
# Raises RuntimeError when the final attempt still fails.
action :install do
  ruby_block 'install_packages_with_metadata_refresh' do
    block do
      max_retries = 10
      retry_delay = 5
      packages = Array(new_resource.packages).join(' ')

      max_retries.times do |attempt|
        # Refresh metadata on each attempt to handle mirror inconsistency.
        # Deliberately the non-raising shell_out: a failed refresh must count
        # as a failed attempt and be retried, not abort the resource before
        # the retry loop has had a chance to run.
        result = shell_out('dnf clean metadata && dnf makecache', timeout: 300)

        # Only attempt the install when the refresh succeeded; otherwise the
        # refresh result is what gets reported in the warning below.
        result = shell_out("dnf install -y #{packages}", timeout: 600) if result.exitstatus == 0
        break if result.exitstatus == 0

        Chef::Log.warn("Package install attempt #{attempt + 1}/#{max_retries} failed: #{result.stderr}")
        raise "Package installation failed after #{max_retries} attempts" if attempt == max_retries - 1

        sleep retry_delay
      end
    end
  end
end

cookbooks/aws-parallelcluster-platform/spec/unit/resources/install_packages_spec.rb

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,17 @@ def self.setup(chef_run)
3232
end
3333

3434
if %w(amazon centos redhat rocky).include?(platform)
35-
it 'installs default packages' do
36-
is_expected.to install_package(default_packages)
37-
.with(retries: 10)
38-
.with(retry_delay: 5)
39-
.with(flush_cache: { before: true })
35+
if platform == 'amazon'
36+
it 'installs default packages' do
37+
is_expected.to install_package(default_packages)
38+
.with(retries: 10)
39+
.with(retry_delay: 5)
40+
.with(flush_cache: { before: true })
41+
end
42+
else
43+
it 'installs default packages with metadata refresh' do
44+
is_expected.to run_ruby_block('install_packages_with_metadata_refresh')
45+
end
4046
end
4147

4248
if platform == 'amazon' && version == '2'

cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_redhat8.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@
4141
execute 'yum-config-manager_skip_if_unavail' do
4242
command "yum-config-manager --setopt=\*.skip_if_unavailable=1 --save"
4343
end
44+
45+
# Reduce metadata cache time to mitigate mirror inconsistency issues
46+
execute 'yum-config-manager_metadata_expire' do
47+
command "yum-config-manager --setopt=\*.metadata_expire=300 --save"
48+
end
4449
end
4550

4651
action :update do

cookbooks/aws-parallelcluster-shared/resources/package_repos/package_repos_rocky8.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ def powertool_name
4848
execute 'yum-config-manager_skip_if_unavail' do
4949
command "yum-config-manager --setopt=\*.skip_if_unavailable=1 --save"
5050
end
51+
52+
# Reduce metadata cache time to mitigate mirror inconsistency issues
53+
execute 'yum-config-manager_metadata_expire' do
54+
command "yum-config-manager --setopt=\*.metadata_expire=300 --save"
55+
end
5156
end
5257

5358
action :update do

cookbooks/aws-parallelcluster-shared/spec/unit/resources/package_repos_spec.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ def self.setup(chef_run)
6363
.with(command: 'yum-config-manager --setopt=*.skip_if_unavailable=1 --save')
6464
end
6565

66+
it 'sets metadata expire time' do
67+
is_expected.to run_execute('yum-config-manager_metadata_expire')
68+
.with(command: 'yum-config-manager --setopt=*.metadata_expire=300 --save')
69+
end
70+
6671
it 'enables rhui' do
6772
is_expected.to run_execute('yum-config-manager-rhel')
6873
.with(command: "yum-config-manager --enable codeready-builder-for-rhel-#{version.to_i}-rhui-rpms")
@@ -102,6 +107,11 @@ def self.setup(chef_run)
102107
.with(command: 'yum-config-manager --setopt=*.skip_if_unavailable=1 --save')
103108
end
104109

110+
it 'sets metadata expire time' do
111+
is_expected.to run_execute('yum-config-manager_metadata_expire')
112+
.with(command: 'yum-config-manager --setopt=*.metadata_expire=300 --save')
113+
end
114+
105115
else
106116
pending "Implement for #{platform}"
107117
end

0 commit comments

Comments
 (0)