Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ This file is used to list changes made in each version of the AWS ParallelCluste

3.15.0
------
**CHANGES**
- Mitigate the risk of transient build-image failures on RHEL and Rocky Linux caused by out-of-sync repo mirrors,
  by refreshing the local repository metadata cache before every package installation attempt.

3.14.1
------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
end

use 'partial/_install_packages_common.rb'
use 'partial/_install_packages_rhel_amazon.rb'
use 'partial/_install_packages_rhel_rocky.rb'

def default_packages
# environment-modules required by EFA, Intel MPI and ARM PL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
end

use 'partial/_install_packages_common.rb'
use 'partial/_install_packages_rhel_amazon.rb'
use 'partial/_install_packages_rhel_rocky.rb'

def default_packages
# environment-modules required by EFA, Intel MPI and ARM PL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

#
# Copyright:: 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file.
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.

action :install do
ruby_block 'install_packages_with_metadata_refresh' do
block do
max_retries = 10
retry_delay = 5
packages = Array(new_resource.packages).join(' ')

max_retries.times do |attempt|
# Refresh metadata on each attempt to handle mirror inconsistency
shell_out!('dnf clean metadata && dnf makecache', timeout: 300)

result = shell_out("dnf install -y #{packages}", timeout: 600)
break if result.exitstatus == 0

Chef::Log.warn("Package install attempt #{attempt + 1}/#{max_retries} failed: #{result.stderr}")
raise "Package installation failed after #{max_retries} attempts" if attempt == max_retries - 1

sleep retry_delay
end
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@ def self.setup(chef_run)
end

if %w(amazon centos redhat rocky).include?(platform)
it 'installs default packages' do
is_expected.to install_package(default_packages)
.with(retries: 10)
.with(retry_delay: 5)
.with(flush_cache: { before: true })
if platform == 'amazon'
it 'installs default packages' do
is_expected.to install_package(default_packages)
.with(retries: 10)
.with(retry_delay: 5)
.with(flush_cache: { before: true })
end
else
it 'installs default packages with metadata refresh' do
is_expected.to run_ruby_block('install_packages_with_metadata_refresh')
end
end

if platform == 'amazon' && version == '2'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
execute 'yum-config-manager_skip_if_unavail' do
command "yum-config-manager --setopt=\*.skip_if_unavailable=1 --save"
end

# Reduce metadata cache time to mitigate mirror inconsistency issues
execute 'yum-config-manager_metadata_expire' do
command "yum-config-manager --setopt=\*.metadata_expire=300 --save"
end
end

action :update do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ def powertool_name
execute 'yum-config-manager_skip_if_unavail' do
command "yum-config-manager --setopt=\*.skip_if_unavailable=1 --save"
end

# Reduce metadata cache time to mitigate mirror inconsistency issues
execute 'yum-config-manager_metadata_expire' do
command "yum-config-manager --setopt=\*.metadata_expire=300 --save"
end
end

action :update do
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ def self.setup(chef_run)
.with(command: 'yum-config-manager --setopt=*.skip_if_unavailable=1 --save')
end

it 'sets metadata expire time' do
is_expected.to run_execute('yum-config-manager_metadata_expire')
.with(command: 'yum-config-manager --setopt=*.metadata_expire=300 --save')
end

it 'enables rhui' do
is_expected.to run_execute('yum-config-manager-rhel')
.with(command: "yum-config-manager --enable codeready-builder-for-rhel-#{version.to_i}-rhui-rpms")
Expand Down Expand Up @@ -102,6 +107,11 @@ def self.setup(chef_run)
.with(command: 'yum-config-manager --setopt=*.skip_if_unavailable=1 --save')
end

it 'sets metadata expire time' do
is_expected.to run_execute('yum-config-manager_metadata_expire')
.with(command: 'yum-config-manager --setopt=*.metadata_expire=300 --save')
end

else
pending "Implement for #{platform}"
end
Expand Down
Loading