@BaseActionManager.monitor_block
def recreated_schain_containers(self, ignore_reached_exit: bool = True) -> bool:
    """Remove and re-create both the skaled and the IMA containers.

    Existing containers are removed in the order IMA -> skaled and then
    started again in the order skaled -> IMA. Between removal and start the
    restart counter, the failed RPC counter and the ``needs_reload`` flag of
    the schain record are reset.

    :param ignore_reached_exit: forwarded to ``skaled_container``.
    :returns: ``True`` when neither container existed before the call,
        ``False`` when at least one of them had to be removed.
    """
    logger.info('Restart skaled and IMA from scratch')
    initial_status = True
    # Remove IMA -> skaled, start skaled -> IMA
    if is_container_exists(self.name, container_type=IMA_CONTAINER, dutils=self.dutils):
        initial_status = False
        remove_ima_container(self.name, dutils=self.dutils)
    if is_container_exists(self.name, container_type=SCHAIN_CONTAINER, dutils=self.dutils):
        initial_status = False
        remove_schain_container(self.name, dutils=self.dutils)
    # Resetting restart counters
    self.schain_record.set_restart_count(0)
    self.schain_record.set_failed_rpc_count(0)
    self.schain_record.set_needs_reload(False)
    # The caller-supplied flag used to be dropped here; pass it through the
    # same way reloaded_skaled_container does.
    # NOTE(review): assumes skaled_container accepts ignore_reached_exit - confirm.
    self.skaled_container(ignore_reached_exit=ignore_reached_exit)
    self.ima_container()
    return initial_status
def test_recreated_schain_containers(
    skaled_am,
    skaled_checks,
    ima_linked,
    cleanup_ima,
    schain_db,
    dutils
):
    """Check that recreating sChain containers produces fresh containers.

    The action is executed twice; after the second run the creation
    timestamp of both the skaled and the IMA container must be greater than
    the one observed after the first run.
    """
    chain_name = schain_db
    container_names = (f'skale_schain_{chain_name}', f'skale_ima_{chain_name}')

    def recreate_and_read_created_ts():
        # Run the action, wait for both containers to show up, then read
        # their creation timestamps (skaled first, IMA second).
        skaled_am.recreated_schain_containers()
        for container in container_names:
            dutils.wait_for_container_creation(container)
        return [dutils.get_container_created_ts(container) for container in container_names]

    skaled_am.volume()
    skaled_first_ts, ima_first_ts = recreate_and_read_created_ts()
    skaled_second_ts, ima_second_ts = recreate_and_read_created_ts()

    assert skaled_second_ts > skaled_first_ts
    assert ima_second_ts > ima_first_ts
def wait_for_container_creation(self, name: str, timeout: float = CONTAINER_CREATION_TIMEOUT):
    """Block until the container ``name`` exists or ``timeout`` elapses.

    Existence is polled through ``self.is_container_exists`` with a 0.2 s
    pause between attempts. At least one existence check is always made,
    even when ``timeout`` is 0.

    :param name: name of the container to wait for.
    :param timeout: maximum time to wait, in seconds.
    :raises ContainerCreationTimeoutError: if the container still does not
        exist once the timeout has passed.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while not self.is_container_exists(name):
        if time.monotonic() >= deadline:
            raise ContainerCreationTimeoutError(
                f'{name} has not been created within {timeout}s'
            )
        time.sleep(0.2)