Skip to content

Commit

Permalink
Merge pull request #993 from skalenetwork/989-rotation-restart-ima
Browse files Browse the repository at this point in the history
989. Restart both IMA and skaled during PostRotationMonitor
  • Loading branch information
DmytroNazarenko authored Oct 6, 2023
2 parents 73633b2 + ce618cb commit 17372b4
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 6 deletions.
20 changes: 20 additions & 0 deletions core/schains/monitor/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from core.schains.ima import get_migration_ts as get_ima_migration_ts

from core.schains.cleaner import (
remove_ima_container,
remove_schain_container,
remove_schain_volume
)
Expand Down Expand Up @@ -363,6 +364,25 @@ def reloaded_skaled_container(self, ignore_reached_exit: bool = True) -> bool:
ignore_reached_exit=ignore_reached_exit)
return initial_status

@BaseActionManager.monitor_block
def recreated_schain_containers(self, ignore_reached_exit: bool = True) -> bool:
    """Remove the IMA and skaled containers (when present) and start both again.

    Containers are torn down in the order IMA -> skaled and brought back
    in the order skaled -> IMA. The restart counter, failed RPC counter and
    needs-reload flag on the schain record are reset before the start-up.

    NOTE(review): ``ignore_reached_exit`` is accepted but never read in this
    body - confirm whether it should be forwarded to ``skaled_container``.

    Returns:
        True when neither container existed before the call, False when at
        least one of them had to be removed.
    """
    logger.info('Restart skaled and IMA from scratch')

    # Tear-down order matters: IMA goes first, skaled second.
    teardown_plan = (
        (IMA_CONTAINER, remove_ima_container),
        (SCHAIN_CONTAINER, remove_schain_container),
    )
    nothing_removed = True
    for container_type, remove_container in teardown_plan:
        if is_container_exists(self.name, container_type=container_type, dutils=self.dutils):
            nothing_removed = False
            remove_container(self.name, dutils=self.dutils)

    # Resetting restart counters so the fresh containers start from a clean record
    record = self.schain_record
    record.set_restart_count(0)
    record.set_failed_rpc_count(0)
    record.set_needs_reload(False)

    # Start-up order is the reverse of the tear-down: skaled first, then IMA.
    self.skaled_container()
    self.ima_container()
    return nothing_removed

@BaseActionManager.monitor_block
def skaled_rpc(self) -> bool:
initial_status = self.checks.rpc.status
Expand Down
4 changes: 1 addition & 3 deletions core/schains/monitor/skaled_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,7 @@ def execute(self) -> None:
self.am.firewall_rules()
if not self.checks.volume:
self.am.volume()
self.am.reloaded_skaled_container(ignore_reached_exit=False)
if not self.checks.ima_container:
self.am.restart_ima_container()
self.am.recreated_schain_containers(ignore_reached_exit=False)


class NewConfigSkaledMonitor(BaseSkaledMonitor):
Expand Down
29 changes: 29 additions & 0 deletions tests/schains/monitor/action/skaled_action_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,35 @@ def ima_linked(econfig):
econfig.update(state)


def test_recreated_schain_containers(
    skaled_am,
    skaled_checks,
    ima_linked,
    cleanup_ima,
    schain_db,
    dutils
):
    """Check that recreated_schain_containers replaces both containers.

    The action is run twice; after the second run both the skaled and the
    IMA container must report a later creation timestamp than after the first.

    NOTE(review): get_container_created_ts appears to truncate to whole
    seconds, so the strict ``>`` presumably relies on a recreation taking
    longer than one second - confirm this cannot flake.
    """
    container_names = (f'skale_schain_{schain_db}', f'skale_ima_{schain_db}')

    def recreate_and_collect_created_ts():
        # Run the action, wait for both containers, then read their created ts.
        skaled_am.recreated_schain_containers()
        for container_name in container_names:
            dutils.wait_for_container_creation(container_name)
        return tuple(
            dutils.get_container_created_ts(container_name)
            for container_name in container_names
        )

    skaled_am.volume()
    first_skaled_ts, first_ima_ts = recreate_and_collect_created_ts()
    second_skaled_ts, second_ima_ts = recreate_and_collect_created_ts()

    assert second_skaled_ts > first_skaled_ts
    assert second_ima_ts > first_ima_ts


def test_ima_container_action_new_chain(
skaled_am,
skaled_checks,
Expand Down
18 changes: 15 additions & 3 deletions tools/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,16 @@
CONTAINER_LOGS_SEPARATOR
)
from tools.configs.logs import REMOVED_CONTAINERS_FOLDER_PATH
from tools.helper import run_cmd

logger = logging.getLogger(__name__)

# NOTE(review): usage of MAX_RETRIES is not visible in this excerpt - presumably a retry cap; confirm.
MAX_RETRIES = 12
# Default timeout, in seconds, used by wait_for_container_creation before it raises.
CONTAINER_CREATION_TIMEOUT = 10


class ContainerCreationTimeoutError(Exception):
    """Raised when a container has not been created within the given timeout."""


def format_containers(f):
Expand Down Expand Up @@ -338,7 +344,6 @@ def get_cmd(self, container_id: str) -> Dict:
def get_container_created_ts(self, container_id: str) -> int:
info = self.get_info(container_id)
if info:
print(info)
iso_time = info['stats']['Created'].split('.')[0]
return int(datetime.fromisoformat(iso_time).timestamp())
else:
Expand All @@ -362,8 +367,8 @@ def restart_all_schains(

def pull(self, name: str) -> None:
    """Pull the docker image ``name`` by invoking the docker CLI.

    The image reference is passed to ``docker pull`` as-is, so it is not
    split into repository and tag here. The call is made while holding
    ``DockerUtils.docker_lock``.

    Args:
        name: Image reference handed to ``docker pull``.
    """
    with DockerUtils.docker_lock:
        # Single pull through the CLI; the former SDK-based pull was redundant.
        run_cmd(['docker', 'pull', name])

def pulled(self, name: str) -> bool:
with DockerUtils.docker_lock:
Expand All @@ -382,3 +387,10 @@ def get_container_image_name(self, name: str) -> Optional[str]:
if info.get('status') == CONTAINER_NOT_FOUND:
return None
return info['stats']['Config']['Image']

def wait_for_container_creation(self, name: str, timeout=CONTAINER_CREATION_TIMEOUT) -> None:
    """Block until the container ``name`` exists or ``timeout`` seconds pass.

    Existence is polled through ``self.is_container_exists`` every 0.2 seconds.

    Args:
        name: Name of the container to wait for.
        timeout: Maximum number of seconds to wait.

    Raises:
        ContainerCreationTimeoutError: If the container still does not exist
            once the timeout has elapsed.
    """
    # Monotonic clock: elapsed time must not be affected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while not self.is_container_exists(name):
        if time.monotonic() >= deadline:
            raise ContainerCreationTimeoutError(
                f'{name} has not been created within {timeout}s'
            )
        time.sleep(0.2)

0 comments on commit 17372b4

Please sign in to comment.