diff --git a/.gitignore b/.gitignore index 7667e979..621d94a8 100644 --- a/.gitignore +++ b/.gitignore @@ -71,4 +71,6 @@ tests/skale-data/contracts_info/schain_ima_abi.json temp_* .DS_Store -.coverage* \ No newline at end of file +.coverage* + +.env diff --git a/core/schains/checks.py b/core/schains/checks.py index 0202ed22..8beb2673 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -41,7 +41,6 @@ from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController from core.schains.ima import get_ima_time_frame, get_migration_ts as get_ima_migration_ts -from core.schains.process import is_monitor_process_alive from core.schains.rpc import ( check_endpoint_alive, check_endpoint_blocks, @@ -390,11 +389,6 @@ def blocks(self) -> CheckRes: return CheckRes(check_endpoint_blocks(http_endpoint)) return CheckRes(False) - @property - def process(self) -> CheckRes: - """Checks that sChain monitor process is running""" - return CheckRes(is_monitor_process_alive(self.schain_record.monitor_id)) - @property def exit_zero(self) -> CheckRes: """Check that sChain container exited with zero code""" @@ -453,7 +447,12 @@ def __getattr__(self, attr: str) -> Any: def get_name(self) -> str: return self.name - def get_all(self, log: bool = True, save: bool = False, needed: Optional[List[str]] = None): + def get_all( + self, + log: bool = True, + save: bool = False, + needed: Optional[List[str]] = None + ) -> dict: needed = needed or API_ALLOWED_CHECKS plain_checks = {} diff --git a/core/schains/info.py b/core/schains/info.py index 0046b398..ee458eb1 100644 --- a/core/schains/info.py +++ b/core/schains/info.py @@ -19,7 +19,7 @@ class SchainData: dkg_status: int is_deleted: bool first_run: bool - repair_mode: bool + repair_ts: int def to_dict(self) -> dict: return { @@ -30,7 +30,7 @@ def to_dict(self) -> dict: 'dkg_status': self.dkg_status, 'is_deleted': self.is_deleted, 'first_run': self.first_run, - 'repair_mode': self.repair_mode + 'repair_ts': self.repair_ts } @@ -52,7 +52,7 @@ def get_schain_info_by_name(skale: Skale, schain_name: str) -> SchainData: record.dkg_status, record.is_deleted, record.first_run, - record.repair_mode + int(record.repair_date.timestamp()) ) diff --git a/core/schains/process.py b/core/schains/process.py index 1da14999..b6a9495f 100644 --- a/core/schains/process.py +++ b/core/schains/process.py @@ -22,6 +22,7 @@ import os import shutil import signal +import time from typing import Tuple import pathlib @@ -143,3 +144,9 @@ def terminate_process( def is_monitor_process_alive(monitor_pid: int) -> bool: """Checks that provided monitor_id is inited and alive""" return monitor_pid != 0 and check_pid(monitor_pid) + + +def is_process_healthy(schain_name: str, allowed_diff: int) -> bool: + pid, pts = get_schain_process_info(schain_name) + current_ts = int(time.time()) + return pid is not None and is_monitor_process_alive(pid) and current_ts - pts < allowed_diff diff --git a/requirements.txt b/requirements.txt index b8ff328c..f0406e34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ Jinja2==3.1.2 docker==6.1.3 python-iptables==1.0.1 -skale.py==6.4b0 +skale.py==6.4b1 requests==2.31 ima-predeployed==2.1.0b0 diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index c0bc291a..0610af26 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -1,5 +1,7 @@ -import socket import datetime +import glob +import shutil +import socket import pytest import mock @@ -15,6 +17,7 @@ from core.node import Node, NodeStatus from core.node_config import NodeConfig from core.schains.config.file_manager import ConfigFileManager +from tools.configs.schains import SCHAINS_DIR_PATH from tools.configs.tg import TG_API_KEY, TG_CHAT_ID from web.routes.node import node_bp from web.helper import get_api_url @@ -259,6 +262,10 @@ def test_exit_maintenance(skale_bp, node_config_in_maintenance): def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale_bp): + for path in glob.glob(f'{SCHAINS_DIR_PATH}/*'): + if not path.endswith(schain_on_contracts): + shutil.rmtree(path) + data = get_bp_data( skale_bp, get_api_url(BLUEPRINT_NAME, 'update-safe'), diff --git a/tests/routes/schains_test.py b/tests/routes/schains_test.py index fdede93d..52c14a22 100644 --- a/tests/routes/schains_test.py +++ b/tests/routes/schains_test.py @@ -122,7 +122,7 @@ def test_get_schain( 'id': schain_id, 'mainnet_owner': skale.wallet.address, 'part_of_node': 1, 'dkg_status': 1, 'is_deleted': False, - 'first_run': True, 'repair_mode': False + 'first_run': True, 'repair_ts': int(r.repair_date.timestamp()) } } diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 4baa89dc..f0d67f32 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -4,7 +4,6 @@ from http import HTTPStatus from collections import namedtuple -from multiprocessing import Process import mock import docker @@ -20,9 +19,8 @@ schain_config_dir ) from core.schains.config.schain_node import generate_schain_nodes -from core.schains.skaled_exit_codes import SkaledExitCodes from core.schains.runner import get_container_info, get_image_name, run_ima_container -# from core.schains.cleaner import remove_ima_container +from core.schains.skaled_exit_codes import SkaledExitCodes from tools.configs.containers import IMA_CONTAINER, SCHAIN_CONTAINER from tools.helper import read_json @@ -330,29 +328,6 @@ def test_exit_code(skale, rule_controller, schain_db, current_nodes, estate, dut dutils.safe_rm(container_name) -def test_process(skale, rule_controller, schain_db, current_nodes, estate, dutils): - schain_record = SChainRecord.get_by_name(schain_db) - checks = SChainChecks( - schain_db, - TEST_NODE_ID, - schain_record=schain_record, - rule_controller=rule_controller, - stream_version=CONFIG_STREAM, - current_nodes=current_nodes, - last_dkg_successful=True, - estate=estate, - dutils=dutils - ) - assert not checks.process.status - - process = Process(target=time.sleep, args=(5,)) - process.start() - schain_record.set_monitor_id(process.ident) - assert checks.process.status - process.join() - assert not checks.process.status - - def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_db, estate): schain_name = schain_config['skaleConfig']['sChain']['schainName'] schain_record = SChainRecord.get_by_name(schain_name) @@ -377,7 +352,6 @@ def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_d assert isinstance(checks_dict['rpc'], bool) assert isinstance(checks_dict['blocks'], bool) assert isinstance(checks_dict['ima_container'], bool) - assert isinstance(checks_dict['process'], bool) estate.ima_linked = False checks_without_ima = SChainChecksMock( diff --git a/tests/schains/info_test.py b/tests/schains/info_test.py index 5850bdd1..5bab27a4 100644 --- a/tests/schains/info_test.py +++ b/tests/schains/info_test.py @@ -1,42 +1,42 @@ +import freezegun + from core.schains.info import get_schain_info_by_name -from tests.utils import upsert_schain_record_with_config +from tests.utils import CURRENT_DATETIME, upsert_schain_record_with_config +@freezegun.freeze_time(CURRENT_DATETIME) def test_get_schain_info_by_name(skale, schain_on_contracts, schain_db): name = schain_on_contracts - upsert_schain_record_with_config(name) + schain_record = upsert_schain_record_with_config(name) info = get_schain_info_by_name(skale, name) + expected_ts = int(schain_record.repair_date.timestamp()) assert info.name == name assert info.schain_id == skale.schains.name_to_id(name) assert info.part_of_node == 1 assert info.dkg_status == 1 assert not info.is_deleted assert info.first_run - assert not info.repair_mode + assert info.repair_ts == expected_ts assert info.to_dict() == { - 'name': name, - 'id': skale.schains.name_to_id(name), - 'mainnet_owner': info.mainnet_owner, - 'part_of_node': 1, - 'dkg_status': 1, - 'is_deleted': False, - 'first_run': True, - 'repair_mode': False + 'name': name, + 'id': skale.schains.name_to_id(name), + 'mainnet_owner': info.mainnet_owner, + 'part_of_node': 1, + 'dkg_status': 1, + 'is_deleted': False, + 'first_run': True, + 'repair_ts': expected_ts, } -def test_get_schain_info_by_name_not_exist_contracts( - skale, schain_db -): +def test_get_schain_info_by_name_not_exist_contracts(skale, schain_db): name = 'undefined_schain' info = get_schain_info_by_name(skale, name) assert info is None -def test_get_schain_info_by_name_not_exist_db( - skale, schain_on_contracts, db -): +def test_get_schain_info_by_name_not_exist_db(skale, schain_on_contracts, db): name = schain_on_contracts info = get_schain_info_by_name(skale, name) assert info is None diff --git a/tests/utils.py b/tests/utils.py index 3feb301e..dc33bf91 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ """ SKALE test utilities """ +import datetime import os import json import random @@ -40,6 +41,7 @@ from web.models.schain import upsert_schain_record CURRENT_TS = 1594903080 +CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TS) DIR_PATH = os.path.dirname(os.path.realpath(__file__)) ENDPOINT = os.getenv('ENDPOINT') diff --git a/web/routes/health.py b/web/routes/health.py index f2336747..f55fc166 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -28,20 +28,14 @@ from core.node import get_check_report, get_skale_node_version from core.node import get_current_nodes from core.schains.checks import SChainChecks -from core.schains.firewall.utils import ( - get_default_rule_controller, - get_sync_agent_ranges -) -from core.schains.ima import get_ima_log_checks from core.schains.external_config import ExternalState +from core.schains.firewall.utils import get_default_rule_controller, get_sync_agent_ranges +from core.schains.ima import get_ima_log_checks +from core.schains.process import is_process_healthy +from tools.configs.schains import DKG_TIMEOUT_COEFFICIENT from tools.sgx_utils import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL from web.models.schain import SChainRecord -from web.helper import ( - construct_err_response, - construct_ok_response, - get_api_url, - g_skale -) +from web.helper import construct_err_response, construct_ok_response, get_api_url, g_skale logger = logging.getLogger(__name__) BLUEPRINT_NAME = 'health' @@ -56,9 +50,7 @@ def containers(): all = request.args.get('all') == 'True' name_filter = request.args.get('name_filter') or '' containers_list = g.docker_utils.get_containers_info( - all=all, - name_filter=name_filter, - format=True + all=all, name_filter=name_filter, format=True ) return construct_ok_response(containers_list) @@ -72,17 +64,13 @@ def schains_checks(): checks_filter = checks_filter.split(',') node_id = g.config.id if node_id is None: - return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, - msg='No node installed') + return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, msg='No node installed') schains = g.skale.schains.get_schains_for_node(node_id) + allowed_diff = int(g.skale.constants_holder.get_dkg_timeout() * DKG_TIMEOUT_COEFFICIENT) sync_agent_ranges = get_sync_agent_ranges(g.skale) stream_version = get_skale_node_version() - estate = ExternalState( - chain_id=g.skale.web3.eth.chain_id, - ima_linked=True, - ranges=[] - ) + estate = ExternalState(chain_id=g.skale.web3.eth.chain_id, ima_linked=True, ranges=[]) checks = [] for schain in schains: if schain.name != '': @@ -90,8 +78,7 @@ def schains_checks(): rotation_id = rotation_data['rotation_id'] if SChainRecord.added(schain.name): rc = get_default_rule_controller( - name=schain.name, - sync_agent_ranges=sync_agent_ranges + name=schain.name, sync_agent_ranges=sync_agent_ranges ) current_nodes = get_current_nodes(g.skale, schain.name) schain_record = SChainRecord.get_by_name(schain.name) @@ -105,12 +92,14 @@ def schains_checks(): current_nodes=current_nodes, last_dkg_successful=True, estate=estate, - sync_node=False + sync_node=False, ).get_all(needed=checks_filter) - checks.append({ - 'name': schain.name, - 'healthchecks': schain_checks - }) + if not checks_filter or 'process' in checks_filter: + schain_checks.update( + {'process': is_process_healthy(schain.name, allowed_diff=allowed_diff)} + ) + + checks.append({'name': schain.name, 'healthchecks': schain_checks}) return construct_ok_response(checks) @@ -119,8 +108,7 @@ def ima_log_checks(): logger.debug(request) node_id = g.config.id if node_id is None: - return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, - msg='No node installed') + return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, msg='No node installed') checks = get_ima_log_checks() return construct_ok_response(checks) @@ -152,15 +140,12 @@ def sgx_info(): 'status_https': status_https, 'sgx_server_url': SGX_SERVER_URL, 'sgx_keyname': g.config.sgx_key_name, - 'sgx_wallet_version': version + 'sgx_wallet_version': version, } return construct_ok_response(data=res) -@health_bp.route( - get_api_url(BLUEPRINT_NAME, 'check-report'), - methods=['GET'] -) +@health_bp.route(get_api_url(BLUEPRINT_NAME, 'check-report'), methods=['GET']) def check_report(): logger.debug(request) report = get_check_report() diff --git a/web/routes/schains.py b/web/routes/schains.py index 58963321..5c03e776 100644 --- a/web/routes/schains.py +++ b/web/routes/schains.py @@ -18,6 +18,7 @@ # along with this program. If not, see . import logging +from dataclasses import asdict from flask import Blueprint, g, request @@ -89,10 +90,11 @@ def schains_list(): node_id = g.config.id if node_id is None: return construct_err_response(msg='No node installed') - schains_list = list(filter( - lambda s: s.get('name'), - g.skale.schains.get_schains_for_node(node_id) - )) + schains_list = [ + asdict(s) + for s in g.skale.schains.get_schains_for_node(node_id) + if s and s.name != '' + ] return construct_ok_response(schains_list)