Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix various checks issues (beta) #1127

Merged
merged 5 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,6 @@ tests/skale-data/contracts_info/schain_ima_abi.json
temp_*

.DS_Store
.coverage*
.coverage*

.env
13 changes: 6 additions & 7 deletions core/schains/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from core.schains.dkg.utils import get_secret_key_share_filepath
from core.schains.firewall.types import IRuleController
from core.schains.ima import get_ima_time_frame, get_migration_ts as get_ima_migration_ts
from core.schains.process import is_monitor_process_alive
from core.schains.rpc import (
check_endpoint_alive,
check_endpoint_blocks,
Expand Down Expand Up @@ -390,11 +389,6 @@ def blocks(self) -> CheckRes:
return CheckRes(check_endpoint_blocks(http_endpoint))
return CheckRes(False)

@property
def process(self) -> CheckRes:
"""Checks that sChain monitor process is running"""
return CheckRes(is_monitor_process_alive(self.schain_record.monitor_id))

@property
def exit_zero(self) -> CheckRes:
"""Check that sChain container exited with zero code"""
Expand Down Expand Up @@ -453,7 +447,12 @@ def __getattr__(self, attr: str) -> Any:
def get_name(self) -> str:
return self.name

def get_all(self, log: bool = True, save: bool = False, needed: Optional[List[str]] = None):
def get_all(
self,
log: bool = True,
save: bool = False,
needed: Optional[List[str]] = None
) -> dict:
needed = needed or API_ALLOWED_CHECKS

plain_checks = {}
Expand Down
6 changes: 3 additions & 3 deletions core/schains/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class SchainData:
dkg_status: int
is_deleted: bool
first_run: bool
repair_mode: bool
repair_ts: int

def to_dict(self) -> dict:
return {
Expand All @@ -30,7 +30,7 @@ def to_dict(self) -> dict:
'dkg_status': self.dkg_status,
'is_deleted': self.is_deleted,
'first_run': self.first_run,
'repair_mode': self.repair_mode
'repair_ts': self.repair_ts
}


Expand All @@ -52,7 +52,7 @@ def get_schain_info_by_name(skale: Skale, schain_name: str) -> SchainData:
record.dkg_status,
record.is_deleted,
record.first_run,
record.repair_mode
int(record.repair_date.timestamp())
)


Expand Down
7 changes: 7 additions & 0 deletions core/schains/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os
import shutil
import signal
import time
from typing import Tuple

import pathlib
Expand Down Expand Up @@ -143,3 +144,9 @@ def terminate_process(
def is_monitor_process_alive(monitor_pid: int) -> bool:
    """Tell whether the monitor process identified by monitor_pid is alive.

    A pid of 0 is reported as not alive without calling check_pid;
    any other value is handed to check_pid and its result is returned.
    """
    if monitor_pid == 0:
        return False
    return check_pid(monitor_pid)


def is_process_healthy(schain_name: str, allowed_diff: int) -> bool:
    """Decide whether the process recorded for schain_name counts as healthy.

    The process is healthy when all of the following hold:
      * get_schain_process_info returned a pid that is not None,
      * is_monitor_process_alive accepts that pid,
      * the recorded timestamp is less than allowed_diff seconds
        behind the current time.

    NOTE(review): the timestamp is presumably the moment the process last
    reported in - confirm against get_schain_process_info.
    """
    pid, recorded_ts = get_schain_process_info(schain_name)
    # Sample the clock before the liveness probe, as the original did.
    now_ts = int(time.time())

    if pid is None:
        return False

    alive = is_monitor_process_alive(pid)
    if not alive:
        # Hand back the probe's own falsy result unchanged.
        return alive

    return now_ts - recorded_ts < allowed_diff
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Jinja2==3.1.2
docker==6.1.3
python-iptables==1.0.1

skale.py==6.4b0
skale.py==6.4b1

requests==2.31
ima-predeployed==2.1.0b0
Expand Down
9 changes: 8 additions & 1 deletion tests/routes/node_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import socket
import datetime
import glob
import shutil
import socket

import pytest
import mock
Expand All @@ -15,6 +17,7 @@
from core.node import Node, NodeStatus
from core.node_config import NodeConfig
from core.schains.config.file_manager import ConfigFileManager
from tools.configs.schains import SCHAINS_DIR_PATH
from tools.configs.tg import TG_API_KEY, TG_CHAT_ID
from web.routes.node import node_bp
from web.helper import get_api_url
Expand Down Expand Up @@ -259,6 +262,10 @@ def test_exit_maintenance(skale_bp, node_config_in_maintenance):


def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale_bp):
for path in glob.glob(f'{SCHAINS_DIR_PATH}/*'):
if not path.endswith(schain_on_contracts):
shutil.rmtree(path)

data = get_bp_data(
skale_bp,
get_api_url(BLUEPRINT_NAME, 'update-safe'),
Expand Down
2 changes: 1 addition & 1 deletion tests/routes/schains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def test_get_schain(
'id': schain_id,
'mainnet_owner': skale.wallet.address,
'part_of_node': 1, 'dkg_status': 1, 'is_deleted': False,
'first_run': True, 'repair_mode': False
'first_run': True, 'repair_ts': int(r.repair_date.timestamp())
}
}

Expand Down
28 changes: 1 addition & 27 deletions tests/schains/checks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from http import HTTPStatus

from collections import namedtuple
from multiprocessing import Process

import mock
import docker
Expand All @@ -20,9 +19,8 @@
schain_config_dir
)
from core.schains.config.schain_node import generate_schain_nodes
from core.schains.skaled_exit_codes import SkaledExitCodes
from core.schains.runner import get_container_info, get_image_name, run_ima_container
# from core.schains.cleaner import remove_ima_container
from core.schains.skaled_exit_codes import SkaledExitCodes

from tools.configs.containers import IMA_CONTAINER, SCHAIN_CONTAINER
from tools.helper import read_json
Expand Down Expand Up @@ -330,29 +328,6 @@ def test_exit_code(skale, rule_controller, schain_db, current_nodes, estate, dut
dutils.safe_rm(container_name)


def test_process(skale, rule_controller, schain_db, current_nodes, estate, dutils):
schain_record = SChainRecord.get_by_name(schain_db)
checks = SChainChecks(
schain_db,
TEST_NODE_ID,
schain_record=schain_record,
rule_controller=rule_controller,
stream_version=CONFIG_STREAM,
current_nodes=current_nodes,
last_dkg_successful=True,
estate=estate,
dutils=dutils
)
assert not checks.process.status

process = Process(target=time.sleep, args=(5,))
process.start()
schain_record.set_monitor_id(process.ident)
assert checks.process.status
process.join()
assert not checks.process.status


def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_db, estate):
schain_name = schain_config['skaleConfig']['sChain']['schainName']
schain_record = SChainRecord.get_by_name(schain_name)
Expand All @@ -377,7 +352,6 @@ def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_d
assert isinstance(checks_dict['rpc'], bool)
assert isinstance(checks_dict['blocks'], bool)
assert isinstance(checks_dict['ima_container'], bool)
assert isinstance(checks_dict['process'], bool)

estate.ima_linked = False
checks_without_ima = SChainChecksMock(
Expand Down
34 changes: 17 additions & 17 deletions tests/schains/info_test.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
import freezegun

from core.schains.info import get_schain_info_by_name
from tests.utils import upsert_schain_record_with_config
from tests.utils import CURRENT_DATETIME, upsert_schain_record_with_config


@freezegun.freeze_time(CURRENT_DATETIME)
def test_get_schain_info_by_name(skale, schain_on_contracts, schain_db):
name = schain_on_contracts
upsert_schain_record_with_config(name)
schain_record = upsert_schain_record_with_config(name)
info = get_schain_info_by_name(skale, name)
expected_ts = int(schain_record.repair_date.timestamp())
assert info.name == name
assert info.schain_id == skale.schains.name_to_id(name)
assert info.part_of_node == 1
assert info.dkg_status == 1
assert not info.is_deleted
assert info.first_run
assert not info.repair_mode
assert info.repair_ts == expected_ts

assert info.to_dict() == {
'name': name,
'id': skale.schains.name_to_id(name),
'mainnet_owner': info.mainnet_owner,
'part_of_node': 1,
'dkg_status': 1,
'is_deleted': False,
'first_run': True,
'repair_mode': False
'name': name,
'id': skale.schains.name_to_id(name),
'mainnet_owner': info.mainnet_owner,
'part_of_node': 1,
'dkg_status': 1,
'is_deleted': False,
'first_run': True,
'repair_ts': expected_ts,
}


def test_get_schain_info_by_name_not_exist_contracts(
skale, schain_db
):
def test_get_schain_info_by_name_not_exist_contracts(skale, schain_db):
    """Looking up the name 'undefined_schain' must yield None."""
    assert get_schain_info_by_name(skale, 'undefined_schain') is None


def test_get_schain_info_by_name_not_exist_db(
skale, schain_on_contracts, db
):
def test_get_schain_info_by_name_not_exist_db(skale, schain_on_contracts, db):
    """Looking up the schain_on_contracts name must yield None in this setup.

    Only the plain db fixture is requested here.
    """
    assert get_schain_info_by_name(skale, schain_on_contracts) is None
2 changes: 2 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" SKALE test utilities """

import datetime
import os
import json
import random
Expand Down Expand Up @@ -40,6 +41,7 @@
from web.models.schain import upsert_schain_record

CURRENT_TS = 1594903080
CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TS)

DIR_PATH = os.path.dirname(os.path.realpath(__file__))
ENDPOINT = os.getenv('ENDPOINT')
Expand Down
55 changes: 20 additions & 35 deletions web/routes/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,14 @@
from core.node import get_check_report, get_skale_node_version
from core.node import get_current_nodes
from core.schains.checks import SChainChecks
from core.schains.firewall.utils import (
get_default_rule_controller,
get_sync_agent_ranges
)
from core.schains.ima import get_ima_log_checks
from core.schains.external_config import ExternalState
from core.schains.firewall.utils import get_default_rule_controller, get_sync_agent_ranges
from core.schains.ima import get_ima_log_checks
from core.schains.process import is_process_healthy
from tools.configs.schains import DKG_TIMEOUT_COEFFICIENT
from tools.sgx_utils import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL
from web.models.schain import SChainRecord
from web.helper import (
construct_err_response,
construct_ok_response,
get_api_url,
g_skale
)
from web.helper import construct_err_response, construct_ok_response, get_api_url, g_skale

logger = logging.getLogger(__name__)
BLUEPRINT_NAME = 'health'
Expand All @@ -56,9 +50,7 @@ def containers():
all = request.args.get('all') == 'True'
name_filter = request.args.get('name_filter') or ''
containers_list = g.docker_utils.get_containers_info(
all=all,
name_filter=name_filter,
format=True
all=all, name_filter=name_filter, format=True
)
return construct_ok_response(containers_list)

Expand All @@ -72,26 +64,21 @@ def schains_checks():
checks_filter = checks_filter.split(',')
node_id = g.config.id
if node_id is None:
return construct_err_response(status_code=HTTPStatus.BAD_REQUEST,
msg='No node installed')
return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, msg='No node installed')

schains = g.skale.schains.get_schains_for_node(node_id)
allowed_diff = int(g.skale.constants_holder.get_dkg_timeout() * DKG_TIMEOUT_COEFFICIENT)
sync_agent_ranges = get_sync_agent_ranges(g.skale)
stream_version = get_skale_node_version()
estate = ExternalState(
chain_id=g.skale.web3.eth.chain_id,
ima_linked=True,
ranges=[]
)
estate = ExternalState(chain_id=g.skale.web3.eth.chain_id, ima_linked=True, ranges=[])
checks = []
for schain in schains:
if schain.name != '':
rotation_data = g.skale.node_rotation.get_rotation(schain.name)
rotation_id = rotation_data['rotation_id']
if SChainRecord.added(schain.name):
rc = get_default_rule_controller(
name=schain.name,
sync_agent_ranges=sync_agent_ranges
name=schain.name, sync_agent_ranges=sync_agent_ranges
)
current_nodes = get_current_nodes(g.skale, schain.name)
schain_record = SChainRecord.get_by_name(schain.name)
Expand All @@ -105,12 +92,14 @@ def schains_checks():
current_nodes=current_nodes,
last_dkg_successful=True,
estate=estate,
sync_node=False
sync_node=False,
).get_all(needed=checks_filter)
checks.append({
'name': schain.name,
'healthchecks': schain_checks
})
if not checks_filter or 'process' in checks_filter:
schain_checks.update(
{'process': is_process_healthy(schain.name, allowed_diff=allowed_diff)}
)

checks.append({'name': schain.name, 'healthchecks': schain_checks})
return construct_ok_response(checks)


Expand All @@ -119,8 +108,7 @@ def ima_log_checks():
logger.debug(request)
node_id = g.config.id
if node_id is None:
return construct_err_response(status_code=HTTPStatus.BAD_REQUEST,
msg='No node installed')
return construct_err_response(status_code=HTTPStatus.BAD_REQUEST, msg='No node installed')
checks = get_ima_log_checks()
return construct_ok_response(checks)

Expand Down Expand Up @@ -152,15 +140,12 @@ def sgx_info():
'status_https': status_https,
'sgx_server_url': SGX_SERVER_URL,
'sgx_keyname': g.config.sgx_key_name,
'sgx_wallet_version': version
'sgx_wallet_version': version,
}
return construct_ok_response(data=res)


@health_bp.route(
get_api_url(BLUEPRINT_NAME, 'check-report'),
methods=['GET']
)
@health_bp.route(get_api_url(BLUEPRINT_NAME, 'check-report'), methods=['GET'])
def check_report():
logger.debug(request)
report = get_check_report()
Expand Down
Loading
Loading