Skip to content

Commit

Permalink
Improved start-up callback
Browse files Browse the repository at this point in the history
  • Loading branch information
weiiwang01 committed Dec 10, 2024
1 parent d1c0c05 commit 38db050
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 27 deletions.
2 changes: 1 addition & 1 deletion charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,4 @@ resources:
description: OCI image for the OpenCTI platform/worker.

assumes:
- juju >= 3.3
- juju >= 3.4
83 changes: 63 additions & 20 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

import json
import logging
import pathlib
import secrets
import time
import textwrap
import typing
import urllib.parse
import uuid
Expand Down Expand Up @@ -70,8 +71,6 @@ class OpenCTICharm(ops.CharmBase):
_PEER_SECRET_FIELD = "secret" # nosec
_PEER_SECRET_ADMIN_TOKEN_SECRET_FIELD = "admin-token" # nosec
_PEER_SECRET_HEALTH_ACCESS_KEY_SECRET_FIELD = "health-access-key" # nosec
_HEALTH_CHECK_TIMEOUT = 200
_HEALTH_CHECK_INTERVAL = 5

def __init__(self, *args: typing.Any):
"""Construct.
Expand Down Expand Up @@ -138,6 +137,9 @@ def __init__(self, *args: typing.Any):
self.framework.observe(self._s3.on.credentials_gone, self._reconcile)
self.framework.observe(self._ingress.on.ready, self._reconcile)
self.framework.observe(self._ingress.on.revoked, self._reconcile)
self.framework.observe(
self.on["opencti"].pebble_custom_notice, self._on_pebble_custom_notice
)

def _amqp_relation_joined(self, event: ops.RelationJoinedEvent) -> None:
"""Handle amqp relation joined event.
Expand All @@ -149,6 +151,15 @@ def _amqp_relation_joined(self, event: ops.RelationJoinedEvent) -> None:
if self.unit.is_leader():
event.relation.data[self.app]["admin"] = "true"

def _on_pebble_custom_notice(self, event: ops.PebbleCustomNoticeEvent) -> None:
    """Reconcile the charm on custom notices under the opencti key prefix.

    Notices whose key does not start with ``canonical.com/opencti/`` are
    ignored.

    Args:
        event: Pebble custom notice event.
    """
    notice_key = event.notice.key
    if not notice_key.startswith("canonical.com/opencti/"):
        # Key is outside the prefix this handler reacts to; nothing to do.
        return
    self._reconcile(event)

def _reconcile(self, _: ops.EventBase) -> None:
"""Run charm reconcile function and catch all exceptions."""
try:
Expand Down Expand Up @@ -189,6 +200,10 @@ def _reconcile_raw(self) -> None:
summary="OpenCTI platform/worker",
description="OpenCTI platform/worker",
services={
"charm-callback": {
"override": "replace",
"command": f"bash {self._install_callback_script(health_check_url)}",
},
"platform": {
"override": "replace",
"command": "node build/back.js",
Expand All @@ -213,6 +228,22 @@ def _reconcile_raw(self) -> None:
"worker-1": worker_service,
"worker-2": worker_service,
},
),
combine=True,
)
self._container.replan()
self._container.start("platform")
try:
self._check_platform_health(health_check_url)
except PlatformNotReady as exc:
self._container.start("charm-callback")
raise PlatformNotReady("waiting for opencti platform to start") from exc
self._container.stop("charm-callback")
self._container.add_layer(
label="opencti",
layer=ops.pebble.LayerDict(
summary="OpenCTI platform/worker",
description="OpenCTI platform/worker",
checks={
"platform": {
"override": "replace",
Expand All @@ -227,23 +258,35 @@ def _reconcile_raw(self) -> None:
combine=True,
)
self._container.replan()
self._container.start("platform")
start_time = time.time()
deadline = start_time + self._HEALTH_CHECK_TIMEOUT
while time.time() < deadline:
try:
self._check_platform_health(health_check_url)
self._container.start("worker-0")
self._container.start("worker-1")
self._container.start("worker-2")
return
except PlatformNotReady:
self.unit.status = ops.WaitingStatus(
f"waiting for opencti platform to start ({int(time.time() - start_time)}s)"
)
time.sleep(self._HEALTH_CHECK_INTERVAL)
continue
raise PlatformNotReady("opencti platform start-up failed")
self._container.start("worker-0")
self._container.start("worker-1")
self._container.start("worker-2")

def _install_callback_script(self, health_check_url: str) -> pathlib.Path:
    """Install the platform start-up callback script that notifies the charm.

    The script polls the health check endpoint with curl, sleeping 5 seconds
    between failed attempts. On the first successful response it emits the
    ``canonical.com/opencti/platform-healthy`` Pebble custom notice and exits.

    Args:
        health_check_url: opencti health check endpoint.

    Returns:
        callback script path inside the container.
    """
    # Imported locally so this method does not rely on the module import list.
    import shlex

    # Quote the URL for the shell instead of wrapping it in double quotes:
    # inside double quotes bash still expands `$`, backticks and backslashes,
    # and a literal `"` would terminate the string.
    quoted_url = shlex.quote(health_check_url)
    script = textwrap.dedent(
        f"""\
        while :; do
            if curl -sfo /dev/null {quoted_url}; then
                pebble notify canonical.com/opencti/platform-healthy
                break
            else
                sleep 5
            fi
        done
        """
    )
    path = pathlib.Path("/opt/opencti/charm-callback.sh")
    # NOTE(review): the URL may embed the health access key; consider pushing
    # the script with restrictive permissions — confirm against the service user.
    self._container.make_dir(path.parent, make_parents=True)
    self._container.push(path, script, encoding="utf-8")
    return path

@staticmethod
def _check_platform_health(health_check_url: str) -> None: # pragma: nocover
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,4 @@ async def machine_charm_dependencies_fixture(machine_model: Model):
await machine_model.create_offer(f"{opensearch.name}:opensearch-client", "opensearch-client")
rabbitmq_server = await machine_model.deploy("rabbitmq-server", channel="3.9/stable")
await machine_model.create_offer(f"{rabbitmq_server.name}:amqp", "amqp")
await machine_model.wait_for_idle(timeout=1800)
await machine_model.wait_for_idle(timeout=3600)
2 changes: 1 addition & 1 deletion tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ async def test_deploy_charm(
secret_id = secret_id.strip()
await model.grant_secret("opencti-admin-user", opencti.name)
await opencti.set_config({"admin-user": secret_id})
await model.wait_for_idle(timeout=1800, status="active")
await model.wait_for_idle(timeout=3600, status="active")


async def test_opencti_workers(get_unit_ips, ops_test):
Expand Down
10 changes: 6 additions & 4 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def test_pebble_plan():
container = state_out.get_container("opencti")
assert container.plan.to_dict() == {
"services": {
"charm-callback": {
"command": "bash /opt/opencti/charm-callback.sh",
"override": "replace",
},
"platform": {
"command": "node build/back.js",
"environment": {
Expand Down Expand Up @@ -191,15 +195,13 @@ def test_amqp_request_admin_user(leader):
assert data["admin"] == "true"


def test_opencti_platform_start_failure(monkeypatch, patch_check_platform_health):
def test_opencti_wait_platform_start(patch_check_platform_health):
patch_check_platform_health.side_effect = PlatformNotReady()
monkeypatch.setattr(OpenCTICharm, "_HEALTH_CHECK_TIMEOUT", 0.1)
monkeypatch.setattr(OpenCTICharm, "_HEALTH_CHECK_INTERVAL", 0.1)
ctx = ops.testing.Context(OpenCTICharm)
state_in = StateBuilder().add_required_integrations().add_required_configs().build()
state_out = ctx.run(ctx.on.config_changed(), state_in)
assert state_out.unit_status.name == "waiting"
assert state_out.unit_status.message == "opencti platform start-up failed"
assert state_out.unit_status.message == "waiting for opencti platform to start"


@pytest.mark.usefixtures("patch_check_platform_health")
Expand Down

0 comments on commit 38db050

Please sign in to comment.