Skip to content

Commit

Permalink
Reset restart failed count on update status (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
Thanhphan1147 authored Mar 25, 2024
1 parent b0a012b commit 307dc62
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def _on_update_status(self, _: ops.UpdateStatusEvent) -> None:
self.model.unit.status = ops.BlockedStatus("Waiting for relation.")
return

# set NRestart of the service back to 0
# We do it here because at this point we can be certain that
# the service is up and running
self.jenkins_agent_service.reset_failed_state()

self.model.unit.status = ops.ActiveStatus()


Expand Down
16 changes: 15 additions & 1 deletion src/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

logger = logging.getLogger(__name__)
AGENT_SERVICE_NAME = "jenkins-agent"
APT_PACKAGE_VERSION = "1.0.8"
APT_PACKAGE_VERSION = "1.0.9"
APT_PACKAGE_NAME = f"jenkins-agent-{APT_PACKAGE_VERSION}"
SYSTEMD_SERVICE_CONF_DIR = "/etc/systemd/system/jenkins-agent.service.d/"
PPA_URI = "https://ppa.launchpadcontent.net/canonical-is-devops/jenkins-agent-charm/ubuntu/"
Expand Down Expand Up @@ -166,6 +166,20 @@ def restart(self) -> None:
if not self._startup_check():
raise ServiceRestartError("Error waiting for the agent service to start")

def reset_failed_state(self) -> None:
    """Reset the systemd failed state of the agent service.

    Runs ``systemctl reset-failed`` against the agent service unit. Besides
    clearing the failed state, this resets the unit's restart counter
    (``NRestarts``), which blocks further restarts once the maximum allowed
    is reached. That counter is not reset when the service restarts, so it
    has to be cleared explicitly.

    This is a best-effort operation: a ``systemd.SystemdError`` is logged
    together with its cause and is never propagated to the caller.
    """
    try:
        # reset-failed is not exposed by the systemd lib, so the protected
        # helper is called directly.
        systemd._systemctl("reset-failed", AGENT_SERVICE_NAME)  # pylint: disable=W0212
    except systemd.SystemdError as exc:
        # Not critical, so only log it; include the cause so the failure
        # can be diagnosed from the unit logs.
        logger.error("Failed to reset failed state: %s", exc)

def reset(self) -> None:
"""Stop the agent service and clear its configuration file.
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import ops
import ops.testing
import pytest
from charms.operator_libs_linux.v1 import systemd

import charm_state
import service
Expand Down Expand Up @@ -173,6 +174,27 @@ def test_update_status_service_active(
"""
harness.add_relation(charm_state.AGENT_RELATION, "jenkins-k8s")
monkeypatch.setattr(service.JenkinsAgentService, "is_active", PropertyMock(return_value=True))
monkeypatch.setattr(systemd, "_systemctl", MagicMock(side_effect=systemd.SystemdError))

harness.begin()

harness.charm.on.update_status.emit()

assert harness.charm.unit.status.name == ops.ActiveStatus.name


def test_update_status_reset_failed_state_systemd_error(
harness: ops.testing.Harness, monkeypatch: pytest.MonkeyPatch
):
"""
arrange: given a charm with relation to jenkins and the service is active.
act: when update-status hook is fired with reset-failed raising an error.
assert: The charm correctly ignores the error and sets the status to active.
"""
harness.add_relation(charm_state.AGENT_RELATION, "jenkins-k8s")
monkeypatch.setattr(service.JenkinsAgentService, "is_active", PropertyMock(return_value=True))
monkeypatch.setattr(systemd, "_systemctl", MagicMock(side_effect=systemd.SystemdError))

harness.begin()

harness.charm.on.update_status.emit()
Expand Down

0 comments on commit 307dc62

Please sign in to comment.