From 307dc6231fc5608b635c5c55b107aaab6f1b3321 Mon Sep 17 00:00:00 2001 From: Phan Trung Thanh Date: Mon, 25 Mar 2024 12:54:11 +0100 Subject: [PATCH] Reset restart failed count on update status (#29) --- src/charm.py | 5 +++++ src/service.py | 16 +++++++++++++++- tests/unit/test_charm.py | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/charm.py b/src/charm.py index dd623bf..376e6f0 100755 --- a/src/charm.py +++ b/src/charm.py @@ -109,6 +109,11 @@ def _on_update_status(self, _: ops.UpdateStatusEvent) -> None: self.model.unit.status = ops.BlockedStatus("Waiting for relation.") return + # set NRestart of the service back to 0 + # We do it here because at this point we can be certain that + # the service is up and running + self.jenkins_agent_service.reset_failed_state() + self.model.unit.status = ops.ActiveStatus() diff --git a/src/service.py b/src/service.py index 1d6e6d7..910f083 100644 --- a/src/service.py +++ b/src/service.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) AGENT_SERVICE_NAME = "jenkins-agent" -APT_PACKAGE_VERSION = "1.0.8" +APT_PACKAGE_VERSION = "1.0.9" APT_PACKAGE_NAME = f"jenkins-agent-{APT_PACKAGE_VERSION}" SYSTEMD_SERVICE_CONF_DIR = "/etc/systemd/system/jenkins-agent.service.d/" PPA_URI = "https://ppa.launchpadcontent.net/canonical-is-devops/jenkins-agent-charm/ubuntu/" @@ -166,6 +166,20 @@ def restart(self) -> None: if not self._startup_check(): raise ServiceRestartError("Error waiting for the agent service to start") + def reset_failed_state(self) -> None: + """Reset NRestart count of service back to 0. + + The service keeps track of the 'restart-count' and blocks further restarts + if the maximum allowed is reached. This count is not reset when the service restarts + so we need to do it manually. 
+ """ + try: + # Disable protected-access here because reset-failed is not implemented in the lib + systemd._systemctl("reset-failed", AGENT_SERVICE_NAME) # pylint: disable=W0212 + except systemd.SystemdError: + # We only log the exception here as this is not critical + logger.error("Failed to reset failed state") + def reset(self) -> None: """Stop the agent service and clear its configuration file. diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 7e3f1bf..f17cd27 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -10,6 +10,7 @@ import ops import ops.testing import pytest +from charms.operator_libs_linux.v1 import systemd import charm_state import service @@ -173,6 +174,27 @@ def test_update_status_service_active( """ harness.add_relation(charm_state.AGENT_RELATION, "jenkins-k8s") monkeypatch.setattr(service.JenkinsAgentService, "is_active", PropertyMock(return_value=True)) + monkeypatch.setattr(systemd, "_systemctl", MagicMock(side_effect=systemd.SystemdError)) + + harness.begin() + + harness.charm.on.update_status.emit() + + assert harness.charm.unit.status.name == ops.ActiveStatus.name + + +def test_update_status_reset_failed_state_systemd_error( + harness: ops.testing.Harness, monkeypatch: pytest.MonkeyPatch +): + """ + arrange: given a charm with relation to jenkins and the service is active. + act: when update-status hook is fired with reset-failed raising an error. + assert: The charm correctly ignores the error and sets the status to active. + """ + harness.add_relation(charm_state.AGENT_RELATION, "jenkins-k8s") + monkeypatch.setattr(service.JenkinsAgentService, "is_active", PropertyMock(return_value=True)) + monkeypatch.setattr(systemd, "_systemctl", MagicMock(side_effect=systemd.SystemdError)) + harness.begin() harness.charm.on.update_status.emit()