Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d71f321

Browse files
authored Mar 24, 2025
cleanup for remote-run resources (#4024)
Clean up logic for the (meant to be) ephemeral resources which are created in remote-run invocations (#4022).
1 parent 9f2435d commit d71f321

File tree

6 files changed

+264
-10
lines changed

6 files changed

+264
-10
lines changed
 

‎debian/paasta-tools.links

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ opt/venvs/paasta-tools/bin/generate_authenticating_services.py usr/bin/generate_
1616
opt/venvs/paasta-tools/bin/kubernetes_remove_evicted_pods.py usr/bin/kubernetes_remove_evicted_pods
1717
opt/venvs/paasta-tools/bin/paasta-api usr/bin/paasta-api
1818
opt/venvs/paasta-tools/bin/paasta-fsm usr/bin/paasta-fsm
19+
opt/venvs/paasta-tools/bin/paasta_cleanup_remote_run_resources.py usr/bin/paasta_cleanup_remote_run_resources
1920
opt/venvs/paasta-tools/bin/paasta_cleanup_stale_nodes.py usr/bin/paasta_cleanup_stale_nodes
2021
opt/venvs/paasta-tools/bin/paasta_prune_completed_pods usr/bin/paasta_prune_completed_pods
2122
opt/venvs/paasta-tools/bin/paasta_cleanup_tron_namespaces usr/bin/paasta_cleanup_tron_namespaces
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env python
2+
# Copyright 2015-2019 Yelp Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
import argparse
16+
from datetime import datetime
17+
from datetime import timedelta
18+
from datetime import timezone
19+
from typing import Any
20+
from typing import Callable
21+
from typing import Sequence
22+
from typing import Tuple
23+
24+
from paasta_tools.kubernetes.remote_run import get_remote_run_role_bindings
25+
from paasta_tools.kubernetes.remote_run import get_remote_run_roles
26+
from paasta_tools.kubernetes.remote_run import get_remote_run_service_accounts
27+
from paasta_tools.kubernetes_tools import get_all_managed_namespaces
28+
from paasta_tools.kubernetes_tools import KubeClient
29+
30+
31+
# Signature of the listing helpers: called as (kube_client, namespace) and
# returning the resources found in that namespace.
ListingFuncType = Callable[[KubeClient, str], Sequence[Any]]
# Signature of the deletion calls: clean_namespace invokes them as
# (resource name, namespace).
DeletionFuncType = Callable[[str, str], Any]
33+
34+
35+
def clean_namespace(kube_client: KubeClient, namespace: str, age_limit: datetime):
    """Delete expired remote-run resources from a single namespace.

    Service accounts, roles and role bindings returned by the remote-run
    listing helpers are deleted when their name starts with ``remote-run-``
    and their creation timestamp is not later than ``age_limit``.

    :param KubeClient kube_client: kubernetes client
    :param str namespace: kubernetes namespace
    :param datetime age_limit: resources created after this moment are kept
    """
    # Each entry pairs the deletion call with the helper listing candidates.
    cleanup_actions: Sequence[Tuple[DeletionFuncType, ListingFuncType]] = (
        (
            kube_client.core.delete_namespaced_service_account,
            get_remote_run_service_accounts,
        ),
        (
            kube_client.rbac.delete_namespaced_role,
            get_remote_run_roles,
        ),
        (
            kube_client.rbac.delete_namespaced_role_binding,
            get_remote_run_role_bindings,
        ),
    )
    for remove, list_candidates in cleanup_actions:
        for resource in list_candidates(kube_client, namespace):
            meta = resource.metadata
            # The name prefix is checked first so the timestamp is only
            # compared for resources that look like remote-run ones.
            if meta.name.startswith("remote-run-") and not (
                meta.creation_timestamp > age_limit
            ):
                remove(meta.name, namespace)
58+
59+
60+
def parse_args() -> argparse.Namespace:
    """Parse the command line options of the cleanup script.

    :return: namespace holding ``max_age``, the maximum resource age in
        seconds (600 when the option is not given)
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Clean ephemeral Kubernetes resources created by remote-run invocations",
    )
    arg_parser.add_argument(
        "--max-age",
        default=600,
        type=int,
        help="Maximum age, in seconds, resources are allowed to have",
    )
    parsed = arg_parser.parse_args()
    return parsed
72+
73+
74+
def main() -> None:
    """Clean expired remote-run resources in every managed namespace.

    Reads ``--max-age`` from the command line, computes the cut-off moment
    (now minus ``max_age`` seconds) and runs :func:`clean_namespace` for each
    namespace returned by ``get_all_managed_namespaces``.
    """
    args = parse_args()
    kube_client = KubeClient()
    # ``datetime.now`` accepts the timezone through ``tz``; passing ``tzinfo``
    # raises TypeError. The cut-off is made UTC-aware so it can be compared
    # with resource creation timestamps (presumably timezone-aware as returned
    # by the Kubernetes client -- confirm).
    age_limit = datetime.now(tz=timezone.utc) - timedelta(seconds=args.max_age)
    for namespace in get_all_managed_namespaces(kube_client):
        clean_namespace(kube_client, namespace, age_limit)
80+
81+
82+
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

‎paasta_tools/kubernetes/remote_run.py

+51-7
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
import hashlib
1515
import logging
1616
from time import sleep
17+
from typing import List
1718
from typing import Optional
19+
from typing import Sequence
1820
from typing import TypedDict
1921

2022
from kubernetes.client import AuthenticationV1TokenRequest
@@ -236,7 +238,7 @@ def remote_run_token(
236238
kube_client, namespace, pod_name, user
237239
)
238240
role = create_pod_scoped_role(kube_client, namespace, pod_name, user)
239-
bind_role_to_service_account(kube_client, namespace, service_account, role)
241+
bind_role_to_service_account(kube_client, namespace, service_account, role, user)
240242
return create_temp_exec_token(kube_client, namespace, service_account)
241243

242244

@@ -293,6 +295,23 @@ def create_temp_exec_token(
293295
return response.status.token
294296

295297

298+
def get_remote_run_service_accounts(
    kube_client: KubeClient, namespace: str, user: str = ""
) -> Sequence[V1ServiceAccount]:
    """List the temporary service accounts created for remote-run

    :param KubeClient kube_client: Kubernetes client
    :param str namespace: pod namespace
    :param str user: optionally filter by owning user
    :return: list of service accounts
    """
    if user:
        # Restrict to service accounts whose owner label equals this user.
        selector = f"{POD_OWNER_LABEL}={user}"
    else:
        # No user given: select on the owner label key alone.
        selector = POD_OWNER_LABEL
    return get_all_service_accounts(
        kube_client,
        namespace=namespace,
        label_selector=selector,
    )
313+
314+
296315
def create_remote_run_service_account(
297316
kube_client: KubeClient,
298317
namespace: str,
@@ -308,11 +327,7 @@ def create_remote_run_service_account(
308327
"""
309328
pod_name_hash = hashlib.sha1(pod_name.encode("utf-8")).hexdigest()[:12]
310329
service_account_name = limit_size_with_hash(f"remote-run-{user}-{pod_name_hash}")
311-
service_accounts = get_all_service_accounts(
312-
kube_client,
313-
namespace=namespace,
314-
label_selector=f"{POD_OWNER_LABEL}={user}",
315-
)
330+
service_accounts = get_remote_run_service_accounts(kube_client, namespace, user)
316331
if any(item.metadata.name == service_account_name for item in service_accounts):
317332
return service_account_name
318333
service_account = V1ServiceAccount(
@@ -366,18 +381,21 @@ def bind_role_to_service_account(
366381
namespace: str,
367382
service_account: str,
368383
role: str,
384+
user: str,
369385
) -> None:
370386
"""Bind service account to role
371387
372388
:param KubeClient kube_client: Kubernetes client
373389
:param str namespace: service account namespace
374390
:param str service_account: service account name
375391
:param str role: role name
392+
:param str user: user requiring the role
376393
"""
377394
role_binding = V1RoleBinding(
378395
metadata=V1ObjectMeta(
379-
name=limit_size_with_hash(f"binding-{role}"),
396+
name=limit_size_with_hash(f"remote-run-binding-{role}"),
380397
namespace=namespace,
398+
labels={POD_OWNER_LABEL: user},
381399
),
382400
role_ref=V1RoleRef(
383401
api_group="rbac.authorization.k8s.io",
@@ -395,3 +413,29 @@ def bind_role_to_service_account(
395413
namespace=namespace,
396414
body=role_binding,
397415
)
416+
417+
418+
def get_remote_run_roles(kube_client: KubeClient, namespace: str) -> List[V1Role]:
    """List the temporary roles created for remote-run

    Roles are selected on the pod owner label key.

    :param KubeClient kube_client: Kubernetes client
    :param str namespace: pod namespace
    :return: list of roles
    """
    role_list = kube_client.rbac.list_namespaced_role(
        namespace,
        label_selector=POD_OWNER_LABEL,
    )
    return role_list.items
428+
429+
430+
def get_remote_run_role_bindings(
    kube_client: KubeClient, namespace: str
) -> List[V1RoleBinding]:
    """List the temporary role bindings created for remote-run

    Role bindings are selected on the pod owner label key.

    :param KubeClient kube_client: Kubernetes client
    :param str namespace: pod namespace
    :return: list of role bindings
    """
    binding_list = kube_client.rbac.list_namespaced_role_binding(
        namespace,
        label_selector=POD_OWNER_LABEL,
    )
    return binding_list.items

‎setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def get_install_requires():
5959
"paasta_tools/generate_services_yaml.py",
6060
"paasta_tools/generate_authenticating_services.py",
6161
"paasta_tools/kubernetes/bin/kubernetes_remove_evicted_pods.py",
62+
"paasta_tools/kubernetes/bin/paasta_cleanup_remote_run_resources.py",
6263
"paasta_tools/kubernetes/bin/paasta_cleanup_stale_nodes.py",
6364
"paasta_tools/kubernetes/bin/paasta_secrets_sync.py",
6465
"paasta_tools/paasta_deploy_tron_jobs",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2015-2016 Yelp Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from datetime import datetime
15+
from unittest.mock import call
16+
from unittest.mock import MagicMock
17+
from unittest.mock import patch
18+
19+
from paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources import (
20+
clean_namespace,
21+
)
22+
from paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources import main
23+
24+
25+
def _create_mock_kube_resource(name: str, creation_time: datetime):
    """Build a mock Kubernetes resource exposing only the metadata fields used
    by the cleanup logic (``metadata.name`` and ``metadata.creation_timestamp``).
    """
    resource = MagicMock()
    # Dotted keys let configure_mock set attributes on the child ``metadata`` mock.
    resource.configure_mock(
        **{
            "metadata.name": name,
            "metadata.creation_timestamp": creation_time,
        }
    )
    return resource
30+
31+
32+
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.get_remote_run_role_bindings",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.get_remote_run_roles",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.get_remote_run_service_accounts",
    autospec=True,
)
def test_clean_namespace(mock_get_sa, mock_get_roles, mock_get_bindings):
    """Only ``remote-run-`` resources created before the age limit are deleted."""
    namespace = "abc"
    age_limit = datetime(2025, 1, 1, 1, 1, 1)
    before_limit = datetime(2025, 1, 1, 0, 0, 0)
    after_limit = datetime(2025, 1, 1, 2, 0, 0)
    kube_client = MagicMock()

    # Old enough, but only the second one carries the remote-run prefix.
    mock_get_sa.return_value = [
        _create_mock_kube_resource("foobar", before_limit),
        _create_mock_kube_resource("remote-run-abc", before_limit),
    ]
    # Both carry the prefix, but the first one is too recent.
    mock_get_roles.return_value = [
        _create_mock_kube_resource("remote-run-abc", after_limit),
        _create_mock_kube_resource("remote-run-def", before_limit),
    ]
    # Old enough but without the prefix: nothing to delete.
    mock_get_bindings.return_value = [
        _create_mock_kube_resource("whatever", before_limit),
    ]

    clean_namespace(kube_client, namespace, age_limit)

    kube_client.core.delete_namespaced_service_account.assert_has_calls(
        [call("remote-run-abc", "abc")]
    )
    kube_client.rbac.delete_namespaced_role.assert_has_calls(
        [call("remote-run-def", "abc")]
    )
    kube_client.rbac.delete_namespaced_role_binding.assert_not_called()
65+
66+
67+
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.get_all_managed_namespaces",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.clean_namespace",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.KubeClient",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.parse_args",
    autospec=True,
)
@patch(
    "paasta_tools.kubernetes.bin.paasta_cleanup_remote_run_resources.datetime",
    autospec=True,
)
def test_main(
    mock_datetime, mock_parse_args, mock_kube, mock_clean, mock_get_namespaces
):
    """main() cleans every managed namespace using now minus max_age as limit."""
    namespaces = ["a", "b", "c"]
    # A mocked "now" of 00:01:00 minus a max age of 60 seconds gives 00:00:00.
    expected_limit = datetime(2025, 1, 1, 0, 0, 0)
    mock_datetime.now.return_value = datetime(2025, 1, 1, 0, 1, 0)
    mock_parse_args.return_value.max_age = 60
    mock_get_namespaces.return_value = namespaces

    main()

    mock_clean.assert_has_calls(
        [call(mock_kube.return_value, ns, expected_limit) for ns in namespaces]
    )

‎tests/kubernetes/test_remote_run.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from paasta_tools.kubernetes.remote_run import create_remote_run_service_account
3333
from paasta_tools.kubernetes.remote_run import create_temp_exec_token
3434
from paasta_tools.kubernetes.remote_run import find_job_pod
35+
from paasta_tools.kubernetes.remote_run import get_remote_run_role_bindings
36+
from paasta_tools.kubernetes.remote_run import get_remote_run_roles
3537
from paasta_tools.kubernetes.remote_run import remote_run_ready
3638
from paasta_tools.kubernetes.remote_run import remote_run_start
3739
from paasta_tools.kubernetes.remote_run import remote_run_stop
@@ -193,7 +195,11 @@ def test_remote_run_token(
193195
mock_client, "namespace", "remote-run-someuser-somejob-112233", "someuser"
194196
)
195197
mock_bind_role.assert_called_once_with(
196-
mock_client, "namespace", "somesa", "somerole"
198+
mock_client,
199+
"namespace",
200+
"somesa",
201+
"somerole",
202+
"someuser",
197203
)
198204
mock_create_token.assert_called_once_with(mock_client, "namespace", "somesa")
199205
# job not found
@@ -320,13 +326,16 @@ def test_create_pod_scoped_role():
320326

321327
def test_bind_role_to_service_account():
322328
mock_client = MagicMock()
323-
bind_role_to_service_account(mock_client, "namespace", "somesa", "somerole")
329+
bind_role_to_service_account(
330+
mock_client, "namespace", "somesa", "somerole", "someuser"
331+
)
324332
mock_client.rbac.create_namespaced_role_binding.assert_called_once_with(
325333
namespace="namespace",
326334
body=V1RoleBinding(
327335
metadata=V1ObjectMeta(
328-
name=f"binding-somerole",
336+
name=f"remote-run-binding-somerole",
329337
namespace="namespace",
338+
labels={"paasta.yelp.com/pod_owner": "someuser"},
330339
),
331340
role_ref=V1RoleRef(
332341
api_group="rbac.authorization.k8s.io",
@@ -341,3 +350,19 @@ def test_bind_role_to_service_account():
341350
],
342351
),
343352
)
353+
354+
355+
def test_get_remote_run_roles():
    """Roles are listed in the namespace, selected on the pod owner label."""
    kube_client = MagicMock()
    list_roles = kube_client.rbac.list_namespaced_role
    get_remote_run_roles(kube_client, "namespace")
    list_roles.assert_called_once_with(
        "namespace",
        label_selector="paasta.yelp.com/pod_owner",
    )
361+
362+
363+
def test_get_remote_run_role_bindings():
    """Role bindings are listed in the namespace, selected on the pod owner label."""
    kube_client = MagicMock()
    list_bindings = kube_client.rbac.list_namespaced_role_binding
    get_remote_run_role_bindings(kube_client, "namespace")
    list_bindings.assert_called_once_with(
        "namespace",
        label_selector="paasta.yelp.com/pod_owner",
    )

0 commit comments

Comments
 (0)
Please sign in to comment.