# test_create_kubernetes_deployment.py
# Source: fork of microsoft/genaiops-promptflow-template.
"""Tests for create_kubernetes_deployment."""
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from llmops.common.deployment.kubernetes_deployment import (
create_kubernetes_deployment
)
# Workspace identifiers; the test expects these values to appear in the
# PRT_CONFIG_OVERRIDE environment variable of the created deployment.
SUBSCRIPTION_ID = "TEST_SUBSCRIPTION_ID"
RESOURCE_GROUP_NAME = "TEST_RESOURCE_GROUP_NAME"
WORKSPACE_NAME = "TEST_WORKSPACE_NAME"
# Directory of this test module and its "resources" subdirectory, which is
# passed to create_kubernetes_deployment as base_path.
THIS_PATH = Path(__file__).parent
RESOURCE_PATH = THIS_PATH / "resources"
# Request timeout the created deployment is expected to carry:
# 3 minutes expressed in milliseconds.
REQUEST_TIMEOUT_MS = 3 * 60 * 1000
@pytest.fixture(scope="module", autouse=True)
def _set_required_env_vars():
    """Set the required environment variables for the tests in this module.

    The variables are set once before the first test of the module runs and
    are restored to their previous state after the last one finishes, so the
    values do not leak into other test modules.

    Yields:
        None. The fixture exists only for its environment side effect.
    """
    # MonkeyPatch.context() undoes every setenv when the ``with`` block
    # exits, i.e. at module teardown; a bare pytest.MonkeyPatch() that is
    # never undone would leave the variables set for the whole session.
    with pytest.MonkeyPatch.context() as monkeypatch:
        monkeypatch.setenv("SUBSCRIPTION_ID", "TEST_SUBSCRIPTION_ID")
        monkeypatch.setenv("RESOURCE_GROUP_NAME", "TEST_RESOURCE_GROUP_NAME")
        monkeypatch.setenv("WORKSPACE_NAME", "TEST_WORKSPACE_NAME")
        yield
def test_create_kubernetes_deployment():
    """Check what create_kubernetes_deployment sends to a mocked MLClient.

    The MLClient class used by the deployment module is patched, the function
    is run for the "dev" environment against the files under RESOURCE_PATH,
    and the test then inspects the model lookup, the deployment listing, the
    single created deployment and the traffic split of the updated endpoint.
    """
    # Expected values; presumably these mirror the configuration files under
    # RESOURCE_PATH -- verify against the resources if they change.
    expected_model = "exp_dev"
    version = "1"
    endpoint = "k8s-test-endpoint"
    new_name = "k8s-test-deployment"
    old_name = "k8s-test-prior-deployment"
    expected_description = "k8s-test-deployment-description"
    expected_instance_type = "k8s-test-vm-size"
    expected_instance_count = 1
    expected_cpu = "0.5"
    expected_memory = "500Mi"
    expected_inference_config = {
        "liveness_route": {"path": "/health", "port": "8080"},
        "readiness_route": {"path": "/health", "port": "8080"},
        "scoring_route": {"path": "/score", "port": "8080"},
    }

    # Deployments reported by online_deployments.list: the new one and a
    # prior one. ``name`` has to be assigned after construction because
    # Mock(name=...) names the mock itself instead of setting an attribute.
    new_deployment, old_deployment = Mock(), Mock()
    new_deployment.name = new_name
    old_deployment.name = old_name

    # Stand-in for the MLClient instance the code under test will receive.
    ml_client = Mock()
    ml_client.models.get.return_value = Mock()
    ml_client.online_deployments.list.return_value = [
        new_deployment,
        old_deployment,
    ]

    # Run the function with the MLClient class replaced, so constructing a
    # client inside the deployment module yields ``ml_client``.
    with patch(
        "llmops.common.deployment.kubernetes_deployment.MLClient",
        return_value=ml_client,
    ):
        create_kubernetes_deployment(
            version, base_path=str(RESOURCE_PATH), env_name="dev"
        )

    # The model is looked up by name and version.
    ml_client.models.get.assert_called_with(expected_model, version)

    # Existing deployments are listed for the endpoint (remote, not local).
    ml_client.online_deployments.list.assert_called_with(
        endpoint, local=False
    )

    # Exactly one deployment is created or updated.
    create_deployment = ml_client.online_deployments.begin_create_or_update
    assert create_deployment.call_count == 1

    # A recorded call indexes as (args, kwargs); the deployment object is
    # the first positional argument of the first (and only) call.
    first_create_call = create_deployment.call_args_list[0]
    positional_args = first_create_call[0]
    created = positional_args[0]

    assert created.name == new_name
    assert created.description == expected_description
    assert created.endpoint_name == endpoint
    assert created.instance_type == expected_instance_type
    assert created.instance_count == expected_instance_count
    assert created.app_insights_enabled is True

    # Environment build context and inference routes.
    environment = created.environment
    assert environment.build.path == str(RESOURCE_PATH / "flows/exp_flow")
    assert environment.build.dockerfile_path == "docker/dockerfile"
    assert environment.inference_config == expected_inference_config

    # Request timeout and resource requests.
    timeout_ms = created.request_settings.request_timeout_ms
    assert timeout_ms == REQUEST_TIMEOUT_MS
    requested = created.resources.requests
    assert requested.cpu == expected_cpu
    assert requested.memory == expected_memory

    # Environment variables passed to the deployment.
    env_vars = created.environment_variables
    assert env_vars["k8s-test-key"] == "k8s-test-value"
    assert env_vars["PROMPTFLOW_RUN_MODE"] == "serving"
    expected_override = (
        f"deployment.subscription_id={SUBSCRIPTION_ID},"
        f"deployment.resource_group={RESOURCE_GROUP_NAME},"
        f"deployment.workspace_name={WORKSPACE_NAME},"
        f"deployment.endpoint_name={endpoint},"
        f"deployment.deployment_name={new_name}"
    )
    assert env_vars["PRT_CONFIG_OVERRIDE"] == expected_override

    # The endpoint update is read from the client-level
    # begin_create_or_update (not online_endpoints') and happens once.
    update_endpoint = ml_client.begin_create_or_update
    assert update_endpoint.call_count == 1

    # Same (args, kwargs) layout as above: take the first positional
    # argument of the first call, i.e. the endpoint being updated.
    first_update_call = update_endpoint.call_args_list[0]
    updated_endpoint = first_update_call[0][0]

    # Traffic split between the new and the prior deployment.
    assert int(updated_endpoint.traffic[new_name]) == 70
    assert int(updated_endpoint.traffic[old_name]) == 30