Skip to content

Commit af5d286

Browse files
committed
Add availability-zone (avzone) listing and support for deploying a dedicated endpoint (DE) in a specified avzone
1 parent 5b93703 commit af5d286

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

src/together/cli/api/endpoints.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ def endpoints(ctx: click.Context) -> None:
132132
type=int,
133133
help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
134134
)
135+
@click.option(
136+
"--user-specified-avzone",
137+
help="User-specified availability zone (e.g., us-central-4b)",
138+
)
135139
@click.option(
136140
"--wait",
137141
is_flag=True,
@@ -152,6 +156,7 @@ def create(
152156
no_speculative_decoding: bool,
153157
no_auto_start: bool,
154158
inactive_timeout: int | None,
159+
user_specified_avzone: str | None,
155160
wait: bool,
156161
) -> None:
157162
"""Create a new dedicated inference endpoint."""
@@ -177,6 +182,7 @@ def create(
177182
disable_speculative_decoding=no_speculative_decoding,
178183
state="STOPPED" if no_auto_start else "STARTED",
179184
inactive_timeout=inactive_timeout,
185+
user_specified_avzone=user_specified_avzone,
180186
)
181187
except InvalidRequestError as e:
182188
print_api_error(e)
@@ -203,6 +209,8 @@ def create(
203209
click.echo(" Auto-start: disabled", err=True)
204210
if inactive_timeout is not None:
205211
click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
212+
if user_specified_avzone:
213+
click.echo(f" Availability zone: {user_specified_avzone}", err=True)
206214

207215
click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
208216

@@ -432,3 +440,23 @@ def update(
432440

433441
click.echo("Successfully updated endpoint", err=True)
434442
click.echo(endpoint_id)
443+
444+
@endpoints.command()
@click.option("--json", is_flag=True, help="Print output in JSON format")
@click.pass_obj
@handle_api_errors
def avzones(client: Together, json: bool) -> None:
    """List all available availability zones."""
    zones = client.endpoints.list_avzones()

    # Nothing came back: say so on stderr and write nothing to stdout,
    # regardless of the requested output format.
    if not zones:
        click.echo("No availability zones found", err=True)
        return

    if not json:
        # Human-readable listing: header on stderr, one sorted zone per
        # line on stdout.
        click.echo("Available zones:", err=True)
        for avzone in sorted(zones):
            click.echo(f" {avzone}")
        return

    # The ``json`` flag parameter shadows the module name inside this
    # function, so the module is imported locally under an alias.
    import json as json_lib

    # JSON mode emits the zones in the order the API returned them.
    click.echo(json_lib.dumps({"avzones": zones}, indent=2))

src/together/resources/endpoints.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def create(
6060
disable_speculative_decoding: bool = True,
6161
state: Literal["STARTED", "STOPPED"] = "STARTED",
6262
inactive_timeout: Optional[int] = None,
63+
user_specified_avzone: Optional[str] = None,
6364
) -> DedicatedEndpoint:
6465
"""
6566
Create a new dedicated endpoint.
@@ -74,6 +75,7 @@ def create(
7475
disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to True.
7576
state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
7677
inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
78+
user_specified_avzone (str, optional): Start endpoint in specified availability zone (e.g., us-central-4b).
7779
7880
Returns:
7981
DedicatedEndpoint: Object containing endpoint information
@@ -100,6 +102,9 @@ def create(
100102
if inactive_timeout is not None:
101103
data["inactive_timeout"] = inactive_timeout
102104

105+
if user_specified_avzone is not None:
106+
data["user_specified_avzone"] = user_specified_avzone
107+
103108
response, _, _ = requestor.request(
104109
options=TogetherRequest(
105110
method="POST",
@@ -257,6 +262,31 @@ def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]
257262

258263
return [HardwareWithStatus(**item) for item in response.data["data"]]
259264

265+
def list_avzones(self) -> List[str]:
    """
    Fetch the availability zones reported by the API.

    Sends a GET request to ``clusters/avzones`` and returns the list found
    under the ``avzones`` key of the response payload.

    Returns:
        List[str]: Availability zones taken from the response
    """
    requestor = api_requestor.APIRequestor(client=self._client)
    request_options = TogetherRequest(method="GET", url="clusters/avzones")

    result, _, _ = requestor.request(options=request_options, stream=False)

    # Check the shape of the response step by step before handing the
    # list back to the caller.
    assert isinstance(result, TogetherResponse)
    payload = result.data
    assert isinstance(payload, dict)
    zones = payload["avzones"]
    assert isinstance(zones, list)

    return zones
289+
260290

261291
class AsyncEndpoints:
262292
def __init__(self, client: TogetherClient) -> None:
@@ -308,6 +338,7 @@ async def create(
308338
disable_speculative_decoding: bool = True,
309339
state: Literal["STARTED", "STOPPED"] = "STARTED",
310340
inactive_timeout: Optional[int] = None,
341+
user_specified_avzone: Optional[str] = None,
311342
) -> DedicatedEndpoint:
312343
"""
313344
Create a new dedicated endpoint.
@@ -348,6 +379,9 @@ async def create(
348379
if inactive_timeout is not None:
349380
data["inactive_timeout"] = inactive_timeout
350381

382+
if user_specified_avzone is not None:
383+
data["user_specified_avzone"] = user_specified_avzone
384+
351385
response, _, _ = await requestor.arequest(
352386
options=TogetherRequest(
353387
method="POST",
@@ -506,3 +540,28 @@ async def list_hardware(
506540
assert isinstance(response.data["data"], list)
507541

508542
return [HardwareWithStatus(**item) for item in response.data["data"]]
543+
544+
async def list_avzones(self) -> List[str]:
    """
    Fetch the availability zones reported by the API (async variant).

    Awaits a GET request to ``clusters/avzones`` and returns the list found
    under the ``avzones`` key of the response payload.

    Returns:
        List[str]: Availability zones taken from the response
    """
    requestor = api_requestor.APIRequestor(client=self._client)
    request_options = TogetherRequest(method="GET", url="clusters/avzones")

    result, _, _ = await requestor.arequest(
        options=request_options,
        stream=False,
    )

    # Check the shape of the response step by step before handing the
    # list back to the caller.
    assert isinstance(result, TogetherResponse)
    payload = result.data
    assert isinstance(payload, dict)
    zones = payload["avzones"]
    assert isinstance(zones, list)

    return zones

0 commit comments

Comments
 (0)