16 changes: 8 additions & 8 deletions README.md
@@ -58,7 +58,7 @@ client = Together()

 # Simple text message
 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
 )
 print(response.choices[0].message.content)
@@ -148,7 +148,7 @@ from together import Together

 client = Together()
 stream = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
     stream=True,
 )
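The loop that consumes this stream is collapsed in the diff. For context, a minimal sketch of the consuming side, guarding on `chunk.choices` in line with the test change later in this PR:

```python
# Sketch: print streamed tokens as they arrive
for chunk in stream:
    if chunk.choices:  # some chunks may arrive without choices (e.g. usage-only)
        print(chunk.choices[0].delta.content or "", end="", flush=True)
```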
@@ -173,7 +173,7 @@ async def async_chat_completion(messages):
     async_client = AsyncTogether()
     tasks = [
         async_client.chat.completions.create(
-            model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
             messages=[{"role": "user", "content": message}],
         )
         for message in messages
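The tail of this async example is collapsed. A self-contained sketch of the full helper, assuming the collapsed part follows the standard `asyncio.gather` pattern (the example prompts are illustrative):

```python
import asyncio

from together import AsyncTogether

async def async_chat_completion(messages):
    async_client = AsyncTogether()
    tasks = [
        async_client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[{"role": "user", "content": message}],
        )
        for message in messages
    ]
    # Assumed continuation: run all requests concurrently and print each reply
    responses = await asyncio.gather(*tasks)
    for response in responses:
        print(response.choices[0].message.content)

asyncio.run(async_chat_completion(["tell me about new york", "tell me about san francisco"]))
```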
@@ -196,7 +196,7 @@ from together import Together
 client = Together()

 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
     messages=[{"role": "user", "content": "tell me about new york"}],
     logprobs=1
 )
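How the returned log-probabilities are consumed is outside this hunk. A one-line sketch, assuming they are exposed on the first choice (the attribute path is an assumption, not shown in this diff):

```python
# Sketch: inspect per-token log-probabilities (assumed attribute path)
print(response.choices[0].logprobs)
```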
@@ -347,7 +347,7 @@ client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a

 ### Fine-tunes

-The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses, and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-python).
+The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses, and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-quickstart).

 ```python
 from together import Together
@@ -356,7 +356,7 @@ client = Together()

 client.fine_tuning.create(
     training_file = 'file-d0d318cb-b7d9-493a-bd70-1cfe089d3815',
-    model = 'mistralai/Mixtral-8x7B-Instruct-v0.1',
+    model = 'meta-llama/Llama-3.2-3B-Instruct',
     n_epochs = 3,
     n_checkpoints = 1,
     batch_size = "max",
@@ -394,7 +394,7 @@ for model in models:
 together chat.completions \
   --message "system" "You are a helpful assistant named Together" \
   --message "user" "What is your name?" \
-  --model mistralai/Mixtral-8x7B-Instruct-v0.1
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
 ```

 The Chat Completions CLI enables streaming tokens to stdout by default. To disable streaming, use `--no-stream`.
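For reference, a sketch of the same chat call with streaming disabled via the flag mentioned above; all flags except `--no-stream` are taken from this hunk:

```bash
together chat.completions \
  --message "user" "What is your name?" \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
  --no-stream
```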
@@ -404,7 +404,7 @@ The Chat Completions CLI enables streaming tokens to stdout by default. To disab
 ```bash
 together completions \
   "Large language models are " \
-  --model mistralai/Mixtral-8x7B-v0.1 \
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
   --max-tokens 512 \
   --stop "."
 ```
2 changes: 1 addition & 1 deletion src/together/cli/api/endpoints.py
@@ -82,7 +82,7 @@ def endpoints(ctx: click.Context) -> None:
 @click.option(
     "--model",
     required=True,
-    help="The model to deploy (e.g. mistralai/Mixtral-8x7B-Instruct-v0.1)",
+    help="The model to deploy (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct)",
 )
 @click.option(
     "--min-replicas",
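For context, a hypothetical invocation of the options shown above. The subcommand they attach to sits outside this hunk, so the `create` name and the replica count are assumptions:

```bash
# Hypothetical: assumes these options belong to an `endpoints create` subcommand
together endpoints create \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
  --min-replicas 1
```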
12 changes: 7 additions & 5 deletions tests/integration/resources/test_completion_stream.py
@@ -35,7 +35,7 @@ def test_create(
     random_repetition_penalty,  # noqa
 ) -> None:
     prompt = "The space robots have"
-    model = "mistralai/Mixtral-8x7B-v0.1"
+    model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     stop = ["</s>"]

     # max_tokens should be a reasonable number for this test
@@ -69,10 +69,12 @@ def test_create(
         assert isinstance(chunk.id, str)
         assert isinstance(chunk.created, int)
         assert isinstance(chunk.object, ObjectType)
-        assert isinstance(chunk.choices[0], CompletionChoicesChunk)
-        assert isinstance(chunk.choices[0].index, int)
-        assert isinstance(chunk.choices[0].delta, DeltaContent)
-        assert isinstance(chunk.choices[0].delta.content, str)
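+        # Not every chunk carries choices (e.g. a final usage-only chunk), so guard before indexing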

+        if chunk.choices:
+            assert isinstance(chunk.choices[0], CompletionChoicesChunk)
+            assert isinstance(chunk.choices[0].index, int)
+            assert isinstance(chunk.choices[0].delta, DeltaContent)
+            assert isinstance(chunk.choices[0].delta.content, str)

         usage = chunk.usage