16 changes: 8 additions & 8 deletions README.md
@@ -58,7 +58,7 @@ client = Together()

 # Simple text message
 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
 )
 print(response.choices[0].message.content)
@@ -148,7 +148,7 @@ from together import Together

 client = Together()
 stream = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
     stream=True,
 )
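The loop that consumes this stream is collapsed in the diff. For context, a minimal sketch of the consuming side, guarding on `chunk.choices` in line with the test change later in this PR:

```python
# Sketch: print streamed tokens as they arrive
for chunk in stream:
    if chunk.choices:  # some chunks may arrive without choices (e.g. usage-only)
        print(chunk.choices[0].delta.content or "", end="", flush=True)
```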
@@ -173,7 +173,7 @@ async def async_chat_completion(messages):
     async_client = AsyncTogether()
     tasks = [
         async_client.chat.completions.create(
-            model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
             messages=[{"role": "user", "content": message}],
         )
         for message in messages
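The tail of this async example is collapsed. A self-contained sketch of the full helper, assuming the collapsed part follows the standard `asyncio.gather` pattern (the example prompts are illustrative):

```python
import asyncio

from together import AsyncTogether

async def async_chat_completion(messages):
    async_client = AsyncTogether()
    tasks = [
        async_client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=[{"role": "user", "content": message}],
        )
        for message in messages
    ]
    # Assumed continuation: run all requests concurrently and print each reply
    responses = await asyncio.gather(*tasks)
    for response in responses:
        print(response.choices[0].message.content)

asyncio.run(async_chat_completion(["tell me about new york", "tell me about san francisco"]))
```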
@@ -196,7 +196,7 @@ from together import Together
 client = Together()

 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
     messages=[{"role": "user", "content": "tell me about new york"}],
     logprobs=1
 )
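How the returned log-probabilities are consumed is outside this hunk. A one-line sketch, assuming they are exposed on the first choice (the attribute path is an assumption, not shown in this diff):

```python
# Sketch: inspect per-token log-probabilities (assumed attribute path)
print(response.choices[0].logprobs)
```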
@@ -347,7 +347,7 @@ client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a

 ### Fine-tunes

-The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses, and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-python).
+The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrieve statuses, and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-quickstart).

 ```python
 from together import Together
@@ -356,7 +356,7 @@ client = Together()

 client.fine_tuning.create(
     training_file = 'file-d0d318cb-b7d9-493a-bd70-1cfe089d3815',
-    model = 'mistralai/Mixtral-8x7B-Instruct-v0.1',
+    model = 'meta-llama/Llama-3.2-3B-Instruct',
     n_epochs = 3,
     n_checkpoints = 1,
     batch_size = "max",
@@ -394,7 +394,7 @@ for model in models:
 together chat.completions \
   --message "system" "You are a helpful assistant named Together" \
   --message "user" "What is your name?" \
-  --model mistralai/Mixtral-8x7B-Instruct-v0.1
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
 ```

 The Chat Completions CLI enables streaming tokens to stdout by default. To disable streaming, use `--no-stream`.
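For reference, a sketch of the same chat call with streaming disabled via the flag mentioned above; all flags except `--no-stream` are taken from this hunk:

```bash
together chat.completions \
  --message "user" "What is your name?" \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
  --no-stream
```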
@@ -404,7 +404,7 @@ The Chat Completions CLI enables streaming tokens to stdout by default. To disab
 ```bash
 together completions \
   "Large language models are " \
-  --model mistralai/Mixtral-8x7B-v0.1 \
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
   --max-tokens 512 \
   --stop "."
 ```
2 changes: 1 addition & 1 deletion src/together/cli/api/endpoints.py
@@ -82,7 +82,7 @@ def endpoints(ctx: click.Context) -> None:
 @click.option(
     "--model",
     required=True,
-    help="The model to deploy (e.g. mistralai/Mixtral-8x7B-Instruct-v0.1)",
+    help="The model to deploy (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct)",
 )
 @click.option(
     "--min-replicas",
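For context, a hypothetical invocation of the options shown above. The subcommand they attach to sits outside this hunk, so the `create` name and the replica count are assumptions:

```bash
# Hypothetical: assumes these options belong to an `endpoints create` subcommand
together endpoints create \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
  --min-replicas 1
```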
12 changes: 7 additions & 5 deletions tests/integration/resources/test_completion_stream.py
@@ -35,7 +35,7 @@ def test_create(
     random_repetition_penalty,  # noqa
 ) -> None:
     prompt = "The space robots have"
-    model = "mistralai/Mixtral-8x7B-v0.1"
+    model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     stop = ["</s>"]

     # max_tokens should be a reasonable number for this test
@@ -69,10 +69,12 @@ def test_create(
         assert isinstance(chunk.id, str)
         assert isinstance(chunk.created, int)
         assert isinstance(chunk.object, ObjectType)
-        assert isinstance(chunk.choices[0], CompletionChoicesChunk)
-        assert isinstance(chunk.choices[0].index, int)
-        assert isinstance(chunk.choices[0].delta, DeltaContent)
-        assert isinstance(chunk.choices[0].delta.content, str)
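+        # Not every chunk carries choices (e.g. a final usage-only chunk), so guard before indexing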

+        if chunk.choices:
+            assert isinstance(chunk.choices[0], CompletionChoicesChunk)
+            assert isinstance(chunk.choices[0].index, int)
+            assert isinstance(chunk.choices[0].delta, DeltaContent)
+            assert isinstance(chunk.choices[0].delta.content, str)

         usage = chunk.usage