# models.yaml

# Notes:
#  - do not submit pull requests to add new models; this list will be updated in batches with new releases.

# Links:
#  - https://platform.openai.com/docs/models
#  - https://openai.com/api/pricing/
#  - https://platform.openai.com/docs/api-reference/chat
- platform: openai
  # Model catalog for the `openai` platform: entries without a `type` key come
  # first (presumably chat models — confirm the loader's default), followed by
  # `type: embedding` entries.
  # NOTE(review): the unit of input_price/output_price is not stated in this
  # file — presumably USD per 1M tokens; confirm against the consumer.
  models:
    # gpt-4o alias and its dated snapshots share identical limits and prices.
    - name: gpt-4o
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-2024-11-20
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-2024-08-06
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: chatgpt-4o-latest
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4o-mini
      max_input_tokens: 128000
      max_output_tokens: 16384
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    # o1 entries set no_system_message and declare neither vision nor
    # function-calling support.
    - name: o1-preview
      max_input_tokens: 128000
      max_output_tokens: 32768
      input_price: 15
      output_price: 60
      no_system_message: true
    - name: o1-mini
      max_input_tokens: 128000
      max_output_tokens: 65536
      input_price: 3
      output_price: 12
      no_system_message: true
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    # Embedding models: only an input price, plus chunking and batching limits.
    - name: text-embedding-3-large
      type: embedding
      input_price: 0.13
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      input_price: 0.02
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100

# Links:
#  - https://ai.google.dev/models/gemini
#  - https://ai.google.dev/pricing
#  - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
- platform: gemini
  # Model catalog for the `gemini` platform. Every price in this block is 0.
  # All untyped entries declare both vision and function-calling support.
  models:
    - name: gemini-1.5-pro-latest
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-8b-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-2.0-flash-exp
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    - name: gemini-exp-1206
      max_input_tokens: 32768
      max_output_tokens: 8192
      input_price: 0
      output_price: 0
      supports_vision: true
      supports_function_calling: true
    # Embedding model.
    - name: text-embedding-004
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 100

# Links:
#  - https://docs.anthropic.com/claude/docs/models-overview
#  - https://docs.anthropic.com/claude/reference/messages-streaming
- platform: claude
  # Model catalog for the `claude` platform. Every entry sets
  # require_max_tokens: true and declares vision and function-calling support.
  models:
    # Claude 3.5 entries: `-latest` aliases paired with their dated snapshots,
    # all with max_output_tokens 8192.
    - name: claude-3-5-sonnet-latest
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-sonnet-20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-haiku-latest
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 1
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-haiku-20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 1
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    # Claude 3 entries: dated snapshots only, all with max_output_tokens 4096.
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true

# Links:
#  - https://docs.mistral.ai/getting-started/models/models_overview/
#  - https://mistral.ai/technology/#pricing
#  - https://docs.mistral.ai/api/
- platform: mistral
  # Model catalog for the `mistral` platform. No entry in this block sets
  # max_output_tokens.
  models:
    - name: mistral-large-latest
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
      supports_function_calling: true
    - name: codestral-latest
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
    - name: ministral-8b-latest
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_function_calling: true
    - name: open-mistral-nemo
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: open-codestral-mamba
      max_input_tokens: 256000
      input_price: 0.25
      output_price: 0.25
    # Pixtral entries declare vision support but not function calling.
    - name: pixtral-large-latest
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_vision: true
    - name: pixtral-12b-latest
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_vision: true
    # Embedding model; unlike most embedding entries in this file it has no
    # max_batch_size.
    # NOTE(review): 8092 may be a typo for 8192 — confirm against the Mistral
    # model documentation before changing.
    - name: mistral-embed
      type: embedding
      max_input_tokens: 8092
      input_price: 0.1
      max_tokens_per_chunk: 8092
      default_chunk_size: 2000

# Links:
#  - https://docs.ai21.com/docs/jamba-15-models
#  - https://www.ai21.com/pricing
#  - https://docs.ai21.com/reference/jamba-15-api-ref
- platform: ai21
  # Model catalog for the `ai21` platform: two Jamba 1.5 entries that share a
  # 256000-token input limit and both declare function-calling support.
  models:
    - name: jamba-1.5-large
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: jamba-1.5-mini
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true

# Links:
#  - https://docs.cohere.com/docs/command-r-plus
#  - https://cohere.com/pricing
#  - https://docs.cohere.com/reference/chat
- platform: cohere
  # Model catalog for the `cohere` platform, grouped as untyped entries,
  # `type: embedding` entries, then `type: reranker` entries.
  models:
    - name: command-r-plus-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: command-r-08-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    # This entry does not declare function-calling support.
    - name: command-r7b-12-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.0375
      output_price: 0.15
    # Embedding models: the `light` variants use a smaller default_chunk_size
    # (700 vs 1000); all other fields are identical.
    - name: embed-english-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-english-light-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 700
      max_batch_size: 96
    - name: embed-multilingual-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-multilingual-light-v3.0
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 700
      max_batch_size: 96
    # Reranker models: only an input limit is listed, no prices.
    - name: rerank-v3.5
      type: reranker
      max_input_tokens: 4096
    - name: rerank-english-v3.0
      type: reranker
      max_input_tokens: 4096
    - name: rerank-multilingual-v3.0
      type: reranker
      max_input_tokens: 4096

# Links:
#  - https://docs.x.ai/docs#models
#  - https://docs.x.ai/api/endpoints#chat-completions
- platform: xai
  # Model catalog for the `xai` platform. No entry in this block sets
  # max_output_tokens.
  models:
    - name: grok-2-1212
      max_input_tokens: 131072
      input_price: 2
      output_price: 10
      supports_function_calling: true
    - name: grok-beta
      max_input_tokens: 131072
      input_price: 5
      output_price: 15
      supports_function_calling: true
    # Vision entries have smaller input limits than the text-only entries above.
    - name: grok-2-vision-1212
      max_input_tokens: 32768
      input_price: 2
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    # This entry does not declare function-calling support.
    - name: grok-vision-beta
      max_input_tokens: 8192
      input_price: 5
      output_price: 15
      supports_vision: true

# Links:
#  - https://docs.perplexity.ai/guides/model-cards
#  - https://docs.perplexity.ai/guides/pricing
#  - https://docs.perplexity.ai/api-reference/chat-completions
- platform: perplexity
  # Model catalog for the `perplexity` platform: three sonar `-online` entries
  # sharing one input limit; each lists equal input and output prices and no
  # capability flags.
  models:
    - name: llama-3.1-sonar-huge-128k-online
      max_input_tokens: 127072
      input_price: 5
      output_price: 5
    - name: llama-3.1-sonar-large-128k-online
      max_input_tokens: 127072
      input_price: 1
      output_price: 1
    - name: llama-3.1-sonar-small-128k-online
      max_input_tokens: 127072
      input_price: 0.2
      output_price: 0.2

# Links:
#  - https://console.groq.com/docs/models
#  - https://console.groq.com/docs/api-reference#chat
- platform: groq
  # Model catalog for the `groq` platform. Every price in this block is 0 and
  # no entry sets max_output_tokens.
  models:
    - name: llama-3.3-70b-versatile
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.1-70b-versatile
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: llama-3.1-8b-instant
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    # Vision preview entries declare vision support but not function calling.
    - name: llama-3.2-90b-vision-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: llama-3.2-11b-vision-preview
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_vision: true
    - name: gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0
      output_price: 0
      supports_function_calling: true

# Links:
#  - https://ollama.com/library
#  - https://github.com/ollama/ollama/blob/main/docs/openai.md
- platform: ollama
  # Model catalog for the `ollama` platform. No entry in this block lists
  # prices or max_output_tokens.
  models:
    - name: llama3.1
      max_input_tokens: 128000
      supports_function_calling: true
    - name: llama3.2
      max_input_tokens: 128000
      supports_function_calling: true
    - name: llama3.2-vision
      max_input_tokens: 128000
      supports_vision: true
    - name: llama3.3
      max_input_tokens: 128000
      supports_function_calling: true
    - name: qwq
      max_input_tokens: 32768
      supports_function_calling: true
    - name: qwen2.5
      max_input_tokens: 128000
      supports_function_calling: true
    - name: qwen2.5-coder
      max_input_tokens: 32768
      supports_function_calling: true
    # This entry declares no capability flags.
    - name: gemma2
      max_input_tokens: 8192
    # Embedding model.
    - name: nomic-embed-text
      type: embedding
      max_tokens_per_chunk: 8192
      default_chunk_size: 1000
      max_batch_size: 50

# Links:
#  - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
#  - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models
#  - https://cloud.google.com/vertex-ai/generative-ai/pricing
#  - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
- platform: vertexai
  # Model catalog for the `vertexai` platform, grouped by model family:
  # Gemini, Claude, Mistral, then embedding models.
  models:
    # Gemini entries.
    - name: gemini-1.5-pro-002
      max_input_tokens: 2097152
      max_output_tokens: 8192
      input_price: 1.25
      output_price: 3.75
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-002
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.01875
      output_price: 0.075
      supports_vision: true
      supports_function_calling: true
    # Claude entries: names carry an `@<date>` suffix and every entry sets
    # require_max_tokens: true.
    - name: claude-3-5-sonnet-v2@20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-sonnet@20240620
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-5-haiku@20241022
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 1
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    # Mistral entries: no max_output_tokens is set.
    - name: mistral-large-2411
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-nemo@2407
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.15
      supports_function_calling: true
    - name: codestral@2405
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
    # Embedding models: max_batch_size is 5 for both.
    - name: text-embedding-004
      type: embedding
      max_input_tokens: 20000
      input_price: 0.025
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5
    - name: text-multilingual-embedding-002
      type: embedding
      max_input_tokens: 20000
      input_price: 0.2
      max_tokens_per_chunk: 2048
      default_chunk_size: 1500
      max_batch_size: 5

# Links:
#  - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
#  - https://aws.amazon.com/bedrock/pricing/
#  - https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-support.html
- platform: bedrock
  # Model catalog for the `bedrock` platform, grouped by vendor prefix:
  # anthropic, meta, amazon, mistral, cohere, ai21.
  # NOTE(review): `us.`-prefixed names are presumably cross-region inference
  # profile IDs (see the cross-region link above) — confirm.
  models:
    # Anthropic entries: every one sets require_max_tokens: true.
    - name: anthropic.claude-3-5-sonnet-20241022-v2:0
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    # NOTE(review): max_output_tokens is 4096 here, while the vertexai entry
    # claude-3-5-sonnet@20240620 lists 8192 — confirm which is intended.
    - name: anthropic.claude-3-5-sonnet-20240620-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-5-haiku-20241022-v1:0
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 1
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    # Meta Llama entries: every one sets require_max_tokens: true.
    - name: meta.llama3-1-405b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 5.32
      output_price: 16
      supports_function_calling: true
    - name: meta.llama3-1-70b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.99
      output_price: 0.99
      supports_function_calling: true
    - name: meta.llama3-1-8b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.22
      output_price: 0.22
      supports_function_calling: true
    # The Llama 3.2 entries additionally declare vision support.
    - name: us.meta.llama3-2-90b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 2
      output_price: 2
      supports_function_calling: true
      supports_vision: true
    - name: us.meta.llama3-2-11b-instruct-v1:0
      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.35
      output_price: 0.35
      supports_function_calling: true
      supports_vision: true
    # Amazon Nova entries: require_max_tokens and function calling are not set.
    - name: us.amazon.nova-pro-v1:0
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.8
      output_price: 3.2
      supports_vision: true
    - name: us.amazon.nova-lite-v1:0
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.06
      output_price: 0.24
      supports_vision: true
    - name: us.amazon.nova-micro-v1:0
      max_input_tokens: 128000
      max_output_tokens: 5120
      input_price: 0.035
      output_price: 0.14
    # Mistral and Cohere entries without a `type` set no max_output_tokens.
    - name: mistral.mistral-large-2407-v1:0
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: cohere.command-r-plus-v1:0
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
    - name: cohere.command-r-v1:0
      max_input_tokens: 128000
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    # Cohere embedding models.
    - name: cohere.embed-english-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: cohere.embed-multilingual-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    # AI21 Jamba entries.
    - name: ai21.jamba-1-5-large-v1:0
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: ai21.jamba-1-5-mini-v1:0
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true

# Links:
#  - https://developers.cloudflare.com/workers-ai/models/
#  - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
- platform: cloudflare
  # Model catalog for the `cloudflare` platform. Names start with `@`, a
  # reserved YAML indicator, so they are quoted. Every price in this block is 0
  # and every untyped entry sets require_max_tokens: true.
  models:
    - name: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.1-70b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: '@cf/meta/llama-3.1-8b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    # NOTE(review): the name suggests a vision model, but supports_vision is
    # not set — confirm whether that is intentional for this platform.
    - name: '@cf/meta/llama-3.2-11b-vision-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
    # Embedding model.
    - name: '@cf/baai/bge-large-en-v1.5'
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
#  - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
#  - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
- platform: ernie
  # Model catalog for the `ernie` platform: untyped entries, two embedding
  # models, then one reranker. No entry sets max_output_tokens.
  models:
    - name: ernie-4.0-turbo-8k-latest
      max_input_tokens: 8192
      input_price: 2.8
      output_price: 8.4
      supports_function_calling: true
    - name: ernie-4.0-turbo-128k
      max_input_tokens: 128000
      input_price: 2.8
      output_price: 8.4
      supports_function_calling: true
    - name: ernie-4.0-8k-latest
      max_input_tokens: 8192
      input_price: 4.2
      output_price: 12.6
      supports_function_calling: true
    - name: ernie-3.5-8k-preview
      max_input_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    # This entry does not declare function-calling support.
    - name: ernie-speed-pro-128k
      max_input_tokens: 128000
      input_price: 0.042
      output_price: 0.084
    # Embedding models.
    - name: bge_large_zh
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bge_large_en
      type: embedding
      input_price: 0.07
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 16
    # Reranker model; unlike the cohere rerankers it lists an input price.
    - name: bce_reranker_base
      type: reranker
      max_input_tokens: 1024
      input_price: 0.07

# Links:
#  - https://help.aliyun.com/zh/model-studio/getting-started/models
#  - https://help.aliyun.com/zh/model-studio/developer-reference/use-qwen-by-calling-api
- platform: qianwen
  # Model catalog for the `qianwen` platform: `-latest` aliases, vision
  # models, versioned qwen2.5 entries, then embedding models.
  models:
    - name: qwen-max-latest
      max_input_tokens: 30720
      max_output_tokens: 8192
      input_price: 2.8
      output_price: 8.4
      supports_function_calling: true
    - name: qwen-plus-latest
      max_input_tokens: 128000
      max_output_tokens: 8192
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: qwen-turbo-latest
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.042
      output_price: 0.084
      supports_function_calling: true
    - name: qwen-coder-plus-latest
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.49
      output_price: 0.98
      supports_function_calling: true
    - name: qwen-coder-turbo-latest
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.28
      output_price: 0.84
      supports_function_calling: true
    # This entry sets neither max_output_tokens nor function-calling support.
    - name: qwen-long
      max_input_tokens: 1000000
      input_price: 0.07
      output_price: 0.28
    - name: qwq-32b-preview
      max_input_tokens: 30720
      max_output_tokens: 16384
      input_price: 0.49
      output_price: 0.98
      supports_function_calling: true
    # Vision entries list no token limits at all.
    - name: qwen-vl-max-latest
      input_price: 2.8
      output_price: 2.8
      supports_vision: true
    - name: qwen-vl-plus-latest
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: qwen2.5-72b-instruct
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.56
      output_price: 1.68
      supports_function_calling: true
    - name: qwen2.5-coder-32b-instruct
      max_input_tokens: 129024
      max_output_tokens: 8192
      input_price: 0.49
      output_price: 0.98
      supports_function_calling: true
    # Embedding models: note the small, differing max_batch_size values.
    - name: text-embedding-v3
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 6
    - name: text-embedding-v2
      type: embedding
      input_price: 0.1
      max_tokens_per_chunk: 2048
      default_chunk_size: 2000
      max_batch_size: 25

# Links:
#  - https://cloud.tencent.com/document/product/1729/104753
#  - https://cloud.tencent.com/document/product/1729/97731
#  - https://cloud.tencent.com/document/product/1729/111007
- platform: hunyuan
  # Model catalog for the `hunyuan` platform: function-calling entries, vision
  # entries, then one embedding model.
  models:
    - name: hunyuan-turbo-latest
      max_input_tokens: 28000
      max_output_tokens: 4096
      input_price: 2.1
      output_price: 7.0
      supports_function_calling: true
    - name: hunyuan-pro
      max_input_tokens: 28000
      max_output_tokens: 4096
      input_price: 4.2
      output_price: 14.0
      supports_function_calling: true
    # NOTE(review): the two hunyuan-large entries list no prices, unlike the
    # rest of this block — confirm whether that is intentional.
    - name: hunyuan-large
      max_input_tokens: 28000
      max_output_tokens: 4096
      supports_function_calling: true
    - name: hunyuan-large-longcontext
      max_input_tokens: 128000
      max_output_tokens: 6144
      supports_function_calling: true
    - name: hunyuan-standard
      max_input_tokens: 30000
      max_output_tokens: 2048
      input_price: 0.112
      output_price: 0.28
      supports_function_calling: true
    - name: hunyuan-standard-256K
      max_input_tokens: 250000
      max_output_tokens: 6144
      input_price: 0.07
      output_price: 0.28
      supports_function_calling: true
    - name: hunyuan-lite
      max_input_tokens: 250000
      max_output_tokens: 6144
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: hunyuan-functioncall
      max_input_tokens: 28000
      max_output_tokens: 4096
      input_price: 0.56
      output_price: 1.12
      supports_function_calling: true
    # Vision entries declare vision support but not function calling.
    - name: hunyuan-turbo-vision
      max_input_tokens: 6144
      max_output_tokens: 2048
      input_price: 11.2
      output_price: 11.2
      supports_vision: true
    - name: hunyuan-vision
      max_input_tokens: 6144
      max_output_tokens: 2048
      input_price: 2.52
      output_price: 2.52
      supports_vision: true
    # Embedding model.
    - name: hunyuan-embedding
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 1024
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
#  - https://platform.moonshot.cn/docs/intro
#  - https://platform.moonshot.cn/docs/pricing/chat
#  - https://platform.moonshot.cn/docs/api/chat
- platform: moonshot
  # Model catalog for the `moonshot` platform: three moonshot-v1 entries that
  # differ in input limit and price; each lists equal input and output prices.
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68
      supports_function_calling: true
    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36
      supports_function_calling: true
    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4
      supports_function_calling: true

# Links:
#  - https://api-docs.deepseek.com/quick_start/pricing
#  - https://platform.deepseek.com/api-docs/api/create-chat-completion
- platform: deepseek
  # Model catalog for the `deepseek` platform: a single entry.
  models:
    - name: deepseek-chat
      max_input_tokens: 65536
      max_output_tokens: 4096
      input_price: 0.14
      output_price: 0.28
      supports_function_calling: true

# Links:
#  - https://open.bigmodel.cn/dev/howuse/model
#  - https://open.bigmodel.cn/pricing
#  - https://open.bigmodel.cn/dev/api#glm-4
- platform: zhipuai
  # Model catalog for the `zhipuai` platform. No entry sets max_output_tokens;
  # each untyped entry lists equal input and output prices.
  models:
    - name: glm-4-plus
      max_input_tokens: 128000
      input_price: 7
      output_price: 7
      supports_function_calling: true
    - name: glm-4-alltools
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-0520
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-long
      max_input_tokens: 1000000
      input_price: 0.14
      output_price: 0.14
      supports_function_calling: true
    - name: glm-4-flash
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
      supports_function_calling: true
    # Vision entry: declares vision support but not function calling.
    - name: glm-4v-plus
      max_input_tokens: 8192
      input_price: 1.4
      output_price: 1.4
      supports_vision: true
    # Embedding model; no max_batch_size is listed.
    - name: embedding-3
      type: embedding
      max_input_tokens: 8192
      input_price: 0.07
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000

# Links:
#  - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9
#  - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion
- platform: lingyiwanwu
  # Model catalog for the `lingyiwanwu` platform. Each entry lists equal input
  # and output prices; only yi-large-fc declares function-calling support and
  # only yi-vision declares vision support.
  models:
    - name: yi-lightning
      max_input_tokens: 16384
      input_price: 0.14
      output_price: 0.14
    - name: yi-large
      max_input_tokens: 32768
      input_price: 2.8
      output_price: 2.8
    - name: yi-large-fc
      max_input_tokens: 32768
      input_price: 2.8
      output_price: 2.8
      supports_function_calling: true
    - name: yi-large-rag
      max_input_tokens: 16384
      input_price: 3.5
      output_price: 3.5
    - name: yi-medium-200k
      max_input_tokens: 200000
      input_price: 1.68
      output_price: 1.68
    - name: yi-vision
      max_input_tokens: 16384
      input_price: 0.84
      output_price: 0.84
      supports_vision: true

# Links:
#  - https://github.com/marketplace/models
- platform: github
  # Model catalog for the `github` platform. No entry in this block lists
  # prices or max_output_tokens.
  models:
    # OpenAI-named entries.
    - name: gpt-4o
      max_input_tokens: 128000
      supports_function_calling: true
    - name: gpt-4o-mini
      max_input_tokens: 128000
      supports_function_calling: true
    # The o1 entries set no_stream in addition to no_system_message.
    - name: o1-preview
      max_input_tokens: 128000
      no_stream: true
      no_system_message: true
    - name: o1-mini
      max_input_tokens: 128000
      no_stream: true
      no_system_message: true
    - name: text-embedding-3-large
      type: embedding
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      max_tokens_per_chunk: 8191
      default_chunk_size: 2000
      max_batch_size: 100
    # Llama entries: none declares function-calling support.
    - name: llama-3.3-70b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.1-405b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.1-70b-instruct
      max_input_tokens: 128000
    - name: meta-llama-3.1-8b-instruct
      max_input_tokens: 128000
    - name: llama-3.2-90b-vision-instruct
      max_input_tokens: 8192
      supports_vision: true
    - name: llama-3.2-11b-vision-instruct
      max_input_tokens: 8192
      supports_vision: true
    # Mistral entries.
    - name: mistral-large-2411
      max_input_tokens: 128000
      supports_function_calling: true
    - name: mistral-nemo
      max_input_tokens: 128000
      supports_function_calling: true
    # Cohere entries, including two embedding models.
    - name: cohere-command-r-plus-08-2024
      max_input_tokens: 128000
      supports_function_calling: true
    - name: cohere-command-r-08-2024
      max_input_tokens: 128000
      supports_function_calling: true
    - name: cohere-embed-v3-english
      type: embedding
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    - name: cohere-embed-v3-multilingual
      type: embedding
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 96
    # AI21 Jamba entries.
    - name: ai21-jamba-1.5-large
      max_input_tokens: 256000
      supports_function_calling: true
    - name: ai21-jamba-1.5-mini
      max_input_tokens: 256000
      supports_function_calling: true
    # Phi-3.5 entries: none declares function-calling support.
    - name: phi-3.5-moe-instruct
      max_input_tokens: 128000
    - name: phi-3.5-mini-instruct
      max_input_tokens: 128000
    - name: phi-3.5-vision-instruct
      max_input_tokens: 128000
      supports_vision: true

# Links:
#  - https://deepinfra.com/models
#  - https://deepinfra.com/pricing
- platform: deepinfra
  models:
    # Models without an explicit `type` key.
    # NOTE(review): prices are presumably USD per 1M tokens — confirm against the loader.
    - name: meta-llama/Llama-3.3-70B-Instruct
      max_input_tokens: 128000
      input_price: 0.23
      output_price: 0.4
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
      max_input_tokens: 32000
      input_price: 1.79
      output_price: 1.79
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
      max_input_tokens: 128000
      input_price: 0.055
      output_price: 0.055
      supports_function_calling: true
    # The Llama 3.2 Vision entries are flagged `supports_vision`, matching how
    # the same models are declared for the other platforms in this file.
    - name: meta-llama/Llama-3.2-90B-Vision-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.4
      supports_vision: true
    - name: meta-llama/Llama-3.2-11B-Vision-Instruct
      max_input_tokens: 128000
      input_price: 0.055
      output_price: 0.055
      supports_vision: true
    - name: mistralai/Mistral-Nemo-Instruct-2407
      max_input_tokens: 128000
      input_price: 0.13
      output_price: 0.13
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.27
      output_price: 0.27
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0.06
      output_price: 0.06
    - name: Qwen/Qwen2.5-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: Qwen/Qwen2.5-Coder-32B-Instruct
      max_input_tokens: 32768
      input_price: 0.18
      output_price: 0.18
    - name: Qwen/QwQ-32B-Preview
      max_input_tokens: 32768
      input_price: 0.15
      output_price: 0.6
    - name: nvidia/Llama-3.1-Nemotron-70B-Instruct
      max_input_tokens: 128000
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    # Embedding models (`type: embedding`); these carry chunking/batch limits
    # instead of `max_input_tokens`.
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-m3
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    - name: intfloat/e5-large-v2
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: intfloat/multilingual-e5-large
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      input_price: 0.01
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
#  - https://fireworks.ai/models
#  - https://fireworks.ai/pricing
- platform: fireworks
  models:
    # Models without an explicit `type` key; names use the
    # `accounts/fireworks/models/` prefix.
    - name: accounts/fireworks/models/llama-v3p3-70b-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/llama-v3p1-405b-instruct
      max_input_tokens: 131072
      input_price: 3
      output_price: 3
      supports_function_calling: true
    - name: accounts/fireworks/models/llama-v3p1-70b-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
      supports_function_calling: true
    - name: accounts/fireworks/models/llama-v3p1-8b-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
      supports_vision: true
    - name: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/qwen2p5-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/qwen2p5-coder-32b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/qwen-qwq-32b-preview
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/qwen2-vl-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
      supports_vision: true
    - name: accounts/fireworks/models/phi-3-vision-128k-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/firellava-13b
      max_input_tokens: 4096
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/firefunction-v2
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
      supports_function_calling: true
    # Embedding models (`type: embedding`). Note: these names do NOT carry the
    # `accounts/fireworks/models/` prefix used by the entries above.
    - name: nomic-ai/nomic-embed-text-v1.5
      type: embedding
      input_price: 0.008
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100

# Links:
#  - https://openrouter.ai/models
- platform: openrouter
  models:
    # Model names are `<vendor>/<model>`; entries are grouped by vendor.
    # NOTE(review): prices are presumably USD per 1M tokens — confirm against the loader.
    - name: openai/gpt-4o
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4o-2024-11-20
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4o-2024-08-06
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: openai/chatgpt-4o-latest
      max_input_tokens: 128000
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4o-mini
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.6
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4-turbo
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: openai/o1-preview
      max_input_tokens: 128000
      input_price: 15
      output_price: 60
      no_system_message: true
    - name: openai/o1-mini
      max_input_tokens: 128000
      input_price: 3
      output_price: 12
      no_system_message: true
    - name: openai/gpt-3.5-turbo
      max_input_tokens: 16385
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: google/gemini-pro-1.5
      max_input_tokens: 2000000
      input_price: 1.25
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-1.5
      max_input_tokens: 1000000
      input_price: 0.075
      output_price: 0.3
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-1.5-8b
      max_input_tokens: 1000000
      input_price: 0.0375
      output_price: 0.15
      supports_vision: true
      supports_function_calling: true
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.27
      output_price: 0.27
    - name: google/gemma-2-9b-it
      max_input_tokens: 4096
      input_price: 0.06
      output_price: 0.06
    - name: anthropic/claude-3.5-sonnet
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-5-haiku
      max_input_tokens: 200000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 1
      output_price: 5
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-opus
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-sonnet
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-haiku
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: meta-llama/llama-3.3-70b-instruct
      max_input_tokens: 131072
      input_price: 0.88
      output_price: 0.88
    - name: meta-llama/llama-3.1-405b-instruct
      max_input_tokens: 131072
      input_price: 2.8
      output_price: 2.8
      supports_function_calling: true
    - name: meta-llama/llama-3.1-70b-instruct
      max_input_tokens: 131072
      input_price: 0.34
      output_price: 0.39
      supports_function_calling: true
    - name: meta-llama/llama-3.1-8b-instruct
      max_input_tokens: 131072
      input_price: 0.05
      output_price: 0.05
    - name: meta-llama/llama-3.2-90b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.9
      output_price: 0.9
      supports_vision: true
    - name: meta-llama/llama-3.2-11b-vision-instruct
      max_input_tokens: 131072
      input_price: 0.055
      output_price: 0.055
      supports_vision: true
    - name: mistralai/mistral-large
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistralai/mistral-small
      max_input_tokens: 32000
      input_price: 0.2
      output_price: 0.6
      supports_function_calling: true
    - name: mistralai/ministral-8b
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_function_calling: true
    - name: mistralai/ministral-3b
      max_input_tokens: 128000
      input_price: 0.04
      output_price: 0.04
      supports_function_calling: true
    - name: mistralai/mistral-nemo
      max_input_tokens: 128000
      input_price: 0.13
      output_price: 0.13
      supports_function_calling: true
    - name: mistralai/codestral-mamba
      max_input_tokens: 256000
      input_price: 0.25
      output_price: 0.25
    - name: mistralai/pixtral-large-2411
      max_input_tokens: 128000
      input_price: 2
      output_price: 6
      supports_vision: true
    - name: mistralai/pixtral-12b
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
      supports_vision: true
    - name: ai21/jamba-1-5-large
      max_input_tokens: 256000
      input_price: 2
      output_price: 8
      supports_function_calling: true
    - name: ai21/jamba-1-5-mini
      max_input_tokens: 256000
      input_price: 0.2
      output_price: 0.4
      supports_function_calling: true
    - name: cohere/command-r-plus-08-2024
      max_input_tokens: 128000
      input_price: 2.5
      output_price: 10
      supports_function_calling: true
    - name: cohere/command-r-08-2024
      max_input_tokens: 128000
      input_price: 0.15
      output_price: 0.6
      supports_function_calling: true
    - name: cohere/command-r7b-12-2024
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 0.0375
      output_price: 0.15
    - name: deepseek/deepseek-chat
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
      supports_function_calling: true
    - name: perplexity/llama-3.1-sonar-huge-128k-online
      max_input_tokens: 127072
      input_price: 5
      output_price: 5
    - name: perplexity/llama-3.1-sonar-large-128k-online
      max_input_tokens: 127072
      input_price: 1
      output_price: 1
    - name: perplexity/llama-3.1-sonar-small-128k-online
      max_input_tokens: 127072
      input_price: 0.2
      output_price: 0.2
    - name: 01-ai/yi-large
      max_input_tokens: 32768
      input_price: 3
      output_price: 3
    - name: microsoft/phi-3.5-mini-128k-instruct
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
    - name: microsoft/phi-3-medium-128k-instruct
      max_input_tokens: 128000
      input_price: 1
      output_price: 1
    - name: microsoft/phi-3-mini-128k-instruct
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
    - name: qwen/qwen-2.5-72b-instruct
      max_input_tokens: 131072
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: qwen/qwen-2.5-coder-32b-instruct
      max_input_tokens: 32768
      input_price: 0.18
      output_price: 0.18
    # Flagged `supports_vision` to match how Qwen2-VL-72B is declared for the
    # other platforms in this file.
    - name: qwen/qwen-2-vl-72b-instruct
      max_input_tokens: 32768
      input_price: 0.4
      output_price: 0.4
      supports_vision: true
    - name: qwen/qwq-32b-preview
      max_input_tokens: 32768
      input_price: 0.15
      output_price: 0.6
    - name: nvidia/llama-3.1-nemotron-70b-instruct
      max_input_tokens: 131072
      input_price: 0.35
      output_price: 0.4
      supports_function_calling: true
    - name: x-ai/grok-2-1212
      max_input_tokens: 131072
      input_price: 2
      output_price: 10
      supports_function_calling: true
    - name: x-ai/grok-beta
      max_input_tokens: 32768
      input_price: 5
      output_price: 15
      supports_function_calling: true
    - name: x-ai/grok-2-vision-1212
      max_input_tokens: 32768
      input_price: 2
      output_price: 10
      supports_vision: true
      supports_function_calling: true
    - name: x-ai/grok-vision-beta
      max_input_tokens: 8192
      input_price: 5
      output_price: 15
      supports_vision: true
    - name: amazon/nova-pro-v1
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.8
      output_price: 3.2
      supports_vision: true
    - name: amazon/nova-lite-v1
      max_input_tokens: 300000
      max_output_tokens: 5120
      input_price: 0.06
      output_price: 0.24
      supports_vision: true
    - name: amazon/nova-micro-v1
      max_input_tokens: 128000
      max_output_tokens: 5120
      input_price: 0.035
      output_price: 0.14

# Links:
#  - https://cloud.siliconflow.cn/models
#  - https://docs.siliconflow.cn/api-reference/chat-completions/chat-completions
- platform: siliconflow
  models:
    # Models without an explicit `type` key.
    # Several entries list 0 for both prices (presumably free-tier models — confirm).
    - name: meta-llama/Llama-3.3-70B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct
      max_input_tokens: 32768
      input_price: 2.94
      output_price: 2.94
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
    - name: Qwen/Qwen2.5-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.578
      output_price: 0.578
      supports_function_calling: true
    - name: Qwen/Qwen2.5-72B-Instruct-128K
      max_input_tokens: 128000
      input_price: 0.578
      output_price: 0.578
      supports_function_calling: true
    - name: Qwen/Qwen2.5-7B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
      supports_function_calling: true
    - name: Qwen/Qwen2.5-Coder-32B-Instruct
      max_input_tokens: 32768
      input_price: 0.176
      output_price: 0.176
    - name: Qwen/Qwen2.5-Coder-7B-Instruct
      max_input_tokens: 32768
      input_price: 0
      output_price: 0
    - name: Qwen/Qwen2-VL-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.5782
      output_price: 0.5782
      supports_vision: true
    - name: Qwen/QwQ-32B-Preview
      max_input_tokens: 32768
      input_price: 0.176
      output_price: 0.176
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.176
      output_price: 0.176
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0
      output_price: 0
    - name: deepseek-ai/DeepSeek-V2.5
      max_input_tokens: 32768
      input_price: 0.186
      output_price: 0.186
      supports_function_calling: true
    # Embedding models (`type: embedding`); these carry chunking/batch limits
    # instead of `max_input_tokens`.
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-large-zh-v1.5
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-m3
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    # Reranker model (`type: reranker`); only an input price is listed.
    - name: BAAI/bge-reranker-v2-m3
      type: reranker
      max_input_tokens: 8192
      input_price: 0

# Links:
#  - https://docs.together.ai/docs/serverless-models
#  - https://www.together.ai/pricing
- platform: together
  models:
    # Models without an explicit `type` key.
    - name: meta-llama/Llama-3.3-70B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.88
      output_price: 0.88
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 3.5
      output_price: 3.5
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.88
      output_price: 0.88
      supports_function_calling: true
    - name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.18
      output_price: 0.18
      supports_function_calling: true
    - name: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
      max_input_tokens: 131072
      input_price: 0.88
      output_price: 0.88
      supports_vision: true
    - name: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
      max_input_tokens: 131072
      input_price: 0.18
      output_price: 0.18
      supports_vision: true
    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
      input_price: 0.8
      output_price: 0.8
    - name: google/gemma-2-9b-it
      max_input_tokens: 8192
      input_price: 0.3
      output_price: 0.3
    - name: Qwen/Qwen2.5-72B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 1.2
      output_price: 1.2
    - name: Qwen/Qwen2.5-7B-Instruct-Turbo
      max_input_tokens: 32768
      input_price: 0.3
      output_price: 0.3
    - name: Qwen/Qwen2.5-Coder-32B-Instruct
      max_input_tokens: 16384
      input_price: 0.8
      output_price: 0.8
    - name: Qwen/QwQ-32B-Preview
      max_input_tokens: 32768
      input_price: 1.2
      output_price: 1.2
    # Embedding models (`type: embedding`); these carry chunking/batch limits
    # instead of `max_input_tokens`.
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      input_price: 0.016
      max_tokens_per_chunk: 512
      default_chunk_size: 1000
      max_batch_size: 100
    # Reranker model (`type: reranker`); only an input price is listed.
    - name: Salesforce/Llama-Rank-V1
      type: reranker
      max_input_tokens: 8192
      input_price: 0.1

# Links:
#  - https://jina.ai/
#  - https://api.jina.ai/redoc
- platform: jina
  models:
    # Embedding models (`type: embedding`). Every entry on this platform lists
    # an input price of 0.
    - name: jina-embeddings-v3
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 2000
      max_batch_size: 100
    # `jina-colbert-v2` is listed twice: here as an embedding model and again
    # below with `type: reranker`.
    - name: jina-colbert-v2
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    - name: jina-clip-v2
      type: embedding
      input_price: 0
      max_tokens_per_chunk: 8192
      default_chunk_size: 1500
      max_batch_size: 100
    # Reranker models (`type: reranker`).
    - name: jina-colbert-v2
      type: reranker
      max_input_tokens: 8192
      input_price: 0
    - name: jina-reranker-v2-base-multilingual
      type: reranker
      max_input_tokens: 8192
      input_price: 0

# Links:
#  - https://docs.voyageai.com/docs/embeddings
#  - https://docs.voyageai.com/docs/pricing
#  - https://docs.voyageai.com/reference/
- platform: voyageai
  models:
    # Embedding models (`type: embedding`).
    # NOTE(review): these entries set `max_input_tokens` in addition to
    # `max_tokens_per_chunk`; presumably the former is a per-request token
    # budget and the latter a per-text limit — confirm against the Voyage docs.
    - name: voyage-3-large
      type: embedding
      max_input_tokens: 120000
      input_price: 0.18
      max_tokens_per_chunk: 32000
      default_chunk_size: 2000
      max_batch_size: 128
    - name: voyage-3
      type: embedding
      max_input_tokens: 320000
      input_price: 0.06
      max_tokens_per_chunk: 32000
      default_chunk_size: 2000
      max_batch_size: 128
    - name: voyage-3-lite
      type: embedding
      max_input_tokens: 1000000
      input_price: 0.02
      max_tokens_per_chunk: 32000
      default_chunk_size: 1000
      max_batch_size: 128
    # Reranker models (`type: reranker`).
    - name: rerank-2
      type: reranker
      max_input_tokens: 16000
      input_price: 0.05
    - name: rerank-2-lite
      type: reranker
      max_input_tokens: 8000
      input_price: 0.02