feat(backend): Update llm models (#9390)
Some models are missing and some of the existing metadata is incorrect.

### Changes 🏗️

- Add models, update metadata and pricing.
- Add `max_output_tokens` to metadata; `None` indicates the max output tokens are unspecified in the provider docs.
- Added models:
  - OpenAI `o3-mini` and `o1`
  - Anthropic `claude-3-5-haiku-latest`
  - Groq `llama-3.3-70b-versatile`, `deepseek-r1-distill-llama-70b`
  - Ollama `llama3.3`
- For OpenRouter models, use the smallest max output tokens among the upstream providers that serve the model, so the value works for every route.
- Use `max_output_tokens` when set, falling back to 4096 when it is `None` (a standalone sketch of this chain follows the commit metadata below).
- Removed models (no longer working):
  - `gemma-7b-it`
  - `llama-3.1-70b-versatile`
  - `llama-3.1-405b-reasoning`
- Renamed the LLM enum member `GEMINI_FLASH_1_5_8B` to `GEMINI_FLASH_1_5` (its value already pointed at `google/gemini-flash-1.5`).

### Checklist 📋

#### For code changes:

- [ ] I have clearly listed my changes in the PR description
- [ ] I have made a test plan
- [ ] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [ ] ...

<details>
<summary>Example test plan</summary>

- [ ] Create from scratch and execute an agent with at least 3 blocks
- [ ] Import an agent from file upload, and confirm it executes correctly
- [ ] Upload agent to marketplace
- [ ] Import an agent from marketplace and confirm it executes correctly
- [ ] Edit an agent from monitor, and confirm it executes correctly

</details>

---------

Co-authored-by: Aarushi <50577581+aarushik93@users.noreply.github.com>
parent 58cadeb3b9
commit 533d120e98
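Before the diffs, the core behavioral change in one place: how the effective `max_tokens` is resolved. Below is a minimal standalone sketch of that chain, reusing the `ModelMetadata` shape from the diff; the `resolve_max_tokens` helper is hypothetical and exists only for illustration.

```python
from typing import NamedTuple


class ModelMetadata(NamedTuple):
    provider: str
    context_window: int
    max_output_tokens: int | None  # None: cap unspecified in provider docs


def resolve_max_tokens(requested: int | None, meta: ModelMetadata) -> int:
    # Prefer the caller's explicit value, then the model's documented output
    # cap, then the 4096 default this PR introduces as the last resort.
    return requested or meta.max_output_tokens or 4096


# A Groq model with no documented cap falls back to 4096; o1's documented
# 100k cap is used when the caller passes None.
assert resolve_max_tokens(None, ModelMetadata("groq", 8192, None)) == 4096
assert resolve_max_tokens(None, ModelMetadata("openai", 200000, 100000)) == 100000
assert resolve_max_tokens(512, ModelMetadata("openai", 200000, 100000)) == 512
```

Note that the `or`-chaining treats an explicit `0` as unset, exactly like the diff's `max_tokens = max_tokens or llm_model.max_output_tokens or 4096`.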
```diff
@@ -69,6 +69,7 @@ def AICredentialsField() -> AICredentials:
 class ModelMetadata(NamedTuple):
     provider: str
     context_window: int
+    max_output_tokens: int | None


 class LlmModelMeta(EnumMeta):
@@ -92,6 +93,8 @@ class LlmModelMeta(EnumMeta):
 class LlmModel(str, Enum, metaclass=LlmModelMeta):
     # OpenAI models
+    O3_MINI = "o3-mini"
+    O1 = "o1"
     O1_PREVIEW = "o1-preview"
     O1_MINI = "o1-mini"
     GPT4O_MINI = "gpt-4o-mini"
@@ -100,30 +103,31 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
     GPT3_5_TURBO = "gpt-3.5-turbo"
     # Anthropic models
     CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_5_HAIKU = "claude-3-5-haiku-latest"
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
     # Groq models
-    LLAMA3_8B = "llama3-8b-8192"
-    LLAMA3_70B = "llama3-70b-8192"
-    MIXTRAL_8X7B = "mixtral-8x7b-32768"
-    GEMMA_7B = "gemma-7b-it"
     GEMMA2_9B = "gemma2-9b-it"
-    # New Groq models (Preview)
-    LLAMA3_1_405B = "llama-3.1-405b-reasoning"
-    LLAMA3_1_70B = "llama-3.1-70b-versatile"
+    LLAMA3_3_70B = "llama-3.3-70b-versatile"
     LLAMA3_1_8B = "llama-3.1-8b-instant"
+    LLAMA3_70B = "llama3-70b-8192"
+    LLAMA3_8B = "llama3-8b-8192"
+    MIXTRAL_8X7B = "mixtral-8x7b-32768"
+    # Groq preview models
+    DEEPSEEK_LLAMA_70B = "deepseek-r1-distill-llama-70b"
     # Ollama models
+    OLLAMA_LLAMA3_3 = "llama3.3"
     OLLAMA_LLAMA3_2 = "llama3.2"
     OLLAMA_LLAMA3_8B = "llama3"
     OLLAMA_LLAMA3_405B = "llama3.1:405b"
     OLLAMA_DOLPHIN = "dolphin-mistral:latest"
     # OpenRouter models
-    GEMINI_FLASH_1_5_8B = "google/gemini-flash-1.5"
+    GEMINI_FLASH_1_5 = "google/gemini-flash-1.5"
     GROK_BETA = "x-ai/grok-beta"
     MISTRAL_NEMO = "mistralai/mistral-nemo"
     COHERE_COMMAND_R_08_2024 = "cohere/command-r-08-2024"
     COHERE_COMMAND_R_PLUS_08_2024 = "cohere/command-r-plus-08-2024"
     EVA_QWEN_2_5_32B = "eva-unit-01/eva-qwen-2.5-32b"
-    DEEPSEEK_CHAT = "deepseek/deepseek-chat"
+    DEEPSEEK_CHAT = "deepseek/deepseek-chat"  # Actually: DeepSeek V3
     PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE = (
         "perplexity/llama-3.1-sonar-large-128k-online"
     )
@@ -148,47 +152,74 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
     def context_window(self) -> int:
         return self.metadata.context_window

+    @property
+    def max_output_tokens(self) -> int | None:
+        return self.metadata.max_output_tokens
+

 MODEL_METADATA = {
-    LlmModel.O1_PREVIEW: ModelMetadata("openai", 32000),
-    LlmModel.O1_MINI: ModelMetadata("openai", 62000),
-    LlmModel.GPT4O_MINI: ModelMetadata("openai", 128000),
-    LlmModel.GPT4O: ModelMetadata("openai", 128000),
-    LlmModel.GPT4_TURBO: ModelMetadata("openai", 128000),
-    LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385),
-    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata("anthropic", 200000),
-    LlmModel.CLAUDE_3_HAIKU: ModelMetadata("anthropic", 200000),
-    LlmModel.LLAMA3_8B: ModelMetadata("groq", 8192),
-    LlmModel.LLAMA3_70B: ModelMetadata("groq", 8192),
-    LlmModel.MIXTRAL_8X7B: ModelMetadata("groq", 32768),
-    LlmModel.GEMMA_7B: ModelMetadata("groq", 8192),
-    LlmModel.GEMMA2_9B: ModelMetadata("groq", 8192),
-    LlmModel.LLAMA3_1_405B: ModelMetadata("groq", 8192),
-    # Limited to 16k during preview
-    LlmModel.LLAMA3_1_70B: ModelMetadata("groq", 131072),
-    LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 131072),
-    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192),
-    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192),
-    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192),
-    LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768),
-    LlmModel.GEMINI_FLASH_1_5_8B: ModelMetadata("open_router", 8192),
-    LlmModel.GROK_BETA: ModelMetadata("open_router", 8192),
-    LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 4000),
-    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 4000),
-    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 4000),
-    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata("open_router", 4000),
-    LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 8192),
+    # https://platform.openai.com/docs/models
+    LlmModel.O3_MINI: ModelMetadata("openai", 200000, 100000),  # o3-mini-2025-01-31
+    LlmModel.O1: ModelMetadata("openai", 200000, 100000),  # o1-2024-12-17
+    LlmModel.O1_PREVIEW: ModelMetadata(
+        "openai", 128000, 32768
+    ),  # o1-preview-2024-09-12
+    LlmModel.O1_MINI: ModelMetadata("openai", 128000, 65536),  # o1-mini-2024-09-12
+    LlmModel.GPT4O_MINI: ModelMetadata(
+        "openai", 128000, 16384
+    ),  # gpt-4o-mini-2024-07-18
+    LlmModel.GPT4O: ModelMetadata("openai", 128000, 16384),  # gpt-4o-2024-08-06
+    LlmModel.GPT4_TURBO: ModelMetadata(
+        "openai", 128000, 4096
+    ),  # gpt-4-turbo-2024-04-09
+    LlmModel.GPT3_5_TURBO: ModelMetadata("openai", 16385, 4096),  # gpt-3.5-turbo-0125
+    # https://docs.anthropic.com/en/docs/about-claude/models
+    LlmModel.CLAUDE_3_5_SONNET: ModelMetadata(
+        "anthropic", 200000, 8192
+    ),  # claude-3-5-sonnet-20241022
+    LlmModel.CLAUDE_3_5_HAIKU: ModelMetadata(
+        "anthropic", 200000, 8192
+    ),  # claude-3-5-haiku-20241022
+    LlmModel.CLAUDE_3_HAIKU: ModelMetadata(
+        "anthropic", 200000, 4096
+    ),  # claude-3-haiku-20240307
+    # https://console.groq.com/docs/models
+    LlmModel.GEMMA2_9B: ModelMetadata("groq", 8192, None),
+    LlmModel.LLAMA3_3_70B: ModelMetadata("groq", 128000, 32768),
+    LlmModel.LLAMA3_1_8B: ModelMetadata("groq", 128000, 8192),
+    LlmModel.LLAMA3_70B: ModelMetadata("groq", 8192, None),
+    LlmModel.LLAMA3_8B: ModelMetadata("groq", 8192, None),
+    LlmModel.MIXTRAL_8X7B: ModelMetadata("groq", 32768, None),
+    LlmModel.DEEPSEEK_LLAMA_70B: ModelMetadata("groq", 128000, None),
+    # https://ollama.com/library
+    LlmModel.OLLAMA_LLAMA3_3: ModelMetadata("ollama", 8192, None),
+    LlmModel.OLLAMA_LLAMA3_2: ModelMetadata("ollama", 8192, None),
+    LlmModel.OLLAMA_LLAMA3_8B: ModelMetadata("ollama", 8192, None),
+    LlmModel.OLLAMA_LLAMA3_405B: ModelMetadata("ollama", 8192, None),
+    LlmModel.OLLAMA_DOLPHIN: ModelMetadata("ollama", 32768, None),
+    # https://openrouter.ai/models
+    LlmModel.GEMINI_FLASH_1_5: ModelMetadata("open_router", 1000000, 8192),
+    LlmModel.GROK_BETA: ModelMetadata("open_router", 131072, 131072),
+    LlmModel.MISTRAL_NEMO: ModelMetadata("open_router", 128000, 4096),
+    LlmModel.COHERE_COMMAND_R_08_2024: ModelMetadata("open_router", 128000, 4096),
+    LlmModel.COHERE_COMMAND_R_PLUS_08_2024: ModelMetadata("open_router", 128000, 4096),
+    LlmModel.EVA_QWEN_2_5_32B: ModelMetadata("open_router", 16384, 4096),
+    LlmModel.DEEPSEEK_CHAT: ModelMetadata("open_router", 64000, 2048),
     LlmModel.PERPLEXITY_LLAMA_3_1_SONAR_LARGE_128K_ONLINE: ModelMetadata(
-        "open_router", 8192
+        "open_router", 127072, 127072
     ),
-    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata("open_router", 4000),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata("open_router", 4000),
-    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata("open_router", 4000),
-    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 4000),
-    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 4000),
-    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 4000),
-    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 4000),
-    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4000),
+    LlmModel.QWEN_QWQ_32B_PREVIEW: ModelMetadata("open_router", 32768, 32768),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_405B: ModelMetadata(
+        "open_router", 131000, 4096
+    ),
+    LlmModel.NOUSRESEARCH_HERMES_3_LLAMA_3_1_70B: ModelMetadata(
+        "open_router", 12288, 12288
+    ),
+    LlmModel.AMAZON_NOVA_LITE_V1: ModelMetadata("open_router", 300000, 5120),
+    LlmModel.AMAZON_NOVA_MICRO_V1: ModelMetadata("open_router", 128000, 5120),
+    LlmModel.AMAZON_NOVA_PRO_V1: ModelMetadata("open_router", 300000, 5120),
+    LlmModel.MICROSOFT_WIZARDLM_2_8X22B: ModelMetadata("open_router", 65536, 4096),
+    LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
 }

 for model in LlmModel:
```
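The OpenRouter entries above encode the rule from the description: when OpenRouter can route a model to several upstream providers, each with its own output cap, the table stores the smallest cap so a request stays valid no matter which provider handles it. A hypothetical illustration with made-up numbers:

```python
# Made-up provider caps for one hypothetical OpenRouter-served model; the
# metadata keeps the minimum so the request is valid on every upstream route.
provider_output_caps = {"provider-a": 8192, "provider-b": 16384}
safe_max_output_tokens = min(provider_output_caps.values())
assert safe_max_output_tokens == 8192
```

The remaining hunks of this file thread the resolved cap through the provider call path, replacing the Anthropic-specific `max_tokens or 8192` default with the single fallback chain: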
```diff
@@ -314,7 +345,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
         llm_model: LlmModel,
         prompt: list[dict],
         json_format: bool,
-        max_tokens: int | None = None,
+        max_tokens: int | None,
         ollama_host: str = "localhost:11434",
     ) -> tuple[str, int, int]:
         """
@@ -332,6 +363,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
             The number of tokens used in the completion.
         """
         provider = llm_model.metadata.provider
+        max_tokens = max_tokens or llm_model.max_output_tokens or 4096

         if provider == "openai":
             oai_client = openai.OpenAI(api_key=credentials.api_key.get_secret_value())
@@ -381,7 +413,7 @@ class AIStructuredResponseGeneratorBlock(AIBlockBase):
                 model=llm_model.value,
                 system=sysprompt,
                 messages=messages,
-                max_tokens=max_tokens or 8192,
+                max_tokens=max_tokens,
             )
             self.prompt = json.dumps(prompt)
```
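The second file keeps the per-call credit table in line with the updated enum. The inline comments (e.g. `$1.10 / $4.40`) appear to record provider list prices per million input/output tokens, and `# ? / ?` marks pricing that was not yet published: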
```diff
@@ -35,6 +35,8 @@ from backend.integrations.credentials_store import (
 # =============== Configure the cost for each LLM Model call =============== #

 MODEL_COST: dict[LlmModel, int] = {
+    LlmModel.O3_MINI: 2,  # $1.10 / $4.40
+    LlmModel.O1: 16,  # $15 / $60
     LlmModel.O1_PREVIEW: 16,
     LlmModel.O1_MINI: 4,
     LlmModel.GPT4O_MINI: 1,
@@ -42,20 +44,21 @@ MODEL_COST: dict[LlmModel, int] = {
     LlmModel.GPT4_TURBO: 10,
     LlmModel.GPT3_5_TURBO: 1,
     LlmModel.CLAUDE_3_5_SONNET: 4,
+    LlmModel.CLAUDE_3_5_HAIKU: 1,  # $0.80 / $4.00
     LlmModel.CLAUDE_3_HAIKU: 1,
     LlmModel.LLAMA3_8B: 1,
     LlmModel.LLAMA3_70B: 1,
     LlmModel.MIXTRAL_8X7B: 1,
-    LlmModel.GEMMA_7B: 1,
     LlmModel.GEMMA2_9B: 1,
-    LlmModel.LLAMA3_1_405B: 1,
-    LlmModel.LLAMA3_1_70B: 1,
+    LlmModel.LLAMA3_3_70B: 1,  # $0.59 / $0.79
     LlmModel.LLAMA3_1_8B: 1,
+    LlmModel.OLLAMA_LLAMA3_3: 1,
     LlmModel.OLLAMA_LLAMA3_2: 1,
     LlmModel.OLLAMA_LLAMA3_8B: 1,
     LlmModel.OLLAMA_LLAMA3_405B: 1,
+    LlmModel.DEEPSEEK_LLAMA_70B: 1,  # ? / ?
     LlmModel.OLLAMA_DOLPHIN: 1,
-    LlmModel.GEMINI_FLASH_1_5_8B: 1,
+    LlmModel.GEMINI_FLASH_1_5: 1,
     LlmModel.GROK_BETA: 5,
     LlmModel.MISTRAL_NEMO: 1,
     LlmModel.COHERE_COMMAND_R_08_2024: 1,
```
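How this table is consumed sits outside the diff, but for reading purposes, a minimal hypothetical sketch of the lookup these entries feed (the `credits_for_call` helper and the two-member enum are illustrative only, not the repository's actual code):

```python
from enum import Enum


class LlmModel(str, Enum):
    # Two members reproduced from the diff, just for the example.
    O1 = "o1"
    GPT4O_MINI = "gpt-4o-mini"


MODEL_COST: dict[LlmModel, int] = {LlmModel.O1: 16, LlmModel.GPT4O_MINI: 1}


def credits_for_call(model: LlmModel) -> int:
    # Raising KeyError on a missing entry mirrors the PR's goal of keeping
    # the cost table and the enum in lockstep.
    return MODEL_COST[model]


assert credits_for_call(LlmModel.O1) == 16
```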