From 2dc038b6c08a93f05ff722dbab0bcfa454d31f3e Mon Sep 17 00:00:00 2001
From: Chirag Modi <98582575+cmodi-meta@users.noreply.github.com>
Date: Wed, 14 May 2025 12:45:40 -0700
Subject: [PATCH] Add Llama API Support (#9899)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The changes in this PR are to add Llama API support.
### Changes 🏗️
We add both backend and frontend support.
**Backend**:
- Add llama_api provider
- Include models supported by Llama API along with configs
- llm_call
- credential store and llama_api_key field in Settings
**Frontend**:
- Llama API as a type
- Credentials input and provider for Llama API
### Checklist 📋
#### For code changes:
- [X] I have clearly listed my changes in the PR description
- [X] I have tested my changes according to the test plan:
**Test Plan**:
AI Text Generator
- [X] Start-up backend and frontend:
- Start backend with Docker services: `docker compose up -d --build`
- Start frontend: `npm install && npm run dev`
- By visiting http://localhost:3000/, test inference and structured
outputs
- [X] Create from scratch
- [X] Request for Llama API Credentials
- [X] Execute an agent with at least 3 blocks
- [X] Confirm it executes correctly
Structured Response Generator
- [X] Start-up backend and frontend:
- Start backend with Docker services: `docker compose up -d --build`
- Start frontend: `npm install && npm run dev`
- By visiting http://localhost:3000/, test inference and structured
outputs
- [X] Create from scratch
- [X] Execute an agent
- [X] Confirm it executes correctly
---------
Co-authored-by: Nicholas Tindle
---
autogpt_platform/backend/.env.example | 1 +
.../backend/backend/blocks/llm.py | 64 ++++++++++++++++++-
.../backend/backend/data/block_cost_config.py | 22 +++++++
.../backend/integrations/credentials_store.py | 8 +++
.../backend/backend/integrations/providers.py | 1 +
.../backend/backend/util/settings.py | 1 +
.../profile/(user)/integrations/page.tsx | 1 +
.../integrations/credentials-input.tsx | 1 +
.../integrations/credentials-provider.tsx | 1 +
.../src/lib/autogpt-server-api/types.ts | 1 +
docs/content/platform/blocks/llm.md | 16 ++++-
11 files changed, 113 insertions(+), 4 deletions(-)
diff --git a/autogpt_platform/backend/.env.example b/autogpt_platform/backend/.env.example
index 60d941b839..ebd9d7c6ad 100644
--- a/autogpt_platform/backend/.env.example
+++ b/autogpt_platform/backend/.env.example
@@ -129,6 +129,7 @@ OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
OPEN_ROUTER_API_KEY=
+LLAMA_API_KEY=
# Reddit
# Go to https://www.reddit.com/prefs/apps and create a new app
diff --git a/autogpt_platform/backend/backend/blocks/llm.py b/autogpt_platform/backend/backend/blocks/llm.py
index f630937e61..675b77ee5d 100644
--- a/autogpt_platform/backend/backend/blocks/llm.py
+++ b/autogpt_platform/backend/backend/blocks/llm.py
@@ -36,6 +36,7 @@ LLMProviderName = Literal[
ProviderName.OLLAMA,
ProviderName.OPENAI,
ProviderName.OPEN_ROUTER,
+ ProviderName.LLAMA_API,
]
AICredentials = CredentialsMetaInput[LLMProviderName, Literal["api_key"]]
@@ -141,6 +142,11 @@ class LlmModel(str, Enum, metaclass=LlmModelMeta):
GRYPHE_MYTHOMAX_L2_13B = "gryphe/mythomax-l2-13b"
META_LLAMA_4_SCOUT = "meta-llama/llama-4-scout"
META_LLAMA_4_MAVERICK = "meta-llama/llama-4-maverick"
+ # Llama API models
+ LLAMA_API_LLAMA_4_SCOUT = "Llama-4-Scout-17B-16E-Instruct-FP8"
+ LLAMA_API_LLAMA4_MAVERICK = "Llama-4-Maverick-17B-128E-Instruct-FP8"
+ LLAMA_API_LLAMA3_3_8B = "Llama-3.3-8B-Instruct"
+ LLAMA_API_LLAMA3_3_70B = "Llama-3.3-70B-Instruct"
@property
def metadata(self) -> ModelMetadata:
@@ -230,6 +236,11 @@ MODEL_METADATA = {
LlmModel.GRYPHE_MYTHOMAX_L2_13B: ModelMetadata("open_router", 4096, 4096),
LlmModel.META_LLAMA_4_SCOUT: ModelMetadata("open_router", 131072, 131072),
LlmModel.META_LLAMA_4_MAVERICK: ModelMetadata("open_router", 1048576, 1000000),
+ # Llama API models
+ LlmModel.LLAMA_API_LLAMA_4_SCOUT: ModelMetadata("llama_api", 128000, 4028),
+ LlmModel.LLAMA_API_LLAMA4_MAVERICK: ModelMetadata("llama_api", 128000, 4028),
+ LlmModel.LLAMA_API_LLAMA3_3_8B: ModelMetadata("llama_api", 128000, 4028),
+ LlmModel.LLAMA_API_LLAMA3_3_70B: ModelMetadata("llama_api", 128000, 4028),
}
for model in LlmModel:
@@ -516,9 +527,6 @@ def llm_call(
messages=prompt, # type: ignore
max_tokens=max_tokens,
tools=tools_param, # type: ignore
- parallel_tool_calls=(
- openai.NOT_GIVEN if parallel_tool_calls is None else parallel_tool_calls
- ),
)
# If there's no response, raise an error
@@ -542,6 +550,56 @@ def llm_call(
else:
tool_calls = None
+ return LLMResponse(
+ raw_response=response.choices[0].message,
+ prompt=prompt,
+ response=response.choices[0].message.content or "",
+ tool_calls=tool_calls,
+ prompt_tokens=response.usage.prompt_tokens if response.usage else 0,
+ completion_tokens=response.usage.completion_tokens if response.usage else 0,
+ )
+ elif provider == "llama_api":
+ tools_param = tools if tools else openai.NOT_GIVEN
+ client = openai.OpenAI(
+ base_url="https://api.llama.com/compat/v1/",
+ api_key=credentials.api_key.get_secret_value(),
+ )
+
+ response = client.chat.completions.create(
+ extra_headers={
+ "HTTP-Referer": "https://agpt.co",
+ "X-Title": "AutoGPT",
+ },
+ model=llm_model.value,
+ messages=prompt, # type: ignore
+ max_tokens=max_tokens,
+ tools=tools_param, # type: ignore
+ parallel_tool_calls=(
+ openai.NOT_GIVEN if parallel_tool_calls is None else parallel_tool_calls
+ ),
+ )
+
+ # If there's no response, raise an error
+ if not response.choices:
+ if response:
+ raise ValueError(f"Llama API error: {response}")
+ else:
+ raise ValueError("No response from Llama API.")
+
+ if response.choices[0].message.tool_calls:
+ tool_calls = [
+ ToolContentBlock(
+ id=tool.id,
+ type=tool.type,
+ function=ToolCall(
+ name=tool.function.name, arguments=tool.function.arguments
+ ),
+ )
+ for tool in response.choices[0].message.tool_calls
+ ]
+ else:
+ tool_calls = None
+
return LLMResponse(
raw_response=response.choices[0].message,
prompt=prompt,
diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py
index 2397ce0a0b..1e15b5f620 100644
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -26,6 +26,7 @@ from backend.integrations.credentials_store import (
groq_credentials,
ideogram_credentials,
jina_credentials,
+ llama_api_credentials,
open_router_credentials,
openai_credentials,
replicate_credentials,
@@ -81,6 +82,10 @@ MODEL_COST: dict[LlmModel, int] = {
LlmModel.GRYPHE_MYTHOMAX_L2_13B: 1,
LlmModel.META_LLAMA_4_SCOUT: 1,
LlmModel.META_LLAMA_4_MAVERICK: 1,
+ LlmModel.LLAMA_API_LLAMA_4_SCOUT: 1,
+ LlmModel.LLAMA_API_LLAMA4_MAVERICK: 1,
+ LlmModel.LLAMA_API_LLAMA3_3_8B: 1,
+ LlmModel.LLAMA_API_LLAMA3_3_70B: 1,
}
for model in LlmModel:
@@ -153,6 +158,23 @@ LLM_COST = (
for model, cost in MODEL_COST.items()
if MODEL_METADATA[model].provider == "open_router"
]
+ # Llama API Models
+ + [
+ BlockCost(
+ cost_type=BlockCostType.RUN,
+ cost_filter={
+ "model": model,
+ "credentials": {
+ "id": llama_api_credentials.id,
+ "provider": llama_api_credentials.provider,
+ "type": llama_api_credentials.type,
+ },
+ },
+ cost_amount=cost,
+ )
+ for model, cost in MODEL_COST.items()
+ if MODEL_METADATA[model].provider == "llama_api"
+ ]
)
# =============== This is the exhaustive list of cost for each Block =============== #
diff --git a/autogpt_platform/backend/backend/integrations/credentials_store.py b/autogpt_platform/backend/backend/integrations/credentials_store.py
index 152aa7ee39..09849536c6 100644
--- a/autogpt_platform/backend/backend/integrations/credentials_store.py
+++ b/autogpt_platform/backend/backend/integrations/credentials_store.py
@@ -177,6 +177,14 @@ zerobounce_credentials = APIKeyCredentials(
expires_at=None,
)
+llama_api_credentials = APIKeyCredentials(
+ id="d44045af-1c33-4833-9e19-752313214de2",
+ provider="llama_api",
+ api_key=SecretStr(settings.secrets.llama_api_key),
+ title="Use Credits for Llama API",
+ expires_at=None,
+)
+
DEFAULT_CREDENTIALS = [
ollama_credentials,
revid_credentials,
diff --git a/autogpt_platform/backend/backend/integrations/providers.py b/autogpt_platform/backend/backend/integrations/providers.py
index bf409b68d3..bd054641b8 100644
--- a/autogpt_platform/backend/backend/integrations/providers.py
+++ b/autogpt_platform/backend/backend/integrations/providers.py
@@ -20,6 +20,7 @@ class ProviderName(str, Enum):
IDEOGRAM = "ideogram"
JINA = "jina"
LINEAR = "linear"
+ LLAMA_API = "llama_api"
MEDIUM = "medium"
MEM0 = "mem0"
NOTION = "notion"
diff --git a/autogpt_platform/backend/backend/util/settings.py b/autogpt_platform/backend/backend/util/settings.py
index 159e31bb88..3ae35d6794 100644
--- a/autogpt_platform/backend/backend/util/settings.py
+++ b/autogpt_platform/backend/backend/util/settings.py
@@ -388,6 +388,7 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
anthropic_api_key: str = Field(default="", description="Anthropic API key")
groq_api_key: str = Field(default="", description="Groq API key")
open_router_api_key: str = Field(default="", description="Open Router API Key")
+ llama_api_key: str = Field(default="", description="Llama API Key")
reddit_client_id: str = Field(default="", description="Reddit client ID")
reddit_client_secret: str = Field(default="", description="Reddit client secret")
diff --git a/autogpt_platform/frontend/src/app/(platform)/profile/(user)/integrations/page.tsx b/autogpt_platform/frontend/src/app/(platform)/profile/(user)/integrations/page.tsx
index e5f1cd7ee9..534c50bce1 100644
--- a/autogpt_platform/frontend/src/app/(platform)/profile/(user)/integrations/page.tsx
+++ b/autogpt_platform/frontend/src/app/(platform)/profile/(user)/integrations/page.tsx
@@ -117,6 +117,7 @@ export default function PrivatePage() {
"3bcdbda3-84a3-46af-8fdb-bfd2472298b8", // SmartLead
"63a6e279-2dc2-448e-bf57-85776f7176dc", // ZeroBounce
"9aa1bde0-4947-4a70-a20c-84daa3850d52", // Google Maps
+ "d44045af-1c33-4833-9e19-752313214de2", // Llama API
],
[],
);
diff --git a/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx b/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
index 3d2246f910..83e337efe9 100644
--- a/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
+++ b/autogpt_platform/frontend/src/components/integrations/credentials-input.tsx
@@ -76,6 +76,7 @@ export const providerIcons: Record<
openai: fallbackIcon,
openweathermap: fallbackIcon,
open_router: fallbackIcon,
+ llama_api: fallbackIcon,
pinecone: fallbackIcon,
slant3d: fallbackIcon,
screenshotone: fallbackIcon,
diff --git a/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx b/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
index 4f0fe0a60d..d31cda8530 100644
--- a/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
+++ b/autogpt_platform/frontend/src/components/integrations/credentials-provider.tsx
@@ -40,6 +40,7 @@ const providerDisplayNames: Record = {
openai: "OpenAI",
openweathermap: "OpenWeatherMap",
open_router: "Open Router",
+ llama_api: "Llama API",
pinecone: "Pinecone",
screenshotone: "ScreenshotOne",
slant3d: "Slant3D",
diff --git a/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts b/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
index 696d9f3b37..1aec142578 100644
--- a/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
+++ b/autogpt_platform/frontend/src/lib/autogpt-server-api/types.ts
@@ -172,6 +172,7 @@ export const PROVIDER_NAMES = {
OPENAI: "openai",
OPENWEATHERMAP: "openweathermap",
OPEN_ROUTER: "open_router",
+ LLAMA_API: "llama_api",
PINECONE: "pinecone",
SCREENSHOTONE: "screenshotone",
SLANT3D: "slant3d",
diff --git a/docs/content/platform/blocks/llm.md b/docs/content/platform/blocks/llm.md
index 02c0b79985..4eafa62bee 100644
--- a/docs/content/platform/blocks/llm.md
+++ b/docs/content/platform/blocks/llm.md
@@ -156,4 +156,18 @@ The block formulates a prompt based on the given focus or source data, sends it
| Error | Any error message if the process fails |
### Possible use case
-Automatically generating a list of key points or action items from a long meeting transcript or summarizing the main topics discussed in a series of documents.
\ No newline at end of file
+Automatically generating a list of key points or action items from a long meeting transcript or summarizing the main topics discussed in a series of documents.
+
+# Providers
+There are several providers that AutoGPT users can use to run inference with LLM models.
+
+## Llama API
+Llama API is a Meta-hosted API service that helps you integrate Llama models quickly and efficiently. Using OpenAI compatibility endpoints, you can easily access the power of Llama models without the need for complex setup or configuration!
+
+Join the [waitlist](https://llama.developer.meta.com?utm_source=partner-autogpt&utm_medium=readme) to get access!
+
+Try the Llama API provider by selecting any of the following LLM Model names from the AI blocks mentioned above:
+- Llama-4-Scout-17B-16E-Instruct-FP8
+- Llama-4-Maverick-17B-128E-Instruct-FP8
+- Llama-3.3-8B-Instruct
+- Llama-3.3-70B-Instruct