AutoGPT/autogpt/chat.py

import logging
import time

import openai
from dotenv import load_dotenv

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logger import logger

cfg = Config()


def create_chat_message(role, content):
    """
    Create a chat message with the given role and content.

    Args:
    role (str): The role of the message sender, e.g., "system", "user", or "assistant".
    content (str): The content of the message.

    Returns:
    dict: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}


def generate_context(prompt, relevant_memory, full_message_history, model):
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """Interact with the OpenAI API, sending the prompt, user input, message history, and permanent memory."""
    while True:
        try:
            """
            Interact with the OpenAI API, sending the prompt, user input, message history, and permanent memory.

            Args:
            prompt (str): The prompt explaining the rules to the AI.
            user_input (str): The input from the user.
            full_message_history (list): The list of all messages sent between the user and the AI.
            permanent_memory (Obj): The memory object containing the permanent memory.
            token_limit (int): The maximum number of tokens allowed in the API call.

            Returns:
            str: The AI's response.
            """
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument
            # Reserve 1000 tokens for the response

            logger.debug(f"Token limit: {token_limit}")
            send_token_limit = token_limit - 1000

            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # remove memories until we are under 2500 tokens
                relevant_memory = relevant_memory[1:]
                (
                    next_message_to_add_index,
                    current_tokens_used,
                    insertion_index,
                    current_context,
                ) = generate_context(
                    prompt, relevant_memory, full_message_history, model
                )

            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model
            )  # Account for user input (appended later)

            while next_message_to_add_index >= 0:
                # print (f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context, after the two system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1

            # Append user input, the length of this is accounted for above
            current_context.extend([create_chat_message("user", user_input)])

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except openai.error.RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
formatting 2023-04-14 19:42:28 +00:00			`import logging`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`import time`
formatting 2023-04-14 19:42:28 +00:00
Extracts chat and data functions from main file. 2023-03-29 01:43:17 +00:00			`import openai`
Use `load_dotenv()` in `chat.py` and `speak.py`. 2023-04-02 13:32:25 +00:00			`from dotenv import load_dotenv`
formatting 2023-04-14 19:42:28 +00:00
Convert to python module named autogpt. Also fixed the Dockerfile. Converting to module makes development easier. Fixes coverage script in CI and test imports. 2023-04-14 16:28:58 +00:00			`from autogpt import token_counter`
formatting 2023-04-14 19:42:28 +00:00			`from autogpt.config import Config`
Convert to python module named autogpt. Also fixed the Dockerfile. Converting to module makes development easier. Fixes coverage script in CI and test imports. 2023-04-14 16:28:58 +00:00			`from autogpt.llm_utils import create_chat_completion`
			`from autogpt.logger import logger`
Apply autopep8 formatting to entire codebase 2023-04-02 08:13:15 +00:00
Code review changes 2023-04-09 13:39:11 +00:00			`cfg = Config()`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00
lint: Fix all E302 linting errors 2023-04-12 21:05:14 +00:00
Extracts chat and data functions from main file. 2023-03-29 01:43:17 +00:00			`def create_chat_message(role, content):`
			`"""`
			`Create a chat message with the given role and content.`

			`Args:`
			`role (str): The role of the message sender, e.g., "system", "user", or "assistant".`
			`content (str): The content of the message.`

			`Returns:`
			`dict: A dictionary containing the role and content of the message.`
			`"""`
			`return {"role": role, "content": content}`

Apply autopep8 formatting to entire codebase 2023-04-02 08:13:15 +00:00
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00			`def generate_context(prompt, relevant_memory, full_message_history, model):`
			`current_context = [`
formatting 2023-04-14 19:42:28 +00:00			`create_chat_message("system", prompt),`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00			`create_chat_message(`
formatting 2023-04-14 19:42:28 +00:00			`"system", f"The current time and date is {time.strftime('%c')}"`
			`),`
Added time and date to the system message for each context 2023-04-09 02:59:28 +00:00			`create_chat_message(`
formatting 2023-04-14 19:42:28 +00:00			`"system",`
			`f"This reminds you of these events from your past:\n{relevant_memory}\n\n",`
			`),`
			`]`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00
			`# Add messages from the full message history until we reach the token limit`
			`next_message_to_add_index = len(full_message_history) - 1`
			`insertion_index = len(current_context)`
			`# Count the currently used tokens`
			`current_tokens_used = token_counter.count_message_tokens(current_context, model)`
formatting 2023-04-14 19:42:28 +00:00			`return (`
			`next_message_to_add_index,`
			`current_tokens_used,`
			`insertion_index,`
			`current_context,`
			`)`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00
			`# TODO: Change debug from hardcode to argument`
Apply autopep8 formatting to entire codebase 2023-04-02 08:13:15 +00:00			`def chat_with_ai(`
formatting 2023-04-14 19:42:28 +00:00			`prompt, user_input, full_message_history, permanent_memory, token_limit`
			`):`
Add documentation 2023-04-02 17:03:37 +00:00			`"""Interact with the OpenAI API, sending the prompt, user input, message history, and permanent memory."""`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`while True:`
			`try:`
			`"""`
			`Interact with the OpenAI API, sending the prompt, user input, message history, and permanent memory.`
Extracts chat and data functions from main file. 2023-03-29 01:43:17 +00:00
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`Args:`
			`prompt (str): The prompt explaining the rules to the AI.`
			`user_input (str): The input from the user.`
			`full_message_history (list): The list of all messages sent between the user and the AI.`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00			`permanent_memory (Obj): The memory object containing the permanent memory.`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`token_limit (int): The maximum number of tokens allowed in the API call.`
Extracts chat and data functions from main file. 2023-03-29 01:43:17 +00:00
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`Returns:`
			`str: The AI's response.`
			`"""`
formatting 2023-04-14 19:42:28 +00:00			`model = cfg.fast_llm_model # TODO: Change model from hardcode to argument`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`# Reserve 1000 tokens for the response`
Fix flake8 W293 and W391 2023-04-10 16:46:40 +00:00
fix(logger): fix typewriter simulation doesn't work well with Typing spinner 2023-04-12 12:39:54 +00:00			`logger.debug(f"Token limit: {token_limit}")`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`send_token_limit = token_limit - 1000`

formatting 2023-04-14 19:42:28 +00:00			`relevant_memory = (`
			`""`
			`if len(full_message_history) == 0`
			`else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)`
			`)`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00
formatting 2023-04-14 19:42:28 +00:00			`logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")`
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00
formatting 2023-04-14 19:42:28 +00:00			`(`
			`next_message_to_add_index,`
			`current_tokens_used,`
			`insertion_index,`
			`current_context,`
			`) = generate_context(prompt, relevant_memory, full_message_history, model)`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00
Pinecone memory and memory usage tracking 2023-04-04 00:31:01 +00:00			`while current_tokens_used > 2500:`
			`# remove memories until we are under 2500 tokens`
			`relevant_memory = relevant_memory[1:]`
formatting 2023-04-14 19:42:28 +00:00			`(`
			`next_message_to_add_index,`
			`current_tokens_used,`
			`insertion_index,`
			`current_context,`
			`) = generate_context(`
			`prompt, relevant_memory, full_message_history, model`
			`)`

			`current_tokens_used += token_counter.count_message_tokens(`
			`[create_chat_message("user", user_input)], model`
			`) # Account for user input (appended later)`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00
			`while next_message_to_add_index >= 0:`
			`# print (f"CURRENT TOKENS USED: {current_tokens_used}")`
			`message_to_add = full_message_history[next_message_to_add_index]`

formatting 2023-04-14 19:42:28 +00:00			`tokens_to_add = token_counter.count_message_tokens(`
			`[message_to_add], model`
			`)`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`if current_tokens_used + tokens_to_add > send_token_limit:`
			`break`

			`# Add the most recent message to the start of the current context, after the two system prompts.`
formatting 2023-04-14 19:42:28 +00:00			`current_context.insert(`
			`insertion_index, full_message_history[next_message_to_add_index]`
			`)`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00
			`# Count the currently used tokens`
			`current_tokens_used += tokens_to_add`
Added time and date to the system message for each context 2023-04-09 02:59:28 +00:00
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`# Move to the next most recent message in the full message history`
			`next_message_to_add_index -= 1`

			`# Append user input, the length of this is accounted for above`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`current_context.extend([create_chat_message("user", user_input)])`

Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`# Calculate remaining tokens`
			`tokens_remaining = token_limit - current_tokens_used`
			`# assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"`

Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`# Debug print the current context`
fix(logger): fix typewriter simulation doesn't work well with Typing spinner 2023-04-12 12:39:54 +00:00			`logger.debug(f"Token limit: {token_limit}")`
			`logger.debug(f"Send Token Count: {current_tokens_used}")`
			`logger.debug(f"Tokens remaining for response: {tokens_remaining}")`
			`logger.debug("------------ CONTEXT SENT TO AI ---------------")`
feat(global): errors logs are logged as debug level and activated with program argument 'debug' 2023-04-07 23:05:08 +00:00			`for message in current_context:`
			`# Skip printing the prompt`
			`if message["role"] == "system" and message["content"] == prompt:`
			`continue`
fix(logger): fix typewriter simulation doesn't work well with Typing spinner 2023-04-12 12:39:54 +00:00			`logger.debug(f"{message['role'].capitalize()}: {message['content']}")`
			`logger.debug("")`
			`logger.debug("----------- END OF CONTEXT ----------------")`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00
Use gpt-4 by default for the main thought process Allow specifying the llm through dotenv Move more things into config 2023-04-03 02:35:28 +00:00			`# TODO: use a model defined elsewhere, so that model can contain temperature and other settings we care about`
Consolidate calls to openai Starting to abstract away the calls to openai 2023-04-03 02:51:07 +00:00			`assistant_reply = create_chat_completion(`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`model=model,`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`messages=current_context,`
Vastly improves context window management. Now uses tokens and biggest context possible. 2023-04-03 10:28:06 +00:00			`max_tokens=tokens_remaining,`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00			`)`

			`# Update full message history`
formatting 2023-04-14 19:42:28 +00:00			`full_message_history.append(create_chat_message("user", user_input))`
Apply autopep8 formatting to entire codebase 2023-04-02 08:13:15 +00:00			`full_message_history.append(`
formatting 2023-04-14 19:42:28 +00:00			`create_chat_message("assistant", assistant_reply)`
			`)`
Implements handling of OpenAI rate limit error. 2023-04-01 00:30:13 +00:00
			`return assistant_reply`
Fixes incorrect handling of openai RateLimitExceeded errors. 2023-04-02 12:46:31 +00:00			`except openai.error.RateLimitError:`
Fix some typos 2023-04-12 02:10:37 +00:00			`# TODO: When we switch to langchain, this is built in`
Fixes incorrect handling of openai RateLimitExceeded errors. 2023-04-02 12:46:31 +00:00			`print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")`
			`time.sleep(10)`