import logging
import time

import openai
from dotenv import load_dotenv

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logger import logger

cfg = Config()

def create_chat_message(role, content):
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user",
            or "assistant".
        content (str): The content of the message.

    Returns:
        dict: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}

def generate_context(prompt, relevant_memory, full_message_history, model):
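    """
    Build the initial system context for a chat request and count its tokens.

    Args:
        prompt (str): The system prompt explaining the rules to the AI.
        relevant_memory: Relevant memories retrieved from permanent memory.
        full_message_history (list): The list of all messages sent between the
            user and the AI.
        model (str): The model name used for token counting.

    Returns:
        tuple: (next_message_to_add_index, current_tokens_used, insertion_index,
        current_context)
    """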
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """
    Interact with the OpenAI API, sending the prompt, user input, message
    history, and permanent memory.

    Args:
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between
            the user and the AI.
        permanent_memory (Obj): The memory object containing the permanent memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument
            # Reserve 1000 tokens for the response
            logger.debug(f"Token limit: {token_limit}")
            send_token_limit = token_limit - 1000

            # Use the last nine messages as the query for relevant memories
            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)
2023-04-04 00:31:01 +00:00
while current_tokens_used > 2500 :
# remove memories until we are under 2500 tokens
relevant_memory = relevant_memory [ 1 : ]
2023-04-14 19:42:28 +00:00
(
next_message_to_add_index ,
current_tokens_used ,
insertion_index ,
current_context ,
) = generate_context (
prompt , relevant_memory , full_message_history , model
)
current_tokens_used + = token_counter . count_message_tokens (
[ create_chat_message ( " user " , user_input ) ] , model
) # Account for user input (appended later)
            while next_message_to_add_index >= 0:
                # print(f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current
                # context, after the system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1

            # Append user input, the length of this is accounted for above
            current_context.extend([create_chat_message("user", user_input)])
            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")
            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except openai.error.RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
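
# Example usage: a minimal sketch only. It assumes a memory backend exposing
# get_relevant() and get_stats(), such as the object returned by
# autogpt.memory.get_memory(cfg); the prompt text and token_limit below are
# illustrative placeholders.
#
#     from autogpt.memory import get_memory
#
#     memory = get_memory(cfg)
#     reply = chat_with_ai(
#         prompt="You are an autonomous agent; decide which command to run next.",
#         user_input="Determine which next command to use.",
#         full_message_history=[],
#         permanent_memory=memory,
#         token_limit=4000,
#     )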