import time

import openai
from dotenv import load_dotenv

from config import Config
from llm_utils import create_chat_completion
import token_counter

cfg = Config()


def create_chat_message(role, content):
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user", or "assistant".
        content (str): The content of the message.

    Returns:
        dict: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}
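

# For example, create_chat_message("user", "hello") returns
# {"role": "user", "content": "hello"}, which is the message format the
# OpenAI chat completions API expects.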


def generate_context(prompt, relevant_memory, full_message_history, model):
    """Build the initial context (system prompt plus relevant memory) and return
    the bookkeeping values needed to fill the remaining token budget."""
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n")]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return next_message_to_add_index, current_tokens_used, insertion_index, current_context
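

# A minimal sketch of what generate_context produces (the model name and memory
# snippet below are illustrative, not values taken from the config):
#
#     next_idx, used, insert_at, context = generate_context(
#         "You are an agent.", ["old snippet"], [], "gpt-3.5-turbo")
#     # context == [
#     #     {"role": "system", "content": "You are an agent."},
#     #     {"role": "system",
#     #      "content": "This reminds you of these events from your past:\n['old snippet']\n\n"},
#     # ]
#     # insert_at == 2 and next_idx == -1 because the history is empty.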

# TODO: Change debug from hardcode to argument
def chat_with_ai(
        prompt,
        user_input,
        full_message_history,
        permanent_memory,
        token_limit,
        debug=False):
    """
    Interact with the OpenAI API, sending the prompt, user input, message history, and permanent memory.

    Args:
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between the user and the AI.
        permanent_memory (obj): The memory object containing the permanent memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument

            if debug:
                print(f"Token limit: {token_limit}")

            # Reserve 1000 tokens for the response
            send_token_limit = token_limit - 1000

            relevant_memory = permanent_memory.get_relevant(str(full_message_history[-5:]), 10)

            if debug:
                print('Memory Stats: ', permanent_memory.get_stats())

            next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
                prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # Remove memories until we are under 2500 tokens
                relevant_memory = relevant_memory[1:]
                next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
                    prompt, relevant_memory, full_message_history, model)

            # Account for the user input message, which is appended later
            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model)
            while next_message_to_add_index >= 0:
                # print(f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens([message_to_add], model)
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context,
                # after the two system prompts.
                current_context.insert(insertion_index, full_message_history[next_message_to_add_index])

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1

            # Append user input; its length is already accounted for above
            current_context.extend([create_chat_message("user", user_input)])

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative. This should never happen, please submit a bug report at https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            if debug:
                print(f"Token limit: {token_limit}")
                print(f"Send Token Count: {current_tokens_used}")
                print(f"Tokens remaining for response: {tokens_remaining}")
                print("------------ CONTEXT SENT TO AI ---------------")
                for message in current_context:
                    # Skip printing the prompt
                    if message["role"] == "system" and message["content"] == prompt:
                        continue
                    print(f"{message['role'].capitalize()}: {message['content']}")
                    print()
                print("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(create_chat_message("assistant", assistant_reply))

            return assistant_reply
        except openai.error.RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
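

# A minimal usage sketch (hypothetical caller; the real Auto-GPT entry point
# builds its prompt, history, and memory elsewhere). The stub memory below only
# provides the two methods chat_with_ai relies on: get_relevant(text, k) and
# get_stats(). Running it requires valid OpenAI credentials in the config.
if __name__ == "__main__":
    class _StubMemory:
        def get_relevant(self, text, num_relevant):
            return []

        def get_stats(self):
            return {}

    reply = chat_with_ai(
        prompt="You are a helpful assistant. Answer concisely.",
        user_input="Say hello.",
        full_message_history=[],
        permanent_memory=_StubMemory(),
        token_limit=4000,
        debug=True,
    )
    print(reply)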