Merge pull request #122 from dschonholtz/pinecone-memory

Pinecone Vectorized Memory
2023-04-06 11:31:58 +01:00 · 2023-04-06 11:31:58 +01:00 · 48451e305a
parent a9451f4961 9067a523fe
commit 48451e305a
9 changed files with 153 additions and 29 deletions
--- a/.env.template
+++ b/.env.template
@ -1,3 +1,5 @@
+PINECONE_API_KEY=your-pinecone-api-key
+PINECONE_ENV=your-pinecone-region
 OPENAI_API_KEY=your-openai-api-key
 ELEVENLABS_API_KEY=your-elevenlabs-api-key
 SMART_LLM_MODEL="gpt-4"
--- a/README.md
+++ b/README.md
@ -140,6 +140,35 @@ export CUSTOM_SEARCH_ENGINE_ID="YOUR_CUSTOM_SEARCH_ENGINE_ID"

 ```

+## 🌲 Pinecone API Key Setup
+
+Pinecone enables a vector-based memory so that a vast memory can be stored and only relevant memories
+are loaded for the agent at any given time.
+
+1. Go to app.pinecone.io and make an account if you don't already have one.
+2. Choose the `Starter` plan to avoid being charged.
+3. Find your API key and region under the default project in the left sidebar.
+
+### Setting up environment variables
+For Windows users:
+```
+setx PINECONE_API_KEY "YOUR_PINECONE_API_KEY"
+setx PINECONE_ENV "Your region" # something like: us-east4-gcp
+
+```
+For macOS and Linux users:
+```
+export PINECONE_API_KEY="YOUR_PINECONE_API_KEY"
+export PINECONE_ENV="Your region" # something like: us-east4-gcp
+
+```
+
+Or you can set them in the `.env` file.
+
+## View Memory Usage
+
+1. View memory usage by using the `--debug` flag :)
+
 ## 💀 Continuous Mode ⚠️
 Run the AI **without** user authorisation, 100% automated.
 Continuous mode is not recommended. 
--- a/requirements.txt
+++ b/requirements.txt
@ -10,4 +10,5 @@ tiktoken==0.3.3
 gTTS==2.3.1
 docker
 duckduckgo-search
-google-api-python-client #(https://developers.google.com/custom-search/v1/overview) 
+google-api-python-client #(https://developers.google.com/custom-search/v1/overview)
+pinecone-client==2.2.1
--- a/scripts/chat.py
+++ b/scripts/chat.py
@ -23,6 +23,19 @@ def create_chat_message(role, content):
    return {"role": role, "content": content}


+def generate_context(prompt, relevant_memory, full_message_history, model):
+    current_context = [
+        create_chat_message(
+            "system", prompt), create_chat_message(
+            "system", f"Permanent memory: {relevant_memory}")]
+
+    # Add messages from the full message history until we reach the token limit
+    next_message_to_add_index = len(full_message_history) - 1
+    insertion_index = len(current_context)
+    # Count the currently used tokens
+    current_tokens_used = token_counter.count_message_tokens(current_context, model)
+    return next_message_to_add_index, current_tokens_used, insertion_index, current_context
+

 # TODO: Change debug from hardcode to argument
 def chat_with_ai(
@ -41,7 +54,7 @@ def chat_with_ai(
            prompt (str): The prompt explaining the rules to the AI.
            user_input (str): The input from the user.
            full_message_history (list): The list of all messages sent between the user and the AI.
-            permanent_memory (list): The list of items in the AI's permanent memory.
+            permanent_memory (Obj): The memory object containing the permanent memory.
            token_limit (int): The maximum number of tokens allowed in the API call.

            Returns:
@ -53,18 +66,20 @@ def chat_with_ai(
                print(f"Token limit: {token_limit}")
            send_token_limit = token_limit - 1000

-            current_context = [
-                create_chat_message(
-                    "system", prompt), create_chat_message(
-                    "system", f"Permanent memory: {permanent_memory}")]                
+            relevant_memory = permanent_memory.get_relevant(str(full_message_history[-5:]), 10)

-            # Add messages from the full message history until we reach the token limit
-            next_message_to_add_index = len(full_message_history) - 1
-            current_tokens_used = 0
-            insertion_index = len(current_context)
+            if debug:
+                print('Memory Stats: ', permanent_memory.get_stats())
+
+            next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+                prompt, relevant_memory, full_message_history, model)
+
+            while current_tokens_used > 2500:
+                # remove memories until we are under 2500 tokens
+                relevant_memory = relevant_memory[1:]
+                next_message_to_add_index, current_tokens_used, insertion_index, current_context = generate_context(
+                    prompt, relevant_memory, full_message_history, model)

-            # Count the currently used tokens
-            current_tokens_used = token_counter.count_message_tokens(current_context, model)
            current_tokens_used += token_counter.count_message_tokens([create_chat_message("user", user_input)], model) # Account for user input (appended later)

            while next_message_to_add_index >= 0:
--- a/scripts/commands.py
+++ b/scripts/commands.py
@ -1,6 +1,6 @@
 import browse
 import json
-import memory as mem
+from memory import PineconeMemory
 import datetime
 import agent_manager as agents
 import speak
@ -52,6 +52,7 @@ def get_command(response):


 def execute_command(command_name, arguments):
+    memory = PineconeMemory()
    try:
        if command_name == "google":
            
@ -62,11 +63,7 @@ def execute_command(command_name, arguments):
            else:
                return google_search(arguments["input"])
        elif command_name == "memory_add":
-            return commit_memory(arguments["string"])
-        elif command_name == "memory_del":
-            return delete_memory(arguments["key"])
-        elif command_name == "memory_ovr":
-            return overwrite_memory(arguments["key"], arguments["string"])
+            return memory.add(arguments["string"])
        elif command_name == "start_agent":
            return start_agent(
                arguments["name"],
--- a/scripts/config.py
+++ b/scripts/config.py
@ -4,6 +4,7 @@ from dotenv import load_dotenv
 # Load environment variables from .env file
 load_dotenv()

+
 class Singleton(type):
    """
    Singleton metaclass for ensuring only one instance of a class.
@ -49,11 +50,13 @@ class Config(metaclass=Singleton):
        self.google_api_key = os.getenv("GOOGLE_API_KEY")
        self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID")

+        self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        self.pinecone_region = os.getenv("PINECONE_ENV")
+
        # User agent headers to use when browsing web
        # Some websites might just completely deny request with an error code if no user agent was found.
        self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}

-
        # Initialize the OpenAI API client
        openai.api_key = self.openai_api_key

@ -85,4 +88,10 @@ class Config(metaclass=Singleton):
        self.google_api_key = value
    
    def set_custom_search_engine_id(self, value: str):
-        self.custom_search_engine_id = value
+        self.custom_search_engine_id = value
+
+    def set_pinecone_api_key(self, value: str):
+        self.pinecone_api_key = value
+
+    def set_pinecone_region(self, value: str):
+        self.pinecone_region = value
--- a/scripts/data/prompt.txt
+++ b/scripts/data/prompt.txt
@ -1,15 +1,13 @@
 CONSTRAINTS:

-1. ~4000 word limit for memory. Your memory is short, so immediately save important information to long term memory and code to files.
-2. No user assistance
-3. Exclusively use the commands listed in double quotes e.g. "command name"
+1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
+2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
+3. No user assistance
+4. Exclusively use the commands listed in double quotes e.g. "command name"

 COMMANDS:

 1. Google Search: "google", args: "input": "<search>"
-2. Memory Add: "memory_add", args: "string": "<string>"
-3. Memory Delete: "memory_del", args: "key": "<key>"
-4. Memory Overwrite: "memory_ovr", args: "key": "<key>", "string": "<string>"
 5. Browse Website: "browse_website", args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
 6. Start GPT Agent: "start_agent",  args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
 7. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
--- a/scripts/main.py
+++ b/scripts/main.py
@ -1,7 +1,7 @@
 import json
 import random
 import commands as cmd
-import memory as mem
+from memory import PineconeMemory
 import data
 import chat
 from colorama import Fore, Style
@ -280,6 +280,13 @@ result = None
 # Make a constant:
 user_input = "Determine which next command to use, and respond using the format specified above:"

+# Initialize memory and make sure it is empty.
+# this is particularly important for indexing and referencing pinecone memory
+memory = PineconeMemory()
+memory.clear()
+
+print('Using memory of type: ' + memory.__class__.__name__)
+
 # Interaction Loop
 while True:
    # Send message to AI, get response
@ -288,7 +295,7 @@ while True:
            prompt,
            user_input,
            full_message_history,
-            mem.permanent_memory,
+            memory,
            cfg.fast_token_limit) # TODO: This hardcodes the model to use GPT3.5. Make this an argument

    # print("assistant reply: "+assistant_reply)
@ -349,6 +356,12 @@ while True:
    else:
        result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}"

+    memory_to_add = f"Assistant Reply: {assistant_reply} " \
+                    f"\nResult: {result} " \
+                    f"\nHuman Feedback: {user_input} "
+
+    memory.add(memory_to_add)
+
    # Check if there's a result from the command append it to the message
    # history
    if result is not None:
--- a/scripts/memory.py
+++ b/scripts/memory.py
@ -1 +1,61 @@
-permanent_memory = []
+from config import Config, Singleton
+import pinecone
+import openai
+
+cfg = Config()
+
+
+def get_ada_embedding(text):
+    text = text.replace("\n", " ")
+    return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
+
+
+def get_text_from_embedding(embedding):
+    return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"]
+
+
+class PineconeMemory(metaclass=Singleton):
+    def __init__(self):
+        pinecone_api_key = cfg.pinecone_api_key
+        pinecone_region = cfg.pinecone_region
+        pinecone.init(api_key=pinecone_api_key, environment=pinecone_region)
+        dimension = 1536
+        metric = "cosine"
+        pod_type = "p1"
+        table_name = "auto-gpt"
+        # this assumes we don't start with memory.
+        # for now this works.
+        # we'll need a more complicated and robust system if we want to start with memory.
+        self.vec_num = 0
+        if table_name not in pinecone.list_indexes():
+            pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type)
+        self.index = pinecone.Index(table_name)
+
+    def add(self, data):
+        vector = get_ada_embedding(data)
+        # the raw text is stored as metadata so queries can return it. We may wish to store more long term.
+        resp = self.index.upsert([(str(self.vec_num), vector, {"raw_text": data})])
+        _text = f"Inserting data into memory at index: {self.vec_num}:\n data: {data}"
+        self.vec_num += 1
+        return _text
+
+    def get(self, data):
+        return self.get_relevant(data, 1)
+
+    def clear(self):
+        self.index.delete(deleteAll=True)
+        return "Obliviated"
+
+    def get_relevant(self, data, num_relevant=5):
+        """
+        Returns all the data in the memory that is relevant to the given data.
+        :param data: The data to compare to.
+        :param num_relevant: The number of relevant data to return. Defaults to 5
+        """
+        query_embedding = get_ada_embedding(data)
+        results = self.index.query(query_embedding, top_k=num_relevant, include_metadata=True)
+        sorted_results = sorted(results.matches, key=lambda x: x.score)
+        return [str(item['metadata']["raw_text"]) for item in sorted_results]
+
+    def get_stats(self):
+        return self.index.describe_index_stats()