From 8a08b0c01b5be1b70e58a1d6817d4f85c54465ef Mon Sep 17 00:00:00 2001 From: Preston Jensen Date: Mon, 3 Apr 2023 19:20:42 -0600 Subject: [PATCH] ask questions on websites --- scripts/browse.py | 53 +++++++++++++---------------------------- scripts/commands.py | 12 +++++----- scripts/data/prompt.txt | 2 +- 3 files changed, 24 insertions(+), 43 deletions(-) diff --git a/scripts/browse.py b/scripts/browse.py index f096c5f3e..510f9c29d 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -74,30 +74,25 @@ def split_text(text, max_length=8192): yield "\n".join(current_chunk) -def summarize_text(text, is_website=True): - if text == "": +def create_message(chunk, question): + return { + "role": "user", + "content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text." + } + +def summarize_text(text, question): + if not text: return "Error: No text to summarize" - print("Text length: " + str(len(text)) + " characters") + text_length = len(text) + print(f"Text length: {text_length} characters") + summaries = [] chunks = list(split_text(text)) for i, chunk in enumerate(chunks): - print("Summarizing chunk " + str(i + 1) + " / " + str(len(chunks))) - if is_website: - messages = [ - { - "role": "user", - "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + - chunk}, - ] - else: - messages = [ - { - "role": "user", - "content": "Please summarize the following text, focusing on extracting concise and specific information: " + - chunk}, - ] + print(f"Summarizing chunk {i + 1} / {len(chunks)}") + messages = [create_message(chunk, question)] summary = create_chat_completion( model=cfg.fast_llm_model, @@ -105,25 +100,11 @@ def summarize_text(text, is_website=True): max_tokens=300, ) summaries.append(summary) - print("Summarized " + str(len(chunks)) + " chunks.") + + print(f"Summarized {len(chunks)} chunks.") combined_summary = "\n".join(summaries) - - # Summarize the combined summary - if is_website: - messages = [ - { - "role": "user", - "content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " + - combined_summary}, - ] - else: - messages = [ - { - "role": "user", - "content": "Please summarize the following text, focusing on extracting concise and specific infomation: " + - combined_summary}, - ] + messages = [create_message(combined_summary, question)] final_summary = create_chat_completion( model=cfg.fast_llm_model, @@ -131,4 +112,4 @@ def summarize_text(text, is_website=True): max_tokens=300, ) - return final_summary + return final_summary \ No newline at end of file diff --git a/scripts/commands.py b/scripts/commands.py index 8ad953364..ed789c8e9 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -72,7 +72,7 @@ def execute_command(command_name, arguments): elif command_name == "delete_agent": return delete_agent(arguments["key"]) elif command_name == "get_text_summary": - return get_text_summary(arguments["url"]) + return get_text_summary(arguments["url"], arguments["question"]) elif command_name == "get_hyperlinks": return get_hyperlinks(arguments["url"]) elif command_name == "read_file": @@ -84,7 +84,7 @@ def execute_command(command_name, arguments): elif command_name == "delete_file": return delete_file(arguments["file"]) elif command_name == "browse_website": - return browse_website(arguments["url"]) + return browse_website(arguments["url"], arguments["question"]) # TODO: Change these to take in a file rather than pasted code, if # non-file is given, return instructions "Input should be a python # filepath, write your code to file and try again" @@ -152,8 +152,8 @@ def google_official_search(query, num_results=8): # Return the list of search result URLs return search_results_links -def browse_website(url): - summary = get_text_summary(url) +def browse_website(url, question): + summary = get_text_summary(url, question) links = get_hyperlinks(url) # Limit links to 5 @@ -165,9 +165,9 @@ def browse_website(url): return result -def get_text_summary(url): +def get_text_summary(url, question): text = browse.scrape_text(url) - summary = browse.summarize_text(text) + summary = browse.summarize_text(text, question) return """ "Result" : """ + summary diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index d17fa27a1..1596c8105 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -9,7 +9,7 @@ COMMANDS: 2. Memory Add: "memory_add", args: "string": "" 3. Memory Delete: "memory_del", args: "key": "" 4. Memory Overwrite: "memory_ovr", args: "key": "", "string": "" -5. Browse Website: "browse_website", args: "url": "" +5. Browse Website: "browse_website", args: "url": "", "question": "" 6. Start GPT Agent: "start_agent", args: "name": , "task": "", "prompt": "" 7. Message GPT Agent: "message_agent", args: "key": "", "message": "" 8. List GPT Agents: "list_agents", args: ""