ask questions on websites
parent
f6c201f038
commit
8a08b0c01b
|
@ -74,30 +74,25 @@ def split_text(text, max_length=8192):
|
||||||
yield "\n".join(current_chunk)
|
yield "\n".join(current_chunk)
|
||||||
|
|
||||||
|
|
||||||
def summarize_text(text, is_website=True):
|
def create_message(chunk, question):
|
||||||
if text == "":
|
return {
|
||||||
|
"role": "user",
|
||||||
|
"content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
|
||||||
|
}
|
||||||
|
|
||||||
|
def summarize_text(text, question):
|
||||||
|
if not text:
|
||||||
return "Error: No text to summarize"
|
return "Error: No text to summarize"
|
||||||
|
|
||||||
print("Text length: " + str(len(text)) + " characters")
|
text_length = len(text)
|
||||||
|
print(f"Text length: {text_length} characters")
|
||||||
|
|
||||||
summaries = []
|
summaries = []
|
||||||
chunks = list(split_text(text))
|
chunks = list(split_text(text))
|
||||||
|
|
||||||
for i, chunk in enumerate(chunks):
|
for i, chunk in enumerate(chunks):
|
||||||
print("Summarizing chunk " + str(i + 1) + " / " + str(len(chunks)))
|
print(f"Summarizing chunk {i + 1} / {len(chunks)}")
|
||||||
if is_website:
|
messages = [create_message(chunk, question)]
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " +
|
|
||||||
chunk},
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Please summarize the following text, focusing on extracting concise and specific information: " +
|
|
||||||
chunk},
|
|
||||||
]
|
|
||||||
|
|
||||||
summary = create_chat_completion(
|
summary = create_chat_completion(
|
||||||
model=cfg.fast_llm_model,
|
model=cfg.fast_llm_model,
|
||||||
|
@ -105,25 +100,11 @@ def summarize_text(text, is_website=True):
|
||||||
max_tokens=300,
|
max_tokens=300,
|
||||||
)
|
)
|
||||||
summaries.append(summary)
|
summaries.append(summary)
|
||||||
print("Summarized " + str(len(chunks)) + " chunks.")
|
|
||||||
|
print(f"Summarized {len(chunks)} chunks.")
|
||||||
|
|
||||||
combined_summary = "\n".join(summaries)
|
combined_summary = "\n".join(summaries)
|
||||||
|
messages = [create_message(combined_summary, question)]
|
||||||
# Summarize the combined summary
|
|
||||||
if is_website:
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Please summarize the following website text, do not describe the general website, but instead concisely extract the specific information this subpage contains.: " +
|
|
||||||
combined_summary},
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Please summarize the following text, focusing on extracting concise and specific infomation: " +
|
|
||||||
combined_summary},
|
|
||||||
]
|
|
||||||
|
|
||||||
final_summary = create_chat_completion(
|
final_summary = create_chat_completion(
|
||||||
model=cfg.fast_llm_model,
|
model=cfg.fast_llm_model,
|
||||||
|
|
|
@ -72,7 +72,7 @@ def execute_command(command_name, arguments):
|
||||||
elif command_name == "delete_agent":
|
elif command_name == "delete_agent":
|
||||||
return delete_agent(arguments["key"])
|
return delete_agent(arguments["key"])
|
||||||
elif command_name == "get_text_summary":
|
elif command_name == "get_text_summary":
|
||||||
return get_text_summary(arguments["url"])
|
return get_text_summary(arguments["url"], arguments["question"])
|
||||||
elif command_name == "get_hyperlinks":
|
elif command_name == "get_hyperlinks":
|
||||||
return get_hyperlinks(arguments["url"])
|
return get_hyperlinks(arguments["url"])
|
||||||
elif command_name == "read_file":
|
elif command_name == "read_file":
|
||||||
|
@ -84,7 +84,7 @@ def execute_command(command_name, arguments):
|
||||||
elif command_name == "delete_file":
|
elif command_name == "delete_file":
|
||||||
return delete_file(arguments["file"])
|
return delete_file(arguments["file"])
|
||||||
elif command_name == "browse_website":
|
elif command_name == "browse_website":
|
||||||
return browse_website(arguments["url"])
|
return browse_website(arguments["url"], arguments["question"])
|
||||||
# TODO: Change these to take in a file rather than pasted code, if
|
# TODO: Change these to take in a file rather than pasted code, if
|
||||||
# non-file is given, return instructions "Input should be a python
|
# non-file is given, return instructions "Input should be a python
|
||||||
# filepath, write your code to file and try again"
|
# filepath, write your code to file and try again"
|
||||||
|
@ -152,8 +152,8 @@ def google_official_search(query, num_results=8):
|
||||||
# Return the list of search result URLs
|
# Return the list of search result URLs
|
||||||
return search_results_links
|
return search_results_links
|
||||||
|
|
||||||
def browse_website(url):
|
def browse_website(url, question):
|
||||||
summary = get_text_summary(url)
|
summary = get_text_summary(url, question)
|
||||||
links = get_hyperlinks(url)
|
links = get_hyperlinks(url)
|
||||||
|
|
||||||
# Limit links to 5
|
# Limit links to 5
|
||||||
|
@ -165,9 +165,9 @@ def browse_website(url):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def get_text_summary(url):
|
def get_text_summary(url, question):
|
||||||
text = browse.scrape_text(url)
|
text = browse.scrape_text(url)
|
||||||
summary = browse.summarize_text(text)
|
summary = browse.summarize_text(text, question)
|
||||||
return """ "Result" : """ + summary
|
return """ "Result" : """ + summary
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ COMMANDS:
|
||||||
2. Memory Add: "memory_add", args: "string": "<string>"
|
2. Memory Add: "memory_add", args: "string": "<string>"
|
||||||
3. Memory Delete: "memory_del", args: "key": "<key>"
|
3. Memory Delete: "memory_del", args: "key": "<key>"
|
||||||
4. Memory Overwrite: "memory_ovr", args: "key": "<key>", "string": "<string>"
|
4. Memory Overwrite: "memory_ovr", args: "key": "<key>", "string": "<string>"
|
||||||
5. Browse Website: "browse_website", args: "url": "<url>"
|
5. Browse Website: "browse_website", args: "url": "<url>", "question": "<what_you_want_to_find_on_website>"
|
||||||
6. Start GPT Agent: "start_agent", args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
|
6. Start GPT Agent: "start_agent", args: "name": <name>, "task": "<short_task_desc>", "prompt": "<prompt>"
|
||||||
7. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
|
7. Message GPT Agent: "message_agent", args: "key": "<key>", "message": "<message>"
|
||||||
8. List GPT Agents: "list_agents", args: ""
|
8. List GPT Agents: "list_agents", args: ""
|
||||||
|
|
Loading…
Reference in New Issue