Implemented Selenium based web browsing. (#1397)

* Implemented Selenium based web browsing. Replaced the default web browsing function with one that uses Selenium to gather information with a visual and an overlay. Included a small bug fix for the Google API key check: the placeholder key from the template was treated as a real key, which caused an attempt to use the official Google search. * Fixed flake8 issues.
2023-04-14 13:35:19 -07:00 · 2023-04-14 13:35:19 -07:00 · 55eef983d4
parent 5389b2deb1
commit 55eef983d4
4 changed files with 185 additions and 16 deletions
--- a/autogpt/commands.py
+++ b/autogpt/commands.py
@ -13,7 +13,7 @@ from autogpt.image_gen import generate_image
 from duckduckgo_search import ddg
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
-
+from autogpt.web import browse_website
 cfg = Config()


@ -60,7 +60,8 @@ def execute_command(command_name, arguments):

            # Check if the Google API key is set and use the official search method
            # If the API key is not set or has only whitespaces, use the unofficial search method
-            if cfg.google_api_key and (cfg.google_api_key.strip() if cfg.google_api_key else None):
+            key = cfg.google_api_key
+            if key and key.strip() and key != "your-google-api-key":
                return google_official_search(arguments["input"])
            else:
                return google_search(arguments["input"])
@ -174,20 +175,6 @@ def google_official_search(query, num_results=8):
    return search_results_links


-def browse_website(url, question):
-    """Browse a website and return the summary and links"""
-    summary = get_text_summary(url, question)
-    links = get_hyperlinks(url)
-
-    # Limit links to 5
-    if len(links) > 5:
-        links = links[:5]
-
-    result = f"""Website Content Summary: {summary}\n\nLinks: {links}"""
-
-    return result
-
-
 def get_text_summary(url, question):
    """Return the results of a google search"""
    text = browse.scrape_text(url)
--- a/autogpt/js/overlay.js
+++ b/autogpt/js/overlay.js
@ -0,0 +1,29 @@
+// Overlay injected into the browsed page: a dimmed, full-viewport layer with
+// an animated status message shown while the page is being analyzed.
+const BASE_MESSAGE = 'AutoGPT Analyzing Page';
+
+// Fixed, semi-transparent backdrop that centers its single child.
+const backdrop = document.createElement('div');
+Object.assign(backdrop.style, {
+    position: 'fixed',
+    zIndex: 999999,
+    top: 0,
+    left: 0,
+    width: '100%',
+    height: '100%',
+    background: 'rgba(0, 0, 0, 0.7)',
+    color: '#fff',
+    fontSize: '24px',
+    fontWeight: 'bold',
+    display: 'flex',
+    justifyContent: 'center',
+    alignItems: 'center',
+});
+
+// Text node holder for the status message.
+const label = document.createElement('div');
+Object.assign(label.style, {
+    textAlign: 'center',
+});
+label.textContent = BASE_MESSAGE;
+
+backdrop.appendChild(label);
+document.body.append(backdrop);
+// Hide body overflow for as long as the overlay covers the page.
+document.body.style.overflow = 'hidden';
+
+// Every second, show the message followed by 0, 1, 2, then 3 dots, and wrap.
+let dots = 0;
+setInterval(() => {
+    label.textContent = BASE_MESSAGE + '.'.repeat(dots);
+    dots = (dots + 1) % 4;
+}, 1000);
--- a/autogpt/summary.py
+++ b/autogpt/summary.py
@ -0,0 +1,67 @@
+from autogpt.llm_utils import create_chat_completion
+
+
+def summarize_text(driver, text, question):
+    """Summarize ``text`` with respect to ``question`` using the chat model.
+
+    The text is split into chunks; each chunk is summarized on its own while
+    the browser window is scrolled proportionally to the chunk index. The
+    per-chunk summaries are then joined with newlines and summarized once
+    more to produce the final answer.
+
+    Args:
+        driver: Browser driver passed to ``scroll_to_percentage``.
+        text: Page text to summarize.
+        question: Question the summary should try to answer.
+
+    Returns:
+        The final completion, or an error string when ``text`` is empty.
+    """
+    if not text:
+        return "Error: No text to summarize"
+
+    print(f"Text length: {len(text)} characters")
+
+    chunks = list(split_text(text))
+    total = len(chunks)
+    # Fraction of the page height to advance per chunk.
+    step = 1 / total
+
+    partial_summaries = []
+    for index, chunk in enumerate(chunks):
+        scroll_to_percentage(driver, step * index)
+        print(f"Summarizing chunk {index + 1} / {total}")
+        chunk_prompt = [create_message(chunk, question)]
+        partial_summaries.append(
+            create_chat_completion(
+                model="gpt-3.5-turbo",
+                messages=chunk_prompt,
+                max_tokens=300,
+            )
+        )
+
+    print(f"Summarized {total} chunks.")
+
+    # Second pass: condense the per-chunk summaries into one answer.
+    final_prompt = [create_message("\n".join(partial_summaries), question)]
+    return create_chat_completion(
+        model="gpt-3.5-turbo",
+        messages=final_prompt,
+        max_tokens=300,
+    )
+
+
+def split_text(text, max_length=8192):
+    """Split ``text`` into newline-joined chunks of bounded size.
+
+    The text is split on newline characters and the resulting paragraphs are
+    packed greedily: a paragraph is added to the current chunk as long as the
+    running total (each paragraph counted as its length plus one for the
+    separator) stays within ``max_length``.
+
+    A single paragraph longer than ``max_length`` is not broken up; it is
+    emitted as a chunk of its own.
+
+    Args:
+        text: The text to split.
+        max_length: Character budget per chunk.
+
+    Yields:
+        The chunks, in order, each a newline-joined group of paragraphs.
+    """
+    current_length = 0
+    current_chunk = []
+
+    for paragraph in text.split("\n"):
+        paragraph_cost = len(paragraph) + 1
+        if current_length + paragraph_cost <= max_length:
+            current_chunk.append(paragraph)
+            current_length += paragraph_cost
+            continue
+
+        # Flush only when something was collected; otherwise an oversized
+        # paragraph arriving on an empty chunk would emit a spurious "".
+        if current_chunk:
+            yield "\n".join(current_chunk)
+        current_chunk = [paragraph]
+        current_length = paragraph_cost
+
+    if current_chunk:
+        yield "\n".join(current_chunk)
+
+
+def create_message(chunk, question):
+    """Build the user chat message asking ``question`` about ``chunk``.
+
+    The chunk is wrapped in triple double-quotes and followed by an
+    instruction to answer the question, or to summarize the text when the
+    question cannot be answered from it.
+
+    Returns:
+        A dict with ``"role"`` set to ``"user"`` and the prompt in ``"content"``.
+    """
+    prompt = f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
+    return {"role": "user", "content": prompt}
+
+
+def scroll_to_percentage(driver, ratio):
+    """Scroll the page vertically to ``ratio`` of the body's scroll height.
+
+    Args:
+        driver: Object whose ``execute_script`` runs JavaScript in the page.
+        ratio: Target position as a fraction between 0 and 1 inclusive.
+
+    Raises:
+        ValueError: If ``ratio`` is below 0 or above 1.
+    """
+    out_of_range = ratio < 0 or ratio > 1
+    if out_of_range:
+        raise ValueError("Percentage should be between 0 and 1")
+
+    scroll_script = f"window.scrollTo(0, document.body.scrollHeight * {ratio});"
+    driver.execute_script(scroll_script)
--- a/autogpt/web.py
+++ b/autogpt/web.py
@ -0,0 +1,86 @@
+from duckduckgo_search import ddg
+from selenium import webdriver
+import autogpt.summary as summary
+from bs4 import BeautifulSoup
+import json
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.chrome.service import Service as ChromeService
+from selenium.webdriver.support import expected_conditions as EC
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.keys import Keys
+import os
+import logging
+from pathlib import Path
+from autogpt.config import Config
+file_dir = Path(__file__).parent
+cfg = Config()
+
+
+def browse_website(url, question):
+    """Open ``url`` in a browser, summarize it for ``question`` and list links.
+
+    The page is loaded, the overlay script is injected, the page text is
+    summarized, and up to five formatted hyperlinks are collected.
+
+    Args:
+        url: Address of the page to browse.
+        question: Question the summary should try to answer.
+
+    Returns:
+        A 2-tuple of the result string and the driver. The driver has
+        already been quit by the time it is returned.
+    """
+    driver, text = scrape_text_with_selenium(url)
+    try:
+        add_header(driver)
+        summary_text = summary.summarize_text(driver, text, question)
+        # Limit links to 5; slicing leaves shorter lists untouched.
+        links = scrape_links_with_selenium(driver)[:5]
+    finally:
+        # Always shut the browser down, even when summarizing or link
+        # scraping raises, so no browser process is left behind.
+        close_browser(driver)
+    return f"Answer gathered from website: {summary_text} \n \n Links: {links}", driver
+
+
+def scrape_text_with_selenium(url):
+    """Load ``url`` in Chrome and return the driver with the page's text.
+
+    A Chrome driver is started (binary resolved by ``ChromeDriverManager``)
+    with a fixed desktop user-agent, the page is loaded, and the function
+    waits up to 10 seconds for a ``<body>`` element. The body HTML is then
+    parsed, ``<script>``/``<style>`` elements are removed, and the remaining
+    text is normalized to one non-empty phrase per line.
+
+    Args:
+        url: Address of the page to load.
+
+    Returns:
+        A 2-tuple ``(driver, text)``; the driver is still open and the
+        caller is responsible for quitting it.
+
+    Raises:
+        Exception: Any error raised while loading or parsing the page is
+            re-raised after the browser has been quit.
+    """
+    logging.getLogger('selenium').setLevel(logging.CRITICAL)
+
+    options = Options()
+    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36')
+    # Pass the driver path through a Service object; the ``executable_path``
+    # keyword of ``webdriver.Chrome`` is deprecated in Selenium 4.
+    driver = webdriver.Chrome(
+        service=ChromeService(ChromeDriverManager().install()),
+        options=options,
+    )
+
+    try:
+        driver.get(url)
+
+        WebDriverWait(driver, 10).until(
+            EC.presence_of_element_located((By.TAG_NAME, 'body'))
+        )
+
+        # Get the HTML content directly from the browser's DOM
+        page_source = driver.execute_script("return document.body.outerHTML;")
+        soup = BeautifulSoup(page_source, "html.parser")
+
+        for script in soup(["script", "style"]):
+            script.extract()
+
+        text = soup.get_text()
+    except Exception:
+        # The caller never receives the driver on failure, so release the
+        # browser here before propagating the error.
+        driver.quit()
+        raise
+
+    # Strip each line, break lines on double spaces, and drop empty pieces.
+    lines = (line.strip() for line in text.splitlines())
+    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+    text = '\n'.join(chunk for chunk in chunks if chunk)
+    return driver, text
+
+
+def scrape_links_with_selenium(driver):
+    """Return the formatted hyperlinks found in the driver's current page.
+
+    The driver's ``page_source`` is parsed, ``<script>`` and ``<style>``
+    elements are removed, and every anchor with an ``href`` is rendered by
+    ``format_hyperlinks``.
+    """
+    parsed_page = BeautifulSoup(driver.page_source, "html.parser")
+
+    # Remove script/style elements before collecting anchors.
+    for unwanted_tag in parsed_page(["script", "style"]):
+        unwanted_tag.extract()
+
+    return format_hyperlinks(extract_hyperlinks(parsed_page))
+
+
+def close_browser(driver):
+    """Shut down the browser session by calling ``driver.quit()``."""
+    driver.quit()
+
+
+def extract_hyperlinks(soup):
+    """Collect ``(link text, href)`` pairs for every ``<a>`` that has an href.
+
+    Args:
+        soup: Parsed document exposing ``find_all``.
+
+    Returns:
+        A list of ``(text, href)`` tuples in document order.
+    """
+    pairs = []
+    for anchor in soup.find_all('a', href=True):
+        pairs.append((anchor.text, anchor['href']))
+    return pairs
+
+
+def format_hyperlinks(hyperlinks):
+    """Render ``(text, url)`` pairs as ``"text (url)"`` strings.
+
+    Args:
+        hyperlinks: Iterable of ``(link_text, link_url)`` pairs.
+
+    Returns:
+        A list with one formatted string per pair, in the same order.
+    """
+    formatted = []
+    for label, target in hyperlinks:
+        formatted.append(f"{label} ({target})")
+    return formatted
+
+
+def add_header(driver):
+    """Inject the overlay script from ``js/overlay.js`` into the current page.
+
+    The script is read from the ``js`` directory next to this module and
+    executed through ``driver.execute_script``.
+    """
+    # Read inside a context manager so the file handle is closed promptly
+    # rather than left open until garbage collection.
+    with open(f"{file_dir}/js/overlay.js", "r") as overlay_file:
+        overlay_script = overlay_file.read()
+    driver.execute_script(overlay_script)