From 57412bcf4e85c7edff4f021c34390658ee80eb06 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Thu, 6 Apr 2023 21:16:05 -0700 Subject: [PATCH 1/9] add kandinsky support - cuda issue --- scripts/image_gen.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 scripts/image_gen.py diff --git a/scripts/image_gen.py b/scripts/image_gen.py new file mode 100644 index 000000000..cdc4fc4d7 --- /dev/null +++ b/scripts/image_gen.py @@ -0,0 +1,44 @@ +from kandinsky2 import get_kandinsky2 +from config import Config + +cfg = Config() + +def generate_image(prompt): + + model = get_kandinsky2('cuda', task_type='text2img', model_version='2.1', use_flash_attention=False) + images = model.generate_text2img( + "red cat, 4k photo", # prompt + num_steps=100, + batch_size=1, + guidance_scale=4, + h=768, w=768, + sampler='p_sampler', + prior_cf_scale=4, + prior_steps="5" + ) + return images + + # base_url = 'http://export.arxiv.org/api/query?' + # query = f'search_query=all:{search_query}&start=0&max_results={max_results}' + # url = base_url + query + # response = requests.get(url) + + # if response.status_code == 200: + # soup = BeautifulSoup(response.content, 'xml') + # entries = soup.find_all('entry') + + # articles = [] + # for entry in entries: + # title = entry.title.text.strip() + # url = entry.id.text.strip() + # published = entry.published.text.strip() + + # articles.append({ + # 'title': title, + # 'url': url, + # 'published': published + # }) + + # return articles + # else: + # return None From 28cc9865e487dec005ef98cffca059cf81c1c806 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 08:02:48 -0700 Subject: [PATCH 2/9] feat(ImageGen): add stable diffusion support --- scripts/commands.py | 3 +++ scripts/config.py | 2 ++ scripts/data/prompt.txt | 1 + scripts/image_gen.py | 54 +++++++++++++++-------------------------- 4 files changed, 25 insertions(+), 35 deletions(-) diff --git a/scripts/commands.py b/scripts/commands.py index fc10d1d05..bf8d79833 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -9,6 +9,7 @@ import ai_functions as ai from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files from execute_code import execute_python_file from json_parser import fix_and_parse_json +from image_gen import generate_image from duckduckgo_search import ddg from googleapiclient.discovery import build from googleapiclient.errors import HttpError @@ -102,6 +103,8 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) + elif command_name == "generate_image": # Add this command + return generate_image(arguments["prompt"]) elif command_name == "task_complete": shutdown() else: diff --git a/scripts/config.py b/scripts/config.py index fe48d2980..2eca16751 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -53,6 +53,8 @@ class Config(metaclass=Singleton): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") + # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index 28797d9e2..363342c07 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -23,6 +23,7 @@ COMMANDS: 17. Write Tests: "write_tests", args: "code": "", "focus": "" 18. Execute Python File: "execute_python_file", args: "file": "" 19. Task Complete (Shutdown): "task_complete", args: "reason": "" +20. Generate Image: "generate_image", args: "prompt": "" RESOURCES: diff --git a/scripts/image_gen.py b/scripts/image_gen.py index cdc4fc4d7..bb3e7686e 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -1,44 +1,28 @@ -from kandinsky2 import get_kandinsky2 +import requests +import io +import os.path +from PIL import Image from config import Config +import uuid cfg = Config() +working_directory = "auto_gpt_workspace" + +API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" +headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} + def generate_image(prompt): - - model = get_kandinsky2('cuda', task_type='text2img', model_version='2.1', use_flash_attention=False) - images = model.generate_text2img( - "red cat, 4k photo", # prompt - num_steps=100, - batch_size=1, - guidance_scale=4, - h=768, w=768, - sampler='p_sampler', - prior_cf_scale=4, - prior_steps="5" - ) - return images - - # base_url = 'http://export.arxiv.org/api/query?' - # query = f'search_query=all:{search_query}&start=0&max_results={max_results}' - # url = base_url + query - # response = requests.get(url) + response = requests.post(API_URL, headers=headers, json={ + "inputs": prompt, + }) + image = Image.open(io.BytesIO(response.content)) + print("Image Generated for prompt:" + prompt) - # if response.status_code == 200: - # soup = BeautifulSoup(response.content, 'xml') - # entries = soup.find_all('entry') + filename = str(uuid.uuid4()) + ".jpg" - # articles = [] - # for entry in entries: - # title = entry.title.text.strip() - # url = entry.id.text.strip() - # published = entry.published.text.strip() + image.save(os.path.join(working_directory, filename)) - # articles.append({ - # 'title': title, - # 'url': url, - # 'published': published - # }) + print("Saved to disk:" + filename) - # return articles - # else: - # return None + return str("Image " + filename + " saved to disk for prompt: " + prompt) From b56b04e86f4bcd93297cbe48efb8d9117be2566e Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 11:03:23 -0700 Subject: [PATCH 3/9] feat(ImageGen): add DALL-E support --- scripts/image_gen.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/scripts/image_gen.py b/scripts/image_gen.py index bb3e7686e..92cda2908 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -4,6 +4,8 @@ import os.path from PIL import Image from config import Config import uuid +import openai +from base64 import b64decode cfg = Config() @@ -13,16 +15,36 @@ API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion- headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} def generate_image(prompt): + + filename = str(uuid.uuid4()) + ".jpg" + + # DALL-E + openai.api_key = cfg.openai_api_key + + response = openai.Image.create( + prompt=prompt, + n=1, + size="256x256", + response_format="b64_json", + ) + + print("Image Generated for prompt:" + prompt) + print(response["data"][0]["b64_json"][:50]) + + image_data = b64decode(response["data"][0]["b64_json"]) + with open(working_directory + "/" + filename, mode="wb") as png: + png.write(image_data) + + return "Saved to disk:" + filename + + # STABLE DIFFUSION response = requests.post(API_URL, headers=headers, json={ "inputs": prompt, }) image = Image.open(io.BytesIO(response.content)) print("Image Generated for prompt:" + prompt) - filename = str(uuid.uuid4()) + ".jpg" - image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) return str("Image " + filename + " saved to disk for prompt: " + prompt) From f3e64ec4e9128d4757bf5ffadbb73a9b144b2ecb Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 11:29:43 -0700 Subject: [PATCH 4/9] feat(ImageGen): support env vars, update readme --- .env.template | 4 ++- README.md | 11 +++++++++ scripts/image_gen.py | 59 +++++++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/.env.template b/.env.template index e9ccda5ed..525cd61c5 100644 --- a/.env.template +++ b/.env.template @@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False OPENAI_API_BASE=your-base-url-for-azure OPENAI_API_VERSION=api-version-for-azure -OPENAI_DEPLOYMENT_ID=deployment-id-for-azure \ No newline at end of file +OPENAI_DEPLOYMENT_ID=deployment-id-for-azure +IMAGE_PROVIDER=dalle +HUGGINGFACE_API_TOKEN= \ No newline at end of file diff --git a/README.md b/README.md index a89c5d03b..f6cf6093e 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Your support is greatly appreciated - [Setting up environment variables](#setting-up-environment-variables) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) + - [🖼 Image Generation](#image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) @@ -169,6 +170,7 @@ Or you can set them in the `.env` file. 1. View memory usage by using the `--debug` flag :) + ## 💀 Continuous Mode ⚠️ Run the AI **without** user authorisation, 100% automated. Continuous mode is not recommended. @@ -187,6 +189,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G python scripts/main.py --gpt3only ``` +## 🖼 Image Generation +By default, Auto-GPT uses DALL-e for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required. + +Once you have a token, set these variables in your `.env`: +``` +IMAGE_PROVIDER=sd +HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN" +``` + ## ⚠️ Limitations This experiment aims to showcase the potential of GPT-4 but comes with some limitations: diff --git a/scripts/image_gen.py b/scripts/image_gen.py index 92cda2908..deda7ed5a 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -11,40 +11,49 @@ cfg = Config() working_directory = "auto_gpt_workspace" -API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" -headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - def generate_image(prompt): filename = str(uuid.uuid4()) + ".jpg" - + # DALL-E - openai.api_key = cfg.openai_api_key + if cfg.image_provider == 'dalle': + + openai.api_key = cfg.openai_api_key - response = openai.Image.create( - prompt=prompt, - n=1, - size="256x256", - response_format="b64_json", - ) + response = openai.Image.create( + prompt=prompt, + n=1, + size="256x256", + response_format="b64_json", + ) - print("Image Generated for prompt:" + prompt) - print(response["data"][0]["b64_json"][:50]) + print("Image Generated for prompt:" + prompt) + print(response["data"][0]["b64_json"][:50]) - image_data = b64decode(response["data"][0]["b64_json"]) - with open(working_directory + "/" + filename, mode="wb") as png: - png.write(image_data) + image_data = b64decode(response["data"][0]["b64_json"]) - return "Saved to disk:" + filename + with open(working_directory + "/" + filename, mode="wb") as png: + png.write(image_data) + + return "Saved to disk:" + filename # STABLE DIFFUSION - response = requests.post(API_URL, headers=headers, json={ - "inputs": prompt, - }) - image = Image.open(io.BytesIO(response.content)) - print("Image Generated for prompt:" + prompt) + elif cfg.image_provider == 'sd': - image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) + API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" + headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - return str("Image " + filename + " saved to disk for prompt: " + prompt) + response = requests.post(API_URL, headers=headers, json={ + "inputs": prompt, + }) + + image = Image.open(io.BytesIO(response.content)) + print("Image Generated for prompt:" + prompt) + + image.save(os.path.join(working_directory, filename)) + print("Saved to disk:" + filename) + + return str("Image " + filename + " saved to disk for prompt: " + prompt) + + else: + return "No Image Provider Set" \ No newline at end of file From 091db1d4c3db6a3bf4bc50e882f299719bc65c60 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 12:47:49 -0700 Subject: [PATCH 5/9] chore(ImageGen): cleanup --- scripts/config.py | 1 + scripts/image_gen.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 2eca16751..959c3eb22 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -53,6 +53,7 @@ class Config(metaclass=Singleton): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + self.image_provider = os.getenv("IMAGE_PROVIDER") self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") # User agent headers to use when browsing web diff --git a/scripts/image_gen.py b/scripts/image_gen.py index deda7ed5a..185ed4278 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -17,7 +17,7 @@ def generate_image(prompt): # DALL-E if cfg.image_provider == 'dalle': - + openai.api_key = cfg.openai_api_key response = openai.Image.create( @@ -28,7 +28,6 @@ def generate_image(prompt): ) print("Image Generated for prompt:" + prompt) - print(response["data"][0]["b64_json"][:50]) image_data = b64decode(response["data"][0]["b64_json"]) @@ -51,9 +50,8 @@ def generate_image(prompt): print("Image Generated for prompt:" + prompt) image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) - return str("Image " + filename + " saved to disk for prompt: " + prompt) + return "Saved to disk:" + filename else: return "No Image Provider Set" \ No newline at end of file From 9328c8f7b5b9b7eb76dd131f36ad6109e8b28e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Machado=20de=20Oliveira?= Date: Sat, 8 Apr 2023 00:15:14 -0300 Subject: [PATCH 6/9] Settings were being saved and loaded in the wrong directory --- scripts/ai_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ai_config.py b/scripts/ai_config.py index 2f4327486..8cfa183a9 100644 --- a/scripts/ai_config.py +++ b/scripts/ai_config.py @@ -1,6 +1,6 @@ import yaml import data - +import os class AIConfig: def __init__(self, ai_name="", ai_role="", ai_goals=[]): @@ -9,7 +9,7 @@ class AIConfig: self.ai_goals = ai_goals # Soon this will go in a folder where it remembers more stuff about the run(s) - SAVE_FILE = "../ai_settings.yaml" + SAVE_FILE = os.path.join(os.path.dirname(__file__), '..', 'ai_settings.yaml') @classmethod def load(cls, config_file=SAVE_FILE): From cc05139843853ab2ac091c38253a810e418e675c Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Sat, 8 Apr 2023 05:24:53 +0100 Subject: [PATCH 7/9] Update README.md --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 4fc0c349e..6e92e6b95 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Auto-GPT is an experimental open-source application showcasing the capabilities https://user-images.githubusercontent.com/22963551/228855501-2f5777cf-755b-4407-a643-c7299e5b6419.mp4 -## 💖 Help Fund Auto-GPT's Development +

💖 Help Fund Auto-GPT's Development 💖

If you can spare a coffee, you can help to cover the API costs of developing Auto-GPT and help push the boundaries of fully autonomous AI! A full day of development can easily cost as much as $20 in API costs, which for a free project is quite limiting. @@ -17,14 +17,13 @@ Your support is greatly appreciated

- Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here. 💖 -

-

-thepok  SpacingLily  m  zkonduit  maxxflyer  tekelsey  nocodeclarity  tjarmain  alexisneuhaus  jaumebalust  robinicus  digisomni   -

+ Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here. +

Individual Sponsors

-alexisneuhaus  iokode  jaumebalust  nova-land  robinicus  Void-n-Null  ritesh24  merwanehamadi  raulmarindev  siduppal  goosecubedaddy  pleabargain   +robinicus  prompthero  crizzler  tob-le-rone  FSTatSBS  toverly1  ddtarazona  Nalhos  Kazamario  pingbotan  indoor47  AuroraHolding  kreativai  hunteraraujo  Explorergt92  judegomila   +thepok +  SpacingLily  merwanehamadi  m  zkonduit  maxxflyer  tekelsey  digisomni  nocodeclarity  tjarmain

From 8b36a5cfd33e38641d594bba10c6fe7356438938 Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Sat, 8 Apr 2023 12:27:05 +0100 Subject: [PATCH 8/9] Removes comment --- scripts/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/commands.py b/scripts/commands.py index bf8d79833..a45fb8963 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -103,7 +103,7 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) - elif command_name == "generate_image": # Add this command + elif command_name == "generate_image": return generate_image(arguments["prompt"]) elif command_name == "task_complete": shutdown() From 85d0d27045c2f426fc9618fa8a96b7c89fbdf82a Mon Sep 17 00:00:00 2001 From: "Jonathan S. Rouach" Date: Sat, 8 Apr 2023 15:20:10 +0300 Subject: [PATCH 9/9] fix: add Pillow dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index ce2470985..7b1040401 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ docker duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 +Pillow