Spaces:

joelorellana
/

demo_generative_img

Sleeping

App Files Files Community

joelorellana committed on Feb 27, 2024

Commit

70e3e61

0 Parent(s):

initial files for demo of img generation models

Browse files

Files changed (12) hide show

.gitignore +214 -0
app.py +1 -0
dalle_generate_img.py +39 -0
encode_image.py +18 -0
finetune_generate_img.py +57 -0
gpt_vision_prompt.py +58 -0
main.py +20 -0
midjourney_generate_img.py +69 -0
negative_prompt.py +0 -0
progress_bar.py +36 -0
stability_generate_img.py +68 -0
test.py +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,214 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python,macos
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,macos
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+# files
+config.py
+img/
+output_img/
+# End of https://www.toptal.com/developers/gitignore/api/python,macos

app.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # TODO: Streamlit demo

dalle_generate_img.py ADDED Viewed

	@@ -0,0 +1,39 @@

+"""Generate an image using the DALL-E API
+Keyword arguments:
+prompt -- The prompt to generate the image from
+Return: An image saved in a .png file
+"""
+import io
+from openai import OpenAI
+from PIL import Image
+import requests
+from config import OPENAI_API_KEY
+# Module-level OpenAI client, created at import time and used by generate_img_with_dalle
+client = OpenAI(api_key=OPENAI_API_KEY)
+def generate_img_with_dalle(prompt="", ):
+    """Generate an image using the DALL-E API"""
+    # DALL-E model parameters
+    size = '1024x1024'  # Choose between '1024x1024', '512x512', '256x256'
+    quality = 'hd'  # Choose between 'standard', 'hd'
+    # Generate image using DALL-E
+    print('Creating DALLE image...')
+    response = client.images.generate(
+    model="dall-e-3",
+    prompt=prompt,
+    size=size,
+    quality=quality,
+    n=1,
+    response_format="url")
+    image_url = response.data[0].url
+    # Download and save the image
+    print('Saving image...')
+    response = requests.get(image_url, timeout=30)
+    img = Image.open(io.BytesIO(response.content))
+    img.save('output_img/dalle_generated_img.png')  # Save the image as a .png file
+    print('Image saved in output_img/dalle_generated_img.png')
+    return "Image saved in output_img/dalle_generated_img.png"

encode_image.py ADDED Viewed

	@@ -0,0 +1,18 @@

+""" Encode the image located at the given path to base64 string. """
+import base64
+# Function to encode the image
+def encode_image(image_path):
+    """
+    Encode the image located at the given path to base64 string.
+    Args:
+    image_path (str): The path to the image file.
+    Returns:
+    str: The base64 encoded string representation of the image.
+    """
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')

finetune_generate_img.py ADDED Viewed

	@@ -0,0 +1,57 @@

+""" Generate an image using the Replicate API
+Keyword arguments:
+prompt -- The prompt to generate the image from
+Return: An image saved in a .png file"""
+import os
+import io
+import replicate
+import requests
+from PIL import Image
+from config import REPLICATE_API_TOKEN
+# Export the token from config.py as an environment variable at import time
+# (presumably where the replicate client looks for it -- confirm against the SDK docs)
+os.environ['REPLICATE_API_TOKEN'] = REPLICATE_API_TOKEN
+def generate_finetuned_img(prompt):
+    """
+    Generate a finetuned image based on the given prompt.
+    Args:
+    prompt (str): The prompt for generating the image.
+    Returns:
+    str: The file path of the saved finetuned image.
+    """
+    # Create finetuned image
+    print('Creating finetuned image...')
+    output = replicate.run(
+    "joelorellana/paddle_modelv5:0592deeff5f62cbef89090b705196a9bc06c2874dfee85789547011dfc1f6451",
+        input={
+            "width": 1024,
+            "height": 1024,
+            "prompt": prompt,
+            "refine": "base_image_refiner",
+            "scheduler": "DDIM",
+            "lora_scale": 0.6,
+            "num_outputs": 1,
+            "guidance_scale": 7.5,
+            "apply_watermark": True,
+            "high_noise_frac": 0.8,
+            "negative_prompt":
+            """old, bad eyes, deformed, bad hands, ugly,
+               ancient, showing teeth, open mouth, two or more balls""",
+            "prompt_strength": 0.8,
+            "num_inference_steps": 50
+        }
+    )
+    # Save image
+    print('Saving image...')
+    url = output[0]
+    response = requests.get(url, timeout=30)
+    img = Image.open(io.BytesIO(response.content))
+    img.save('output_img/finetuned_generated_img.png')
+    print('Image saved in output_img/finetuned_generated_img.png')
+    return "output_img/finetuned_generated_img.png"

gpt_vision_prompt.py ADDED Viewed

	@@ -0,0 +1,58 @@

+""" Generate a prompt for Generative AI APIs with the given image and prompt. """
+import requests
+from encode_image import encode_image
+from config import OPENAI_API_KEY
+# Default instruction text sent to the vision model together with the image.
+# It asks for a short, templated photo description usable as an image-generation prompt.
+PROMPT = """ Return a prompt to describe the image and pass it
+to DALLE or Stable Diffusion to generate an image.
+The prompt must not exceed 75 tokens.
+The prompt must improve the quality of the original image.
+The prompt must be in the form of:
+[STYLE OF PHOTO] photo of a [SUBJECT], [IMPORTANT
+FEATURE], [MORE DETAILS], [POSE OR ACTION],
+[FRAMING], [SETTING/BACKGROUND], [LIGHTING],
+[CAMERA ANGLE], [CAMERA PROPERTIES],in style of
+[PHOTOGRAPHER],
+"""
+def generate_prompt_with_vision(image_path, prompt=PROMPT, api_key=OPENAI_API_KEY ):
+    """Generate a prompt for Generative AI APIs with the given image and prompt."""
+    # Getting the base64 string
+    print('Encoding image...')
+    base64_image = encode_image(image_path)
+    print("Encoded image. ")
+    headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {api_key}"
+    }
+    payload = {
+      "model": "gpt-4-vision-preview",
+      "messages": [
+        {
+          "role": "user",
+          "content": [
+            {
+              "type": "text",
+              "text": prompt
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_image}"
+              }
+            }
+          ]
+        }
+      ],
+      "max_tokens": 300
+    }
+    print('Creating an special prompt using Vision from OpenAI...')
+    response = requests.post(
+        "https://api.openai.com/v1/chat/completions",
+        headers=headers,
+        json=payload,
+        timeout=30)
+    return response.json()['choices'][0]['message']['content']

main.py ADDED Viewed

	@@ -0,0 +1,20 @@

+"""This is a demo of the GPT-Vision-Prompt, DALL-E, Stability, and Finetune APIs.
+"""
+from gpt_vision_prompt import generate_prompt_with_vision
+from dalle_generate_img import generate_img_with_dalle
+from stability_generate_img import generate_image_with_stability
+from finetune_generate_img import generate_finetuned_img
+from midjourney_generate_img import midjourney_generate_img
+# Path to the source image that the vision model will describe
+IMAGE_PATH = "img/img_030.jpg"
+# Step 1: turn the image into a text prompt
+img_prompt = generate_prompt_with_vision(IMAGE_PATH,)
+print(f"""Prompt generated by Vision:\n{img_prompt}""")
+# Step 2: feed the same prompt to each generator; every call saves its result under output_img/
+NEW_DALLE_IMG = generate_img_with_dalle(prompt=img_prompt)
+NEW_STABILITY_IMG = generate_image_with_stability(prompt=img_prompt)
+NEW_FINETUNED_IMG = generate_finetuned_img(prompt=img_prompt)
+NEW_MIDJOURNEY_IMG = midjourney_generate_img(prompt=img_prompt)

midjourney_generate_img.py ADDED Viewed

	@@ -0,0 +1,69 @@

+""" Generate an image using the Midjourney API"""
+import io
+import requests
+from PIL import Image
+from progress_bar import print_progress_bar
+from config import GOAPIKEY
+# Endpoint that receives the generation request and answers with a task_id
+IMAGINE_ENDPOINT= "https://api.midjourneyapi.xyz/mj/v2/imagine"
+# Endpoint polled with a task_id to read the task's status and result
+FETCH_ENDPOINT = "https://api.midjourneyapi.xyz/mj/v2/fetch"
+# API-key header sent with the imagine and fetch requests
+headers = {
+    "X-API-KEY": GOAPIKEY
+}
+def midjourney_generate_img(prompt):
+    """Generate an image using the Midjourney API
+    Keyword arguments:
+    prompt -- The prompt to generate the image from
+    Return: An image saved in a .png file
+    """
+    img_generation_data = {
+        "prompt": prompt,
+        "aspect_ratio": "16:9",
+        "process_mode": "fast",
+        "webhook_endpoint": "",
+        "webhook_secret": ""
+    }
+    create_img_response = requests.post(
+        IMAGINE_ENDPOINT,
+        headers=headers,
+        json=img_generation_data,
+        timeout=30)
+    if create_img_response.status_code == 200:
+        print("Request for an img to Midjourney: successfully!")
+        task_id = create_img_response.json()['task_id']
+    else:
+        print(f"Image creation failed, please review details: {create_img_response.status_code} \
+              {create_img_response.text}")
+    print_progress_bar(50, msg='Generating MidJourney img, please wait...', bar_length=20)
+    fetch_img_response = requests.post(FETCH_ENDPOINT,
+                                       headers=headers,
+                                       json={"task_id": task_id},
+                                       timeout=30)
+    status_img = fetch_img_response.json()['status']
+    while status_img != "finished":
+        # pause 10s
+        print_progress_bar(10, msg='Generating MidJourney img, please wait...', bar_length=20)
+        fetch_img_response = requests.post(FETCH_ENDPOINT,
+                                           headers=headers,
+                                           json={"task_id": task_id},
+                                           timeout=30)
+        status_img = fetch_img_response.json()['status']
+        if status_img == "failed":
+            print(
+                f"Image generation failed, please review details: {fetch_img_response.status_code} \
+                  {fetch_img_response.text}"
+                  )
+    # download task_result image
+    print("Saving img...")
+    task_result_image_url = fetch_img_response.json()['task_result']['image_url']
+    image_response = requests.get(task_result_image_url, timeout=30)
+    # saving img to output_img as png
+    img = Image.open(io.BytesIO(image_response.content))
+    img.save('output_img/midjourney_generated_img.png')
+    print("Image saved in output_img/midjourney_generated_img.png")
+    return "output_img/midjourney_generated_img.png"

negative_prompt.py ADDED Viewed

File without changes

progress_bar.py ADDED Viewed

	@@ -0,0 +1,36 @@

+""" progress bar"""
+import time
+import sys
+def print_progress_bar(duration, msg='', bar_length=50):
+    """
+    Prints a progress bar with a message to the terminal.
+    :param duration: The number of seconds the progress bar will run.
+    :param msg: The message to display before the progress bar.
+    :param bar_length: The character length of the bar (default is 50).
+    """
+    for i in range(duration):
+        # Calculate the percentage completion.
+        percent = (i + 1) / duration
+        filled_length = int(bar_length * percent)
+        # Create the bar string.
+        progress = '█' * filled_length + '-' * (bar_length - filled_length)
+        # Print the progress bar with the message.
+        sys.stdout.write(f'\r{msg} |{progress}| {percent*100:.2f}% Complete')
+        sys.stdout.flush()
+        # Sleep for one second.
+        time.sleep(1)
+    # Print a newline at the end to ensure the next terminal output is on a new line.
+    print()
+# Manual demo: running this module directly shows a 60-step (about one minute) bar.
+if __name__ == '__main__':
+    # Test the progress bar function.
+    print_progress_bar(60, msg='Loading')

stability_generate_img.py ADDED Viewed

	@@ -0,0 +1,68 @@

+# pylint: disable=line-too-long
+"""Generate an image using the Stability AI API
+Keyword arguments:
+prompt -- The prompt to generate the image from
+Return: An image saved in a .png file
+"""
+import os
+import io
+import warnings
+from stability_sdk import client
+import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
+from PIL import Image
+from config import STABILITY_API_KEY
+# Set up environment variables for Stability API (host and key are exported at import time)
+os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
+os.environ['STABILITY_KEY'] = STABILITY_API_KEY
+# Set up our connection to the Stability API.
+# This module-level client is created on import and used by generate_image_with_stability.
+stability_api = client.StabilityInference(
+    key=os.environ['STABILITY_KEY'],
+    verbose=True,
+    engine="stable-diffusion-xl-1024-v1-0",
+)
+def generate_image_with_stability(prompt, seed=42, steps=50, cfg_scale=7.0, width=1024, height=1024, samples=1):
+    """
+    Generates an image based on the given prompt using Stability API.
+    :param prompt: The prompt to generate the image from.
+    :param seed: Seed for deterministic generation.
+    :param steps: Number of inference steps.
+    :param cfg_scale: CFG scale for prompt guidance.
+    :param width: Width of the generated image.
+    :param height: Height of the generated image.
+    :param samples: Number of images to generate.
+    :return: A PIL.Image object of the generated image.
+    """
+    print("Creating Stability Image...")
+    answers = stability_api.generate(
+        prompt=prompt,
+        seed=seed,
+        steps=steps,
+        cfg_scale=cfg_scale,
+        width=width,
+        height=height,
+        samples=samples,
+        # sampler=generation.SAMPLER_K_DPMPP_2M # default: auto
+    )
+    # Retrieve and process the generated image
+    for resp in answers:
+        for artifact in resp.artifacts:
+            if artifact.finish_reason == generation.FILTER:
+                warnings.warn(
+                    "Your request activated the API's safety filters and could not be processed."
+                    "Please modify the prompt and try again.")
+            if artifact.type == generation.ARTIFACT_IMAGE:
+                # saving img:
+                img = Image.open(io.BytesIO(artifact.binary))
+                img.save("output_img/sd_generated_img.png")
+                print("Image saved in output_img/sd_generated_img.png")
+                return "Image saved in output_img/sd_generated_img.png"
+    raise ValueError("No image was generated.")

test.py ADDED Viewed

File without changes