joelorellana committed on
Commit
70e3e61
·
0 Parent(s):

initial files for demo of img generation models

Browse files
.gitignore ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,macos
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos
3
+
4
+ ### macOS ###
5
+ # General
6
+ .DS_Store
7
+ .AppleDouble
8
+ .LSOverride
9
+
10
+ # Icon must end with two \r
11
+ Icon
12
+
13
+
14
+ # Thumbnails
15
+ ._*
16
+
17
+ # Files that might appear in the root of a volume
18
+ .DocumentRevisions-V100
19
+ .fseventsd
20
+ .Spotlight-V100
21
+ .TemporaryItems
22
+ .Trashes
23
+ .VolumeIcon.icns
24
+ .com.apple.timemachine.donotpresent
25
+
26
+ # Directories potentially created on remote AFP share
27
+ .AppleDB
28
+ .AppleDesktop
29
+ Network Trash Folder
30
+ Temporary Items
31
+ .apdisk
32
+
33
+ ### macOS Patch ###
34
+ # iCloud generated files
35
+ *.icloud
36
+
37
+ ### Python ###
38
+ # Byte-compiled / optimized / DLL files
39
+ __pycache__/
40
+ *.py[cod]
41
+ *$py.class
42
+
43
+ # C extensions
44
+ *.so
45
+
46
+ # Distribution / packaging
47
+ .Python
48
+ build/
49
+ develop-eggs/
50
+ dist/
51
+ downloads/
52
+ eggs/
53
+ .eggs/
54
+ lib/
55
+ lib64/
56
+ parts/
57
+ sdist/
58
+ var/
59
+ wheels/
60
+ share/python-wheels/
61
+ *.egg-info/
62
+ .installed.cfg
63
+ *.egg
64
+ MANIFEST
65
+
66
+ # PyInstaller
67
+ # Usually these files are written by a python script from a template
68
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
69
+ *.manifest
70
+ *.spec
71
+
72
+ # Installer logs
73
+ pip-log.txt
74
+ pip-delete-this-directory.txt
75
+
76
+ # Unit test / coverage reports
77
+ htmlcov/
78
+ .tox/
79
+ .nox/
80
+ .coverage
81
+ .coverage.*
82
+ .cache
83
+ nosetests.xml
84
+ coverage.xml
85
+ *.cover
86
+ *.py,cover
87
+ .hypothesis/
88
+ .pytest_cache/
89
+ cover/
90
+
91
+ # Translations
92
+ *.mo
93
+ *.pot
94
+
95
+ # Django stuff:
96
+ *.log
97
+ local_settings.py
98
+ db.sqlite3
99
+ db.sqlite3-journal
100
+
101
+ # Flask stuff:
102
+ instance/
103
+ .webassets-cache
104
+
105
+ # Scrapy stuff:
106
+ .scrapy
107
+
108
+ # Sphinx documentation
109
+ docs/_build/
110
+
111
+ # PyBuilder
112
+ .pybuilder/
113
+ target/
114
+
115
+ # Jupyter Notebook
116
+ .ipynb_checkpoints
117
+
118
+ # IPython
119
+ profile_default/
120
+ ipython_config.py
121
+
122
+ # pyenv
123
+ # For a library or package, you might want to ignore these files since the code is
124
+ # intended to run in multiple environments; otherwise, check them in:
125
+ # .python-version
126
+
127
+ # pipenv
128
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
129
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
130
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
131
+ # install all needed dependencies.
132
+ #Pipfile.lock
133
+
134
+ # poetry
135
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
136
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
137
+ # commonly ignored for libraries.
138
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
139
+ #poetry.lock
140
+
141
+ # pdm
142
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
143
+ #pdm.lock
144
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
145
+ # in version control.
146
+ # https://pdm.fming.dev/#use-with-ide
147
+ .pdm.toml
148
+
149
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150
+ __pypackages__/
151
+
152
+ # Celery stuff
153
+ celerybeat-schedule
154
+ celerybeat.pid
155
+
156
+ # SageMath parsed files
157
+ *.sage.py
158
+
159
+ # Environments
160
+ .env
161
+ .venv
162
+ env/
163
+ venv/
164
+ ENV/
165
+ env.bak/
166
+ venv.bak/
167
+
168
+ # Spyder project settings
169
+ .spyderproject
170
+ .spyproject
171
+
172
+ # Rope project settings
173
+ .ropeproject
174
+
175
+ # mkdocs documentation
176
+ /site
177
+
178
+ # mypy
179
+ .mypy_cache/
180
+ .dmypy.json
181
+ dmypy.json
182
+
183
+ # Pyre type checker
184
+ .pyre/
185
+
186
+ # pytype static type analyzer
187
+ .pytype/
188
+
189
+ # Cython debug symbols
190
+ cython_debug/
191
+
192
+ # PyCharm
193
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
194
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
195
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
196
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
197
+ #.idea/
198
+
199
+ ### Python Patch ###
200
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
201
+ poetry.toml
202
+
203
+ # ruff
204
+ .ruff_cache/
205
+
206
+ # LSP config files
207
+ pyrightconfig.json
208
+
209
+ # files
210
+ config.py
211
+ img/
212
+ output_img/
213
+
214
+ # End of https://www.toptal.com/developers/gitignore/api/python,macos
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # TODO: Streamlit demo
dalle_generate_img.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Generate an image using the DALL-E API
2
+
3
+ Keyword arguments:
4
+ prompt -- The prompt to generate the image from
5
+ Return: An image saved in a .png file
6
+ """
7
+
8
+
9
+ import io
10
+ from openai import OpenAI
11
+ from PIL import Image
12
+ import requests
13
+ from config import OPENAI_API_KEY
14
+
15
+
16
# Module-level OpenAI client, created once at import time with the key from
# config and used by generate_img_with_dalle below.
client = OpenAI(api_key=OPENAI_API_KEY)
17
+
18
def generate_img_with_dalle(prompt=""):
    """Generate an image with the DALL-E 3 API and save it as a PNG file.

    Keyword arguments:
    prompt -- The prompt to generate the image from
    Return: The status message
        "Image saved in output_img/dalle_generated_img.png"
    Raises: requests.HTTPError if the generated image cannot be downloaded
    """
    import os  # local import: this module has no file-level `import os`

    # DALL-E model parameters
    size = '1024x1024'  # dall-e-3 accepts '1024x1024', '1792x1024', '1024x1792'
    quality = 'hd'  # Choose between 'standard', 'hd'
    # Generate image using DALL-E
    print('Creating DALLE image...')
    response = client.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size=size,
        quality=quality,
        n=1,
        response_format="url")
    image_url = response.data[0].url
    # Download and save the image
    print('Saving image...')
    image_response = requests.get(image_url, timeout=30)
    # Fail loudly on an HTTP error instead of handing an error page to PIL.
    image_response.raise_for_status()
    img = Image.open(io.BytesIO(image_response.content))
    # output_img/ is git-ignored, so it may not exist on a fresh checkout.
    os.makedirs('output_img', exist_ok=True)
    img.save('output_img/dalle_generated_img.png')  # Save the image as a .png file
    print('Image saved in output_img/dalle_generated_img.png')
    return "Image saved in output_img/dalle_generated_img.png"
encode_image.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Encode the image located at the given path to base64 string. """
2
+
3
+ import base64
4
+
5
+ # Function to encode the image
6
def encode_image(image_path):
    """
    Read a file in binary mode and return its contents as base64 text.

    Args:
        image_path (str): Location of the image file on disk.

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
18
+
finetune_generate_img.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Generate an image using the Replicate API
2
+ Keyword arguments:
3
+ prompt -- The prompt to generate the image from
4
+ Return: An image saved in a .png file"""
5
+
6
+ import os
7
+ import io
8
+ import replicate
9
+ import requests
10
+ from PIL import Image
11
+ from config import REPLICATE_API_TOKEN
12
+
13
# Publish the Replicate token from config through the process environment
# at import time (presumably where the replicate package looks for it).
os.environ['REPLICATE_API_TOKEN'] = REPLICATE_API_TOKEN
15
+
16
+
17
def generate_finetuned_img(prompt):
    """
    Generate an image with the fine-tuned Replicate model and save it as PNG.

    Args:
        prompt (str): The prompt for generating the image.

    Returns:
        str: The file path of the saved finetuned image.

    Raises:
        requests.HTTPError: If the generated image cannot be downloaded.
    """
    # Create finetuned image
    print('Creating finetuned image...')
    output = replicate.run(
        "joelorellana/paddle_modelv5:0592deeff5f62cbef89090b705196a9bc06c2874dfee85789547011dfc1f6451",
        input={
            "width": 1024,
            "height": 1024,
            "prompt": prompt,
            "refine": "base_image_refiner",
            "scheduler": "DDIM",
            "lora_scale": 0.6,
            "num_outputs": 1,
            "guidance_scale": 7.5,
            "apply_watermark": True,
            "high_noise_frac": 0.8,
            "negative_prompt":
                """old, bad eyes, deformed, bad hands, ugly,
                ancient, showing teeth, open mouth, two or more balls""",
            "prompt_strength": 0.8,
            "num_inference_steps": 50
        }
    )

    # Save image
    print('Saving image...')
    url = output[0]  # num_outputs is 1, so the first entry is the only image
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of handing an error page to PIL.
    response.raise_for_status()
    img = Image.open(io.BytesIO(response.content))
    # output_img/ is git-ignored, so it may not exist on a fresh checkout.
    os.makedirs('output_img', exist_ok=True)
    img.save('output_img/finetuned_generated_img.png')
    print('Image saved in output_img/finetuned_generated_img.png')
    return "output_img/finetuned_generated_img.png"
gpt_vision_prompt.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Generate a prompt for Generative AI APIs with the given image and prompt. """
2
+
3
+ import requests
4
+ from encode_image import encode_image
5
+ from config import OPENAI_API_KEY
6
+
7
+ # prompt for GPT Vision API
8
+ PROMPT = """ Return a prompt to describe the image and pass it
9
+ to DALLE or Stable Diffusion to generate an image.
10
+ The prompt must not exceed 75 tokens.
11
+ The prompt must improve the quality of the original image.
12
+ The prompt must be in the form of:
13
+ [STYLE OF PHOTO] photo of a [SUBJECT], [IMPORTANT
14
+ FEATURE], [MORE DETAILS], [POSE OR ACTION],
15
+ [FRAMING], [SETTING/BACKGROUND], [LIGHTING],
16
+ [CAMERA ANGLE], [CAMERA PROPERTIES],in style of
17
+ [PHOTOGRAPHER],
18
+ """
19
+
20
+
21
def generate_prompt_with_vision(image_path, prompt=PROMPT, api_key=OPENAI_API_KEY):
    """Generate a prompt for Generative AI APIs with the given image and prompt.

    Args:
        image_path (str): Path of the image file to describe.
        prompt (str): Instruction text sent alongside the image.
        api_key (str): OpenAI API key used for the Authorization header.

    Returns:
        str: The text content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    import mimetypes  # local import: not imported at file level

    # Getting the base64 string
    print('Encoding image...')
    base64_image = encode_image(image_path)
    print("Encoded image. ")

    # Label the data URL with the file's real image type (png, webp, ...);
    # fall back to the previous hard-coded JPEG when it cannot be guessed.
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }
    print('Creating an special prompt using Vision from OpenAI...')
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=30)
    # Surface API errors (bad key, quota, invalid image) as an HTTPError
    # rather than an opaque KeyError on the missing 'choices' field.
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']
main.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This is a demo of the GPT-Vision-Prompt, DALL-E, Stability, and Finetune APIs.
2
+ """
3
+
4
+
5
+ from gpt_vision_prompt import generate_prompt_with_vision
6
+ from dalle_generate_img import generate_img_with_dalle
7
+ from stability_generate_img import generate_image_with_stability
8
+ from finetune_generate_img import generate_finetuned_img
9
+ from midjourney_generate_img import midjourney_generate_img
10
+
11
+
12
# Path to the source image to describe. The img/ folder is git-ignored, so
# the file has to be supplied locally.
IMAGE_PATH = "img/img_030.jpg"
# Ask the vision model for a text-to-image prompt describing that image.
img_prompt = generate_prompt_with_vision(IMAGE_PATH,)
print(f"""Prompt generated by Vision:\n{img_prompt}""")

# Feed the same prompt to every generator. Each call saves a PNG under
# output_img/; the DALL-E and Stability helpers return a status message,
# the fine-tuned and Midjourney helpers return the saved file path.
NEW_DALLE_IMG = generate_img_with_dalle(prompt=img_prompt)
NEW_STABILITY_IMG = generate_image_with_stability(prompt=img_prompt)
NEW_FINETUNED_IMG = generate_finetuned_img(prompt=img_prompt)
NEW_MIDJOURNEY_IMG = midjourney_generate_img(prompt=img_prompt)
midjourney_generate_img.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Generate an image using the Midjourney API"""
2
+ import io
3
+ import requests
4
+ from PIL import Image
5
+ from progress_bar import print_progress_bar
6
+ from config import GOAPIKEY
7
+
8
# Endpoint that accepts a new image-generation task and answers with a task_id.
IMAGINE_ENDPOINT= "https://api.midjourneyapi.xyz/mj/v2/imagine"
# Endpoint queried with a task_id to read the task's status and result.
FETCH_ENDPOINT = "https://api.midjourneyapi.xyz/mj/v2/fetch"

# API-key header sent with the requests to both endpoints.
headers = {
    "X-API-KEY": GOAPIKEY
}
14
+
15
def midjourney_generate_img(prompt):
    """Generate an image using the Midjourney API

    Keyword arguments:
    prompt -- The prompt to generate the image from
    Return: The path of the saved .png file
        ("output_img/midjourney_generated_img.png")
    Raises: RuntimeError if the task cannot be created or the API reports
        the task as failed; requests.HTTPError if the image download fails
    """
    import os  # local import: this module has no file-level `import os`

    img_generation_data = {
        "prompt": prompt,
        "aspect_ratio": "16:9",
        "process_mode": "fast",
        "webhook_endpoint": "",
        "webhook_secret": ""
    }
    create_img_response = requests.post(
        IMAGINE_ENDPOINT,
        headers=headers,
        json=img_generation_data,
        timeout=30)
    if create_img_response.status_code != 200:
        # Without a task_id there is nothing to poll, so stop here instead of
        # failing later on an unbound variable.
        raise RuntimeError(
            "Image creation failed, please review details: "
            f"{create_img_response.status_code} {create_img_response.text}")
    print("Request for an img to Midjourney: successfully!")
    task_id = create_img_response.json()['task_id']

    # Initial wait before the first status check.
    print_progress_bar(50, msg='Generating MidJourney img, please wait...', bar_length=20)
    while True:
        fetch_img_response = requests.post(FETCH_ENDPOINT,
                                           headers=headers,
                                           json={"task_id": task_id},
                                           timeout=30)
        fetch_data = fetch_img_response.json()
        status_img = fetch_data['status']
        if status_img == "finished":
            break
        if status_img == "failed":
            # A failed task never becomes "finished"; bail out rather than
            # polling forever.
            raise RuntimeError(
                "Image generation failed, please review details: "
                f"{fetch_img_response.status_code} {fetch_img_response.text}")
        # pause 10s
        print_progress_bar(10, msg='Generating MidJourney img, please wait...', bar_length=20)

    # download task_result image
    print("Saving img...")
    task_result_image_url = fetch_data['task_result']['image_url']
    image_response = requests.get(task_result_image_url, timeout=30)
    # Fail loudly on an HTTP error instead of handing an error page to PIL.
    image_response.raise_for_status()
    # saving img to output_img as png
    img = Image.open(io.BytesIO(image_response.content))
    # output_img/ is git-ignored, so it may not exist on a fresh checkout.
    os.makedirs('output_img', exist_ok=True)
    img.save('output_img/midjourney_generated_img.png')
    print("Image saved in output_img/midjourney_generated_img.png")
    return "output_img/midjourney_generated_img.png"
negative_prompt.py ADDED
File without changes
progress_bar.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ progress bar"""
2
+
3
+ import time
4
+ import sys
5
+
6
def print_progress_bar(duration, msg='', bar_length=50):
    """
    Draw a one-line progress bar that advances once per second.

    :param duration: How many one-second steps the bar runs for.
    :param msg: Text shown in front of the bar.
    :param bar_length: Width of the bar in characters (default is 50).
    """
    for step in range(1, duration + 1):
        # Fraction of the total duration reached at this step.
        fraction = step / duration
        done = int(bar_length * fraction)
        remaining = bar_length - done

        # Filled part followed by the still-empty part.
        bar = '█' * done + '-' * remaining

        # The carriage return redraws the bar in place on the same line.
        line = f'\r{msg} |{bar}| {fraction*100:.2f}% Complete'
        print(line, end='', flush=True)

        # Wait one second before drawing the next step.
        time.sleep(1)

    # Finish the line so later output starts on a fresh one.
    print()
31
+
32
# Manual demo: running this module directly shows the bar in action.
if __name__ == '__main__':
    # Runs a 60-step bar, i.e. about one minute at one second per step.
    print_progress_bar(60, msg='Loading')
36
+
stability_generate_img.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pylint: disable=line-too-long
2
+ """Generate an image using the Stability AI API
3
+
4
+ Keyword arguments:
5
+ prompt -- The prompt to generate the image from
6
+ Return: An image saved in a .png file
7
+ """
8
+
9
+ import os
10
+ import io
11
+ import warnings
12
+
13
+ from stability_sdk import client
14
+ import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
15
+ from PIL import Image
16
+ from config import STABILITY_API_KEY
17
+
18
# Publish the Stability gRPC host and the API key from config through the
# process environment at import time.
os.environ['STABILITY_HOST'] = 'grpc.stability.ai:443'
os.environ['STABILITY_KEY'] = STABILITY_API_KEY

# Module-level connection to the Stability API, created once at import time
# and used by generate_image_with_stability; pinned to the SDXL 1.0 engine.
stability_api = client.StabilityInference(
    key=os.environ['STABILITY_KEY'],
    verbose=True,
    engine="stable-diffusion-xl-1024-v1-0",
)
28
+
29
def generate_image_with_stability(prompt, seed=42, steps=50, cfg_scale=7.0, width=1024, height=1024, samples=1):
    """
    Generates an image based on the given prompt using Stability API.

    Only the first image artifact in the response is saved, even when
    ``samples`` is greater than 1.

    :param prompt: The prompt to generate the image from.
    :param seed: Seed for deterministic generation.
    :param steps: Number of inference steps.
    :param cfg_scale: CFG scale for prompt guidance.
    :param width: Width of the generated image.
    :param height: Height of the generated image.
    :param samples: Number of images to generate.
    :return: The status message "Image saved in output_img/sd_generated_img.png".
    :raises ValueError: If the response contains no image artifact.
    """
    print("Creating Stability Image...")
    answers = stability_api.generate(
        prompt=prompt,
        seed=seed,
        steps=steps,
        cfg_scale=cfg_scale,
        width=width,
        height=height,
        samples=samples,
        # sampler=generation.SAMPLER_K_DPMPP_2M # default: auto
    )

    # Retrieve and process the generated image
    for resp in answers:
        for artifact in resp.artifacts:
            if artifact.finish_reason == generation.FILTER:
                # Note the space after the first sentence: the two literals
                # are joined by implicit concatenation.
                warnings.warn(
                    "Your request activated the API's safety filters and could not be processed. "
                    "Please modify the prompt and try again.")
            if artifact.type == generation.ARTIFACT_IMAGE:
                # saving img:
                img = Image.open(io.BytesIO(artifact.binary))
                # output_img/ is git-ignored, so it may not exist on a fresh checkout.
                os.makedirs("output_img", exist_ok=True)
                img.save("output_img/sd_generated_img.png")
                print("Image saved in output_img/sd_generated_img.png")
                return "Image saved in output_img/sd_generated_img.png"

    raise ValueError("No image was generated.")
test.py ADDED
File without changes