Spaces:
Sleeping
Sleeping
""" Generate a prompt for Generative AI APIs with the given image and prompt. """ | |
import requests | |
from encode_image import encode_image | |
from config import OPENAI_API_KEY | |
# Default instruction sent to the GPT Vision API together with the image.
# It asks the model to describe the image as a text-to-image prompt (for DALLE
# or Stable Diffusion) that follows a fixed, bracketed template.
# The wording is reproduced verbatim; only the trailing "| |" residue that had
# leaked into the literal (and made the statement unparsable) was removed.
PROMPT = """ Return a prompt to describe the image and pass it
to DALLE or Stable Diffusion to generate an image.
The prompt must not exceed 75 tokens.
The prompt must improve the quality of the original image.
The prompt must be in the form of:
[STYLE OF PHOTO] photo of a [SUBJECT], [IMPORTANT
FEATURE], [MORE DETAILS], [POSE OR ACTION],
[FRAMING], [SETTING/BACKGROUND], [LIGHTING],
[CAMERA ANGLE], [CAMERA PROPERTIES],in style of
[PHOTOGRAPHER],
"""
def generate_prompt_with_vision(
    image_path,
    prompt=PROMPT,
    api_key=OPENAI_API_KEY,
    *,
    model="gpt-4-vision-preview",
    max_tokens=300,
    timeout=30,
):
    """Ask the OpenAI chat-completions endpoint to describe an image as a prompt.

    The image is encoded with ``encode_image`` and sent inline as a
    ``data:image/jpeg;base64,...`` URL together with the text instruction.

    Args:
        image_path: Value handed to ``encode_image``; presumably a path to an
            image file (confirm against ``encode_image``).
        prompt: Text instruction sent alongside the image.
        api_key: OpenAI API key used in the ``Authorization: Bearer`` header.
        model: Model name placed in the request payload.
            NOTE(review): the default is a preview model name; confirm it is
            still served before relying on it.
        max_tokens: Value sent as ``max_tokens`` in the request payload.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The ``content`` of the first choice's message in the API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        KeyError, IndexError: If a successful response lacks the expected
            ``choices[0].message.content`` structure.
    """
    # Getting the base64 string
    print('Encoding image...')
    base64_image = encode_image(image_path)
    print("Encoded image. ")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The MIME type is fixed to JPEG regardless of the
                            # source file's actual format.
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    print('Creating an special prompt using Vision from OpenAI...')
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    # Emit the raw diagnostics first so they are visible even when the
    # request failed and an exception is raised just below.
    print(response.status_code)
    print(response.text)

    # Fail with a meaningful HTTP error instead of an opaque
    # ``KeyError: 'choices'`` when the API returns an error payload.
    response.raise_for_status()

    # Decode the body once and reuse it for both logging and the result.
    data = response.json()
    print(data)
    return data['choices'][0]['message']['content']