import gradio as gr
from openai import OpenAI
import requests
from PIL import Image
from io import BytesIO


def process_text(api_key, example, question):
    # Prefer the selected example question over free-form input.
    if example:
        question = example

    client = OpenAI(api_key=api_key)

    # Step 1: generate an image from the question with DALL-E 3.
    image_response = client.images.generate(
        model="dall-e-3",
        prompt=question,
        size="1024x1024",
        quality="standard",
        n=1,
    )
    image_url = image_response.data[0].url

    # Step 2: download the generated image so it can be shown in the UI.
    response = requests.get(image_url)
    if response.status_code != 200:
        raise gr.Error("Failed to download the generated image.")
    image = Image.open(BytesIO(response.content))

    # Step 3: ask GPT-4 Vision to answer the question, grounded in the image.
    chat_response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        max_tokens=300,
    )
    answer = chat_response.choices[0].message.content
    return image, answer


demo = gr.Interface(
    fn=process_text,
    inputs=[
        gr.Textbox(label="Your API Key", type="password"),
        gr.Radio(
            [
                "A group of people are crowded around in a living room talking to one another. "
                "A man in the foreground introduces two individuals: one appears to be a regular "
                "human male, the other appears to be an animal. What is unusual about this description?",
                "A woman is waiting to get on the elevator. But the people in the elevator are on fire. "
                "Where can this event take place?",
            ],
            label="Example Question",
        ),
        gr.Textbox(label="Question"),
    ],
    outputs=[
        gr.Image(type="pil", label="Image Generated by DALL·E 3", width=768, height=768),
        gr.Textbox(label="Answer"),
    ],
    title="Chain of Images for Intuitively Reasoning",
)

if __name__ == "__main__":
    demo.launch(show_api=True)
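
# A minimal sketch of how to run this demo, assuming the script is saved as
# app.py (the filename and install commands are assumptions, not part of the
# original script):
#
#   pip install gradio openai requests pillow
#   python app.py
#
# Gradio prints a local URL; open it, paste an OpenAI API key, then pick an
# example or type a question. The app chains a DALL-E 3 image generation with
# a GPT-4 Vision answer over that image.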