import base64
import os
from io import BytesIO

import gradio as gr
from huggingface_hub import InferenceClient

# Default prompt; can be overridden via the PROMPT environment variable.
PROMPT = os.environ.get("PROMPT", "Describe this image.")

# Point the client at an OpenAI-compatible chat-completions endpoint.
client = InferenceClient(model="https://text.pollinations.ai/openai")


def image_to_base64(image):
    """Encode a PIL image as a base64 JPEG string."""
    buf = BytesIO()
    # JPEG cannot store an alpha channel, so force RGB before saving.
    image.convert("RGB").save(buf, "JPEG")
    buf.seek(0)
    return base64.b64encode(buf.getvalue()).decode("utf-8")


def caption(image, prompt):
    """Send the image and prompt to the vision model and return its reply."""
    image = image_to_base64(image)
    return client.chat.completions.create(
        model="openai-large",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image}"},
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ],
        max_tokens=1024,
    ).choices[0].message.content


gr.Interface(
    caption,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.TextArea(label="Prompt", value=PROMPT),
    ],
    outputs=gr.Textbox(label="Caption", show_copy_button=True),
    title="Image Captioning",
).launch(debug=True)