import base64
import io
import os

import streamlit as st
from openai import OpenAI
from PIL import Image

# Set up the OpenAI-compatible client pointed at the Nebius AI Studio endpoint
client = OpenAI(
    base_url="https://api.studio.nebius.ai/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY"),
)


# Function to generate a caption from a base64 image data URL
def generate_caption(image_data):
    completion = client.chat.completions.create(
        model="Qwen/Qwen2-VL-72B-Instruct",
        messages=[
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering].""",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Write a caption for this image",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": image_data},
                    },
                ],
            },
        ],
        temperature=0,
    )
    # Read the caption directly from the parsed response object
    caption = completion.choices[0].message.content
    return caption


# Streamlit UI
st.title("Image to Caption Generator")
st.write("Upload an image, and the app will generate a detailed caption for it.")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Encode the image as a base64 data URL so it can be sent in the chat message
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
    image_data_url = f"data:image/png;base64,{img_base64}"

    # Generate the caption using the vision model
    st.write("Generating caption...")
    caption = generate_caption(image_data_url)

    # Display the generated caption
    if caption:
        st.subheader("Generated Caption:")
        st.write(caption)
    else:
        st.write("No caption could be generated.")
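
# A minimal usage sketch (assumption: the script above is saved as app.py; the
# filename and the way you obtain a Nebius AI Studio key are not part of the script):
#
#   export NEBIUS_API_KEY="..."   # set the key that the client above reads from the environment
#   streamlit run app.py          # then open the local URL Streamlit prints in the terminal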