Spaces:

mrbeliever
/

Im-prmpt

Running

App Files Files Community

mrbeliever committed on Dec 4, 2024

Commit

8ffbf51

verified ·

1 Parent(s): e3cb9d7

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -75

app.py CHANGED Viewed

@@ -1,92 +1,69 @@
-import streamlit as st
-import base64
 import os
-import requests
 from PIL import Image
-from io import BytesIO
-# Function to compress and resize the image before base64 encoding
-def compress_and_resize_image(image, max_size=(1024, 1024), quality=85):
-    img = Image.open(image)
-    img.thumbnail(max_size)  # Resize image while maintaining aspect ratio
-    with BytesIO() as byte_io:
-        img.save(byte_io, format="JPEG", quality=quality)  # Save with reduced quality
-        byte_io.seek(0)
-        return byte_io
-# Function to convert uploaded image to base64
-def convert_image_to_base64(image):
-    compressed_image = compress_and_resize_image(image)
-    image_bytes = compressed_image.read()
-    encoded_image = base64.b64encode(image_bytes).decode("utf-8")
-    return encoded_image
-# Function to generate caption using Nebius API
-def generate_caption(encoded_image):
-    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
-    API_KEY = os.environ.get("NEBIUS_API_KEY")
-    headers = {
-        "Authorization": f"Bearer {API_KEY}",
-        "Content-Type": "application/json"
-    }
-    payload = {
-        "model": "Qwen/Qwen2-VL-72B-Instruct",
-        "messages": [
             {
                 "role": "system",
                 "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
             },
             {
                 "role": "user",
-                "content": "Write a caption for this image"
-            },
-            {
-                "role": "user",
-                "content": f"data:image/png;base64,{encoded_image}"  # This is where the image is passed as base64 directly
             }
         ],
-        "temperature": 0
-    }
-    # Send request to Nebius API
-    response = requests.post(API_URL, headers=headers, json=payload)
-    if response.status_code == 200:
-        result = response.json()
-        caption = result.get("choices", [{}])[0].get("message", {}).get("content", "No caption generated.")
-        return caption
-    else:
-        st.error(f"API Error {response.status_code}: {response.text}")
-        return None
-# Streamlit app layout
-def main():
-    st.set_page_config(page_title="Image Caption Generator", layout="centered", initial_sidebar_state="collapsed")
-    st.title("🖼️ Image to Caption Generator")
-    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
-    if uploaded_file:
-        # Display the uploaded image
-        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
-        if st.button("Generate Caption"):
-            # Convert the uploaded image to base64
-            with st.spinner("Generating caption..."):
-                encoded_image = convert_image_to_base64(uploaded_file)
-                # Debugging: Ensure the encoded image is valid and not too large
-                st.write(f"Encoded image length: {len(encoded_image)} characters")
-                # Get the generated caption from the API
-                caption = generate_caption(encoded_image)
-                if caption:
-                    st.subheader("Generated Caption:")
-                    st.text_area("", caption, height=100, key="caption_area")
-                    st.success("Caption generated successfully!")
-if __name__ == "__main__":
-    main()

+import base64
+import io
 import os
+import streamlit as st
+from openai import OpenAI
 from PIL import Image
+# The Nebius AI Studio endpoint is reached through the OpenAI SDK by overriding
+# the base URL; the key is read from the NEBIUS_API_KEY environment variable.
+client = OpenAI(
+    api_key=os.getenv("NEBIUS_API_KEY"),
+    base_url="https://api.studio.nebius.ai/v1/",
+)
+# Function to generate a caption from an image URL (or base64 data URL)
+def generate_caption(image_data):
+    """Ask the vision model for a prompt-style caption of a single image.
+
+    Args:
+        image_data: String placed verbatim in the ``image_url.url`` field of
+            the user message. Presumably an http(s) URL or a
+            ``data:image/...;base64,...`` data URL -- confirm against the
+            provider's vision API documentation.
+
+    Returns:
+        The caption text, or an empty string when the response carries no
+        choices or the first choice has no content.
+    """
+    completion = client.chat.completions.create(
+        model="Qwen/Qwen2-VL-72B-Instruct",
+        messages=[
             {
                 "role": "system",
                 "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
             },
             {
                 "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": """Write a caption for this image"""
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": image_data
+                        }
+                    }
+                ]
             }
         ],
+        temperature=0
+    )
+    # The SDK returns a typed response object. to_json() yields a JSON *string*,
+    # so calling .get() on it fails; read the attributes directly instead.
+    choices = completion.choices
+    if not choices:
+        return ""
+    # content may be None (e.g. an empty completion); normalise to "".
+    return choices[0].message.content or ""
+# Streamlit UI: upload an image, preview it, and show the generated caption.
+st.title("Image to Caption Generator")
+st.write("Upload an image, and the app will generate a detailed caption for it.")
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Display the uploaded image
+    image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    # Re-encode the image as PNG and wrap it in a base64 data URL.
+    # Raw PNG bytes are not valid UTF-8, so they must be base64-encoded
+    # before they can be turned into text for the request payload.
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    img_base64 = base64.b64encode(buffered.getvalue()).decode("ascii")
+    image_url = f"data:image/png;base64,{img_base64}"
+    # Generate caption using the OpenAI API
+    st.write("Generating caption...")
+    caption = generate_caption(image_url)
+    # Display the generated caption
+    if caption:
+        st.subheader("Generated Caption:")
+        st.write(caption)
+    else:
+        st.write("No caption could be generated.")