mrbeliever committed on
Commit
8ffbf51
1 Parent(s): e3cb9d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -75
app.py CHANGED
@@ -1,92 +1,69 @@
1
- import streamlit as st
2
- import base64
3
  import os
4
- import requests
 
5
  from PIL import Image
6
- from io import BytesIO
7
-
8
# Function to compress and resize the image before base64 encoding
def compress_and_resize_image(image, max_size=(1024, 1024), quality=85):
    """Shrink an image and re-encode it as JPEG in memory.

    Args:
        image: Whatever ``Image.open`` accepts (a path or a binary file object).
        max_size: ``(width, height)`` bounding box handed to ``thumbnail``.
        quality: JPEG quality setting handed to ``save``.

    Returns:
        An open ``BytesIO`` rewound to offset 0 that holds the JPEG bytes.
    """
    img = Image.open(image)
    img.thumbnail(max_size)  # Resize in place while maintaining aspect ratio

    # JPEG cannot store alpha or palette data, so PNG uploads in modes such as
    # RGBA or P must be converted before saving.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    # The buffer is deliberately NOT created in a ``with`` block: leaving the
    # block would close it, and the caller still has to read from it.
    byte_io = BytesIO()
    img.save(byte_io, format="JPEG", quality=quality)  # Save with reduced quality
    byte_io.seek(0)
    return byte_io
16
-
17
# Function to convert uploaded image to base64
def convert_image_to_base64(image):
    """Return the base64 text of *image* after it has been compressed.

    The image is first passed through ``compress_and_resize_image``; the
    bytes read from the stream it returns are base64-encoded and decoded to
    ``str``.
    """
    jpeg_stream = compress_and_resize_image(image)
    raw = jpeg_stream.read()
    return base64.b64encode(raw).decode("utf-8")
23
 
24
# Function to generate caption using Nebius API
def generate_caption(encoded_image):
    """Ask the Nebius chat-completions endpoint to caption an image.

    Args:
        encoded_image: Base64 text of the image bytes, without any ``data:``
            prefix (the prefix is added here).

    Returns:
        The caption text from the first choice, ``"No caption generated."``
        when the response carries no content, or ``None`` after a failure has
        been reported through ``st.error``.
    """
    API_URL = "https://api.studio.nebius.ai/v1/chat/completions"
    API_KEY = os.environ.get("NEBIUS_API_KEY")

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "Qwen/Qwen2-VL-72B-Instruct",
        "messages": [
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                # The image travels as an ``image_url`` content part; sending the
                # data URL as ordinary message text makes the model read it as
                # characters rather than as a picture.
                "content": [
                    {"type": "text", "text": "Write a caption for this image"},
                    {
                        "type": "image_url",
                        # The compression helper in this file encodes JPEG, so
                        # the media type is labelled accordingly.
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                    },
                ],
            },
        ],
        "temperature": 0,
    }

    # Send request to Nebius API; the timeout keeps the app from hanging on a
    # stalled connection.
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    except requests.RequestException as exc:
        st.error(f"Request to the caption API failed: {exc}")
        return None

    if response.status_code != 200:
        st.error(f"API Error {response.status_code}: {response.text}")
        return None

    result = response.json()
    # ``or [{}]`` also covers an explicitly empty ``choices`` list, which would
    # otherwise raise IndexError on ``[0]``.
    choices = result.get("choices") or [{}]
    return choices[0].get("message", {}).get("content", "No caption generated.")
63
-
64
# Streamlit app layout
def main():
    """Render the upload page and caption the image once the button is pressed.

    NOTE(review): the nesting below is reconstructed from a flattened diff;
    confirm the extent of the spinner block against the real file.
    """
    st.set_page_config(
        page_title="Image Caption Generator",
        layout="centered",
        initial_sidebar_state="collapsed",
    )
    st.title("🖼️ Image to Caption Generator")

    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    if not uploaded_file:
        return

    # Preview of what was uploaded
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    if not st.button("Generate Caption"):
        return

    with st.spinner("Generating caption..."):
        # Base64 form of the upload, as expected by generate_caption
        encoded_image = convert_image_to_base64(uploaded_file)

        # Debug aid: shows how large the encoded payload is
        st.write(f"Encoded image length: {len(encoded_image)} characters")

        caption = generate_caption(encoded_image)

        if caption:
            st.subheader("Generated Caption:")
            st.text_area("", caption, height=100, key="caption_area")
            st.success("Caption generated successfully!")


if __name__ == "__main__":
    main()
 
 
 
1
  import os
2
+ import streamlit as st
3
+ from openai import OpenAI
4
  from PIL import Image
5
+ import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
# Set up the OpenAI client
# The OpenAI SDK is pointed at the Nebius endpoint below; the API key is read
# from the NEBIUS_API_KEY environment variable (None when it is unset).
_nebius_api_key = os.environ.get("NEBIUS_API_KEY")
client = OpenAI(base_url="https://api.studio.nebius.ai/v1/", api_key=_nebius_api_key)
12
 
13
# Function to generate caption from image URL
def generate_caption(image_data):
    """Request a caption for an image from the chat-completions endpoint.

    Args:
        image_data: String placed in the ``image_url.url`` field of the
            request, e.g. an http(s) URL or a ``data:image/...;base64,...``
            data URL.

    Returns:
        The text of the first choice, or ``""`` when the response has no
        choices or no content.
    """
    completion = client.chat.completions.create(
        model="Qwen/Qwen2-VL-72B-Instruct",
        messages=[
            {
                "role": "system",
                "content": """You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 75 words in this format: [image content/subject, description of action, state, and mood], [art form, style], [artist/photographer reference if needed], [additional settings such as camera and lens settings, lighting, colors, effects, texture, background, rendering]."""
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """Write a caption for this image"""
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_data
                        }
                    }
                ]
            }
        ],
        temperature=0
    )

    # ``to_json()`` yields a JSON *string*, so calling ``.get`` on it raises
    # AttributeError; read the typed response object instead.
    if not completion.choices:
        return ""
    return completion.choices[0].message.content or ""
43
 
44
# Streamlit UI
import base64  # imported here because the file's import block does not provide it

st.title("Image to Caption Generator")
st.write("Upload an image, and the app will generate a detailed caption for it.")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    # ``use_column_width`` is deprecated in Streamlit; ``use_container_width``
    # is its replacement.
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Convert image to a base64 data URL. PNG bytes are binary, so decoding
    # them directly as UTF-8 raises UnicodeDecodeError; they must be
    # base64-encoded first, and the result is pure ASCII.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("ascii")
    image_url = f"data:image/png;base64,{img_base64}"

    # Generate caption using the OpenAI API
    st.write("Generating caption...")
    caption = generate_caption(image_url)

    # Display the generated caption
    if caption:
        st.subheader("Generated Caption:")
        st.write(caption)
    else:
        st.write("No caption could be generated.")