Spaces:
Sleeping
Sleeping
app.py
Browse files
app.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import base64
|
3 |
+
from openai import OpenAI
|
4 |
+
|
5 |
+
# Function to encode the image to base64
|
6 |
+
def encode_image(image_file) -> str:
    """Return the contents of *image_file* as a base64-encoded ASCII string.

    Args:
        image_file: Any object exposing ``getvalue()`` that returns the raw
            bytes of the image (for example an ``io.BytesIO`` buffer).

    Returns:
        The base64 text for those bytes, decoded as UTF-8. Empty input
        yields an empty string.
    """
    raw_bytes = image_file.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
|
8 |
+
|
9 |
+
# Streamlit page setup. The page config call is kept as the first Streamlit
# command in the script, ahead of any other UI element.
st.set_page_config(
    page_title="MTSS Image Accessibility Alt Text Generator",
    layout="centered",
    initial_sidebar_state="collapsed",
)
st.title("MTSS Snapshot: Accessibility Image Textifier: `Alt Text`")

# Retrieve the OpenAI API key from Streamlit secrets
openai_api_key = st.secrets["openai_api_key"]

# Initialize the OpenAI client with the API key
client = OpenAI(api_key=openai_api_key)

# File uploader allows user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
|
21 |
+
|
22 |
+
if uploaded_file:
    # Display the uploaded image inside an expander that starts open.
    with st.expander("Image", expanded=True):
        st.image(uploaded_file, caption=uploaded_file.name, use_column_width=True)

# Toggle for showing additional details input
show_details = st.toggle("Add details about the image", value=False)

# Bind the name unconditionally so later code can read it even when the
# toggle is off and the text area is never rendered.
additional_details = ""
if show_details:
    # Text input for additional details about the image, shown only if the
    # toggle is on. No `disabled` flag is needed: inside this branch it would
    # always evaluate to False.
    additional_details = st.text_area(
        "Add any additional details or context about the image here:",
    )

# Button to trigger the analysis
analyze_button = st.button("Analyse the Scientific Image", type="secondary")
|
39 |
+
|
40 |
+
# Check if an image has been uploaded, if the API key is available, and if the button has been pressed
|
41 |
+
# Run the analysis only when an image is present, the API key loaded from
# secrets is non-empty, and the button was pressed on this rerun.
if uploaded_file is not None and openai_api_key and analyze_button:

    with st.spinner("Analysing the image ..."):
        # Encode the image
        base64_image = encode_image(uploaded_file)

        # The uploader accepts both JPEG and PNG, so label the data URL with
        # the MIME type reported for the upload; fall back to JPEG if the
        # object does not expose one.
        mime_type = getattr(uploaded_file, "type", None) or "image/jpeg"

        # Prompt sent alongside the image. Every fragment ends with a space
        # so adjacent sentences do not run together when concatenated.
        prompt_text = (
            "You are a highly knowledgeable accessibility specialist. "
            "Your task is to examine the following image in detail. "
            "Provide a comprehensive, factual, and accurate explanation of what the image depicts. "
            "Highlight key elements and their significance, and present your analysis in clear, well-structured format. "
            "Assume the reader has a basic understanding of scientific concepts. "
            "Create a detailed image caption using 150 words or less."
        )

        # Append the optional user-supplied context only when the toggle is
        # on and the text area is non-empty.
        if show_details and additional_details:
            prompt_text += (
                f"\n\nAdditional Context Provided by the User:\n{additional_details}"
            )

        # Create the payload for the completion request: one user message
        # carrying the text prompt and the image as a base64 data URL.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ]

        # Make the request to the OpenAI API and stream the answer into a
        # single placeholder so the text grows in place.
        try:
            full_response = ""
            message_placeholder = st.empty()
            # NOTE(review): "gpt-4-vision-preview" is a preview model name;
            # confirm it is still served before deploying.
            stream = client.chat.completions.create(
                model="gpt-4-vision-preview",
                messages=messages,
                max_tokens=1200,
                stream=True,
            )
            for chunk in stream:
                # Skip chunks that carry no choices instead of indexing into
                # an empty list.
                if not chunk.choices:
                    continue
                delta_text = chunk.choices[0].delta.content
                if delta_text is not None:
                    full_response += delta_text
                    # Trailing block character acts as a typing cursor.
                    message_placeholder.markdown(full_response + "▌")
            # Final update to placeholder after the stream ends (no cursor).
            message_placeholder.markdown(full_response)
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user rather
            # than crashing the script run.
            st.error(f"An error occurred: {e}")
else:
    # Warnings for user action required
    if not uploaded_file and analyze_button:
        st.warning("Please upload an image.")
    if not openai_api_key:
        st.warning("Please enter your OpenAI API key.")
|