ProfessorLeVesseur committed on
Commit
9fc4609
·
verified ·
1 Parent(s): c6942c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -59
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from huggingface_hub import InferenceClient
4
  from PIL import Image
5
  import base64
 
6
 
7
  # Streamlit page setup
8
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
@@ -17,22 +17,21 @@ st.subheader('Image Alt Text Creator')
17
  # Retrieve the Hugging Face API Key from secrets
18
  huggingface_api_key = st.secrets["huggingface_api_key"]
19
 
20
- # Initialize the image captioning pipeline
21
- image_captioner = pipeline(
22
- "image-to-text",
23
- model="Salesforce/blip-image-captioning-large",
24
- use_auth_token=huggingface_api_key
25
- )
26
 
27
- # Initialize the language model client
28
- client = InferenceClient(token=huggingface_api_key)
 
 
29
 
30
  # File uploader allows user to add their own image
31
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
32
 
33
  if uploaded_file:
34
  # Display the uploaded image
35
- image = Image.open(uploaded_file)
36
  image_width = 200 # Set the desired width in pixels
37
  with st.expander("Image", expanded=True):
38
  st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
@@ -64,58 +63,75 @@ complex_image_prompt_text = (
64
  "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
65
  )
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Check if an image has been uploaded and if the button has been pressed
68
  if uploaded_file is not None and analyze_button:
69
  with st.spinner("Analyzing the image..."):
70
- # Get the caption from the image using the image captioning model
71
- caption_response = image_captioner(image)
72
- image_caption = caption_response[0]['generated_text']
73
 
74
- # Determine which prompt to use based on the complexity of the image
75
- if complex_image:
76
- prompt_text = complex_image_prompt_text
77
  else:
78
- prompt_text = (
79
- "As an expert in image accessibility and alternative text, succinctly describe the image caption provided in less than 125 characters. "
80
- "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
81
- "Skip phrases like 'image of' or 'picture of.' "
82
- "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
83
- )
84
-
85
- # Include additional details if provided
86
- if additional_details:
87
- prompt_text += f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
88
-
89
- # Create the prompt for the language model
90
- full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
91
-
92
- # Prepare messages for chat interface
93
- messages = [
94
- {"role": "user", "content": full_prompt}
95
- ]
96
-
97
- # Use the language model to generate the alt text description
98
- try:
99
- # Stream the response from the language model
100
- stream = client.chat(
101
- model="meta-llama/Llama-2-7b-chat-hf",
102
- messages=messages,
103
- stream=True
104
- )
105
-
106
- # Stream the response
107
- full_response = ""
108
- message_placeholder = st.empty()
109
- for chunk in stream:
110
- if 'generated_text' in chunk:
111
- content = chunk['generated_text']
112
- full_response += content
113
- message_placeholder.markdown(full_response + "▌")
114
- # Final update after stream ends
115
- message_placeholder.markdown(full_response)
116
-
117
- st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
118
- except Exception as e:
119
- st.error(f"An error occurred: {e}")
120
  else:
121
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
 
1
  import streamlit as st
2
+ import requests
 
3
  from PIL import Image
4
  import base64
5
+ import io
6
 
7
  # Streamlit page setup
8
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 
17
  # Retrieve the Hugging Face API Key from secrets
18
  huggingface_api_key = st.secrets["huggingface_api_key"]
19
 
20
+ # API endpoints
21
+ API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
22
+ API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
 
 
 
23
 
24
+ headers = {
25
+ "Authorization": f"Bearer {huggingface_api_key}",
26
+ "Content-Type": "application/json"
27
+ }
28
 
29
  # File uploader allows user to add their own image
30
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
31
 
32
  if uploaded_file:
33
  # Display the uploaded image
34
+ image = Image.open(uploaded_file).convert('RGB')
35
  image_width = 200 # Set the desired width in pixels
36
  with st.expander("Image", expanded=True):
37
  st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
 
63
  "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
64
  )
65
 
66
+ # Functions to query the Hugging Face Inference API
67
+ def query_image_caption(image):
68
+ # Convert PIL image to bytes
69
+ buffered = io.BytesIO()
70
+ image.save(buffered, format="JPEG")
71
+ image_bytes = buffered.getvalue()
72
+
73
+ response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
74
+ return response.json()
75
+
76
+ def query_llm(prompt):
77
+ payload = {
78
+ "inputs": prompt,
79
+ "parameters": {
80
+ "max_new_tokens": 500,
81
+ "return_full_text": False,
82
+ "do_sample": True,
83
+ "temperature": 0.7,
84
+ "top_p": 0.9
85
+ },
86
+ "options": {
87
+ "wait_for_model": True
88
+ }
89
+ }
90
+
91
+ response = requests.post(API_URL_LLM, headers=headers, json=payload)
92
+ return response.json()
93
+
94
  # Check if an image has been uploaded and if the button has been pressed
95
  if uploaded_file is not None and analyze_button:
96
  with st.spinner("Analyzing the image..."):
97
+ # Get the caption from the image using the image captioning API
98
+ caption_response = query_image_caption(image)
 
99
 
100
+ # Handle potential errors from the API
101
+ if isinstance(caption_response, dict) and caption_response.get("error"):
102
+ st.error(f"Error with image captioning model: {caption_response['error']}")
103
  else:
104
+ image_caption = caption_response[0]['generated_text']
105
+
106
+ # Determine which prompt to use based on the complexity of the image
107
+ if complex_image:
108
+ prompt_text = complex_image_prompt_text
109
+ else:
110
+ prompt_text = (
111
+ "As an expert in image accessibility and alternative text, succinctly describe the image caption provided in less than 125 characters. "
112
+ "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
113
+ "Skip phrases like 'image of' or 'picture of.' "
114
+ "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
115
+ )
116
+
117
+ # Include additional details if provided
118
+ if additional_details:
119
+ prompt_text += f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
120
+
121
+ # Create the prompt for the language model
122
+ full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
123
+
124
+ # Use the language model to generate the alt text description
125
+ llm_response = query_llm(full_prompt)
126
+
127
+ # Handle potential errors from the API
128
+ if isinstance(llm_response, dict) and llm_response.get("error"):
129
+ st.error(f"Error with language model: {llm_response['error']}")
130
+ else:
131
+ generated_text = llm_response[0]['generated_text'].strip()
132
+ st.markdown("### Generated Alt Text:")
133
+ st.write(generated_text)
134
+
135
+ st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
 
 
 
 
 
 
 
 
 
 
136
  else:
137
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")