ProfessorLeVesseur committed on
Commit
ee54df8
·
verified ·
1 Parent(s): 1e06fb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -36
app.py CHANGED
@@ -1,10 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import requests
3
  from PIL import Image
4
  import io
 
5
 
6
  # Streamlit page setup
7
- st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
8
 
9
  # Add the logo image with a specified width
10
  image_width = 300 # Set the desired width in pixels
@@ -16,14 +145,8 @@ st.subheader('Image Alt Text Creator')
16
  # Retrieve the Hugging Face API Key from secrets
17
  huggingface_api_key = st.secrets["huggingface_api_key"]
18
 
19
- # API endpoints
20
- API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
21
- API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
22
-
23
- headers = {
24
- "Authorization": f"Bearer {huggingface_api_key}",
25
- "Content-Type": "application/json"
26
- }
27
 
28
  # File uploader allows user to add their own image
29
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
@@ -60,32 +183,48 @@ complex_image_prompt_text = (
60
  )
61
 
62
  # Functions to query the Hugging Face Inference API
 
63
  def query_image_caption(image):
64
  # Convert PIL image to bytes
65
  buffered = io.BytesIO()
66
  image.save(buffered, format="JPEG")
67
  image_bytes = buffered.getvalue()
68
 
69
- response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
70
- return response.json()
 
 
 
 
 
71
 
72
  def query_llm(prompt):
73
- payload = {
74
- "inputs": prompt,
75
- "parameters": {
76
- "max_new_tokens": 500,
77
- "return_full_text": False,
78
- "do_sample": True,
79
- "temperature": 0.7,
80
- "top_p": 0.9
81
- },
82
- "options": {
83
- "wait_for_model": True
84
- }
85
- }
86
-
87
- response = requests.post(API_URL_LLM, headers=headers, json=payload)
88
- return response.json()
 
 
 
 
 
 
 
 
 
 
89
 
90
  # Check if an image has been uploaded and if the button has been pressed
91
  if uploaded_file is not None and analyze_button:
@@ -97,6 +236,7 @@ if uploaded_file is not None and analyze_button:
97
  if isinstance(caption_response, dict) and caption_response.get("error"):
98
  st.error(f"Error with image captioning model: {caption_response['error']}")
99
  else:
 
100
  image_caption = caption_response[0]['generated_text']
101
 
102
  # Use the complex image prompt text
@@ -106,20 +246,16 @@ if uploaded_file is not None and analyze_button:
106
  if additional_details:
107
  prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
108
 
109
- # Create the prompt for the language model
110
  full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
111
 
112
  # Use the language model to generate the alt text description
113
  llm_response = query_llm(full_prompt)
114
 
115
- # Handle potential errors from the API
116
- if isinstance(llm_response, dict) and llm_response.get("error"):
117
- st.error(f"Error with language model: {llm_response['error']}")
118
- else:
119
- generated_text = llm_response[0]['generated_text'].strip()
120
- st.markdown("### Generated Alt Text:")
121
- st.write(generated_text)
122
 
123
- st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
124
  else:
125
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
 
1
+ # import streamlit as st
2
+ # import requests
3
+ # from PIL import Image
4
+ # import io
5
+
6
+ # # Streamlit page setup
7
+ # st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
8
+
9
+ # # Add the logo image with a specified width
10
+ # image_width = 300 # Set the desired width in pixels
11
+ # st.image('MTSS.ai_Logo.png', width=image_width)
12
+
13
+ # st.header('VisionTexts™ | Accessibility')
14
+ # st.subheader('Image Alt Text Creator')
15
+
16
+ # # Retrieve the Hugging Face API Key from secrets
17
+ # huggingface_api_key = st.secrets["huggingface_api_key"]
18
+
19
+ # # API endpoints
20
+ # # API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
21
+ # API_URL_CAPTION = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
22
+ # API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
23
+
24
+ # headers = {
25
+ # "Authorization": f"Bearer {huggingface_api_key}",
26
+ # "Content-Type": "application/json"
27
+ # }
28
+
29
+ # # File uploader allows user to add their own image
30
+ # uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
31
+
32
+ # if uploaded_file:
33
+ # # Display the uploaded image
34
+ # image = Image.open(uploaded_file).convert('RGB')
35
+ # image_width = 200 # Set the desired width in pixels
36
+ # with st.expander("Image", expanded=True):
37
+ # st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
38
+ # else:
39
+ # st.warning("Please upload an image.")
40
+
41
+ # # Option for adding additional details
42
+ # show_details = st.checkbox("Add additional details about the image.", value=False)
43
+
44
+ # if show_details:
45
+ # # Text input for additional details about the image
46
+ # additional_details = st.text_area(
47
+ # "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
48
+ # )
49
+ # else:
50
+ # additional_details = ""
51
+
52
+ # # Button to trigger the analysis
53
+ # analyze_button = st.button("Analyze the Image", type="secondary")
54
+
55
+ # # Prompt for complex image description
56
+ # complex_image_prompt_text = (
57
+ # "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
58
+ # "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
59
+ # "Skip phrases like 'image of' or 'picture of.' "
60
+ # "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
61
+ # )
62
+
63
+ # # Functions to query the Hugging Face Inference API
64
+ # def query_image_caption(image):
65
+ # # Convert PIL image to bytes
66
+ # buffered = io.BytesIO()
67
+ # image.save(buffered, format="JPEG")
68
+ # image_bytes = buffered.getvalue()
69
+
70
+ # response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
71
+ # return response.json()
72
+
73
+ # def query_llm(prompt):
74
+ # payload = {
75
+ # "inputs": prompt,
76
+ # "parameters": {
77
+ # "max_new_tokens": 500,
78
+ # "return_full_text": False,
79
+ # "do_sample": True,
80
+ # "temperature": 0.7,
81
+ # "top_p": 0.9
82
+ # },
83
+ # "options": {
84
+ # "wait_for_model": True
85
+ # }
86
+ # }
87
+
88
+ # response = requests.post(API_URL_LLM, headers=headers, json=payload)
89
+ # return response.json()
90
+
91
+ # # Check if an image has been uploaded and if the button has been pressed
92
+ # if uploaded_file is not None and analyze_button:
93
+ # with st.spinner("Analyzing the image..."):
94
+ # # Get the caption from the image using the image captioning API
95
+ # caption_response = query_image_caption(image)
96
+
97
+ # # Handle potential errors from the API
98
+ # if isinstance(caption_response, dict) and caption_response.get("error"):
99
+ # st.error(f"Error with image captioning model: {caption_response['error']}")
100
+ # else:
101
+ # image_caption = caption_response[0]['generated_text']
102
+
103
+ # # Use the complex image prompt text
104
+ # prompt_text = complex_image_prompt_text
105
+
106
+ # # Include additional details if provided
107
+ # if additional_details:
108
+ # prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
109
+
110
+ # # Create the prompt for the language model
111
+ # full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
112
+
113
+ # # Use the language model to generate the alt text description
114
+ # llm_response = query_llm(full_prompt)
115
+
116
+ # # Handle potential errors from the API
117
+ # if isinstance(llm_response, dict) and llm_response.get("error"):
118
+ # st.error(f"Error with language model: {llm_response['error']}")
119
+ # else:
120
+ # generated_text = llm_response[0]['generated_text'].strip()
121
+ # st.markdown("### Generated Alt Text:")
122
+ # st.write(generated_text)
123
+
124
+ # st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
125
+ # else:
126
+ # st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
127
+
128
+
129
  import streamlit as st
130
  import requests
131
  from PIL import Image
132
  import io
133
+ from huggingface_hub import InferenceClient
134
 
135
  # Streamlit page setup
136
+ st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")
137
 
138
  # Add the logo image with a specified width
139
  image_width = 300 # Set the desired width in pixels
 
145
  # Retrieve the Hugging Face API Key from secrets
146
  huggingface_api_key = st.secrets["huggingface_api_key"]
147
 
148
# Initialize the Hugging Face inference client.
# InferenceClient accepts the access token through its `token` keyword;
# there is no `api_token` parameter, so passing one raises TypeError.
client = InferenceClient(token=huggingface_api_key)
 
 
 
 
 
 
150
 
151
  # File uploader allows user to add their own image
152
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
183
  )
184
 
185
  # Functions to query the Hugging Face Inference API
186
+
187
  def query_image_caption(image):
188
  # Convert PIL image to bytes
189
  buffered = io.BytesIO()
190
  image.save(buffered, format="JPEG")
191
  image_bytes = buffered.getvalue()
192
 
193
+ # Use the InferenceClient to query the image captioning model
194
+ response = client.post(
195
+ model="Salesforce/blip-image-captioning-large",
196
+ data=image_bytes,
197
+ headers={"Content-Type": "application/octet-stream"},
198
+ )
199
+ return response
200
 
201
  def query_llm(prompt):
202
+ # System prompt (optional)
203
+ system_prompt = "You are an expert in image accessibility and alternative text."
204
+
205
+ # Generate the response using the Hugging Face InferenceClient's chat completion
206
+ response = client.chat_completions.create(
207
+ model="meta-llama/Llama-2-7b-chat-hf",
208
+ messages=[
209
+ {"role": "system", "content": system_prompt}, # Optional system prompt
210
+ {"role": "user", "content": prompt}
211
+ ],
212
+ stream=True,
213
+ temperature=0.5,
214
+ max_tokens=1024,
215
+ top_p=0.7
216
+ )
217
+
218
+ # Collect the streamed response
219
+ response_content = ""
220
+ for message in response:
221
+ if "choices" in message and len(message["choices"]) > 0:
222
+ delta = message["choices"][0].get("delta", {})
223
+ content = delta.get("content", "")
224
+ response_content += content
225
+ # Optionally, you can update the progress to the user here
226
+
227
+ return response_content.strip()
228
 
229
  # Check if an image has been uploaded and if the button has been pressed
230
  if uploaded_file is not None and analyze_button:
 
236
  if isinstance(caption_response, dict) and caption_response.get("error"):
237
  st.error(f"Error with image captioning model: {caption_response['error']}")
238
  else:
239
+ # Extract the generated caption
240
  image_caption = caption_response[0]['generated_text']
241
 
242
  # Use the complex image prompt text
 
246
  if additional_details:
247
  prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"
248
 
249
+ # Create the full prompt
250
  full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
251
 
252
  # Use the language model to generate the alt text description
253
  llm_response = query_llm(full_prompt)
254
 
255
+ # Display the generated alt text
256
+ st.markdown("### Generated Alt Text:")
257
+ st.write(llm_response)
 
 
 
 
258
 
259
+ st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
260
  else:
261
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")