ProfessorLeVesseur committed on
Commit
9fc4609
·
verified ·
1 Parent(s): c6942c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -59
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from huggingface_hub import InferenceClient
4
  from PIL import Image
5
  import base64
 
6
 
7
  # Streamlit page setup
8
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
@@ -17,22 +17,21 @@ st.subheader('Image Alt Text Creator')
17
  # Retrieve the Hugging Face API Key from secrets
18
  huggingface_api_key = st.secrets["huggingface_api_key"]
19
 
20
- # Initialize the image captioning pipeline
21
- image_captioner = pipeline(
22
- "image-to-text",
23
- model="Salesforce/blip-image-captioning-large",
24
- use_auth_token=huggingface_api_key
25
- )
26
 
27
- # Initialize the language model client
28
- client = InferenceClient(token=huggingface_api_key)
 
 
29
 
30
  # File uploader allows user to add their own image
31
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
32
 
33
  if uploaded_file:
34
  # Display the uploaded image
35
- image = Image.open(uploaded_file)
36
  image_width = 200 # Set the desired width in pixels
37
  with st.expander("Image", expanded=True):
38
  st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
@@ -64,58 +63,75 @@ complex_image_prompt_text = (
64
  "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
65
  )
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Check if an image has been uploaded and if the button has been pressed
68
  if uploaded_file is not None and analyze_button:
69
  with st.spinner("Analyzing the image..."):
70
- # Get the caption from the image using the image captioning model
71
- caption_response = image_captioner(image)
72
- image_caption = caption_response[0]['generated_text']
73
 
74
- # Determine which prompt to use based on the complexity of the image
75
- if complex_image:
76
- prompt_text = complex_image_prompt_text
77
  else:
78
- prompt_text = (
79
- "As an expert in image accessibility and alternative text, succinctly describe the image caption provided in less than 125 characters. "
80
- "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
81
- "Skip phrases like 'image of' or 'picture of.' "
82
- "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
83
- )
84
-
85
- # Include additional details if provided
86
- if additional_details:
87
- prompt_text += f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
88
-
89
- # Create the prompt for the language model
90
- full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
91
-
92
- # Prepare messages for chat interface
93
- messages = [
94
- {"role": "user", "content": full_prompt}
95
- ]
96
-
97
- # Use the language model to generate the alt text description
98
- try:
99
- # Stream the response from the language model
100
- stream = client.chat(
101
- model="meta-llama/Llama-2-7b-chat-hf",
102
- messages=messages,
103
- stream=True
104
- )
105
-
106
- # Stream the response
107
- full_response = ""
108
- message_placeholder = st.empty()
109
- for chunk in stream:
110
- if 'generated_text' in chunk:
111
- content = chunk['generated_text']
112
- full_response += content
113
- message_placeholder.markdown(full_response + "▌")
114
- # Final update after stream ends
115
- message_placeholder.markdown(full_response)
116
-
117
- st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
118
- except Exception as e:
119
- st.error(f"An error occurred: {e}")
120
  else:
121
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
 
1
  import streamlit as st
2
+ import requests
 
3
  from PIL import Image
4
  import base64
5
+ import io
6
 
7
  # Streamlit page setup
8
  st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered", initial_sidebar_state="auto")
 
17
  # Retrieve the Hugging Face API Key from secrets
18
  huggingface_api_key = st.secrets["huggingface_api_key"]
19
 
20
+ # API endpoints
21
+ API_URL_CAPTION = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
22
+ API_URL_LLM = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
 
 
 
23
 
24
+ headers = {
25
+ "Authorization": f"Bearer {huggingface_api_key}",
26
+ "Content-Type": "application/json"
27
+ }
28
 
29
  # File uploader allows user to add their own image
30
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
31
 
32
  if uploaded_file:
33
  # Display the uploaded image
34
+ image = Image.open(uploaded_file).convert('RGB')
35
  image_width = 200 # Set the desired width in pixels
36
  with st.expander("Image", expanded=True):
37
  st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
 
63
  "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
64
  )
65
 
66
+ # Functions to query the Hugging Face Inference API
67
+ def query_image_caption(image):
68
+ # Convert PIL image to bytes
69
+ buffered = io.BytesIO()
70
+ image.save(buffered, format="JPEG")
71
+ image_bytes = buffered.getvalue()
72
+
73
+ response = requests.post(API_URL_CAPTION, headers={"Authorization": f"Bearer {huggingface_api_key}"}, data=image_bytes)
74
+ return response.json()
75
+
76
+ def query_llm(prompt):
77
+ payload = {
78
+ "inputs": prompt,
79
+ "parameters": {
80
+ "max_new_tokens": 500,
81
+ "return_full_text": False,
82
+ "do_sample": True,
83
+ "temperature": 0.7,
84
+ "top_p": 0.9
85
+ },
86
+ "options": {
87
+ "wait_for_model": True
88
+ }
89
+ }
90
+
91
+ response = requests.post(API_URL_LLM, headers=headers, json=payload)
92
+ return response.json()
93
+
94
  # Check if an image has been uploaded and if the button has been pressed
95
  if uploaded_file is not None and analyze_button:
96
  with st.spinner("Analyzing the image..."):
97
+ # Get the caption from the image using the image captioning API
98
+ caption_response = query_image_caption(image)
 
99
 
100
+ # Handle potential errors from the API
101
+ if isinstance(caption_response, dict) and caption_response.get("error"):
102
+ st.error(f"Error with image captioning model: {caption_response['error']}")
103
  else:
104
+ image_caption = caption_response[0]['generated_text']
105
+
106
+ # Determine which prompt to use based on the complexity of the image
107
+ if complex_image:
108
+ prompt_text = complex_image_prompt_text
109
+ else:
110
+ prompt_text = (
111
+ "As an expert in image accessibility and alternative text, succinctly describe the image caption provided in less than 125 characters. "
112
+ "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
113
+ "Skip phrases like 'image of' or 'picture of.' "
114
+ "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
115
+ )
116
+
117
+ # Include additional details if provided
118
+ if additional_details:
119
+ prompt_text += f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
120
+
121
+ # Create the prompt for the language model
122
+ full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"
123
+
124
+ # Use the language model to generate the alt text description
125
+ llm_response = query_llm(full_prompt)
126
+
127
+ # Handle potential errors from the API
128
+ if isinstance(llm_response, dict) and llm_response.get("error"):
129
+ st.error(f"Error with language model: {llm_response['error']}")
130
+ else:
131
+ generated_text = llm_response[0]['generated_text'].strip()
132
+ st.markdown("### Generated Alt Text:")
133
+ st.write(generated_text)
134
+
135
+ st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
 
 
 
 
 
 
 
 
 
 
136
  else:
137
  st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")