Spaces:

ProfessorLeVesseur
/

VisionTexts

Sleeping

App Files Files Community

ProfessorLeVesseur committed on Nov 20, 2024

Commit

bbec170

verified ·

1 Parent(s): 4c77641

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -89

app.py CHANGED Viewed

@@ -1,12 +1,6 @@
 import streamlit as st
-import base64
-import json
 import requests
-# Function to read the image file
-def get_image_bytes(image_file):
-    return image_file.read()
 # Streamlit page setup
 st.set_page_config(
     page_title="MTSS Image Accessibility Alt Text Generator",
@@ -39,7 +33,7 @@ show_details = st.checkbox("Add details about the image.", value=False)
 if show_details:
     # Text input for additional details about the image
     additional_details = st.text_area(
-        "The details could include specific information that is important to include in the alt text or reflect why the image is being used:",
     )
 # Toggle for modifying the prompt for complex images
@@ -48,101 +42,74 @@ complex_image = st.checkbox("Is this a complex image?", value=False)
 if complex_image:
     # Caption explaining the impact of the complex image toggle
     st.caption(
-        "By clicking this toggle, it will instruct the app to create a description that exceeds the 125-character limit. "
-        "Add the description in a placeholder behind the image and 'Description in the content placeholder' in the alt text box."
     )
 # Button to trigger the analysis
 analyze_button = st.button("Analyze the Image")
-# Optimized prompt for complex images
-complex_image_prompt_text = (
-    "As an expert in image accessibility and alternative text, thoroughly describe the image provided. "
-    "Provide a brief description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
-    "Skip phrases like 'image of' or 'picture of.' "
-    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
-)
 # Check if an image has been uploaded and if the analyze button has been pressed
 if uploaded_file is not None and analyze_button:
     with st.spinner("Analyzing the image ..."):
         # Read the image bytes
-        image_bytes = get_image_bytes(uploaded_file)
-        # Detect the image content type
-        import imghdr
-        image_type = imghdr.what(None, h=image_bytes)
-        if image_type is None:
-            st.error("Unsupported image type. Please upload a JPEG or PNG image.")
-        else:
-            content_type = f"image/{image_type}"
-            # Determine which prompt to use based on the complexity of the image
-            if complex_image:
-                prompt_text = complex_image_prompt_text
-            else:
-                prompt_text = (
-                    "As an expert in image accessibility and alternative text, succinctly describe the image provided in less than 125 characters. "
-                    "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
-                    "Skip phrases like 'image of' or 'picture of.' "
-                    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
-                )
-            if show_details and additional_details:
-                prompt_text += (
-                    f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"
-                )
-            # Create the payload for the completion request
-            messages = [
-                {
-                    "role": "user",
-                    "content": prompt_text,
-                }
-            ]
-            # Prepare headers and endpoint
-            headers = {
-                "Authorization": f"Bearer {api_key}"
-            }
-            api_url = "https://api-inference.huggingface.co/v1/chat/completions"
-            # Prepare the data payload
-            payload = {
-                "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-                "messages": messages,
-                "max_tokens": 500
-            }
-            # Make the request to the Hugging Face API
-            try:
-                # Send the request with the image file in the 'files' parameter
-                response = requests.post(
-                    api_url,
-                    headers=headers,
-                    data={"data": json.dumps(payload)},
-                    files={"file": ("image", image_bytes, content_type)},
-                    timeout=60  # Optional: increase timeout if needed
-                )
-                # Check for errors
-                response.raise_for_status()
-                # Parse the response
-                completion = response.json()
-                # Extract the assistant's response
-                assistant_response = completion['choices'][0]['message']['content']
                 # Display the response
                 st.markdown(assistant_response)
                 st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
-            except requests.exceptions.HTTPError as http_err:
-                st.error(f"HTTP error occurred: {http_err}")
-            except Exception as e:
-                st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button:

 import streamlit as st
 import requests
 # Streamlit page setup
 st.set_page_config(
     page_title="MTSS Image Accessibility Alt Text Generator",
 if show_details:
     # Text input for additional details about the image
     additional_details = st.text_area(
+        "Include any specific information that is important to include in the alt text or reflect why the image is being used:",
     )
 # Toggle for modifying the prompt for complex images
 if complex_image:
     # Caption explaining the impact of the complex image toggle
     st.caption(
+        "By selecting this option, the app will create a detailed description that may exceed the typical 125-character limit for alt text."
     )
 # Button to trigger the analysis
 analyze_button = st.button("Analyze the Image")
 # Check if an image has been uploaded and if the analyze button has been pressed
 if uploaded_file is not None and analyze_button:
     with st.spinner("Analyzing the image ..."):
         # Read the image bytes
+        image_bytes = uploaded_file.read()
+        # Decide on the model to use
+        model_id = "Salesforce/blip-image-captioning-base"  # You can choose another model if desired
+        # Prepare headers and endpoint
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/octet-stream"
+        }
+        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
+        # Prepare the parameters
+        parameters = {
+            # "max_length": 50,  # Adjust as needed
+            # "num_return_sequences": 1,
+        }
+        # Include additional details in the prompt if provided
+        if show_details and additional_details:
+            prompt_text = f"{additional_details}"
+            parameters["inputs"] = prompt_text
+        # Make the request to the Hugging Face API
+        try:
+            # Send the request with the image bytes
+            response = requests.post(
+                api_url,
+                headers=headers,
+                data=image_bytes,
+                params=parameters,
+                timeout=60  # Optional: increase timeout if needed
+            )
+            # Check for errors
+            response.raise_for_status()
+            # Parse the response
+            completion = response.json()
+            # Extract the generated description
+            if isinstance(completion, list) and "generated_text" in completion[0]:
+                assistant_response = completion[0]["generated_text"]
+                # Adjust the description based on complexity
+                if not complex_image and len(assistant_response) > 125:
+                    assistant_response = assistant_response[:125] + "..."
                 # Display the response
                 st.markdown(assistant_response)
                 st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
+            else:
+                st.error("Unexpected response format from the API.")
+        except requests.exceptions.HTTPError as http_err:
+            st.error(f"HTTP error occurred: {http_err}")
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
 else:
     # Warning for user action required
     if not uploaded_file and analyze_button: