"""Streamlit app that generates accessibility alt text for an uploaded image.

Flow: the user uploads an image, a BLIP captioning pipeline produces a raw
caption, and a hosted chat model rewrites that caption into alt text following
either a short prompt or a longer "complex image" prompt.
"""

import streamlit as st
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import base64

# Upper bound on generated tokens. Set explicitly so the reply length does not
# depend on the client library's default; sized to fit the 500-character limit
# requested by the complex-image prompt with some headroom.
MAX_RESPONSE_TOKENS = 256


@st.cache_resource
def _load_image_captioner(api_key):
    """Build the image captioning pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the captioning model would be re-initialised each time.
    """
    return pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-large",
        use_auth_token=api_key,
    )


# Streamlit page setup
st.set_page_config(
    page_title="MTSS Image Accessibility Alt Text Generator",
    layout="centered",
    initial_sidebar_state="auto",
)

# Add the image with a specified width
image_width = 300  # Set the desired width in pixels
st.image('MTSS.ai_Logo.png', width=image_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API Key from secrets
huggingface_api_key = st.secrets["huggingface_api_key"]

# Initialize the image captioning pipeline (cached across reruns)
image_captioner = _load_image_captioner(huggingface_api_key)

# Initialize the language model client
client = InferenceClient(token=huggingface_api_key)

# File uploader allows user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    image_width = 200  # Set the desired width in pixels
    with st.expander("Image", expanded=True):
        st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
else:
    st.warning("Please upload an image.")

# Toggle for showing additional details input
show_details = st.checkbox("Add details about the image.", value=False)

if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "The details could include specific information that is important to include in the alt text or reflect why the image is being used:"
    )
else:
    additional_details = ""

# Toggle for modifying the prompt for complex images
complex_image = st.checkbox("Is this a complex image?", value=False)

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")

# Optimized prompt for complex images
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
    "Provide a brief description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative."
)

# Check if an image has been uploaded and if the button has been pressed
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get the caption from the image using the image captioning model
        caption_response = image_captioner(image)
        image_caption = caption_response[0]['generated_text']

        # Determine which prompt to use based on the complexity of the image
        if complex_image:
            prompt_text = complex_image_prompt_text
        else:
            prompt_text = (
                "As an expert in image accessibility and alternative text, succinctly describe the image caption provided in less than 125 characters. "
                "Provide a brief description using not more than 125 characters that conveys the essential information in three or fewer clear and concise sentences for use as alt text. "
                "Skip phrases like 'image of' or 'picture of.' "
                "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points and newlines, focusing on creating a seamless narrative for accessibility purposes."
            )

        # Include additional details if provided
        if additional_details:
            prompt_text += f"\n\nInclude the additional context provided by the user in your description:\n{additional_details}"

        # Create the prompt for the language model
        full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

        # Prepare messages for chat interface
        messages = [
            {"role": "user", "content": full_prompt}
        ]

        # Use the language model to generate the alt text description
        try:
            # Stream the response from the language model. The chat endpoint
            # on InferenceClient is `chat_completion`; `client.chat` is not a
            # callable text-generation method.
            stream = client.chat_completion(
                messages=messages,
                model="meta-llama/Llama-2-7b-chat-hf",
                max_tokens=MAX_RESPONSE_TOKENS,
                stream=True,
            )

            # Stream the response
            full_response = ""
            message_placeholder = st.empty()
            for chunk in stream:
                # Each streamed chunk carries its text increment in
                # choices[0].delta.content; some chunks (e.g. the final one)
                # may have no choices or a None content, so skip those.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    full_response += content
                    message_placeholder.markdown(full_response + "▌")

            # Final update after stream ends
            message_placeholder.markdown(full_response)

            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
        except Exception as e:
            # Top-level UI boundary: surface any inference failure to the user
            # instead of crashing the app with a traceback.
            st.error(f"An error occurred: {e}")
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")