# NOTE: removed Hugging Face Spaces page-scrape artifacts (status lines,
# commit hashes, and the line-number gutter) that were not part of the source.
import streamlit as st
import requests
from PIL import Image
import io
from huggingface_hub import InferenceClient
# Streamlit page setup (set_page_config must be the first Streamlit call).
st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")

# Add the logo image with a specified width
image_width = 300 # Set the desired width in pixels
st.image('MTSS.ai_Logo.png', width=image_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API Key from secrets.
# NOTE(review): st.secrets[...] raises if the key is absent — confirm the
# deployment always configures `huggingface_api_key`.
huggingface_api_key = st.secrets["huggingface_api_key"]

# Initialize the Hugging Face inference client
client = InferenceClient(token=huggingface_api_key)

# File uploader allows user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image. Converting to RGB drops any alpha channel so
    # the image can later be re-encoded as JPEG for the captioning API.
    image = Image.open(uploaded_file).convert('RGB')
    image_width = 200 # Set the desired preview width in pixels
    with st.expander("Image", expanded=True):
        # NOTE(review): use_column_width is deprecated in newer Streamlit in
        # favor of use_container_width — confirm against the pinned version.
        st.image(image, caption=uploaded_file.name, width=image_width, use_column_width=False)
else:
    st.warning("Please upload an image.")

# Option for adding additional details
show_details = st.checkbox("Add additional details about the image.", value=False)

if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
    )
else:
    # Empty string means "no extra context" for the prompt builder below.
    additional_details = ""

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")

# Prompt for complex image description
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, thoroughly describe the image caption provided. "
    "Provide a detailed description using not more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative. "
    "Importantly, only describe what is visibly present in the image and avoid making assumptions or adding extraneous information. "
    "Stick to the facts and ensure the description is accurate and reliable."
)
# Functions to query the Hugging Face Inference API
def query_image_caption(image):
    """Caption a PIL image via the Hugging Face image-to-text API.

    The image is serialized to JPEG bytes and sent to the hosted captioning
    model; the client's response is returned unchanged.
    """
    jpeg_buffer = io.BytesIO()
    image.save(jpeg_buffer, format="JPEG")
    # Alternative model: "Salesforce/blip-image-captioning-large"
    return client.image_to_text(
        model="nlpconnect/vit-gpt2-image-captioning",
        image=jpeg_buffer.getvalue(),
    )
def query_llm(prompt):
    """Generate alt text from *prompt* with a chat LLM via the HF Inference API.

    Streams the chat completion and concatenates the text deltas.

    Args:
        prompt: Full user prompt (instructions plus the image caption).

    Returns:
        The assistant's full response with surrounding whitespace stripped.
    """
    # System prompt (optional)
    system_prompt = "You are an expert in image accessibility and alternative text."

    # Generate the response using the Hugging Face InferenceClient's chat completion
    response = client.chat.completions.create(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=[
            {"role": "system", "content": system_prompt},  # Optional system prompt
            {"role": "user", "content": prompt},
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7,
    )

    # Collect the streamed response. A delta may carry content=None (e.g. the
    # role-only first chunk or the final chunk); the original
    # `delta.get("content", "")` returned None in that case and crashed the
    # string concatenation, so coerce falsy content to "" instead.
    parts = []
    for message in response:
        if "choices" in message and len(message["choices"]) > 0:
            delta = message["choices"][0].get("delta", {}) or {}
            parts.append(delta.get("content") or "")
    return "".join(parts).strip()
# Check if an image has been uploaded and if the button has been pressed.
# (Removed a stray trailing "|" scrape artifact that made the last line a
# syntax error.)
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get the caption from the image using the image captioning API
        caption_response = query_image_caption(image)

        # Handle API errors reported as a dict with an "error" key.
        if isinstance(caption_response, dict) and caption_response.get("error"):
            st.error(f"Error with image captioning model: {caption_response['error']}")
        else:
            # caption_response is the caption text itself; assign it directly.
            image_caption = caption_response

            # Use the complex image prompt text
            prompt_text = complex_image_prompt_text

            # Include additional details if provided
            if additional_details:
                prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"

            # Create the full prompt
            full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

            # Use the language model to generate the alt text description
            llm_response = query_llm(full_prompt)

            # Display the generated alt text
            st.markdown("### Generated Alt Text:")
            st.write(llm_response)

            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")