import streamlit as st
import io
from PIL import Image
from huggingface_hub import InferenceClient

# Streamlit page setup
st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")

# Add the logo image with a specified width
image_width = 300  # Desired width in pixels
st.image('MTSS.ai_Logo.png', width=image_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API key from secrets
huggingface_api_key = st.secrets["huggingface_api_key"]

# Initialize the Hugging Face inference client
client = InferenceClient(token=huggingface_api_key)

# File uploader allows the user to add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image
    image = Image.open(uploaded_file).convert('RGB')
    display_width = 200  # Desired display width in pixels
    with st.expander("Image", expanded=True):
        st.image(image, caption=uploaded_file.name, width=display_width)
else:
    st.warning("Please upload an image.")

# Option for adding additional details
show_details = st.checkbox("Add additional details about the image.", value=False)

if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "Provide specific information that is important to include in the alt text or reflect why the image is being used:"
    )
else:
    additional_details = ""

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")

# Prompt for complex image description
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, thoroughly describe the image based on the caption provided. "
    "Provide a detailed description of no more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative. "
    "Importantly, only describe what is visibly present in the image and avoid making assumptions or adding extraneous information. "
    "Stick to the facts and ensure the description is accurate and reliable."
)

# Functions to query the Hugging Face Inference API
def query_image_caption(image):
    # Convert the PIL image to JPEG bytes
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_bytes = buffered.getvalue()

    # Use the InferenceClient's image_to_text method
    response = client.image_to_text(
        # model="Salesforce/blip-image-captioning-large",
        model="nlpconnect/vit-gpt2-image-captioning",
        image=image_bytes,
    )

    # Recent huggingface_hub versions return an ImageToTextOutput object;
    # older ones return a plain string. Normalize to a string either way.
    return getattr(response, "generated_text", response)
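
# Note: `query_llm` below uses the OpenAI-compatible
# `client.chat.completions.create(...)` entry point, which assumes a recent
# huggingface_hub release (roughly v0.22 or later). On older releases,
# `client.chat_completion(...)` accepts the same arguments.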
def query_llm(prompt):
    # System prompt steering the model toward accessibility expertise
    system_prompt = "You are an expert in image accessibility and alternative text."

    # Generate the response using the Hugging Face InferenceClient's chat completion
    response = client.chat.completions.create(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7,
    )

    # Collect the streamed response; each chunk carries a partial delta,
    # and delta.content can be None (e.g., on the final chunk)
    response_content = ""
    for chunk in response:
        if chunk.choices:
            response_content += chunk.choices[0].delta.content or ""

    return response_content.strip()


# Run the analysis once an image has been uploaded and the button is pressed
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get a caption for the image from the captioning model.
        # The InferenceClient raises an exception on HTTP or model errors
        # rather than returning an error dict, so catch it here.
        try:
            image_caption = query_image_caption(image)
        except Exception as e:
            st.error(f"Error with image captioning model: {e}")
            st.stop()

        # Start from the complex image prompt text
        prompt_text = complex_image_prompt_text

        # Include additional details if provided
        if additional_details:
            prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"

        # Create the full prompt
        full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

        # Use the language model to generate the alt text description
        llm_response = query_llm(full_prompt)

        # Display the generated alt text
        st.markdown("### Generated Alt Text:")
        st.write(llm_response)

        st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")
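
# Usage (a sketch, assuming this file is saved as app.py and that the
# Streamlit secrets file defines the API key read above):
#
#   # .streamlit/secrets.toml
#   huggingface_api_key = "hf_..."
#
#   $ streamlit run app.py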