# Hugging Face Space: frankai98/ISOM5240AssignTest
# (page header captured together with the file; status at capture time: Sleeping;
#  file size: 5,661 bytes)

import nest_asyncio
nest_asyncio.apply()

import streamlit as st
from transformers import pipeline
import torch
from gtts import gTTS
import io
import time
from streamlit.components.v1 import html
import asyncio

# Make sure the current thread has an event loop installed before the
# libraries imported above are used.
#
# asyncio.get_event_loop() can raise RuntimeError when the thread has no
# current loop, so that case is treated the same as "no loop is running"
# instead of crashing the script before any UI is rendered.
try:
    _loop_is_running = asyncio.get_event_loop().is_running()
except RuntimeError:
    _loop_is_running = False

if not _loop_is_running:
    asyncio.set_event_loop(asyncio.new_event_loop())
    
# Initialize session state
# Seed the per-session result slots exactly once; later reruns of the script
# find the key already present and leave the stored values untouched.
if 'processed_data' not in st.session_state:
    # Every slot starts out as None until the pipeline fills it in.
    st.session_state['processed_data'] = dict.fromkeys(
        ('scenario', 'story', 'audio')
    )


# Timer component with direct DOM manipulation
def timer():
    """Return the HTML/JS snippet that shows an elapsed-time badge.

    The snippet is self-contained: it no longer pulls its logic from a raw
    gist URL. Loading executable code from an unpinned third-party URL is a
    supply-chain risk, and raw gist content is served as plain text with
    ``nosniff``, so browsers decline to run it as a script and the badge
    would stay at ``00:00``.

    The script in the snippet:

    * moves the ``#streamlit-timer`` badge into the parent document (the
      caller renders this component in a zero-size iframe, so the badge
      would otherwise be invisible);
    * updates ``#timer-display`` once per second with ``MM:SS`` elapsed
      since the snippet was loaded;
    * publishes ``window.parent.timerControls.stop()`` (the hook used by
      ``stop_timer``) and ``window.parent.timerInterval``.

    Returns:
        str: markup to pass to ``streamlit.components.v1.html``.
    """
    return """
    <div id="streamlit-timer" style="
        position: fixed;
        top: 15px;
        right: 15px;
        padding: 10px 20px;
        background: #ffffff;
        border-radius: 25px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        z-index: 999999;
        border: 1px solid #e0e0e0;
        font-family: Arial, sans-serif;
        font-size: 16px;
        color: #333;
    ">
        ⏱️ <span id="timer-display">00:00</span>
    </div>
    <script>
    (function () {
        var host = window.parent;
        var hostDoc = host.document;

        // Stop and discard a timer left behind by an earlier run so that
        // only one badge and one interval exist at a time.
        if (host.timerControls) {
            host.timerControls.stop();
        }
        var stale = hostDoc.getElementById('streamlit-timer');
        if (stale) {
            stale.remove();
        }

        // Re-home the badge in the parent page so it is actually visible.
        var badge = document.getElementById('streamlit-timer');
        hostDoc.body.appendChild(badge);
        var display = badge.querySelector('#timer-display');
        var startedAt = Date.now();

        function pad(value) {
            return (value < 10 ? '0' : '') + value;
        }

        function tick() {
            var seconds = Math.floor((Date.now() - startedAt) / 1000);
            display.textContent =
                pad(Math.floor(seconds / 60)) + ':' + pad(seconds % 60);
        }

        host.timerInterval = host.setInterval(tick, 1000);
        host.timerControls = {
            stop: function () {
                if (host.timerInterval) {
                    host.clearInterval(host.timerInterval);
                    host.timerInterval = null;
                }
            }
        };
    })();
    </script>
    """

# Modified stop command
def stop_timer():
    """Return a ``<script>`` snippet that halts the page timer.

    The script calls ``window.parent.timerControls.stop()`` and does nothing
    when ``window.parent.timerControls`` is not defined.

    Returns:
        str: markup to pass to an HTML component.
    """
    stop_snippet = """
    <script>
    // Safely stop timer if exists
    if (window.parent.timerControls) {
        window.parent.timerControls.stop();
    }
    </script>
    """
    return stop_snippet

# Page setup
# Configure the browser tab (title and icon), then render the page heading.
st.set_page_config(
    page_icon="🦜",
    page_title="Your Image to Audio Story",
)
st.header("Turn Your Image to a Short Audio Story for Children")

# Model loading
@st.cache_resource
def load_models():
    """Create the two Hugging Face pipelines the app relies on.

    Wrapped in ``st.cache_resource``.

    Returns:
        dict: ``"img_model"`` maps to the image-to-text pipeline and
        ``"story_model"`` maps to the text-generation pipeline.
    """
    captioner = pipeline("image-to-text", "cnmoro/tiny-image-captioning")
    storyteller = pipeline("text-generation", "Qwen/Qwen2.5-0.5B-Instruct")
    return {"img_model": captioner, "story_model": storyteller}

# Module-level registry of pipelines; the processing functions below look up
# "img_model" and "story_model" in it.
models = load_models()

# Processing functions
def img2text(url):
    """Caption an image with the ``"img_model"`` pipeline.

    Args:
        url: image reference handed to the pipeline unchanged (the caller
            in this file passes a local file path).

    Returns:
        The ``"generated_text"`` field of the pipeline's first result.
    """
    predictions = models["img_model"](url)
    top_prediction = predictions[0]
    return top_prediction["generated_text"]

def text2story(text, *, max_new_tokens=100):
    """Generate a short story about ``text`` with the ``"story_model"`` pipeline.

    Args:
        text: subject of the story (the image caption in this app).
        max_new_tokens: upper bound on generated tokens. Defaults to the
            value this function has always used.
            NOTE(review): the prompt asks for 100 words, and 100 tokens is
            probably fewer than 100 words, so stories may end mid-sentence;
            consider raising the default after checking latency.

    Returns:
        The ``"content"`` of the last message in the returned conversation,
        i.e. the newly generated reply.
    """
    prompt = f"Generate a brief 100-word story about: {text}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    outputs = models["story_model"](
        messages,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7
    )
    conversation = outputs[0]["generated_text"]
    # The reply is appended after the input messages, so read the last entry
    # rather than a fixed index that silently depends on how many messages
    # were sent.
    return conversation[-1]["content"]

def text2audio(story_text):
    """Synthesize English speech for ``story_text`` with gTTS.

    Args:
        story_text: the text to be spoken.

    Returns:
        dict: ``'audio'`` holds an ``io.BytesIO`` with the synthesized bytes,
        rewound to the start; ``'sampling_rate'`` is the constant ``16000``.
    """
    speech = gTTS(text=story_text, lang='en', slow=False)
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    # Rewind so consumers read from the beginning of the stream.
    buffer.seek(0)
    return {'audio': buffer, 'sampling_rate': 16000}

# UI components
uploaded_file = st.file_uploader("Select an Image After the Models are Loaded...")


def _content_digest(data):
    """Return the SHA-256 hex digest of ``data``; used to spot a changed upload."""
    import hashlib  # local import: only this helper needs it

    return hashlib.sha256(data).hexdigest()


def _caption_upload(image_bytes, original_name):
    """Caption uploaded image bytes through ``img2text`` using a temp file.

    The bytes are written to a private temporary file rather than to the
    working directory under the client-supplied name, so an upload can
    neither overwrite application files nor leave stray files behind.
    Only a sanitized copy of the original extension is kept.
    """
    import os
    import tempfile

    extension = os.path.splitext(os.path.basename(original_name))[1]
    safe_suffix = "".join(ch for ch in extension if ch.isalnum() or ch == ".")
    fd, temp_path = tempfile.mkstemp(suffix=safe_suffix)
    try:
        with os.fdopen(fd, "wb") as temp_file:
            temp_file.write(image_bytes)
        return img2text(temp_path)
    finally:
        os.remove(temp_path)


def _run_page_script(markup):
    """Render ``markup`` in a zero-size HTML component so its script runs."""
    # NOTE(review): an unchanged component may be reused across reruns
    # without re-executing its script; the per-run token keeps the markup
    # unique so the script is loaded again each time. Confirm if in doubt.
    html(f"{markup}<!-- run {time.time_ns()} -->", height=0, width=0)


# The badge is looked up in the parent page, not in the component's own
# (fresh, empty) iframe document; both scripts are no-ops when it is absent.
_MARK_TIMER_DONE = """
<script>
var badge = window.parent.document.getElementById('streamlit-timer');
if (badge) {
    badge.style.background = '#e8f5e9';
    badge.style.color = '#2e7d32';
}
</script>
"""

_REMOVE_TIMER = """
<script>
var badge = window.parent.document.getElementById('streamlit-timer');
if (badge) {
    badge.remove();
}
</script>
"""

if uploaded_file is not None:
    # Initialize progress containers
    status_text = st.empty()
    progress_slot = st.empty()

    # Inject timer HTML
    st.markdown("""
    <div id="timer-container"></div>
    """, unsafe_allow_html=True)

    bytes_data = uploaded_file.getvalue()
    upload_digest = _content_digest(bytes_data)
    # Re-run the pipeline when either the name or the content changed, so a
    # different image that reuses a file name is not served stale results.
    is_new_upload = (
        st.session_state.get('current_file') != uploaded_file.name
        or st.session_state.get('current_file_digest') != upload_digest
    )

    # Display image (on every run, so it does not vanish on a plain rerun)
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    if is_new_upload:
        progress_bar = progress_slot.progress(0)

        # Load timer system
        _run_page_script(timer())

        try:
            # Stage 1: Image to Text
            status_text.markdown("**🖼️ Generating caption...**")
            scenario = _caption_upload(bytes_data, uploaded_file.name)
            progress_bar.progress(33)

            # Stage 2: Text to Story
            status_text.markdown("**📖 Generating story...**")
            story = text2story(scenario)
            progress_bar.progress(66)

            # Stage 3: Story to Audio
            status_text.markdown("**🔊 Synthesizing audio...**")
            audio = text2audio(story)
            progress_bar.progress(100)
        except Exception as e:
            _run_page_script(stop_timer() + _REMOVE_TIMER)
            status_text.error(f"**❌ Error:** {str(e)}")
            progress_slot.empty()
            # Bare raise keeps the original traceback intact.
            raise

        # Commit all three results together, and only after every stage
        # succeeded: a failed run must not leave a new caption paired with
        # an old story/audio, and must be retried on the next rerun.
        st.session_state.processed_data.update({
            'scenario': scenario,
            'story': story,
            'audio': audio,
        })
        st.session_state.current_file = uploaded_file.name
        st.session_state.current_file_digest = upload_digest

        # Final status and stop timer
        status_text.success("**✅ Generation complete!**")
        _run_page_script(stop_timer() + _MARK_TIMER_DONE)

    # Show results
    st.write("**Caption:**", st.session_state.processed_data['scenario'])
    st.write("**Story:**", st.session_state.processed_data['story'])

# Audio playback
# The button is always rendered; the stored audio is only looked up once the
# user actually clicks it.
play_requested = st.button("Play Audio of the Story Generated")
if play_requested:
    stored_audio = st.session_state.processed_data.get('audio')
    if not stored_audio:
        st.warning("Please generate a story first!")
    else:
        # The 'audio' entry is the in-memory buffer produced by text2audio.
        st.audio(stored_audio['audio'].getvalue(), format="audio/mp3")