Spaces:
Sleeping
Sleeping
File size: 5,785 Bytes
7f6c08d 7352851 e8b9998 7352851 c337535 3a8f95c 05fbc95 7f6c08d 3a8f95c 7352851 d97169f e854579 ef13d6a ad23ebe 3a8f95c d97169f 3a8f95c ad23ebe 3a8f95c d97169f 3a8f95c 7352851 e8b9998 7352851 e1a461b 7352851 e8b9998 7352851 3a8f95c d97169f 3a8f95c ad23ebe e854579 cb3fae5 e854579 3a8f95c c337535 3a8f95c 6ccb176 3a8f95c 66a966e 3a8f95c 66a966e 7352851 ef13d6a 3a8f95c 7352851 3a8f95c 7352851 dd1055c 3a8f95c 7352851 3a8f95c 7352851 dd1055c 3a8f95c 7352851 af0fd83 dd1055c ad23ebe 3a8f95c d97169f 3a8f95c ad23ebe 7352851 ad23ebe 7352851 3a8f95c 7352851 3a8f95c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import nest_asyncio
nest_asyncio.apply()
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import io
from streamlit.components.v1 import html
import asyncio
# Make sure the current thread has an event loop installed before the
# libraries below are used.
#
# asyncio.get_running_loop() is used as the probe because it only reports
# whether a loop is running. asyncio.get_event_loop() emits a
# DeprecationWarning (Python 3.12+) or raises RuntimeError (Python 3.14, and
# earlier in non-main threads) when the thread has no current loop, which
# would crash this check instead of repairing the situation.
try:
    asyncio.get_running_loop()
except RuntimeError:
    # No loop is running in this thread: install a fresh one.
    asyncio.set_event_loop(asyncio.new_event_loop())
# Initialize session state
# Each entry is written only when the key is missing, so values stored by an
# earlier run of the script are kept.
for _state_key, _initial_value in (
    ('processed_data', {'scenario': None, 'story': None, 'audio': None}),
    ('timer_started', False),
    ('timer_frozen', False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Timer component that uses localStorage to check for a freeze flag
def timer():
    """Return the HTML/JavaScript snippet for the elapsed-time display.

    The markup is a ``<div id="timer">`` followed by a script that:

    * records the moment the script starts running,
    * removes any ``freezeTimer`` entry left in ``localStorage``,
    * rewrites the div once per second with the elapsed time as ``MM:SS``,
    * stops ticking and turns the text green as soon as ``freezeTimer``
      holds the string ``true``.

    Returns:
        str: The markup, ready to be embedded by the caller.
    """
    # The stopwatch emoji (U+23F1 U+FE0F) is written with escapes so the
    # source stays ASCII; the literal had previously been stored mis-encoded
    # and was shown to users as garbage characters.
    return """
<div id="timer" style="font-size:16px;color:#666;margin-bottom:10px;">\u23f1\ufe0f Elapsed: 00:00</div>
<script>
(function() {
    var start = Date.now();
    var timerElement = document.getElementById('timer');
    // Clear any previous freeze flag
    localStorage.removeItem("freezeTimer");
    var interval = setInterval(function() {
        // Check if freeze flag is set in localStorage.
        if(localStorage.getItem("freezeTimer") === "true"){
            clearInterval(interval);
            timerElement.style.color = '#00cc00'; // Change color to indicate frozen.
            return;
        }
        var elapsed = Date.now() - start;
        var minutes = Math.floor(elapsed / 60000);
        var seconds = Math.floor((elapsed % 60000) / 1000);
        timerElement.innerHTML = '\u23f1\ufe0f Elapsed: ' +
            (minutes < 10 ? '0' : '') + minutes + ':' +
            (seconds < 10 ? '0' : '') + seconds;
    }, 1000);
})();
</script>
"""
# Page setup
# NOTE(review): the page_icon literal had been stored mis-encoded and part of
# the character was lost. The parrot emoji (U+1F99C) is the presumed original
# icon -- confirm. It is written as an escape so the source stays ASCII.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="\U0001F99C")
st.header("Turn Your Image to a Short Audio Story for Children")
# Model loading
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        dict: ``"img_model"`` maps to the image-to-text pipeline and
        ``"story_model"`` maps to the text-generation pipeline.
    """
    captioner = pipeline("image-to-text", "cnmoro/tiny-image-captioning")
    storyteller = pipeline("text-generation", "Qwen/Qwen2.5-0.5B-Instruct")
    return {"img_model": captioner, "story_model": storyteller}


# Module-level handle used by the processing functions.
models = load_models()
# Processing functions
def img2text(url):
    """Return the caption the image-to-text pipeline generates for ``url``.

    ``url`` is handed unchanged to ``models["img_model"]``; the
    ``generated_text`` field of the first result is returned.
    """
    captions = models["img_model"](url)
    first_caption = captions[0]
    return first_caption["generated_text"]
def text2story(text):
    """Ask the text-generation pipeline for a short story about ``text``.

    A two-message chat (system + user) is sent to ``models["story_model"]``
    with sampling enabled, and the ``content`` of the entry that follows the
    two input messages in ``generated_text`` is returned.

    Args:
        text: Subject of the story; inserted into the user prompt.

    Returns:
        The ``content`` field of the selected entry.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Generate a 100-word story about: {text}"},
    ]
    generation_args = {
        "max_new_tokens": 100,
        "do_sample": True,
        "temperature": 0.7,
    }
    outputs = models["story_model"](messages, **generation_args)
    conversation = outputs[0]["generated_text"]
    # The entry right after the input messages (index 2) is read as the reply.
    return conversation[len(messages)]["content"]
def text2audio(story_text):
    """Synthesize ``story_text`` with gTTS into an in-memory buffer.

    Args:
        story_text: English text to be spoken.

    Returns:
        dict: ``'audio'`` is an ``io.BytesIO`` rewound to position 0 that
        holds what gTTS wrote; ``'sampling_rate'`` is the fixed value 16000
        (a constant, not derived from the generated audio).
    """
    speech = gTTS(text=story_text, lang='en', slow=False)
    buffer = io.BytesIO()
    speech.write_to_fp(buffer)
    # Rewind so a reader starts at the first byte.
    buffer.seek(0)
    return {'audio': buffer, 'sampling_rate': 16000}
# UI components
# Standard-library helpers needed only by this section of the script.
import hashlib
import os
import tempfile

# NOTE(review): the status emoji below had been stored mis-encoded (one
# literal was even split across two lines, which is a syntax error). They are
# restored as escapes so the source stays ASCII. The framed-picture, check
# mark and cross mark are consistent with the surviving bytes; the book and
# speaker glyphs are presumed -- confirm.
uploaded_file = st.file_uploader("Select an Image After the Models are Loaded...")

if uploaded_file is not None:
    # Inject the timer only once, right after image upload.
    if not st.session_state.timer_started and not st.session_state.timer_frozen:
        st.session_state.timer_started = True
        html(timer(), height=50)

    # Initialize progress containers
    status_text = st.empty()
    progress_bar = st.progress(0)

    temp_path = None  # set once a temporary copy of the upload exists
    try:
        bytes_data = uploaded_file.getvalue()

        # Identify the upload by name AND content: a different image that
        # happens to reuse a file name must still be processed.
        digest = hashlib.sha256(bytes_data).hexdigest()
        is_new_upload = (
            st.session_state.get('current_file') != uploaded_file.name
            or st.session_state.get('current_file_digest') != digest
        )

        # Display image (on every rerun, so it stays visible after a button
        # press).
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

        if is_new_upload:
            # Save the upload to a private temporary file instead of writing
            # into the working directory under a client-supplied name; the
            # file is removed again in the ``finally`` clause.
            suffix = os.path.splitext(uploaded_file.name)[1]
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                temp_path = tmp.name
                tmp.write(bytes_data)

            # Stage 1: Image to Text
            status_text.markdown("**\U0001F5BC\uFE0F Generating caption...**")
            scenario = img2text(temp_path)
            progress_bar.progress(33)

            # Stage 2: Text to Story
            status_text.markdown("**\U0001F4D6 Generating story...**")
            story = text2story(scenario)
            progress_bar.progress(66)

            # Stage 3: Story to Audio
            status_text.markdown("**\U0001F50A Synthesizing audio...**")
            audio = text2audio(story)
            progress_bar.progress(100)

            # Commit results and mark the upload as processed only after
            # every stage succeeded, so a failed run is retried on the next
            # rerun instead of showing empty results.
            st.session_state.processed_data = {
                'scenario': scenario,
                'story': story,
                'audio': audio,
            }
            st.session_state.current_file = uploaded_file.name
            st.session_state.current_file_digest = digest

            status_text.success("**\u2705 Generation complete!**")

            # Immediately freeze the timer by setting the freeze flag.
            html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
            st.session_state.timer_frozen = True

    except Exception as e:
        # Each html() snippet is rendered in its own iframe, so the timer
        # element cannot be reached via document.getElementById from here.
        # Raise the shared freeze flag instead so the timer stops counting.
        html("<script>localStorage.setItem('freezeTimer', 'true');</script>", height=0)
        status_text.error(f"**\u274C Error:** {e}")
        progress_bar.empty()
        raise  # bare raise keeps the original traceback
    finally:
        # Remove the temporary copy whether or not processing succeeded.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    st.write("**Caption:**", st.session_state.processed_data['scenario'])
    st.write("**Story:**", st.session_state.processed_data['story'])

    # Optionally, you can still have the Play Audio button.
    if st.button("Play Audio of the Story Generated"):
        audio_data = st.session_state.processed_data.get('audio')
        if audio_data:
            # Since the timer is already frozen by now, just play the audio.
            st.audio(audio_data['audio'].getvalue(), format="audio/mp3")
        else:
            st.warning("Please generate a story first!")
|