File size: 2,408 Bytes
dfb3989 8367fb2 6b1de29 8367fb2 dfb3989 8367fb2 c916589 8367fb2 dd489ad 8367fb2 504dc12 33fead7 eb25a05 8367fb2 eb25a05 8367fb2 eb25a05 c916589 8367fb2 c916589 eb25a05 33fead7 c916589 eb25a05 c916589 8367fb2 dfb3989 8367fb2 dd489ad eb25a05 dfb3989 eb25a05 c916589 8367fb2 eb25a05 c916589 dfb3989 8367fb2 eb25a05 dfb3989 eb25a05 dfb3989 c916589 dd4f7ba b3f64ee eb25a05 b3f64ee eb25a05 dfb3989 1c165f8 eb25a05 dfb3989 6b1de29 eb25a05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# app.py
import io
import tempfile

import streamlit as st
from gtts import gTTS
from PIL import Image
from transformers import pipeline
# —––––––– Page config
# Set the browser-tab title and a centered layout, then render the page
# heading. set_page_config is the first Streamlit call this script makes.
st.set_page_config(
    layout="centered",
    page_title="Storyteller for Kids",
)
st.title("🖼️ ➡️ 📖 Interactive Storyteller")
# —––––––– Model loading + warm-up
@st.cache_resource
def load_pipelines():
    """Create the captioning and story-generation pipelines and warm them up.

    The function is wrapped in ``st.cache_resource`` so the models are built
    once and reused on later Streamlit reruns instead of being reloaded for
    every interaction.

    Returns:
        tuple: ``(captioner, storyteller)`` where ``captioner`` is an
        ``image-to-text`` pipeline backed by
        ``Salesforce/blip-image-captioning-base`` and ``storyteller`` is a
        ``text2text-generation`` pipeline backed by ``google/flan-t5-small``.
    """
    # Local import: only needed to probe for a GPU, and keeps this block
    # independent of the file's top-level import list.
    import torch

    # Hard-coding device=0 fails on hosts without a CUDA GPU. Use the first
    # GPU when one is available, otherwise fall back to CPU (-1).
    device = 0 if torch.cuda.is_available() else -1

    # 1) Original BLIP-base for captions
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=device,
    )

    # 2) Instruction-tuned Flan-T5 small for stories
    storyteller = pipeline(
        "text2text-generation",
        model="google/flan-t5-small",
        device=device,
    )

    # Warm up with throwaway inputs so the first real request is faster.
    dummy = Image.new("RGB", (384, 384), color=(128, 128, 128))
    captioner(dummy)
    storyteller("Tell me something", max_new_tokens=1)

    return captioner, storyteller
# Build (or fetch from cache) the two model pipelines used below.
captioner, storyteller = load_pipelines()

# —––––––– Main UI
# Flow: upload image -> caption it -> turn the caption into a short story
# -> read the story aloud.
uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
if uploaded:
    # 1) Load + downsize
    # A corrupt or mislabeled upload makes Pillow raise an OSError subclass;
    # report it to the user instead of letting the script crash.
    try:
        image = (
            Image.open(uploaded)
            .convert("RGB")
            .resize((384, 384), Image.LANCZOS)
        )
    except OSError:
        st.error("Could not read that file as an image. Please upload a valid JPG or PNG.")
        st.stop()
    st.image(image, caption="Your image", use_container_width=True)

    # 2) Caption
    with st.spinner("🔍 Generating caption..."):
        cap = captioner(image)[0]["generated_text"].strip()
    st.markdown(f"**Caption:** {cap}")

    # 3) Story generation
    prompt = (
        f"Here is an image description: “{cap}”.\n"
        "Write a playful, 80–100 word story for 3–10 year-olds\n\n"
        "Story:"
    )
    with st.spinner("✍️ Generating story..."):
        out = storyteller(
            prompt,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
        )
    # Keep only the text after the last "Story:" marker, in case the model
    # echoes the cue from the prompt; otherwise the output is used as-is.
    story = out[0]["generated_text"].split("Story:")[-1].strip()
    st.markdown("**Story:**")
    if not story:
        # gTTS rejects empty text, so stop before the speech step.
        st.warning("The model did not produce a story this time. Please try again.")
        st.stop()
    st.write(story)

    # 4) Text-to-Speech
    # Render the MP3 into memory rather than a delete=False temp file, which
    # was never closed or removed and accumulated on disk with every rerun.
    with st.spinner("🔊 Converting to speech..."):
        audio_buf = io.BytesIO()
        gTTS(text=story, lang="en").write_to_fp(audio_buf)
    st.audio(audio_buf.getvalue(), format="audio/mp3")