File size: 2,305 Bytes
dfb3989 8367fb2 6b1de29 8367fb2 dfb3989 8367fb2 c916589 8367fb2 6b1de29 8367fb2 504dc12 33fead7 6b1de29 8367fb2 6b1de29 8367fb2 b3f64ee c916589 8367fb2 c916589 6b1de29 33fead7 c916589 8367fb2 dfb3989 8367fb2 6b1de29 dfb3989 6b1de29 dfb3989 c916589 8367fb2 6b1de29 c916589 dfb3989 8367fb2 6b1de29 dfb3989 6b1de29 dfb3989 c916589 dd4f7ba b3f64ee c916589 33fead7 b3f64ee dfb3989 1c165f8 6b1de29 dfb3989 6b1de29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# app.py
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import tempfile
# ---------- Page config ----------
# Set the page title and a centered layout, then render the main heading.
st.set_page_config(
    layout="centered",
    page_title="Storyteller for Kids",
)
st.title("🖼️ ➡️ 📖 Interactive Storyteller")
# ---------- Model loading + warm-up ----------
@st.cache_resource
def load_pipelines():
    """Build the captioning and story-generation pipelines and warm them up.

    Decorated with ``st.cache_resource`` so the models are not rebuilt on
    every script rerun.

    Returns:
        tuple: ``(captioner, storyteller)`` where ``captioner`` is an
        "image-to-text" pipeline (BLIP base) and ``storyteller`` is a
        "text-generation" pipeline (GPT-Neo 125M).
    """
    # Local import: torch is only needed here, to probe for a usable GPU.
    import torch

    # Pipelines take a device index: 0 is the first CUDA GPU, -1 is the CPU.
    # A hard-coded 0 fails on CPU-only hosts, so choose based on availability.
    device = 0 if torch.cuda.is_available() else -1

    # 1) Original BLIP-base captioner
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=device,
    )

    # 2) Lightweight GPT-Neo for stories
    storyteller = pipeline(
        "text-generation",
        model="EleutherAI/gpt-neo-125M",
        device=device,
    )

    # Warm-up so the first real request is fast: run each pipeline once on a
    # throwaway input (a flat grey 384x384 image and a one-token generation).
    dummy = Image.new("RGB", (384, 384), color=(128, 128, 128))
    captioner(dummy)
    storyteller("Hello", max_new_tokens=1)

    return captioner, storyteller
captioner, storyteller = load_pipelines()

# ---------- Image upload & processing ----------
uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
if uploaded:
    # 1) Load + downsize for faster vision encoding
    image = Image.open(uploaded).convert("RGB")
    image = image.resize((384, 384), Image.LANCZOS)
    st.image(image, caption="Your image", use_container_width=True)

    # 2) Caption step
    with st.spinner("🔍 Generating caption..."):
        cap = captioner(image)[0]["generated_text"].strip()
    st.markdown(f"**Caption:** {cap}")

    # 3) Story generation (greedy for speed)
    prompt = (
        f"Write an 80–100 word playful story for 3–10 year-olds "
        f"based on this description:\n\n“{cap}”\n\nStory:"
    )
    with st.spinner("✍️ Generating story..."):
        out = storyteller(
            prompt,
            max_new_tokens=120,
            do_sample=False,
            # By default the pipeline returns prompt + continuation. Ask for
            # the continuation only, so the instruction text is neither shown
            # as part of the story nor read aloud.
            return_full_text=False,
        )
    story = out[0]["generated_text"].strip()
    st.markdown("**Story:**")
    st.write(story)

    # 4) Text-to-Speech via gTTS (network-based)
    if story:
        with st.spinner("🔊 Converting to speech..."):
            tts = gTTS(text=story, lang="en")
            # The context manager closes and removes the temp file on exit;
            # the previous delete=False file was never cleaned up, leaving one
            # MP3 behind per run. The audio is handed to Streamlit as bytes,
            # so the file does not need to outlive this block.
            with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp:
                tts.write_to_fp(tmp)
                tmp.seek(0)
                audio_bytes = tmp.read()
        st.audio(audio_bytes, format="audio/mp3")
    else:
        # Nothing was generated, so skip speech synthesis instead of sending
        # empty text to gTTS.
        st.warning("The model returned an empty story, so there is no audio to play.")
|