# app.py
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import tempfile
import torch  # used to detect GPU availability
# —––––––– Page config
st.set_page_config(page_title="Storyteller for Kids", layout="centered")
st.title("🖼️ ➡️ 📖 Interactive Storyteller")
# —––––––– Load and warm pipelines
@st.cache_resource
def load_pipelines():
    # Pick the GPU if one is available, otherwise fall back to CPU (-1)
    device = 0 if torch.cuda.is_available() else -1

    # BLIP-base for captions
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=device
    )

    # Flan-T5-Large for stories
    storyteller = pipeline(
        "text2text-generation",
        model="google/flan-t5-large",
        device=device
    )

    # Warm-up runs so user-facing calls are fast
    dummy = Image.new("RGB", (384, 384), color=(128, 128, 128))
    captioner(dummy)
    storyteller("Warm up", max_new_tokens=1)
    return captioner, storyteller
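
# The pipelines load once per server process; st.cache_resource returns the cached objects on later reruns.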
captioner, storyteller = load_pipelines()
# —––––––– Main UI
uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
if uploaded:
    # 1) Preprocess image
    image = Image.open(uploaded).convert("RGB")
    image = image.resize((384, 384), Image.LANCZOS)
    st.image(image, caption="Your image", use_container_width=True)

    # 2) Caption
    with st.spinner("🔍 Generating caption…"):
        cap = captioner(image)[0]["generated_text"].strip()
    st.markdown(f"**Caption:** {cap}")

    # 3) Build a dynamic prompt
    prompt = (
        f"Here is an image description: “{cap}”.\n"
        "Write an 80–100 word playful story for 3–10 year-old children that:\n"
        "1) Describes the scene and subject from the description.\n"
        "2) Explains what the subject is doing and how it feels.\n"
        "3) Concludes with a fun, imaginative ending.\n\n"
        "Story:"
    )

    # 4) Generate the story
    with st.spinner("✍️ Writing the story…"):
        output = storyteller(
            prompt,
            max_new_tokens=120,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3
        )
    story = output[0]["generated_text"].strip()
    st.markdown("**Story:**")
    st.write(story)

    # 5) Text-to-Speech
    with st.spinner("🔊 Converting to speech…"):
        tts = gTTS(text=story, lang="en")
        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tts.write_to_fp(tmp)
        tmp.close()  # flush and release the handle; delete=False keeps the file on disk
    st.audio(tmp.name, format="audio/mp3")
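
    # In-memory alternative (sketch, requires `import io`): gTTS can write into a
    # BytesIO buffer and st.audio accepts raw bytes, which avoids leaving an MP3
    # in the temp directory:
    #   buf = io.BytesIO()
    #   tts.write_to_fp(buf)
    #   st.audio(buf.getvalue(), format="audio/mp3")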