Spaces:

mayf
/

1

Sleeping

File size: 3,204 Bytes

0dcd353
e508bdf
 
8367fb2
fd1d947
e508bdf
 
8367fb2
748a576
8087810
 
 
1573779
8087810
 
 
 
 
fd1d947
8367fb2
8087810
e508bdf
8087810
fd1d947
 
 
 
8087810
b3abd21
fd1d947
e508bdf
8087810
fd1d947
 
 
8087810
fd1d947
88ee0a7
fd1d947
 
2c0fb69
 
6adb177
fd1d947
 
8087810
e508bdf
6adb177
fd1d947
e508bdf
1573779
e508bdf
 
b3abd21
8087810
e508bdf
8087810
 
e508bdf
 
 
 
 
1573779
8087810
 
 
 
e508bdf
8087810
 
 
 
 
 
 
 
 
 
b3abd21
1573779
e508bdf
6adb177
b3abd21
8087810
e508bdf
 
6adb177
8087810
 
 
e508bdf
 
2aae3c9
e508bdf
e616e4e
2c0fb69
1573779

import os
import time
import streamlit as st
from PIL import Image
from transformers import pipeline
from gtts import gTTS
import tempfile

# —––––––– Requirements —–––––––
# streamlit>=1.20
# pillow>=9.0
# torch>=2.0.0
# transformers>=4.30
# sentencepiece>=0.1.97
# gTTS>=2.3.1

# —––––––– Page Setup —–––––––
# Configure the browser tab title and page layout, then render the heading.
st.set_page_config(
    page_title="Magic Story Generator",
    layout="centered",
)
st.title("📖✨ Turn Images into Children's Stories")

# —––––––– Load Pipelines (cached) —–––––––
@st.cache_resource(show_spinner=False)
def load_pipelines():
    """Build the image-captioning and story-generation pipelines.

    The function is wrapped in ``st.cache_resource`` so the (large) models are
    constructed once and the same objects are handed back on later calls.

    Returns:
        tuple: ``(captioner, storyteller)`` where ``captioner`` is an
        ``image-to-text`` pipeline (BLIP base) and ``storyteller`` is a
        ``text-generation`` pipeline (DeepSeek-R1-Distill-Qwen-1.5B), both
        placed on CPU.
    """
    # 1) Image-captioning pipeline (BLIP)
    captioner = pipeline(
        task="image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=-1,  # CPU; set to 0+ for GPU
    )
    # 2) Story-generation pipeline (DeepSeek-R1-Distill-Qwen).
    # This checkpoint is a decoder-only causal LM, so it must be loaded with
    # the "text-generation" task; "text2text-generation" is for
    # encoder-decoder models and cannot load it. `return_full_text` is also a
    # text-generation option.
    storyteller = pipeline(
        task="text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True,
        device=-1,                # CPU; set to 0+ for GPU
        do_sample=True,           # temperature/top_p only apply when sampling
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
        no_repeat_ngram_size=2,
        max_new_tokens=120,
        return_full_text=False,   # return only the continuation, not the prompt
    )
    return captioner, storyteller

captioner, storyteller = load_pipelines()


def _limit_words(text, max_words=100):
    """Return *text* cut down to at most *max_words* whitespace-separated words.

    Text already within the limit is returned unchanged. A truncated result is
    given a closing period unless the cut happens to end on sentence
    punctuation already.
    """
    words = text.split()
    if len(words) <= max_words:
        return text
    clipped = " ".join(words[:max_words])
    # Inspect the clipped text, not the original: the cut usually lands
    # mid-sentence regardless of how the full story ended.
    if clipped.endswith((".", "!", "?")):
        return clipped
    return clipped + "."


def _first_generated_text(result):
    """Return the stripped ``generated_text`` of the first pipeline result, or ``""``."""
    if isinstance(result, list) and result:
        return result[0].get("generated_text", "").strip()
    return ""


# —––––––– Main App —–––––––
uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if uploaded:
    # Load and display the image; a corrupt or mislabeled upload raises
    # OSError (PIL's UnidentifiedImageError is a subclass of it).
    try:
        img = Image.open(uploaded).convert("RGB")
    except OSError:
        st.error("😢 Couldn't read this file as an image. Try another one!")
        st.stop()
    st.image(img, use_container_width=True)

    # Generate caption
    with st.spinner("🔍 Generating caption..."):
        caption = _first_generated_text(captioner(img))
    if not caption:
        st.error("😢 Couldn't understand this image. Try another one!")
        st.stop()
    st.success(f"**Caption:** {caption}")

    # Build prompt and generate story
    prompt = (
        f"Image description: {caption}\n"
        "Write a coherent, 50-100 word children’s story that flows naturally."
    )
    with st.spinner("📝 Writing story..."):
        # perf_counter is monotonic, so the elapsed time cannot go negative
        # if the wall clock is adjusted mid-generation.
        start = time.perf_counter()
        out = storyteller(prompt)
        gen_time = time.perf_counter() - start
        st.text(f"⏱ Generated in {gen_time:.1f}s")
    story = _first_generated_text(out)

    # NOTE(review): DeepSeek-R1 checkpoints are documented to emit a
    # "<think>...</think>" reasoning preamble; keep only what follows the last
    # closing tag. When no tag is present the text is left as-is. Confirm
    # against real model output.
    story = story.rpartition("</think>")[2].strip()
    if not story:
        st.error("😢 The storyteller came back empty-handed. Please try again!")
        st.stop()

    # Enforce ≤100 words
    story = _limit_words(story, 100)

    # Display story
    st.subheader("📚 Your Magical Story")
    st.write(story)

    # Convert to audio
    with st.spinner("🔊 Converting to audio..."):
        # Deliberately broad: TTS is a best-effort extra (it needs network
        # access), so any failure is shown as a warning instead of crashing.
        try:
            tts = gTTS(text=story, lang="en", slow=False)
            # Write through a self-deleting temp file and hand Streamlit the
            # bytes, so no .mp3 is left behind on disk after each rerun.
            with tempfile.TemporaryFile() as audio_file:
                tts.write_to_fp(audio_file)
                audio_file.seek(0)
                st.audio(audio_file.read(), format="audio/mp3")
        except Exception as e:
            st.warning(f"⚠️ TTS failed: {e}")

# Footer
st.markdown("---\n*Made with ❤️ by your friendly story wizard*")