Spaces:

mayf
/

1

Sleeping

File size: 3,767 Bytes

0dcd353
e508bdf
 
8367fb2
6adb177
fd1d947
 
e508bdf
 
8367fb2
748a576
fd1d947
748a576
e508bdf
fd1d947
 
8367fb2
6adb177
e508bdf
6adb177
fd1d947
 
 
 
 
 
 
 
 
 
 
b3abd21
e508bdf
fd1d947
e508bdf
2c0fb69
fd1d947
 
 
 
 
88ee0a7
fd1d947
 
2c0fb69
 
6adb177
 
fd1d947
 
 
6adb177
 
 
fd1d947
 
 
 
6adb177
 
88ee0a7
6adb177
fd1d947
6adb177
fd1d947
 
422a749
fd1d947
6adb177
fd1d947
 
 
88ee0a7
fd1d947
b3abd21
fd1d947
 
6adb177
 
748a576
6adb177
e508bdf
6adb177
fd1d947
e508bdf
 
 
fd1d947
e508bdf
b3abd21
e508bdf
6adb177
e508bdf
 
 
 
 
 
6adb177
b3abd21
e508bdf
6adb177
b3abd21
e508bdf
 
6adb177
e508bdf
 
 
 
 
2aae3c9
e508bdf
e616e4e
2c0fb69

import os
import time
import streamlit as st
from PIL import Image
from io import BytesIO
from transformers import pipeline
from huggingface_hub import login
from gtts import gTTS
import tempfile

# —––––––– Requirements —–––––––
# pip install streamlit pillow gTTS transformers huggingface_hub torch
# (the transformers pipelines below need a model backend such as PyTorch)

# —––––––– Page Config —–––––––
# Configured first, ahead of every other st.* call in this script.
st.set_page_config(
    page_title="Magic Story Generator (Local Pipeline)",
    layout="centered",
)
st.title("📖✨ Turn Images into Children's Stories")

# —––––––– Load Clients & Pipelines (cached) —–––––––
@st.cache_resource(show_spinner=False)
def load_clients():
    """Build the captioning and story-generation pipelines.

    If an ``HF_TOKEN`` entry exists in ``st.secrets``, it is exported as
    ``HUGGINGFACEHUB_API_TOKEN`` and used to log in to the Hugging Face Hub
    before any model is loaded.

    Returns:
        A ``(captioner, storyteller)`` tuple: an ``image-to-text`` pipeline
        and a ``text-generation`` pipeline, both placed on CPU (device -1).
    """
    # Optional authentication, only performed when a token is configured.
    token = st.secrets.get("HF_TOKEN")
    if token:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
        login(token)

    # 1) BLIP image captioner. device=-1 selects CPU; use 0 for the first GPU.
    caption_pipe = pipeline(
        task="image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=-1,
    )

    # 2) DeepSeek-R1-Distill-Qwen story writer. The same repo id serves as
    #    both model and tokenizer.
    story_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    generation_options = {
        "temperature": 0.6,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "no_repeat_ngram_size": 2,
        "max_new_tokens": 120,
        # Only the continuation is returned, not the prompt itself.
        "return_full_text": False,
    }
    story_pipe = pipeline(
        task="text-generation",
        model=story_model_id,
        tokenizer=story_model_id,
        trust_remote_code=True,
        device=-1,  # CPU; set 0+ for GPU
        **generation_options,
    )

    return caption_pipe, story_pipe

# Module-level pipeline handles read as globals by generate_caption() and
# generate_story(). load_clients is decorated with st.cache_resource.
captioner, storyteller = load_clients()

# —––––––– Helpers —–––––––
def generate_caption(img: Image.Image) -> str:
    """Caption *img* using the module-level ``captioner`` pipeline.

    Returns:
        The stripped ``"generated_text"`` of the first prediction, or an
        empty string when the pipeline result is not a non-empty list.
    """
    predictions = captioner(img)
    if not isinstance(predictions, list) or not predictions:
        return ""
    first = predictions[0]
    return first.get("generated_text", "").strip()


def _truncate_to_words(text: str, limit: int = 100) -> str:
    """Return *text* cut to at most *limit* whitespace-separated words.

    Text within the limit is returned unchanged. Clipped text gets a closing
    period unless it already ends with sentence-final punctuation.
    """
    words = text.split()
    if len(words) <= limit:
        return text
    clipped = " ".join(words[:limit])
    # Inspect the clipped text, not the original: the original's last
    # character says nothing about where the cut landed.
    if not clipped.endswith((".", "!", "?")):
        clipped += "."
    return clipped


def generate_story(caption: str) -> str:
    """Generate a short children's story from an image caption.

    Runs the module-level ``storyteller`` pipeline on a prompt built from
    *caption*, reports the generation time via ``st.text``, and caps the
    result at 100 words.

    Args:
        caption: Text description of the uploaded image.

    Returns:
        The stripped story text, truncated to 100 words, or an empty string
        when the pipeline returns no output.
    """
    # Build a simple prompt incorporating the caption
    prompt = (
        f"Image description: {caption}\n"
        "Write a coherent 50-100 word children's story that flows naturally."
    )

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    t0 = time.perf_counter()
    outputs = storyteller(prompt)
    gen_time = time.perf_counter() - t0
    st.text(f"⏱ Generated in {gen_time:.1f}s")

    # Guard against an empty result instead of raising IndexError.
    if not outputs:
        return ""

    story = outputs[0].get("generated_text", "").strip()
    return _truncate_to_words(story, 100)

# —––––––– Main App —–––––––
# Flow: upload -> caption -> story -> speech. Each stage reports its own
# failure to the user instead of crashing the script.
uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
if uploaded:
    try:
        img = Image.open(uploaded).convert("RGB")
    except OSError as e:
        # Pillow signals unreadable or corrupt image data with OSError
        # (UnidentifiedImageError is a subclass).
        st.error(f"😢 Couldn't read this image: {e}")
        st.stop()
    # Downscale very large uploads in place; thumbnail keeps the aspect ratio.
    if max(img.size) > 2048:
        img.thumbnail((2048, 2048))
    st.image(img, use_container_width=True)

    with st.spinner("🔍 Generating caption..."):
        caption = generate_caption(img)
    if not caption:
        st.error("😢 Couldn't understand this image. Try another one!")
        st.stop()
    st.success(f"**Caption:** {caption}")

    with st.spinner("📝 Writing story..."):
        story = generate_story(caption)

    st.subheader("📚 Your Magical Story")
    st.write(story)

    with st.spinner("🔊 Converting to audio..."):
        try:
            tts = gTTS(text=story, lang="en", slow=False)
            # Stream the MP3 through memory: no temp file is left behind on
            # disk, and nothing is written to a path that is still open.
            audio_buffer = BytesIO()
            tts.write_to_fp(audio_buffer)
            st.audio(audio_buffer.getvalue(), format="audio/mp3")
        except Exception as e:
            # Best-effort: the story is already shown, so a TTS failure
            # only produces a warning.
            st.warning(f"⚠️ TTS failed: {e}")

# Footer
st.markdown("---\n*Made with ❤️ by your friendly story wizard*")