1
File size: 3,456 Bytes
dfb3989
8367fb2
 
7d2ac1c
 
6b1de29
8367fb2
 
8e5f097
 
 
8367fb2
8e5f097
8367fb2
7d2ac1c
 
8e5f097
 
 
8367fb2
121e41f
7d2ac1c
8367fb2
8e5f097
 
7d2ac1c
8e5f097
7d2ac1c
8e5f097
7d2ac1c
8e5f097
8367fb2
8e5f097
 
6bc44b9
8e5f097
 
 
 
 
 
 
 
6bc44b9
8e5f097
6bc44b9
258bc7e
8e5f097
 
 
 
 
 
 
 
 
 
 
dfb3989
cc355a8
8e5f097
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bc44b9
8e5f097
 
 
258bc7e
8e5f097
 
c2c4e19
8e5f097
 
 
 
c2c4e19
8e5f097
7d2ac1c
8e5f097
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# app.py
import streamlit as st
from PIL import Image
from io import BytesIO
from huggingface_hub import InferenceApi
from gtts import gTTS
import tempfile

# —––––––– Page Config
# Browser-tab title and page layout, followed by the on-page heading.
page_options = {"page_title": "Magic Story Generator", "layout": "centered"}
st.set_page_config(**page_options)
st.title("📖✨ Turn Images into Children's Stories")

# —––––––– Clients (cached)
@st.cache_resource
def load_clients():
    """Create the Hugging Face inference clients used by the app.

    The access token is read from ``st.secrets["HF_TOKEN"]`` and shared by
    both clients.

    Returns:
        A 2-tuple of ``InferenceApi`` objects: first the client for
        ``Salesforce/blip-image-captioning-base``, then the client for
        ``deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B``.
    """
    token = st.secrets["HF_TOKEN"]
    # Order matters: callers unpack the result positionally.
    model_ids = (
        "Salesforce/blip-image-captioning-base",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    )
    return tuple(InferenceApi(model_id, token=token) for model_id in model_ids)

caption_client, story_client = load_clients()


def _generated_text(response):
    """Return the stripped ``generated_text`` of the first result, or ``""``.

    A usable response is a non-empty list whose first item is a dict with a
    ``generated_text`` entry. Any other shape yields an empty string so the
    caller can show a friendly message instead of failing on indexing.
    NOTE(review): non-list responses are presumably error payloads from the
    inference service -- confirm against the API documentation.
    """
    if isinstance(response, list) and response and isinstance(response[0], dict):
        return str(response[0].get("generated_text") or "").strip()
    return ""


# —––––––– Main Flow
uploaded = st.file_uploader("Upload a child-friendly image:", type=["jpg", "png", "jpeg"])
if not uploaded:
    st.info("🌈 Please upload an image to start the magic!")
else:
    # Process Image: a corrupt or mislabeled upload must not crash the script.
    try:
        img = Image.open(uploaded).convert("RGB")
    except (OSError, ValueError) as e:
        st.error(f"😢 Couldn't open this image: {e}")
        st.stop()
    st.image(img, use_column_width=True)

    # Generate Caption
    with st.spinner("🔍 Discovering image secrets..."):
        try:
            # Re-encode as JPEG so the captioning model always gets one format.
            img_bytes = BytesIO()
            img.save(img_bytes, format="JPEG")
            caption_response = caption_client(data=img_bytes.getvalue())
        except Exception as e:
            st.error(f"🚨 Oops! Problem making caption: {str(e)}")
            st.stop()

        caption = _generated_text(caption_response)
        if not caption:
            st.error("😢 Couldn't understand this image. Try another one!")
            st.stop()

    st.success(f"**Caption Magic:** {caption}")

    # Story Generation Prompt
    story_prompt = (
        f"Image description: {caption}\n\n"
        "Write a 50-100 word children's story that:\n"
        "1. Features the main subject as a friendly character\n"
        "2. Includes a simple adventure or discovery\n"
        "3. Ends with a happy or funny conclusion\n"
        "4. Uses simple language for ages 3-8\n\n"
        "Story:\n"
    )

    # Generation settings must be sent through ``params``: the client call
    # does not accept them as individual keyword arguments.
    generation_params = {
        "max_new_tokens": 200,
        "temperature": 0.8,
        "top_p": 0.95,
        "repetition_penalty": 1.15,
        "do_sample": True,
        "no_repeat_ngram_size": 2,
    }

    # Generate Story
    with st.spinner("📝 Writing magical story..."):
        try:
            story_response = story_client(story_prompt, params=generation_params)
        except Exception as e:
            st.error(f"🚨 Story magic failed: {str(e)}")
            st.stop()

        full_text = _generated_text(story_response)
        if not full_text:
            st.error(f"🚨 Story magic failed: unexpected response {story_response!r}")
            st.stop()

        # The model may echo the prompt; keep only what follows the last
        # "Story:" marker.
        story = full_text.split("Story:")[-1].strip()

        # Ensure clean ending: drop any trailing fragment after the last period.
        if "." in story:
            story = story.rsplit(".", 1)[0] + "."

        if not story:
            st.error("😢 The story came back empty. Please try again!")
            st.stop()

    # Display Story
    st.subheader("📚 Your Magical Story")
    st.write(story)

    # Audio Conversion: render the speech into memory so no temporary file is
    # left behind on disk.
    with st.spinner("🔊 Adding story voice..."):
        try:
            audio_buffer = BytesIO()
            gTTS(text=story, lang="en", slow=False).write_to_fp(audio_buffer)
            st.audio(audio_buffer.getvalue(), format="audio/mp3")
        except Exception as e:
            # Audio is best-effort: the written story is already on screen.
            st.warning("⚠️ Couldn't make audio version: " + str(e))

st.markdown("---\n*Made with ❤️ by your friendly story wizard*")