Spaces:

mayf
/

1

Sleeping

App Files Files Community

mayf committed 19 days ago

Commit

8e5f097

verified ·

1 Parent(s): cab8adc

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -83

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 # app.py
 import streamlit as st
 from PIL import Image
 from io import BytesIO
@@ -7,108 +6,95 @@ from huggingface_hub import InferenceApi
 from gtts import gTTS
 import tempfile
-# —––––––– Page config
-st.set_page_config(page_title="Storyteller for Kids", layout="centered")
-st.title("🖼️ ➡️ 📖 Interactive Storyteller")
-# —––––––– Inference clients (cached)
 @st.cache_resource
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
-    caption_client = InferenceApi(
-        repo_id="Salesforce/blip-image-captioning-base",
-        task="image-to-text",
-        token=hf_token
-    )
-    story_client = InferenceApi(
-        repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        task="text-generation",
-        token=hf_token
     )
-    return caption_client, story_client
 caption_client, story_client = load_clients()
-# —––––––– Main UI
-uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
 if not uploaded:
-    st.info("Please upload a JPG/PNG image to begin.")
 else:
-    # 1) Display image
     img = Image.open(uploaded).convert("RGB")
-    st.image(img, use_container_width=True)
-    # 2) Generate caption
-    with st.spinner("🔍 Generating caption..."):
         try:
-            buf = BytesIO()
-            img.save(buf, format="PNG")
-            cap_out = caption_client(data=buf.getvalue())
-            # Handle caption response
-            if isinstance(cap_out, list) and cap_out:
-                cap_text = cap_out[0].get("generated_text", "").strip()
-            elif isinstance(cap_out, dict):
-                cap_text = cap_out.get("generated_text", "").strip()
-            else:
-                cap_text = str(cap_out).strip()
         except Exception as e:
-            st.error(f"🚨 Caption generation failed: {str(e)}")
             st.stop()
-    if not cap_text:
-        st.error("😕 Couldn’t generate a caption. Try another image.")
-        st.stop()
-    st.markdown(f"**Caption:** {cap_text}")
-    # 3) Build story prompt
-    prompt = (
-        f"Here’s an image description: “{cap_text}”.\n\n"
-        "Write an 80–100 word playful story for 3–10 year-old children that:\n"
-        "1) Describes the scene and main subject.\n"
-        "2) Explains what it’s doing and how it feels.\n"
-        "3) Concludes with a fun, imaginative ending.\n\n"
-        "Story:"
     )
-# 4) Generate story with corrected parameter format
-with st.spinner("✍️ Generating story..."):
-    try:
-        story_out = story_client(
-            prompt,
-            max_new_tokens=250,  # Direct keyword arguments
-            temperature=0.7,
-            top_p=0.9,
-            top_k=50,
-            repetition_penalty=1.1,
-            do_sample=True,
-            no_repeat_ngram_size=2
-        )
-        # Handle response format
-        if isinstance(story_out, list):
-            story_text = story_out[0].get("generated_text", "")
-        else:  # Handle single-dictionary response
-            story_text = story_out.get("generated_text", "")
-        # Extract story content after last prompt mention
-        story = story_text.split("Story:")[-1].strip()
-    except Exception as e:
-        st.error(f"🚨 Story generation failed: {str(e)}")
-        st.stop()
-    # 5) Text-to-Speech
-    with st.spinner("🔊 Converting to speech..."):
         try:
-            tts = gTTS(text=story, lang="en")
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
-                tts.write_to_fp(tmp)
-                tmp.seek(0)
-                st.audio(tmp.name, format="audio/mp3")
         except Exception as e:
-            st.error(f"🔇 Audio conversion failed: {str(e)}")

 # app.py
 import streamlit as st
 from PIL import Image
 from io import BytesIO
 from gtts import gTTS
 import tempfile
+# —––––––– Page Config
+st.set_page_config(page_title="Magic Story Generator", layout="centered")
+st.title("📖✨ Turn Images into Children's Stories")
+# —––––––– Clients (cached)
 @st.cache_resource
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
+    return (
+        InferenceApi("Salesforce/blip-image-captioning-base", token=hf_token),
+        InferenceApi("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", token=hf_token)
     )
 caption_client, story_client = load_clients()
+# —––––––– Main Flow
+uploaded = st.file_uploader("Upload a child-friendly image:", type=["jpg", "png", "jpeg"])
 if not uploaded:
+    st.info("🌈 Please upload an image to start the magic!")
 else:
+    # Process Image
     img = Image.open(uploaded).convert("RGB")
+    st.image(img, use_column_width=True)
+    # Generate Caption
+    with st.spinner("🔍 Discovering image secrets..."):
         try:
+            img_bytes = BytesIO()
+            img.save(img_bytes, format="JPEG")
+            caption_response = caption_client(data=img_bytes.getvalue())
+            caption = caption_response[0]['generated_text'].strip() if isinstance(caption_response, list) else ""
+            if not caption:
+                st.error("😢 Couldn't understand this image. Try another one!")
+                st.stop()
         except Exception as e:
+            st.error(f"🚨 Oops! Problem making caption: {str(e)}")
             st.stop()
+    st.success(f"**Caption Magic:** {caption}")
+    # Story Generation Prompt
+    story_prompt = (
+        f"Image description: {caption}\n\n"
+        "Write a 50-100 word children's story that:\n"
+        "1. Features the main subject as a friendly character\n"
+        "2. Includes a simple adventure or discovery\n"
+        "3. Ends with a happy or funny conclusion\n"
+        "4. Uses simple language for ages 3-8\n\n"
+        "Story:\n"
     )
+    # Generate Story
+    with st.spinner("📝 Writing magical story..."):
+        try:
+            story_response = story_client(
+                story_prompt,
+                max_new_tokens=200,
+                temperature=0.8,
+                top_p=0.95,
+                repetition_penalty=1.15,
+                do_sample=True,
+                no_repeat_ngram_size=2
+            )
+            # Process response
+            full_text = story_response[0]['generated_text']
+            story = full_text.split("Story:")[-1].strip()
+            # Ensure clean ending
+            if "." in story:
+                story = story.rsplit(".", 1)[0] + "."
+        except Exception as e:
+            st.error(f"🚨 Story magic failed: {str(e)}")
+            st.stop()
+    # Display Story
+    st.subheader("📚 Your Magical Story")
+    st.write(story)
+    # Audio Conversion
+    with st.spinner("🔊 Adding story voice..."):
         try:
+            tts = gTTS(text=story, lang="en", slow=False)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+                tts.save(fp.name)
+                st.audio(fp.name, format="audio/mp3")
         except Exception as e:
+            st.warning("⚠️ Couldn't make audio version: " + str(e))
+st.markdown("---\n*Made with ❤️ by your friendly story wizard*")