Spaces:

mayf
/

1

Sleeping

App Files Community

mayf committed on Apr 29

Commit

6adb177

verified ·

1 Parent(s): e508bdf

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -64

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import os
 import time
 import streamlit as st
-from transformers import pipeline
 from PIL import Image
 from gtts import gTTS
 import tempfile
@@ -10,94 +12,92 @@ import tempfile
 st.set_page_config(page_title="Magic Story Generator", layout="centered")
 st.title("📖✨ Turn Images into Children's Stories")
-# —––––––– Load Pipelines (cached) —–––––––
 @st.cache_resource(show_spinner=False)
-def load_pipelines():
-    # Cache transformers models locally
-    os.environ.setdefault("TRANSFORMERS_CACHE", "./hf_cache")
-    # 1) Image-to-text pipeline for captioning (BLIP)
-    captioner = pipeline(
-        task="image-to-text",
-        model="Salesforce/blip-image-captioning-base",
-        device=-1  # force CPU; use 0 for GPU
     )
-    # 2) Text-generation pipeline for storytelling (GPT-2)
     storyteller = pipeline(
-        task="text-generation",
-        model="gpt2",
-        tokenizer="gpt2",
-        device=-1  # CPU
     )
-    return captioner, storyteller
-captioner, storyteller = load_pipelines()
-# —––––––– Main App Flow —–––––––
-uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
-    # Load image
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
-    # Generate caption
     with st.spinner("🔍 Generating caption..."):
-        raw = captioner(img)
-        caption = raw[0].get("generated_text", "").strip()
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
-    # Build storytelling prompt
-    prompt = f"""
-You are a creative children’s-story author.
-Image description: “{caption}”
-Write a coherent, 50–100 word story that:
-1. Introduces the main character.
-2. Shows a simple problem or discovery.
-3. Has a happy resolution.
-4. Uses clear language for ages 3–8.
-5. Keeps sentences under 20 words.
-Story:
-"""
-    # Generate story
     with st.spinner("📝 Writing story..."):
-        t0 = time.time()
-        outputs = storyteller(
-            prompt,
-            max_new_tokens=120,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            no_repeat_ngram_size=3,
-            do_sample=True,
-            pad_token_id=storyteller.tokenizer.eos_token_id
-        )
-        story_text = outputs[0]["generated_text"].strip()
-        load_time = time.time() - t0
-    st.text(f"⏱ Story generated in {load_time:.1f}s")
-    # Post-process: strip prompt echo and truncate
-    if story_text.startswith(prompt):
-        story_text = story_text[len(prompt):].strip()
-    words = story_text.split()
-    if len(words) > 100:
-        story_text = " ".join(words[:100]) + ("." if not story_text.endswith('.') else "")
-    # Display story
     st.subheader("📚 Your Magical Story")
-    st.write(story_text)
-    # Convert to audio
     with st.spinner("🔊 Converting to audio..."):
         try:
-            tts = gTTS(text=story_text, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                 tts.save(fp.name)
                 st.audio(fp.name, format="audio/mp3")
@@ -105,4 +105,4 @@ Story:
             st.warning(f"⚠️ TTS failed: {e}")
 # Footer
-st.markdown("---\n*Made with ❤️ by your friendly story wizard* ")

 import os
 import time
 import streamlit as st
 from PIL import Image
+from io import BytesIO
+from huggingface_hub import InferenceApi, login
+from transformers import pipeline
 from gtts import gTTS
 import tempfile
 st.set_page_config(page_title="Magic Story Generator", layout="centered")
 st.title("📖✨ Turn Images into Children's Stories")
+# —––––––– Load Clients & Pipelines (cached) —–––––––
 @st.cache_resource(show_spinner=False)
+def load_clients():
+    hf_token = st.secrets["HF_TOKEN"]
+    # authenticate so transformers can pick up your token
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
+    login(hf_token)
+    # BLIP captioning via Hugging Face Inference API
+    caption_client = InferenceApi(
+        repo_id="Salesforce/blip-image-captioning-base",
+        token=hf_token
     )
+    # Instruction-tuned story generator: Flan-T5
+    t0 = time.time()
     storyteller = pipeline(
+        task="text2text-generation",
+        model="google/flan-t5-small",
+        device=-1,               # CPU
+        max_length=150           # prompt + generation cap
+    )
+    st.text(f"✅ Story model loaded in {time.time() - t0:.1f}s")
+    return caption_client, storyteller
+caption_client, storyteller = load_clients()
+# —––––––– Helpers —–––––––
+def generate_caption(img: Image.Image) -> str:
+    buf = BytesIO()
+    img.save(buf, format="JPEG")
+    resp = caption_client(data=buf.getvalue())
+    if isinstance(resp, list) and resp:
+        return resp[0].get("generated_text", "").strip()
+    return ""
+def generate_story(caption: str) -> str:
+    prompt = (
+        "You are a creative children’s-story author.\n"
+        f"Image description: “{caption}”\n\n"
+        "Write a coherent 50–100 word story that:\n"
+        "1. Introduces the main character.\n"
+        "2. Shows a simple problem or discovery.\n"
+        "3. Has a happy resolution.\n"
+        "4. Uses clear language for ages 3–8.\n"
+        "5. Keeps each sentence under 20 words.\n"
     )
+    t0 = time.time()
+    out = storyteller(prompt, max_new_tokens=120, temperature=0.7, top_p=0.9)[0]["generated_text"]
+    st.text(f"⏱ Generated in {time.time() - t0:.1f}s")
+    story = out.strip()
+    # Truncate to at most 100 words
+    words = story.split()
+    if len(words) > 100:
+        story = " ".join(words[:100])
+        if not story.endswith("."):
+            story += "."
+    return story
+# —––––––– Main App —–––––––
+uploaded = st.file_uploader("Upload an image:", type=["jpg","png","jpeg"])
 if uploaded:
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
     with st.spinner("🔍 Generating caption..."):
+        caption = generate_caption(img)
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
     with st.spinner("📝 Writing story..."):
+        story = generate_story(caption)
     st.subheader("📚 Your Magical Story")
+    st.write(story)
     with st.spinner("🔊 Converting to audio..."):
         try:
+            tts = gTTS(text=story, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                 tts.save(fp.name)
                 st.audio(fp.name, format="audio/mp3")
             st.warning(f"⚠️ TTS failed: {e}")
 # Footer
+st.markdown("---\n*Made with ❤️ by your friendly story wizard*")