Spaces:

mayf
/

1

Sleeping

App Files Files Community

mayf committed on Apr 29

Commit

ed4df47

verified ·

1 Parent(s): 0dcd353

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -46

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import streamlit as st
 from PIL import Image
 from io import BytesIO
@@ -8,31 +9,39 @@ from gtts import gTTS
 import tempfile
 # —––––––– Page Config —–––––––
-st.set_page_config(page_title="Magic Story Generator", layout="centered")
-st.title("📖✨ Turn Images into Children's Stories")
 # —––––––– Clients (cached) —–––––––
-@st.cache_resource
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
-    # 1) Authenticate so transformers can pick up your token automatically
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
     login(hf_token)
-    # 2) BLIP-based image captioning client
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         token=hf_token
     )
-    # 3) Text-generation pipeline for story creation
     story_generator = pipeline(
         task="text-generation",
         model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        device=0  # set to -1 to run on CPU
     )
     return caption_client, story_generator
@@ -44,91 +53,84 @@ def generate_caption(img: Image.Image) -> str:
     buf = BytesIO()
     img.save(buf, format="JPEG")
     try:
-        response = caption_client(data=buf.getvalue())
-        if isinstance(response, list) and response:
-            return response[0].get("generated_text", "").strip()
     except Exception as e:
         st.error(f"Caption generation error: {e}")
     return ""
 # —––––––– Helper: Generate Story via pipeline —–––––––
-def generate_story(prompt: str) -> str:
     outputs = story_generator(
         prompt,
-        max_new_tokens=200,
-        temperature=0.8,
-        top_p=0.95,
-        repetition_penalty=1.15,
-        no_repeat_ngram_size=2,
         do_sample=True
     )
-    text = outputs[0]["generated_text"].strip()
-    # If prompt was echoed, remove it
     if text.startswith(prompt):
         text = text[len(prompt):].strip()
-    # If you included a "Story:" marker, split it out
-    if "Story:" in text:
-        text = text.split("Story:", 1)[1].strip()
-    # Truncate to at most 100 words
     words = text.split()
     if len(words) > 100:
         text = " ".join(words[:100])
         if not text.endswith("."):
             text += "."
     return text
 # —––––––– Main App Flow —–––––––
 uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
-    # Load & resize
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
-    # Caption
-    with st.spinner("🔍 Discovering image secrets..."):
         caption = generate_caption(img)
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
-    # Build prompt
-    story_prompt = (
-        f"Image description: {caption}\n\n"
-        "Write a 50-100 word children's story that:\n"
-        "1. Features the main subject as a friendly character\n"
-        "2. Includes a simple adventure or discovery\n"
-        "3. Ends with a happy or funny conclusion\n"
-        "4. Uses simple language for ages 3-8\n\n"
-        "Story:\n"
-    )
-    # Generate story
     with st.spinner("📝 Writing magical story..."):
-        story = generate_story(story_prompt)
     st.subheader("📚 Your Magical Story")
     st.write(story)
-    # Convert to audio
-    with st.spinner("🔊 Adding story voice..."):
         try:
             tts = gTTS(text=story, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                 tts.save(fp.name)
                 st.audio(fp.name, format="audio/mp3")
         except Exception as e:
-            st.warning(f"⚠️ Couldn't make audio version: {e}")
 # Footer
 st.markdown("---\n*Made with ❤️ by your friendly story wizard*")

 import os
+import time
 import streamlit as st
 from PIL import Image
 from io import BytesIO
 import tempfile
 # —––––––– Page Config —–––––––
+st.set_page_config(page_title="Magic Story Generator (CPU)", layout="centered")
+st.title("📖✨ Turn Images into Children's Stories (CPU)")
 # —––––––– Clients (cached) —–––––––
@st.cache_resource(show_spinner=False)
def load_clients():
    """Create the image-captioning client and the story-generation pipeline.

    Decorated with ``st.cache_resource`` so the returned objects are built
    once and reused instead of being re-created on every call.

    Returns:
        tuple: ``(caption_client, story_generator)`` where ``caption_client``
        is an ``InferenceApi`` bound to the BLIP captioning repo and
        ``story_generator`` is a CPU text-generation ``pipeline``.

    Propagates whatever ``st.secrets`` raises when ``HF_TOKEN`` is not
    configured.
    """
    hf_token = st.secrets["HF_TOKEN"]

    # Authenticate once so pipelines use your token automatically
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
    login(hf_token)

    # Pin cache locally to avoid re-downloads
    cache_dir = "./hf_cache"
    os.makedirs(cache_dir, exist_ok=True)
    # NOTE(review): this variable is probably only read when transformers is
    # imported, so setting it here may have no effect — the explicit
    # cache_dir passed below is what actually pins the location. Confirm.
    os.environ["TRANSFORMERS_CACHE"] = cache_dir

    # 1) BLIP-based image captioning client
    caption_client = InferenceApi(
        repo_id="Salesforce/blip-image-captioning-base",
        token=hf_token,
    )

    # 2) Text-generation pipeline forced onto CPU
    t0 = time.time()
    story_generator = pipeline(
        task="text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        device=-1,  # CPU only
        # cache_dir is a from_pretrained() option. Passed as a loose keyword
        # it would be treated as a generation argument instead of a download
        # location, so it has to travel through model_kwargs.
        model_kwargs={"cache_dir": cache_dir},
    )
    st.text(f"✅ Story model loaded in {time.time() - t0:.1f}s (cached thereafter)")
    return caption_client, story_generator
     buf = BytesIO()
     img.save(buf, format="JPEG")
     try:
+        resp = caption_client(data=buf.getvalue())
+        if isinstance(resp, list) and resp:
+            return resp[0].get("generated_text", "").strip()
     except Exception as e:
         st.error(f"Caption generation error: {e}")
     return ""
 # —––––––– Helper: Generate Story via pipeline —–––––––
def _extract_story(generated: str, prompt: str, max_words: int = 100) -> str:
    """Drop the echoed prompt from *generated* and cap it at *max_words* words."""
    if generated.startswith(prompt):
        # Compare against the raw text: the prompt begins with a newline, so
        # stripping first would make this prefix test fail every time.
        text = generated[len(prompt):]
    else:
        # Fallback for output whose surrounding whitespace was altered.
        text = generated.strip()
        bare_prompt = prompt.strip()
        if bare_prompt and text.startswith(bare_prompt):
            text = text[len(bare_prompt):]
    text = text.strip()

    # Enforce the word limit, closing a cut-off story with a full stop.
    words = text.split()
    if len(words) > max_words:
        text = " ".join(words[:max_words])
        if not text.endswith("."):
            text += "."
    return text


def generate_story(caption: str) -> str:
    """Generate a short children's story from an image caption.

    Builds an instruction prompt around *caption*, samples a continuation
    from the module-level ``story_generator`` pipeline, reports the elapsed
    time through ``st.text``, and returns the continuation with the echoed
    prompt removed and the length capped at 100 words.

    Args:
        caption: Description of the uploaded image.

    Returns:
        The story text (possibly empty if nothing was generated).
    """
    prompt = f"""
You are a creative children’s-story author.
Below is the description of an image:
“{caption}”
Write a coherent, 50 to 100-word story that:
1. Introduces the main character from the image.
2. Shows a simple problem or discovery.
3. Resolves it in a happy ending.
4. Uses clear language for ages 3–8.
5. Keeps each sentence under 20 words.
Story:
"""
    t0 = time.time()
    outputs = story_generator(
        prompt,
        max_new_tokens=120,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        no_repeat_ngram_size=3,
        do_sample=True,
    )
    gen_time = time.time() - t0
    st.text(f"⏱ Generated in {gen_time:.1f}s on CPU")

    return _extract_story(outputs[0]["generated_text"], prompt)
 # —––––––– Main App Flow —–––––––
 uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
+    with st.spinner("🔍 Generating caption..."):
         caption = generate_caption(img)
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
     with st.spinner("📝 Writing magical story..."):
+        story = generate_story(caption)
     st.subheader("📚 Your Magical Story")
     st.write(story)
+    with st.spinner("🔊 Converting to audio..."):
         try:
             tts = gTTS(text=story, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
                 tts.save(fp.name)
                 st.audio(fp.name, format="audio/mp3")
         except Exception as e:
+            st.warning(f"⚠️ TTS failed: {e}")
 # Footer
 st.markdown("---\n*Made with ❤️ by your friendly story wizard*")