mayf committed (verified)
Commit 7d2ac1c · 1 Parent(s): 121e41f

Update app.py

Files changed (1):
  1. app.py +55 -44
app.py CHANGED
@@ -2,7 +2,8 @@
 
 import streamlit as st
 from PIL import Image
-from transformers import pipeline
+from io import BytesIO
+from huggingface_hub import InferenceApi
 from gtts import gTTS
 import tempfile
 
@@ -10,48 +11,52 @@ import tempfile
 st.set_page_config(page_title="Storyteller for Kids", layout="centered")
 st.title("🖼️ ➡️ 📖 Interactive Storyteller")
 
-# —––––––– Load & warm pipelines
+# —––––––– Inference clients (cached)
 @st.cache_resource
-def load_pipelines():
-    # 1) BLIP-base for captions
-    captioner = pipeline(
-        "image-to-text",
-        model="Salesforce/blip-image-captioning-base",
-        device=0 # set to -1 if you only have CPU
+def load_clients():
+    # read your HF token from Space secrets
+    hf_token = st.secrets["HF_TOKEN"]
+
+    # caption client: BLIP-base via HF Image-to-Text API
+    caption_client = InferenceApi(
+        repo_id="Salesforce/blip-image-captioning-base",
+        task="image-to-text",
+        token=hf_token
     )
-    # 2) DeepSeek-R1-Distill (Qwen-1.5B) for stories
-    ds_storyteller = pipeline(
-        "text-generation",
-        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        trust_remote_code=True,
-        device=0
+    # story client: DeepSeek-R1-Distill via HF Text-Generation API
+    story_client = InferenceApi(
+        repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        task="text-generation",
+        token=hf_token
     )
+    return caption_client, story_client
 
-    # Warm-up both so the first real request is faster
-    dummy = Image.new("RGB", (384, 384), color=(128, 128, 128))
-    captioner(dummy)
-    ds_storyteller("Warm up", max_new_tokens=1)
-
-    return captioner, ds_storyteller
-
-captioner, ds_storyteller = load_pipelines()
+caption_client, story_client = load_clients()
 
 # —––––––– Main UI
 uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
-if uploaded:
-    # 1) Preprocess & display
-    image = Image.open(uploaded).convert("RGB")
-    image = image.resize((384, 384), Image.LANCZOS)
-    st.image(image, caption="Your image", use_container_width=True)
+if not uploaded:
+    st.info("Please upload an image (JPG/PNG) to begin.")
+else:
+    # 1) Display the image
+    img = Image.open(uploaded).convert("RGB")
+    st.image(img, use_container_width=True)
 
-    # 2) Generate caption
+    # 2) Caption via HF Inference API
     with st.spinner("🔍 Generating caption..."):
-        cap = captioner(image)[0]["generated_text"].strip()
-    st.markdown(f"**Caption:** {cap}")
+        buf = BytesIO()
+        img.save(buf, format="PNG")
+        caption_output = caption_client(data=buf.getvalue())
+        # handle API return formats
+        if isinstance(caption_output, dict):
+            cap_text = caption_output.get("generated_text", "").strip()
+        else:
+            cap_text = str(caption_output).strip()
+    st.markdown(f"**Caption:** {cap_text}")
 
     # 3) Build prompt
    prompt = (
-        f"Here is an image description: “{cap}”.\n"
+        f"Here’s an image description: “{cap_text}”.\n\n"
         "Write an 80–100 word playful story for 3–10 year-old children that:\n"
         "1) Describes the scene and main subject.\n"
         "2) Explains what it’s doing and how it feels.\n"
@@ -59,20 +64,25 @@ if uploaded:
         "Story:"
     )
 
-    # 4) Generate story via DeepSeek
-    with st.spinner("✍️ Generating story with DeepSeek..."):
-        out = ds_storyteller(
-            prompt,
-            max_new_tokens=120,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            top_k=50,
-            repetition_penalty=1.2,
-            no_repeat_ngram_size=3
+    # 4) Story via HF Inference API
+    with st.spinner("✍️ Generating story..."):
+        story_output = story_client(
+            inputs=prompt,
+            params={
+                "max_new_tokens": 120,
+                "do_sample": True,
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "top_k": 50,
+                "repetition_penalty": 1.2,
+                "no_repeat_ngram_size": 3
+            }
         )
-        story = out[0]["generated_text"].strip()
-
+        # API returns list of generations or a dict
+        if isinstance(story_output, list):
+            story = story_output[0].get("generated_text", "").strip()
+        else:
+            story = story_output.get("generated_text", "").strip()
    st.markdown("**Story:**")
    st.write(story)
 
@@ -83,3 +93,4 @@ if uploaded:
        tts.write_to_fp(tmp)
        tmp.flush()
        st.audio(tmp.name, format="audio/mp3")
+
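
Note on the client API: the updated app.py uses huggingface_hub.InferenceApi, which recent huggingface_hub releases deprecate in favour of InferenceClient. Below is a minimal sketch (not part of this commit) of the same two calls with InferenceClient; the model IDs and generation parameters are taken from the diff above, while the file name, token placeholder, and helper variables are purely illustrative.

    from io import BytesIO

    from huggingface_hub import InferenceClient
    from PIL import Image

    CAPTION_MODEL = "Salesforce/blip-image-captioning-base"
    STORY_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

    # In the Space this token would come from st.secrets["HF_TOKEN"].
    client = InferenceClient(token="hf_...")

    # Caption: send the image as PNG bytes, as the app does.
    img = Image.open("example.jpg").convert("RGB")
    buf = BytesIO()
    img.save(buf, format="PNG")
    caption_out = client.image_to_text(buf.getvalue(), model=CAPTION_MODEL)
    # Depending on the library version this is a plain string or an
    # object exposing .generated_text.
    caption = getattr(caption_out, "generated_text", str(caption_out)).strip()

    # Story: text_generation returns the generated string directly.
    # (no_repeat_ngram_size from the diff is omitted here; this method
    # does not expose that parameter.)
    story = client.text_generation(
        f"Here’s an image description: “{caption}”.\n\nStory:",
        model=STORY_MODEL,
        max_new_tokens=120,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )
    print(story.strip())

Either client keeps the heavy models off the Space itself, so the app can run on CPU-only hardware while the Inference API serves BLIP and DeepSeek remotely.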