Spaces:

mayf
/

1

Sleeping

App Files Community

mayf committed on Apr 28

Commit

258bc7e

verified ·

1 Parent(s): 711740c

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -22

app.py CHANGED Viewed

@@ -14,16 +14,12 @@ st.title("🖼️ ➡️ 📖 Interactive Storyteller")
 # —––––––– Inference clients (cached)
 @st.cache_resource
 def load_clients():
-    # read your HF token from Space secrets
     hf_token = st.secrets["HF_TOKEN"]
-    # caption client: BLIP-base via HF Image-to-Text API
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         task="image-to-text",
         token=hf_token
     )
-    # story client: DeepSeek-R1-Distill via HF Text-Generation API
     story_client = InferenceApi(
         repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         task="text-generation",
@@ -34,27 +30,35 @@ def load_clients():
 caption_client, story_client = load_clients()
 # —––––––– Main UI
-uploaded = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
 if not uploaded:
-    st.info("Please upload an image (JPG/PNG) to begin.")
 else:
-    # 1) Display the image
     img = Image.open(uploaded).convert("RGB")
     st.image(img, use_container_width=True)
-    # 2) Caption via HF Inference API
     with st.spinner("🔍 Generating caption..."):
         buf = BytesIO()
         img.save(buf, format="PNG")
-        caption_output = caption_client(data=buf.getvalue())
-        # handle API return formats
-        if isinstance(caption_output, dict):
-            cap_text = caption_output.get("generated_text", "").strip()
         else:
-            cap_text = str(caption_output).strip()
     st.markdown(f"**Caption:** {cap_text}")
-    # 3) Build prompt
     prompt = (
         f"Here’s an image description: “{cap_text}”.\n\n"
         "Write an 80–100 word playful story for 3–10 year-old children that:\n"
@@ -64,11 +68,11 @@ else:
         "Story:"
     )
-    # 4) Story via HF Inference API
     with st.spinner("✍️ Generating story..."):
-        story_output = story_client(
             inputs=prompt,
-            params={
                 "max_new_tokens": 120,
                 "do_sample": True,
                 "temperature": 0.7,
@@ -78,11 +82,17 @@ else:
                 "no_repeat_ngram_size": 3
             }
         )
-        # API returns list of generations or a dict
-        if isinstance(story_output, list):
-            story = story_output[0].get("generated_text", "").strip()
         else:
-            story = story_output.get("generated_text", "").strip()
     st.markdown("**Story:**")
     st.write(story)
@@ -93,5 +103,4 @@ else:
         tts.write_to_fp(tmp)
         tmp.flush()
     st.audio(tmp.name, format="audio/mp3")

 # —––––––– Inference clients (cached)
 @st.cache_resource
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         task="image-to-text",
         token=hf_token
     )
     story_client = InferenceApi(
         repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         task="text-generation",
 caption_client, story_client = load_clients()
 # —––––––– Main UI
+uploaded = st.file_uploader("Upload an image:", type=["jpg","jpeg","png"])
 if not uploaded:
+    st.info("Please upload a JPG/PNG image to begin.")
 else:
+    # 1) Display image
     img = Image.open(uploaded).convert("RGB")
     st.image(img, use_container_width=True)
+    # 2) Generate caption
     with st.spinner("🔍 Generating caption..."):
         buf = BytesIO()
         img.save(buf, format="PNG")
+        cap_out = caption_client(data=buf.getvalue())
+        # Correctly extract from list/dict
+        if isinstance(cap_out, list) and cap_out:
+            cap_text = cap_out[0].get("generated_text", "").strip()
+        elif isinstance(cap_out, dict):
+            cap_text = cap_out.get("generated_text", "").strip()
         else:
+            cap_text = str(cap_out).strip()
+    if not cap_text:
+        st.error("😕 I couldn’t generate a caption. Try uploading a different image.")
+        st.stop()
     st.markdown(f"**Caption:** {cap_text}")
+    # 3) Build prompt for story
     prompt = (
         f"Here’s an image description: “{cap_text}”.\n\n"
         "Write an 80–100 word playful story for 3–10 year-old children that:\n"
         "Story:"
     )
+    # 4) Generate story
     with st.spinner("✍️ Generating story..."):
+        story_out = story_client(
             inputs=prompt,
+            parameters={        # must be `parameters`, not `params`
                 "max_new_tokens": 120,
                 "do_sample": True,
                 "temperature": 0.7,
                 "no_repeat_ngram_size": 3
             }
         )
+        if isinstance(story_out, list) and story_out:
+            story = story_out[0].get("generated_text", "").strip()
+        elif isinstance(story_out, dict):
+            story = story_out.get("generated_text", "").strip()
         else:
+            story = str(story_out).strip()
+    if not story:
+        st.error("😕 I couldn’t generate a story. Please try again!")
+        st.stop()
     st.markdown("**Story:**")
     st.write(story)
         tts.write_to_fp(tmp)
         tmp.flush()
     st.audio(tmp.name, format="audio/mp3")