Spaces:

mayf
/

1

Sleeping

App Files Files Community

mayf committed on Apr 29

Commit

0dcd353

verified ·

1 Parent(s): 1fca63f

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -17

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import streamlit as st
 from PIL import Image
 from io import BytesIO
-from huggingface_hub import InferenceApi
 from transformers import pipeline
 from gtts import gTTS
 import tempfile
@@ -14,34 +15,45 @@ st.title("📖✨ Turn Images into Children's Stories")
 @st.cache_resource
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
-    # image captioning client as before
-    caption_client = InferenceApi("Salesforce/blip-image-captioning-base", token=hf_token)
-    # text-generation pipeline for story
     story_generator = pipeline(
-        "text-generation",
         model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
         tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        use_auth_token=hf_token,
-        device=0  # or -1 for CPU
     )
     return caption_client, story_generator
 caption_client, story_generator = load_clients()
 # —––––––– Helper: Generate Caption —–––––––
-def generate_caption(img):
     buf = BytesIO()
     img.save(buf, format="JPEG")
     try:
-        out = caption_client(data=buf.getvalue())
-        return out[0].get("generated_text", "").strip()
     except Exception as e:
-        st.error(f"Caption error: {e}")
-        return ""
 # —––––––– Helper: Generate Story via pipeline —–––––––
 def generate_story(prompt: str) -> str:
-    # generate up to ~200 tokens to cover 100 words margin
     outputs = story_generator(
         prompt,
         max_new_tokens=200,
@@ -52,31 +64,43 @@ def generate_story(prompt: str) -> str:
         do_sample=True
     )
     text = outputs[0]["generated_text"].strip()
-    # everything after "Story:" (if you kept that in your prompt)
     if "Story:" in text:
         text = text.split("Story:", 1)[1].strip()
-    # truncate to 100 words
     words = text.split()
     if len(words) > 100:
         text = " ".join(words[:100])
         if not text.endswith("."):
             text += "."
     return text
 # —––––––– Main App Flow —–––––––
 uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
-    caption = generate_caption(img)
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
     story_prompt = (
         f"Image description: {caption}\n\n"
         "Write a 50-100 word children's story that:\n"
@@ -87,12 +111,14 @@ if uploaded:
         "Story:\n"
     )
     with st.spinner("📝 Writing magical story..."):
         story = generate_story(story_prompt)
     st.subheader("📚 Your Magical Story")
     st.write(story)
-    # Audio Conversion
     with st.spinner("🔊 Adding story voice..."):
         try:
             tts = gTTS(text=story, lang="en", slow=False)

+import os
 import streamlit as st
 from PIL import Image
 from io import BytesIO
+from huggingface_hub import InferenceApi, login
 from transformers import pipeline
 from gtts import gTTS
 import tempfile
 @st.cache_resource
 def load_clients():
     """Create the image-captioning client and the story-generation pipeline.

     The Hugging Face token is read from ``st.secrets["HF_TOKEN"]`` and used
     to log in before either model handle is built. The function is wrapped
     in ``st.cache_resource`` so the returned objects are built once and
     reused rather than rebuilt on every call.

     Returns:
         A ``(caption_client, story_generator)`` tuple: an ``InferenceApi``
         client for ``Salesforce/blip-image-captioning-base`` and a
         ``transformers`` text-generation pipeline for
         ``deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B``.

     Raises:
         Whatever ``st.secrets`` raises when ``HF_TOKEN`` is not configured,
         and any error raised while logging in or loading the model.
     """
     hf_token = st.secrets["HF_TOKEN"]

     # 1) Authenticate. login() is what registers the token for later Hub
     #    downloads. The environment variable is kept for compatibility.
     #    NOTE(review): huggingface_hub documents HF_TOKEN as the variable it
     #    reads -- confirm whether anything consumes this name.
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
     login(hf_token)

     # 2) BLIP-based image captioning client (hosted inference)
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         token=hf_token,
     )

     # 3) Pick the device: first GPU when CUDA is available, CPU (-1)
     #    otherwise, so a CPU-only host does not fail at pipeline creation.
     try:
         import torch
     except ImportError:
         # Cannot probe the hardware; keep the previous hard-coded choice.
         device = 0
     else:
         device = 0 if torch.cuda.is_available() else -1

     # 4) Text-generation pipeline for story creation
     story_model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
     story_generator = pipeline(
         task="text-generation",
         model=story_model,
         tokenizer=story_model,
         device=device,
     )
     return caption_client, story_generator
 caption_client, story_generator = load_clients()
 # —––––––– Helper: Generate Caption —–––––––
def generate_caption(img: Image.Image) -> str:
    """Return a caption for *img* from the captioning client, or "" on failure.

    The image is encoded as JPEG in memory and sent to ``caption_client``.
    Every failure path reports a message through ``st.error`` and yields an
    empty string, which the caller treats as "no caption".

    Args:
        img: Image to describe. It must be in a mode that can be saved as
            JPEG; the encoding step is not wrapped in the error handler.

    Returns:
        The stripped ``generated_text`` of the first result, or ``""`` when
        the request fails or the response has no usable result.
    """
    buf = BytesIO()
    img.save(buf, format="JPEG")
    try:
        response = caption_client(data=buf.getvalue())
        # The hosted API can answer with a JSON object carrying an "error"
        # field (for example while the model is still loading) instead of
        # raising. Report it rather than returning "" with no explanation.
        if isinstance(response, dict) and response.get("error"):
            st.error(f"Caption generation error: {response['error']}")
            return ""
        if isinstance(response, list) and response:
            return response[0].get("generated_text", "").strip()
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        st.error(f"Caption generation error: {e}")
    return ""
 # —––––––– Helper: Generate Story via pipeline —–––––––
 def generate_story(prompt: str) -> str:
     outputs = story_generator(
         prompt,
         max_new_tokens=200,
         do_sample=True
     )
     text = outputs[0]["generated_text"].strip()
+    # If prompt was echoed, remove it
+    if text.startswith(prompt):
+        text = text[len(prompt):].strip()
+    # If you included a "Story:" marker, split it out
     if "Story:" in text:
         text = text.split("Story:", 1)[1].strip()
+    # Truncate to at most 100 words
     words = text.split()
     if len(words) > 100:
         text = " ".join(words[:100])
         if not text.endswith("."):
             text += "."
     return text
 # —––––––– Main App Flow —–––––––
 uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
+    # Load & resize
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
         img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
+    # Caption
+    with st.spinner("🔍 Discovering image secrets..."):
+        caption = generate_caption(img)
     if not caption:
         st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
     st.success(f"**Caption:** {caption}")
+    # Build prompt
     story_prompt = (
         f"Image description: {caption}\n\n"
         "Write a 50-100 word children's story that:\n"
         "Story:\n"
     )
+    # Generate story
     with st.spinner("📝 Writing magical story..."):
         story = generate_story(story_prompt)
     st.subheader("📚 Your Magical Story")
     st.write(story)
+    # Convert to audio
     with st.spinner("🔊 Adding story voice..."):
         try:
             tts = gTTS(text=story, lang="en", slow=False)