mayf committed (verified)
Commit e616e4e · Parent: 5a9c362

Update app.py

Files changed (1):
  1. app.py +21 -17
app.py CHANGED
@@ -4,9 +4,8 @@ import streamlit as st
 from PIL import Image
 from io import BytesIO
 from huggingface_hub import InferenceApi, login
-from transformers import pipeline
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
-from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniTokenizer
 from gtts import gTTS
 import tempfile
 
 
@@ -18,29 +17,30 @@ st.title("📖✨ Turn Images into Children's Stories (Qwen2.5-Omni-7B)")
 @st.cache_resource(show_spinner=False)
 def load_clients():
     hf_token = st.secrets["HF_TOKEN"]
-    # Authenticate for HF Hub
+    # Authenticate for Hugging Face Hub
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
     login(hf_token)
 
-    # 1) BLIP captioning via HF Inference API
+    # 1) BLIP captioning via HTTP API
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         token=hf_token
     )
 
-    # 2) Qwen2.5-Omni story generator
+    # 2) Load Qwen2.5-Omni model & tokenizer
     t0 = time.time()
-    model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2",
         trust_remote_code=True
     )
-    tokenizer = Qwen2_5OmniTokenizer.from_pretrained(
+    model = AutoModelForSeq2SeqLM.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
-        trust_remote_code=True
+        trust_remote_code=True,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        attn_implementation="flash_attention_2"
     )
+    # 3) Build text2text pipeline
     storyteller = pipeline(
         task="text2text-generation",
         model=model,
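Note on this hunk: Qwen2.5-Omni is not an encoder-decoder model, so `AutoModelForSeq2SeqLM` (and a `text2text-generation` pipeline built on it) may fail to resolve the architecture even with `trust_remote_code=True`. The dedicated class the removed lines pointed at is the documented loading path. A minimal sketch of that path, assuming a transformers release that ships Qwen2.5-Omni support; `Qwen2_5OmniProcessor` is the tokenizer-side companion from the model card, and `flash_attention_2` is left out because it additionally requires the `flash-attn` package and a supported GPU:

```python
import torch
from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor

# Dedicated Qwen2.5-Omni classes per the model card; a sketch, not the
# app's committed code.
model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-Omni-7B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = Qwen2_5OmniProcessor.from_pretrained("Qwen/Qwen2.5-Omni-7B")
```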
@@ -53,8 +53,7 @@ def load_clients():
         max_new_tokens=120
     )
     load_time = time.time() - t0
-    st.text(f"✅ Story model loaded in {load_time:.1f}s (cached thereafter)")
-
+    st.text(f"✅ Story model loaded in {load_time:.1f}s (cached)")
     return caption_client, storyteller
 
 caption_client, storyteller = load_clients()
@@ -73,14 +72,19 @@ def generate_story(caption: str) -> str:
     prompt = (
         "You are a creative children's-story author.\n"
         f"Image description: “{caption}”\n\n"
-        "Write a coherent 50–100 word story\n"
+        "Write a coherent 50–100 word story that:\n"
+        "1. Introduces the main character.\n"
+        "2. Shows a simple problem or discovery.\n"
+        "3. Has a happy resolution.\n"
+        "4. Uses clear language for ages 3–8.\n"
+        "5. Keeps each sentence under 20 words.\n"
     )
     t0 = time.time()
-    outputs = storyteller(prompt)
+    result = storyteller(prompt)
     gen_time = time.time() - t0
     st.text(f"⏱ Generated in {gen_time:.1f}s on GPU/CPU")
 
-    story = outputs[0]["generated_text"].strip()
+    story = result[0]["generated_text"].strip()
     # Enforce ≤100 words
     words = story.split()
     if len(words) > 100:
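For reference, transformers generation pipelines return a list of dicts keyed by `generated_text`, which is what `result[0]["generated_text"]` unpacks. The hunk cuts off at the `if`, so the truncation body below is a plausible reconstruction, not the committed code:

```python
result = storyteller(prompt)   # -> [{"generated_text": "Once upon a time ..."}]
story = result[0]["generated_text"].strip()

# Enforce <=100 words; hypothetical body for the truncated `if` branch.
words = story.split()
if len(words) > 100:
    story = " ".join(words[:100])
```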
@@ -120,4 +124,4 @@ if uploaded:
         st.warning(f"⚠️ TTS failed: {e}")
 
 # Footer
-st.markdown("---\n*Made with ❤️ by your friendly story wizard*")
+st.markdown("---\n*Made with ❤️ by your friendly story wizard*")
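The TTS path appears in this diff only through its failure branch. For context, a typical gTTS-to-Streamlit flow looks like the sketch below; it assumes the app writes the MP3 to a temp file, since the committed code for this part is not shown:

```python
from gtts import gTTS
import tempfile

# Synthesize the story and hand the file path to Streamlit's audio player.
tts = gTTS(story, lang="en")
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
    tts.save(fp.name)
st.audio(fp.name)
```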
 
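Finally, a forward-compatibility flag: `InferenceApi` is deprecated in recent `huggingface_hub` releases in favor of `InferenceClient`. A hedged sketch of the captioning call on the newer client; on current versions `image_to_text` returns a dataclass with a `.generated_text` field, while older versions returned a bare string:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(token=hf_token)  # hf_token from st.secrets, as above
# Server-side BLIP captioning of the uploaded image.
output = client.image_to_text(
    image_bytes,  # hypothetical name for the raw bytes of the uploaded file
    model="Salesforce/blip-image-captioning-base",
)
caption = output.generated_text
```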