Spaces:

mayf
/

1

Sleeping

App Files Community

mayf committed on Apr 29

Commit

2c0fb69

verified ·

1 Parent(s): 748a576

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -18

app.py CHANGED Viewed

@@ -4,17 +4,16 @@ import streamlit as st
 from PIL import Image
 from io import BytesIO
 from huggingface_hub import InferenceApi, login
-from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 from gtts import gTTS
 import tempfile
 # —––––––– Requirements —–––––––
-# This app uses a Hugging Face Transformers version that supports
-# the Qwen2.5-Omni architecture via `trust_remote_code`.
-# Install using:
 # pip install git+https://github.com/huggingface/transformers.git
-# and the rest of the requirements listed at the end.
 # —––––––– Page Config —–––––––
 st.set_page_config(page_title="Magic Story Generator (Qwen2.5)", layout="centered")
@@ -28,27 +27,28 @@ def load_clients():
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
     login(hf_token)
-    # 1) BLIP captioning via HTTP API
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         token=hf_token
     )
-    # 2) Load Qwen2.5-Omni model & tokenizer via remote code
     t0 = time.time()
     tokenizer = AutoTokenizer.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
         trust_remote_code=True
     )
-    model = AutoModelForSeq2SeqLM.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
         trust_remote_code=True,
         device_map="auto",
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2"
     )
     storyteller = pipeline(
-        task="text2text-generation",
         model=model,
         tokenizer=tokenizer,
         device_map="auto",
@@ -56,7 +56,8 @@ def load_clients():
         top_p=0.9,
         repetition_penalty=1.2,
         no_repeat_ngram_size=3,
-        max_new_tokens=120
     )
     load_time = time.time() - t0
     st.text(f"✅ Story model loaded in {load_time:.1f}s (cached)")
@@ -78,19 +79,15 @@ def generate_story(caption: str) -> str:
     prompt = (
         "You are a creative children's-story author.\n"
         f"Image description: “{caption}”\n\n"
-        "Write a coherent 50–100 word story that:\n"
-        "1. Introduces the main character.\n"
-        "2. Shows a simple problem or discovery.\n"
-        "3. Has a happy resolution.\n"
-        "4. Uses clear language for ages 3–8.\n"
-        "5. Keeps each sentence under 20 words.\n"
     )
     t0 = time.time()
-    result = storyteller(prompt)
     gen_time = time.time() - t0
     st.text(f"⏱ Generated in {gen_time:.1f}s on GPU/CPU")
-    story = result[0]["generated_text"].strip()
     words = story.split()
     if len(words) > 100:
         story = " ".join(words[:100]) + ('.' if not story.endswith('.') else '')
@@ -128,3 +125,4 @@ if uploaded:
 # Footer
 st.markdown("---\n*Made with ❤️ by your friendly story wizard*")

 from PIL import Image
 from io import BytesIO
 from huggingface_hub import InferenceApi, login
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import torch
 from gtts import gTTS
 import tempfile
 # —––––––– Requirements —–––––––
+# Install transformers with remote code support:
 # pip install git+https://github.com/huggingface/transformers.git
+# plus:
+# pip install streamlit torch accelerate huggingface_hub sentencepiece pillow gTTS
 # —––––––– Page Config —–––––––
 st.set_page_config(page_title="Magic Story Generator (Qwen2.5)", layout="centered")
     os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
     login(hf_token)
+    # 1) BLIP captioning
     caption_client = InferenceApi(
         repo_id="Salesforce/blip-image-captioning-base",
         token=hf_token
     )
+    # 2) Load Qwen2.5-Omni causal LM
     t0 = time.time()
     tokenizer = AutoTokenizer.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
         trust_remote_code=True
     )
+    model = AutoModelForCausalLM.from_pretrained(
         "Qwen/Qwen2.5-Omni-7B",
         trust_remote_code=True,
         device_map="auto",
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2"
     )
+    # 3) Text-generation pipeline
     storyteller = pipeline(
+        task="text-generation",
         model=model,
         tokenizer=tokenizer,
         device_map="auto",
         top_p=0.9,
         repetition_penalty=1.2,
         no_repeat_ngram_size=3,
+        max_new_tokens=120,
+        return_full_text=False
     )
     load_time = time.time() - t0
     st.text(f"✅ Story model loaded in {load_time:.1f}s (cached)")
     prompt = (
         "You are a creative children's-story author.\n"
         f"Image description: “{caption}”\n\n"
+        "Write a coherent 50–100 word story."
     )
     t0 = time.time()
+    outputs = storyteller(prompt)
     gen_time = time.time() - t0
     st.text(f"⏱ Generated in {gen_time:.1f}s on GPU/CPU")
+    story = outputs[0]["generated_text"].strip()
+    # Enforce ≤100 words
     words = story.split()
     if len(words) > 100:
         story = " ".join(words[:100]) + ('.' if not story.endswith('.') else '')
 # Footer
 st.markdown("---\n*Made with ❤️ by your friendly story wizard*")