mayf committed · verified
Commit fd1d947 · Parent(s): 2c0fb69

Update app.py

Files changed (1)
  1. app.py +43 -56
app.py CHANGED
@@ -3,102 +3,89 @@ import time
 import streamlit as st
 from PIL import Image
 from io import BytesIO
-from huggingface_hub import InferenceApi, login
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-import torch
+from transformers import pipeline
+from huggingface_hub import login
 from gtts import gTTS
 import tempfile
 
 # —––––––– Requirements —–––––––
-# Install transformers with remote code support:
-#   pip install git+https://github.com/huggingface/transformers.git
-# plus:
-#   pip install streamlit torch accelerate huggingface_hub sentencepiece pillow gTTS
+# pip install streamlit pillow gTTS transformers huggingface_hub
 
 # —––––––– Page Config —–––––––
-st.set_page_config(page_title="Magic Story Generator (Qwen2.5)", layout="centered")
-st.title("📖✨ Turn Images into Children's Stories (Qwen2.5-Omni-7B)")
+st.set_page_config(page_title="Magic Story Generator (Local Pipeline)", layout="centered")
+st.title("📖✨ Turn Images into Children's Stories")
 
 # —––––––– Load Clients & Pipelines (cached) —–––––––
 @st.cache_resource(show_spinner=False)
 def load_clients():
-    hf_token = st.secrets["HF_TOKEN"]
-    # Authenticate for Hugging Face Hub
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
-    login(hf_token)
-
-    # 1) BLIP captioning
-    caption_client = InferenceApi(
-        repo_id="Salesforce/blip-image-captioning-base",
-        token=hf_token
+    # Authenticate to pull private or remote-code models if needed
+    hf_token = st.secrets.get("HF_TOKEN")
+    if hf_token:
+        os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
+        login(hf_token)
+
+    # 1) Image-captioning pipeline (BLIP)
+    captioner = pipeline(
+        task="image-to-text",
+        model="Salesforce/blip-image-captioning-base",
+        device=-1  # CPU; change to 0 for GPU
     )
 
-    # 2) Load Qwen2.5-Omni causal LM
-    t0 = time.time()
-    tokenizer = AutoTokenizer.from_pretrained(
-        "Qwen/Qwen2.5-Omni-7B",
-        trust_remote_code=True
-    )
-    model = AutoModelForCausalLM.from_pretrained(
-        "Qwen/Qwen2.5-Omni-7B",
-        trust_remote_code=True,
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2"
-    )
-    # 3) Text-generation pipeline
+    # 2) Story-generation pipeline (DeepSeek-R1-Distill-Qwen)
     storyteller = pipeline(
         task="text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        device_map="auto",
-        temperature=0.7,
+        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        tokenizer="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        trust_remote_code=True,
+        device=-1,  # CPU; set 0+ for GPU
+        temperature=0.6,
         top_p=0.9,
-        repetition_penalty=1.2,
-        no_repeat_ngram_size=3,
+        repetition_penalty=1.1,
+        no_repeat_ngram_size=2,
         max_new_tokens=120,
         return_full_text=False
     )
-    load_time = time.time() - t0
-    st.text(f"✅ Story model loaded in {load_time:.1f}s (cached)")
-    return caption_client, storyteller
 
-caption_client, storyteller = load_clients()
+    return captioner, storyteller
+
+captioner, storyteller = load_clients()
 
 # —––––––– Helpers —–––––––
 def generate_caption(img: Image.Image) -> str:
-    buf = BytesIO()
-    img.save(buf, format="JPEG")
-    resp = caption_client(data=buf.getvalue())
-    if isinstance(resp, list) and resp:
-        return resp[0].get("generated_text", "").strip()
+    # Use the BLIP pipeline to generate a caption
+    result = captioner(img)
+    if isinstance(result, list) and result:
+        return result[0].get("generated_text", "").strip()
     return ""
 
 
 def generate_story(caption: str) -> str:
+    # Build a simple prompt incorporating the caption
     prompt = (
-        "You are a creative children's-story author.\n"
-        f"Image description: “{caption}”\n\n"
-        "Write a coherent 50–100 word story."
+        f"Image description: {caption}\n"
+        "Write a coherent 50-100 word children's story that flows naturally."
     )
+
     t0 = time.time()
-    outputs = storyteller(prompt)
+    outputs = storyteller(
+        prompt
+    )
     gen_time = time.time() - t0
-    st.text(f"⏱ Generated in {gen_time:.1f}s on GPU/CPU")
+    st.text(f"⏱ Generated in {gen_time:.1f}s")
 
-    story = outputs[0]["generated_text"].strip()
-    # Enforce 100 words
+    story = outputs[0].get("generated_text", "").strip()
+    # Truncate to 100 words
     words = story.split()
     if len(words) > 100:
         story = " ".join(words[:100]) + ('.' if not story.endswith('.') else '')
     return story
 
 # —––––––– Main App —–––––––
-uploaded = st.file_uploader("Upload an image:", type=["jpg","png","jpeg"])
+uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
 if uploaded:
     img = Image.open(uploaded).convert("RGB")
     if max(img.size) > 2048:
-        img.thumbnail((2048,2048))
+        img.thumbnail((2048, 2048))
     st.image(img, use_container_width=True)
 
     with st.spinner("🔍 Generating caption..."):
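For reference, a minimal standalone sketch of the new wiring, runnable outside Streamlit. This script is not part of the commit: "sample.jpg" and the do_sample flag are hypothetical additions, while the model IDs and generation settings are copied from the diff.

# sketch.py — exercise the two pipelines from app.py without the Streamlit UI
from PIL import Image
from transformers import pipeline

# Image-captioning pipeline (BLIP), on CPU as in the committed code
captioner = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device=-1,
)

# Story-generation pipeline with the same sampling settings as the commit
storyteller = pipeline(
    task="text-generation",
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    device=-1,
    do_sample=True,  # not in the commit; makes temperature/top_p take effect
    temperature=0.6,
    top_p=0.9,
    repetition_penalty=1.1,
    no_repeat_ngram_size=2,
    max_new_tokens=120,
    return_full_text=False,
)

img = Image.open("sample.jpg").convert("RGB")  # hypothetical test image
caption = captioner(img)[0]["generated_text"].strip()
prompt = (
    f"Image description: {caption}\n"
    "Write a coherent 50-100 word children's story that flows naturally."
)
story = storyteller(prompt)[0]["generated_text"].strip()
print("Caption:", caption)
print("Story:", story)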