Spaces:

mayf
/

1

Sleeping

App Files Community

mayf committed on Apr 30

Commit

982555a

verified ·

1 Parent(s): 2abb776

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -72

app.py CHANGED Viewed

@@ -1,115 +1,122 @@
-import os
-import time
 import streamlit as st
 from PIL import Image
-from transformers import pipeline
 from gtts import gTTS
-import tempfile
-# --- Requirements ---
-# Update requirements.txt to include:
-"""
-streamlit>=1.20
-pillow>=9.0
-torch>=2.0.0
-transformers>=4.40
-sentencepiece>=0.2.0
-gTTS>=2.3.1
-accelerate>=0.30
-"""
-# --- Page Setup ---
-st.set_page_config(page_title="Magic Story Generator", layout="centered")
 st.title("📖✨ Turn Images into Children's Stories")
-# --- Load Pipelines (cached) ---
 @st.cache_resource(show_spinner=False)
-def load_pipelines():
-    # 1) Image-captioning pipeline (BLIP)
     captioner = pipeline(
-        task="image-to-text",
         model="Salesforce/blip-image-captioning-base",
-        device=-1
     )
-    # 2) Modified story-generation pipeline using Qwen3-1.7B
     storyteller = pipeline(
-        task="text-generation",
         model="Qwen/Qwen3-1.7B",
         device_map="auto",
         trust_remote_code=True,
         torch_dtype="auto",
         max_new_tokens=150,
         temperature=0.7,
-        top_p=0.9,
-        repetition_penalty=1.2,
-        eos_token_id=151645  # Specific to Qwen3 tokenizer
     )
     return captioner, storyteller
-captioner, storyteller = load_pipelines()
-# --- Main App ---
-uploaded = st.file_uploader("Upload an image:", type=["jpg", "png", "jpeg"])
-if uploaded:
-    # Load and display the image
-    img = Image.open(uploaded).convert("RGB")
-    st.image(img, use_container_width=True)
     # Generate caption
-    with st.spinner("🔍 Generating caption..."):
-        cap = captioner(img)
-        caption = cap[0].get("generated_text", "").strip() if isinstance(cap, list) else ""
-    if not caption:
-        st.error("😢 Couldn't understand this image. Try another one!")
         st.stop()
-    st.success(f"**Caption:** {caption}")
-    # Build prompt and generate story
-    prompt = (
         f"<|im_start|>system\n"
-        f"You are a children's story writer. Create a 50-100 word story based on this image description: {caption}\n"
-        f"<|im_end|>\n"
         f"<|im_start|>user\n"
-        f"Write a coherent, child-friendly story that flows naturally with simple vocabulary.<|im_end|>\n"
         f"<|im_start|>assistant\n"
     )
-    with st.spinner("📝 Writing story..."):
-        start = time.time()
-        out = storyteller(
-            prompt,
             do_sample=True,
             num_return_sequences=1
         )
-        gen_time = time.time() - start
-        st.text(f"⏱ Generated in {gen_time:.1f}s")
     # Process output
-    story = out[0]['generated_text'].split("<|im_start|>assistant\n")[-1]
-    story = story.replace("<|im_end|>", "").strip()
-    # Enforce ≤100 words and proper ending
-    words = story.split()
-    if len(words) > 100:
-        story = " ".join(words[:100])
-    if not story.endswith(('.', '!', '?')):
-        story += '.'
     # Display story
-    st.subheader("📚 Your Magical Story")
-    st.write(story)
-    # Convert to audio
-    with st.spinner("🔊 Converting to audio..."):
         try:
-            tts = gTTS(text=story, lang="en", slow=False)
-            tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
-            tts.save(tmp.name)
-            st.audio(tmp.name, format="audio/mp3")
         except Exception as e:
-            st.warning(f"⚠️ TTS failed: {e}")
 # Footer
-st.markdown("---\nMade with ❤️ by your friendly story wizard")

+# Must be FIRST import and FIRST Streamlit command
 import streamlit as st
+st.set_page_config(
+    page_title="Magic Story Generator",
+    layout="centered",
+    page_icon="📖"
+)
+# Other imports AFTER Streamlit config
+import time
+import tempfile
 from PIL import Image
 from gtts import gTTS
+from transformers import pipeline
+# --- Constants & Setup ---
 st.title("📖✨ Turn Images into Children's Stories")
+# --- Model Loading (Cached) ---
 @st.cache_resource(show_spinner=False)
+def load_models():
+    # Image captioning model
     captioner = pipeline(
+        "image-to-text",
         model="Salesforce/blip-image-captioning-base",
+        device=-1  # Use -1 for CPU, 0 for GPU
     )
+    # Story generation model (Qwen3-1.7B)
     storyteller = pipeline(
+        "text-generation",
         model="Qwen/Qwen3-1.7B",
         device_map="auto",
         trust_remote_code=True,
         torch_dtype="auto",
         max_new_tokens=150,
         temperature=0.7,
+        top_p=0.85,
+        repetition_penalty=1.15,
+        eos_token_id=151645  # Qwen3's specific EOS token
     )
     return captioner, storyteller
+caption_pipe, story_pipe = load_models()
+# --- Main Application Flow ---
+uploaded_image = st.file_uploader(
+    "Upload a children's book style image:",
+    type=["jpg", "jpeg", "png"]
+)
+if uploaded_image:
+    # Process image
+    image = Image.open(uploaded_image).convert("RGB")
+    st.image(image, use_container_width=True)
     # Generate caption
+    with st.spinner("🔍 Analyzing image..."):
+        caption_result = caption_pipe(image)
+        image_caption = caption_result[0].get("generated_text", "").strip()
+    if not image_caption:
+        st.error("❌ Couldn't understand this image. Please try another!")
         st.stop()
+    st.success(f"**Image Understanding:** {image_caption}")
+    # Create story prompt
+    story_prompt = (
         f"<|im_start|>system\n"
+        f"You are a children's book author. Create a 50-100 word story based on this image description: {image_caption}\n"
+        "Use simple language, friendly characters, and a positive lesson.<|im_end|>\n"
         f"<|im_start|>user\n"
+        f"Write a short, child-friendly story with a clear beginning, middle, and end.<|im_end|>\n"
         f"<|im_start|>assistant\n"
     )
+    # Generate story
+    with st.spinner("📝 Crafting magical story..."):
+        start_time = time.time()
+        story_result = story_pipe(
+            story_prompt,
             do_sample=True,
             num_return_sequences=1
         )
+        generation_time = time.time() - start_time
+        st.text(f"⏱ Generation time: {generation_time:.1f}s")
     # Process output
+    raw_story = story_result[0]['generated_text']
+    clean_story = raw_story.split("<|im_start|>assistant\n")[-1]
+    clean_story = clean_story.replace("<|im_end|>", "").strip()
+    # Ensure proper story formatting
+    final_story = []
+    for sentence in clean_story.split(". "):
+        if not sentence: continue
+        if not sentence.endswith('.'):
+            sentence += '.'
+        final_story.append(sentence[0].upper() + sentence[1:])
+    final_story = " ".join(final_story).replace("..", ".")[:600]  # Character limit safeguard
     # Display story
+    st.subheader("✨ Your Magical Story")
+    st.write(final_story)
+    # Audio conversion
+    with st.spinner("🔊 Creating audio version..."):
         try:
+            audio = gTTS(text=final_story, lang="en", slow=False)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+                audio.save(tmp_file.name)
+                st.audio(tmp_file.name, format="audio/mp3")
         except Exception as e:
+            st.error(f"❌ Audio conversion failed: {str(e)}")
 # Footer
+st.markdown("---")
+st.markdown("📚 Made with ♥ by The Story Wizard • [Report Issues](https://example.com)")