Spaces:

mayf
/

1

Sleeping

App Files Community

mayf committed on Apr 30

Commit

f913ab4

verified ·

1 Parent(s): d179ebe

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -61

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# FIRST import and FIRST Streamlit command
 import streamlit as st
 st.set_page_config(
     page_title="Magic Story Generator",
@@ -13,38 +13,64 @@ import torch
 import tempfile
 from PIL import Image
 from gtts import gTTS
-from transformers import pipeline
 # --- Constants & Setup ---
 st.title("📖✨ Turn Images into Children's Stories")
-# --- Model Loading (Cached) ---
 @st.cache_resource(show_spinner=False)
 def load_models():
-    # Image captioning model
     captioner = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
         device=0 if torch.cuda.is_available() else -1
     )
-    # Optimized story generation model
-    storyteller = pipeline(
         "text-generation",
         model="Qwen/Qwen3-0.6B",
         device_map="auto",
         torch_dtype=torch.float16,
-        max_new_tokens=200,
-        temperature=0.9,
-        top_k=50,
         top_p=0.9,
-        repetition_penalty=1.1,
-        eos_token_id=151645
     )
-    return captioner, storyteller
-caption_pipe, story_pipe = load_models()
 # --- Main Application Flow ---
 uploaded_image = st.file_uploader(
@@ -53,7 +79,6 @@ uploaded_image = st.file_uploader(
 )
 if uploaded_image:
-    # Process image
     image = Image.open(uploaded_image).convert("RGB")
     st.image(image, use_column_width=True)
@@ -72,73 +97,54 @@ if uploaded_image:
     st.success(f"**Image Understanding:** {image_caption}")
-    # Create story prompt
-    story_prompt = (
-        f"<|im_start|>system\n"
-        f"You're a children's author. Create a short story (100-150 words) based on: {image_caption}\n"
-        f"Use simple language and include a moral lesson.<|im_end|>\n"
-        f"<|im_start|>assistant\n"
-    )
-    # Generate story with progress
-    progress_bar = st.progress(0)
-    status_text = st.empty()
     try:
         with st.spinner("📝 Crafting magical story..."):
             start_time = time.time()
-            def update_progress(step):
-                progress = min(step/5, 1.0)  # Simulate progress steps
-                progress_bar.progress(progress)
-                status_text.text(f"Step {int(step)}/5: {'📖'*int(step)}")
-            update_progress(1)
             story_result = story_pipe(
                 story_prompt,
-                do_sample=True,
-                num_return_sequences=1
             )
-            update_progress(4)
-            generation_time = time.time() - start_time
-            st.info(f"Story generated in {generation_time:.1f} seconds")
-            # Process output
             raw_story = story_result[0]['generated_text']
-            clean_story = raw_story.split("<|im_start|>assistant\n")[-1]
-            clean_story = re.sub(r'<\|.*?\|>', '', clean_story).strip()
-            # Format story text
-            sentences = []
-            for sent in re.split(r'(?<=[.!?]) +', clean_story):
-                sent = sent.strip()
-                if sent:
-                    if len(sent) > 1 and not sent.endswith(('.','!','?')):
-                        sent += '.'
-                    sentences.append(sent[0].upper() + sent[1:])
-            final_story = ' '.join(sentences)[:600]  # Limit length
-            update_progress(5)
-            time.sleep(0.5)  # Final progress pause
     except Exception as e:
         st.error(f"❌ Story generation failed: {str(e)}")
         st.stop()
-    finally:
-        progress_bar.empty()
-        status_text.empty()
     # Display story
     st.subheader("✨ Your Magical Story")
-    st.write(final_story)
     # Audio conversion
     with st.spinner("🔊 Creating audio version..."):
         try:
-            audio = gTTS(text=final_story, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                 audio.save(tmp_file.name)
                 st.audio(tmp_file.name, format="audio/mp3")

+# Import Streamlit first
 import streamlit as st
 st.set_page_config(
     page_title="Magic Story Generator",
 import tempfile
 from PIL import Image
 from gtts import gTTS
+from transformers import pipeline, AutoTokenizer
 # --- Constants & Setup ---
 st.title("📖✨ Turn Images into Children's Stories")
+# --- Enhanced Cleaning Functions ---
+def clean_story_text(raw_text):
+    """Multi-stage cleaning pipeline for generated stories"""
+    # Remove chat template artifacts
+    clean = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', raw_text, flags=re.DOTALL)
+    # Remove thinking chain patterns
+    clean = re.sub(
+        r'(Okay, I need|Let me start|First,|Maybe|I should|How to)(.*?)(?=\n\w|\Z)',
+        '',
+        clean,
+        flags=re.DOTALL|re.IGNORECASE
+    )
+    # Remove special tokens and markdown
+    clean = re.sub(r'<\|.*?\|>|\[.*?\]|\*\*', '', clean)
+    # Split and clean paragraphs
+    paragraphs = [p.strip() for p in clean.split('\n') if p.strip()]
+    return '\n\n'.join(paragraphs[:3])  # Keep max 3 paragraphs
+# --- Optimized Model Loading ---
 @st.cache_resource(show_spinner=False)
 def load_models():
+    # Image captioning
     captioner = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
         device=0 if torch.cuda.is_available() else -1
     )
+    # Story generator with Qwen-specific config
+    tokenizer = AutoTokenizer.from_pretrained(
+        "Qwen/Qwen3-0.6B",
+        trust_remote_code=True,
+        pad_token='<|endoftext|>'
+    )
+    story_pipe = pipeline(
         "text-generation",
         model="Qwen/Qwen3-0.6B",
+        tokenizer=tokenizer,
         device_map="auto",
         torch_dtype=torch.float16,
+        max_new_tokens=300,  # Increased for better story flow
+        temperature=0.7,      # Lower temperature for more focused output
         top_p=0.9,
+        repetition_penalty=1.2,
+        do_sample=True,
+        eos_token_id=tokenizer.eos_token_id
     )
+    return captioner, story_pipe
 # --- Main Application Flow ---
 uploaded_image = st.file_uploader(
 )
 if uploaded_image:
     image = Image.open(uploaded_image).convert("RGB")
     st.image(image, use_column_width=True)
     st.success(f"**Image Understanding:** {image_caption}")
+    # Enhanced prompt engineering
+    story_prompt = f"""<|im_start|>system
+You are a children's story writer. Create a SHORT STORY based on this image description: "{image_caption}"
+RULES:
+1. Use simple language (Grade 2 level)
+2. Include a magical element
+3. Add a moral lesson about kindness
+4. NO internal thoughts/explanations
+5. 3 paragraphs maximum<|im_end|>
+<|im_start|>user
+Write the story<|im_end|>
+<|im_start|>assistant
+"""
+    # Generate story
     try:
         with st.spinner("📝 Crafting magical story..."):
             start_time = time.time()
             story_result = story_pipe(
                 story_prompt,
+                num_return_sequences=1,
+                stopping_criteria=[lambda _: False]  # Disable default stopping
             )
+            # Enhanced post-processing
             raw_story = story_result[0]['generated_text']
+            clean_story = clean_story_text(raw_story.split("<|im_start|>assistant")[-1])
+            # Format paragraphs
+            formatted_story = "\n\n".join(
+                [f"<p style='font-size:18px; line-height:1.6'>{p}</p>"
+                 for p in clean_story.split("\n\n")]
+            )
     except Exception as e:
         st.error(f"❌ Story generation failed: {str(e)}")
         st.stop()
     # Display story
     st.subheader("✨ Your Magical Story")
+    st.markdown(formatted_story, unsafe_allow_html=True)
     # Audio conversion
     with st.spinner("🔊 Creating audio version..."):
         try:
+            audio = gTTS(text=clean_story, lang="en", slow=False)
             with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                 audio.save(tmp_file.name)
                 st.audio(tmp_file.name, format="audio/mp3")