mayf committed on
Commit
6523fb1
·
verified ·
1 Parent(s): f913ab4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -109
app.py CHANGED
@@ -13,66 +13,58 @@ import torch
13
  import tempfile
14
  from PIL import Image
15
  from gtts import gTTS
16
- from transformers import pipeline, AutoTokenizer
17
 
18
- # --- Constants & Setup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  st.title("📖✨ Turn Images into Children's Stories")
20
 
21
- # --- Enhanced Cleaning Functions ---
22
  def clean_story_text(raw_text):
23
- """Multi-stage cleaning pipeline for generated stories"""
24
- # Remove chat template artifacts
25
- clean = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', raw_text, flags=re.DOTALL)
26
-
27
- # Remove thinking chain patterns
28
- clean = re.sub(
29
- r'(Okay, I need|Let me start|First,|Maybe|I should|How to)(.*?)(?=\n\w|\Z)',
30
- '',
31
- clean,
32
- flags=re.DOTALL|re.IGNORECASE
33
- )
34
-
35
- # Remove special tokens and markdown
36
- clean = re.sub(r'<\|.*?\|>|\[.*?\]|\*\*', '', clean)
37
-
38
- # Split and clean paragraphs
39
- paragraphs = [p.strip() for p in clean.split('\n') if p.strip()]
40
- return '\n\n'.join(paragraphs[:3]) # Keep max 3 paragraphs
41
-
42
- # --- Optimized Model Loading ---
43
- @st.cache_resource(show_spinner=False)
44
- def load_models():
45
- # Image captioning
46
- captioner = pipeline(
47
- "image-to-text",
48
- model="Salesforce/blip-image-captioning-base",
49
- device=0 if torch.cuda.is_available() else -1
50
- )
51
-
52
- # Story generator with Qwen-specific config
53
- tokenizer = AutoTokenizer.from_pretrained(
54
- "Qwen/Qwen3-0.6B",
55
- trust_remote_code=True,
56
- pad_token='<|endoftext|>'
57
- )
58
-
59
- story_pipe = pipeline(
60
- "text-generation",
61
- model="Qwen/Qwen3-0.6B",
62
- tokenizer=tokenizer,
63
- device_map="auto",
64
- torch_dtype=torch.float16,
65
- max_new_tokens=300, # Increased for better story flow
66
- temperature=0.7, # Lower temperature for more focused output
67
- top_p=0.9,
68
- repetition_penalty=1.2,
69
- do_sample=True,
70
- eos_token_id=tokenizer.eos_token_id
71
- )
72
-
73
- return captioner, story_pipe
74
-
75
- # --- Main Application Flow ---
76
  uploaded_image = st.file_uploader(
77
  "Upload a children's book style image:",
78
  type=["jpg", "jpeg", "png"]
@@ -80,76 +72,50 @@ uploaded_image = st.file_uploader(
80
 
81
  if uploaded_image:
82
  image = Image.open(uploaded_image).convert("RGB")
83
- st.image(image, use_column_width=True)
 
84
 
85
- # Generate caption
86
  with st.spinner("🔍 Analyzing image..."):
87
  try:
88
  caption_result = caption_pipe(image)
89
- image_caption = caption_result[0].get("generated_text", "").strip()
 
90
  except Exception as e:
91
  st.error(f"❌ Image analysis failed: {str(e)}")
92
  st.stop()
93
-
94
- if not image_caption:
95
- st.error("❌ Couldn't understand this image. Please try another!")
96
- st.stop()
97
-
98
- st.success(f"**Image Understanding:** {image_caption}")
99
-
100
- # Enhanced prompt engineering
101
- story_prompt = f"""<|im_start|>system
102
- You are a children's story writer. Create a SHORT STORY based on this image description: "{image_caption}"
103
-
104
- RULES:
105
- 1. Use simple language (Grade 2 level)
106
- 2. Include a magical element
107
- 3. Add a moral lesson about kindness
108
- 4. NO internal thoughts/explanations
109
- 5. 3 paragraphs maximum<|im_end|>
110
- <|im_start|>user
111
- Write the story<|im_end|>
112
- <|im_start|>assistant
113
- """
114
-
115
- # Generate story
116
  try:
117
  with st.spinner("📝 Crafting magical story..."):
118
- start_time = time.time()
119
-
120
  story_result = story_pipe(
121
  story_prompt,
122
- num_return_sequences=1,
123
- stopping_criteria=[lambda _: False] # Disable default stopping
 
124
  )
125
 
126
- # Enhanced post-processing
127
  raw_story = story_result[0]['generated_text']
128
- clean_story = clean_story_text(raw_story.split("<|im_start|>assistant")[-1])
129
-
130
- # Format paragraphs
131
- formatted_story = "\n\n".join(
132
- [f"<p style='font-size:18px; line-height:1.6'>{p}</p>"
133
- for p in clean_story.split("\n\n")]
134
- )
135
 
136
- except Exception as e:
137
- st.error(f"❌ Story generation failed: {str(e)}")
138
- st.stop()
139
 
140
- # Display story
141
- st.subheader(" Your Magical Story")
142
- st.markdown(formatted_story, unsafe_allow_html=True)
 
 
 
143
 
144
- # Audio conversion
145
- with st.spinner("🔊 Creating audio version..."):
146
- try:
147
- audio = gTTS(text=clean_story, lang="en", slow=False)
148
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
149
- audio.save(tmp_file.name)
150
- st.audio(tmp_file.name, format="audio/mp3")
151
- except Exception as e:
152
- st.error(f"❌ Audio conversion failed: {str(e)}")
153
 
154
  # Footer
155
  st.markdown("---")
 
13
  import tempfile
14
  from PIL import Image
15
  from gtts import gTTS
16
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
17
 
18
# --- Initialize Models First ---
@st.cache_resource(show_spinner=False)
def load_models():
    """Load and cache both inference pipelines at app startup.

    Returns:
        tuple: ``(caption_pipe, story_pipe)`` — a BLIP image-to-text
        pipeline and a Qwen text-generation pipeline.

    On any loading failure this reports the error in the UI and halts
    the Streamlit script run (``st.stop()``) instead of raising.
    """
    try:
        use_cuda = torch.cuda.is_available()

        # 1. Image Captioning Model
        caption_pipe = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-base",
            device=0 if use_cuda else -1,
        )

        # 2. Story Generation Model
        story_tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen3-0.6B",
            trust_remote_code=True,
        )

        # Fix: requesting float16 unconditionally breaks (or crawls) on
        # CPU-only hosts — only use half precision when CUDA is present.
        story_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen3-0.6B",
            device_map="auto",
            torch_dtype=torch.float16 if use_cuda else torch.float32,
        )

        story_pipe = pipeline(
            "text-generation",
            model=story_model,
            tokenizer=story_tokenizer,
            max_new_tokens=300,
            temperature=0.7,
        )

        return caption_pipe, story_pipe

    except Exception as e:
        # Surface the failure to the user and stop this script run.
        st.error(f"🚨 Model loading failed: {str(e)}")
        st.stop()

# Initialize models immediately when app starts
caption_pipe, story_pipe = load_models()
58
+
59
# --- Page header ---
st.title("📖✨ Turn Images into Children's Stories")
61
 
 
62
def clean_story_text(raw_text):
    """Strip model artifacts from a generated story.

    Removes chat special tokens such as ``<|im_start|>`` and a leading
    "Okay, I need..." thinking chain, then trims surrounding whitespace.
    """
    without_tokens = re.sub(r'<\|.*?\|>', '', raw_text)
    without_thinking = re.sub(
        r'Okay, I need.*?(?=\n|$)',
        '',
        without_tokens,
        flags=re.DOTALL,
    )
    return without_thinking.strip()
67
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
# --- Main Application Flow ---
uploaded_image = st.file_uploader(
    "Upload a children's book style image:",
    type=["jpg", "jpeg", "png"]
)

if uploaded_image:
    image = Image.open(uploaded_image).convert("RGB")
    # use_container_width replaces the deprecated use_column_width
    st.image(image, use_container_width=True)

    # Step 1: caption the image with the BLIP pipeline.
    with st.spinner("🔍 Analyzing image..."):
        try:
            caption_result = caption_pipe(image)
            image_caption = caption_result[0].get("generated_text", "").strip()
        except Exception as e:
            st.error(f"❌ Image analysis failed: {str(e)}")
            st.stop()

    # Guard: an empty caption would produce a meaningless story prompt.
    if not image_caption:
        st.error("❌ Couldn't understand this image. Please try another!")
        st.stop()

    st.success(f"**Image Understanding:** {image_caption}")

    # Step 2: build the story prompt; "Story:" is used below as the
    # split marker to drop the echoed prompt from the model output.
    story_prompt = f"""Write a children's story about: {image_caption}
Rules:
- Use simple words (Grade 2 level)
- Exclude thinking processes
- 3 paragraphs maximum
Story:"""

    try:
        with st.spinner("📝 Crafting magical story..."):
            story_result = story_pipe(
                story_prompt,
                do_sample=True,
                top_p=0.9,
                repetition_penalty=1.2
            )

        raw_story = story_result[0]['generated_text']
        final_story = clean_story_text(raw_story.split("Story:")[-1])

        st.subheader("✨ Your Magical Story")
        st.write(final_story)

        # Step 3: narrate the story with gTTS.
        # NOTE(review): delete=False leaks one temp .mp3 per run; kept so
        # st.audio can read the file after the handle closes.
        with st.spinner("🔊 Creating audio version..."):
            audio = gTTS(text=final_story, lang="en", slow=False)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                audio.save(tmp_file.name)
                st.audio(tmp_file.name, format="audio/mp3")

    except Exception as e:
        # Fix: restore the "❌" prefix for consistency with other errors.
        st.error(f"❌ Story generation failed: {str(e)}")

# Footer
st.markdown("---")