Spaces:

MonkeyDLLLLLLuffy
/

classagm

Sleeping

App Files Files Community

Leo Liu committed on Mar 9

Commit

a0338c0

verified ·

1 Parent(s): 932fca0

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -132

app.py CHANGED Viewed

@@ -1,104 +1,84 @@
 # import part
 import streamlit as st
-import torch
 from transformers import pipeline
-import math
-import time  # 新增time模块
-from PIL import Image
 import io
-# function part
-def split_story_with_delay(story_text, total_duration, num_chunks=5):
-    # Split story_text into subtitle chunks and give every chunk an equal
-    # share of total_duration. Returns a list of (chunk_text, duration) tuples.
-    # (The docstring below says: "Split the story into timestamped passages
-    # (optimized version)".)
-    """将故事分割为带时间戳的段落（优化版）"""
-    # Splitting on periods matches natural passages better
-    sentences = [s.strip() for s in story_text.split('. ') if s]
-    if not sentences:
-        return [(story_text, total_duration)]
-    # Dynamically compute the chunk size (at most 2 sentences per chunk)
-    chunk_size = max(1, min(2, len(sentences)//num_chunks))
-    chunks = []
-    current_chunk = []
-    for sent in sentences:
-        current_chunk.append(sent)
-        if len(current_chunk) >= chunk_size:
-            # NOTE(review): split('. ') leaves the trailing '.' on the final
-            # sentence, so when story_text ends with '.' the last chunk ends
-            # in '..' after this append.
-            chunks.append('. '.join(current_chunk) + '.')
-            current_chunk = []
-    if current_chunk:
-        chunks.append('. '.join(current_chunk) + '.')
-    # Compute the duration of each chunk
-    chunk_duration = total_duration / len(chunks)
-    return list(zip(chunks, [chunk_duration]*len(chunks)))
-def img2text(uploaded_file):
-    # Caption an uploaded image: reads the bytes via uploaded_file.getvalue(),
-    # runs the BLIP image-to-text pipeline on the resulting PIL image and
-    # returns the "generated_text" of the first result. Any exception is shown
-    # with st.error and followed by st.stop().
-    # (The docstring below says: "Handle the uploaded file object".)
-    """处理上传文件对象"""
-    try:
-        # Convert the uploaded file into a PIL Image
-        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
-        # Show debug information (optional)
-        st.write(f"✅ 成功读取图片 | 格式: {image.format} | 尺寸: {image.size}")
-        # Create the image-to-text model
-        image_to_text_model = pipeline(
-            "image-to-text",
-            model="Salesforce/blip-image-captioning-base",
-            device=0 if torch.cuda.is_available() else -1  # use the GPU when CUDA is available
-        )
-        # Pass the PIL Image in directly
-        result = image_to_text_model(image)
-        return result[0]["generated_text"]
-    except Exception as e:
-        st.error(f"❌ 图片处理失败: {str(e)}")
-        st.stop()
 def text2story(text):
-    # Removed implementation: wraps `text` in a children's-story prompt, runs
-    # a text-generation pipeline on it, keeps what follows the final
-    # "Magical story begins:" marker and returns at most 500 characters.
-    # Optimized prompt template
-    prompt = f"""Create a magical children's story (for ages 3-8) based on: {text}
-Story requirements:
-🐰 Animal/Fantasy characters
-🎁 Simple moral lesson
-🌈 Vivid descriptions
-✨ 80-120 words
-🌼 Use dialog between characters
-Magical story begins:"""
-    # Show progress while the model loads
-    with st.spinner("📖 Brewing story magic..."):
-        # NOTE(review): the model id is spelled "genne" here, while the added
-        # version of this function later in this diff uses "genre" -- this
-        # spelling looks like a typo; confirm against the model hub.
-        pipe = pipeline(
-            "text-generation",
-            model="pranavpsv/genne-story-generator-v2",
-            max_new_tokens=200,
-            temperature=0.8,
-            top_p=0.95,
-            repetition_penalty=1.1
-        )
-        raw_story = pipe(prompt)[0]['generated_text']
-    # Story extraction: keep only the text after the prompt's closing marker
-    story = raw_story.split("Magical story begins:")[-1].strip()
-    return story[:500]  # enforce the length limit
 def text2audio(story_text):
-    # Removed implementation: runs a text-to-audio pipeline on story_text and
-    # returns a dict with the waveform under "array" and the pipeline's
-    # "sampling_rate".
-    # Show progress while the audio is generated
-    with st.spinner("🔊 Mixing audio potion..."):
-        pipe = pipeline("text-to-audio", model="Matthijs/mms-tts-eng")
-        audio_data = pipe(story_text, return_tensors="pt")  # original author's note: optimize memory usage
-    return {
-        "array": audio_data["audio"][0].numpy(),
-        "sampling_rate": audio_data["sampling_rate"]
-    }
-def main():
-    st.set_page_config(page_title="Magic Storyteller", page_icon="🧚", layout="wide")
-    # 优化UI样式
     st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
@@ -126,59 +106,42 @@ def main():
     st.markdown("""
     <div class="header">
         <h1 style='margin:0;'>🪄 Magic Storyteller</h1>
-        <p style='color: white; font-size: 1.2rem;'>Turn your pictures into magical stories!</p>
     </div>
     """, unsafe_allow_html=True)
-    uploaded_file = st.file_uploader("🌈 Choose your magic picture...", type=["jpg", "png"])
-    if uploaded_file:
-        with st.expander("✨ Your Magic Picture", expanded=True):
-            st.image(uploaded_file, use_column_width=True)
-        # 流程进度管理
-        with st.status("🔮 Story Creation Progress", expanded=True) as status:
-            # Stage 1
-            st.subheader("Step 1: Decoding Picture Magic")
-            scenario = img2text(uploaded_file)
-            st.success(f"**Discovered Magic:** {scenario}")
-            # Stage 2
-            st.subheader("Step 2: Brewing Story Potion")
-            story = text2story(scenario)
-            st.success("**Magical Story Created!**")
-            # Stage 3
-            st.subheader("Step 3: Mixing Audio Spell")
-            audio_data = text2audio(story)
-            st.success("**Audio Potion Ready!**")
-            status.update(label="🎉 All Magic Complete!", state="complete")
-        # 故事展示区域
-        with st.container():
-            st.subheader("📖 Your Magical Story")
-            st.write(story)
-            # 音频播放与字幕
-            st.subheader("🎧 Story Audio")
-            st.audio(
-                audio_data["array"],
-                sample_rate=audio_data["sampling_rate"]
-            )
-            # 字幕显示（静态版本）
-            st.subheader("📜 Story Subtitles")
-            total_duration = len(audio_data["array"]) / audio_data["sampling_rate"]
-            subtitle_chunks = split_story_with_delay(story, total_duration)
-            for idx, (text, duration) in enumerate(subtitle_chunks, 1):
-                st.markdown(f"""
-                <div class="subtitle">
-                    <span style='color: #FF6B6B; font-size: 1.4rem;'>✨ Part {idx}:</span>
-                    {text}
-                </div>
-                """, unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 # import part
 import streamlit as st
 from transformers import pipeline
+from gtts import gTTS
 import io
+# function part
+# img2text
+def img2text(url):
+    # Caption an image: `url` is handed straight to the BLIP image-to-text
+    # pipeline (the caller in this file passes a local file name) and the
+    # "generated_text" of the first result is returned.
+    # NOTE(review): the pipeline is constructed on every call; consider
+    # building it once and reusing it.
+    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+    text = image_to_text_model(url)[0]["generated_text"]
+    return text
+# text2story
 def text2story(text):
+    # Generate a short children's story from `text`: build a prompt, run a
+    # text-generation pipeline, trim the output at the last sentence-ending
+    # mark, append a closing sentence when the result is under 100 words, and
+    # return the story string.
+    # NOTE(review): the pipeline is constructed on every call; consider
+    # building it once and reusing it.
+    # Prompt template for children's stories
+    child_prompt = f"""Write a children's story for ages 3-10 based on: {text}
+    Requirements:
+    - Use simple words (1st-3rd grade level)
+    - Main character: friendly animal
+    - Story elements: magic, friendship, happy ending
+    - Moral lesson: sharing is caring
+    - Length: 100-120 words
+    Story structure:
+    1. Introduce characters and setting
+    2. A problem occurs
+    3. Magical solution with teamwork
+    4. Happy ending with a lesson
+    """
+    pipe = pipeline(
+        "text-generation",
+        model="pranavpsv/genre-story-generator-v2",
+        max_new_tokens=160,    # 160 tokens ≈ 120 words
+        min_new_tokens=134,    # 134 tokens ≈ 100 words
+        temperature=0.7,       # balance creativity and stability
+        top_p=0.9,
+        repetition_penalty=1.2,
+        pad_token_id=50256,    # original author's note: matches the bos/eos_token_id in the model config
+        num_return_sequences=1
+    )
+    # Generate the story (return_full_text=False asks for the continuation only)
+    raw_story = pipe(child_prompt, return_full_text=False)[0]['generated_text']
+    # Post-processing
+    def format_story(text):
+        # Remove technical markers
+        text = text.replace("<|endoftext|>", "").strip()
+        # Find a natural ending point: cut just after the last '.', '!' or '?'
+        # NOTE(review): '...' is redundant here, because '.' already matches
+        # the final dot of any ellipsis.
+        end_marks = ['.', '!', '?', '...']
+        last_positions = [text.rfind(mark) for mark in end_marks]
+        valid_positions = [pos for pos in last_positions if pos != -1]
+        # With no ending mark at all, keep the whole text
+        cutoff = max(valid_positions) + 1 if valid_positions else len(text)
+        return text[:cutoff]
+    formatted_story = format_story(raw_story)
+    # Word-count check: short stories get a closing sentence appended
+    word_count = len(formatted_story.split())
+    if word_count < 100:
+        formatted_story += " And they lived happily ever after."
+    return formatted_story
+# text2audio
 def text2audio(story_text):
+    # Synthesize English speech for story_text with gTTS and return it as an
+    # in-memory io.BytesIO; the caller in this file plays it as "audio/mp3".
+    tts = gTTS(text=story_text, lang='en')
+    audio_bytes = io.BytesIO()
+    tts.write_to_fp(audio_bytes)
+    # Rewind so the consumer reads from the first byte
+    audio_bytes.seek(0)
+    return audio_bytes
+def main():
+    # Optimize title area to attract children's attention
+    st.set_page_config(page_title="Magic Storyteller", page_icon="🧚")
     st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
     st.markdown("""
     <div class="header">
         <h1 style='margin:0;'>🪄 Magic Storyteller</h1>
+        <p style='color: white; font-size: 1.2rem;'>Turn your pictures into stories!</p>
     </div>
     """, unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("👉🏻 Upload your magic picture here...", type=["jpg", "png"])
+    if uploaded_file is not None:
+        bytes_data = uploaded_file.getvalue()
+        with open(uploaded_file.name, "wb") as file:
+            file.write(bytes_data)
+        st.image(uploaded_file, caption="Your Magic Picture ✨", use_container_width=True)
+        status_container = st.empty()
+        progress_bar = st.progress(0)
+        # Stage 1: Image to Text
+        with status_container.status("🔮 **Step 1/3**: Decoding picture magic...", expanded=True) as status:
+            progress_bar.progress(33)
+            scenario = img2text(uploaded_file.name)
+            status.update(label="✅ Picture decoded!", state="complete")
+            st.write(f"**What I see:** {scenario}")
+        # Stage 2: Text to Story
+        with status_container.status("📚 **Step 2/3**: Writing your fairy tale...", expanded=True) as status:
+            progress_bar.progress(66)
+            story = text2story(scenario)
+            status.update(label="✅ Story created!", state="complete")
+            st.write(f"**Your Story:**\n{story}")
+        # Stage 3: Story to Audio data
+        with status_container.status("🎵 **Step 3/3**: Adding magic audio...", expanded=True) as status:
+            progress_bar.progress(100)
+            audio_data = text2audio(story)
+            status.update(label="✅ Start playing the story!", state="complete")
+            st.audio(audio_data,
+                    format="audio/mp3",
+                    autoplay=True)
 if __name__ == "__main__":
     main()