Spaces:

MonkeyDLLLLLLuffy
/

classagm

Sleeping

App Files Files Community

Leo Liu committed on Mar 7

Commit

3ae2d5d

verified ·

1 Parent(s): 59e88fb

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -104

app.py CHANGED Viewed

@@ -2,148 +2,163 @@
 import streamlit as st
 from transformers import pipeline
 import math
 # function part
-# Timestamps
-def split_story_with_delay(story_text, sampling_rate=16000):
-    """Split the story into timestamped chunks."""
-    words = story_text.split()
-    chunk_size = max(1, len(words)//5)  # aim for roughly 5 chunks by word count
-    chunks = [' '.join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
-    # Assume each chunk lasts 2 seconds (adjusting dynamically to the audio length would be better)
-    duration = len(audio_data["audio"]) / sampling_rate  # NOTE(review): audio_data is neither a parameter nor a local of this function
-    chunk_duration = duration / len(chunks)
     return list(zip(chunks, [chunk_duration]*len(chunks)))
-# img2text: caption an image with the BLIP image-to-text pipeline
 def img2text(url):
-    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    text = image_to_text_model(url)[0]["generated_text"]
     return text
-# text2story: generate a short children's story from a caption
 def text2story(text):
-    # Prompt template dedicated to children's stories
-    prompt = f"""Generate a VERY SHORT fairy tale for children aged 3-10 based on: {text}
-Story must:
-1. Have animal/fairy characters
-2. Teach kindness or courage
-3. Use simple words
-4. Be 50-100 words
-Story:"""
-    pipe = pipeline(
-        "text-generation",
-        model="pranavpsv/genre-story-generator-v2",
-        # Tuned generation parameters
-        max_new_tokens=150,          # strictly cap the output length
-        min_new_tokens=50,           # guarantee a minimum length
-        do_sample=True,
-        temperature=0.7,            # balance creativity and coherence
-        top_k=40,                   # speed up generation
-        top_p=0.9,
-        repetition_penalty=1.2,
-        num_return_sequences=1      # reduce computation
-    )
-    # Post-process the generated text
-    raw_story = pipe(prompt)[0]['generated_text']
-    # Extract the story itself (drop the echoed prompt)
-    story = raw_story.split("Story:")[-1].strip()
-    # Keep at most the first 150 whitespace-separated words when the story is longer than 150 characters (original note: Chinese text is counted by characters)
-    return ' '.join(story.split()[:150]) if len(story) > 150 else story
-# text2audio: run the story through the "Matthijs/mms-tts-eng" text-to-audio pipeline and return its raw output
 def text2audio(story_text):
-    pipe = pipeline("text-to-audio", model="Matthijs/mms-tts-eng")
-    audio_data = pipe(story_text)
-    return audio_data
 def main():
-    st.set_page_config(page_title="Magic Storyteller", page_icon="🧚")
-    # Optimize title area to attract children's attention
     st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
     .header {
-        background-image: url('https://huggingface.co/spaces/Leo0129/classagm/resolve/main/background.jpg');
-        background-size: cover;
         border-radius: 15px;
         padding: 2rem;
         text-align: center;
         box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
     }
-    .header h1 {
-        color: #FF9A6C;
         font-family: 'Comic Neue', cursive;
-        font-size: 2.5rem;
-        text-shadow: 2px 2px #FFF;
-        margin-bottom: 0.5rem !important;
     }
     </style>
-    """, unsafe_allow_html=True)
     st.markdown("""
     <div class="header">
-        <h1>🪄 Magic Storyteller </h1>
     </div>
     """, unsafe_allow_html=True)
-    uploaded_file = st.file_uploader("🌈 Choose your magic picture...", type=["jpg", "png"])
-    if uploaded_file is not None:
-        bytes_data = uploaded_file.getvalue()
-        with open(uploaded_file.name, "wb") as file:
-            file.write(bytes_data)
-        st.image(uploaded_file, caption="Your Magic Picture ✨", use_container_width=True)
-        status_container = st.empty()
-        progress_bar = st.progress(0)
-        # Stage 1: Image to Text
-        with status_container.status("🔮 **Step 1/3**: Decoding picture magic...", expanded=True) as status:  # keep this indentation
-            progress_bar.progress(33)
-            scenario = img2text(uploaded_file.name)
-            status.update(label="✅ Picture decoded!", state="complete")
-            st.write(f"**What I see:** {scenario}")
-        #Stage 2: Text to Story
-        with status_container.status("📚 **Step 2/3**: Writing your fairy tale...", expanded=True) as status:
-            progress_bar.progress(66)
             story = text2story(scenario)
-            status.update(label="✅ Story created!", state="complete")
-            st.write(f"**Your Story:**\n{story}")
-        #Stage 3: Story to Audio data
-        with status_container.status("🎵 **Step 3/3**: Adding magic audio...", expanded=True) as status:
-            progress_bar.progress(100)
-            audio_data = text2audio(story)
-            # New: subtitle handling
-            subtitle_chunks = split_story_with_delay(story, audio_data['sampling_rate'])
-            current_subtitle = st.empty()
-            status.update(label="✅ Start playing the story!", state="complete")
-            # Play the audio and update the subtitles
-            with st.audio(...):  # keep the original arguments
-                for text, duration in subtitle_chunks:
-                    current_subtitle.markdown(f"""
-                    <div style="
-                        background: rgba(255,255,255,0.9);
-                        padding: 1rem;
-                        border-radius: 10px;
-                        margin: 1rem 0;
-                        font-size: 1.2rem;
-                        color: #FF6B6B;
-                        text-align: center;
-                        font-family: 'Comic Neue', cursive;
-                    ">{text}</div>
-                    """, unsafe_allow_html=True)
-                    time.sleep(duration)  # requires "import time"
 if __name__ == "__main__":
     main()

 import streamlit as st
 from transformers import pipeline
 import math
+import time  # 新增time模块
 # function part
+def split_story_with_delay(story_text, total_duration, num_chunks=5):
+    """Split the story into timestamped chunks (optimized version)."""
+    # Splitting on periods matches natural paragraphs better
+    sentences = [s.strip() for s in story_text.split('. ') if s]
+    if not sentences:
+        return [(story_text, total_duration)]
+    # Compute the chunking dynamically (at most 2 sentences per chunk)
+    chunk_size = max(1, min(2, len(sentences)//num_chunks))
+    chunks = []
+    current_chunk = []
+    for sent in sentences:
+        current_chunk.append(sent)
+        if len(current_chunk) >= chunk_size:
+            chunks.append('. '.join(current_chunk) + '.')  # NOTE(review): only ". " is removed by the split, so a sentence that kept its own "." ends up with ".."
+            current_chunk = []
+    if current_chunk:
+        chunks.append('. '.join(current_chunk) + '.')
+    # Compute each chunk's duration: the total is shared equally among the chunks
+    chunk_duration = total_duration / len(chunks)
     return list(zip(chunks, [chunk_duration]*len(chunks)))
 def img2text(url):
+    # Show a progress hint while the caption is generated
+    with st.spinner("🖼️ Analyzing the magic picture..."):
+        image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+        text = image_to_text_model(url)[0]["generated_text"]
     return text
 def text2story(text):
+    # Refined prompt template
+    prompt = f"""Create a magical children's story (for ages 3-8) based on: {text}
+Story requirements:
+🐰 Animal/Fantasy characters
+🎁 Simple moral lesson
+🌈 Vivid descriptions
+✨ 80-120 words
+🌼 Use dialog between characters
+Magical story begins:"""
+    # Show a spinner while the model is loaded and the story is generated
+    with st.spinner("📖 Brewing story magic..."):
+        pipe = pipeline(
+            "text-generation",
+            model="pranavpsv/genne-story-generator-v2",  # NOTE(review): spelled "genne" here but "genre" in the pre-commit version — likely a typo; confirm the repo id
+            max_new_tokens=200,
+            temperature=0.8,
+            top_p=0.95,
+            repetition_penalty=1.1
+        )
+        raw_story = pipe(prompt)[0]['generated_text']
+    # Refined story extraction: keep only the text after the last "Magical story begins:" marker
+    story = raw_story.split("Magical story begins:")[-1].strip()
+    return story[:500]  # enforce the length cap (first 500 characters)
 def text2audio(story_text):
+    # Show a spinner while the audio is generated
+    with st.spinner("🔊 Mixing audio potion..."):
+        pipe = pipeline("text-to-audio", model="Matthijs/mms-tts-eng")
+        audio_data = pipe(story_text, return_tensors="pt")  # intended to reduce memory use; NOTE(review): confirm this pipeline accepts return_tensors
+    return {
+        "array": audio_data["audio"][0].numpy(),  # NOTE(review): assumes a tensor with .numpy(); verify the pipeline's actual output type
+        "sampling_rate": audio_data["sampling_rate"]
+    }
 def main():
+    st.set_page_config(page_title="Magic Storyteller", page_icon="🧚", layout="wide")
+    # Refined UI styling
     st.markdown("""
     <style>
     @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
     .header {
+        background: linear-gradient(45deg, #FF9A6C, #FF6B6B);
         border-radius: 15px;
         padding: 2rem;
         text-align: center;
         box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        margin-bottom: 2rem;
     }
+    .subtitle {
         font-family: 'Comic Neue', cursive;
+        color: #4B4B4B;
+        font-size: 1.2rem;
+        margin: 1rem 0;
+        padding: 1rem;
+        background: rgba(255,255,255,0.9);
+        border-radius: 10px;
+        border-left: 5px solid #FF6B6B;
     }
     </style>
+    """, unsafe_allow_html=True)
     st.markdown("""
     <div class="header">
+        <h1 style='margin:0;'>🪄 Magic Storyteller</h1>
+        <p style='color: white; font-size: 1.2rem;'>Turn your pictures into magical stories!</p>
     </div>
     """, unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("🌈 Choose your magic picture...", type=["jpg", "png"])
+    if uploaded_file:
+        with st.expander("✨ Your Magic Picture", expanded=True):
+            st.image(uploaded_file, use_column_width=True)  # NOTE(review): the pre-commit code used use_container_width — confirm use_column_width is still supported
+        # Progress tracking for the three pipeline stages
+        with st.status("🔮 Story Creation Progress", expanded=True) as status:
+            # Stage 1
+            st.subheader("Step 1: Decoding Picture Magic")
+            scenario = img2text(uploaded_file)  # NOTE(review): passes the uploaded-file object itself, not a saved path as before — confirm the pipeline accepts it
+            st.success(f"**Discovered Magic:** {scenario}")
+            # Stage 2
+            st.subheader("Step 2: Brewing Story Potion")
             story = text2story(scenario)
+            st.success("**Magical Story Created!**")
+            # Stage 3
+            st.subheader("Step 3: Mixing Audio Spell")
+            audio_data = text2audio(story)
+            st.success("**Audio Potion Ready!**")
+            status.update(label="🎉 All Magic Complete!", state="complete")
+        # Story display area
+        with st.container():
+            st.subheader("📖 Your Magical Story")
+            st.write(story)
+            # Audio playback and subtitles
+            st.subheader("🎧 Story Audio")
+            st.audio(
+                audio_data["array"],
+                sample_rate=audio_data["sampling_rate"]
+            )
+            # Subtitle display (static version)
+            st.subheader("📜 Story Subtitles")
+            total_duration = len(audio_data["array"]) / audio_data["sampling_rate"]
+            subtitle_chunks = split_story_with_delay(story, total_duration)
+            for idx, (text, duration) in enumerate(subtitle_chunks, 1):  # duration is not used by this static rendering
+                st.markdown(f"""
+                <div class="subtitle">
+                    <span style='color: #FF6B6B; font-size: 1.4rem;'>✨ Part {idx}:</span>
+                    {text}
+                </div>
+                """, unsafe_allow_html=True)  # NOTE(review): the generated story text is interpolated into this HTML unescaped
 if __name__ == "__main__":
     main()