Leo Liu committed on
Commit
a0338c0
·
verified ·
1 Parent(s): 932fca0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -132
app.py CHANGED
@@ -1,104 +1,84 @@
1
  # import part
2
  import streamlit as st
3
- import torch
4
  from transformers import pipeline
5
- import math
6
- import time # 新增time模块
7
- from PIL import Image
8
  import io
9
 
10
- # function part
11
- def split_story_with_delay(story_text, total_duration, num_chunks=5):
12
- """将故事分割为带时间戳的段落(优化版)"""
13
- # 按句号分割更符合自然段落
14
- sentences = [s.strip() for s in story_text.split('. ') if s]
15
- if not sentences:
16
- return [(story_text, total_duration)]
17
-
18
- # 动态计算分段数量(每段最多2句话)
19
- chunk_size = max(1, min(2, len(sentences)//num_chunks))
20
- chunks = []
21
- current_chunk = []
22
-
23
- for sent in sentences:
24
- current_chunk.append(sent)
25
- if len(current_chunk) >= chunk_size:
26
- chunks.append('. '.join(current_chunk) + '.')
27
- current_chunk = []
28
-
29
- if current_chunk:
30
- chunks.append('. '.join(current_chunk) + '.')
31
 
32
- # 计算每段持续时间
33
- chunk_duration = total_duration / len(chunks)
34
- return list(zip(chunks, [chunk_duration]*len(chunks)))
 
 
 
35
 
36
- def img2text(uploaded_file):
37
- """处理上传文件对象"""
38
- try:
39
- # 将上传文件转换为PIL Image
40
- image = Image.open(io.BytesIO(uploaded_file.getvalue()))
41
-
42
- # 显示调试信息(可选)
43
- st.write(f"✅ 成功读取图片 | 格式: {image.format} | 尺寸: {image.size}")
44
-
45
- # 创建图片转文本模型
46
- image_to_text_model = pipeline(
47
- "image-to-text",
48
- model="Salesforce/blip-image-captioning-base",
49
- device=0 if torch.cuda.is_available() else -1 # 添加GPU支持
50
- )
51
-
52
- # 直接传入PIL Image
53
- result = image_to_text_model(image)
54
- return result[0]["generated_text"]
55
-
56
- except Exception as e:
57
- st.error(f"❌ 图片处理失败: {str(e)}")
58
- st.stop()
59
 
 
60
  def text2story(text):
61
- # 优化prompt模板
62
- prompt = f"""Create a magical children's story (for ages 3-8) based on: {text}
63
- Story requirements:
64
- 🐰 Animal/Fantasy characters
65
- 🎁 Simple moral lesson
66
- 🌈 Vivid descriptions
67
- ✨ 80-120 words
68
- 🌼 Use dialog between characters
69
 
70
- Magical story begins:"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # 添加模型加载进度
73
- with st.spinner("📖 Brewing story magic..."):
74
- pipe = pipeline(
75
- "text-generation",
76
- model="pranavpsv/genne-story-generator-v2",
77
- max_new_tokens=200,
78
- temperature=0.8,
79
- top_p=0.95,
80
- repetition_penalty=1.1
81
- )
82
- raw_story = pipe(prompt)[0]['generated_text']
83
 
84
- # 优化故事提取逻辑
85
- story = raw_story.split("Magical story begins:")[-1].strip()
86
- return story[:500] # 确保长度限制
 
 
 
 
87
 
 
88
  def text2audio(story_text):
89
- # 添加音频生成进度
90
- with st.spinner("🔊 Mixing audio potion..."):
91
- pipe = pipeline("text-to-audio", model="Matthijs/mms-tts-eng")
92
- audio_data = pipe(story_text, return_tensors="pt") # 优化内存使用
93
- return {
94
- "array": audio_data["audio"][0].numpy(),
95
- "sampling_rate": audio_data["sampling_rate"]
96
- }
97
 
98
- def main():
99
- st.set_page_config(page_title="Magic Storyteller", page_icon="🧚", layout="wide")
100
 
101
- # 优化UI样式
 
 
102
  st.markdown("""
103
  <style>
104
  @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
@@ -126,59 +106,42 @@ def main():
126
  st.markdown("""
127
  <div class="header">
128
  <h1 style='margin:0;'>🪄 Magic Storyteller</h1>
129
- <p style='color: white; font-size: 1.2rem;'>Turn your pictures into magical stories!</p>
130
  </div>
131
  """, unsafe_allow_html=True)
 
132
 
133
- uploaded_file = st.file_uploader("🌈 Choose your magic picture...", type=["jpg", "png"])
134
-
135
- if uploaded_file:
136
- with st.expander("✨ Your Magic Picture", expanded=True):
137
- st.image(uploaded_file, use_column_width=True)
138
-
139
- # 流程进度管理
140
- with st.status("🔮 Story Creation Progress", expanded=True) as status:
141
- # Stage 1
142
- st.subheader("Step 1: Decoding Picture Magic")
143
- scenario = img2text(uploaded_file)
144
- st.success(f"**Discovered Magic:** {scenario}")
145
-
146
- # Stage 2
147
- st.subheader("Step 2: Brewing Story Potion")
148
- story = text2story(scenario)
149
- st.success("**Magical Story Created!**")
150
-
151
- # Stage 3
152
- st.subheader("Step 3: Mixing Audio Spell")
153
- audio_data = text2audio(story)
154
- st.success("**Audio Potion Ready!**")
155
-
156
- status.update(label="🎉 All Magic Complete!", state="complete")
157
 
158
- # 故事展示区域
159
- with st.container():
160
- st.subheader("📖 Your Magical Story")
161
- st.write(story)
 
 
162
 
163
- # 音频播放与字幕
164
- st.subheader("🎧 Story Audio")
165
- st.audio(
166
- audio_data["array"],
167
- sample_rate=audio_data["sampling_rate"]
168
- )
169
 
170
- # 字幕显示(静态版本)
171
- st.subheader("📜 Story Subtitles")
172
- total_duration = len(audio_data["array"]) / audio_data["sampling_rate"]
173
- subtitle_chunks = split_story_with_delay(story, total_duration)
174
-
175
- for idx, (text, duration) in enumerate(subtitle_chunks, 1):
176
- st.markdown(f"""
177
- <div class="subtitle">
178
- <span style='color: #FF6B6B; font-size: 1.4rem;'>✨ Part {idx}:</span>
179
- {text}
180
- </div>
181
- """, unsafe_allow_html=True)
182
 
183
  if __name__ == "__main__":
184
  main()
 
1
  # import part
2
  import streamlit as st
 
3
  from transformers import pipeline
4
+ from gtts import gTTS
 
 
5
  import io
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # function part
9
# img2text
def img2text(url):
    """Return the caption generated for an image.

    Args:
        url: Image reference handed unchanged to the image-to-text
            pipeline (the caller in this file passes a local file path).

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    captioner = _load_captioner()
    return captioner(url)[0]["generated_text"]


@st.cache_resource(show_spinner=False)
def _load_captioner():
    """Build the image-captioning pipeline once and reuse it on later calls."""
    # Streamlit re-executes the script on every interaction; caching the
    # pipeline avoids constructing the model again for each uploaded picture.
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
# text2story
def text2story(text):
    """Generate a short children's story from a scene description.

    Args:
        text: Description of the scene the story should be based on.

    Returns:
        The generated story with the end-of-text marker removed and cut
        after its last sentence-ending punctuation mark. When the result
        has fewer than 100 words, a closing sentence is appended.
    """
    # Children's-story prompt template.
    child_prompt = (
        f"Write a children's story for ages 3-10 based on: {text}\n"
        "Requirements:\n"
        "- Use simple words (1st-3rd grade level)\n"
        "- Main character: friendly animal\n"
        "- Story elements: magic, friendship, happy ending\n"
        "- Moral lesson: sharing is caring\n"
        "- Length: 100-120 words\n"
        "\n"
        "Story structure:\n"
        "1. Introduce characters and setting\n"
        "2. A problem occurs\n"
        "3. Magical solution with teamwork\n"
        "4. Happy ending with a lesson\n"
    )

    generator = _load_story_generator()
    # return_full_text=False asks the pipeline for the continuation only,
    # so the prompt itself is not part of the story.
    raw_story = generator(child_prompt, return_full_text=False)[0]["generated_text"]
    story = _trim_to_last_sentence(raw_story)

    # Word-count check: pad short stories with a closing sentence.
    if len(story.split()) < 100:
        # strip() avoids a leading space when nothing usable was generated.
        story = f"{story} And they lived happily ever after.".strip()

    return story


@st.cache_resource(show_spinner=False)
def _load_story_generator():
    """Build the story-generation pipeline once and reuse it on later calls."""
    return pipeline(
        "text-generation",
        model="pranavpsv/genre-story-generator-v2",
        max_new_tokens=160,  # 160 tokens ≈ 120 words
        min_new_tokens=134,  # 134 tokens ≈ 100 words
        temperature=0.7,  # balance creativity and stability
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=50256,  # match the bos/eos_token_id in the model config
        num_return_sequences=1,
    )


def _trim_to_last_sentence(raw):
    """Drop the end-of-text marker and cut *raw* after its last full sentence."""
    cleaned = raw.replace("<|endoftext|>", "").strip()
    # An ellipsis also ends in '.', so checking '.', '!' and '?' is enough.
    last_end = max(cleaned.rfind(mark) for mark in ".!?")
    if last_end == -1:
        # No sentence-ending punctuation at all: keep the whole text.
        return cleaned
    cutoff = last_end + 1
    # Keep closing quotes/brackets that directly follow the punctuation so
    # quoted dialogue is not left unterminated.
    while cutoff < len(cleaned) and cleaned[cutoff] in "\"'”’)":
        cutoff += 1
    return cleaned[:cutoff]
68
+
69
 
70
# text2audio
def text2audio(story_text):
    """Synthesize English speech for *story_text* into an in-memory buffer.

    Args:
        story_text: The text to be spoken.

    Returns:
        An ``io.BytesIO`` holding the audio written by gTTS, rewound to
        position 0 so it can be read from the beginning.
    """
    buffer = io.BytesIO()
    gTTS(text=story_text, lang='en').write_to_fp(buffer)
    # Rewind: the write leaves the cursor at the end of the data.
    buffer.seek(0)
    return buffer
 
 
 
77
 
 
 
78
 
79
+ def main():
80
+ # Optimize title area to attract children's attention
81
+ st.set_page_config(page_title="Magic Storyteller", page_icon="🧚")
82
  st.markdown("""
83
  <style>
84
  @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@700&display=swap');
 
106
  st.markdown("""
107
  <div class="header">
108
  <h1 style='margin:0;'>🪄 Magic Storyteller</h1>
109
+ <p style='color: white; font-size: 1.2rem;'>Turn your pictures into stories!</p>
110
  </div>
111
  """, unsafe_allow_html=True)
112
+ uploaded_file = st.file_uploader("👉🏻 Upload your magic picture here...", type=["jpg", "png"])
113
 
114
+ if uploaded_file is not None:
115
+ bytes_data = uploaded_file.getvalue()
116
+ with open(uploaded_file.name, "wb") as file:
117
+ file.write(bytes_data)
118
+ st.image(uploaded_file, caption="Your Magic Picture ✨", use_container_width=True)
119
+ status_container = st.empty()
120
+ progress_bar = st.progress(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ # Stage 1: Image to Text
123
+ with status_container.status("🔮 **Step 1/3**: Decoding picture magic...", expanded=True) as status:
124
+ progress_bar.progress(33)
125
+ scenario = img2text(uploaded_file.name)
126
+ status.update(label="✅ Picture decoded!", state="complete")
127
+ st.write(f"**What I see:** {scenario}")
128
 
129
+ # Stage 2: Text to Story
130
+ with status_container.status("📚 **Step 2/3**: Writing your fairy tale...", expanded=True) as status:
131
+ progress_bar.progress(66)
132
+ story = text2story(scenario)
133
+ status.update(label="✅ Story created!", state="complete")
134
+ st.write(f"**Your Story:**\n{story}")
135
 
136
+ # Stage 3: Story to Audio data
137
+ with status_container.status("🎵 **Step 3/3**: Adding magic audio...", expanded=True) as status:
138
+ progress_bar.progress(100)
139
+ audio_data = text2audio(story)
140
+ status.update(label="✅ Start playing the story!", state="complete")
141
+
142
+ st.audio(audio_data,
143
+ format="audio/mp3",
144
+ autoplay=True)
 
 
 
145
 
146
  if __name__ == "__main__":
147
  main()