Xu Xuenan committed on
Commit
15c0d2f
·
1 Parent(s): 18d0cba

Update app.py

Browse files
Files changed (2) hide show
  1. app.py +109 -115
  2. mm_story_agent/__init__.py +0 -10
app.py CHANGED
@@ -142,120 +142,114 @@ def compose_storytelling_video_fn(
142
  return Path(config["story_dir"]) / "output.mp4"
143
 
144
 
145
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
146
-
147
- gr.HTML("""
148
- <h1 style="text-align: center;">MM-StoryAgent</h1>
149
- <p style="font-size: 16px;">This is a demo for generating attractive storytelling videos based on the given story setting.</p>
150
- """)
151
-
152
- with gr.Row():
153
- with gr.Column():
154
- story_topic = gr.Textbox(label="Story Topic", value=default_story_setting["story_topic"])
155
- main_role = gr.Textbox(label="Main Role", value=default_story_setting["main_role"])
156
- scene = gr.Textbox(label="Scene", value=default_story_setting["scene"])
157
- chapter_num = gr.Number(label="Chapter Number", value=default_story_gen_config["num_outline"])
158
- temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=default_story_gen_config["temperature"])
159
-
160
- with gr.Accordion("Detailed Image Configuration (Optional)", open=False):
161
- height = gr.Slider(label="Height", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['height'])
162
- width = gr.Slider(label="Width", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['width'])
163
- image_seed = gr.Number(label="Image Seed", value=default_image_config["call_cfg"]['seed'])
164
-
165
- with gr.Accordion("Detailed Sound Configuration (Optional)", open=False):
166
- sound_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=7.0, step=0.5, value=default_sound_config["call_cfg"]['guidance_scale'])
167
- sound_seed = gr.Number(label="Sound Seed", value=default_sound_config["call_cfg"]['seed'])
168
- n_candidate_per_text = gr.Slider(label="Number of Candidates per Text", minimum=0, maximum=5, step=1, value=default_sound_config["call_cfg"]['n_candidate_per_text'])
169
-
170
- with gr.Accordion("Detailed Music Configuration (Optional)", open=False):
171
- music_duration = gr.Number(label="Music Duration", min_width=30.0, maximum=120.0, value=default_music_config["call_cfg"]["duration"])
172
-
173
- with gr.Accordion("Detailed Slideshow Effect (Optional)", open=False):
174
- fade_duration = gr.Slider(label="Fade Duration", minimum=0.1, maximum=1.5, step=0.1, value=default_slideshow_effect['fade_duration'])
175
- slide_duration = gr.Slider(label="Slide Duration", minimum=0.1, maximum=1.0, step=0.1, value=default_slideshow_effect['slide_duration'])
176
- zoom_speed = gr.Slider(label="Zoom Speed", minimum=0.1, maximum=2.0, step=0.1, value=default_slideshow_effect['zoom_speed'])
177
- move_ratio = gr.Slider(label="Move Ratio", minimum=0.8, maximum=1.0, step=0.05, value=default_slideshow_effect['move_ratio'])
178
- sound_volume = gr.Slider(label="Sound Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['sound_volume'])
179
- music_volume = gr.Slider(label="Music Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['music_volume'])
180
- bg_speech_ratio = gr.Slider(label="Background / Speech Ratio", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['bg_speech_ratio'])
181
- fps = gr.Slider(label="FPS", minimum=1, maximum=30, step=1, value=default_slideshow_effect['fps'])
182
-
183
-
184
- with gr.Column():
185
- story_data = gr.State([])
186
-
187
- story_generation_information = gr.Markdown(
188
- label="Story Generation Status",
189
- value="<h3>Generating Story Script ......</h3>",
190
- visible=False)
191
- with gr.Accordion(label="Story Content", open=False, visible=False) as story_accordion:
192
- with gr.Row():
193
- prev_button = gr.Button("Previous Page",)
194
- next_button = gr.Button("Next Page",)
195
- story_content = gr.Textbox(label="Page Content")
196
- video_generation_information = gr.Markdown(label="Generation Status", value="<h3>Generating Video ......</h3>", visible=False)
197
- image_gallery = gr.Gallery(label="Images", show_label=False, visible=False)
198
- video_generation_btn = gr.Button("Generate Video")
199
- video_output = gr.Video(label="Generated Story", interactive=False)
200
-
201
- current_page = gr.State(0)
202
-
203
- prev_button.click(
204
- fn=update_page,
205
- inputs=[gr.State("prev"), current_page, story_data],
206
- outputs=[current_page, story_content]
207
- )
208
- next_button.click(
209
- fn=update_page,
210
- inputs=[gr.State("next"), current_page, story_data],
211
- outputs=[current_page, story_content,])
212
-
213
- # (possibly) update role description and scripts
214
-
215
- video_generation_btn.click(
216
- fn=set_generating_progress_text,
217
- inputs=[gr.State("Generating Story")],
218
- outputs=video_generation_information
219
- ).then(
220
- fn=write_story_fn,
221
- inputs=[story_topic, main_role, scene,
222
- chapter_num, temperature,
223
- current_page],
224
- outputs=[story_data, story_accordion, story_content, video_output]
225
- ).then(
226
- fn=set_generating_progress_text,
227
- inputs=[gr.State("Generating Modality Assets")],
228
- outputs=video_generation_information
229
- ).then(
230
- fn=modality_assets_generation_fn,
231
- inputs=[height, width, image_seed, sound_guidance_scale, sound_seed,
232
- n_candidate_per_text, music_duration,
233
- story_data],
234
- outputs=[image_gallery]
235
- ).then(
236
- fn=set_generating_progress_text,
237
- inputs=[gr.State("Composing Video")],
238
- outputs=video_generation_information
239
- ).then(
240
- fn=compose_storytelling_video_fn,
241
- inputs=[fade_duration, slide_duration, zoom_speed, move_ratio,
242
- sound_volume, music_volume, bg_speech_ratio, fps,
243
- story_data],
244
- outputs=[video_output]
245
- ).then(
246
- fn=lambda : gr.update(visible=False),
247
- inputs=[],
248
- outputs=[image_gallery]
249
- ).then(
250
- fn=set_generating_progress_text,
251
- inputs=[gr.State("Generation Finished")],
252
- outputs=video_generation_information
253
- )
254
-
255
-
256
  if __name__ == "__main__":
257
- parser = argparse.ArgumentParser()
258
- parser.add_argument("--share", default=False, action="store_true")
259
 
260
- args = parser.parse_args()
261
- demo.launch(share=args.share)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  return Path(config["story_dir"]) / "output.mp4"
143
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  if __name__ == "__main__":
 
 
146
 
147
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
148
+
149
+ gr.HTML("""
150
+ <h1 style="text-align: center;">MM-StoryAgent</h1>
151
+ <p style="font-size: 16px;">This is a demo for generating attractive storytelling videos based on the given story setting.</p>
152
+ """)
153
+
154
+ with gr.Row():
155
+ with gr.Column():
156
+ story_topic = gr.Textbox(label="Story Topic", value=default_story_setting["story_topic"])
157
+ main_role = gr.Textbox(label="Main Role", value=default_story_setting["main_role"])
158
+ scene = gr.Textbox(label="Scene", value=default_story_setting["scene"])
159
+ chapter_num = gr.Number(label="Chapter Number", value=default_story_gen_config["num_outline"])
160
+ temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=default_story_gen_config["temperature"])
161
+
162
+ with gr.Accordion("Detailed Image Configuration (Optional)", open=False):
163
+ height = gr.Slider(label="Height", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['height'])
164
+ width = gr.Slider(label="Width", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['width'])
165
+ image_seed = gr.Number(label="Image Seed", value=default_image_config["call_cfg"]['seed'])
166
+
167
+ with gr.Accordion("Detailed Sound Configuration (Optional)", open=False):
168
+ sound_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=7.0, step=0.5, value=default_sound_config["call_cfg"]['guidance_scale'])
169
+ sound_seed = gr.Number(label="Sound Seed", value=default_sound_config["call_cfg"]['seed'])
170
+ n_candidate_per_text = gr.Slider(label="Number of Candidates per Text", minimum=0, maximum=5, step=1, value=default_sound_config["call_cfg"]['n_candidate_per_text'])
171
+
172
+ with gr.Accordion("Detailed Music Configuration (Optional)", open=False):
173
+ music_duration = gr.Number(label="Music Duration", min_width=30.0, maximum=120.0, value=default_music_config["call_cfg"]["duration"])
174
+
175
+ with gr.Accordion("Detailed Slideshow Effect (Optional)", open=False):
176
+ fade_duration = gr.Slider(label="Fade Duration", minimum=0.1, maximum=1.5, step=0.1, value=default_slideshow_effect['fade_duration'])
177
+ slide_duration = gr.Slider(label="Slide Duration", minimum=0.1, maximum=1.0, step=0.1, value=default_slideshow_effect['slide_duration'])
178
+ zoom_speed = gr.Slider(label="Zoom Speed", minimum=0.1, maximum=2.0, step=0.1, value=default_slideshow_effect['zoom_speed'])
179
+ move_ratio = gr.Slider(label="Move Ratio", minimum=0.8, maximum=1.0, step=0.05, value=default_slideshow_effect['move_ratio'])
180
+ sound_volume = gr.Slider(label="Sound Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['sound_volume'])
181
+ music_volume = gr.Slider(label="Music Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['music_volume'])
182
+ bg_speech_ratio = gr.Slider(label="Background / Speech Ratio", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['bg_speech_ratio'])
183
+ fps = gr.Slider(label="FPS", minimum=1, maximum=30, step=1, value=default_slideshow_effect['fps'])
184
+
185
+
186
+ with gr.Column():
187
+ story_data = gr.State([])
188
+
189
+ story_generation_information = gr.Markdown(
190
+ label="Story Generation Status",
191
+ value="<h3>Generating Story Script ......</h3>",
192
+ visible=False)
193
+ with gr.Accordion(label="Story Content", open=False, visible=False) as story_accordion:
194
+ with gr.Row():
195
+ prev_button = gr.Button("Previous Page",)
196
+ next_button = gr.Button("Next Page",)
197
+ story_content = gr.Textbox(label="Page Content")
198
+ video_generation_information = gr.Markdown(label="Generation Status", value="<h3>Generating Video ......</h3>", visible=False)
199
+ image_gallery = gr.Gallery(label="Images", show_label=False, visible=False)
200
+ video_generation_btn = gr.Button("Generate Video")
201
+ video_output = gr.Video(label="Generated Story", interactive=False)
202
+
203
+ current_page = gr.State(0)
204
+
205
+ prev_button.click(
206
+ fn=update_page,
207
+ inputs=[gr.State("prev"), current_page, story_data],
208
+ outputs=[current_page, story_content]
209
+ )
210
+ next_button.click(
211
+ fn=update_page,
212
+ inputs=[gr.State("next"), current_page, story_data],
213
+ outputs=[current_page, story_content,])
214
+
215
+ video_generation_btn.click(
216
+ fn=set_generating_progress_text,
217
+ inputs=[gr.State("Generating Story")],
218
+ outputs=video_generation_information
219
+ ).then(
220
+ fn=write_story_fn,
221
+ inputs=[story_topic, main_role, scene,
222
+ chapter_num, temperature,
223
+ current_page],
224
+ outputs=[story_data, story_accordion, story_content, video_output]
225
+ ).then(
226
+ fn=set_generating_progress_text,
227
+ inputs=[gr.State("Generating Modality Assets")],
228
+ outputs=video_generation_information
229
+ ).then(
230
+ fn=modality_assets_generation_fn,
231
+ inputs=[height, width, image_seed, sound_guidance_scale, sound_seed,
232
+ n_candidate_per_text, music_duration,
233
+ story_data],
234
+ outputs=[image_gallery]
235
+ ).then(
236
+ fn=set_generating_progress_text,
237
+ inputs=[gr.State("Composing Video")],
238
+ outputs=video_generation_information
239
+ ).then(
240
+ fn=compose_storytelling_video_fn,
241
+ inputs=[fade_duration, slide_duration, zoom_speed, move_ratio,
242
+ sound_volume, music_volume, bg_speech_ratio, fps,
243
+ story_data],
244
+ outputs=[video_output]
245
+ ).then(
246
+ fn=lambda : gr.update(visible=False),
247
+ inputs=[],
248
+ outputs=[image_gallery]
249
+ ).then(
250
+ fn=set_generating_progress_text,
251
+ inputs=[gr.State("Generation Finished")],
252
+ outputs=video_generation_information
253
+ )
254
+
255
+ demo.launch()
mm_story_agent/__init__.py CHANGED
@@ -32,16 +32,6 @@ class MMStoryAgent:
32
  def write_story(self, config):
33
  story_writer = QAOutlineStoryWriter(config["story_gen_config"])
34
  pages = story_writer.call(config["story_setting"])
35
- # pages = [
36
- # "In the heart of a dense forest, Flicker the Fox, nestled in his cozy den, stumbled upon an ancient computer hidden beneath a pile of soft moss and forgotten treasures. Surrounded by maps of unexplored territories and codes scribbled on parchment, Flicker's eyes widened with intrigue as he traced his paw over the mysterious machine.",
37
- # "Flicker's den was a testament to his adventurous spirit, a haven filled with artifacts from his previous quests. The discovery of the computer, however, sparked a new kind of excitement within him, a curiosity that went beyond the physical boundaries of his forest home.",
38
- # "With a determined gleam in his eye, Flicker trotted out of his den in search of his parents. He had questions about this relic that couldn't wait, eager to understand the secrets it held and how it functioned in a world so different from his own.",
39
- # "Excited by his parents' encouragement, Flicker eagerly started his journey into the world of typing. His paws clumsily hit the wrong keys at first, resulting in a string of random letters and numbers on the screen. But with every mistake, Flicker's determination grew stronger.",
40
- # "Days turned into weeks, and Flicker's persistence paid off. His paws now moved gracefully across the keyboard, his eyes focused on the screen as he typed out simple messages and commands. The once foreign device was becoming a familiar tool, and Flicker felt a sense of accomplishment wash over him.",
41
- # "One evening, as the moon illuminated the forest, a wise old owl named Ollie perched on a branch outside Flicker's den. With a hoot and a smile, Ollie shared the magic of keyboard shortcuts, turning Flicker's typing sessions into thrilling adventures. Each shortcut was like a secret code, and Flicker couldn't wait to master them all.",
42
- # "Eager to explore beyond the basics, Flicker's curiosity led him to the vast digital world of the internet. With guidance from his parents and Ollie, he learned how to navigate safely, discovering interactive games and educational videos that opened his eyes to the wonders beyond his forest.",
43
- # "Each day, Flicker would sit before the screen, his paws dancing over the keys as he clicked through virtual tours of distant lands, watched videos of creatures he'd never seen, and played games that taught him about science and history. The computer became a window to a world far larger than he could have imagined.",
44
- # ]
45
  return pages
46
 
47
  def generate_modality_assets(self, config, pages):
 
32
  def write_story(self, config):
33
  story_writer = QAOutlineStoryWriter(config["story_gen_config"])
34
  pages = story_writer.call(config["story_setting"])
 
 
 
 
 
 
 
 
 
 
35
  return pages
36
 
37
  def generate_modality_assets(self, config, pages):