Spaces:
Running
on
L4
Running
on
L4
Xu Xuenan
committed on
Commit
·
15c0d2f
1
Parent(s):
18d0cba
Update app.py
Browse files
- app.py +109 -115
- mm_story_agent/__init__.py +0 -10
app.py
CHANGED
@@ -142,120 +142,114 @@ def compose_storytelling_video_fn(
|
|
142 |
return Path(config["story_dir"]) / "output.mp4"
|
143 |
|
144 |
|
145 |
-
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
146 |
-
|
147 |
-
gr.HTML("""
|
148 |
-
<h1 style="text-align: center;">MM-StoryAgent</h1>
|
149 |
-
<p style="font-size: 16px;">This is a demo for generating attractive storytelling videos based on the given story setting.</p>
|
150 |
-
""")
|
151 |
-
|
152 |
-
with gr.Row():
|
153 |
-
with gr.Column():
|
154 |
-
story_topic = gr.Textbox(label="Story Topic", value=default_story_setting["story_topic"])
|
155 |
-
main_role = gr.Textbox(label="Main Role", value=default_story_setting["main_role"])
|
156 |
-
scene = gr.Textbox(label="Scene", value=default_story_setting["scene"])
|
157 |
-
chapter_num = gr.Number(label="Chapter Number", value=default_story_gen_config["num_outline"])
|
158 |
-
temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=default_story_gen_config["temperature"])
|
159 |
-
|
160 |
-
with gr.Accordion("Detailed Image Configuration (Optional)", open=False):
|
161 |
-
height = gr.Slider(label="Height", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['height'])
|
162 |
-
width = gr.Slider(label="Width", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['width'])
|
163 |
-
image_seed = gr.Number(label="Image Seed", value=default_image_config["call_cfg"]['seed'])
|
164 |
-
|
165 |
-
with gr.Accordion("Detailed Sound Configuration (Optional)", open=False):
|
166 |
-
sound_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=7.0, step=0.5, value=default_sound_config["call_cfg"]['guidance_scale'])
|
167 |
-
sound_seed = gr.Number(label="Sound Seed", value=default_sound_config["call_cfg"]['seed'])
|
168 |
-
n_candidate_per_text = gr.Slider(label="Number of Candidates per Text", minimum=0, maximum=5, step=1, value=default_sound_config["call_cfg"]['n_candidate_per_text'])
|
169 |
-
|
170 |
-
with gr.Accordion("Detailed Music Configuration (Optional)", open=False):
|
171 |
-
music_duration = gr.Number(label="Music Duration", min_width=30.0, maximum=120.0, value=default_music_config["call_cfg"]["duration"])
|
172 |
-
|
173 |
-
with gr.Accordion("Detailed Slideshow Effect (Optional)", open=False):
|
174 |
-
fade_duration = gr.Slider(label="Fade Duration", minimum=0.1, maximum=1.5, step=0.1, value=default_slideshow_effect['fade_duration'])
|
175 |
-
slide_duration = gr.Slider(label="Slide Duration", minimum=0.1, maximum=1.0, step=0.1, value=default_slideshow_effect['slide_duration'])
|
176 |
-
zoom_speed = gr.Slider(label="Zoom Speed", minimum=0.1, maximum=2.0, step=0.1, value=default_slideshow_effect['zoom_speed'])
|
177 |
-
move_ratio = gr.Slider(label="Move Ratio", minimum=0.8, maximum=1.0, step=0.05, value=default_slideshow_effect['move_ratio'])
|
178 |
-
sound_volume = gr.Slider(label="Sound Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['sound_volume'])
|
179 |
-
music_volume = gr.Slider(label="Music Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['music_volume'])
|
180 |
-
bg_speech_ratio = gr.Slider(label="Background / Speech Ratio", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['bg_speech_ratio'])
|
181 |
-
fps = gr.Slider(label="FPS", minimum=1, maximum=30, step=1, value=default_slideshow_effect['fps'])
|
182 |
-
|
183 |
-
|
184 |
-
with gr.Column():
|
185 |
-
story_data = gr.State([])
|
186 |
-
|
187 |
-
story_generation_information = gr.Markdown(
|
188 |
-
label="Story Generation Status",
|
189 |
-
value="<h3>Generating Story Script ......</h3>",
|
190 |
-
visible=False)
|
191 |
-
with gr.Accordion(label="Story Content", open=False, visible=False) as story_accordion:
|
192 |
-
with gr.Row():
|
193 |
-
prev_button = gr.Button("Previous Page",)
|
194 |
-
next_button = gr.Button("Next Page",)
|
195 |
-
story_content = gr.Textbox(label="Page Content")
|
196 |
-
video_generation_information = gr.Markdown(label="Generation Status", value="<h3>Generating Video ......</h3>", visible=False)
|
197 |
-
image_gallery = gr.Gallery(label="Images", show_label=False, visible=False)
|
198 |
-
video_generation_btn = gr.Button("Generate Video")
|
199 |
-
video_output = gr.Video(label="Generated Story", interactive=False)
|
200 |
-
|
201 |
-
current_page = gr.State(0)
|
202 |
-
|
203 |
-
prev_button.click(
|
204 |
-
fn=update_page,
|
205 |
-
inputs=[gr.State("prev"), current_page, story_data],
|
206 |
-
outputs=[current_page, story_content]
|
207 |
-
)
|
208 |
-
next_button.click(
|
209 |
-
fn=update_page,
|
210 |
-
inputs=[gr.State("next"), current_page, story_data],
|
211 |
-
outputs=[current_page, story_content,])
|
212 |
-
|
213 |
-
# (possibly) update role description and scripts
|
214 |
-
|
215 |
-
video_generation_btn.click(
|
216 |
-
fn=set_generating_progress_text,
|
217 |
-
inputs=[gr.State("Generating Story")],
|
218 |
-
outputs=video_generation_information
|
219 |
-
).then(
|
220 |
-
fn=write_story_fn,
|
221 |
-
inputs=[story_topic, main_role, scene,
|
222 |
-
chapter_num, temperature,
|
223 |
-
current_page],
|
224 |
-
outputs=[story_data, story_accordion, story_content, video_output]
|
225 |
-
).then(
|
226 |
-
fn=set_generating_progress_text,
|
227 |
-
inputs=[gr.State("Generating Modality Assets")],
|
228 |
-
outputs=video_generation_information
|
229 |
-
).then(
|
230 |
-
fn=modality_assets_generation_fn,
|
231 |
-
inputs=[height, width, image_seed, sound_guidance_scale, sound_seed,
|
232 |
-
n_candidate_per_text, music_duration,
|
233 |
-
story_data],
|
234 |
-
outputs=[image_gallery]
|
235 |
-
).then(
|
236 |
-
fn=set_generating_progress_text,
|
237 |
-
inputs=[gr.State("Composing Video")],
|
238 |
-
outputs=video_generation_information
|
239 |
-
).then(
|
240 |
-
fn=compose_storytelling_video_fn,
|
241 |
-
inputs=[fade_duration, slide_duration, zoom_speed, move_ratio,
|
242 |
-
sound_volume, music_volume, bg_speech_ratio, fps,
|
243 |
-
story_data],
|
244 |
-
outputs=[video_output]
|
245 |
-
).then(
|
246 |
-
fn=lambda : gr.update(visible=False),
|
247 |
-
inputs=[],
|
248 |
-
outputs=[image_gallery]
|
249 |
-
).then(
|
250 |
-
fn=set_generating_progress_text,
|
251 |
-
inputs=[gr.State("Generation Finished")],
|
252 |
-
outputs=video_generation_information
|
253 |
-
)
|
254 |
-
|
255 |
-
|
256 |
if __name__ == "__main__":
|
257 |
-
parser = argparse.ArgumentParser()
|
258 |
-
parser.add_argument("--share", default=False, action="store_true")
|
259 |
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
return Path(config["story_dir"]) / "output.mp4"
|
143 |
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
if __name__ == "__main__":
|
|
|
|
|
146 |
|
147 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
148 |
+
|
149 |
+
gr.HTML("""
|
150 |
+
<h1 style="text-align: center;">MM-StoryAgent</h1>
|
151 |
+
<p style="font-size: 16px;">This is a demo for generating attractive storytelling videos based on the given story setting.</p>
|
152 |
+
""")
|
153 |
+
|
154 |
+
with gr.Row():
|
155 |
+
with gr.Column():
|
156 |
+
story_topic = gr.Textbox(label="Story Topic", value=default_story_setting["story_topic"])
|
157 |
+
main_role = gr.Textbox(label="Main Role", value=default_story_setting["main_role"])
|
158 |
+
scene = gr.Textbox(label="Scene", value=default_story_setting["scene"])
|
159 |
+
chapter_num = gr.Number(label="Chapter Number", value=default_story_gen_config["num_outline"])
|
160 |
+
temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Temperature", value=default_story_gen_config["temperature"])
|
161 |
+
|
162 |
+
with gr.Accordion("Detailed Image Configuration (Optional)", open=False):
|
163 |
+
height = gr.Slider(label="Height", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['height'])
|
164 |
+
width = gr.Slider(label="Width", minimum=256, maximum=1024, step=32, value=default_image_config["obj_cfg"]['width'])
|
165 |
+
image_seed = gr.Number(label="Image Seed", value=default_image_config["call_cfg"]['seed'])
|
166 |
+
|
167 |
+
with gr.Accordion("Detailed Sound Configuration (Optional)", open=False):
|
168 |
+
sound_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=7.0, step=0.5, value=default_sound_config["call_cfg"]['guidance_scale'])
|
169 |
+
sound_seed = gr.Number(label="Sound Seed", value=default_sound_config["call_cfg"]['seed'])
|
170 |
+
n_candidate_per_text = gr.Slider(label="Number of Candidates per Text", minimum=0, maximum=5, step=1, value=default_sound_config["call_cfg"]['n_candidate_per_text'])
|
171 |
+
|
172 |
+
with gr.Accordion("Detailed Music Configuration (Optional)", open=False):
|
173 |
+
# Bound the accepted value with `minimum`/`maximum`. `min_width` is a layout
# width in pixels, not a value limit, so it left the field without a lower bound.
# NOTE(review): assumes the configured default duration is >= 30.0 — confirm.
music_duration = gr.Number(label="Music Duration", minimum=30.0, maximum=120.0, value=default_music_config["call_cfg"]["duration"])
|
174 |
+
|
175 |
+
with gr.Accordion("Detailed Slideshow Effect (Optional)", open=False):
|
176 |
+
fade_duration = gr.Slider(label="Fade Duration", minimum=0.1, maximum=1.5, step=0.1, value=default_slideshow_effect['fade_duration'])
|
177 |
+
slide_duration = gr.Slider(label="Slide Duration", minimum=0.1, maximum=1.0, step=0.1, value=default_slideshow_effect['slide_duration'])
|
178 |
+
zoom_speed = gr.Slider(label="Zoom Speed", minimum=0.1, maximum=2.0, step=0.1, value=default_slideshow_effect['zoom_speed'])
|
179 |
+
move_ratio = gr.Slider(label="Move Ratio", minimum=0.8, maximum=1.0, step=0.05, value=default_slideshow_effect['move_ratio'])
|
180 |
+
sound_volume = gr.Slider(label="Sound Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['sound_volume'])
|
181 |
+
music_volume = gr.Slider(label="Music Volume", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['music_volume'])
|
182 |
+
bg_speech_ratio = gr.Slider(label="Background / Speech Ratio", minimum=0.0, maximum=1.0, step=0.1, value=default_slideshow_effect['bg_speech_ratio'])
|
183 |
+
fps = gr.Slider(label="FPS", minimum=1, maximum=30, step=1, value=default_slideshow_effect['fps'])
|
184 |
+
|
185 |
+
|
186 |
+
with gr.Column():
|
187 |
+
story_data = gr.State([])
|
188 |
+
|
189 |
+
story_generation_information = gr.Markdown(
|
190 |
+
label="Story Generation Status",
|
191 |
+
value="<h3>Generating Story Script ......</h3>",
|
192 |
+
visible=False)
|
193 |
+
with gr.Accordion(label="Story Content", open=False, visible=False) as story_accordion:
|
194 |
+
with gr.Row():
|
195 |
+
prev_button = gr.Button("Previous Page",)
|
196 |
+
next_button = gr.Button("Next Page",)
|
197 |
+
story_content = gr.Textbox(label="Page Content")
|
198 |
+
video_generation_information = gr.Markdown(label="Generation Status", value="<h3>Generating Video ......</h3>", visible=False)
|
199 |
+
image_gallery = gr.Gallery(label="Images", show_label=False, visible=False)
|
200 |
+
video_generation_btn = gr.Button("Generate Video")
|
201 |
+
video_output = gr.Video(label="Generated Story", interactive=False)
|
202 |
+
|
203 |
+
current_page = gr.State(0)
|
204 |
+
|
205 |
+
prev_button.click(
|
206 |
+
fn=update_page,
|
207 |
+
inputs=[gr.State("prev"), current_page, story_data],
|
208 |
+
outputs=[current_page, story_content]
|
209 |
+
)
|
210 |
+
next_button.click(
|
211 |
+
fn=update_page,
|
212 |
+
inputs=[gr.State("next"), current_page, story_data],
|
213 |
+
outputs=[current_page, story_content,])
|
214 |
+
|
215 |
+
video_generation_btn.click(
|
216 |
+
fn=set_generating_progress_text,
|
217 |
+
inputs=[gr.State("Generating Story")],
|
218 |
+
outputs=video_generation_information
|
219 |
+
).then(
|
220 |
+
fn=write_story_fn,
|
221 |
+
inputs=[story_topic, main_role, scene,
|
222 |
+
chapter_num, temperature,
|
223 |
+
current_page],
|
224 |
+
outputs=[story_data, story_accordion, story_content, video_output]
|
225 |
+
).then(
|
226 |
+
fn=set_generating_progress_text,
|
227 |
+
inputs=[gr.State("Generating Modality Assets")],
|
228 |
+
outputs=video_generation_information
|
229 |
+
).then(
|
230 |
+
fn=modality_assets_generation_fn,
|
231 |
+
inputs=[height, width, image_seed, sound_guidance_scale, sound_seed,
|
232 |
+
n_candidate_per_text, music_duration,
|
233 |
+
story_data],
|
234 |
+
outputs=[image_gallery]
|
235 |
+
).then(
|
236 |
+
fn=set_generating_progress_text,
|
237 |
+
inputs=[gr.State("Composing Video")],
|
238 |
+
outputs=video_generation_information
|
239 |
+
).then(
|
240 |
+
fn=compose_storytelling_video_fn,
|
241 |
+
inputs=[fade_duration, slide_duration, zoom_speed, move_ratio,
|
242 |
+
sound_volume, music_volume, bg_speech_ratio, fps,
|
243 |
+
story_data],
|
244 |
+
outputs=[video_output]
|
245 |
+
).then(
|
246 |
+
fn=lambda : gr.update(visible=False),
|
247 |
+
inputs=[],
|
248 |
+
outputs=[image_gallery]
|
249 |
+
).then(
|
250 |
+
fn=set_generating_progress_text,
|
251 |
+
inputs=[gr.State("Generation Finished")],
|
252 |
+
outputs=video_generation_information
|
253 |
+
)
|
254 |
+
|
255 |
+
demo.launch()
|
mm_story_agent/__init__.py
CHANGED
@@ -32,16 +32,6 @@ class MMStoryAgent:
|
|
32 |
def write_story(self, config):
|
33 |
story_writer = QAOutlineStoryWriter(config["story_gen_config"])
|
34 |
pages = story_writer.call(config["story_setting"])
|
35 |
-
# pages = [
|
36 |
-
# "In the heart of a dense forest, Flicker the Fox, nestled in his cozy den, stumbled upon an ancient computer hidden beneath a pile of soft moss and forgotten treasures. Surrounded by maps of unexplored territories and codes scribbled on parchment, Flicker's eyes widened with intrigue as he traced his paw over the mysterious machine.",
|
37 |
-
# "Flicker's den was a testament to his adventurous spirit, a haven filled with artifacts from his previous quests. The discovery of the computer, however, sparked a new kind of excitement within him, a curiosity that went beyond the physical boundaries of his forest home.",
|
38 |
-
# "With a determined gleam in his eye, Flicker trotted out of his den in search of his parents. He had questions about this relic that couldn't wait, eager to understand the secrets it held and how it functioned in a world so different from his own.",
|
39 |
-
# "Excited by his parents' encouragement, Flicker eagerly started his journey into the world of typing. His paws clumsily hit the wrong keys at first, resulting in a string of random letters and numbers on the screen. But with every mistake, Flicker's determination grew stronger.",
|
40 |
-
# "Days turned into weeks, and Flicker's persistence paid off. His paws now moved gracefully across the keyboard, his eyes focused on the screen as he typed out simple messages and commands. The once foreign device was becoming a familiar tool, and Flicker felt a sense of accomplishment wash over him.",
|
41 |
-
# "One evening, as the moon illuminated the forest, a wise old owl named Ollie perched on a branch outside Flicker's den. With a hoot and a smile, Ollie shared the magic of keyboard shortcuts, turning Flicker's typing sessions into thrilling adventures. Each shortcut was like a secret code, and Flicker couldn't wait to master them all.",
|
42 |
-
# "Eager to explore beyond the basics, Flicker's curiosity led him to the vast digital world of the internet. With guidance from his parents and Ollie, he learned how to navigate safely, discovering interactive games and educational videos that opened his eyes to the wonders beyond his forest.",
|
43 |
-
# "Each day, Flicker would sit before the screen, his paws dancing over the keys as he clicked through virtual tours of distant lands, watched videos of creatures he'd never seen, and played games that taught him about science and history. The computer became a window to a world far larger than he could have imagined.",
|
44 |
-
# ]
|
45 |
return pages
|
46 |
|
47 |
def generate_modality_assets(self, config, pages):
|
|
|
32 |
def write_story(self, config):
|
33 |
story_writer = QAOutlineStoryWriter(config["story_gen_config"])
|
34 |
pages = story_writer.call(config["story_setting"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
return pages
|
36 |
|
37 |
def generate_modality_assets(self, config, pages):
|