Update app.py
app.py CHANGED
@@ -223,14 +223,15 @@ with gr.Blocks(css=css) as demo :
     <h2 style="text-align: center;">Soft Video Understanding</h2>
     <p style="text-align: center;">
     An experiment to try to achieve what i call "soft video understanding" with open-source available models. <br />
-    We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then
-    Instructions prompt is available for further discussion with the Community.
+    We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then give visual and audio details to Zephyr which is instructed to resume what it understood.<br />
+    Instructions prompt is available for further discussion with the Community. <br />
+    Note that audio is crucial for better overall vision. Video longer than 10 seconds will be cut.
     </p>
     """)
     with gr.Row():
         with gr.Column():
             video_in = gr.Video(label="Video input")
-            with gr.Accordion("System Instructions", open=False):
+            with gr.Accordion("System Instructions (for your curiosity)", open=False):
                 system_instruction = gr.Markdown(
                     value = standard_sys
                 )
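For context, the pipeline the updated description refers to chains three models: moondream1 captions sampled frames, SALMONN describes the extracted audio, and Zephyr is asked to summarize both. Below is a minimal, self-contained sketch of that flow, not the Space's actual implementation: the frame sampling and the 10-second cut follow the description, while `caption_frame`, `analyze_audio`, and the final prompt assembly are hypothetical placeholders for the real model calls.

```python
# Minimal sketch (assumptions, not the Space's code) of the described flow:
# sample frames from the first 10 seconds, caption them, describe the audio,
# and hand both to a text model for a summary. Model calls are placeholders.
import cv2  # pip install opencv-python

MAX_SECONDS = 10  # the description says videos longer than 10 seconds are cut


def extract_frames(video_path: str, frames_per_second: int = 1) -> list:
    """Sample roughly `frames_per_second` frames from the first MAX_SECONDS."""
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    step = max(int(fps / frames_per_second), 1)
    frames, index = [], 0
    while True:
        ok, frame = cap.read()
        if not ok or index / fps > MAX_SECONDS:
            break
        if index % step == 0:
            frames.append(frame)
        index += 1
    cap.release()
    return frames


def caption_frame(frame) -> str:
    # Placeholder: the app presumably sends each frame to moondream1 here.
    return "a caption describing this frame"


def analyze_audio(video_path: str) -> str:
    # Placeholder: the app presumably extracts the audio track and sends it to SALMONN.
    return "a description of the audio track"


def soft_video_understanding(video_path: str) -> str:
    captions = [caption_frame(f) for f in extract_frames(video_path)]
    audio_details = analyze_audio(video_path)
    # Placeholder: the app presumably instructs Zephyr (via the system prompt
    # shown in the accordion) to summarize the combined visual and audio details.
    return f"Frame captions: {captions}\nAudio: {audio_details}"
```

The 10-second cut keeps the number of frame captions, and therefore the prompt handed to Zephyr, small enough for a single text-generation call.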