Update app.py
Browse files
app.py
CHANGED
@@ -221,7 +221,7 @@ with gr.Blocks(css=css) as demo :
|
|
221 |
<p style="text-align: center;">
|
222 |
An experiment to try to achieve what I call "soft video understanding" with open-source available models. <br />
|
223 |
We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then send visual and audio details to Zephyr, which is instructed to summarize what it understood.
|
224 |
-
The instructions prompt is available for further discussion with the Community.
|
225 |
</p>
|
226 |
""")
|
227 |
with gr.Row():
|
@@ -233,7 +233,7 @@ with gr.Blocks(css=css) as demo :
|
|
233 |
)
|
234 |
gr.Examples(
|
235 |
examples = ["examples/train.mp4"],
|
236 |
-
inputs = [
|
237 |
)
|
238 |
with gr.Column():
|
239 |
video_cut = gr.Video(label="Video cut to 10 seconds", interactive=False)
|
|
|
221 |
<p style="text-align: center;">
|
222 |
An experiment to try to achieve what I call "soft video understanding" with open-source available models. <br />
|
223 |
We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then send visual and audio details to Zephyr, which is instructed to summarize what it understood.
|
224 |
+
The instructions prompt is available for further discussion with the Community. Note that audio is crucial for better overall understanding. Videos longer than 10 seconds will be cut.
|
225 |
</p>
|
226 |
""")
|
227 |
with gr.Row():
|
|
|
233 |
)
|
234 |
gr.Examples(
|
235 |
examples = ["examples/train.mp4"],
|
236 |
+
inputs = [video_cut]
|
237 |
)
|
238 |
with gr.Column():
|
239 |
video_cut = gr.Video(label="Video cut to 10 seconds", interactive=False)
|