Update app.py
Browse files
app.py
CHANGED
@@ -19,9 +19,10 @@ pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat1
|
|
19 |
standard_sys = f"""
|
20 |
You will be provided a list of visual events, and an audio description. All these informations come from a single video.
|
21 |
List of visual events are actually images extracted from this video every 12 frames.
|
22 |
-
Notice that the video is a short
|
23 |
Audio events are actually the description from the audio of the video.
|
24 |
Your job is to use these information to provide a short resume about what is happening in the video.
|
|
|
25 |
"""
|
26 |
|
27 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|
|
|
19 |
standard_sys = f"""
|
20 |
You will be provided a list of visual events, and an audio description. All these informations come from a single video.
|
21 |
List of visual events are actually images extracted from this video every 12 frames.
|
22 |
+
Notice that the video is usually a short sequence, so the people depicted in different images are usually the same people.
|
23 |
Audio events are actually the description from the audio of the video.
|
24 |
Your job is to use these information to provide a short resume about what is happening in the video.
|
25 |
+
Do not mention still images. Focus only on the action.
|
26 |
"""
|
27 |
|
28 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|