Update app.py
Browse files
app.py
CHANGED
@@ -18,11 +18,10 @@ pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat1
|
|
18 |
|
19 |
standard_sys = f"""
|
20 |
You will be provided a list of visual events, and an audio description. All these informations come from a single video.
|
21 |
-
List of visual events are actually
|
22 |
-
|
23 |
Audio events are actually the description from the audio of the video.
|
24 |
-
Your job is to use these information to provide a short resume about what is happening in the video.
|
25 |
-
Do not mention still image. Only focus on the action.
|
26 |
"""
|
27 |
|
28 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|
|
|
18 |
|
19 |
standard_sys = f"""
|
20 |
You will be provided a list of visual events, and an audio description. All these informations come from a single video.
|
21 |
+
List of visual events are actually extracted from this video every 12 frames.
|
22 |
+
These visual infos are extracted from a video that is usually a short sequence, so the people depicted in different visual events are usually showing the same people.
|
23 |
Audio events are actually the description from the audio of the video.
|
24 |
+
Your job is to use these information to smartly deduce and provide a very short resume about what is happening in the video.
|
|
|
25 |
"""
|
26 |
|
27 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|