Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,10 @@ zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
|
|
17 |
pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")
|
18 |
|
19 |
standard_sys = f"""
|
20 |
-
|
|
|
|
|
|
|
21 |
"""
|
22 |
|
23 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|
@@ -83,7 +86,7 @@ def extract_audio(video_path):
|
|
83 |
return "output_audio.mp3"
|
84 |
|
85 |
def get_salmonn(audio_in):
|
86 |
-
salmonn_prompt = "
|
87 |
client = Client("fffiloni/SALMONN-7B-gradio")
|
88 |
result = client.predict(
|
89 |
audio_in, # filepath in 'Audio' Audio component
|
@@ -141,9 +144,9 @@ def infer(video_in):
|
|
141 |
print(formatted_captions)
|
142 |
|
143 |
# Send formatted captions to LLM
|
144 |
-
|
145 |
|
146 |
-
return
|
147 |
|
148 |
with gr.Blocks() as demo :
|
149 |
with gr.Column(elem_id="col-container"):
|
|
|
17 |
pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")
|
18 |
|
19 |
standard_sys = f"""
|
20 |
+
You will be provided with a list of visual events and an audio description. All of this information comes from a single video.
|
21 |
+
The list of visual events actually consists of images extracted from this video every 12 frames.
|
22 |
+
Audio events are actually the description from the audio of the video.
|
23 |
+
Your job is to use this information to provide a short summary of what is happening in the video.
|
24 |
"""
|
25 |
|
26 |
def extract_frames(video_in, interval=24, output_format='.jpg'):
|
|
|
86 |
return "output_audio.mp3"
|
87 |
|
88 |
def get_salmonn(audio_in):
|
89 |
+
salmonn_prompt = "Please describe the audio"
|
90 |
client = Client("fffiloni/SALMONN-7B-gradio")
|
91 |
result = client.predict(
|
92 |
audio_in, # filepath in 'Audio' Audio component
|
|
|
144 |
print(formatted_captions)
|
145 |
|
146 |
# Send formatted captions to LLM
|
147 |
+
video_description_from_llm = llm_process(formatted_captions)
|
148 |
|
149 |
+
return video_description_from_llm
|
150 |
|
151 |
with gr.Blocks() as demo :
|
152 |
with gr.Column(elem_id="col-container"):
|