Spaces: Running on Zero
Add streaming
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer
 import librosa
+from threading import Thread
 
 def split_audio(audio_arrays, chunk_limit=480000):
     CHUNK_LIM = chunk_limit
@@ -15,6 +16,14 @@ def split_audio(audio_arrays, chunk_limit=480000):
     return audio_splits
 
 
+def user(audio, text, chat_history):
+
+    if audio is not None:
+        chat_history.append(gr.ChatMessage(role="user", content={"path": audio, "alt_text": "Audio"}))
+    chat_history.append({"role": "user", "content": text})
+    return "", chat_history
+
+
 # Placeholder for your actual LLM processing API call
 def process_audio(audio, text, chat_history):
     conversation = [
@@ -24,6 +33,7 @@ def process_audio(audio, text, chat_history):
             ],
         },
     ]
+    audio_path = audio
     audio = librosa.load(audio, sr=16000)[0]
 
     if audio is not None:
@@ -35,7 +45,7 @@ def process_audio(audio, text, chat_history):
                 "audio": "placeholder",
             }
         )
-        chat_history.append({"
+        # chat_history.append(gr.ChatMessage(role="user", content={"path": audio_path, "alt_text": "Audio"}))
 
     conversation[0]["content"].append(
         {
@@ -45,22 +55,26 @@ def process_audio(audio, text, chat_history):
     )
 
     chat_history.append({"role": "user", "content": text})
+    # Set up the streamer for token generation
+    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
     prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
     inputs = processor(text=prompt, audios=splitted_audio, sampling_rate=16000, return_tensors="pt", padding=True)
     inputs = {k: v.to("cuda") for k, v in inputs.items()}
-
-
-
-
-
-
-
-
-
-
+    # Set up generation arguments including max tokens and streamer
+    generation_args = {
+        "max_new_tokens": 4096,
+        "streamer": streamer,
+        **inputs
+    }
+    # Start a separate thread for model generation to allow streaming output
+    thread = Thread(
+        target=model.generate,
+        kwargs=generation_args,
     )
-
-
+    thread.start()
+    for character in streamer:
+        chat_history[-1]['content'] += character
+        yield chat_history
 
 with gr.Blocks() as demo:
     gr.Markdown("## 🎙️ Aero-1-Audio")
@@ -90,6 +104,11 @@ with gr.Blocks() as demo:
         chatbot_clear = gr.ClearButton([text_input, audio_input, chatbot], value="Clear")
         chatbot_submit = gr.Button("Submit", variant="primary")
         chatbot_submit.click(
+            user,
+            inputs=[audio_input, text_input, chatbot],
+            outputs=[text_input, chatbot],
+            queue=False
+        ).then(
             process_audio,
             inputs=[audio_input, text_input, chatbot],
            outputs=[chatbot],
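
The heart of the change is the usual Transformers streaming recipe: model.generate runs on a background thread while the caller iterates a TextIteratorStreamer and yields the partially decoded reply, so the Gradio Chatbot can repaint after every chunk. The sketch below shows that pattern in isolation and assumes nothing from app.py beyond the imports in the diff; the gpt2 checkpoint, the stream_reply name, and the explicit empty assistant turn are illustrative stand-ins, not the Space's actual model or handler.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Illustrative stand-in checkpoint; the Space loads its own audio model and processor.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt, chat_history):
    # The streamer decodes tokens as they are produced; skip_prompt drops the echoed input.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    # generate() blocks until it finishes, so it runs in a worker thread
    # while this function consumes the streamer.
    thread = Thread(target=model.generate, kwargs={**inputs, "max_new_tokens": 64, "streamer": streamer})
    thread.start()
    # Open an empty assistant turn and grow it chunk by chunk; each yield lets
    # a Gradio Chatbot bound to this generator repaint the partial reply.
    chat_history.append({"role": "assistant", "content": ""})
    for new_text in streamer:
        chat_history[-1]["content"] += new_text
        yield chat_history
    thread.join()

At the UI level, chaining chatbot_submit.click(user, ..., queue=False).then(process_audio, ...) serves the same goal: the lightweight user step posts the typed text and recorded audio into the chat immediately, and only then does the generator-based process_audio start appending streamed text to the last chat message.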