kcz358 committed
Commit f3f60d0 · Parent: 9cdb7cc

Add streaming

Files changed (1): app.py (+33, -14)
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer
 import librosa
+from threading import Thread
 
 def split_audio(audio_arrays, chunk_limit=480000):
     CHUNK_LIM = chunk_limit
@@ -15,6 +16,14 @@ def split_audio(audio_arrays, chunk_limit=480000):
     return audio_splits
 
 
+def user(audio, text, chat_history):
+
+    if audio is not None:
+        chat_history.append(gr.ChatMessage(role="user", content={"path": audio, "alt_text": "Audio"}))
+    chat_history.append({"role": "user", "content": text})
+    return "", chat_history
+
+
 # Placeholder for your actual LLM processing API call
 def process_audio(audio, text, chat_history):
     conversation = [
@@ -24,6 +33,7 @@ def process_audio(audio, text, chat_history):
             ],
         },
     ]
+    audio_path = audio
     audio = librosa.load(audio, sr=16000)[0]
 
     if audio is not None:
@@ -35,7 +45,7 @@ def process_audio(audio, text, chat_history):
                 "audio": "placeholder",
             }
         )
-        chat_history.append({"role": "user", "content": gr.Audio(value=(16000, audio))})
+        # chat_history.append(gr.ChatMessage(role="user", content={"path": audio_path, "alt_text": "Audio"}))
 
     conversation[0]["content"].append(
         {
@@ -45,22 +55,26 @@ def process_audio(audio, text, chat_history):
     )
 
     chat_history.append({"role": "user", "content": text})
+    # Set up the streamer for token generation
+    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
     prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
     inputs = processor(text=prompt, audios=splitted_audio, sampling_rate=16000, return_tensors="pt", padding=True)
     inputs = {k: v.to("cuda") for k, v in inputs.items()}
-    outputs = model.generate(**inputs, eos_token_id=151645, pad_token_id=151643, max_new_tokens=4096)
-
-    cont = outputs[:, inputs["input_ids"].shape[-1] :]
-
-    result = processor.batch_decode(cont, skip_special_tokens=True)[0]
-    chat_history.append(
-        {
-            "role": "assistant",
-            "content": result,
-        }
+    # Set up generation arguments including max tokens and streamer
+    generation_args = {
+        "max_new_tokens": 4096,
+        "streamer": streamer,
+        **inputs
+    }
+    # Start a separate thread for model generation to allow streaming output
+    thread = Thread(
+        target=model.generate,
+        kwargs=generation_args,
     )
-
-    return chat_history
+    thread.start()
+    for character in streamer:
+        chat_history[-1]['content'] += character
+        yield chat_history
 
 with gr.Blocks() as demo:
     gr.Markdown("## 🎙️ Aero-1-Audio")
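
For reference, the generation change above is the standard transformers streaming idiom: model.generate() blocks until the full sequence is done, so the commit moves it onto a worker thread and drains the TextIteratorStreamer as decoded text arrives. A minimal self-contained sketch of that idiom (the gpt2 checkpoint and the prompt are placeholders for illustration, not this Space's actual model):

    from threading import Thread

    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    # Placeholder checkpoint for illustration; the Space loads its own model/processor.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("Streaming shows tokens as they arrive:", return_tensors="pt")

    # skip_prompt drops the echoed input; skip_special_tokens strips tokens like EOS.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until completion, so run it on a worker thread and
    # consume the streamer here as decoded text fragments become available.
    thread = Thread(target=model.generate, kwargs={"max_new_tokens": 50, "streamer": streamer, **inputs})
    thread.start()
    for chunk in streamer:
        print(chunk, end="", flush=True)
    thread.join()
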
 
@@ -90,6 +104,11 @@ with gr.Blocks() as demo:
     chatbot_clear = gr.ClearButton([text_input, audio_input, chatbot], value="Clear")
     chatbot_submit = gr.Button("Submit", variant="primary")
     chatbot_submit.click(
+        user,
+        inputs=[audio_input, text_input, chatbot],
+        outputs=[text_input, chatbot],
+        queue=False
+    ).then(
         process_audio,
         inputs=[audio_input, text_input, chatbot],
         outputs=[chatbot],
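
The rewired submit handler follows Gradio's usual two-step chat pattern: a fast user callback echoes the input into the chatbot right away (queue=False skips the queue for it), then .then() chains the slow streaming generator, whose every yield re-renders the Chatbot. A minimal sketch of that wiring, with a hypothetical toy bot standing in for this Space's process_audio (note the sketch opens an empty assistant turn before streaming into it):

    import time

    import gradio as gr

    def user(message, history):
        # Step 1: echo the user's message immediately and clear the textbox.
        history.append({"role": "user", "content": message})
        return "", history

    def bot(history):
        # Step 2: open an empty assistant turn, then stream text into it;
        # each yield pushes the updated history to the Chatbot for re-render.
        history.append({"role": "assistant", "content": ""})
        for word in "this toy reply streams in word by word".split():
            history[-1]["content"] += word + " "
            time.sleep(0.05)
            yield history

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        textbox = gr.Textbox()
        textbox.submit(user, [textbox, chatbot], [textbox, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )

    demo.launch()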