freddyaboulton (HF staff) committed
Commit 3e40464 · verified · 1 Parent(s): e1218ad

Update app.py

Files changed (1):
  1. app.py +5 -19
app.py CHANGED
@@ -12,23 +12,15 @@ import uuid
 from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor
 import logging
 
-# Configure the root logger to WARNING to suppress debug messages from other libraries
 logging.basicConfig(level=logging.WARNING)
-
-# Create a console handler
 console_handler = logging.StreamHandler()
 console_handler.setLevel(logging.DEBUG)
-
-# Create a formatter
 formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
 console_handler.setFormatter(formatter)
-
-# Configure the logger for your specific library
 logger = logging.getLogger("gradio_webrtc")
 logger.setLevel(logging.DEBUG)
 logger.addHandler(console_handler)
 
-
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
 model = Qwen2AudioForConditionalGeneration.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct", device_map="auto")
 
@@ -48,16 +40,9 @@ else:
     rtc_configuration = None
 
 
-
-def yield_audio(audio: tuple[int, np.ndarray]):
-    yield AdditionalOutputs(audio)
-
-
-
 @spaces.GPU
-def respond(transformers_convo: list[dict], gradio_convo: list[dict], audio: tuple[int, np.ndarray], ):
+def transcribe(audio: tuple[int, np.ndarray], transformers_convo: list[dict], gradio_convo: list[dict]):
     segment = AudioSegment(audio[1].tobytes(), frame_rate=audio[0], sample_width=audio[1].dtype.itemsize, channels=1)
-
     name = str(uuid.uuid4()) + ".mp3"
     segment.export(name, format="mp3")
     transformers_convo.append({"role": "user", "content": [{"type": "audio", "audio_url": name}]})
@@ -79,10 +64,11 @@ def respond(transformers_convo: list[dict], gradio_convo: list[dict], audio: tup
     generate_ids = model.generate(**inputs, max_length=256)
     generate_ids = generate_ids[:, inputs["input_ids"].size(1):]
     response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    print("response", response)
     transformers_convo.append({"role": "assistant", "content": response})
     gradio_convo.append({"role": "assistant", "content": response})
 
-    yield transformers_convo, gradio_convo
+    yield AdditionalOutputs(transformers_convo, gradio_convo)
 
 
 with gr.Blocks() as demo:
@@ -98,8 +84,8 @@ with gr.Blocks() as demo:
         with gr.Column():
             transcript = gr.Chatbot(label="transcript", type="messages")
 
-    audio.stream(ReplyOnPause(yield_audio), inputs=[audio], outputs=[audio])
-    audio.on_additional_outputs(respond, outputs=[transformers_convo, transcript])
+    audio.stream(ReplyOnPause(transcribe), inputs=[audio, transformers_convo, transcript], outputs=[audio])
+    audio.on_additional_outputs(lambda s,a: (s,a), outputs=[transformers_convo, transcript])
 
 if __name__ == "__main__":
     demo.launch()
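For reference, the wiring this commit settles on — a single ReplyOnPause handler that yields AdditionalOutputs, paired with an on_additional_outputs callback that forwards them to the UI — can be sketched in isolation as below. This is a minimal illustration, not the Space's code: the transcribe_stub handler is hypothetical, and it assumes gradio_webrtc exposes a WebRTC component (with mode/modality constructor arguments) alongside the ReplyOnPause and AdditionalOutputs helpers already used in the diff.

# Minimal sketch (assumed API) of the pattern introduced by this commit: one
# streaming handler that pushes state to non-audio components via AdditionalOutputs.
import gradio as gr
import numpy as np
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs  # assumed import path

def transcribe_stub(audio: tuple[int, np.ndarray], convo: list[dict]):
    # ReplyOnPause calls this once the speaker pauses; audio is (sample_rate, samples).
    sample_rate, samples = audio
    convo.append({"role": "user", "content": f"heard {samples.shape[-1] / sample_rate:.1f}s of audio"})
    # Hand updated state back through AdditionalOutputs rather than a plain yield,
    # so it can reach components other than the streaming one.
    yield AdditionalOutputs(convo)

with gr.Blocks() as demo:
    audio = WebRTC(mode="send-receive", modality="audio")  # assumed constructor args
    transcript = gr.Chatbot(label="transcript", type="messages")
    # Extra inputs listed after the component are passed through to the handler.
    audio.stream(ReplyOnPause(transcribe_stub), inputs=[audio, transcript], outputs=[audio])
    # AdditionalOutputs yielded inside the handler are routed to these outputs.
    audio.on_additional_outputs(lambda convo: convo, outputs=[transcript])

if __name__ == "__main__":
    demo.launch()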
 