Steveeeeeeen HF Staff committed on
Commit
c567179
·
verified ·
1 Parent(s): 8baccb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -17
app.py CHANGED
@@ -6,6 +6,8 @@ from twilio.rest import Client
6
  import os
7
  import torch
8
  import librosa
 
 
9
 
10
  pipe = transformers.pipeline(
11
  model="reach-vb/smolvox-smollm2-whisper-turbo",
@@ -21,7 +23,9 @@ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
21
 
22
  if account_sid and auth_token:
23
  client = Client(account_sid, auth_token)
 
24
  token = client.tokens.create()
 
25
  rtc_configuration = {
26
  "iceServers": token.ice_servers,
27
  "iceTransportPolicy": "relay",
@@ -29,8 +33,12 @@ if account_sid and auth_token:
29
  else:
30
  rtc_configuration = None
31
 
32
-
33
- def transcribe(audio: tuple[int, np.ndarray], transformers_chat: list[dict], conversation: list[dict]):
 
 
 
 
34
  original_sr = audio[0]
35
  target_sr = 16000
36
 
@@ -40,7 +48,7 @@ def transcribe(audio: tuple[int, np.ndarray], transformers_chat: list[dict], con
40
 
41
  tf_input = [d for d in transformers_chat]
42
 
43
- # Generate a response from the pipeline using the audio input
44
  output = pipe(
45
  {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
46
  max_new_tokens=512,
@@ -56,16 +64,22 @@ def transcribe(audio: tuple[int, np.ndarray], transformers_chat: list[dict], con
56
 
57
  yield AdditionalOutputs(transformers_chat, conversation)
58
 
59
-
60
- def respond_text(user_text: str, transformers_chat: list[dict], conversation: list[dict]):
 
 
 
 
61
  if not user_text.strip():
 
62
  return transformers_chat, conversation
63
 
64
  # Append the user message from the textbox
65
  conversation.append({"role": "user", "content": user_text})
66
  transformers_chat.append({"role": "user", "content": user_text})
67
 
68
- # Generate a response using the pipeline. We assume it can process text input via "text"
 
69
  output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
70
 
71
  conversation.append({"role": "assistant", "content": output})
@@ -88,7 +102,6 @@ with gr.Blocks() as demo:
88
  </p>
89
  """
90
  )
91
-
92
  # Shared conversation state
93
  transformers_chat = gr.State(
94
  value=[
@@ -99,15 +112,13 @@ with gr.Blocks() as demo:
99
  ]
100
  )
101
 
102
- # Chat transcript at the top
103
- transcript = gr.Chatbot(label="Transcript", type="messages")
104
-
105
- # Lower row: text input and audio input side by side
106
  with gr.Row():
107
  with gr.Column(scale=1):
 
108
  text_input = gr.Textbox(
109
- placeholder="Type your message here and press Enter...", label="Your Message"
110
  )
 
111
  with gr.Column(scale=1):
112
  audio = WebRTC(
113
  rtc_configuration=rtc_configuration,
@@ -116,7 +127,7 @@ with gr.Blocks() as demo:
116
  modality="audio",
117
  )
118
 
119
- # Audio stream: process audio when speaking stops.
120
  audio.stream(
121
  ReplyOnPause(transcribe),
122
  inputs=[audio, transformers_chat, transcript],
@@ -130,14 +141,14 @@ with gr.Blocks() as demo:
130
  show_progress="hidden",
131
  )
132
 
133
- # Text input: submit callback when pressing Enter.
134
- text_input.submit(
135
  respond_text,
136
  inputs=[text_input, transformers_chat, transcript],
137
  outputs=[transformers_chat, transcript],
138
  )
139
- # Clear text input after submission.
140
- text_input.submit(lambda: "", inputs=[], outputs=[text_input])
141
 
142
  if __name__ == "__main__":
143
  demo.launch()
 
6
  import os
7
  import torch
8
  import librosa
9
+ import spaces
10
+
11
 
12
  pipe = transformers.pipeline(
13
  model="reach-vb/smolvox-smollm2-whisper-turbo",
 
23
 
24
  if account_sid and auth_token:
25
  client = Client(account_sid, auth_token)
26
+
27
  token = client.tokens.create()
28
+
29
  rtc_configuration = {
30
  "iceServers": token.ice_servers,
31
  "iceTransportPolicy": "relay",
 
33
  else:
34
  rtc_configuration = None
35
 
36
+ @spaces.GPU(duration=90)
37
+ def transcribe(
38
+ audio: tuple[int, np.ndarray],
39
+ transformers_chat: list[dict],
40
+ conversation: list[dict],
41
+ ):
42
  original_sr = audio[0]
43
  target_sr = 16000
44
 
 
48
 
49
  tf_input = [d for d in transformers_chat]
50
 
51
+ # Generate response from the pipeline using the audio input
52
  output = pipe(
53
  {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
54
  max_new_tokens=512,
 
64
 
65
  yield AdditionalOutputs(transformers_chat, conversation)
66
 
67
+ @spaces.GPU(duration=90)
68
+ def respond_text(
69
+ user_text: str,
70
+ transformers_chat: list[dict],
71
+ conversation: list[dict],
72
+ ):
73
  if not user_text.strip():
74
+ # Do nothing if the textbox is empty
75
  return transformers_chat, conversation
76
 
77
  # Append the user message from the textbox
78
  conversation.append({"role": "user", "content": user_text})
79
  transformers_chat.append({"role": "user", "content": user_text})
80
 
81
+ # Generate a response using the pipeline.
82
+ # Here we assume the pipeline can also process text input via the "text" key.
83
  output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
84
 
85
  conversation.append({"role": "assistant", "content": output})
 
102
  </p>
103
  """
104
  )
 
105
  # Shared conversation state
106
  transformers_chat = gr.State(
107
  value=[
 
112
  ]
113
  )
114
 
 
 
 
 
115
  with gr.Row():
116
  with gr.Column(scale=1):
117
+ transcript = gr.Chatbot(label="Transcript", type="messages")
118
  text_input = gr.Textbox(
119
+ placeholder="Type your message here...", label="Your Message"
120
  )
121
+ send_button = gr.Button("Send")
122
  with gr.Column(scale=1):
123
  audio = WebRTC(
124
  rtc_configuration=rtc_configuration,
 
127
  modality="audio",
128
  )
129
 
130
+ # Audio stream: when you stop speaking, process the audio input.
131
  audio.stream(
132
  ReplyOnPause(transcribe),
133
  inputs=[audio, transformers_chat, transcript],
 
141
  show_progress="hidden",
142
  )
143
 
144
+ # Text input: when you click "Send", process the typed message.
145
+ send_button.click(
146
  respond_text,
147
  inputs=[text_input, transformers_chat, transcript],
148
  outputs=[transformers_chat, transcript],
149
  )
150
+ # Optionally clear the text box after sending:
151
+ send_button.click(lambda: "", inputs=[], outputs=[text_input])
152
 
153
  if __name__ == "__main__":
154
  demo.launch()