Steveeeeeeen (HF Staff) committed
Commit a7e49b4 · verified
1 parent: 691f0df

Update app.py

Files changed (1)
  1. app.py +28 -39
app.py CHANGED

@@ -6,8 +6,6 @@ from twilio.rest import Client
 import os
 import torch
 import librosa
-import spaces
-
 
 pipe = transformers.pipeline(
     model="reach-vb/smolvox-smollm2-whisper-turbo",
@@ -23,9 +21,7 @@ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
 
 if account_sid and auth_token:
     client = Client(account_sid, auth_token)
-
     token = client.tokens.create()
-
     rtc_configuration = {
         "iceServers": token.ice_servers,
         "iceTransportPolicy": "relay",
@@ -33,12 +29,8 @@ if account_sid and auth_token:
 else:
     rtc_configuration = None
 
-@spaces.GPU(duration=90)
-def transcribe(
-    audio: tuple[int, np.ndarray],
-    transformers_chat: list[dict],
-    conversation: list[dict],
-):
+
+def transcribe(audio: tuple[int, np.ndarray], transformers_chat: list[dict], conversation: list[dict]):
     original_sr = audio[0]
     target_sr = 16000
 
@@ -48,7 +40,7 @@ def transcribe(
 
     tf_input = [d for d in transformers_chat]
 
-    # Generate response from the pipeline using the audio input
+    # Generate a response from the pipeline using the audio input
     output = pipe(
         {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
         max_new_tokens=512,
@@ -64,22 +56,16 @@ def transcribe(
 
     yield AdditionalOutputs(transformers_chat, conversation)
 
-@spaces.GPU(duration=90)
-def respond_text(
-    user_text: str,
-    transformers_chat: list[dict],
-    conversation: list[dict],
-):
+
+def respond_text(user_text: str, transformers_chat: list[dict], conversation: list[dict]):
     if not user_text.strip():
-        # Do nothing if the textbox is empty
         return transformers_chat, conversation
 
     # Append the user message from the textbox
     conversation.append({"role": "user", "content": user_text})
     transformers_chat.append({"role": "user", "content": user_text})
 
-    # Generate a response using the pipeline.
-    # Here we assume the pipeline can also process text input via the "text" key.
+    # Generate a response using the pipeline. We assume it can process text input via "text"
    output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
 
     conversation.append({"role": "assistant", "content": output})
@@ -90,18 +76,19 @@ def respond_text(
 with gr.Blocks() as demo:
     gr.HTML(
         """
-        <h1 style='text-align: center'>
-        Talk to Smolvox Smollm2 (Powered by WebRTC ⚡️)
-        </h1>
-        <p style='text-align: center'>
-        Once you grant access to your microphone, you can talk naturally to Ultravox.
-        When you stop talking, the audio will be sent for processing.
-        </p>
-        <p style='text-align: center'>
-        Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
-        </p>
-        """
+        <h1 style='text-align: center'>
+        Talk to Smolvox Smollm2 1.7b (Powered by WebRTC ⚡️)
+        </h1>
+        <p style='text-align: center'>
+        Once you grant access to your microphone, you can talk naturally to Ultravox.
+        When you stop talking, the audio will be sent for processing.
+        </p>
+        <p style='text-align: center'>
+        Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
+        </p>
+        """
     )
+
     # Shared conversation state
     transformers_chat = gr.State(
         value=[
@@ -112,13 +99,15 @@ with gr.Blocks() as demo:
         ]
     )
 
+    # Chat transcript at the top
+    transcript = gr.Chatbot(label="Transcript", type="messages")
+
+    # Lower row: text input and audio input side by side
     with gr.Row():
         with gr.Column(scale=1):
-            transcript = gr.Chatbot(label="Transcript", type="messages")
             text_input = gr.Textbox(
-                placeholder="Type your message here...", label="Your Message"
+                placeholder="Type your message here and press Enter...", label="Your Message"
             )
-            send_button = gr.Button("Send")
         with gr.Column(scale=1):
             audio = WebRTC(
                 rtc_configuration=rtc_configuration,
@@ -127,7 +116,7 @@ with gr.Blocks() as demo:
                 modality="audio",
             )
 
-    # Audio stream: when you stop speaking, process the audio input.
+    # Audio stream: process audio when speaking stops.
     audio.stream(
         ReplyOnPause(transcribe),
         inputs=[audio, transformers_chat, transcript],
@@ -141,14 +130,14 @@ with gr.Blocks() as demo:
         show_progress="hidden",
     )
 
-    # Text input: when you click "Send", process the typed message.
-    send_button.click(
+    # Text input: submit callback when pressing Enter.
+    text_input.submit(
         respond_text,
         inputs=[text_input, transformers_chat, transcript],
         outputs=[transformers_chat, transcript],
     )
-    # Optionally clear the text box after sending:
-    send_button.click(lambda: "", inputs=[], outputs=[text_input])
+    # Clear text input after submission.
+    text_input.submit(lambda: "", inputs=[], outputs=[text_input])
 
 if __name__ == "__main__":
     demo.launch()
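
Note on the resampling step: the hunks above show transcribe() reading original_sr from the WebRTC audio tuple and targeting 16 kHz, but the resample call itself falls outside the displayed context. Below is a minimal sketch of what that step typically looks like with librosa; the resample_to_16k helper name and the int16-to-float32 conversion are illustrative assumptions, not code from this commit.

import numpy as np
import librosa


def resample_to_16k(audio: tuple[int, np.ndarray]) -> np.ndarray:
    # WebRTC hands the handler a (sample_rate, samples) tuple
    original_sr, samples = audio
    target_sr = 16000  # Whisper-style front ends expect 16 kHz audio

    # Convert int16 PCM to float32 in [-1, 1], the range librosa works with (assumed input dtype)
    if samples.dtype != np.float32:
        samples = samples.astype(np.float32) / 32768.0

    # Resample only when the capture rate differs from the target rate
    if original_sr != target_sr:
        samples = librosa.resample(samples, orig_sr=original_sr, target_sr=target_sr)
    return samples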
 
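
The UI change in this commit is the switch from a "Send" button wired through send_button.click to submitting on Enter through text_input.submit, with a second submit handler clearing the textbox. A self-contained sketch of that Gradio pattern follows; the echo handler is a placeholder for illustration only and is not part of this Space.

import gradio as gr


def echo(user_text: str, history: list[dict]) -> list[dict]:
    # Placeholder handler: append the user turn and a canned assistant turn
    history = (history or []) + [
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": f"echo: {user_text}"},
    ]
    return history


with gr.Blocks() as demo:
    chat = gr.Chatbot(label="Transcript", type="messages")
    box = gr.Textbox(placeholder="Type your message here and press Enter...")

    # .submit() fires when the user presses Enter inside the textbox
    box.submit(echo, inputs=[box, chat], outputs=[chat])
    # A second submit handler clears the textbox, mirroring the commit above
    box.submit(lambda: "", inputs=[], outputs=[box])

if __name__ == "__main__":
    demo.launch()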