wasmdashai committed on
Commit
3a636cb
·
verified ·
1 Parent(s): c39b385

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -0
app.py CHANGED
@@ -190,7 +190,13 @@ def modelspeech(text):
190
  wav = model_vits(input_ids=inputs["input_ids"]).waveform.cpu().numpy().reshape(-1)
191
  # display(Audio(wav, rate=model.config.sampling_rate))
192
  return model_vits.config.sampling_rate,wav#remove_noise_nr(wav)
 
 
 
193
 
 
 
 
194
  import re
195
  def clean_text(text):
196
  # Remove symbols and extra spaces
@@ -217,8 +223,30 @@ def text_to_speech(text,session_ai):
217
  # yield stream_wav
218
  if pad_text!='':
219
  yield modelspeech(pad_text),session_ai
 
220
  # for stream_wav in generate_audio(pad_text):
221
  # yield stream_wav
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  def dash(text,session_ai,is_state=True):
223
 
224
  response,session_ai=get_answer_ai(text,session_ai)
@@ -251,6 +279,11 @@ with gr.Blocks() as demo:
251
  text_input2 = gr.Textbox(label="Enter Text")
252
  audio_output = gr.Audio(streaming=True,autoplay=True)
253
  text_input2.submit(text_to_speech, [text_input2,session_ai], [audio_output,session_ai])
 
 
 
 
 
254
 
255
 
256
  demo.launch(show_error=True)
 
190
  wav = model_vits(input_ids=inputs["input_ids"]).waveform.cpu().numpy().reshape(-1)
191
  # display(Audio(wav, rate=model.config.sampling_rate))
192
  return model_vits.config.sampling_rate,wav#remove_noise_nr(wav)
193
def modelspeechstr(text):
    """Synthesize *text* with the VITS model and return the waveform as a string.

    Parameters
    ----------
    text : str
        Text to convert to speech (presumably Arabic, matching the rest of
        the app — TODO confirm against the loaded tokenizer/model).

    Returns
    -------
    str
        The full 1-D float waveform rendered with ``np.array2string``.
    """
    with torch.no_grad():  # inference only — no autograd bookkeeping needed
        inputs = tokenizer(text, return_tensors="pt")  # .cuda()
        wav = (
            model_vits(input_ids=inputs["input_ids"])
            .waveform.cpu()
            .numpy()
            .reshape(-1)
        )
    # display(Audio(wav, rate=model.config.sampling_rate))
    # BUG FIX: np.array2string defaults to threshold=1000, which silently
    # elides long arrays with "..." — every real waveform is far longer than
    # that. Raise the threshold so no samples are dropped from the output.
    return np.array2string(wav, threshold=wav.size + 1)
200
  import re
201
  def clean_text(text):
202
  # Remove symbols and extra spaces
 
223
  # yield stream_wav
224
  if pad_text!='':
225
  yield modelspeech(pad_text),session_ai
226
+
227
  # for stream_wav in generate_audio(pad_text):
228
  # yield stream_wav
229
+
230
+
231
+
232
def text_to_speechstr(text, session_ai):
    """Stream AI-generated speech for *text* as serialized-waveform strings.

    Feeds *text* to the chat model via ``dash`` and accumulates the cleaned
    response chunks; whenever more than ten characters have accumulated, the
    buffered text is synthesized with ``modelspeechstr`` and yielded. Any
    shorter remainder is flushed after the response ends.

    Parameters
    ----------
    text : str
        User prompt forwarded to the AI model.
    session_ai :
        Opaque conversation state passed through ``dash`` and re-yielded so
        the Gradio UI can keep it up to date.

    Yields
    ------
    tuple
        ``(waveform_string, session_ai)`` pairs for each synthesized span.
    """
    response = dash(text, session_ai, False)
    pad_text = ''
    for item in response:
        chunk, session_ai = item
        pad_text += str(clean_text(chunk))
        # Flush once enough text has accumulated to be worth synthesizing.
        # (len > 10 already implies the buffer is non-empty, so no separate
        # emptiness check is needed; the unused counter `k` was removed.)
        if len(pad_text) > 10:
            out = pad_text
            pad_text = ''
            yield modelspeechstr(out), session_ai
            # for stream_wav in generate_audio(out):
            #     yield stream_wav
    # Flush any trailing text shorter than the threshold.
    if pad_text != '':
        yield modelspeechstr(pad_text), session_ai
250
  def dash(text,session_ai,is_state=True):
251
 
252
  response,session_ai=get_answer_ai(text,session_ai)
 
279
  text_input2 = gr.Textbox(label="Enter Text")
280
  audio_output = gr.Audio(streaming=True,autoplay=True)
281
  text_input2.submit(text_to_speech, [text_input2,session_ai], [audio_output,session_ai])
282
+ with gr.Tab("AI Speechstr"):
283
+ gr.Markdown("# Text to Speech")
284
+ text_input3 = gr.Textbox(label="Enter Text")
285
+ text_input4 = gr.Textbox(label="out Text")
286
+ text_input3.submit(text_to_speechstr, [text_input3,session_ai], [text_input4,session_ai])
287
 
288
 
289
  demo.launch(show_error=True)