Update app.py
app.py CHANGED
@@ -36,6 +36,7 @@ from pydub import AudioSegment
 import librosa
 
 from scipy.io.wavfile import write, read
+from scipy.io import wavfile
 
 import subprocess
 
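The added `from scipy.io import wavfile` import complements the existing `from scipy.io.wavfile import write, read`; both expose the same WAV helpers, with `wavfile.read` returning a `(sample_rate, samples)` pair. A minimal sketch of the round-trip the updated code relies on ("example.wav" and "copy.wav" are placeholder paths, not files from this Space):

```python
# Round-trip sketch for scipy.io.wavfile; the file names are placeholders.
import numpy as np
from scipy.io import wavfile
from scipy.io.wavfile import write, read

rate, data = wavfile.read("example.wav")   # rate: sample rate in Hz, data: np.ndarray of samples
write("copy.wav", rate, data)              # write(filename, sample_rate, samples)

rate2, data2 = read("copy.wav")
assert rate2 == rate and np.array_equal(data2, data)
```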
@@ -194,16 +195,15 @@ def voice_conversion(apikey, ta, audio, choice1):
     tts = gTTS(chat_response, lang='zh-CN')
     tts.save("output.wav")
 
-    target_audio =
-    reference_audio =
-    driving_audio =
+    target_audio = 'target.wav'
+    reference_audio = 'reference.wav'
+    driving_audio = 'driving.wav'
 
-
-    da = "output.wav"
+    rate1, data1 = wavfile.read("mywav.wav")
 
     write(target_audio, ta[0], ta[1])
-    write(reference_audio,
-    write(driving_audio,
+    write(reference_audio, rate1, data1)
+    write(driving_audio, rate1, data1)
 
     # !ffmpeg-normalize $target_audio -nt rms -t=-27 -o $target_audio -ar 16000 -f
     # !ffmpeg-normalize $reference_audio -nt rms -t=-27 -o $reference_audio -ar 16000 -f
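For context on `write(target_audio, ta[0], ta[1])`: a Gradio 3.x `gr.Audio` input with `type="numpy"` passes the function a `(sample_rate, samples)` tuple, which maps directly onto `scipy.io.wavfile.write`. The sketch below assumes that is how `ta` is produced; the component declaration and file names are not part of this diff.

```python
# Sketch (Gradio 3.x): persisting a numpy-audio input the way the diff does.
# The microphone source and "target.wav" path are assumptions, not taken from the commit.
import gradio as gr
from scipy.io.wavfile import write

def save_target(ta):
    # ta arrives as (sample_rate, numpy_array) when type="numpy"
    write("target.wav", ta[0], ta[1])
    return "target.wav"

demo = gr.Interface(
    fn=save_target,
    inputs=gr.Audio(source="microphone", type="numpy"),
    outputs=gr.Audio(label="Saved copy"),
)
```

The new `rate1, data1 = wavfile.read("mywav.wav")` line then reuses a bundled recording as both the reference and driving audio.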
@@ -236,20 +236,19 @@ def voice_conversion(apikey, ta, audio, choice1):
     # print("Reference Audio after decoder:")
     # IPython.display.display(Audio(ref_wav_voc, rate=ap.sample_rate))
 
-
-
-
-
+    voicefixer.restore(input=ref_wav_voc, # input wav file path
+                       output="audio1.wav", # output wav file path
+                       cuda=True, # whether to use gpu acceleration
+                       mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
 
-
-
-
+    noisy = enhance_model.load_audio(
+        "audio1.wav"
+    ).unsqueeze(0)
 
-
-
+    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+    torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
 
-
-    return (ap.sample_rate, ref_wav_voc)
+    return [result.text, chat_response, "enhanced.wav"]
 
 c1=gr.Interface(
     fn=voice_conversion,
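The added block chains two post-processing steps: a VoiceFixer restoration pass that writes `audio1.wav`, followed by a SpeechBrain enhancement pass that writes `enhanced.wav`, which the function now returns together with the transcript and the chat response. Neither `voicefixer` nor `enhance_model` is constructed in this diff; the sketch below assumes a plain `VoiceFixer()` instance and the MetricGAN+ `SpectralMaskEnhancement` model, and uses placeholder file names.

```python
# Sketch of the restore -> enhance chain; model source and file names are assumptions.
import torch
import torchaudio
from voicefixer import VoiceFixer
from speechbrain.pretrained import SpectralMaskEnhancement

voicefixer = VoiceFixer()
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
)

# 1) VoiceFixer reads a degraded wav from disk and writes a restored copy.
voicefixer.restore(input="raw.wav", output="audio1.wav", cuda=False, mode=0)

# 2) MetricGAN+ enhances the restored file; lengths=[1.] means "use the full waveform".
noisy = enhance_model.load_audio("audio1.wav").unsqueeze(0)
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
```

One hedged observation: the diff passes `ref_wav_voc` (an in-memory waveform) as `input=`, while the inline comment calls it a wav file path; whether that mismatch matters depends on the VoiceFixer version in use.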
@@ -260,8 +259,7 @@ c1=gr.Interface(
         gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
     ],
     outputs=[
-
-        gr.Audio(label="Audio with Custom Voice"),
+        gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
     ],
     #theme="huggingface",
     description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
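The outputs list for `c1` grows from a single `gr.Audio` to two `gr.Textbox` components plus the audio player, matching the new three-element return value `[result.text, chat_response, "enhanced.wav"]`. A minimal, self-contained sketch of that contract (the stub function and its return values are illustrative only):

```python
# Sketch: Gradio expects one returned value per declared output component.
import gradio as gr

def stub_voice_conversion(audio_path):
    transcript = "speech-to-text placeholder"
    reply = "chat response placeholder"
    return transcript, reply, audio_path      # 3 values -> 3 outputs below

demo = gr.Interface(
    fn=stub_voice_conversion,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="ChatGPT Output"),
        gr.Audio(label="Audio with Custom Voice"),
    ],
)
```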
@@ -276,8 +274,7 @@ c2=gr.Interface(
         gr.Radio(["TOEFL", "Therapist", "Alice"], label="TOEFL Examiner, Therapist Tina, or Assistant Alice?"),
     ],
     outputs=[
-
-        gr.Audio(label="Audio with Custom Voice"),
+        gr.Textbox(label="Speech to Text"), gr.Textbox(label="ChatGPT Output"), gr.Audio(label="Audio with Custom Voice"),
     ],
     #theme="huggingface",
     description = "🤖 - 让有人文关怀的AI造福每一个人!AI向善,文明璀璨!TalktoAI - Enable the future!",
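`c2` receives the same three-component outputs as `c1`, so both interfaces stay consistent with the new return signature. How the two are exposed is not part of this diff; one common Gradio 3.x pattern, shown purely as an assumption, is a tabbed layout:

```python
# Sketch only: the dummy c1/c2 stand in for the two interfaces defined above,
# and the tab names are placeholders, not taken from the commit.
import gradio as gr

c1 = gr.Interface(fn=lambda t: t, inputs=gr.Textbox(), outputs=gr.Textbox())
c2 = gr.Interface(fn=lambda t: t, inputs=gr.Textbox(), outputs=gr.Textbox())

demo = gr.TabbedInterface([c1, c2], ["Tab 1", "Tab 2"])
demo.launch()
```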