Spaces:
Running
Running
修复上传干声采样率不齐导致的语速问题
Browse files
app.py
CHANGED
@@ -41,10 +41,11 @@ def create_fn(model, spk):
|
|
41 |
return 0, None
|
42 |
sr, audio = input_audio
|
43 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
|
|
44 |
if len(audio.shape) > 1:
|
45 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
46 |
temp_path = "temp.wav"
|
47 |
-
soundfile.write(temp_path, audio, sr, format="wav")
|
48 |
|
49 |
model.hubert_model = hubert_dict[model.speech_encoder]
|
50 |
out_audio = model.slice_inference(raw_audio_path=temp_path,
|
@@ -58,7 +59,7 @@ def create_fn(model, spk):
|
|
58 |
auto_predict_f0=auto_f0)
|
59 |
model.clear_empty()
|
60 |
os.remove(temp_path)
|
61 |
-
return sr, out_audio
|
62 |
|
63 |
async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
|
64 |
if input_text == '':
|
@@ -79,7 +80,7 @@ def create_fn(model, spk):
|
|
79 |
temp_path = "temp.wav"
|
80 |
wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
|
81 |
sr, audio = gr_pu.audio_from_file(temp_path)
|
82 |
-
input_audio = (sr, audio)
|
83 |
return svc_fn(input_audio, vc_transform, auto_f0, f0p)
|
84 |
|
85 |
return svc_fn, tts_fn
|
|
|
41 |
return 0, None
|
42 |
sr, audio = input_audio
|
43 |
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
44 |
+
audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
|
45 |
if len(audio.shape) > 1:
|
46 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
47 |
temp_path = "temp.wav"
|
48 |
+
soundfile.write(temp_path, audio, sampling_rate, format="wav")
|
49 |
|
50 |
model.hubert_model = hubert_dict[model.speech_encoder]
|
51 |
out_audio = model.slice_inference(raw_audio_path=temp_path,
|
|
|
59 |
auto_predict_f0=auto_f0)
|
60 |
model.clear_empty()
|
61 |
os.remove(temp_path)
|
62 |
+
return sampling_rate, out_audio
|
63 |
|
64 |
async def tts_fn(input_text, gender, tts_rate, vc_transform, auto_f0, f0p):
|
65 |
if input_text == '':
|
|
|
80 |
temp_path = "temp.wav"
|
81 |
wavfile.write(temp_path, sampling_rate, (audio * np.iinfo(np.int16).max).astype(np.int16))
|
82 |
sr, audio = gr_pu.audio_from_file(temp_path)
|
83 |
+
input_audio = (sampling_rate, audio)
|
84 |
return svc_fn(input_audio, vc_transform, auto_f0, f0p)
|
85 |
|
86 |
return svc_fn, tts_fn
|