cantabile-kwok committed on
Commit
eda4a4a
·
1 Parent(s): 17d49c8

change UI and auto convert to mono

Browse files
Files changed (2) hide show
  1. app.py +4 -1
  2. vec2wav2/utils/utils.py +13 -8
app.py CHANGED
@@ -23,7 +23,10 @@ def create_interface():
23
  )
24
  with gr.Blocks(title="Voice Conversion") as demo:
25
  gr.Markdown("# vec2wav 2.0 Voice Conversion Demo")
26
- gr.Markdown("Upload source audio and target speaker audio to convert the voice.")
 
 
 
27
 
28
  with gr.Row():
29
  source_audio = gr.Audio(label="Source Audio", type="filepath")
 
23
  )
24
  with gr.Blocks(title="Voice Conversion") as demo:
25
  gr.Markdown("# vec2wav 2.0 Voice Conversion Demo")
26
+ gr.Markdown("Upload source audio and target speaker audio to convert the voice.\n"
27
+ "Note that this space could be slow since it's running on a free CPU server. We recommend running this locally for faster results.\n"
28
+ "For more information, visit the [vec2wav 2.0 GitHub repository](https://github.com/cantabile-kwok/vec2wav2.0)\n"
29
+ "MPEG format is not supported. Please convert it to WAV format before uploading.")
30
 
31
  with gr.Row():
32
  source_audio = gr.Audio(label="Source Audio", type="filepath")
vec2wav2/utils/utils.py CHANGED
@@ -28,17 +28,22 @@ def read_wav_16k(audio_path):
28
  sr = audio_path[0]
29
  else: # Regular file path
30
  assert os.path.exists(audio_path), f"File not found: {audio_path}"
31
- wav, sr = sf.read(audio_path)
32
-
33
- if sr != 16000:
34
- audio_tensor = torch.tensor(wav, dtype=torch.float32)
35
- resampler = transforms.Resample(orig_freq=sr, new_freq=16000)
36
- wav = resampler(audio_tensor)
37
- wav = wav.numpy()
 
 
 
 
 
 
38
  return wav
39
 
40
 
41
-
42
  def find_files(root_dir, query="*.wav", include_root_dir=True):
43
  """Find files recursively.
44
 
 
28
  sr = audio_path[0]
29
  else: # Regular file path
30
  assert os.path.exists(audio_path), f"File not found: {audio_path}"
31
+ if audio_path.endswith(".wav"):
32
+ wav, sr = sf.read(audio_path)
33
+ if wav.ndim > 1:
34
+ wav = wav.mean(axis=-1) # Convert to mono
35
+
36
+ if sr != 16000:
37
+ audio_tensor = torch.tensor(wav, dtype=torch.float32)
38
+ resampler = transforms.Resample(orig_freq=sr, new_freq=16000)
39
+ wav = resampler(audio_tensor)
40
+ wav = wav.numpy()
41
+ else:
42
+ import librosa
43
+ wav, sr = librosa.load(audio_path, sr=16000, mono=True)
44
  return wav
45
 
46
 
 
47
  def find_files(root_dir, query="*.wav", include_root_dir=True):
48
  """Find files recursively.
49