zero-shot-tts

Sleeping

mrfakename committed on Nov 1, 2024

Commit

b0bca14

verified ·

1 Parent(s): 1df5e0e

Sync from GitHub repo

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (3) hide show

src/f5_tts/api.py CHANGED Viewed

@@ -15,6 +15,9 @@ from f5_tts.infer.utils_infer import (
     infer_process,
     remove_silence_for_generated_wav,
     save_spectrogram,
 )
@@ -31,10 +34,8 @@ class F5TTS:
     ):
         # Initialize parameters
         self.final_wave = None
-        self.target_sample_rate = 24000
-        self.n_mel_channels = 100
-        self.hop_length = 256
-        self.target_rms = 0.1
         self.seed = -1
         # Set device
@@ -97,6 +98,9 @@ class F5TTS:
             seed = random.randint(0, sys.maxsize)
         seed_everything(seed)
         self.seed = seed
         wav, sr, spect = infer_process(
             ref_file,
             ref_text,

     infer_process,
     remove_silence_for_generated_wav,
     save_spectrogram,
+    preprocess_ref_audio_text,
+    target_sample_rate,
+    hop_length,
 )
     ):
         # Initialize parameters
         self.final_wave = None
+        self.target_sample_rate = target_sample_rate
+        self.hop_length = hop_length
         self.seed = -1
         # Set device
             seed = random.randint(0, sys.maxsize)
         seed_everything(seed)
         self.seed = seed
+        ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device)
         wav, sr, spect = infer_process(
             ref_file,
             ref_text,

src/f5_tts/infer/infer_cli.py CHANGED Viewed

@@ -161,6 +161,8 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence, speed
     chunks = re.split(reg1, text_gen)
     reg2 = r"\[(\w+)\]"
     for text in chunks:
         match = re.match(reg2, text)
         if match:
             voice = match[1]

     chunks = re.split(reg1, text_gen)
     reg2 = r"\[(\w+)\]"
     for text in chunks:
+        if not text.strip():
+            continue
         match = re.match(reg2, text)
         if match:
             voice = match[1]

src/f5_tts/train/finetune_gradio.py CHANGED Viewed

@@ -1216,7 +1216,7 @@ def infer(project, file_checkpoint, exp_name, ref_text, ref_audio, gen_text, nfe
     else:
         device_test = None
-    if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema:
         if last_checkpoint != file_checkpoint:
             last_checkpoint = file_checkpoint

     else:
         device_test = None
+    if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema or tts_api is None:
         if last_checkpoint != file_checkpoint:
             last_checkpoint = file_checkpoint