Spaces:
Sleeping
Sleeping
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
src/f5_tts/train/finetune_gradio.py
CHANGED
@@ -147,6 +147,8 @@ def load_settings(project_name):
|
|
147 |
|
148 |
with open(file_setting, "r") as f:
|
149 |
settings = json.load(f)
|
|
|
|
|
150 |
return (
|
151 |
settings["exp_name"],
|
152 |
settings["learning_rate"],
|
@@ -735,6 +737,22 @@ def format_seconds_to_hms(seconds):
|
|
735 |
return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
|
736 |
|
737 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
738 |
def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
739 |
path_project = os.path.join(path_data, name_project)
|
740 |
path_project_wavs = os.path.join(path_project, "wavs")
|
@@ -764,7 +782,7 @@ def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
|
764 |
continue
|
765 |
name_audio, text = sp_line[:2]
|
766 |
|
767 |
-
file_audio =
|
768 |
|
769 |
if not os.path.isfile(file_audio):
|
770 |
error_files.append([file_audio, "error path"])
|
@@ -1363,6 +1381,10 @@ for tutorial and updates check here (https://github.com/SWivid/F5-TTS/discussion
|
|
1363 |
|
1364 |
with gr.Tabs():
|
1365 |
with gr.TabItem("transcribe Data"):
|
|
|
|
|
|
|
|
|
1366 |
ch_manual = gr.Checkbox(label="audio from path", value=False)
|
1367 |
|
1368 |
mark_info_transcribe = gr.Markdown(
|
@@ -1435,6 +1457,10 @@ Using the extended model, you can fine-tune to a new language that is missing sy
|
|
1435 |
)
|
1436 |
|
1437 |
with gr.TabItem("prepare Data"):
|
|
|
|
|
|
|
|
|
1438 |
gr.Markdown(
|
1439 |
"""```plaintext
|
1440 |
place all your wavs folder and your metadata.csv file in {your name project}
|
@@ -1447,10 +1473,10 @@ Using the extended model, you can fine-tune to a new language that is missing sy
|
|
1447 |
│
|
1448 |
└── metadata.csv
|
1449 |
|
1450 |
-
file format metadata.csv
|
1451 |
|
1452 |
-
audio1|text1
|
1453 |
-
audio2|text1
|
1454 |
...
|
1455 |
|
1456 |
```"""
|
|
|
147 |
|
148 |
with open(file_setting, "r") as f:
|
149 |
settings = json.load(f)
|
150 |
+
if "logger" not in settings:
|
151 |
+
settings["logger"] = "wandb"
|
152 |
return (
|
153 |
settings["exp_name"],
|
154 |
settings["learning_rate"],
|
|
|
737 |
return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
|
738 |
|
739 |
|
740 |
+
def get_correct_audio_path(audio_input, base_path="wavs"):
    """Resolve an audio reference to the path of its wav file.

    Three input shapes are accepted:

    1. An absolute path, which is returned unchanged.
    2. A relative name that already ends in ".wav" (any letter case),
       which is joined onto ``base_path``.
    3. A bare name without the extension, which gets ".wav" appended
       and is then joined onto ``base_path``.

    Args:
        audio_input: Audio reference string in one of the shapes above.
        base_path: Directory that relative references are joined onto.

    Returns:
        The resolved path as a string. The function does not check that
        the file exists; callers are expected to do that themselves.
    """
    # Case 1: an absolute path is trusted as-is.
    if os.path.isabs(audio_input):
        return audio_input

    # Case 2: the extension is already present. Compare case-insensitively
    # so that "clip.WAV" is not turned into "clip.WAV.wav".
    if audio_input.lower().endswith(".wav"):
        return os.path.join(base_path, audio_input)

    # Case 3: only the bare name was given, so add the extension.
    return os.path.join(base_path, audio_input + ".wav")
|
754 |
+
|
755 |
+
|
756 |
def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
|
757 |
path_project = os.path.join(path_data, name_project)
|
758 |
path_project_wavs = os.path.join(path_project, "wavs")
|
|
|
782 |
continue
|
783 |
name_audio, text = sp_line[:2]
|
784 |
|
785 |
+
file_audio = get_correct_audio_path(name_audio, path_project_wavs)
|
786 |
|
787 |
if not os.path.isfile(file_audio):
|
788 |
error_files.append([file_audio, "error path"])
|
|
|
1381 |
|
1382 |
with gr.Tabs():
|
1383 |
with gr.TabItem("transcribe Data"):
|
1384 |
+
gr.Markdown("""```plaintext
|
1385 |
+
Skip this step if you have your dataset, metadata.csv, and a folder wavs with all the audio files.
|
1386 |
+
```""")
|
1387 |
+
|
1388 |
ch_manual = gr.Checkbox(label="audio from path", value=False)
|
1389 |
|
1390 |
mark_info_transcribe = gr.Markdown(
|
|
|
1457 |
)
|
1458 |
|
1459 |
with gr.TabItem("prepare Data"):
|
1460 |
+
gr.Markdown("""```plaintext
|
1461 |
+
Skip this step if you have your dataset, raw.arrow, duration.json and vocab.txt
|
1462 |
+
```""")
|
1463 |
+
|
1464 |
gr.Markdown(
|
1465 |
"""```plaintext
|
1466 |
place all your wavs folder and your metadata.csv file in {your name project}
|
|
|
1473 |
│
|
1474 |
└── metadata.csv
|
1475 |
|
1476 |
+
file format metadata.csv
|
1477 |
|
1478 |
+
audio1|text1 or audio1.wav|text1 or your_path/audio1.wav|text1
|
1479 |
+
audio2|text2 or audio2.wav|text2 or your_path/audio2.wav|text2
|
1480 |
...
|
1481 |
|
1482 |
```"""
|