drewThomasson committed • 4afa1d4
1 parent: 77be666

Fixed Custom model loading issue
app.py CHANGED
@@ -73,7 +73,7 @@ parser.add_argument("--custom_model_url", type=str,
                     "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
                     "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
 parser.add_argument("--temperature", type=float, default=0.65, help="Temperature for the model. Defaults to 0.65. Higher Tempatures will lead to more creative outputs IE: more Hallucinations. Lower Tempatures will be more monotone outputs IE: less Hallucinations.")
-parser.add_argument("--length_penalty", type=float, default=1.0, help="A length penalty applied to the autoregressive decoder. Defaults to 1.0.")
+parser.add_argument("--length_penalty", type=float, default=1.0, help="A length penalty applied to the autoregressive decoder. Defaults to 1.0. Not applied to custom models.")
 parser.add_argument("--repetition_penalty", type=float, default=2.0, help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
 parser.add_argument("--top_k", type=int, default=50, help="Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 50.")
 parser.add_argument("--top_p", type=float, default=0.8, help="Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 0.8.")
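For context, a minimal runnable sketch of the generation flags this hunk touches and of the behaviour the commit introduces (dropping length_penalty when a custom model is used). The "--custom_model" flag and the gen_kwargs dict are illustrative assumptions, not code taken from app.py:

    import argparse

    # Sketch of the generation flags from the hunk above (help text trimmed).
    # "--custom_model" is a hypothetical stand-in for however the app decides
    # that a custom fine-tune is in use.
    parser = argparse.ArgumentParser(description="XTTS generation settings (sketch)")
    parser.add_argument("--temperature", type=float, default=0.65)
    parser.add_argument("--length_penalty", type=float, default=1.0)  # not applied to custom models
    parser.add_argument("--repetition_penalty", type=float, default=2.0)
    parser.add_argument("--top_k", type=int, default=50)
    parser.add_argument("--top_p", type=float, default=0.8)
    parser.add_argument("--custom_model", type=str, default=None)
    args = parser.parse_args()

    # Build the kwargs forwarded to model.inference(); drop length_penalty when a
    # custom model is used, which is the behaviour this commit introduces.
    gen_kwargs = dict(temperature=args.temperature,
                      repetition_penalty=args.repetition_penalty,
                      top_k=args.top_k, top_p=args.top_p)
    if not args.custom_model:
        gen_kwargs["length_penalty"] = args.length_penalty
    print(gen_kwargs)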
@@ -643,7 +643,9 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, tempe
             print(f"Generating fragment: {fragment}...")
             fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
             if custom_model:
-                out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting)
+                # length penalty will not apply for custome models, its just too much of a headache perhaps if someone else can do it for me lol, im just one man :(
+                out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature=temperature, repetition_penalty=repetition_penalty, top_k=top_k, top_p=top_p, speed=speed, enable_text_splitting=enable_text_splitting)
+                #out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting)
                 torchaudio.save(fragment_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
             else:
                 speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
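The hunk above is the core of the fix: the custom-model call now passes keyword arguments and omits length_penalty. A minimal standalone sketch of that call path, assuming the Coqui TTS XTTS API (XttsConfig, Xtts.load_checkpoint, get_conditioning_latents, inference) and hypothetical file paths:

    import torch
    import torchaudio
    from TTS.tts.configs.xtts_config import XttsConfig
    from TTS.tts.models.xtts import Xtts

    # Load an unzipped custom XTTS fine-tune (paths are hypothetical).
    config = XttsConfig()
    config.load_json("custom_model/config.json")
    model = Xtts.init_from_config(config)
    model.load_checkpoint(config, checkpoint_dir="custom_model", eval=True)

    # Conditioning latents come from a short reference clip of the target voice.
    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=["ref_voice.wav"])

    # Keyword arguments only, and no length_penalty, mirroring the fixed call above.
    out = model.inference(
        "Hello there.", "en", gpt_cond_latent, speaker_embedding,
        temperature=0.65, repetition_penalty=2.0, top_k=50, top_p=0.8,
        speed=1.0, enable_text_splitting=True,
    )
    torchaudio.save("fragment.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)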
@@ -904,7 +906,7 @@ def run_gradio_interface():
                     maximum=10.0,
                     step=0.1,
                     value=1.0,
-                    info="Penalize longer sequences. Higher values produce shorter outputs."
+                    info="Penalize longer sequences. Higher values produce shorter outputs. Not applied to custom models."
                 )
                 repetition_penalty = gr.Slider(
                     label="Repetition Penalty",
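A minimal Gradio sketch of the slider wiring this hunk annotates; the slider minimums, the checkbox, and the preview callback are illustrative assumptions, and only the Length Penalty info text mirrors the diff:

    import gradio as gr

    def preview_settings(length_penalty, repetition_penalty, use_custom_model):
        # Mirror the app's behaviour: the length penalty is ignored for custom models.
        effective = None if use_custom_model else length_penalty
        return f"length_penalty={effective}, repetition_penalty={repetition_penalty}"

    with gr.Blocks() as demo:
        use_custom_model = gr.Checkbox(label="Use custom model", value=False)
        length_penalty = gr.Slider(
            label="Length Penalty",
            minimum=0.5, maximum=10.0, step=0.1, value=1.0,
            info="Penalize longer sequences. Higher values produce shorter outputs. Not applied to custom models.",
        )
        repetition_penalty = gr.Slider(
            label="Repetition Penalty",
            minimum=1.0, maximum=10.0, step=0.1, value=2.0,
        )
        settings_box = gr.Textbox(label="Effective settings")
        for ctrl in (use_custom_model, length_penalty, repetition_penalty):
            ctrl.change(preview_settings,
                        [length_penalty, repetition_penalty, use_custom_model],
                        settings_box)

    demo.launch()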
@@ -1036,4 +1038,4 @@ if args.headless:
 
 else:
     # Launch Gradio UI
-    run_gradio_interface()
+    run_gradio_interface()