drewThomasson committed
Commit
4afa1d4
1 Parent(s): 77be666

Fixed Custom model loading issue

Files changed (1)
  1. app.py +6 -4
app.py CHANGED
@@ -73,7 +73,7 @@ parser.add_argument("--custom_model_url", type=str,
                         "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
                         "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
 parser.add_argument("--temperature", type=float, default=0.65, help="Temperature for the model. Defaults to 0.65. Higher temperatures lead to more creative outputs, i.e. more hallucinations. Lower temperatures give more monotone outputs, i.e. fewer hallucinations.")
-parser.add_argument("--length_penalty", type=float, default=1.0, help="A length penalty applied to the autoregressive decoder. Defaults to 1.0.")
+parser.add_argument("--length_penalty", type=float, default=1.0, help="A length penalty applied to the autoregressive decoder. Defaults to 1.0. Not applied to custom models.")
 parser.add_argument("--repetition_penalty", type=float, default=2.0, help="A penalty that prevents the autoregressive decoder from repeating itself. Defaults to 2.0.")
 parser.add_argument("--top_k", type=int, default=50, help="Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 50.")
 parser.add_argument("--top_p", type=float, default=0.8, help="Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Defaults to 0.8.")
@@ -643,7 +643,9 @@ def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, tempe
             print(f"Generating fragment: {fragment}...")
             fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
             if custom_model:
-                out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting)
+                # length_penalty is not applied for custom models; it's too much of a headache for now. Perhaps someone else can add it for me, I'm just one man :(
+                out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature=temperature, repetition_penalty=repetition_penalty, top_k=top_k, top_p=top_p, speed=speed, enable_text_splitting=enable_text_splitting)
+                #out = model.inference(fragment, language, gpt_cond_latent, speaker_embedding, temperature, length_penalty, repetition_penalty, top_k, top_p, speed, enable_text_splitting)
                 torchaudio.save(fragment_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
             else:
                 speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
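The fix swaps the long positional call for keyword arguments and drops length_penalty entirely on the custom-model path. Below is a minimal, self-contained sketch of the same pattern outside the app, assuming the standard Coqui TTS XTTS API; the checkpoint directory and reference clip are placeholder paths:

import torch
import torchaudio
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

# Load a fine-tuned XTTS checkpoint (e.g. the unzipped Finished_model_files directory).
config = XttsConfig()
config.load_json("Finished_model_files/config.json")
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="Finished_model_files", eval=True)

# Conditioning latents from a reference clip of the target voice.
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=["reference.wav"])

# Keyword arguments as in the fixed call; length_penalty is simply omitted.
out = model.inference(
    "Generating a test fragment.",
    "en",
    gpt_cond_latent,
    speaker_embedding,
    temperature=0.65,
    repetition_penalty=2.0,
    top_k=50,
    top_p=0.8,
    speed=1.0,
    enable_text_splitting=True,
)
torchaudio.save("fragment.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)

Passing everything after the conditioning latents as keywords also avoids the silent argument shift that dropping one parameter from a purely positional call would otherwise cause.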
@@ -904,7 +906,7 @@ def run_gradio_interface():
                 maximum=10.0,
                 step=0.1,
                 value=1.0,
-                info="Penalize longer sequences. Higher values produce shorter outputs."
+                info="Penalize longer sequences. Higher values produce shorter outputs. Not applied to custom models."
             )
             repetition_penalty = gr.Slider(
                 label="Repetition Penalty",
@@ -1036,4 +1038,4 @@ if args.headless:
 
 else:
     # Launch Gradio UI
-    run_gradio_interface()
+    run_gradio_interface()
 