gorkemgoknar committed on
Commit
6d2da72
1 Parent(s): f17c59e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -13
app.py CHANGED
@@ -44,10 +44,10 @@ st = os.stat("ffmpeg")
44
  os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)
45
 
46
  # This will trigger downloading model
47
- print("Downloading if not downloaded Coqui XTTS V1.1")
48
  from TTS.utils.manage import ModelManager
49
 
50
- model_name = "tts_models/multilingual/multi-dataset/xtts_v1.1"
51
  ModelManager().download_model(model_name)
52
  model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
53
  print("XTTS downloaded")
@@ -55,10 +55,6 @@ print("XTTS downloaded")
55
  config = XttsConfig()
56
  config.load_json(os.path.join(model_path, "config.json"))
57
 
58
- # it should be there just to be sure
59
- if "ja" not in config.languages:
60
- config.languages.append("ja")
61
-
62
  model = Xtts.init_from_config(config)
63
  model.load_checkpoint(
64
  config,
@@ -74,11 +70,8 @@ DEVICE_ASSERT_DETECTED = 0
74
  DEVICE_ASSERT_PROMPT = None
75
  DEVICE_ASSERT_LANG = None
76
 
77
-
78
- # supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
79
  supported_languages = config.languages
80
 
81
-
82
  def predict(
83
  prompt,
84
  language,
@@ -254,8 +247,7 @@ def predict(
254
  language,
255
  gpt_cond_latent,
256
  speaker_embedding,
257
- diffusion_conditioning,
258
- decoder="ne_hifigan",
259
  )
260
  inference_time = time.time() - t0
261
  print(f"I: Time to generate audio: {round(inference_time*1000)} milliseconds")
@@ -272,8 +264,7 @@ def predict(
272
  prompt,
273
  language,
274
  gpt_cond_latent,
275
- speaker_embedding,
276
- decoder="ne_hifigan",
277
  )
278
 
279
  first_chunk = True
 
44
  os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)
45
 
46
  # This will trigger downloading model
47
+ print("Downloading if not downloaded Coqui XTTS V2")
48
  from TTS.utils.manage import ModelManager
49
 
50
+ model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
51
  ModelManager().download_model(model_name)
52
  model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
53
  print("XTTS downloaded")
 
55
  config = XttsConfig()
56
  config.load_json(os.path.join(model_path, "config.json"))
57
 
 
 
 
 
58
  model = Xtts.init_from_config(config)
59
  model.load_checkpoint(
60
  config,
 
70
  DEVICE_ASSERT_PROMPT = None
71
  DEVICE_ASSERT_LANG = None
72
 
 
 
73
  supported_languages = config.languages
74
 
 
75
  def predict(
76
  prompt,
77
  language,
 
247
  language,
248
  gpt_cond_latent,
249
  speaker_embedding,
250
+ diffusion_conditioning
 
251
  )
252
  inference_time = time.time() - t0
253
  print(f"I: Time to generate audio: {round(inference_time*1000)} milliseconds")
 
264
  prompt,
265
  language,
266
  gpt_cond_latent,
267
+ speaker_embedding
 
268
  )
269
 
270
  first_chunk = True