test-rtechs committed on
Commit
4265432
·
verified ·
1 Parent(s): cd7a065

Update app_rvc.py

Browse files
Files changed (1) hide show
  1. app_rvc.py +32 -18
app_rvc.py CHANGED
@@ -4,10 +4,6 @@ os.system("pip install -q piper-tts==1.2.0")
4
  os.system("pip install -q -r requirements_xtts.txt")
5
  os.system("pip install -q TTS==0.21.1 --no-deps")
6
  import spaces
7
- import torch
8
- if os.environ.get("ZERO_GPU") != "TRUE" and torch.cuda.is_available():
9
- # onnxruntime GPU
10
- os.system("pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/")
11
  import librosa
12
  from soni_translate.logging_setup import (
13
  logger,
@@ -15,6 +11,7 @@ from soni_translate.logging_setup import (
15
  configure_logging_libs,
16
  ); configure_logging_libs() # noqa
17
  import whisperx
 
18
  import os
19
  from soni_translate.audio_segments import create_translated_audio
20
  from soni_translate.text_to_speech import (
@@ -350,6 +347,7 @@ class SoniTranslate(SoniTrCache):
350
  edit_text_arg = kwargs[31]
351
  get_text_arg = kwargs[32]
352
 
 
353
  is_gui_arg = kwargs[-1]
354
 
355
  kwargs = kwargs[3:]
@@ -365,6 +363,14 @@ class SoniTranslate(SoniTrCache):
365
  return self.multilingual_media_conversion(
366
  media_batch[0], "", "", *kwargs
367
  )
 
 
 
 
 
 
 
 
368
 
369
  if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
370
  media_batch = [media_batch[0]]
@@ -672,7 +678,7 @@ class SoniTranslate(SoniTrCache):
672
  raise RuntimeError(
673
  "The audio is too long to process in this demo. Alternatively, you"
674
  " can install the app locally or use the Colab notebook available "
675
- "in the SoniTranslate repository."
676
  )
677
  elif duration_verify > 300:
678
  tts_voices_list = [
@@ -687,7 +693,7 @@ class SoniTranslate(SoniTrCache):
687
  "XTTS is too slow to be used for audio longer than 5 "
688
  "minutes in this demo. Alternatively, you can install "
689
  "the app locally or use the Colab notebook available in"
690
- " the SoniTranslate repository."
691
  )
692
 
693
  if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
@@ -1342,7 +1348,7 @@ class SoniTranslate(SoniTrCache):
1342
  "This option is disabled in this demo. "
1343
  "Alternatively, you can install "
1344
  "the app locally or use the Colab notebook available in"
1345
- " the SoniTranslate repository."
1346
  )
1347
 
1348
  if "videobook" in output_type:
@@ -1477,7 +1483,7 @@ class SoniTranslate(SoniTrCache):
1477
  return output
1478
 
1479
 
1480
- title = "<center><strong><font size='7'>📽️ SoniTranslate 🈷️</font></strong></center>"
1481
 
1482
 
1483
  def create_gui(theme, logs_in_gui=False):
@@ -1582,7 +1588,7 @@ def create_gui(theme, logs_in_gui=False):
1582
  max_speakers = gr.Slider(
1583
  1,
1584
  MAX_TTS,
1585
- value=1,
1586
  step=1,
1587
  label=lg_conf["max_sk"],
1588
  )
@@ -1606,7 +1612,7 @@ def create_gui(theme, logs_in_gui=False):
1606
  SoniTr.tts_info.tts_list(),
1607
  value="en-US-AndrewMultilingualNeural-Male",
1608
  label=lg_conf["sk2"],
1609
- visible=False,
1610
  interactive=True,
1611
  )
1612
  tts_voice02 = gr.Dropdown(
@@ -1796,6 +1802,12 @@ def create_gui(theme, logs_in_gui=False):
1796
  with gr.Accordion(
1797
  lg_conf["extra_setting"], open=False
1798
  ):
 
 
 
 
 
 
1799
  audio_accelerate = gr.Slider(
1800
  label=lg_conf["acc_max_label"],
1801
  value=1.9,
@@ -2004,7 +2016,7 @@ def create_gui(theme, logs_in_gui=False):
2004
  edit_sub_check = gr.Checkbox(
2005
  label=lg_conf["edit_sub_label"],
2006
  info=lg_conf["edit_sub_info"],
2007
- interactive=(False if os.environ.get("IS_DEMO") == "TRUE" else True),
2008
  )
2009
  dummy_false_check = gr.Checkbox(
2010
  False,
@@ -2584,16 +2596,16 @@ def create_gui(theme, logs_in_gui=False):
2584
 
2585
  class Logger:
2586
  def __init__(self, filename):
2587
- self.terminal = sys.stdout
2588
- self.log = open(filename, "w")
2589
 
2590
  def write(self, message):
2591
- self.terminal.write(message)
2592
- self.log.write(message)
2593
 
2594
  def flush(self):
2595
- self.terminal.flush()
2596
- self.log.flush()
2597
 
2598
  def isatty(self):
2599
  return False
@@ -2689,6 +2701,7 @@ def create_gui(theme, logs_in_gui=False):
2689
  AUDIO_MIX,
2690
  audio_accelerate,
2691
  acceleration_rate_regulation_gui,
 
2692
  volume_original_mix,
2693
  volume_translated_mix,
2694
  sub_type_output,
@@ -2756,6 +2769,7 @@ def create_gui(theme, logs_in_gui=False):
2756
  AUDIO_MIX,
2757
  audio_accelerate,
2758
  acceleration_rate_regulation_gui,
 
2759
  volume_original_mix,
2760
  volume_translated_mix,
2761
  sub_type_output,
@@ -2917,7 +2931,7 @@ if __name__ == "__main__":
2917
 
2918
  app.launch(
2919
  max_threads=1,
2920
- share=args.public_url,
2921
  show_error=True,
2922
  quiet=False,
2923
  debug=(True if logger.isEnabledFor(logging.DEBUG) else False),
 
4
  os.system("pip install -q -r requirements_xtts.txt")
5
  os.system("pip install -q TTS==0.21.1 --no-deps")
6
  import spaces
 
 
 
 
7
  import librosa
8
  from soni_translate.logging_setup import (
9
  logger,
 
11
  configure_logging_libs,
12
  ); configure_logging_libs() # noqa
13
  import whisperx
14
+ import torch
15
  import os
16
  from soni_translate.audio_segments import create_translated_audio
17
  from soni_translate.text_to_speech import (
 
347
  edit_text_arg = kwargs[31]
348
  get_text_arg = kwargs[32]
349
 
350
+ video_acceleration_rate_regulation = kwargs[34] # Adjust the index as needed
351
  is_gui_arg = kwargs[-1]
352
 
353
  kwargs = kwargs[3:]
 
363
  return self.multilingual_media_conversion(
364
  media_batch[0], "", "", *kwargs
365
  )
366
+ if video_acceleration_rate_regulation:
367
+ logger.info("Video acceleration rate regulation is enabled.")
368
+ try:
369
+ self.accelerate_video_segments()
370
+ logger.info("Video segments accelerated successfully.")
371
+ except Exception as e:
372
+ logger.error(f"Failed to accelerate video segments: {e}")
373
+ raise
374
 
375
  if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
376
  media_batch = [media_batch[0]]
 
678
  raise RuntimeError(
679
  "The audio is too long to process in this demo. Alternatively, you"
680
  " can install the app locally or use the Colab notebook available "
681
+ "in the ALEPH-WEBETA repository."
682
  )
683
  elif duration_verify > 300:
684
  tts_voices_list = [
 
693
  "XTTS is too slow to be used for audio longer than 5 "
694
  "minutes in this demo. Alternatively, you can install "
695
  "the app locally or use the Colab notebook available in"
696
+ " the aleph-webeta repository."
697
  )
698
 
699
  if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
 
1348
  "This option is disabled in this demo. "
1349
  "Alternatively, you can install "
1350
  "the app locally or use the Colab notebook available in"
1351
+ " the ALEPH-WEBETA repository."
1352
  )
1353
 
1354
  if "videobook" in output_type:
 
1483
  return output
1484
 
1485
 
1486
+ title = "<center><strong><font size='7'>📽️ ALEPH-WEO-WEBETA V2 🈷️</font></strong></center>"
1487
 
1488
 
1489
  def create_gui(theme, logs_in_gui=False):
 
1588
  max_speakers = gr.Slider(
1589
  1,
1590
  MAX_TTS,
1591
+ value=2,
1592
  step=1,
1593
  label=lg_conf["max_sk"],
1594
  )
 
1612
  SoniTr.tts_info.tts_list(),
1613
  value="en-US-AndrewMultilingualNeural-Male",
1614
  label=lg_conf["sk2"],
1615
+ visible=True,
1616
  interactive=True,
1617
  )
1618
  tts_voice02 = gr.Dropdown(
 
1802
  with gr.Accordion(
1803
  lg_conf["extra_setting"], open=False
1804
  ):
1805
+ # Add the new video acceleration rate regulation option
1806
+ video_acceleration_rate_regulation_gui = gr.Checkbox(
1807
+ False,
1808
+ label="Video Acceleration Rate Regulation",
1809
+ info="Enable this option to regulate the video segments rate to match the translated audio segments length and regulate overall video length.",
1810
+ )
1811
  audio_accelerate = gr.Slider(
1812
  label=lg_conf["acc_max_label"],
1813
  value=1.9,
 
2016
  edit_sub_check = gr.Checkbox(
2017
  label=lg_conf["edit_sub_label"],
2018
  info=lg_conf["edit_sub_info"],
2019
+ interactive=True, # Always enable the checkbox
2020
  )
2021
  dummy_false_check = gr.Checkbox(
2022
  False,
 
2596
 
2597
  class Logger:
2598
  def __init__(self, filename):
2599
+ this.terminal = sys.stdout
2600
+ this.log = open(filename, "w")
2601
 
2602
  def write(self, message):
2603
+ this.terminal.write(message)
2604
+ this.log.write(message)
2605
 
2606
  def flush(self):
2607
+ this.terminal.flush()
2608
+ this.log.flush()
2609
 
2610
  def isatty(self):
2611
  return False
 
2701
  AUDIO_MIX,
2702
  audio_accelerate,
2703
  acceleration_rate_regulation_gui,
2704
+ video_acceleration_rate_regulation_gui, # New option
2705
  volume_original_mix,
2706
  volume_translated_mix,
2707
  sub_type_output,
 
2769
  AUDIO_MIX,
2770
  audio_accelerate,
2771
  acceleration_rate_regulation_gui,
2772
+ video_acceleration_rate_regulation_gui, # New option
2773
  volume_original_mix,
2774
  volume_translated_mix,
2775
  sub_type_output,
 
2931
 
2932
  app.launch(
2933
  max_threads=1,
2934
+ share=True,
2935
  show_error=True,
2936
  quiet=False,
2937
  debug=(True if logger.isEnabledFor(logging.DEBUG) else False),