Spaces:
Sleeping
Sleeping
test-rtechs
committed on
Update app_rvc.py
Browse files- app_rvc.py +32 -18
app_rvc.py
CHANGED
@@ -4,10 +4,6 @@ os.system("pip install -q piper-tts==1.2.0")
|
|
4 |
os.system("pip install -q -r requirements_xtts.txt")
|
5 |
os.system("pip install -q TTS==0.21.1 --no-deps")
|
6 |
import spaces
|
7 |
-
import torch
|
8 |
-
if os.environ.get("ZERO_GPU") != "TRUE" and torch.cuda.is_available():
|
9 |
-
# onnxruntime GPU
|
10 |
-
os.system("pip install ort-nightly-gpu --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple/")
|
11 |
import librosa
|
12 |
from soni_translate.logging_setup import (
|
13 |
logger,
|
@@ -15,6 +11,7 @@ from soni_translate.logging_setup import (
|
|
15 |
configure_logging_libs,
|
16 |
); configure_logging_libs() # noqa
|
17 |
import whisperx
|
|
|
18 |
import os
|
19 |
from soni_translate.audio_segments import create_translated_audio
|
20 |
from soni_translate.text_to_speech import (
|
@@ -350,6 +347,7 @@ class SoniTranslate(SoniTrCache):
|
|
350 |
edit_text_arg = kwargs[31]
|
351 |
get_text_arg = kwargs[32]
|
352 |
|
|
|
353 |
is_gui_arg = kwargs[-1]
|
354 |
|
355 |
kwargs = kwargs[3:]
|
@@ -365,6 +363,14 @@ class SoniTranslate(SoniTrCache):
|
|
365 |
return self.multilingual_media_conversion(
|
366 |
media_batch[0], "", "", *kwargs
|
367 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
368 |
|
369 |
if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
|
370 |
media_batch = [media_batch[0]]
|
@@ -672,7 +678,7 @@ class SoniTranslate(SoniTrCache):
|
|
672 |
raise RuntimeError(
|
673 |
"The audio is too long to process in this demo. Alternatively, you"
|
674 |
" can install the app locally or use the Colab notebook available "
|
675 |
-
"in the
|
676 |
)
|
677 |
elif duration_verify > 300:
|
678 |
tts_voices_list = [
|
@@ -687,7 +693,7 @@ class SoniTranslate(SoniTrCache):
|
|
687 |
"XTTS is too slow to be used for audio longer than 5 "
|
688 |
"minutes in this demo. Alternatively, you can install "
|
689 |
"the app locally or use the Colab notebook available in"
|
690 |
-
" the
|
691 |
)
|
692 |
|
693 |
if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
|
@@ -1342,7 +1348,7 @@ class SoniTranslate(SoniTrCache):
|
|
1342 |
"This option is disabled in this demo. "
|
1343 |
"Alternatively, you can install "
|
1344 |
"the app locally or use the Colab notebook available in"
|
1345 |
-
" the
|
1346 |
)
|
1347 |
|
1348 |
if "videobook" in output_type:
|
@@ -1477,7 +1483,7 @@ class SoniTranslate(SoniTrCache):
|
|
1477 |
return output
|
1478 |
|
1479 |
|
1480 |
-
title = "<center><strong><font size='7'>📽️
|
1481 |
|
1482 |
|
1483 |
def create_gui(theme, logs_in_gui=False):
|
@@ -1582,7 +1588,7 @@ def create_gui(theme, logs_in_gui=False):
|
|
1582 |
max_speakers = gr.Slider(
|
1583 |
1,
|
1584 |
MAX_TTS,
|
1585 |
-
value=
|
1586 |
step=1,
|
1587 |
label=lg_conf["max_sk"],
|
1588 |
)
|
@@ -1606,7 +1612,7 @@ def create_gui(theme, logs_in_gui=False):
|
|
1606 |
SoniTr.tts_info.tts_list(),
|
1607 |
value="en-US-AndrewMultilingualNeural-Male",
|
1608 |
label=lg_conf["sk2"],
|
1609 |
-
visible=
|
1610 |
interactive=True,
|
1611 |
)
|
1612 |
tts_voice02 = gr.Dropdown(
|
@@ -1796,6 +1802,12 @@ def create_gui(theme, logs_in_gui=False):
|
|
1796 |
with gr.Accordion(
|
1797 |
lg_conf["extra_setting"], open=False
|
1798 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
1799 |
audio_accelerate = gr.Slider(
|
1800 |
label=lg_conf["acc_max_label"],
|
1801 |
value=1.9,
|
@@ -2004,7 +2016,7 @@ def create_gui(theme, logs_in_gui=False):
|
|
2004 |
edit_sub_check = gr.Checkbox(
|
2005 |
label=lg_conf["edit_sub_label"],
|
2006 |
info=lg_conf["edit_sub_info"],
|
2007 |
-
interactive=
|
2008 |
)
|
2009 |
dummy_false_check = gr.Checkbox(
|
2010 |
False,
|
@@ -2584,16 +2596,16 @@ def create_gui(theme, logs_in_gui=False):
|
|
2584 |
|
2585 |
class Logger:
|
2586 |
def __init__(self, filename):
|
2587 |
-
|
2588 |
-
|
2589 |
|
2590 |
def write(self, message):
|
2591 |
-
|
2592 |
-
|
2593 |
|
2594 |
def flush(self):
|
2595 |
-
|
2596 |
-
|
2597 |
|
2598 |
def isatty(self):
|
2599 |
return False
|
@@ -2689,6 +2701,7 @@ def create_gui(theme, logs_in_gui=False):
|
|
2689 |
AUDIO_MIX,
|
2690 |
audio_accelerate,
|
2691 |
acceleration_rate_regulation_gui,
|
|
|
2692 |
volume_original_mix,
|
2693 |
volume_translated_mix,
|
2694 |
sub_type_output,
|
@@ -2756,6 +2769,7 @@ def create_gui(theme, logs_in_gui=False):
|
|
2756 |
AUDIO_MIX,
|
2757 |
audio_accelerate,
|
2758 |
acceleration_rate_regulation_gui,
|
|
|
2759 |
volume_original_mix,
|
2760 |
volume_translated_mix,
|
2761 |
sub_type_output,
|
@@ -2917,7 +2931,7 @@ if __name__ == "__main__":
|
|
2917 |
|
2918 |
app.launch(
|
2919 |
max_threads=1,
|
2920 |
-
share=
|
2921 |
show_error=True,
|
2922 |
quiet=False,
|
2923 |
debug=(True if logger.isEnabledFor(logging.DEBUG) else False),
|
|
|
4 |
os.system("pip install -q -r requirements_xtts.txt")
|
5 |
os.system("pip install -q TTS==0.21.1 --no-deps")
|
6 |
import spaces
|
|
|
|
|
|
|
|
|
7 |
import librosa
|
8 |
from soni_translate.logging_setup import (
|
9 |
logger,
|
|
|
11 |
configure_logging_libs,
|
12 |
); configure_logging_libs() # noqa
|
13 |
import whisperx
|
14 |
+
import torch
|
15 |
import os
|
16 |
from soni_translate.audio_segments import create_translated_audio
|
17 |
from soni_translate.text_to_speech import (
|
|
|
347 |
edit_text_arg = kwargs[31]
|
348 |
get_text_arg = kwargs[32]
|
349 |
|
350 |
+
video_acceleration_rate_regulation = kwargs[34] # Adjust the index as needed
|
351 |
is_gui_arg = kwargs[-1]
|
352 |
|
353 |
kwargs = kwargs[3:]
|
|
|
363 |
return self.multilingual_media_conversion(
|
364 |
media_batch[0], "", "", *kwargs
|
365 |
)
|
366 |
+
if video_acceleration_rate_regulation:
|
367 |
+
logger.info("Video acceleration rate regulation is enabled.")
|
368 |
+
try:
|
369 |
+
self.accelerate_video_segments()
|
370 |
+
logger.info("Video segments accelerated successfully.")
|
371 |
+
except Exception as e:
|
372 |
+
logger.error(f"Failed to accelerate video segments: {e}")
|
373 |
+
raise
|
374 |
|
375 |
if "SET_LIMIT" == os.getenv("DEMO") or "TRUE" == os.getenv("ZERO_GPU"):
|
376 |
media_batch = [media_batch[0]]
|
|
|
678 |
raise RuntimeError(
|
679 |
"The audio is too long to process in this demo. Alternatively, you"
|
680 |
" can install the app locally or use the Colab notebook available "
|
681 |
+
"in the ALEPH-WEBETA repository."
|
682 |
)
|
683 |
elif duration_verify > 300:
|
684 |
tts_voices_list = [
|
|
|
693 |
"XTTS is too slow to be used for audio longer than 5 "
|
694 |
"minutes in this demo. Alternatively, you can install "
|
695 |
"the app locally or use the Colab notebook available in"
|
696 |
+
" the aleph-webeta repository."
|
697 |
)
|
698 |
|
699 |
if not self.task_in_cache("refine_vocals", [vocal_refinement], {}):
|
|
|
1348 |
"This option is disabled in this demo. "
|
1349 |
"Alternatively, you can install "
|
1350 |
"the app locally or use the Colab notebook available in"
|
1351 |
+
" the ALEPH-WEBETA repository."
|
1352 |
)
|
1353 |
|
1354 |
if "videobook" in output_type:
|
|
|
1483 |
return output
|
1484 |
|
1485 |
|
1486 |
+
title = "<center><strong><font size='7'>📽️ ALEPH-WEO-WEBETA V2 🈷️</font></strong></center>"
|
1487 |
|
1488 |
|
1489 |
def create_gui(theme, logs_in_gui=False):
|
|
|
1588 |
max_speakers = gr.Slider(
|
1589 |
1,
|
1590 |
MAX_TTS,
|
1591 |
+
value=2,
|
1592 |
step=1,
|
1593 |
label=lg_conf["max_sk"],
|
1594 |
)
|
|
|
1612 |
SoniTr.tts_info.tts_list(),
|
1613 |
value="en-US-AndrewMultilingualNeural-Male",
|
1614 |
label=lg_conf["sk2"],
|
1615 |
+
visible=True,
|
1616 |
interactive=True,
|
1617 |
)
|
1618 |
tts_voice02 = gr.Dropdown(
|
|
|
1802 |
with gr.Accordion(
|
1803 |
lg_conf["extra_setting"], open=False
|
1804 |
):
|
1805 |
+
# Add the new video acceleration rate regulation option
|
1806 |
+
video_acceleration_rate_regulation_gui = gr.Checkbox(
|
1807 |
+
False,
|
1808 |
+
label="Video Acceleration Rate Regulation",
|
1809 |
+
info="Enable this option to regulate the video segments rate to match the translated audio segments length and regulate overall video length.",
|
1810 |
+
)
|
1811 |
audio_accelerate = gr.Slider(
|
1812 |
label=lg_conf["acc_max_label"],
|
1813 |
value=1.9,
|
|
|
2016 |
edit_sub_check = gr.Checkbox(
|
2017 |
label=lg_conf["edit_sub_label"],
|
2018 |
info=lg_conf["edit_sub_info"],
|
2019 |
+
interactive=True, # Always enable the checkbox
|
2020 |
)
|
2021 |
dummy_false_check = gr.Checkbox(
|
2022 |
False,
|
|
|
2596 |
|
2597 |
class Logger:
    """File-like object that duplicates every write to stdout and a log file.

    The stream that ``sys.stdout`` refers to at construction time is kept
    as ``terminal``; ``filename`` is opened in write mode (truncating any
    existing content) and kept as ``log``.
    NOTE(review): presumably installed in place of ``sys.stdout`` so console
    output can be shown in the GUI -- confirm against the caller.
    """

    def __init__(self, filename):
        """Capture the current stdout and open ``filename`` for writing.

        Args:
            filename: Path of the log file to create or truncate.
        """
        # The instance is bound to ``self`` in Python; the previous ``this``
        # was an undefined name and raised NameError on first use.
        self.terminal = sys.stdout
        # Kept open for the lifetime of the object; never closed here.
        self.log = open(filename, "w")

    def write(self, message):
        """Write ``message`` to the captured stdout and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both the captured stdout and the log file."""
        self.terminal.flush()
        self.log.flush()

    def isatty(self):
        """Report that this stream is not an interactive terminal."""
        return False
|
|
|
2701 |
AUDIO_MIX,
|
2702 |
audio_accelerate,
|
2703 |
acceleration_rate_regulation_gui,
|
2704 |
+
video_acceleration_rate_regulation_gui, # New option
|
2705 |
volume_original_mix,
|
2706 |
volume_translated_mix,
|
2707 |
sub_type_output,
|
|
|
2769 |
AUDIO_MIX,
|
2770 |
audio_accelerate,
|
2771 |
acceleration_rate_regulation_gui,
|
2772 |
+
video_acceleration_rate_regulation_gui, # New option
|
2773 |
volume_original_mix,
|
2774 |
volume_translated_mix,
|
2775 |
sub_type_output,
|
|
|
2931 |
|
2932 |
app.launch(
|
2933 |
max_threads=1,
|
2934 |
+
share=True,
|
2935 |
show_error=True,
|
2936 |
quiet=False,
|
2937 |
debug=(True if logger.isEnabledFor(logging.DEBUG) else False),
|