Blakus committed on
Commit
3702a88
·
verified ·
1 Parent(s): b02eca0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -160
app.py CHANGED
@@ -1,174 +1,86 @@
1
  import os
2
- import re
3
- import time
4
- import gradio as gr
5
- from pydub import AudioSegment
6
- from TTS.api import TTS
7
- from TTS.tts.configs.xtts_config import XttsConfig
8
- from TTS.tts.models.xtts import Xtts
9
- from TTS.utils.generic_utils import get_user_data_dir
10
- from huggingface_hub import hf_hub_download
11
- import subprocess
12
  import sys
 
13
 
14
# Initial configuration.
# NOTE(review): presumably this flag tells Coqui TTS that its terms of service
# were accepted so no interactive prompt appears — confirm against Coqui docs.
os.environ.update({"COQUI_TOS_AGREED": "1"})
16
-
17
def setup_unidic():
    """Download UniDic and point MeCab at the dictionary's ``mecabrc``.

    Runs ``python -m unidic download`` with the current interpreter. When the
    download command fails, an error is printed and ``False`` is returned.
    Otherwise the ``MECABRC`` environment variable is set to the ``mecabrc``
    path inside ``unidic.DICDIR`` and ``True`` is returned.
    """
    download_cmd = [sys.executable, '-m', 'unidic', 'download']
    try:
        subprocess.check_call(download_cmd)
    except subprocess.CalledProcessError:
        print("Error al descargar UniDic")
        return False
    print("UniDic descargado correctamente")

    # Configure the environment variable read by MeCab.
    import unidic
    rc_path = os.path.join(unidic.DICDIR, 'mecabrc')
    os.environ['MECABRC'] = rc_path
    print(f"MECABRC configurado en: {os.environ['MECABRC']}")
    return True
32
 
33
# Run the UniDic setup; a failure is reported but does not stop the program.
if not setup_unidic():
    print("No se pudo configurar UniDic. El programa podría no funcionar correctamente.")

# Download the model files into the local TTS data directory.
repo_id = "Blakus/Pedro_Lab_XTTS"
local_dir = os.path.join(
    get_user_data_dir("tts"),
    "tts_models--multilingual--multi-dataset--xtts_v2",
)
os.makedirs(local_dir, exist_ok=True)
files_to_download = ["config.json", "model.pth", "vocab.json"]

for file_name in files_to_download:
    print(f"Downloading {file_name} from {repo_id}")
    hf_hub_download(repo_id=repo_id, filename=file_name, local_dir=local_dir)

# The unpacking order follows files_to_download: config, checkpoint, vocab.
config_path, checkpoint_path, vocab_path = (
    os.path.join(local_dir, name) for name in files_to_download
)

# Build the XTTS model from the downloaded configuration and weights.
config = XttsConfig()
config.load_json(config_path)

model = Xtts.init_from_config(config)
model.load_checkpoint(
    config,
    checkpoint_path=checkpoint_path,
    vocab_path=vocab_path,
    eval=True,
    use_deepspeed=False,
)

print("Modelo cargado en CPU")
58
-
59
# Helper functions
def split_text(text):
    """Split *text* into sentences.

    A sentence boundary is any run of whitespace that directly follows
    ``.``, ``!`` or ``?``; the punctuation stays attached to its sentence.
    Surrounding whitespace is stripped first and empty fragments are dropped,
    so inputs such as ``"Hi. "`` no longer yield a trailing empty sentence.

    Args:
        text: The text to split.

    Returns:
        A list of non-empty sentence strings (empty for blank input).
    """
    fragments = re.split(r'(?<=[.!?])\s+', text.strip())
    return [fragment for fragment in fragments if fragment]
62
-
63
def predict(prompt, language, reference_audio):
    """Synthesize *prompt* sentence by sentence and report timing metrics.

    Args:
        prompt: Text to synthesize; must be between 2 and 600 characters.
        language: Language code forwarded to the model's inference call.
        reference_audio: Path of the reference audio used to compute the
            conditioning latents.

    Returns:
        ``(output_path, metrics_text)`` on success, where ``output_path`` is
        the generated WAV file and ``metrics_text`` holds the generation time
        and real-time factor. On any failure ``(None, message)`` is returned
        instead of raising.
    """
    try:
        if len(prompt) < 2 or len(prompt) > 600:
            return None, "El texto debe tener entre 2 y 600 caracteres."

        # Both dropdowns start without a selection, so either may be None.
        if not language or not reference_audio:
            return None, "Seleccione un idioma y un audio de referencia."

        # Imported here so this block is self-contained within the file.
        import tempfile

        import numpy as np

        # Skip blank fragments so the model is never asked to speak nothing.
        sentences = [s for s in split_text(prompt) if s.strip()]
        if not sentences:
            return None, "El texto no contiene nada que sintetizar."

        temperature = config.inference.get("temperature", 0.75)
        repetition_penalty = config.inference.get("repetition_penalty", 5.0)
        gpt_cond_len = config.inference.get("gpt_cond_len", 30)
        gpt_cond_chunk_len = config.inference.get("gpt_cond_chunk_len", 4)
        max_ref_length = config.inference.get("max_ref_length", 60)

        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
            audio_path=reference_audio,
            gpt_cond_len=gpt_cond_len,
            gpt_cond_chunk_len=gpt_cond_chunk_len,
            max_ref_length=max_ref_length
        )

        start_time = time.time()
        combined_audio = AudioSegment.empty()

        for sentence in sentences:
            out = model.inference(
                sentence,
                language,
                gpt_cond_latent,
                speaker_embedding,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
            )
            # AudioSegment is told sample_width=2 (16-bit PCM). Floating-point
            # model output must therefore be scaled and converted to int16
            # first; passing the raw float bytes would be misread as PCM.
            wav = np.asarray(out["wav"])
            if np.issubdtype(wav.dtype, np.floating):
                wav = np.clip(wav, -1.0, 1.0) * 32767
            pcm16 = wav.astype(np.int16)

            audio_segment = AudioSegment(
                pcm16.tobytes(),
                frame_rate=24000,
                sample_width=2,
                channels=1
            )
            combined_audio += audio_segment
            combined_audio += AudioSegment.silent(duration=500)  # 0.5 seconds of silence

        inference_time = time.time() - start_time

        # A unique file per call keeps concurrent requests from overwriting
        # each other's result.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        combined_audio.export(output_path, format="wav")

        # Audio duration in seconds; never zero here because every sentence
        # contributes at least the 0.5 s of silence appended above.
        audio_length = len(combined_audio) / 1000
        real_time_factor = inference_time / audio_length

        metrics_text = f"Tiempo de generación: {inference_time:.2f} segundos\n"
        metrics_text += f"Factor de tiempo real: {real_time_factor:.2f}"

        return output_path, metrics_text

    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of raising.
        print(f"Error detallado: {str(e)}")
        return None, f"Error: {str(e)}"
120
-
121
# Gradio interface configuration
supported_languages = ["es", "en"]
reference_audios = ["serio.wav", "neutral.wav", "alegre.wav"]

theme = gr.themes.Soft(primary_hue="blue", secondary_hue="gray")
theme = theme.set(
    body_background_fill='*neutral_100',
    body_background_fill_dark='*neutral_900',
)

description = """
# Sintetizador de voz de Pedro Labattaglia 🎙️

Sintetizador de voz con la voz del locutor argentino Pedro Labattaglia.

## Cómo usarlo:
- Elija el idioma (Español o Inglés)
- Elija un audio de referencia de la lista
- Escriba el texto que desea sintetizar
- Presione generar voz
"""

# Gradio interface: header, picture, then inputs on the left and results on the right.
with gr.Blocks(theme=theme) as demo:
    gr.Markdown(description)

    with gr.Row():
        gr.Image(
            "https://i1.sndcdn.com/artworks-000237574740-gwz61j-t500x500.jpg",
            label="",
            show_label=False,
            width=250,
            height=250,
        )

    with gr.Row():
        with gr.Column(scale=2):
            language_selector = gr.Dropdown(label="Idioma", choices=supported_languages)
            reference_audio = gr.Dropdown(label="Audio de referencia", choices=reference_audios)
            input_text = gr.Textbox(
                label="Texto a sintetizar",
                placeholder="Escribe aquí el texto que quieres convertir a voz...",
            )
            generate_button = gr.Button("Generar voz", variant="primary")

        with gr.Column(scale=1):
            generated_audio = gr.Audio(label="Audio generado", interactive=False)
            metrics_output = gr.Textbox(
                label="Métricas",
                value="Tiempo de generación: -- segundos\nFactor de tiempo real: --",
            )

    # Wire the button to the synthesis function.
    generate_button.click(
        predict,
        inputs=[input_text, language_selector, reference_audio],
        outputs=[generated_audio, metrics_output],
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
 
 
 
 
 
 
 
 
2
  import sys
3
+ import subprocess
4
 
5
def check_mecab():
    """Report whether the ``MeCab`` module can be imported.

    Prints the outcome and returns ``True`` when the import succeeds,
    ``False`` when it raises ``ImportError``.
    """
    print("Checking MeCab installation...")
    try:
        import MeCab  # noqa: F401 - imported only to probe availability
    except ImportError as import_error:
        print(f"Error importing MeCab: {import_error}")
        return False
    else:
        print("MeCab imported successfully")
        return True
15
 
16
def check_unidic():
    """Report whether the ``unidic`` module can be imported.

    On success the dictionary directory (``unidic.DICDIR``) is printed and
    ``True`` is returned; an ``ImportError`` is printed and yields ``False``.
    """
    print("Checking UniDic installation...")
    try:
        import unidic
    except ImportError as import_error:
        print(f"Error importing UniDic: {import_error}")
        return False

    print(f"UniDic found at: {unidic.DICDIR}")
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
def setup_unidic():
    """Run ``python -m unidic download`` with the current interpreter.

    Returns ``True`` when the command exits successfully and ``False`` (after
    printing the error) when it exits with a non-zero status.
    """
    print("Attempting to download UniDic...")
    download_cmd = [sys.executable, '-m', 'unidic', 'download']
    try:
        subprocess.check_call(download_cmd)
    except subprocess.CalledProcessError as download_error:
        print(f"Error downloading UniDic: {download_error}")
        return False
    else:
        print("UniDic downloaded successfully")
        return True
35
 
36
def configure_mecab():
    """Point the ``MECABRC`` environment variable at UniDic's ``mecabrc``.

    The file is looked up inside ``unidic.DICDIR``. Returns ``True`` when it
    exists and the variable was set, ``False`` when the file is missing or
    any exception occurs (the problem is printed in both cases).
    """
    print("Configuring MeCab...")
    try:
        import unidic

        rc_file = os.path.join(unidic.DICDIR, 'mecabrc')
        # Guard clause: without the file there is nothing to configure.
        if not os.path.exists(rc_file):
            print(f"mecabrc file not found at: {rc_file}")
            return False

        os.environ['MECABRC'] = rc_file
        print(f"MECABRC configured to: {rc_file}")
        return True
    except Exception as error:
        print(f"Error configuring MeCab: {error}")
        return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
def test_mecab():
    """Smoke-test MeCab by parsing a short Japanese sentence.

    Creates a default ``MeCab.Tagger``, parses the sample text and prints the
    result. Returns ``True`` on success; any exception is printed and yields
    ``False``.
    """
    print("Testing MeCab...")
    try:
        import MeCab

        parsed = MeCab.Tagger().parse("これはテストです。")
        print("MeCab test successful. Output:")
        print(parsed)
        return True
    except Exception as error:
        print(f"Error testing MeCab: {error}")
        return False
65
 
66
def _pip_install(package):
    """Install *package* with pip for the running interpreter.

    Returns ``True`` on success. When pip exits with a non-zero status the
    error is printed and ``False`` is returned, so the diagnostic can continue
    instead of aborting with a traceback.
    """
    import importlib

    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
    except subprocess.CalledProcessError as e:
        print(f"Error installing {package}: {e}")
        return False
    # Modules installed while the interpreter is running are only guaranteed
    # to be found by later imports once the finder caches are invalidated.
    importlib.invalidate_caches()
    return True


if __name__ == "__main__":
    # Diagnostic driver: check, try to repair, configure, then test MeCab.
    if not check_mecab():
        print("Attempting to install MeCab...")
        _pip_install('mecab-python3')

    if not check_unidic():
        print("Attempting to install UniDic...")
        # The other steps import `unidic` and run `python -m unidic download`;
        # that module comes from the 'unidic' distribution. 'unidic-lite'
        # installs a differently named module (`unidic_lite`), so installing
        # it would not make those steps work.
        _pip_install('unidic')

    if not setup_unidic():
        print("Failed to setup UniDic")

    if not configure_mecab():
        print("Failed to configure MeCab")

    if test_mecab():
        print("MeCab is working correctly")
    else:
        print("MeCab test failed")

    print("Diagnostic complete")