Uhhy committed on
Commit
6f5b5a3
verified
1 Parent(s): 33f1e50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -28
app.py CHANGED
@@ -58,8 +58,8 @@ if torch.cuda.is_available():
58
  supported_languages = config.languages
59
  if not "vi" in supported_languages:
60
  supported_languages.append("vi")
61
- if not "es-AR" in supported_languages:
62
- supported_languages.append("es-AR")
63
 
64
  def normalize_vietnamese_text(text):
65
  text = (
@@ -76,20 +76,6 @@ def normalize_vietnamese_text(text):
76
  )
77
  return text
78
 
79
-
80
- def calculate_keep_len(text, lang):
81
- if lang in ["ja", "zh-cn"]:
82
- return -1
83
-
84
- word_count = len(text.split())
85
- num_punct = text.count(".") + text.count("!") + text.count("?") + text.count(",")
86
-
87
- if word_count < 5:
88
- return 15000 * word_count + 2000 * num_punct
89
- elif word_count < 10:
90
- return 13000 * word_count + 2000 * num_punct
91
- return -1
92
-
93
  def analyze_sentiment(text):
94
  sia = SentimentIntensityAnalyzer()
95
  scores = sia.polarity_scores(text)
@@ -99,6 +85,10 @@ def change_pitch(audio_data, sampling_rate, sentiment):
99
  semitones = sentiment * 2
100
  return pyrubberband.pitch_shift(audio_data, sampling_rate, semitones)
101
 
 
 
 
 
102
  @spaces.GPU(duration=0)
103
  def predict(
104
  prompt,
@@ -118,12 +108,6 @@ def predict(
118
  metrics_text = gr.Warning("Por favor, introduce un texto más largo.")
119
  return (None, metrics_text)
120
 
121
- if len(prompt) > 250000000:
122
- metrics_text = gr.Warning(
123
- f"El texto tiene {len(prompt)} caracteres. Es demasiado largo, por favor, mantenlo por debajo de 250000000 caracteres."
124
- )
125
- return (None, metrics_text)
126
-
127
  try:
128
  metrics_text = ""
129
  t_latent = time.time()
@@ -173,13 +157,12 @@ def predict(
173
  real_time_factor = (time.time() - t0) / out["wav"].shape[-1] * 24000
174
  metrics_text += f"Factor de tiempo real (RTF): {real_time_factor:.2f}\n"
175
 
176
- keep_len = calculate_keep_len(prompt, language)
177
- out["wav"] = out["wav"][:keep_len]
178
-
179
  audio_data = np.array(out["wav"])
180
 
181
  modified_audio = change_pitch(audio_data, 24000, sentiment)
182
 
 
 
183
  torchaudio.save("output.wav", torch.tensor(modified_audio).unsqueeze(0), 24000)
184
 
185
  except RuntimeError as e:
@@ -246,13 +229,12 @@ with gr.Blocks(analytics_enabled=False) as demo:
246
  with gr.Column():
247
  input_text_gr = gr.Textbox(
248
  label="Texto a convertir a voz",
249
- info="Cada frase debe tener al menos 10 palabras. Máximo 250 caracteres (alrededor de 2-3 frases).",
250
  value="Hola, soy un modelo de texto a voz.",
251
  )
252
  language_gr = gr.Dropdown(
253
  label="Idioma",
254
  choices=[
255
- "es-AR",
256
  "vi",
257
  "en",
258
  "es",
@@ -273,7 +255,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
273
  "hi",
274
  ],
275
  max_choices=1,
276
- value="es-AR",
277
  )
278
  normalize_text = gr.Checkbox(
279
  label="Normalizar texto en vietnamita",
 
58
  supported_languages = config.languages
59
  if not "vi" in supported_languages:
60
  supported_languages.append("vi")
61
+ if not "es-ar" in supported_languages:
62
+ supported_languages.append("es-ar")
63
 
64
  def normalize_vietnamese_text(text):
65
  text = (
 
76
  )
77
  return text
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def analyze_sentiment(text):
80
  sia = SentimentIntensityAnalyzer()
81
  scores = sia.polarity_scores(text)
 
85
  semitones = sentiment * 2
86
  return pyrubberband.pitch_shift(audio_data, sampling_rate, semitones)
87
 
88
+ def apply_distortion(audio_data, sentiment):
89
+ distortion_factor = abs(sentiment) * 0.5
90
+ return audio_data * (1 + distortion_factor * np.random.randn(len(audio_data)))
91
+
92
  @spaces.GPU(duration=0)
93
  def predict(
94
  prompt,
 
108
  metrics_text = gr.Warning("Por favor, introduce un texto más largo.")
109
  return (None, metrics_text)
110
 
 
 
 
 
 
 
111
  try:
112
  metrics_text = ""
113
  t_latent = time.time()
 
157
  real_time_factor = (time.time() - t0) / out["wav"].shape[-1] * 24000
158
  metrics_text += f"Factor de tiempo real (RTF): {real_time_factor:.2f}\n"
159
 
 
 
 
160
  audio_data = np.array(out["wav"])
161
 
162
  modified_audio = change_pitch(audio_data, 24000, sentiment)
163
 
164
+ modified_audio = apply_distortion(modified_audio, sentiment)
165
+
166
  torchaudio.save("output.wav", torch.tensor(modified_audio).unsqueeze(0), 24000)
167
 
168
  except RuntimeError as e:
 
229
  with gr.Column():
230
  input_text_gr = gr.Textbox(
231
  label="Texto a convertir a voz",
 
232
  value="Hola, soy un modelo de texto a voz.",
233
  )
234
  language_gr = gr.Dropdown(
235
  label="Idioma",
236
  choices=[
237
+ "es-ar",
238
  "vi",
239
  "en",
240
  "es",
 
255
  "hi",
256
  ],
257
  max_choices=1,
258
+ value="es-ar",
259
  )
260
  normalize_text = gr.Checkbox(
261
  label="Normalizar texto en vietnamita",