mrolando committed on
Commit
1ffabd9
1 Parent(s): 2fd4c61

fixed space

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -1,12 +1,10 @@
1
  from diffusers import AudioLDMPipeline
2
  import torch
3
  import gradio as gr
4
- from transformers import pipeline
5
  #from googletrans import Translator
6
  import os
7
 
8
-
9
-
10
  if torch.cuda.is_available():
11
  device = "cuda"
12
  torch_dtype = torch.float16
@@ -17,8 +15,6 @@ print(device)
17
  repo_id = "cvssp/audioldm-m-full"
18
  pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
19
  pipe = pipe.to(device)
20
- # pipe.unet = torch.compile(pipe.unet)
21
- #pipe.unet = torch.compile(pipe.unet)
22
 
23
 
24
 
@@ -27,14 +23,32 @@ import base64
27
  with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
28
  encoded_image = base64.b64encode(image_file.read()).decode()
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def generate_sound(text,steps,audio_length,negative_prompt):
32
  print(text)
33
- # text=translate_text(text)
34
  text = translate_text(text)
35
  negative_prompt = translate_text(negative_prompt)
36
- #translator = Translator()
37
- #text=translator.translate(text, src='es',dest="en").text
38
  print(text)
39
  waveforms = pipe(text,
40
  num_inference_steps=steps,
@@ -42,14 +56,6 @@ def generate_sound(text,steps,audio_length,negative_prompt):
42
  negative_prompt = negative_prompt).audios
43
  rate =16000
44
  return rate, waveforms[0]
45
- #return gr.make_waveform((rate, waveforms[0]))
46
-
47
- es_en_translator = pipeline("translation",model = "Helsinki-NLP/opus-mt-es-en")
48
-
49
-
50
- def translate_text(text):
51
- text = es_en_translator(text)[0].get("translation_text")
52
- return text
53
 
54
  with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto.") as demo:
55
  gr.Markdown("""
@@ -79,7 +85,7 @@ with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto
79
  with gr.Column():
80
  audio_len = gr.Slider(label="Duración del sonido", minimum=1, maximum=30, value=5, step = 1,
81
  info="Cuánto mayor sonido, mayor será el tiempo de procesamiento.")
82
- steps = gr.Slider(label="Paos de Inferencia", minimum=1, maximum=100, value=20,step =1 ,
83
  info="Al aumentar los pasos de inferencia se puede acercar más a la descripción del texto pero con un mayor tiempo de procesamiento.")
84
  with gr.Row():
85
  examples = gr.Examples(inputs=[prompt,negative_prompt],examples=[["Un martillo golpeando madera","low quality"]])
@@ -89,5 +95,4 @@ with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto
89
 
90
  btn.click(fn=generate_sound, inputs=[prompt,steps,audio_len,negative_prompt], outputs=[output]) #steps,guidance,width,height]
91
 
92
- gr.close_all()
93
  demo.launch()
 
1
  from diffusers import AudioLDMPipeline
2
  import torch
3
  import gradio as gr
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
  #from googletrans import Translator
6
  import os
7
 
 
 
8
  if torch.cuda.is_available():
9
  device = "cuda"
10
  torch_dtype = torch.float16
 
15
  repo_id = "cvssp/audioldm-m-full"
16
  pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
17
  pipe = pipe.to(device)
 
 
18
 
19
 
20
 
 
23
  with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
24
  encoded_image = base64.b64encode(image_file.read()).decode()
25
 
26
+ # es_en_translator = pipeline("translation",model = "Helsinki-NLP/opus-mt-es-en")
27
+ # def translate_text(text):
28
+ # text = es_en_translator(text)[0].get("translation_text")
29
+ # return text
30
+ CKPT = "facebook/nllb-200-distilled-600M"
31
+
32
+ model = AutoModelForSeq2SeqLM.from_pretrained(CKPT)
33
+ tokenizer = AutoTokenizer.from_pretrained(CKPT)
34
+
35
def translate_text(text):
    """Translate *text* from Spanish (`spa_Latn`) to English (`eng_Latn`).

    Uses the module-level ``model`` and ``tokenizer`` through a transformers
    "translation" pipeline placed on ``device``.

    Args:
        text: The prompt to translate. Blank values (``None``, ``""`` or
            whitespace only) are returned unchanged without running the model.

    Returns:
        The ``translation_text`` of the first pipeline result, or *text*
        itself when it is blank.
    """
    # A blank prompt (e.g. an empty negative prompt) has nothing to translate;
    # pass it through instead of letting the model generate text for it.
    if not text or not text.strip():
        return text

    # Building the pipeline is invariant across calls, so do it once and keep
    # it on the function object instead of rebuilding it for every prompt.
    translation_pipeline = getattr(translate_text, "_translation_pipeline", None)
    if translation_pipeline is None:
        translation_pipeline = pipeline(
            "translation",
            model=model,
            tokenizer=tokenizer,
            src_lang="spa_Latn",
            tgt_lang="eng_Latn",
            max_length=400,
            device=device,
        )
        translate_text._translation_pipeline = translation_pipeline

    result = translation_pipeline(text)
    return result[0]['translation_text']
46
+
47
 
48
  def generate_sound(text,steps,audio_length,negative_prompt):
49
  print(text)
 
50
  text = translate_text(text)
51
  negative_prompt = translate_text(negative_prompt)
 
 
52
  print(text)
53
  waveforms = pipe(text,
54
  num_inference_steps=steps,
 
56
  negative_prompt = negative_prompt).audios
57
  rate =16000
58
  return rate, waveforms[0]
 
 
 
 
 
 
 
 
59
 
60
  with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto.") as demo:
61
  gr.Markdown("""
 
85
  with gr.Column():
86
  audio_len = gr.Slider(label="Duración del sonido", minimum=1, maximum=30, value=5, step = 1,
87
  info="Cuánto mayor sonido, mayor será el tiempo de procesamiento.")
88
+ steps = gr.Slider(label="Paos de Inferencia", minimum=1, maximum=100, value=15,step =1 ,
89
  info="Al aumentar los pasos de inferencia se puede acercar más a la descripción del texto pero con un mayor tiempo de procesamiento.")
90
  with gr.Row():
91
  examples = gr.Examples(inputs=[prompt,negative_prompt],examples=[["Un martillo golpeando madera","low quality"]])
 
95
 
96
  btn.click(fn=generate_sound, inputs=[prompt,steps,audio_len,negative_prompt], outputs=[output]) #steps,guidance,width,height]
97
 
 
98
  demo.launch()