texto_a_sonido

Runtime error

App Files Files Community

mrolando committed on Oct 19, 2023

Commit

1ffabd9

1 Parent(s): 2fd4c61

fixed space

Browse files

Files changed (1) hide show

app.py +23 -18

app.py CHANGED Viewed

@@ -1,12 +1,10 @@
 from diffusers import AudioLDMPipeline
 import torch
 import gradio as gr
-from transformers import pipeline
 #from googletrans import Translator
 import os
 if torch.cuda.is_available():
     device = "cuda"
     torch_dtype = torch.float16
@@ -17,8 +15,6 @@ print(device)
 repo_id = "cvssp/audioldm-m-full"
 pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
 pipe = pipe.to(device)
-# pipe.unet = torch.compile(pipe.unet)
-#pipe.unet = torch.compile(pipe.unet)
@@ -27,14 +23,32 @@ import base64
 with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
     encoded_image = base64.b64encode(image_file.read()).decode()
 def generate_sound(text,steps,audio_length,negative_prompt):
     print(text)
-    # text=translate_text(text)
     text = translate_text(text)
     negative_prompt = translate_text(negative_prompt)
-    #translator = Translator()
-    #text=translator.translate(text, src='es',dest="en").text
     print(text)
     waveforms = pipe(text,
                      num_inference_steps=steps,
@@ -42,14 +56,6 @@ def generate_sound(text,steps,audio_length,negative_prompt):
                      negative_prompt = negative_prompt).audios
     rate =16000
     return rate, waveforms[0]
-    #return gr.make_waveform((rate, waveforms[0]))
-es_en_translator = pipeline("translation",model = "Helsinki-NLP/opus-mt-es-en")
-def translate_text(text):
-    text = es_en_translator(text)[0].get("translation_text")
-    return text
 with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto.") as demo:
     gr.Markdown("""
@@ -79,7 +85,7 @@ with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto
                             with gr.Column():
                                 audio_len = gr.Slider(label="Duración del sonido", minimum=1, maximum=30, value=5, step = 1,
                                 info="Cuánto mayor sonido, mayor será el tiempo de procesamiento.")
-                                steps = gr.Slider(label="Paos de Inferencia", minimum=1, maximum=100, value=20,step =1 ,
                                 info="Al aumentar los pasos de inferencia se puede acercar más a la descripción del texto pero con un mayor tiempo de procesamiento.")
             with gr.Row():
                 examples = gr.Examples(inputs=[prompt,negative_prompt],examples=[["Un martillo golpeando madera","low quality"]])
@@ -89,5 +95,4 @@ with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto
     btn.click(fn=generate_sound, inputs=[prompt,steps,audio_len,negative_prompt], outputs=[output])  #steps,guidance,width,height]
-gr.close_all()
 demo.launch()

 from diffusers import AudioLDMPipeline
 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 #from googletrans import Translator
 import os
 if torch.cuda.is_available():
     device = "cuda"
     torch_dtype = torch.float16
 repo_id = "cvssp/audioldm-m-full"
 pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
 pipe = pipe.to(device)
 with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
     encoded_image = base64.b64encode(image_file.read()).decode()
+# es_en_translator = pipeline("translation",model = "Helsinki-NLP/opus-mt-es-en")
+# def translate_text(text):
+#     text = es_en_translator(text)[0].get("translation_text")
+#     return text
+CKPT = "facebook/nllb-200-distilled-600M"
+model = AutoModelForSeq2SeqLM.from_pretrained(CKPT)
+tokenizer = AutoTokenizer.from_pretrained(CKPT)
+def translate_text(text):
+    """Translate Spanish text to English using the NLLB checkpoint.
+
+    The translation pipeline is built on the first call and cached on the
+    function object, so later calls reuse it instead of constructing a new
+    pipeline for every prompt (generate_sound calls this twice per request).
+
+    Args:
+        text: Spanish text to translate. Empty or whitespace-only input is
+            returned unchanged without invoking the model.
+
+    Returns:
+        The English translation reported by the pipeline under
+        ``translation_text``.
+    """
+    # Nothing to translate (e.g. an empty negative prompt): skip the model.
+    if not text or not text.strip():
+        return text
+    translation_pipeline = getattr(translate_text, "_pipeline", None)
+    if translation_pipeline is None:
+        # Uses the module-level model/tokenizer loaded from CKPT and the
+        # device selected at start-up.
+        translation_pipeline = pipeline("translation",
+                                        model=model,
+                                        tokenizer=tokenizer,
+                                        src_lang="spa_Latn",
+                                        tgt_lang="eng_Latn",
+                                        max_length=400,
+                                        device=device)
+        translate_text._pipeline = translation_pipeline
+    result = translation_pipeline(text)
+    return result[0]['translation_text']
 def generate_sound(text,steps,audio_length,negative_prompt):
     print(text)
     text = translate_text(text)
     negative_prompt = translate_text(negative_prompt)
     print(text)
     waveforms = pipe(text,
                      num_inference_steps=steps,
                      negative_prompt = negative_prompt).audios
     rate =16000
     return rate, waveforms[0]
 with gr.Blocks(title="Uso de AI para la generación de sonidos a partir de texto.") as demo:
     gr.Markdown("""
                             with gr.Column():
                                 audio_len = gr.Slider(label="Duración del sonido", minimum=1, maximum=30, value=5, step = 1,
                                 info="Cuánto mayor sonido, mayor será el tiempo de procesamiento.")
+                                steps = gr.Slider(label="Paos de Inferencia", minimum=1, maximum=100, value=15,step =1 ,
                                 info="Al aumentar los pasos de inferencia se puede acercar más a la descripción del texto pero con un mayor tiempo de procesamiento.")
             with gr.Row():
                 examples = gr.Examples(inputs=[prompt,negative_prompt],examples=[["Un martillo golpeando madera","low quality"]])
     btn.click(fn=generate_sound, inputs=[prompt,steps,audio_len,negative_prompt], outputs=[output])  #steps,guidance,width,height]
 demo.launch()