EPark25 committed on
Commit
1e937ce
·
1 Parent(s): d2e67c6

unreliably working

Browse files
Files changed (1) hide show
  1. app.py +20 -13
app.py CHANGED
@@ -1,10 +1,15 @@
1
  import gradio as gr
 
2
  from huggingface_hub import InferenceClient
3
- from transformers import pipeline
4
- from scipy.io.wavfile import write as write_wav
5
 
6
- AUDIO_FILE_PATH = "bark_generation.wav"
7
- synthesizer = pipeline("text-to-speech", "suno/bark-small")
 
 
 
 
8
 
9
  """
10
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -18,10 +23,6 @@ with gr.Blocks() as demo:
18
  msg = gr.Textbox(submit_btn=True)
19
  clear = gr.Button("Clear")
20
 
21
- def synthesize_audio(text):
22
- speech = synthesizer(text, forward_params={"do_sample": True})
23
- write_wav(AUDIO_FILE_PATH, rate=speech["sampling_rate"], data=speech["audio"])
24
-
25
  def user(user_message, history: list):
26
  return "", history + [{"role": "user", "content": user_message}]
27
 
@@ -33,14 +34,20 @@ with gr.Blocks() as demo:
33
  ):
34
  token = message.choices[0].delta.content
35
  history[-1]["content"] += token
36
- yield history, None
 
 
37
 
38
- synthesize_audio(history[-1]["content"])
39
- return history, AUDIO_FILE_PATH
 
 
 
 
40
 
41
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
42
- bot, chatbot, [chatbot, audio_box]
43
- )
44
  clear.click(lambda: None, None, chatbot, queue=False)
45
 
46
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import torch
3
  from huggingface_hub import InferenceClient
4
+ from transformers import BarkModel
5
+ from transformers import AutoProcessor
6
 
7
+
8
# Load the small Bark text-to-speech checkpoint and move it to GPU when available.
model = BarkModel.from_pretrained("suno/bark-small")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Processor must match the loaded checkpoint: the original pulled "suno/bark"
# while the model above is "suno/bark-small" — align them per the model card.
processor = AutoProcessor.from_pretrained("suno/bark-small")
13
 
14
  """
15
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
23
  msg = gr.Textbox(submit_btn=True)
24
  clear = gr.Button("Clear")
25
 
 
 
 
 
26
def user(user_message, history: list):
    """Handle a textbox submit: push the user's message onto the chat history.

    Returns ("", new_history) so the textbox is cleared while the chatbot
    component receives the extended history. The input history is not mutated.
    """
    new_history = history + [{"role": "user", "content": user_message}]
    return "", new_history
 
 
34
  ):
35
  token = message.choices[0].delta.content
36
  history[-1]["content"] += token
37
+ yield history
38
+
39
+ return history
40
 
41
def read(history: list):
    """Synthesize speech for the latest assistant reply in the chat history.

    Args:
        history: chat-message list; the last entry's "content" is spoken.

    Returns:
        A (sampling_rate, waveform) tuple (int, 1-D numpy array) as expected
        by a gr.Audio output component.
    """
    text = history[-1]["content"]
    # Tokenize and move the tensors to the model's device once; the original
    # redundantly called .to(device) a second time inside generate().
    inputs = processor(text=text, return_tensors="pt").to(device)
    # NOTE(review): Bark generation is sampling-based and nondeterministic by
    # default — confirm that's acceptable for this app.
    speech = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate
    # tuple((a, b)) was a no-op wrapper around a tuple literal; return directly.
    return sampling_rate, speech.cpu().numpy().squeeze()
47
 
48
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
49
+ bot, chatbot, chatbot
50
+ ).then(read, chatbot, audio_box)
51
  clear.click(lambda: None, None, chatbot, queue=False)
52
 
53
  if __name__ == "__main__":