Spaces:

Omnibus
/

EZ-Voice-Clone

Runtime error

App Files Files Community

Omnibus committed on Feb 13, 2024

Commit

3846d12

verified ·

1 Parent(s): 01bbbb5

Update vc.py

Browse files

Files changed (1) hide show

vc.py +45 -12

vc.py CHANGED Viewed

@@ -13,18 +13,49 @@ uid = uuid.uuid4()
 device = "cuda" if torch.cuda.is_available() else "cpu"
-def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None):
-    if in_aud_mic != None:
-        speaker_wav=in_aud_mic
-    if in_aud !=None and trim_aud==None:
-        speaker_wav=in_aud
-        #speaker_wav=Path(f"{uid}-tmp_aud.mp4")
-    if trim_aud != None:
-        speaker_wav=Path(f"{uid}-trim.wav")
-    tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
-    tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=f"{uid}-output.wav")
-    return (f"{uid}-output.wav")
 def load_video_yt(vid):
     yt = YouTube(vid)
     vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4")
@@ -63,6 +94,8 @@ with gr.Blocks() as app:
     with gr.Group():
         with gr.Row():
             gr.Markdown('''<H1> Audio Source:''')
         with gr.Row():
             with gr.Column():

 device = "cuda" if torch.cuda.is_available() else "cpu"
+processor = AutoProcessor.from_pretrained("suno/bark-small")  # processor for the suno/bark-small checkpoint; run_bark uses it to build model inputs
+model = BarkModel.from_pretrained("suno/bark-small").to(device)  # Bark model, loaded once at import time and moved to `device`
+num_list = ["1","2","3","4","5","6","7","8","9","10"]  # 1-based speaker numbers as strings; presumably the UI choices for run_bark's `n` -- TODO confirm
+lang_list = ["en","de"]  # language codes; presumably the UI choices for run_bark's `lang` -- TODO confirm
+#SAMPLE_RATE = 24_000
+def run_bark(text, n='1', lang='en'):
+    uid=uuid.uuid4()
+    #history_prompt = []
+    semantic_prompt=f"v2/{lang}_speaker_{int(n)-1}"
+        #text=["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
+    inputs = processor(text=text,
+        voice_preset = semantic_prompt,
+        return_tensors="pt",
+    )
+    print("generating")
+    speech_values = model.generate(
+        **inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
+    )
+    #speech_values = model.generate(**inputs, do_sample=True)
+    sampling_rate = model.generation_config.sample_rate
+    #sampling_rate = 24_000
+    print("writing")
+    scipy.io.wavfile.write(f"bark_out-{uid}.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
+    return (f"bark_out-{uid}.wav")
+def custom_bark(inp, tog, in_aud=None, trim_aud=None, in_aud_mic=None):
+    if tog=="Custom":
+        if in_aud_mic != None:
+            speaker_wav=in_aud_mic
+        if in_aud !=None and trim_aud==None:
+            speaker_wav=in_aud
+            #speaker_wav=Path(f"{uid}-tmp_aud.mp4")
+        if trim_aud != None:
+            speaker_wav=Path(f"{uid}-trim.wav")
+        tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
+        tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=f"{uid}-output.wav")
+        return (f"{uid}-output.wav")
+    if tog=="Preset":
+        return (run_bark(inp))
 def load_video_yt(vid):
     yt = YouTube(vid)
     vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4")
     with gr.Group():
         with gr.Row():
             gr.Markdown('''<H1> Audio Source:''')
+        with gr.Row():
+            with gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
         with gr.Row():
             with gr.Column():