File size: 1,012 Bytes
9659078
 
 
5a84593
 
 
 
 
 
 
 
 
 
 
 
 
9659078
 
 
 
 
 
 
5a84593
9659078
 
 
 
 
 
a4b64d4
 
 
 
8102081
9659078
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr



from huggingface_hub import snapshot_download




# Hub repositories whose snapshots are fetched into ./checkpoints at startup.
model_ids = ['suno/bark']
for model_id in model_ids:
    # The local folder is named after the last path component of the repo id.
    model_name = model_id.rsplit('/', 1)[-1]
    snapshot_download(
        repo_id=model_id,
        local_dir=f'checkpoints/{model_name}',
    )



from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark

# Create the Bark model from a default configuration, then load its checkpoint
# from the local `checkpoints/bark` directory (the target of the 'suno/bark'
# snapshot download earlier in this script).
config = BarkConfig()
model = Bark.init_from_config(config)
# NOTE(review): eval=True presumably switches the model to inference mode — confirm against the TTS API.
model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)

def infer(prompt):
    """Synthesize speech for ``prompt`` with the Bark model and report completion.

    Args:
        prompt: Text to synthesize (the value of the Gradio textbox). When it
            is missing, empty, or only whitespace, a built-in sample sentence
            is synthesized instead.

    Returns:
        The literal string ``"done"``. The synthesis result is neither
        returned nor saved by this function.
    """
    default_text = "Hello, my name is Manmay , how are you?"

    # Honor the caller's text; fall back to the sample sentence only when
    # nothing usable was supplied.
    has_text = isinstance(prompt, str) and prompt.strip()
    text = prompt if has_text else default_text

    # Voice cloning with a named speaker. For a random speaker instead, pass
    # speaker_id="random" and voice_dirs=None.
    # NOTE(review): the speaker file is expected under the voice directory as
    # `<speaker>/speaker.wav` or `<speaker>/speaker.npz`; an earlier comment
    # referred to `bark_voices/` while the code passes `voice_dirs/` — confirm
    # which directory actually holds the speaker files.
    model.synthesize(text, config, speaker_id="en_speaker_6", voice_dirs="voice_dirs/")

    return "done"

# Expose `infer` through a single-textbox-in, single-textbox-out web UI and start serving it.
gr.Interface(
    fn=infer,
    inputs=[gr.Textbox()],
    outputs=[gr.Textbox()],
).launch()