usamakenway georg-suno committed on
Commit
5a027a4
·
0 Parent(s):

Duplicate from suno/bark

Browse files

Co-authored-by: Georg Kucsko <[email protected]>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +146 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Bark
3
+ emoji: 🐶
4
+ colorFrom: pink
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.24.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-4.0
11
+ duplicated_from: suno/bark
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gradio as gr
3
+ from bark import SAMPLE_RATE, generate_audio, preload_models
4
+ from bark.generation import SUPPORTED_LANGS
5
+
6
+ DEBUG_MODE = False
7
+
8
+ if not DEBUG_MODE:
9
+ _ = preload_models()
10
+
11
+ AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
12
+ PROMPT_LOOKUP = {}
13
+ for _, lang in SUPPORTED_LANGS:
14
+ for n in range(10):
15
+ label = f"Speaker {n} ({lang})"
16
+ AVAILABLE_PROMPTS.append(label)
17
+ PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
18
+ PROMPT_LOOKUP["Unconditional"] = None
19
+ PROMPT_LOOKUP["Announcer"] = "announcer"
20
+
21
+ default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
22
+
23
+ title = "<div style='text-align:left'>🐶 Bark</div>"
24
+
25
+ description = """
26
+ <div>
27
+ <a style="display:inline-block" href='https://github.com/suno-ai/bark'><img src='https://img.shields.io/github/stars/suno-ai/bark?style=social' /></a>
28
+ <a style='display:inline-block' href='https://discord.gg/J2B2vsjKuE'><img src='https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat' /></a>
29
+ <a style="display:inline-block; margin-left: 1em" href="https://huggingface.co/spaces/suno/bark?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space%20to%20skip%20the%20queue-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
30
+ </div>
31
+ Bark is a universal text-to-audio model created by [Suno](www.suno.ai), with code publicly available [here](https://github.com/suno-ai/bark). \
32
+ Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \
33
+ This demo should be used for research purposes only. Commercial use is strictly prohibited. \
34
+ The model output is not censored and the authors do not endorse the opinions in the generated content. \
35
+ Use at your own risk.
36
+ """
37
+
38
+ article = """
39
+
40
+ ## 🌎 Foreign Language
41
+
42
+ Bark supports various languages out-of-the-box and automatically determines language from input text. \
43
+ When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.
44
+
45
+ Try the prompt:
46
+
47
+ ```
48
+ Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
49
+ ```
50
+
51
+ ## 🤭 Non-Speech Sounds
52
+
53
+ Below is a list of some known non-speech sounds, but we are finding more every day. \
54
+ Please let us know if you find patterns that work particularly well on Discord!
55
+
56
+ * [laughter]
57
+ * [laughs]
58
+ * [sighs]
59
+ * [music]
60
+ * [gasps]
61
+ * [clears throat]
62
+ * — or ... for hesitations
63
+ * ♪ for song lyrics
64
+ * capitalization for emphasis of a word
65
+ * MAN/WOMAN: for bias towards speaker
66
+
67
+ Try the prompt:
68
+
69
+ ```
70
+ " [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
71
+ ```
72
+
73
+ ## 🎶 Music
74
+ Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \
75
+ Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.
76
+
77
+ Try the prompt:
78
+
79
+ ```
80
+ ♪ In the jungle, the mighty jungle, the lion barks tonight ♪
81
+ ```
82
+
83
+ ## 🧬 Voice Cloning
84
+
85
+ Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \
86
+ The model also attempts to preserve music, ambient noise, etc. from input audio. \
87
+ However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.
88
+
89
+ ## 👥 Speaker Prompts
90
+
91
+ You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \
92
+ Please note that these are not always respected, especially if a conflicting audio history prompt is given.
93
+
94
+ Try the prompt:
95
+
96
+ ```
97
+ WOMAN: I would like an oatmilk latte please.
98
+ MAN: Wow, that's expensive!
99
+ ```
100
+
101
+ ## Details
102
+
103
+ Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark) and model weights. \
104
+ Gradio demo supported by 🤗 Hugging Face. Bark is licensed under a non-commercial license: CC-BY 4.0 NC, see details on [GitHub](https://github.com/suno-ai/bark).
105
+
106
+
107
+ """
108
+
109
+ examples = [
110
+ ["Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!", "Unconditional"],#, 0.7, 0.7],
111
+ ["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.", "Speaker 1 (en)"],#, 0.7, 0.7],
112
+ ["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.", "Speaker 0 (es)"],#, 0.7, 0.7],
113
+ ]
114
+
115
+
116
+ def gen_tts(text, history_prompt):#, temp_semantic, temp_waveform):
117
+ history_prompt = PROMPT_LOOKUP[history_prompt]
118
+ if DEBUG_MODE:
119
+ audio_arr = np.zeros(SAMPLE_RATE)
120
+ else:
121
+ audio_arr = generate_audio(text, history_prompt=history_prompt)#, text_temp=temp_semantic, waveform_temp=temp_waveform)
122
+ audio_arr = (audio_arr * 32767).astype(np.int16)
123
+ return (SAMPLE_RATE, audio_arr)
124
+
125
+ iface = gr.Interface(
126
+ fn=gen_tts,
127
+ inputs=[
128
+ gr.Textbox(label="Input Text", lines=2, value=default_text),
129
+ gr.Dropdown(AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt"),
130
+ # gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
131
+ # gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
132
+ ],
133
+ outputs=[
134
+ gr.Audio(label="Generated Audio", type="numpy"),
135
+ ],
136
+ title=title,
137
+ description=description,
138
+ article=article,
139
+ examples=examples,
140
+ cache_examples=False,
141
+ )
142
+
143
+ with gr.Group(elem_id="share-btn-container", visible=False):
144
+ share_button = gr.Button("Share to community", elem_id="share-btn")
145
+
146
+ iface.launch(enable_queue=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ git+https://github.com/suno-ai/bark.git
2
+ https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B46672772b4-cp38-cp38-linux_x86_64.whl
3
+ https://download.pytorch.org/whl/nightly/cu117/torch-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
4
+ https://download.pytorch.org/whl/nightly/cu117/torchvision-0.16.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl
5
+ https://download.pytorch.org/whl/nightly/cu117/torchaudio-2.1.0.dev20230413%2Bcu117-cp38-cp38-linux_x86_64.whl