Spaces:
Runtime error
Runtime error
Commit
•
5c99329
1
Parent(s):
b52d0fa
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import torch
|
2 |
from threading import Thread
|
3 |
|
@@ -43,35 +44,28 @@ Bark can generate highly realistic, multilingual speech as well as other audio -
|
|
43 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
44 |
"""
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
bark = BarkModel.from_pretrained(HUB_PATH)
|
49 |
-
else:
|
50 |
-
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
51 |
-
bark = bark.to_bettertransformer()
|
52 |
|
53 |
|
54 |
-
# Inference
|
55 |
-
|
|
|
56 |
if voice_preset not in speaker_embeddings:
|
57 |
voice_preset = None
|
58 |
-
|
59 |
-
|
60 |
-
text,
|
61 |
-
]
|
62 |
-
inputs = processor(sentences, voice_preset=voice_preset).to(device)
|
63 |
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
64 |
|
65 |
fine_output = bark.generate(
|
66 |
**inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
|
67 |
)
|
68 |
-
|
69 |
print("Fine tokens generated")
|
70 |
-
|
71 |
with torch.no_grad():
|
72 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
73 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
74 |
-
|
75 |
return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
76 |
|
77 |
|
|
|
1 |
+
import spaces
|
2 |
import torch
|
3 |
from threading import Thread
|
4 |
|
|
|
44 |
In this demo, we leverage charactr's Vocos model to create high quality audio from Bark. \
|
45 |
"""
|
46 |
|
47 |
+
bark = BarkModel.from_pretrained(HUB_PATH).to(device)
|
48 |
+
bark = bark.to_bettertransformer()
|
|
|
|
|
|
|
|
|
49 |
|
50 |
|
51 |
+
# Inference on Zero GPU
|
52 |
+
@spaces.GPU
|
53 |
+
def generate_audio(text, voice_preset=None, lag=0):
|
54 |
if voice_preset not in speaker_embeddings:
|
55 |
voice_preset = None
|
56 |
+
|
57 |
+
inputs = processor([text], voice_preset=voice_preset).to(device)
|
|
|
|
|
|
|
58 |
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
59 |
|
60 |
fine_output = bark.generate(
|
61 |
**inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
|
62 |
)
|
|
|
63 |
print("Fine tokens generated")
|
64 |
+
|
65 |
with torch.no_grad():
|
66 |
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
67 |
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
68 |
+
|
69 |
return (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
70 |
|
71 |
|