File size: 784 Bytes
f754f08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from transformers import AutoModel
import librosa
import wget
from modeling_diva import DiVAModel
# Smoke test for the DiVA checkpoint stored in the current directory:
# download two WAV clips from the SD-QA repository, decode them with librosa,
# and print what ``model.generate`` returns for three kinds of calls
# (audio only, audio plus one string, and a two-item batch with one string
# per clip).

# Sample rate passed to ``librosa.load`` for every clip.
SAMPLE_RATE = 16_000

FIRST_CLIP_URL = "https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-1008642825401516622.wav"
SECOND_CLIP_URL = "https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-2426554427049983479.wav"


def _download_and_decode(url):
    """Download *url* with wget and decode the saved file with librosa.

    Returns a ``(local_path, waveform)`` pair; the sample rate that
    ``librosa.load`` also returns is discarded.
    """
    local_path = wget.download(url)
    waveform, _ = librosa.load(local_path, sr=SAMPLE_RATE)
    return local_path, waveform


# The first clip is fetched before the model is loaded, as in the original
# statement order.
filename, speech_data = _download_and_decode(FIRST_CLIP_URL)
model = DiVAModel.from_pretrained("./")

# Single clip, no second argument.
audio_only_output = model.generate([speech_data])
print(audio_only_output)

# Single clip with one accompanying string (presumably a text instruction for
# the model -- confirm against DiVAModel.generate).
verbatim_output = model.generate(
    [speech_data], ["Repeat verbatim what is said to you."]
)
print(verbatim_output)

# The second clip is only needed for the batched call below.
filename, speech_data2 = _download_and_decode(SECOND_CLIP_URL)

# Two clips in one call, with one string per clip in matching order.
batch_audio = [speech_data, speech_data2]
batch_prompts = ["Reply Briefly Like A Pirate", "Reply Briefly Like A New Yorker"]
print(model.generate(batch_audio, batch_prompts))
|