Update README.md
Browse files
README.md
CHANGED
@@ -11,6 +11,39 @@ base_model:
|
|
11 |
This is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It is trained using a distillation loss. More details are available in the [pre-print](https://arxiv.org/abs/2410.02678).
|
12 |
|
13 |
See the model in action at [diva-audio.github.io](https://diva-audio.github.io).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
## Citation
|
15 |
**BibTeX:**
|
16 |
|
|
|
11 |
This is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It is trained using a distillation loss. More details are available in the [pre-print](https://arxiv.org/abs/2410.02678).
|
12 |
|
13 |
See the model in action at [diva-audio.github.io](https://diva-audio.github.io).
|
14 |
+
|
15 |
+
### Inference Example
|
16 |
+
```python
|
17 |
+
from transformers import AutoModel
|
18 |
+
import librosa
|
19 |
+
import wget
|
20 |
+
from modeling_diva import DiVAModel
|
21 |
+
|
22 |
+
filename = wget.download(
|
23 |
+
"https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-1008642825401516622.wav"
|
24 |
+
)
|
25 |
+
|
26 |
+
speech_data, _ = librosa.load(filename, sr=16_000)
|
27 |
+
|
28 |
+
model = AutoModel.from_pretrained("WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True)
|
29 |
+
|
30 |
+
print(model.generate([speech_data]))
|
31 |
+
print(model.generate([speech_data], ["Reply Briefly Like A Pirate"]))
|
32 |
+
|
33 |
+
filename = wget.download(
|
34 |
+
"https://github.com/ffaisal93/SD-QA/raw/refs/heads/master/dev/eng/irl/wav_eng/-2426554427049983479.wav"
|
35 |
+
)
|
36 |
+
|
37 |
+
speech_data2, _ = librosa.load(filename, sr=16_000)
|
38 |
+
|
39 |
+
print(
|
40 |
+
model.generate(
|
41 |
+
[speech_data, speech_data2],
|
42 |
+
["Reply Briefly Like A Pirate", "Reply Briefly Like A New Yorker"],
|
43 |
+
)
|
44 |
+
)
|
45 |
+
```
|
46 |
+
|
47 |
## Citation
|
48 |
**BibTeX:**
|
49 |
|