Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,15 +2,19 @@ import numpy as np
|
|
2 |
import gradio as gr
|
3 |
from transformers import Wav2Vec2ForCTC,Wav2Vec2Processor
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
def generate_text(audio):
|
11 |
sr, data = audio
|
12 |
print(data)
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
|
15 |
input_audio = gr.Audio(
|
16 |
sources=["microphone"],
|
@@ -22,9 +26,9 @@ input_audio = gr.Audio(
|
|
22 |
),
|
23 |
)
|
24 |
demo = gr.Interface(
|
25 |
-
fn=
|
26 |
inputs=input_audio,
|
27 |
-
outputs="
|
28 |
)
|
29 |
|
30 |
if __name__ == "__main__":
|
|
|
2 |
import gradio as gr
|
3 |
from transformers import Wav2Vec2ForCTC,Wav2Vec2Processor
|
4 |
|
5 |
+
# Hub checkpoint used for speech recognition; the model and its processor are
# both loaded from this same id.
asr_model_id = "Norphel/wav2vec2-large-mms-1b-dzo-colab"

# Processor (feature extractor + tokenizer), with the tokenizer switched to
# the "dzo" target language.
asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_id)
asr_processor.tokenizer.set_target_lang("dzo")

# CTC acoustic model loaded for the same "dzo" target language.
asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_id, target_lang="dzo")
|
9 |
|
10 |
def _prepare_waveform(sr, data, target_sr=16_000):
    """Return ``data`` as a mono float32 waveform sampled at ``target_sr``.

    Args:
        sr: Sample rate of ``data`` in Hz.
        data: Array-like of audio samples, 1-D or 2-D.
        target_sr: Sample rate the returned waveform should have.

    Returns:
        A 1-D ``numpy.float32`` array.
    """
    import numpy as np

    samples = np.asarray(data)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum so values land in ~[-1, 1].
        samples = samples.astype(np.float32) / float(np.iinfo(samples.dtype).max)
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # NOTE(review): assumes a (samples, channels) layout, as Gradio
        # documents for numpy audio - confirm. Down-mix to mono.
        samples = samples.mean(axis=1)

    if sr != target_sr and samples.size > 1:
        # Linear-interpolation resample; avoids adding a new dependency.
        n_out = max(1, int(round(samples.size * float(target_sr) / float(sr))))
        src_times = np.arange(samples.size) / float(sr)
        dst_times = np.arange(n_out) / float(target_sr)
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)

    return samples


def generate_text(audio):
    """Transcribe one audio recording to text with the module-level ASR model.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded.

    Returns:
        The decoded transcription, or ``""`` when there is no audio to
        transcribe.
    """
    if audio is None:
        return ""

    sr, data = audio
    waveform = _prepare_waveform(sr, data)
    if waveform.size == 0:
        return ""

    # Imported here because the visible part of the file has no torch import.
    import torch

    input_dict = asr_processor(
        waveform, sampling_rate=16_000, return_tensors="pt", padding=True
    )
    # Run on whatever device the model lives on instead of assuming CUDA.
    input_values = input_dict.input_values.to(asr_model.device)
    with torch.no_grad():
        logits = asr_model(input_values).logits
    pred_ids = torch.argmax(logits, dim=-1)[0]

    return asr_processor.decode(pred_ids)
|
18 |
|
19 |
input_audio = gr.Audio(
|
20 |
sources=["microphone"],
|
|
|
26 |
),
|
27 |
)
|
28 |
# Gradio UI: audio from `input_audio` is passed to `generate_text`, and the
# returned string is shown in a text output.
demo = gr.Interface(fn=generate_text, inputs=input_audio, outputs="text")
|
33 |
|
34 |
if __name__ == "__main__":
|