Irpan
committed on
Commit
•
f6cde70
1
Parent(s):
3c9ecf2
app
Browse files
asr.py
CHANGED
@@ -1,5 +1,7 @@
|
|
|
|
1 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
2 |
import torch
|
|
|
3 |
from umsc import UgMultiScriptConverter
|
4 |
import util
|
5 |
|
@@ -13,9 +15,22 @@ asr_processor.tokenizer.set_target_lang("uig-script_latin")
|
|
13 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
14 |
asr_model = asr_model.to(device)
|
15 |
|
16 |
-
def asr(
|
17 |
# Load and resample user audio
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Process audio through ASR model
|
21 |
inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
|
|
|
1 |
+
import numpy as np
|
2 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
3 |
import torch
|
4 |
+
import torchaudio
|
5 |
from umsc import UgMultiScriptConverter
|
6 |
import util
|
7 |
|
|
|
15 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
16 |
asr_model = asr_model.to(device)
|
17 |
|
18 |
+
def asr(audio_data, target_rate = 16000):
|
19 |
# Load and resample user audio
|
20 |
+
if isinstance(audio_data, tuple):
|
21 |
+
# microphone
|
22 |
+
sampling_rate, audio_input = audio_data
|
23 |
+
audio_input = (audio_input / 32768.0).astype(np.float32)
|
24 |
+
elif isinstance(audio_data, str):
|
25 |
+
# file upload
|
26 |
+
audio_input, sampling_rate = torchaudio.load(audio_data)
|
27 |
+
else:
|
28 |
+
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
|
29 |
+
|
30 |
+
# Resample if needed
|
31 |
+
if sampling_rate != target_rate:
|
32 |
+
resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
|
33 |
+
audio_input = resampler(audio_input)
|
34 |
|
35 |
# Process audio through ASR model
|
36 |
inputs = asr_processor(audio_input.squeeze(), sampling_rate=sampling_rate, return_tensors="pt", padding=True)
|