roman
committed on
Commit
·
87bebbb
1
Parent(s):
e9f45f4
choose from list, increase sampling rate
Browse files
app.py
CHANGED
@@ -2,20 +2,26 @@ import streamlit as st
|
|
2 |
import librosa
|
3 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
4 |
|
|
|
|
|
5 |
|
|
|
6 |
|
7 |
-
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
|
11 |
-
|
12 |
|
13 |
def map_to_pred(file_path):
|
14 |
# load audio file
|
15 |
audio, _ = librosa.load(file_path)
|
16 |
|
17 |
# preprocess audio and generate standard
|
18 |
-
input_features = processor([audio], return_tensors="pt", sampling_rate=
|
19 |
generated_ids = model.generate(inputs=input_features)
|
20 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
21 |
text = processor.tokenizer._normalize(transcription[0])
|
|
|
2 |
import librosa
|
3 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
4 |
|
5 |
+
# Define available models
|
6 |
+
available_models = ["Yehor/whisper-small-ukrainian"]
|
7 |
|
8 |
+
st.title("Voice Recognition App")
|
9 |
|
10 |
+
# Model selection dropdown
|
11 |
+
model_choice = st.selectbox("Choose a model", available_models)
|
12 |
+
|
13 |
+
processor = AutoProcessor.from_pretrained(model_choice)
|
14 |
|
15 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_choice)
|
16 |
|
17 |
+
uploaded_file = st.file_uploader("Choose file", type=["wav", "mp3"])
|
18 |
|
19 |
def map_to_pred(file_path):
|
20 |
# load audio file
|
21 |
audio, _ = librosa.load(file_path)
|
22 |
|
23 |
# preprocess audio and generate standard
|
24 |
+
input_features = processor([audio], return_tensors="pt", sampling_rate=32_000).input_features
|
25 |
generated_ids = model.generate(inputs=input_features)
|
26 |
transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
|
27 |
text = processor.tokenizer._normalize(transcription[0])
|