roman committed on
Commit
87bebbb
·
1 Parent(s): e9f45f4

choose model from list, increase sampling rate

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -2,20 +2,26 @@ import streamlit as st
2
  import librosa
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
4
 
 
 
5
 
 
6
 
7
- uploaded_file = st.file_uploader("Choose file", type=["wav", "mp3"])
 
 
 
8
 
9
- processor = AutoProcessor.from_pretrained("Yehor/whisper-small-ukrainian")
10
 
11
- model = AutoModelForSpeechSeq2Seq.from_pretrained("Yehor/whisper-small-ukrainian")
12
 
13
  def map_to_pred(file_path):
14
  # load audio file
15
  audio, _ = librosa.load(file_path)
16
 
17
  # preprocess audio and generate standard
18
- input_features = processor([audio], return_tensors="pt", sampling_rate=16_000).input_features
19
  generated_ids = model.generate(inputs=input_features)
20
  transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
21
  text = processor.tokenizer._normalize(transcription[0])
 
2
  import librosa
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
4
 
5
+ # Define available models
6
+ available_models = ["Yehor/whisper-small-ukrainian"]
7
 
8
+ st.title("Voice Recognition App")
9
 
10
+ # Model selection dropdown
11
+ model_choice = st.selectbox("Choose a model", available_models)
12
+
13
+ processor = AutoProcessor.from_pretrained(model_choice)
14
 
15
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_choice)
16
 
17
+ uploaded_file = st.file_uploader("Choose file", type=["wav", "mp3"])
18
 
19
  def map_to_pred(file_path):
20
  # load audio file
21
  audio, _ = librosa.load(file_path)
22
 
23
  # preprocess audio and generate standard
24
+ input_features = processor([audio], return_tensors="pt", sampling_rate=32_000).input_features
25
  generated_ids = model.generate(inputs=input_features)
26
  transcription = processor.batch_decode(generated_ids, normalize=True, skip_special_tokens=True)
27
  text = processor.tokenizer._normalize(transcription[0])