sarahai committed on
Commit
d18c50a
·
verified ·
1 Parent(s): e3d8566

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torchaudio
3
+ import torch
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+
6
# Load the fine-tuned model and processor
model_name_or_path = "./fine-tuned-uzbek-stt"  # Replace with your model's path


@st.cache_resource
def _load_processor_and_model(path):
    """Load the Wav2Vec2 processor and CTC model stored at *path*.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader keeps the weights from being re-read
    on each rerun.

    Args:
        path: Directory or model identifier passed to ``from_pretrained``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    return (
        Wav2Vec2Processor.from_pretrained(path),
        Wav2Vec2ForCTC.from_pretrained(path),
    )


processor, model = _load_processor_and_model(model_name_or_path)
10
+
11
+ # Function to preprocess and transcribe audio
12
def preprocess_audio(file):
    """Load an audio file and return it as a mono, 16 kHz NumPy waveform.

    Args:
        file: Path or file-like object accepted by ``torchaudio.load``.

    Returns:
        A 1-D NumPy array of audio samples at 16 kHz.
    """
    waveform, sampling_rate = torchaudio.load(file)

    # torchaudio.load returns (channels, frames) by default. A plain
    # squeeze() only drops the channel axis for mono input, so a stereo
    # file would stay 2-D. Average the channels so the result is always
    # a single 1-D waveform.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    # Resample to 16 kHz if necessary
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate, new_freq=16000
        )
        waveform = resampler(waveform)

    return waveform.numpy()
22
+
23
def transcribe_audio(speech_array):
    """Transcribe a 16 kHz waveform with the module-level processor and model.

    Args:
        speech_array: Audio samples, passed to the processor with
            ``sampling_rate=16000``.

    Returns:
        The decoded text, with every ``[UNK]`` token replaced by an
        apostrophe.
    """
    encoded = processor(speech_array, sampling_rate=16000, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = model(encoded.input_values).logits

    # Pick the highest-scoring token id along the last axis, then decode
    # the first sequence of the batch.
    best_ids = scores.argmax(dim=-1)
    decoded_text = processor.decode(best_ids[0])

    return decoded_text.replace("[UNK]", "'")
30
+
31
# Streamlit interface: page header, upload widget, and transcription output.
st.title("Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe.")

uploaded_audio = st.file_uploader("Upload an audio file", type=["wav", "mp3"])

# Only run the pipeline once an upload is present.
if uploaded_audio is not None:
    # Preprocess and transcribe before rendering any result.
    transcript_text = transcribe_audio(preprocess_audio(uploaded_audio))

    st.write("Transcription:")
    st.text(transcript_text)