semakoc committed on
Commit
b626f3f
·
verified ·
1 Parent(s): 2914d5a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import librosa
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+
6
# Wav2Vec2 checkpoint used for transcription. The processor and the CTC model
# are both created from this one identifier, once, when the module is loaded.
MODEL_NAME = "facebook/wav2vec2-large-960h"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
10
+
11
def transcribe(audio_file) -> str:
    """Transcribe speech from an uploaded audio file or a microphone recording.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty path (no recording or upload available) is accepted.

    Returns:
        The decoded transcription. An empty string is returned when there is
        no audio to transcribe, and ``"Error processing file"`` when loading
        the audio or running the model fails.
    """
    import logging  # local import keeps this block self-contained

    # No input yet: return an empty result instead of sending a missing path
    # through the pipeline and reporting a misleading error.
    if not audio_file:
        return ""

    sample_rate = 16000  # rate the audio is loaded at and declared to the processor

    try:
        # The audio is loaded at `sample_rate`, so the rate returned by
        # librosa carries no extra information and is discarded.
        audio, _ = librosa.load(audio_file, sr=sample_rate)

        # A file that decodes to zero samples has nothing to transcribe.
        if audio.size == 0:
            return ""

        # Convert the waveform into the tensor format the model consumes.
        input_values = processor(
            audio, sampling_rate=sample_rate, return_tensors="pt"
        ).input_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits

        # Pick the most likely token per frame and decode the sequence to text.
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.batch_decode(predicted_ids)[0]
    except Exception:
        # Top-level UI boundary: keep the user-facing message unchanged, but
        # record the traceback so the failure can be diagnosed.
        logging.getLogger(__name__).exception(
            "Transcription failed for %r", audio_file
        )
        return "Error processing file"
34
+
35
# UI build: one audio input (microphone or upload) wired to transcribe(),
# with the transcription shown as plain text.
audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Speak or Upload Audio",
)

interface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Wav2Vec2 Speech-to-Text Transcription",
    description="Speak into your microphone or upload an audio file to get an automatic transcription.",
    live=True,  # live mode, intended for real-time microphone processing
)

# Start the app and request a shareable link.
interface.launch(share=True)