File size: 1,044 Bytes
4cd4d17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from transformers import pipeline
import torch

# Initialize the transcriber
def initialize_transcriber():
    """Build the speech-recognition pipeline used by the app.

    The ``vinai/PhoWhisper-medium`` checkpoint is loaded onto the GPU when
    ``torch`` reports that CUDA is available, and onto the CPU otherwise.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"automatic-speech-recognition"`` task.
    """
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    return pipeline(
        "automatic-speech-recognition",
        model="vinai/PhoWhisper-medium",
        device=target_device,
    )

# Build the pipeline once, at import time; transcribe_audio() reuses this
# module-level instance on every call instead of reloading the model.
transcriber = initialize_transcriber()

# Function to transcribe audio
def transcribe_audio(audio_path):
    """Transcribe an audio recording to text.

    Args:
        audio_path: Path of the audio file to transcribe. Falsy values
            (``None`` or ``""``) are treated as "nothing was recorded".

    Returns:
        The transcribed text on success. On any failure, a message that
        starts with ``"Error during transcription: "`` is returned instead
        of raising, so the problem is shown in the text output.
    """
    # Guard the empty case explicitly: passing None/"" to the pipeline would
    # only produce an obscure internal error message for the user.
    if not audio_path:
        return "Error during transcription: no audio was provided"

    try:
        return transcriber(audio_path)["text"]
    except Exception as e:
        # Deliberate best-effort boundary: report the failure as text.
        return f"Error during transcription: {e}"

# Create the Gradio interface: microphone audio in, transcribed text out.
interface = gr.Interface(
    fn=transcribe_audio,
    # type="filepath" is expected to hand transcribe_audio a path to the
    # recorded file rather than raw samples.
    # NOTE(review): Gradio 4.x appears to have replaced `source=` with
    # `sources=[...]` on gr.Audio — confirm against the pinned Gradio version.
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Vietnamese Speech-to-Text",
    description="Record audio in Vietnamese and get the transcription",
    examples=[],  # no example recordings are supplied
    theme=gr.themes.Soft()
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # share=True requests a public share link in addition to the local
    # server (per Gradio's launch() documentation).
    interface.launch(share=True)