File size: 1,847 Bytes
0f2d91c
 
e7ce94f
0f2d91c
e7ce94f
0f2d91c
 
0893bcc
0f2d91c
e7ce94f
 
0893bcc
e7ce94f
0893bcc
e7ce94f
 
 
 
0893bcc
e7ce94f
0893bcc
 
e7ce94f
0893bcc
 
 
 
e7ce94f
0f2d91c
e7ce94f
0f2d91c
0893bcc
 
e7ce94f
0893bcc
 
 
0f2d91c
e7ce94f
0f2d91c
e7ce94f
 
 
 
 
 
 
 
 
 
0f2d91c
 
e7ce94f
0f2d91c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
import whisper
import difflib

# Load the Whisper "base" checkpoint once, at import time, so that
# transcribe_and_feedback can reuse this module-level model on every call.
# ("base" was chosen by the author as a balance between speed and accuracy.)
model = whisper.load_model("base")

def _strip_edge_punctuation(word):
    """Return *word* without leading/trailing non-alphanumeric characters."""
    start, end = 0, len(word)
    while start < end and not word[start].isalnum():
        start += 1
    while end > start and not word[end - 1].isalnum():
        end -= 1
    return word[start:end]


def pronunciation_feedback(transcription, reference_text):
    """Report reference words that do not appear in the transcription.

    Both texts are split on whitespace and aligned word by word. Words are
    compared case-insensitively and with surrounding punctuation removed,
    so that differences such as "Hello," vs "hello" are not reported as
    pronunciation errors. Punctuation inside a word (e.g. "don't") is kept.

    Args:
        transcription: Text recognized from the speaker's audio. ``None``
            is treated as an empty string.
        reference_text: Text the speaker was expected to say. ``None`` is
            treated as an empty string.

    Returns:
        ``"Mispronounced words: ..."`` listing, in reference order, every
        expected word that was missing or replaced in the transcription,
        or a success message when every expected word was matched.
    """
    # Tokens that consist solely of punctuation (e.g. a stray "-") carry no
    # pronunciation information, so they are dropped from both sides.
    expected = [
        cleaned
        for cleaned in map(_strip_edge_punctuation, (reference_text or "").split())
        if cleaned
    ]
    spoken = [
        cleaned
        for cleaned in map(_strip_edge_punctuation, (transcription or "").split())
        if cleaned
    ]

    # Align on casefolded keys but report the reference words as written.
    # autojunk is disabled so frequent words in long texts are still aligned.
    matcher = difflib.SequenceMatcher(
        None,
        [word.casefold() for word in expected],
        [word.casefold() for word in spoken],
        autojunk=False,
    )

    # "delete" and "replace" are the opcodes that cover reference words with
    # no equal counterpart in the transcription.
    errors = [
        word
        for tag, ref_start, ref_end, _, _ in matcher.get_opcodes()
        if tag in ("delete", "replace")
        for word in expected[ref_start:ref_end]
    ]

    if errors:
        return "Mispronounced words: " + ', '.join(errors)
    return "Great job! Your pronunciation is spot on."

def transcribe_and_feedback(audio, reference_text):
    """Transcribe a recording and compare it against the expected text.

    Args:
        audio: The recording to transcribe, passed straight to the
            module-level Whisper model. ``None`` means no recording is
            available yet.
        reference_text: The text the speaker was expected to say.

    Returns:
        A ``(transcription, feedback)`` tuple of strings. When ``audio`` is
        ``None`` the transcription is empty and the feedback asks the user
        to record something.
    """
    # The interface runs in live mode, so this callback can fire before any
    # audio exists (for example when only the text box changes). Transcribing
    # None would raise, so answer with a prompt instead.
    if audio is None:
        return "", "No audio received. Please record your speech first."

    result = model.transcribe(audio)
    # The recognized text may carry surrounding whitespace; trim it so the
    # displayed transcription is clean.
    transcription = result['text'].strip()

    feedback = pronunciation_feedback(transcription, reference_text)

    return transcription, feedback

# Build the Gradio UI around transcribe_and_feedback.
#   inputs : a microphone recording (delivered to the callback as a file
#            path) and the text the speaker was expected to say
#   outputs: the transcription and the pronunciation feedback
# live=True re-runs the callback as inputs change, for real-time results.
interface = gr.Interface(
    fn=transcribe_and_feedback,
    inputs=[gr.Audio(source="microphone", type="filepath"), gr.Textbox(label="Expected Text")],
    outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Pronunciation Feedback")],
    live=True,
)

# Start serving the app (intended for Hugging Face Spaces); share=True
# additionally requests a public share link from Gradio.
interface.launch(share=True)