"""Gradio app: transcribe microphone audio with Whisper and give basic
pronunciation feedback by diffing the transcription against a reference text."""

import difflib
import string

import gradio as gr
import whisper

# Load the Whisper model once at startup ("base" balances speed and accuracy).
model = whisper.load_model("base")

# Translation table that deletes all ASCII punctuation from a word.
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def _normalize_words(text):
    """Split *text* into lowercase, punctuation-free words for fair comparison.

    Whisper output is capitalized and punctuated ("Hello, world."), so raw
    token comparison against the reference would flag correctly pronounced
    words; normalizing both sides first avoids those false positives.
    Punctuation-only tokens are dropped.
    """
    words = (w.translate(_PUNCT_TABLE).lower() for w in text.split())
    return [w for w in words if w]


def pronunciation_feedback(transcription, reference_text):
    """
    Provide basic pronunciation feedback by comparing the transcription
    with the reference (expected) text.

    Words present in the reference but absent from the transcription
    (after case/punctuation normalization) are reported as mispronounced.
    Returns a single feedback string.
    """
    diff = difflib.ndiff(
        _normalize_words(reference_text),
        _normalize_words(transcription),
    )
    # ndiff marks reference-only words with a leading "- ".
    errors = [token[2:] for token in diff if token.startswith('- ')]
    if errors:
        return "Mispronounced words: " + ', '.join(errors)
    return "Great job! Your pronunciation is spot on."


def transcribe_and_feedback(audio, reference_text):
    """
    Transcribe the audio file at path *audio* with Whisper and return
    (transcription, feedback).

    With live=True Gradio fires before any recording exists, passing
    audio=None; guard against that instead of crashing in Whisper.
    """
    if audio is None:
        return "", "No audio received yet — please record something."
    result = model.transcribe(audio)
    transcription = result['text']
    feedback = pronunciation_feedback(transcription, reference_text)
    return transcription, feedback


# Set up the Gradio interface.
# NOTE(review): `source=` was renamed to `sources=[...]` in Gradio 4.x —
# confirm against the pinned Gradio version before upgrading.
interface = gr.Interface(
    fn=transcribe_and_feedback,        # Transcribe and provide feedback
    inputs=[
        gr.Audio(source="microphone", type="filepath"),  # Live audio input
        gr.Textbox(label="Expected Text"),  # Reference text from the user
    ],
    outputs=[
        gr.Textbox(label="Transcription"),           # Display transcription
        gr.Textbox(label="Pronunciation Feedback"),  # Display feedback
    ],
    live=True,  # Enable real-time transcription
)

if __name__ == "__main__":
    # Launch the Gradio interface (share=True gives a public link).
    interface.launch(share=True)