"""Gradio app that scores a spoken reading of a paragraph against its text.

The user supplies a reference paragraph and an audio recording.  The recording
is transcribed with the Google Web Speech API (through ``speech_recognition``)
and the transcription is compared with the paragraph, producing an overall
similarity score and a per-word score list.
"""

import difflib
import string

import gradio as gr
import speech_recognition as sr

# Per-word scores reported in "word_score_list".
_SCORE_MATCH = 100   # the reference word was recognised
_SCORE_WRONG = 50    # a different word was recognised in its place
_SCORE_MISSING = 0   # nothing was recognised for this reference word

# Overall similarity (percent) above which a reading is classed "CORRECT".
_CORRECT_THRESHOLD = 50


def _try_transcribe(audio):
    """Transcribe the audio file at path *audio*.

    Returns:
        A ``(transcription, error_message)`` pair.  Exactly one of the two is
        ``None``: ``error_message`` on success, ``transcription`` on failure.
    """
    recognizer = sr.Recognizer()

    # Load the whole file into the in-memory format the Recognizer works on.
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        # Recognize the audio using the Google Web Speech API.
        return recognizer.recognize_google(audio_data), None
    except sr.UnknownValueError:
        return None, "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return None, f"Error with Google Speech Recognition service: {e}"


# Step 1: Transcribe the audio file
def transcribe_audio(audio) -> str:
    """Return the transcription of the audio file at path *audio*.

    On a recognition failure the function does not raise; it returns a
    human-readable error message instead of a transcription.
    """
    transcription, error_message = _try_transcribe(audio)
    return transcription if error_message is None else error_message


def _normalize_word(word):
    """Return *word* casefolded with leading/trailing ASCII punctuation removed."""
    stripped = word.strip(string.punctuation)
    # A token made only of punctuation (e.g. "-") would strip to "" and then
    # compare equal to every other such token; keep it unchanged instead.
    return (stripped or word).casefold()


def _score_words(reference_words, reference_keys, transcribed_keys):
    """Score every reference word against the aligned transcription.

    The two normalised word sequences are aligned with ``SequenceMatcher`` so
    that a single dropped or inserted word does not shift (and thereby
    invalidate) the comparison of all the words that follow it.
    """
    scores = [_SCORE_MISSING] * len(reference_words)
    matcher = difflib.SequenceMatcher(
        None, reference_keys, transcribed_keys, autojunk=False
    )
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            scores[i1:i2] = [_SCORE_MATCH] * (i2 - i1)
        elif tag == "replace":
            # Only as many reference words as there are substituted spoken
            # words count as "wrong"; the remainder were not spoken at all.
            paired = min(i2 - i1, j2 - j1)
            scores[i1:i1 + paired] = [_SCORE_WRONG] * paired
        # "delete": the words stay at _SCORE_MISSING.
        # "insert": extra spoken words have no reference word to score.
    return [
        {"word": word, "quality_score": score}
        for word, score in zip(reference_words, scores)
    ]


# Step 2: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text) -> dict:
    """Compare *transcribed_text* with *reference_text* and build the report.

    Both texts are split on whitespace and each word is normalised (casefolded,
    edge punctuation stripped) before comparison, so "Hello," matches "hello".

    Returns:
        A dict with the overall similarity in percent
        (``text_score.quality_score``), the resulting ``fidelity_class``
        ("CORRECT" above 50 %, otherwise "INCORRECT") and one entry per
        reference word in ``text_score.word_score_list`` scored 100 (matched),
        50 (a different word was spoken) or 0 (not spoken).
    """
    reference_words = reference_text.split()
    reference_keys = [_normalize_word(word) for word in reference_words]
    transcribed_keys = [_normalize_word(word) for word in transcribed_text.split()]

    # Character-level similarity of the normalised texts.  autojunk must be
    # off: with the default heuristic, any character that makes up more than
    # 1 % of a sequence of 200+ items is ignored, which distorts the ratio for
    # paragraph-sized input.
    char_matcher = difflib.SequenceMatcher(
        None,
        " ".join(reference_keys),
        " ".join(transcribed_keys),
        autojunk=False,
    )
    similarity_score = round(char_matcher.ratio() * 100, 2)

    word_scores = _score_words(reference_words, reference_keys, transcribed_keys)

    fidelity_class = (
        "CORRECT" if similarity_score > _CORRECT_THRESHOLD else "INCORRECT"
    )

    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "success",
        "text_score": {
            "fidelity_class": fidelity_class,
            "quality_score": similarity_score,
            "text": reference_text,
            "transcribedText": transcribed_text,
            "word_score_list": word_scores,
        },
        "version": "1.1",
    }


def _error_response(reference_text, message):
    """Build the report returned when no comparison could be performed."""
    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "error",
        "error_message": message,
        "version": "1.1",
    }


# Gradio Interface Function
def gradio_function(paragraph, audio) -> dict:
    """Transcribe *audio* and score it against *paragraph*.

    Args:
        paragraph: The reference text the speaker was asked to read.
        audio: Path of the recorded audio file.

    Returns:
        The report from ``compare_texts`` on success, or a report with
        ``status == "error"`` when an input is missing or the recording could
        not be transcribed.
    """
    if not paragraph or not paragraph.strip():
        return _error_response(paragraph, "No reference paragraph was provided")
    # A submission without a recording leaves no file path to open.
    if not audio:
        return _error_response(paragraph, "No audio was provided")

    # Transcribe the audio
    transcribed_text, error_message = _try_transcribe(audio)
    if error_message is not None:
        # Do not score an error message as if it were what the user said.
        return _error_response(paragraph, error_message)

    # Compare the original paragraph with the transcribed text
    return compare_texts(paragraph, transcribed_text)


# Gradio Interface using the updated API
interface = gr.Interface(
    fn=gradio_function,
    inputs=[
        gr.Textbox(lines=5, label="Input Paragraph"),
        gr.Audio(type="filepath", label="Record Audio"),
    ],
    outputs="json",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)

# Launch Gradio app (only when run as a script, not when imported)
if __name__ == "__main__":
    interface.launch()