"""Record speech, transcribe it, and score it against a reference paragraph.

The script records from the default microphone with PyAudio, transcribes the
recording with the Google Web Speech API (via ``speech_recognition``), compares
the transcription with a user-supplied paragraph, and exposes the whole flow
through a Gradio interface.
"""

import difflib
import wave

import gradio as gr
import pyaudio
import speech_recognition as sr

# File the Gradio callback records to and then transcribes from.
RECORDING_PATH = "recorded_audio.wav"


# Step 1: Record audio
def record_audio(filename: str, seconds: float = 10) -> None:
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        filename: Path of the WAV file to write (overwritten if it exists).
        seconds: Length of the recording in seconds. Defaults to 10, the
            duration that used to be hard-coded.
    """
    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1
    fs = 44100  # Record at 44100 samples per second

    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    try:
        # Read the sample width before terminate() is called, instead of
        # calling into the PyAudio instance after it has been shut down.
        sample_width = p.get_sample_size(sample_format)

        print("Recording...")
        stream = p.open(
            format=sample_format,
            channels=channels,
            rate=fs,
            frames_per_buffer=chunk,
            input=True,
        )
        try:
            # Store data in chunks for the specified duration
            frames = [
                stream.read(chunk)
                for _ in range(int(fs / chunk * seconds))
            ]
        finally:
            # Stop and close the stream even if a read fails
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded audio as a WAV file; the context manager closes the
    # file even if writing fails part-way.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b"".join(frames))

    print("Recording completed.")


# Step 2: Transcribe the audio file
def transcribe_audio(filename: str) -> str:
    """Transcribe an audio file with the Google Web Speech API.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed (the reason is printed).
    """
    recognizer = sr.Recognizer()

    # Open the audio file for transcription
    with sr.AudioFile(filename) as source:
        audio = recognizer.record(source)

    # Recognize the audio using Google Web Speech API
    print("Transcribing the audio...")
    try:
        transcription = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand the audio")
        return ""
    except sr.RequestError as e:
        print(f"Error with Google Speech Recognition service: {e}")
        return ""

    print("Transcription completed.")
    return transcription


# Step 3: Compare the transcribed text with the input paragraph
def compare_texts(reference_text: str, transcribed_text: str) -> dict:
    """Score a transcription against the reference text.

    The overall score is the character-level ``difflib.SequenceMatcher`` ratio
    of the two raw strings, as a percentage rounded to two decimals. Each
    reference word is additionally compared (case-insensitively) with the
    transcribed word at the same position: 100 for a match, 50 for a
    different word, 0 when the transcription has no word at that position.

    Args:
        reference_text: The paragraph the speaker was expected to read.
        transcribed_text: The text produced by the speech recognizer.

    Returns:
        A JSON-serializable result dictionary; ``fidelity_class`` is
        ``"CORRECT"`` when the overall score is above 50, else ``"INCORRECT"``.
    """
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(sm.ratio() * 100, 2)

    word_scores = []
    for i, word in enumerate(reference_words):
        if i >= len(transcribed_words):
            # The transcription ended before reaching this reference word.
            quality = 0
        elif word.lower() == transcribed_words[i].lower():
            quality = 100
        else:
            quality = 50  # Assuming 50 if it's wrong
        word_scores.append({"word": word, "quality_score": quality})

    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"

    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "success",
        "text_score": {
            "fidelity_class": fidelity_class,
            "quality_score": similarity_score,
            "text": reference_text,
            "transcribedText": transcribed_text,
            "word_score_list": word_scores,
        },
        "version": "1.1",
    }


# Gradio Interface Function
def gradio_function(paragraph: str) -> dict:
    """Record the speaker, transcribe the recording, and score it.

    Args:
        paragraph: The reference paragraph entered in the Gradio textbox.

    Returns:
        The comparison result produced by ``compare_texts``.
    """
    # Record the audio to RECORDING_PATH, then transcribe that same file
    record_audio(RECORDING_PATH)
    transcribed_text = transcribe_audio(RECORDING_PATH)

    # Compare the original paragraph with the transcribed text
    return compare_texts(paragraph, transcribed_text)


# Gradio Interface
# The ``gr.inputs`` namespace is not available in Gradio 4; prefer the
# top-level component and only fall back to the legacy one on older releases.
_textbox_cls = getattr(gr, "Textbox", None) or gr.inputs.Textbox

interface = gr.Interface(
    fn=gradio_function,
    inputs=_textbox_cls(lines=5, label="Input Paragraph"),
    outputs="json",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)

# Launch Gradio app only when run as a script, so importing this module
# (e.g. to reuse compare_texts) does not start a web server.
if __name__ == "__main__":
    interface.launch()