"""Gradio app: compare a spoken reading with a reference paragraph, plus TTS."""

# Import required libraries
import difflib
import html
import io
import logging
import os
import re
import time

import eng_to_ipa as ipa
import gradio as gr
import requests
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment

logger = logging.getLogger(__name__)

# Directory (relative to the working directory) where generated MP3s are saved.
AUDIO_DIR = "audio"
# Base URL of the Space that receives uploads and serves them back.
SPACE_BASE_URL = "https://mr2along-speech-recognize.hf.space"
# Seconds to wait for the upload endpoint before giving up.
UPLOAD_TIMEOUT_SECONDS = 30


def _audio_path_for(name):
    """Return a path inside AUDIO_DIR for *name*, creating the directory.

    Characters other than word characters and ``-`` are replaced with ``_``
    so punctuation or path separators in a word cannot escape AUDIO_DIR or
    produce an invalid filename. Plain words map to ``audio/<word>.mp3``.
    """
    os.makedirs(AUDIO_DIR, exist_ok=True)
    safe_name = re.sub(r"[^\w\-]", "_", name) or "word"
    return f"{AUDIO_DIR}/{safe_name}.mp3"


# Function to create pronunciation audio
def create_pronunciation_audio(word):
    """Synthesize *word* with gTTS and save it as an MP3.

    Returns:
        The local path of the saved file on success, or a string starting
        with ``"Failed to create pronunciation audio:"`` on any failure
        (legacy contract: errors are returned, not raised).
    """
    try:
        audio_file_path = _audio_path_for(word)
        gTTS(word).save(audio_file_path)
        return audio_file_path  # Return the local path instead of uploading
    except Exception as e:  # boundary: callers expect a string, never a raise
        return f"Failed to create pronunciation audio: {e}"


def _upload_file(local_filename):
    """POST *local_filename* to the Space upload endpoint.

    Returns:
        The first element of the JSON response on HTTP 200, otherwise None.

    Raises:
        OSError: the file cannot be opened, or the request fails
            (requests' exceptions derive from OSError).
        ValueError, LookupError, TypeError: the response body is not the
            expected non-empty JSON sequence.
    """
    upload_url = f"{SPACE_BASE_URL}/gradio_api/upload?upload_id={time.time()}"
    # The context manager closes the handle even when the request fails.
    with open(local_filename, "rb") as audio_file:
        response = requests.post(
            upload_url,
            files={"files": audio_file},
            timeout=UPLOAD_TIMEOUT_SECONDS,
        )
    if response.status_code != 200:
        return None
    return response.json()[0]


# Function to upload audio files to the server
def upfilepath(local_filename):
    """Upload a local file and return the path reported by the server.

    Returns:
        The uploaded path on success; None when the server answers with a
        non-200 status; a human-readable error string on timeout or any
        other failure (legacy contract: errors are returned, not raised).
    """
    try:
        return _upload_file(local_filename)
    except requests.exceptions.Timeout:
        return "Request timed out. Please try again."
    except Exception as e:  # boundary: callers expect a string, never a raise
        return f"An error occurred: {e}"


def _fidelity_label(similarity_score):
    """Map a 0-100 similarity score to its fidelity class label."""
    if similarity_score >= 85:
        return "GOOD (>=85%)"
    if similarity_score >= 70:
        return "ACCEPTABLE (70% - 85%)"
    if similarity_score >= 50:
        return "NEEDS IMPROVEMENT (50% - 70%)"
    return "POOR (<50%)"


def _pronunciation_audio_url(word):
    """Create and upload pronunciation audio for *word*.

    Returns the public URL of the uploaded file, or None when either the
    synthesis or the upload did not succeed.
    """
    local_path = create_pronunciation_audio(word)
    # On failure create_pronunciation_audio returns an error message, which
    # is not an existing file; never try to upload that.
    if not os.path.isfile(local_path):
        logger.warning("No pronunciation audio for %r: %s", word, local_path)
        return None
    try:
        remote_path = _upload_file(local_path)
    except (OSError, ValueError, LookupError, TypeError) as exc:
        logger.warning("Upload of %s failed: %s", local_path, exc)
        return None
    if not remote_path:
        return None
    return f"{SPACE_BASE_URL}/gradio_api/file={remote_path}"


# Update the compare_texts function
def compare_texts(reference_text, transcribed_text):
    """Build an HTML report comparing a transcript with the reference text.

    The quality score is the character-level ``difflib.SequenceMatcher``
    ratio of the two strings, as a percentage rounded to two decimals.
    Words are then compared position by position: equal ignoring case is
    green, a difflib close match is yellow, anything else is red. Words
    that are present but wrong additionally get a pronunciation player.

    NOTE(review): the HTML tags (<br>, <strong>, <span>, <audio>) were
    reconstructed; the visible text and the colours named in the original
    comments are preserved, but the exact original markup is an assumption.
    NOTE(review): ``ipa_transcription`` is not defined in the visible
    source - confirm it is provided elsewhere in this module.

    Args:
        reference_text: The paragraph the user was asked to read.
        transcribed_text: The speech-recognition output.

    Returns:
        A one-element list holding the HTML string (one value per Gradio
        output component).
    """
    reference_text = reference_text or ""
    transcribed_text = transcribed_text or ""
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    matcher = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(matcher.ratio() * 100, 2)

    # Every interpolated value is escaped: both texts are user-controlled.
    fidelity = html.escape(_fidelity_label(similarity_score))
    ipa_text = html.escape(str(ipa_transcription(reference_text)))
    parts = [
        f"<strong>Fidelity Class: </strong>{fidelity}<br>",
        f"<strong>Quality Score: </strong>{similarity_score}%<br>",
        f"<strong>Transcribed Text: </strong>{html.escape(transcribed_text)}<br>",
        f"<strong>IPA Transcription: </strong>{ipa_text}<br>",
        "<strong>Word Score List:</strong><br>",
    ]

    # Generate colored word score list and collect the incorrect words
    incorrect_words = []
    for index, word in enumerate(reference_words):
        spoken = transcribed_words[index] if index < len(transcribed_words) else None
        if spoken is None:
            # Word in reference that was not transcribed (no audio offered).
            # NOTE(review): red is assumed here; the original colour was lost.
            color = "red"
        elif word.lower() == spoken.lower():
            color = "green"  # Correct words in green
        elif difflib.get_close_matches(word, [spoken]):
            color = "yellow"  # Close matches in yellow
        else:
            color = "red"  # Incorrect words in red
            incorrect_words.append(word)
        parts.append(f'<span style="color: {color};">{html.escape(word)}</span> ')

    # Provide audio for incorrect words
    if incorrect_words:
        parts.append("<br><strong>Pronunciation for Incorrect Words:</strong><br>")
        for word in incorrect_words:
            parts.append(f"{html.escape(word)}: ")
            audio_src = _pronunciation_audio_url(word)
            if audio_src is None:
                # Keep the word listed, but never emit a player with a bad URL.
                parts.append("(audio unavailable)<br>")
                continue
            parts.append(
                f'<audio controls><source src="{html.escape(audio_src, quote=True)}"'
                ' type="audio/mpeg"></audio><br>'
            )

    return ["".join(parts)]


# Step 4: Text-to-Speech Function
def text_to_speech(paragraph):
    """Synthesize *paragraph* to ``audio/paragraph.mp3`` and return the path.

    Returns None when no text (or only whitespace) is provided, since there
    is nothing to speak.
    """
    if not paragraph or not paragraph.strip():
        return None  # Handle the case when no text is provided
    os.makedirs(AUDIO_DIR, exist_ok=True)
    audio_file_path = f"{AUDIO_DIR}/paragraph.mp3"
    gTTS(paragraph).save(audio_file_path)  # Save the audio to a file
    return audio_file_path  # Return the file path


# Gradio Interface Function
def gradio_function(paragraph, audio):
    """Transcribe *audio* and compare the result with *paragraph*.

    NOTE(review): ``transcribe_audio`` is not defined in the visible
    source - confirm it is provided elsewhere in this module.

    Returns:
        The one-element list produced by ``compare_texts``.
    """
    # Transcribe the audio
    transcribed_text = transcribe_audio(audio)
    # Compare the original paragraph with the transcribed text
    return compare_texts(paragraph, transcribed_text)


# Gradio Interface using the updated API
interface = gr.Interface(
    fn=gradio_function,
    inputs=[
        gr.Textbox(lines=5, label="Input Paragraph"),
        gr.Audio(type="filepath", label="Record Audio"),
    ],
    outputs=["html"],
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)

# Gradio Interface for Text-to-Speech
tts_interface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(lines=5, label="Input Paragraph to Read Aloud"),
    outputs=gr.Audio(label="Text-to-Speech Output"),
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud.",
)

# Combine both interfaces into one
demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])

# Launch Gradio app
demo.launch()