# Import required libraries
import os
import requests
import speech_recognition as sr
import difflib
import gradio as gr
from gtts import gTTS
import io
from pydub import AudioSegment
import time
import eng_to_ipa as ipa

# Function to create pronunciation audio for a single word
def create_pronunciation_audio(word):
    try:
        tts = gTTS(word)
        os.makedirs("audio", exist_ok=True)  # Make sure the output directory exists
        audio_file_path = f"audio/{word}.mp3"
        tts.save(audio_file_path)
        return audio_file_path  # Return the local path instead of uploading
    except Exception as e:
        return f"Failed to create pronunciation audio: {e}"

# Function to upload audio files to the server
def upfilepath(local_filename):
    ts = time.time()
    upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
    try:
        # Use a context manager so the file handle is closed after the upload
        with open(local_filename, 'rb') as f:
            response = requests.post(upload_url, files={'files': f}, timeout=30)  # 30-second timeout
        if response.status_code == 200:
            result = response.json()
            extracted_path = result[0]
            return extracted_path
        return None
    except requests.exceptions.Timeout:
        return "Request timed out. Please try again."
    except Exception as e:
        return f"An error occurred: {e}"

# Compare the reference text with the transcribed text and build an HTML report
def compare_texts(reference_text, transcribed_text):
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()
    incorrect_words_audios = []  # Store audio paths for incorrect words

    sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(sm.ratio() * 100, 2)

    # Construct HTML output
    html_output = "<strong>Fidelity Class:</strong> "
    if similarity_score >= 85:
        html_output += "<strong>GOOD (>=85%)</strong><br>"
    elif similarity_score >= 70:
        html_output += "<strong>ACCEPTABLE (70% - 85%)</strong><br>"
    elif similarity_score >= 50:
        html_output += "<strong>NEEDS IMPROVEMENT (50% - 70%)</strong><br>"
    else:
        html_output += "<strong>POOR (<50%)</strong><br>"
    html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
    html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
    html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"
    html_output += "<strong>Word Score List:</strong><br>"

    # Generate colored word score list and audio links
    for i, word in enumerate(reference_words):
        try:
            if word.lower() == transcribed_words[i].lower():
                html_output += f'<span style="color: green;">{word}</span> '  # Correct words in green
            elif difflib.get_close_matches(word, [transcribed_words[i]]):
                html_output += f'<span style="color: yellow;">{word}</span> '  # Close matches in yellow
            else:
                # Incorrect words in red
                html_output += f'<span style="color: red;">{word}</span> '
                # Create pronunciation audio for the incorrect word
                audio_file_path = create_pronunciation_audio(word)
                incorrect_words_audios.append((word, audio_file_path))
        except IndexError:
            # Word in the reference that was not transcribed at all
            html_output += f'<span style="color: red;">{word}</span> '

    # Provide audio for incorrect words
    if incorrect_words_audios:
        html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
        for word, audio in incorrect_words_audios:
            up_audio = upfilepath(audio)  # Upload the audio so it can be served by the Space
            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
            html_output += f'{word}: '
            html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'

    return [html_output]

# Step 4: Text-to-Speech Function
def text_to_speech(paragraph):
    if not paragraph:
        return None  # Handle the case when no text is provided
    tts = gTTS(paragraph)
    os.makedirs("audio", exist_ok=True)  # Make sure the output directory exists
    audio_file_path = "audio/paragraph.mp3"  # Save the audio to a file
    tts.save(audio_file_path)
    return audio_file_path  # Return the file path
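
# transcribe_audio is called below but is not defined in this section. A minimal
# sketch, assuming Google's free recognizer via the speech_recognition library
# (imported above as sr); the original helper may use a different backend.
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)  # Read the entire recording
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return ""  # Nothing intelligible was recognized
    except Exception as e:
        return f"Transcription error: {e}"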

# Gradio Interface Function
def gradio_function(paragraph, audio):
    # Transcribe the audio
    transcribed_text = transcribe_audio(audio)
    # Compare the original paragraph with the transcribed text
    comparison_result = compare_texts(paragraph, transcribed_text)
    # Return comparison result
    return comparison_result

# Gradio Interface for speech-recognition comparison
interface = gr.Interface(
    fn=gradio_function,
    inputs=[
        gr.Textbox(lines=5, label="Input Paragraph"),
        gr.Audio(type="filepath", label="Record Audio")
    ],
    outputs=["html"],
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text."
)

# Gradio Interface for Text-to-Speech
tts_interface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(lines=5, label="Input Paragraph to Read Aloud"),
    outputs=gr.Audio(label="Text-to-Speech Output"),
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud."
)

# Combine both interfaces into one
demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
# Launch Gradio app
demo.launch()