# Scraped page header (not part of the program):
#   mr2along's picture | Update app.py | 61c9f90 verified | raw | history blame | 3.57 kB
import difflib
import html
import os

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    """Transcribe an audio file using the Google Web Speech API.

    Args:
        audio: Filename (or file-like object) accepted by
            ``speech_recognition.AudioFile``. ``None`` or an empty string
            is treated as "nothing was recorded".

    Returns:
        The recognized text on success. On failure a human-readable
        message is returned instead of raising, so the caller always
        receives a string.
    """
    # The UI can submit the form without a recording; in that case there is
    # nothing to open, so report it the same way other failures are reported
    # (as a message string) rather than failing inside AudioFile.
    if audio is None or audio == "":
        return "No audio was provided"

    recognizer = sr.Recognizer()

    # Load the whole file into an AudioData object the recognizer understands.
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        # Recognize the audio using the Google Web Speech API.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"
# Step 2: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    """Build an HTML report comparing a transcription with its reference text.

    The quality score is the ``difflib.SequenceMatcher`` ratio of the two raw
    strings (character level, case-sensitive), expressed as a percentage
    rounded to two decimals. The text is classed ``CORRECT`` when that score
    is above 50, otherwise ``INCORRECT``.

    Each reference word is then colored:
      * green  - equals the transcribed word at the same position
                 (case-insensitive);
      * yellow - differs at that position but ``difflib.get_close_matches``
                 finds a similar word anywhere in the transcription;
      * red    - no match, or the transcription has no word at that position.

    Args:
        reference_text: The paragraph the speaker was supposed to read.
        transcribed_text: The text produced by speech recognition.

    Returns:
        An HTML fragment (str) with the class, score, transcription and the
        colored word list.
    """
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    matcher = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(matcher.ratio() * 100, 2)
    fidelity = "CORRECT" if similarity_score > 50 else "INCORRECT"

    # Both texts originate outside the program, so they are escaped before
    # being embedded in markup; quote=False because they only ever appear as
    # element content, never inside an attribute.
    parts = [
        f"<strong>Fidelity Class:</strong> {fidelity}<br>",
        f"<strong>Quality Score:</strong> {similarity_score}<br>",
        f"<strong>Transcribed Text:</strong> {html.escape(transcribed_text, quote=False)}<br>",
        "<strong>Word Score List:</strong><br>",
    ]

    transcribed_count = len(transcribed_words)
    for position, word in enumerate(reference_words):
        if position >= transcribed_count:
            # Reference word that was never reached by the transcription.
            color = "red"
        elif word.lower() == transcribed_words[position].lower():
            color = "green"
        elif difflib.get_close_matches(word, transcribed_words):
            color = "yellow"
        else:
            color = "red"
        # The trailing space separates consecutive words in the rendered list.
        parts.append(
            f'<span style="color: {color};">{html.escape(word, quote=False)}</span> '
        )

    return "".join(parts)
# Step 3: Text-to-Speech Function
def text_to_speech(paragraph):
    """Synthesize ``paragraph`` with gTTS and return the saved file's path.

    Args:
        paragraph: Text to be read aloud.

    Returns:
        The relative path ``"paragraph.mp3"`` where the audio was written.
    """
    # NOTE: the output name is fixed, so every call overwrites the file
    # written by the previous one.
    output_path = "paragraph.mp3"
    gTTS(paragraph).save(output_path)
    return output_path
# Gradio Interface Function
def gradio_function(paragraph, audio):
    """Score a recording against the paragraph it was meant to reproduce.

    Args:
        paragraph: Reference text entered by the user.
        audio: Recording handed on unchanged to ``transcribe_audio``.

    Returns:
        The HTML report produced by ``compare_texts`` for the paragraph and
        the transcription of ``audio``.
    """
    # Speech-to-text first, then compare its result with the reference text.
    return compare_texts(paragraph, transcribe_audio(audio))
# Gradio Interface using the updated API
interface = gr.Interface(
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
    fn=gradio_function,
    inputs=[
        gr.Textbox(label="Input Paragraph", lines=5),
        gr.Audio(label="Record Audio", type="filepath"),
    ],
    outputs="html",
)

# Second tab: turn a typed paragraph into spoken audio.
tts_interface = gr.Interface(
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud.",
    fn=text_to_speech,
    inputs=gr.Textbox(label="Input Paragraph to Read Aloud", lines=5),
    outputs=gr.Audio(type="filepath", label="Text-to-Speech Output"),
)

# Present the two interfaces as tabs of a single app.
demo = gr.TabbedInterface(
    [interface, tts_interface],
    ["Speech Recognition", "Text-to-Speech"],
)

# Start serving the app.
demo.launch()