import speech_recognition as sr
import difflib
import gradio as gr
from gtts import gTTS
import os
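# Assumed runtime dependencies (e.g. listed in requirements.txt): SpeechRecognition,
# gTTS and gradio. sr.AudioFile reads WAV/AIFF/FLAC files; gr.Audio with
# type="filepath" typically records to WAV, so the two should be compatible.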
# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    recognizer = sr.Recognizer()
    # Load the recorded file into a format the Recognizer can process
    audio_file = sr.AudioFile(audio)
    with audio_file as source:
        audio_data = recognizer.record(source)
    try:
        # Recognize the audio using the Google Web Speech API
        transcription = recognizer.recognize_google(audio_data)
        return transcription
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"
# Step 2: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    # Overall character-level similarity between the two strings, as a percentage
    sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(sm.ratio() * 100, 2)

    # Construct HTML output
    html_output = f"<strong>Fidelity Class:</strong> {'CORRECT' if similarity_score > 50 else 'INCORRECT'}<br>"
    html_output += f"<strong>Quality Score:</strong> {similarity_score}<br>"
    html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
    html_output += "<strong>Word Score List:</strong><br>"

    # Generate colored word score list
    for i, word in enumerate(reference_words):
        try:
            if word.lower() == transcribed_words[i].lower():
                html_output += f'<span style="color: green;">{word}</span> '  # Correct words in green
            elif difflib.get_close_matches(word, transcribed_words):
                html_output += f'<span style="color: yellow;">{word}</span> '  # Close matches in yellow
            else:
                html_output += f'<span style="color: red;">{word}</span> '  # Incorrect words in red
        except IndexError:
            html_output += f'<span style="color: red;">{word}</span> '  # Reference words that were not transcribed
    return html_output
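# Illustrative example (not executed here): compare_texts("the quick brown fox",
# "the quick brown box") scores roughly 94.7 and marks "fox" in yellow, because
# get_close_matches treats "box" as a near match rather than an exact one.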
# Step 3: Text-to-Speech Function
def text_to_speech(paragraph):
    tts = gTTS(paragraph)
    tts.save("paragraph.mp3")
    return "paragraph.mp3"
# Gradio Interface Function
def gradio_function(paragraph, audio):
    # Transcribe the audio
    transcribed_text = transcribe_audio(audio)
    # Compare the original paragraph with the transcribed text
    comparison_result = compare_texts(paragraph, transcribed_text)
    # Return comparison result
    return comparison_result
# Gradio Interface for Speech Recognition Comparison
interface = gr.Interface(
    fn=gradio_function,
    inputs=[
        gr.Textbox(lines=5, label="Input Paragraph"),
        gr.Audio(type="filepath", label="Record Audio"),
    ],
    outputs="html",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)
# Gradio Interface for Text-to-Speech
tts_interface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(lines=5, label="Input Paragraph to Read Aloud"),
    outputs=gr.Audio(label="Text-to-Speech Output", type="filepath"),
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud.",
)
# Combine both interfaces into one
demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])
# Launch Gradio app
demo.launch()