mr2along's picture
Update app.py
d558c26 verified
raw
history blame
4.08 kB
import speech_recognition as sr
import difflib
import wave
import pyaudio
import gradio as gr
# Step 1: Record audio
def record_audio(filename, seconds=10):
    """Record mono 16-bit audio through PyAudio and save it as a WAV file.

    Args:
        filename: Path of the WAV file to write.
        seconds: Length of the recording in seconds. Defaults to 10, the
            duration that used to be hard-coded.

    The input stream and the PortAudio session are always released, even if
    opening the stream or reading from it raises.
    """
    chunk = 1024  # Record in chunks of 1024 samples
    sample_format = pyaudio.paInt16  # 16 bits per sample
    channels = 1
    fs = 44100  # Record at 44100 samples per second

    p = pyaudio.PyAudio()  # Create an interface to PortAudio
    try:
        # Resolve the sample width while the PortAudio session is still open
        # instead of querying the instance after terminate().
        sample_width = p.get_sample_size(sample_format)

        print("Recording...")
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            # Read whole chunks until the requested duration is covered.
            frames = [stream.read(chunk)
                      for _ in range(int(fs / chunk * seconds))]
        finally:
            # Stop and close the stream even when a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded audio as a WAV file; the context manager closes the
    # file even if writing the frames fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

    print("Recording completed.")
# Step 2: Transcribe the audio file
def transcribe_audio(filename):
    """Transcribe the audio file at ``filename`` with ``recognize_google``.

    Returns the recognized text. If the recognizer raises
    ``sr.UnknownValueError`` or ``sr.RequestError``, a message is printed and
    an empty string is returned instead.
    """
    engine = sr.Recognizer()

    # Load the whole file into an AudioData object.
    with sr.AudioFile(filename) as wav_source:
        captured = engine.record(wav_source)

    print("Transcribing the audio...")
    try:
        # Recognize the audio using Google Web Speech API
        text = engine.recognize_google(captured)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand the audio")
        text = ""
    except sr.RequestError as e:
        print(f"Error with Google Speech Recognition service: {e}")
        text = ""
    else:
        print("Transcription completed.")
    return text
# Step 3: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    """Score a transcription against the reference text.

    Args:
        reference_text: The paragraph the speaker was supposed to read.
        transcribed_text: The text produced by the speech recognizer.

    Returns:
        A dict with the overall similarity (``text_score.quality_score``, a
        percentage rounded to two decimals), a ``fidelity_class`` of
        ``"CORRECT"`` when that similarity is above 50 and ``"INCORRECT"``
        otherwise, and one ``word_score_list`` entry per reference word:

        * 100 - the word was matched (case-insensitively) in the transcription,
        * 50  - a different word was spoken in its place,
        * 0   - nothing in the transcription corresponds to it.
    """
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    # Character-level similarity of the raw texts. autojunk is disabled
    # because the default heuristic discards "popular" characters once the
    # second sequence reaches 200 items, which distorts the ratio for
    # paragraph-sized input.
    text_matcher = difflib.SequenceMatcher(
        None, reference_text, transcribed_text, autojunk=False
    )
    similarity_score = round(text_matcher.ratio() * 100, 2)

    # Align the two word sequences instead of comparing by absolute position,
    # so a single skipped or extra word does not shift every later word out
    # of place and mark it wrong.
    word_matcher = difflib.SequenceMatcher(
        None,
        [word.lower() for word in reference_words],
        [word.lower() for word in transcribed_words],
        autojunk=False,
    )
    word_scores = []
    for tag, ref_start, ref_end, hyp_start, hyp_end in word_matcher.get_opcodes():
        # "insert" opcodes cover no reference words, so the slice is empty.
        substitutes = hyp_end - hyp_start
        for offset, word in enumerate(reference_words[ref_start:ref_end]):
            if tag == "equal":
                score = 100
            elif tag == "replace" and offset < substitutes:
                score = 50  # A different word was spoken in this position
            else:
                score = 0  # No transcribed word corresponds to this one
            word_scores.append({"word": word, "quality_score": score})

    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"

    return {
        "quota_remaining": -1,
        "reference_text_from_application": reference_text,
        "status": "success",
        "text_score": {
            "fidelity_class": fidelity_class,
            "quality_score": similarity_score,
            "text": reference_text,
            "transcribedText": transcribed_text,
            "word_score_list": word_scores,
        },
        "version": "1.1",
    }
# Gradio Interface Function
def gradio_function(paragraph):
    """Record speech, transcribe it, and score it against ``paragraph``.

    The recording is written to ``recorded_audio.wav``, transcribed, and the
    comparison result from ``compare_texts`` is returned.
    """
    wav_path = "recorded_audio.wav"

    # Capture the audio, then feed the transcription straight into the scorer.
    record_audio(wav_path)
    return compare_texts(paragraph, transcribe_audio(wav_path))
# Gradio Interface
# One multi-line text input for the reference paragraph; the scoring dict
# returned by gradio_function is rendered as JSON.
interface = gr.Interface(
    fn=gradio_function,
    # Use the top-level component class: the legacy `gr.inputs` namespace was
    # deprecated in Gradio 3 and removed in Gradio 4.
    inputs=gr.Textbox(lines=5, label="Input Paragraph"),
    outputs="json",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)
# Launch Gradio app
interface.launch()