speech_recognize

Runtime error

App Files Files Community

speech_recognize / app.py

mr2along

Update app.py

d558c26 verified 9 months ago

raw

history blame

4.08 kB

	import speech_recognition as sr
	import difflib
	import wave
	import pyaudio
	import gradio as gr

	# Step 1: Record audio
	def record_audio(filename):
	    """Record a fixed-length clip from the default input device to a WAV file.

	    Captures 10 seconds of mono, 16-bit audio at 44100 samples per second
	    through PyAudio and writes it to ``filename``.

	    Args:
	        filename: Path of the WAV file to create (overwritten if it exists).

	    Raises:
	        Whatever PyAudio raises when the input stream cannot be opened or
	        read, and whatever ``wave``/the OS raises when the file cannot be
	        written. The stream and the PortAudio session are released in
	        either case.
	    """
	    chunk = 1024  # Record in chunks of 1024 samples
	    sample_format = pyaudio.paInt16  # 16 bits per sample
	    channels = 1
	    fs = 44100  # Record at 44100 samples per second
	    seconds = 10  # Length of recording

	    p = pyaudio.PyAudio()  # Create an interface to PortAudio
	    try:
	        # Query the sample width while the PortAudio session is still alive,
	        # rather than after terminate() as the WAV header is written.
	        sample_width = p.get_sample_size(sample_format)

	        print("Recording...")
	        stream = p.open(format=sample_format,
	                        channels=channels,
	                        rate=fs,
	                        frames_per_buffer=chunk,
	                        input=True)
	        try:
	            # Store data in chunks for the specified duration
	            frames = [stream.read(chunk)
	                      for _ in range(int(fs / chunk * seconds))]
	        finally:
	            # Stop and close the stream even if a read failed mid-recording
	            stream.stop_stream()
	            stream.close()
	    finally:
	        p.terminate()

	    # Save the recorded audio as a WAV file; the context manager closes the
	    # file even when a write fails.
	    with wave.open(filename, 'wb') as wf:
	        wf.setnchannels(channels)
	        wf.setsampwidth(sample_width)
	        wf.setframerate(fs)
	        wf.writeframes(b''.join(frames))

	    print("Recording completed.")

	# Step 2: Transcribe the audio file
	def transcribe_audio(filename):
	    """Transcribe an audio file with the Google Web Speech API.

	    Args:
	        filename: Path of the audio file to load through ``sr.AudioFile``.

	    Returns:
	        The recognized text, or an empty string when the audio could not be
	        understood or the recognition request failed (the reason is
	        printed).
	    """
	    engine = sr.Recognizer()

	    # Load the whole file into memory as a single audio segment
	    with sr.AudioFile(filename) as wav_source:
	        captured = engine.record(wav_source)

	    print("Transcribing the audio...")
	    try:
	        text = engine.recognize_google(captured)
	    except sr.UnknownValueError:
	        print("Google Speech Recognition could not understand the audio")
	        return ""
	    except sr.RequestError as err:
	        print(f"Error with Google Speech Recognition service: {err}")
	        return ""

	    print("Transcription completed.")
	    return text

	# Step 3: Compare the transcribed text with the input paragraph
	def compare_texts(reference_text, transcribed_text):
	    """Score a transcription against the reference paragraph.

	    The overall score is the character-level ``difflib.SequenceMatcher``
	    ratio of the two texts as a percentage rounded to two decimals. Each
	    reference word is also scored by comparing it, case-insensitively, with
	    the transcribed word at the same position: 100 for a match, 50 for a
	    mismatch, 0 when the transcription has no word at that position.

	    Args:
	        reference_text: The paragraph the speaker was expected to read.
	        transcribed_text: The text produced by speech recognition.

	    Returns:
	        A dict with the keys ``quota_remaining``,
	        ``reference_text_from_application``, ``status``, ``text_score`` and
	        ``version``; ``text_score`` carries the overall ``quality_score``,
	        the ``fidelity_class`` ("CORRECT" when the score exceeds 50,
	        otherwise "INCORRECT") and the per-word ``word_score_list``.
	    """
	    reference_words = reference_text.split()
	    transcribed_words = transcribed_text.split()

	    # autojunk must be off: with the default heuristic, once the second
	    # sequence reaches 200 characters every frequently occurring character
	    # is dropped as a match anchor, which makes the ratio of paragraph-sized
	    # texts collapse even when they are nearly identical.
	    sm = difflib.SequenceMatcher(
	        None, reference_text, transcribed_text, autojunk=False
	    )
	    similarity_score = round(sm.ratio() * 100, 2)

	    word_scores = []
	    spoken_count = len(transcribed_words)
	    for i, word in enumerate(reference_words):
	        if i >= spoken_count:
	            quality = 0  # Nothing was transcribed at this position
	        elif word.lower() == transcribed_words[i].lower():
	            quality = 100
	        else:
	            quality = 50  # Assuming 50 if it's wrong
	        word_scores.append({"word": word, "quality_score": quality})

	    fidelity_class = "CORRECT" if similarity_score > 50 else "INCORRECT"

	    return {
	        "quota_remaining": -1,
	        "reference_text_from_application": reference_text,
	        "status": "success",
	        "text_score": {
	            "fidelity_class": fidelity_class,
	            "quality_score": similarity_score,
	            "text": reference_text,
	            "transcribedText": transcribed_text,
	            "word_score_list": word_scores,
	        },
	        "version": "1.1",
	    }

	# Gradio Interface Function
	def gradio_function(paragraph):
	    """Run the record -> transcribe -> compare pipeline for one request.

	    Args:
	        paragraph: The reference text entered in the interface.

	    Returns:
	        The comparison dict produced by ``compare_texts`` for ``paragraph``
	        and the transcription of the freshly recorded audio.
	    """
	    wav_path = "recorded_audio.wav"

	    # Capture the speaker, then score what was recognized against the input
	    record_audio(wav_path)
	    spoken_text = transcribe_audio(wav_path)
	    return compare_texts(paragraph, spoken_text)

	# Gradio Interface
	# Newer Gradio releases expose components at the top level and no longer
	# provide the ``gr.inputs`` namespace; fall back to it only when the
	# top-level component is missing (older releases).
	_textbox_cls = gr.Textbox if hasattr(gr, "Textbox") else gr.inputs.Textbox

	interface = gr.Interface(
	    fn=gradio_function,
	    inputs=_textbox_cls(lines=5, label="Input Paragraph"),
	    outputs="json",
	    title="Speech Recognition Comparison",
	    description="Input a paragraph, record your audio, and compare the transcription to the original text."
	)

	# Launch Gradio app
	interface.launch()