speech_recognize1

Sleeping

App Files Files Community

speech_recognize1 / app.py

mr2along

Update app.py

304ed82 verified 9 months ago

raw

history blame

4.66 kB

	import difflib
	import html
	import os
	import wave

	import gradio as gr
	import speech_recognition as sr
	from transformers import pipeline

	# Make sure the directory for generated audio exists. exist_ok=True replaces
	# the separate os.path.exists() check, which could race with another process
	# creating the directory between the check and the makedirs call.
	os.makedirs('audio', exist_ok=True)

	# Step 1: convert the recorded audio to text
	def transcribe_audio(audio):
	    """Transcribe an audio file with Google Speech Recognition.

	    Args:
	        audio: Value handed to ``sr.AudioFile`` (the Gradio audio input in
	            this file is configured with ``type="filepath"``), or ``None``
	            when no audio was supplied.

	    Returns:
	        The recognised text on success. Failures are reported as a
	        human-readable message string rather than raised: a fixed message
	        when ``audio`` is ``None``, when the speech could not be understood,
	        or when the recognition service request failed.
	    """
	    if audio is None:
	        # Nothing was recorded or uploaded.
	        return "No audio file provided."

	    recognizer = sr.Recognizer()
	    with sr.AudioFile(audio) as source:
	        # Read the whole file into memory for recognition.
	        audio_data = recognizer.record(source)

	    try:
	        text = recognizer.recognize_google(audio_data)
	    except sr.UnknownValueError:
	        return "Google Speech Recognition could not understand the audio"
	    except sr.RequestError as err:
	        return f"Error with Google Speech Recognition service: {err}"
	    else:
	        return text

	# Step 2: synthesize pronunciation audio for a word or passage
	# NOTE(review): the previous id ("tts_models/en/ljspeech/fastspeech2_hifigan")
	# is not a Hugging Face Hub repo id, so transformers.pipeline could not load
	# it. Confirm this checkpoint (and its licence) is the one you want.
	_TTS_MODEL_ID = "facebook/mms-tts-eng"
	_tts_pipeline = None  # built on first use so the model is loaded once, not per call


	def _get_tts_pipeline():
	    """Return the shared text-to-speech pipeline, building it on first use."""
	    global _tts_pipeline
	    if _tts_pipeline is None:
	        _tts_pipeline = pipeline("text-to-speech", model=_TTS_MODEL_ID)
	    return _tts_pipeline


	def _audio_file_stem(text, max_length=64):
	    """Turn arbitrary text into a short, filesystem-safe file name stem."""
	    # Replacing everything except letters and digits removes path separators
	    # and dots, so user text cannot escape the audio directory.
	    cleaned = "".join(ch if ch.isalnum() else "_" for ch in text)
	    return cleaned[:max_length] or "audio"


	def create_pronunciation_audio(word):
	    """Synthesize speech for ``word`` and save it as a WAV file.

	    Args:
	        word: The text to speak. May be a single word or a longer passage.

	    Returns:
	        The path of the written file, ``audio/<stem>.wav``, where ``<stem>``
	        is a sanitised, length-limited version of ``word``. An existing file
	        with the same name is overwritten.

	    Raises:
	        Whatever the transformers pipeline or the file write raises; nothing
	        is caught here.
	    """
	    speech = _get_tts_pipeline()(word)

	    # The pipeline result is a dict with float samples under "audio" and the
	    # rate under "sampling_rate". The samples may carry a leading batch or
	    # channel axis, so flatten to mono and clip before scaling to 16-bit PCM.
	    samples = speech["audio"].squeeze().clip(-1.0, 1.0)
	    pcm = (samples * 32767).astype("<i2")

	    audio_file_path = f"audio/{_audio_file_stem(word)}.wav"
	    with wave.open(audio_file_path, "wb") as wav_file:
	        wav_file.setnchannels(1)
	        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit
	        wav_file.setframerate(speech["sampling_rate"])
	        wav_file.writeframes(pcm.tobytes())
	    return audio_file_path

	# Step 3: compare the transcription with the reference text
	def compare_texts(reference_text, transcribed_text):
	    """Build an HTML report comparing a transcription with its reference text.

	    The report shows a similarity score (character-level
	    ``difflib.SequenceMatcher`` ratio scaled to 0-100), a CORRECT/INCORRECT
	    label (CORRECT when the score is above 50), the transcription, and every
	    reference word coloured by how it compares with the transcription:

	    * green  - same word, ignoring case, at the same position;
	    * yellow - a close match, ignoring case, exists somewhere in the
	      transcription;
	    * red    - no match, or the transcription ended before this position.

	    Pronunciation audio is generated for red words that had a counterpart at
	    the same position. Words past the end of the transcription are only
	    coloured red.

	    Args:
	        reference_text: The text the speaker was asked to read.
	        transcribed_text: The text recognised from the recording.

	    Returns:
	        An HTML fragment as a string. All text taken from the arguments is
	        HTML-escaped before being embedded.
	    """
	    reference_words = reference_text.split()
	    # Lower-case once so exact and fuzzy matching are both case-insensitive.
	    transcribed_lower = [w.lower() for w in transcribed_text.split()]

	    matcher = difflib.SequenceMatcher(None, reference_text, transcribed_text)
	    similarity_score = round(matcher.ratio() * 100, 2)
	    fidelity = 'CORRECT' if similarity_score > 50 else 'INCORRECT'

	    # Collect fragments and join once instead of repeated string +=.
	    parts = [
	        f"<strong>Fidelity Class:</strong> {fidelity}<br>",
	        f"<strong>Quality Score:</strong> {similarity_score}<br>",
	        f"<strong>Transcribed Text:</strong> {html.escape(transcribed_text)}<br>",
	        "<strong>Word Score List:</strong><br>",
	    ]

	    incorrect_words_audios = []  # (word, audio path) for each mispronounced word
	    for position, word in enumerate(reference_words):
	        lowered = word.lower()
	        if position >= len(transcribed_lower):
	            # The transcription is shorter than the reference: word not spoken.
	            color = "red"
	        elif lowered == transcribed_lower[position]:
	            color = "green"
	        elif difflib.get_close_matches(lowered, transcribed_lower):
	            color = "yellow"
	        else:
	            color = "red"
	            audio_file_path = create_pronunciation_audio(word)
	            incorrect_words_audios.append((word, audio_file_path))
	        parts.append(f'<span style="color: {color};">{html.escape(word)}</span> ')

	    if incorrect_words_audios:
	        parts.append("<br><strong>Pronunciation for Incorrect Words:</strong><br>")
	        for word, audio in incorrect_words_audios:
	            parts.append(f'{html.escape(word)}: ')
	            # NOTE(review): src is a local file path; confirm the Gradio
	            # server actually serves files from this location.
	            parts.append(
	                f'<audio controls><source src="{html.escape(audio)}" type="audio/wav">'
	                'Your browser does not support the audio tag.</audio><br>'
	            )

	    return "".join(parts)

	# Step 4: text-to-speech for a whole paragraph
	def text_to_speech(paragraph):
	    """Synthesize speech for ``paragraph`` and return the audio file path.

	    Thin wrapper around ``create_pronunciation_audio``: the paragraph is
	    passed through unchanged and that function's return value is returned
	    as-is.
	    """
	    return create_pronunciation_audio(paragraph)

	# Gradio callback for the speech recognition tab
	def gradio_function(paragraph, audio):
	    """Transcribe ``audio`` and compare it with ``paragraph``.

	    Args:
	        paragraph: The reference text the speaker was asked to read.
	        audio: The recording handed over by the Gradio audio input, or
	            ``None`` when nothing was recorded.

	    Returns:
	        The HTML comparison report from ``compare_texts``, or a plain
	        message when no audio was provided.
	    """
	    if audio is None:
	        # Without this guard the "no audio" message would be scored against
	        # the paragraph as if it were what the speaker said.
	        return "No audio file provided."

	    # NOTE(review): transcribe_audio also reports recognition failures as
	    # plain message strings, which are still compared below; telling them
	    # apart would need a change to its return contract.
	    transcribed_text = transcribe_audio(audio)
	    return compare_texts(paragraph, transcribed_text)

	# Tab 1: compare a recording against a reference paragraph.
	interface = gr.Interface(
	    title="Speech Recognition Comparison",
	    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
	    fn=gradio_function,
	    inputs=[
	        gr.Textbox(label="Input Paragraph", lines=5),
	        gr.Audio(label="Record Audio", type="filepath"),
	    ],
	    outputs="html",
	)

	# Tab 2: read a paragraph aloud.
	tts_interface = gr.Interface(
	    title="Text-to-Speech",
	    description="This tool will read your input paragraph aloud.",
	    fn=text_to_speech,
	    inputs=gr.Textbox(label="Input Paragraph to Read Aloud", lines=5),
	    outputs=gr.Audio(label="Text-to-Speech Output"),
	)

	# Combine both interfaces into one tabbed app.
	demo = gr.TabbedInterface(
	    [interface, tts_interface],
	    ["Speech Recognition", "Text-to-Speech"],
	)

	# Start the Gradio app.
	demo.launch()