Spaces:
Runtime error
Runtime error
File size: 7,491 Bytes
46e19ef ad60c49 d558c26 85d956d 9586c71 3e9568e 1a15c77 46e19ef 56e0df1 075bc07 56e0df1 c527a66 56e0df1 c527a66 56e0df1 075bc07 8ef9310 0f433ab d558c26 3e9568e d558c26 8ef9310 0498d1c 3e9568e 8ef9310 91a2ea1 1a15c77 3e9568e 0f433ab 290e8e0 6d6d02d 582ce9b 91a2ea1 d558c26 0f433ab d558c26 61c9f90 d558c26 3e9568e d558c26 61c9f90 d558c26 91a2ea1 0f433ab 3e9568e 61c9f90 9586c71 91a2ea1 9586c71 bb765b3 9586c71 bb765b3 91a2ea1 61c9f90 d558c26 91a2ea1 85d956d 290e8e0 79b4e39 3e9568e 290e8e0 85d956d d558c26 8ef9310 d558c26 29d4c2c 8ef9310 d558c26 810fd45 58f3405 d558c26 8ef9310 58f3405 0498d1c 8ef9310 61c9f90 d558c26 85d956d 9586c71 85d956d 8a9dc3b d558c26 85d956d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
import difflib
import html
import io
import os
import time

import gradio as gr
import requests
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment
# Ensure the output directory for generated speech files exists.
# exist_ok=True makes this a no-op when the directory is already present and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs('audio', exist_ok=True)
def upfilepath(
    file_url="https://st.ielts-fighter.com/src/ielts-fighter/2019/09/09/i%20ng%E1%BA%AFn.mp3",
    upload_url="https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id=yw09d367te",
    timeout=30,
):
    """Download an audio file and re-upload it to a Gradio upload endpoint.

    Args:
        file_url: URL of the source audio file to download.
        upload_url: Endpoint that receives the file as a multipart POST.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The directory part of the first path reported by the upload
        endpoint, or None when the download, the upload, or the parsing of
        the upload response fails (a message is printed for those failures).
    """
    # Download the audio file from the internet.
    try:
        response = requests.get(file_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Lỗi khi tải tệp từ URL: {exc}")
        return None
    if response.status_code != 200:
        print(f"Lỗi khi tải tệp từ URL: {response.status_code}")
        return None

    # Upload the downloaded bytes straight from memory under the same file
    # name the temporary file used to have. This avoids leaving
    # "temp_audio_file.mp3" behind on the success path and avoids two
    # concurrent calls writing to the same temporary file.
    files = {'file': ('temp_audio_file.mp3', response.content)}
    try:
        upload_response = requests.post(upload_url, files=files, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Lỗi khi tải lên: {exc}")
        return None
    if upload_response.status_code != 200:
        print(f"Lỗi khi tải lên: {upload_response.status_code}")
        return None

    try:
        result = upload_response.json()
    except ValueError as exc:
        # The server answered 200 but the body was not valid JSON.
        print(f"Lỗi khi tải lên: {exc}")
        return None

    # The endpoint is expected to answer with a non-empty list of uploaded
    # paths; anything else yields None.
    if isinstance(result, list) and result:
        extracted_path = os.path.dirname(result[0])
        print(f"Đường dẫn tệp đã tách: {extracted_path}")
        return extracted_path
    return None
# Module-level call: runs the download/upload round-trip once and discards the returned path.
# NOTE(review): assumed to be a top-level statement rather than part of upfilepath's else branch — confirm.
upfilepath()
# Step 1: Transcribe the audio file
def transcribe_audio(audio):
    """Transcribe an audio file with the Google Web Speech API.

    Args:
        audio: Path to the audio file, or None when nothing was provided.

    Returns:
        The recognised text on success; otherwise a human-readable message
        explaining why no transcription could be produced.
    """
    if audio is None:
        return "No audio file provided."  # Handle the case when no audio is uploaded

    recognizer = sr.Recognizer()

    # Split off only the real extension. A plain str.replace() on the
    # extension text would also rewrite matching text elsewhere in the path
    # (e.g. a directory called "mp3") and would miss upper-case or absent
    # extensions.
    base_path, extension = os.path.splitext(audio)

    # Convert to WAV if the audio is not already a WAV file.
    if extension.lower() != '.wav':
        try:
            audio_segment = AudioSegment.from_file(audio)
            wav_path = base_path + '.wav'
            audio_segment.export(wav_path, format='wav')
            audio = wav_path  # Continue with the converted file
        except Exception as e:
            return f"Error converting audio: {e}"

    # Read the whole file into an AudioData object for the recognizer.
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    try:
        # Recognize the audio using the Google Web Speech API.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"
# Step 2: Create pronunciation audio for incorrect words
def create_pronunciation_audio(word, delay=5):
    """Generate an MP3 with the pronunciation of *word* and return its path.

    Args:
        word: The word to synthesise with gTTS.
        delay: Seconds to pause before creating the audio. Defaults to the
            previously hard-coded 5-second wait (presumably there to
            throttle calls to the TTS service — confirm).

    Returns:
        Path of the saved MP3 file inside the ``audio`` directory.
    """
    if delay > 0:
        time.sleep(delay)

    # The word originates from user-entered text, so keep only characters
    # that are safe in a file name; path separators or ".." must not be able
    # to steer the output outside the audio directory.
    safe_name = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in word) or "_"

    # Do not rely on the directory having been created elsewhere.
    os.makedirs('audio', exist_ok=True)

    tts = gTTS(word)
    audio_file_path = f"audio/{safe_name}.mp3"  # Save the audio to a file
    tts.save(audio_file_path)
    print(f"audio/{word}: {(os.path.abspath(audio_file_path))}")
    return audio_file_path  # Return the file path of the saved audio
# Step 3: Compare the transcribed text with the input paragraph
def compare_texts(reference_text, transcribed_text):
    """Compare a transcription with the reference text and render HTML.

    The overall score is the character-level ``difflib.SequenceMatcher``
    ratio of the two texts, as a percentage rounded to two decimals. Each
    reference word is then coloured:

    * green  - equals the transcribed word at the same position (case-insensitive);
    * yellow - has a close match somewhere in the transcription;
    * red    - neither of the above, or the transcription is too short to
      have a word at that position.

    A pronunciation clip is generated for every red word that had a
    transcribed counterpart at its position.

    Args:
        reference_text: The paragraph the speaker was supposed to read.
        transcribed_text: The text produced by speech recognition.

    Returns:
        An HTML fragment with the verdict, the score, the transcription,
        the coloured word list and, if any, audio players for wrong words.
    """
    reference_words = reference_text.split()
    transcribed_words = transcribed_text.split()

    sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(sm.ratio() * 100, 2)
    verdict = 'CORRECT' if similarity_score > 50 else 'INCORRECT'

    # Collect fragments and join once at the end. All user-derived text is
    # HTML-escaped so markup typed into the paragraph (or returned by the
    # recognizer) is displayed literally instead of being interpreted.
    parts = [
        f"<strong>Fidelity Class:</strong> {verdict}<br>",
        f"<strong>Quality Score:</strong> {similarity_score}<br>",
        f"<strong>Transcribed Text:</strong> {html.escape(transcribed_text)}<br>",
        "<strong>Word Score List:</strong><br>",
    ]

    incorrect_words_audios = []  # (word, audio path) for each incorrect word
    audio_by_word = {}  # Generate each distinct word's clip only once per call
    transcribed_count = len(transcribed_words)

    for i, word in enumerate(reference_words):
        shown = html.escape(word)
        if i >= transcribed_count:
            # Words in the reference that were not transcribed at all.
            parts.append(f'<span style="color: red;">{shown}</span> ')
        elif word.lower() == transcribed_words[i].lower():
            parts.append(f'<span style="color: green;">{shown}</span> ')
        elif difflib.get_close_matches(word, transcribed_words):
            parts.append(f'<span style="color: yellow;">{shown}</span> ')
        else:
            parts.append(f'<span style="color: red;">{shown}</span> ')
            if word not in audio_by_word:
                audio_by_word[word] = create_pronunciation_audio(word)
            incorrect_words_audios.append((word, audio_by_word[word]))

    if incorrect_words_audios:
        parts.append("<br><strong>Pronunciation for Incorrect Words:</strong><br>")
        for word, audio in incorrect_words_audios:
            # NOTE(review): `word` is itself taken from reference_words, so the
            # best match found here is the word itself; kept as-is pending a
            # decision on what the suggestion is meant to show.
            suggestion = difflib.get_close_matches(word, reference_words, n=1)
            suggestion_text = (
                f" (Did you mean: <em>{html.escape(suggestion[0])}</em>?)" if suggestion else ""
            )
            parts.append(f'{html.escape(word)}: ')
            parts.append(
                f'<audio controls><source src="{html.escape(audio)}" type="audio/mpeg">'
                f'Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
            )

    return "".join(parts)
# Step 4: Text-to-Speech Function
def text_to_speech(paragraph):
    """Convert *paragraph* to speech and return the path of the MP3 file.

    Args:
        paragraph: Text to read aloud; may be None or empty.

    Returns:
        ``"audio/paragraph.mp3"`` after the speech has been saved there, or
        None when there is no text to read.
    """
    # Whitespace-only input has no speakable content; treat it like empty
    # input instead of handing it to gTTS.
    if not paragraph or not paragraph.strip():
        return None

    # Do not rely on the directory having been created elsewhere.
    os.makedirs('audio', exist_ok=True)

    tts = gTTS(paragraph)
    audio_file_path = "audio/paragraph.mp3"  # Save the audio to a file
    tts.save(audio_file_path)
    return audio_file_path
# Gradio Interface Function
def gradio_function(paragraph, audio):
    """Transcribe the recording and compare it with the reference paragraph.

    Args:
        paragraph: Reference text entered by the user (None is treated as
            an empty string).
        audio: Path to the recorded audio file, or None if nothing was
            recorded.

    Returns:
        The HTML produced by ``compare_texts``, or a short message when no
        audio was provided.
    """
    # NOTE(review): the value returned by upfilepath() was never used here;
    # the call is kept only to preserve the existing upload side effect.
    upfilepath()

    # Without a recording there is nothing to compare. Passing the
    # "no audio" message on to compare_texts would diff it against the
    # paragraph as if it were speech and generate a pronunciation clip for
    # every unmatched word.
    if audio is None:
        return "No audio file provided."

    # Guard against a missing paragraph so the comparison can call .split().
    paragraph = paragraph or ""

    transcribed_text = transcribe_audio(audio)
    return compare_texts(paragraph, transcribed_text)
# Tab 1: compare a recorded reading with the reference paragraph.
_paragraph_box = gr.Textbox(lines=5, label="Input Paragraph")
_recording = gr.Audio(type="filepath", label="Record Audio")
interface = gr.Interface(
    fn=gradio_function,
    inputs=[_paragraph_box, _recording],
    outputs="html",
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
)

# Tab 2: read a paragraph aloud.
_tts_box = gr.Textbox(lines=5, label="Input Paragraph to Read Aloud")
_tts_player = gr.Audio(label="Text-to-Speech Output")
tts_interface = gr.Interface(
    fn=text_to_speech,
    inputs=_tts_box,
    outputs=_tts_player,
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud.",
)

# Put both tools behind one tabbed UI and start the app.
_tabs = [interface, tts_interface]
_tab_titles = ["Speech Recognition", "Text-to-Speech"]
demo = gr.TabbedInterface(_tabs, _tab_titles)
demo.launch()
|