import difflib
import functools
import hashlib
import html
import os
import re
import wave

import gradio as gr
import numpy as np
import speech_recognition as sr
from transformers import pipeline

# Create the audio output directory if it does not exist yet.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test, and raises FileExistsError early if a
# non-directory named 'audio' is in the way instead of silently continuing.
os.makedirs('audio', exist_ok=True)

# Step 1: convert the recorded audio into text
def transcribe_audio(audio):
    """Transcribe an audio file to text with ``recognize_google``.

    Args:
        audio: Value handed to ``sr.AudioFile`` (the Gradio input below is
            configured with ``type="filepath"``), or ``None`` when no audio
            was supplied.

    Returns:
        The recognised text on success. Every failure is reported as a
        human-readable message string rather than an exception, so callers
        cannot tell a transcription from an error by type alone.
    """
    if audio is None:
        return "No audio file provided."  # nothing was recorded or uploaded

    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
    except ValueError as e:
        # AudioFile raises ValueError when the file cannot be read as
        # WAV/AIFF/FLAC (e.g. an uploaded MP3 or a corrupt recording).
        # Report it like the other failures instead of crashing the callback.
        return f"Could not read the audio file: {e}"

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Error with Google Speech Recognition service: {e}"

# Step 2: generate pronunciation audio for a word (or longer text)
@functools.lru_cache(maxsize=2)
def _load_tts_pipeline(model):
    """Build the text-to-speech pipeline for *model* once and reuse it."""
    return pipeline("text-to-speech", model=model)


def _audio_file_name(text):
    """Return a filesystem-safe ``.wav`` file name derived from *text*."""
    # Keep a short readable prefix, but rely on the digest for uniqueness so
    # that slashes, "..", punctuation or very long text can never escape the
    # output directory or exceed file-name limits.
    slug = re.sub(r"[^A-Za-z0-9]+", "_", text).strip("_")[:32] or "audio"
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]
    return f"{slug}-{digest}.wav"


def create_pronunciation_audio(word, model="facebook/mms-tts-eng"):
    """Synthesize *word* to a 16-bit PCM WAV file and return its path.

    The previous implementation passed a Coqui-TTS model name to the
    transformers ``pipeline`` and an ``output_file`` keyword that the
    text-to-speech pipeline does not accept, so no file was ever written.
    The pipeline returns a dict with ``"audio"`` (float samples) and
    ``"sampling_rate"``; this function writes that waveform itself.

    Args:
        word: Text to speak. May be a single word or a whole paragraph.
        model: Hugging Face model id for the text-to-speech pipeline.

    Returns:
        Path of the written file, of the form ``audio/<name>.wav``.
    """
    speech = _load_tts_pipeline(model)(word)

    # Some models return shape (1, n); squeeze to a flat sample array.
    # NOTE(review): assumes mono output -- confirm if another model is used.
    samples = np.asarray(speech["audio"], dtype=np.float32).squeeze()
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    os.makedirs("audio", exist_ok=True)
    audio_file_path = f"audio/{_audio_file_name(word)}"
    with wave.open(audio_file_path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(int(speech["sampling_rate"]))
        wav_file.writeframes(pcm.tobytes())
    return audio_file_path

# Step 3: compare the transcription with the reference text
def compare_texts(reference_text, transcribed_text):
    """Render an HTML report comparing a transcription with its reference.

    The overall score is the character-level ``difflib.SequenceMatcher``
    ratio (0-100, two decimals); above 50 is labelled ``CORRECT``. Each
    reference word is then coloured:

    * green  - equals the transcribed word at the same position
      (case-insensitive);
    * yellow - has a close match anywhere in the transcription
      (case-insensitive);
    * red    - no match, or the transcription is shorter than the reference.

    Pronunciation audio is generated only for words that were transcribed
    at that position but matched nothing, not for words missing because
    the transcription ran out.

    Args:
        reference_text: The text the speaker was supposed to read.
        transcribed_text: The text produced by speech recognition.

    Returns:
        An HTML fragment. All user-supplied text is HTML-escaped.
    """
    reference_words = reference_text.split()
    # Lower-case once so exact and close matching agree on case handling.
    transcribed_lower = [w.lower() for w in transcribed_text.split()]

    sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
    similarity_score = round(sm.ratio() * 100, 2)

    parts = [
        f"<strong>Fidelity Class:</strong> {'CORRECT' if similarity_score > 50 else 'INCORRECT'}<br>",
        f"<strong>Quality Score:</strong> {similarity_score}<br>",
        f"<strong>Transcribed Text:</strong> {html.escape(transcribed_text)}<br>",
        "<strong>Word Score List:</strong><br>",
    ]
    incorrect_words_audios = []  # (word, audio path) for unmatched words

    for i, word in enumerate(reference_words):
        key = word.lower()
        if i >= len(transcribed_lower):
            # Reference word beyond the end of the transcription. An explicit
            # bounds check (rather than catching IndexError) keeps errors
            # raised inside audio generation from being swallowed.
            colour = "red"
        elif key == transcribed_lower[i]:
            colour = "green"
        elif difflib.get_close_matches(key, transcribed_lower, n=1):
            colour = "yellow"
        else:
            colour = "red"
            audio_file_path = create_pronunciation_audio(word)
            incorrect_words_audios.append((word, audio_file_path))
        parts.append(f'<span style="color: {colour};">{html.escape(word)}</span> ')

    if incorrect_words_audios:
        parts.append("<br><strong>Pronunciation for Incorrect Words:</strong><br>")
        for word, audio in incorrect_words_audios:
            parts.append(f"{html.escape(word)}: ")
            # NOTE(review): src is a path relative to the working directory;
            # confirm the Gradio server actually serves it to the browser.
            parts.append(
                f'<audio controls><source src="{html.escape(audio, quote=True)}" '
                'type="audio/wav">Your browser does not support the audio tag.</audio><br>'
            )

    return "".join(parts)

# Step 4: text-to-speech entry point for the second tab
def text_to_speech(paragraph):
    """Speak *paragraph* and return what ``create_pronunciation_audio`` returns.

    This is a thin adapter so the Gradio tab has its own callback; all of
    the work is delegated to ``create_pronunciation_audio``.
    """
    return create_pronunciation_audio(paragraph)

# Gradio callback for the speech-recognition tab
def gradio_function(paragraph, audio):
    """Transcribe *audio* and return the HTML comparison against *paragraph*.

    Args:
        paragraph: Reference text typed by the user.
        audio: Recorded/uploaded audio as passed by Gradio, or ``None``.

    Returns:
        An HTML string: the comparison report, or a short message when no
        audio was supplied.
    """
    if audio is None:
        # Without a recording there is nothing to score. Previously the
        # "no audio" message itself was compared to the paragraph as if it
        # were a transcription, producing a meaningless score.
        return "No audio file provided."

    # NOTE: transcribe_audio reports other failures as plain strings too,
    # which cannot be told apart from a real transcription here.
    transcribed_text = transcribe_audio(audio)
    return compare_texts(paragraph, transcribed_text)

# Tab 1: score a recording against the typed reference paragraph.
interface = gr.Interface(
    title="Speech Recognition Comparison",
    description="Input a paragraph, record your audio, and compare the transcription to the original text.",
    fn=gradio_function,
    inputs=[
        gr.Textbox(lines=5, label="Input Paragraph"),
        gr.Audio(type="filepath", label="Record Audio"),  # callback receives a file path
    ],
    outputs="html",
)

# Tab 2: read the typed paragraph aloud.
tts_interface = gr.Interface(
    title="Text-to-Speech",
    description="This tool will read your input paragraph aloud.",
    fn=text_to_speech,
    inputs=gr.Textbox(lines=5, label="Input Paragraph to Read Aloud"),
    outputs=gr.Audio(label="Text-to-Speech Output"),
)

# Combine both interfaces into a single tabbed app
demo = gr.TabbedInterface([interface, tts_interface], ["Speech Recognition", "Text-to-Speech"])

# Start the Gradio app only when run as a script, so that importing this
# module (tests, tooling, reload mode) does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()