import torch
import gradio as gr
import numpy as np
import requests
import os
import time

from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

# Transcription constants
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8

device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# OpenAI API key (the variable name must match the error message below)
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Please set the 'OPENAI_API_KEY' environment variable")
openai_api_key = api_key


# Transcribe audio with the Whisper pipeline; the result is later shared with the other tabs
def transcribe(inputs):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    if isinstance(inputs, tuple):
        # gr.Audio(type="numpy") delivers (sampling_rate, array); integer PCM
        # must be converted to float32 in [-1, 1] before feeding the pipeline.
        sampling_rate, audio_data = inputs
        audio_data = audio_data.astype(np.float32)
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)  # downmix stereo to mono
        peak = np.max(np.abs(audio_data))
        if peak > 1.0:
            audio_data /= peak
        inputs = {"array": audio_data, "sampling_rate": sampling_rate}
    elif isinstance(inputs, str):
        # inputs is a filepath: read the raw bytes and decode them with ffmpeg
        with open(inputs, "rb") as f:
            inputs = f.read()
        inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
        inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
    else:
        raise gr.Error("Unrecognized audio format.")

    # Run the ASR pipeline (timestamps enable long-form transcription)
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    return text


# Translation via the OpenAI chat completions API, with retry logic for rate limits (HTTP 429)
def translate_text(text, target_language):
    if not text:
        return "No transcription available. Please provide audio in the previous tabs first."

    prompt_instruction = f"Please translate the following text into {target_language}:\n'{text}'"
    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    data = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": prompt_instruction}],
        "max_tokens": 500,
    }

    for attempt in range(5):  # retry up to 5 times
        try:
            response = requests.post(url, headers=headers, json=data, timeout=60)
            response.raise_for_status()  # raise on HTTP error statuses
            response_json = response.json()

            # Make sure the response actually contains a completion
            if "choices" in response_json and len(response_json["choices"]) > 0:
                return response_json["choices"][0]["message"]["content"]
            return "Translation failed: the API returned no result. Please try again."
        except requests.exceptions.HTTPError as http_err:
            if response.status_code == 429:
                time.sleep(5)  # rate limited: wait 5 seconds, then retry
                continue
            return f"HTTP error occurred: {http_err}"
        except Exception as err:
            return f"An error occurred: {err}"

    return "Retry limit exceeded. Please try again later."


# Gradio interface
demo = gr.Blocks(theme=gr.themes.Ocean())

# Module-level holder used to share the latest transcription across tabs
# (note: this value is shared by all sessions, not kept per user)
state = gr.State()


# Transcribe and stash the result so the translation tab can reuse it
def transcribe_and_save(inputs):
    text = transcribe(inputs)
    state.value = text  # store the text in state
    return text


mf_transcribe = gr.Interface(
    fn=transcribe_and_save,
    inputs=gr.Audio(sources="microphone", type="numpy"),  # microphone recordings arrive as numpy data
    outputs="text",
    title="清華多模態Team2 Speech-to-Text: Live Recording",
    description="Transcribe speech recorded from the microphone",
    flagging_mode="never",
)

file_transcribe = gr.Interface(
    fn=transcribe_and_save,
    inputs=gr.Audio(sources="upload", type="filepath", label="Audio file"),
    outputs="text",  # transcription only
    title="清華多模態Team2 Speech-to-Text: Audio File Upload",
    description="Transcribe an uploaded audio file",
    flagging_mode="never",
)


# Helper for the translation tab
def show_transcription_and_translate(language):
    if state.value:
        translated_text = translate_text(state.value, language)
        return f"Transcription:\n'{state.value}'\nTranslated into {language}:\n{translated_text}"
    return "No transcription available. Please provide audio in the previous tabs first."
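
# Optional smoke test for translate_text, handy when debugging the OpenAI call
# outside the UI. The guard variable name and the sample sentence are
# illustrative additions, not part of the original app.
if os.getenv("TRANSLATE_SMOKE_TEST"):
    print(translate_text("It is a beautiful day.", "Japanese"))
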
chat_interface = gr.Interface(
    fn=show_transcription_and_translate,
    inputs=gr.Dropdown(["English", "Chinese", "Japanese", "Spanish"], label="Target language"),
    outputs="text",
    title="Translation Assistant",
    description="Shows the transcription from the previous tabs and translates it into the selected language",
)

with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe, chat_interface],
        ["Microphone", "Audio File", "Translation Assistant"],
    )

demo.queue().launch(ssr_mode=False)
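
# When running locally, launch() can also be asked for a public share link;
# share=True is a standard Gradio option, though not used in the original
# deployment:
#
#   demo.queue().launch(ssr_mode=False, share=True)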