import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from docx import Document
import os
from openai import OpenAI
import json

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound


from moviepy.editor import VideoFileClip
from pytube import YouTube
import os

from google.cloud import storage
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaIoBaseUpload

import io


from urllib.parse import urlparse, parse_qs


# 假设您的环境变量或Secret的名称是GOOGLE_APPLICATION_CREDENTIALS_JSON
# credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
# credentials_dict = json.loads(credentials_json_string)
# SCOPES = ['https://www.googleapis.com/auth/drive']
# credentials = service_account.Credentials.from_service_account_info(
#         credentials_dict, scopes=SCOPES)
# service = build('drive', 'v3', credentials=credentials)
# # 列出 Google Drive 上的前10個文件
# results = service.files().list(pageSize=10, fields="nextPageToken, files(id, name)").execute()
# items = results.get('files', [])

# if not items:
#     print('No files found.')
# else:
#     print("=====Google Drive 上的前10個文件=====")
#     print('Files:')
#     for item in items:
#         print(u'{0} ({1})'.format(item['name'], item['id']))


# Local directory where downloaded videos and extracted screenshots are written.
OUTPUT_PATH = 'videos'
# Formatted transcript entries of the most recently processed video; assigned by
# process_youtube_link and read by update_slide.
TRANSCRIPTS = []
# Index into TRANSCRIPTS of the slide currently shown; moved by update_slide.
CURRENT_INDEX = 0
# ID of the most recently processed YouTube video; assigned by process_youtube_link.
VIDEO_ID = ""

# OpenAI API key taken from the environment (None when the variable is unset).
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
client = OpenAI(api_key=OPEN_AI_KEY)
# Drive and GCS both read the same service-account JSON from one environment variable.
DRIVE_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
GCS_KEY = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")

# ====gcs====
def init_gcs_client(service_account_key_string):
    """Build a Google Cloud Storage client from a service-account key.

    Args:
        service_account_key_string: JSON text of the service-account key. Its
            ``project_id`` entry selects the project the client is created for.

    Returns:
        The ``storage.Client`` created with those credentials.
    """
    key_info = json.loads(service_account_key_string)
    return storage.Client(
        credentials=service_account.Credentials.from_service_account_info(key_info),
        project=key_info['project_id'],
    )

def gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, folder_name):
    """Ensure an object named ``folder_name`` exists in the bucket.

    When the object is missing, an empty marker object is uploaded under that
    name to simulate a folder (prefix); otherwise nothing is written.
    """
    marker = gcs_client.bucket(bucket_name).blob(folder_name)
    if marker.exists():
        print(f"GCS Folder '{folder_name}' already exists.")
        return
    marker.upload_from_string('', content_type='application/x-www-form-urlencoded;charset=UTF-8')
    print(f"GCS Folder '{folder_name}' created.")

def gcs_check_folder_exists(gcs_client, bucket_name, folder_name):
    """Report whether the bucket holds any object whose name starts with ``folder_name``."""
    matching = [blob for blob in gcs_client.bucket(bucket_name).list_blobs(prefix=folder_name)]
    return bool(matching)

def gcs_check_file_exists(gcs_client, bucket_name, file_name):
    """Report whether the object ``file_name`` exists in the bucket.

    ``file_name`` is the full object name, in the form ``{folder_name}/{file_name}``.
    """
    return gcs_client.bucket(bucket_name).blob(file_name).exists()

def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, source_file_name):
    """Upload the local file ``source_file_name`` to the bucket as ``destination_blob_name``."""
    target = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
    """Return the content of the GCS object ``source_blob_name`` as text."""
    source = gcs_client.bucket(bucket_name).blob(source_blob_name)
    text = source.download_as_text()
    return text

def make_blob_public(gcs_client, bucket_name, blob_name):
    """Make the GCS object ``blob_name`` publicly readable and print its public URL."""
    target = gcs_client.bucket(bucket_name).blob(blob_name)
    target.make_public()
    print(f"Blob {blob_name} is now publicly accessible at {target.public_url}")

def get_blob_public_url(gcs_client, bucket_name, blob_name):
    """Return the ``public_url`` of the GCS object ``blob_name``."""
    return gcs_client.bucket(bucket_name).blob(blob_name).public_url

def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
    """Upload a local image to GCS, make it public, and return its public URL.

    Args:
        gcs_client: GCS client used for all three steps.
        bucket_name: Name of the destination bucket.
        file_name: Object name the image is stored under.
        file_path: Path of the local image file to upload.
    """
    # Upload first, then open up read access, then look up the resulting URL.
    upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)
    make_blob_public(gcs_client, bucket_name, file_name)
    url = get_blob_public_url(gcs_client, bucket_name, file_name)
    print(f"Public URL for the uploaded image: {url}")
    return url

def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
    """Copy every non-trashed file of a Drive folder into a GCS "folder".

    The Drive listing is paginated, so every page is followed via
    ``nextPageToken``; reading only the first response would silently skip
    files in folders larger than one page.

    Args:
        drive_service: Google Drive API service used to list and download files.
        gcs_client: GCS client used for the uploads.
        drive_folder_id: ID of the Drive folder whose direct children are copied.
        bucket_name: Name of the destination bucket.
        gcs_folder_name: Prefix the files are stored under, as
            ``{gcs_folder_name}/{file name}``.
    """
    query = f"'{drive_folder_id}' in parents and trashed = false"
    page_token = None
    while True:
        list_kwargs = {'q': query, 'fields': "nextPageToken, files(id, name)"}
        if page_token:
            list_kwargs['pageToken'] = page_token
        response = drive_service.files().list(**list_kwargs).execute()

        for file in response.get('files', []):
            gcs_destination_path = f"{gcs_folder_name}/{file['name']}"
            copy_file_from_drive_to_gcs(drive_service, gcs_client, file['id'], bucket_name, gcs_destination_path)

        page_token = response.get('nextPageToken')
        if not page_token:
            break

def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
    """Download one Drive file into memory and upload its bytes to GCS.

    Args:
        drive_service: Google Drive API service used for the download.
        gcs_client: GCS client used for the upload.
        file_id: ID of the Drive file to copy.
        bucket_name: Name of the destination bucket.
        gcs_destination_path: Object name the content is stored under.
    """
    # Pull the file from Drive chunk by chunk into an in-memory buffer.
    media_request = drive_service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, media_request)
    while True:
        _, finished = downloader.next_chunk()
        if finished:
            break
    payload = buffer.getvalue()

    # Push the collected bytes to the destination object.
    gcs_client.bucket(bucket_name).blob(gcs_destination_path).upload_from_string(payload)
    print(f"File {file_id} copied to GCS at {gcs_destination_path}.")

# # ====drive====初始化
def init_drive_service():
    """Build a Google Drive v3 service from the service-account JSON held in ``DRIVE_KEY``."""
    key_info = json.loads(DRIVE_KEY)
    drive_credentials = service_account.Credentials.from_service_account_info(
        key_info,
        scopes=['https://www.googleapis.com/auth/drive'],
    )
    return build('drive', 'v3', credentials=drive_credentials)

def create_folder_if_not_exists(service, folder_name, parent_id):
    """Return the ID of the Drive folder ``folder_name`` under ``parent_id``, creating it if needed.

    When several folders match, the ID of the first one listed is returned.
    """
    print("检查是否存在特定名称的文件夹，如果不存在则创建")
    listing = service.files().list(
        q=f"mimeType='application/vnd.google-apps.folder' and name='{folder_name}' and '{parent_id}' in parents and trashed=false",
        spaces='drive',
        fields="files(id, name)",
    ).execute()
    matches = listing.get('files', [])
    if matches:
        # The folder is already there.
        return matches[0]['id']

    # No such folder yet: create it under the requested parent.
    created = service.files().create(
        body={
            'name': folder_name,
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_id],
        },
        fields='id',
    ).execute()
    return created.get('id')

# Check whether a file exists on Google Drive.
def check_file_exists(service, folder_name, file_name):
    """Look up ``file_name`` among the non-trashed children of a Drive folder.

    ``folder_name`` is placed in the ``in parents`` clause of the query, so
    callers pass the folder's Drive ID.

    Returns:
        ``(True, file_id)`` for the first match, or ``(False, None)`` when
        nothing matches.
    """
    listing = service.files().list(
        q=f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
    ).execute()
    matches = listing.get('files', [])
    if matches:
        return True, matches[0]['id']
    return False, None

def upload_content_directly(service, file_name, folder_id, content):
    """Create a new text file on Google Drive directly from an in-memory string.

    Args:
        service: Google Drive API service.
        file_name: Name of the file to create; must be non-empty.
        folder_id: ID of the parent folder; must be non-empty.
        content: Text to upload. An empty string is accepted, ``None`` is not.

    Returns:
        The ID of the created file.

    Raises:
        ValueError: If ``file_name`` or ``folder_id`` is empty or ``content`` is None.
        Exception: Any error raised during the upload is printed and re-raised.
    """
    if not file_name:
        raise ValueError("文件名不能为空")
    if not folder_id:
        raise ValueError("文件夹ID不能为空")
    if content is None:
        raise ValueError("内容不能为空")

    metadata = {'name': file_name, 'parents': [folder_id]}
    try:
        # Wrap the encoded text in an in-memory stream so no temporary file is needed.
        with io.BytesIO(content.encode('utf-8')) as stream:
            media = MediaIoBaseUpload(stream, mimetype='text/plain', resumable=True)

            for banner, value in (("==content==", content), ("==media==", media)):
                print(banner)
                print(value)
                print(banner)

            created = service.files().create(body=metadata, media_body=media, fields='id').execute()
            return created.get('id')
    except Exception as e:
        print(f"上传文件时发生错误: {e}")
        raise  # let the caller decide how to handle the failure

def upload_file_directly(service, file_name, folder_id, file_path):
    """Upload a local file to a Drive folder as ``application/json``.

    Returns:
        Always ``True``; the ID of the created file is not returned.
    """
    metadata = {'name': file_name, 'parents': [folder_id]}
    json_media = MediaFileUpload(file_path, mimetype='application/json')
    service.files().create(body=metadata, media_body=json_media, fields='id').execute()
    return True

def upload_img_directly(service, file_name, folder_id, file_path):
    """Upload a local file to a Drive folder as ``image/jpeg`` and return the new file's ID."""
    metadata = {'name': file_name, 'parents': [folder_id]}
    image_media = MediaFileUpload(file_path, mimetype='image/jpeg')
    created = service.files().create(body=metadata, media_body=image_media, fields='id').execute()
    return created.get('id')

def download_file_as_string(service, file_id):
    """Download a Google Drive file and return its content decoded as UTF-8 text."""
    media_request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, media_request)
    # Keep fetching chunks for as long as the downloader reports it is not done.
    while True:
        _, finished = downloader.next_chunk()
        if finished is not False:
            break
    return buffer.getvalue().decode('utf-8')

def set_public_permission(service, file_id):
    """Grant everyone ("anyone") read access to the Drive file ``file_id``."""
    anyone_can_read = {"type": "anyone", "role": "reader"}
    request = service.permissions().create(fileId=file_id, body=anyone_can_read, fields='id')
    request.execute()

def update_file_on_drive(service, file_id, file_content):
    """Replace the content of an existing Google Drive file.

    Args:
        service: Google Drive API service.
        file_id: ID of the file to update.
        file_content: New content as a string; it is uploaded UTF-8 encoded
            with the ``application/json`` MIME type.
    """
    # Encode the new content into an in-memory byte stream for the upload.
    stream = io.BytesIO(file_content.encode('utf-8'))
    media = MediaIoBaseUpload(stream, mimetype='application/json', resumable=True)

    result = service.files().update(fileId=file_id, media_body=media).execute()

    print(f"文件已更新，文件ID: {result['id']}")


# ====drive====

def process_file(file):
    """Turn an uploaded CSV, Excel or Word file into text and derive questions and a summary.

    Args:
        file: Uploaded file object; its ``name`` attribute selects the parser
            by extension.

    Returns:
        A 5-tuple ``(question_1, question_2, question_3, summary, text)``.
        Questions that were not generated are returned as empty strings.

    Raises:
        ValueError: If the extension is not ``.csv``, ``.xlsx`` or ``.docx``.
    """
    if file.name.endswith('.csv'):
        text = df_to_text(pd.read_csv(file))
    elif file.name.endswith('.xlsx'):
        text = df_to_text(pd.read_excel(file))
    elif file.name.endswith('.docx'):
        text = docx_to_text(file)
    else:
        raise ValueError("Unsupported file type")

    # Work from the extracted text for every file type: the .docx branch has no
    # DataFrame, so relying on one here would fail for Word documents.
    # Yilan data: replace the "@XX@" marker with "|".
    df_string = text.replace("@XX@", "|")

    # Generate questions and a summary from the uploaded content.
    questions = generate_questions(df_string)
    summary = generate_summarise(df_string)

    # Return the button texts, the summary, and the text itself.
    return questions[0] if len(questions) > 0 else "", \
           questions[1] if len(questions) > 1 else "", \
           questions[2] if len(questions) > 2 else "", \
           summary, \
           df_string

def df_to_text(df):
    """Render a DataFrame as plain text using its ``to_string`` representation."""
    rendered = df.to_string()
    return rendered

def docx_to_text(file):
    """Return the text of a Word document, one paragraph per line."""
    paragraphs = Document(file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def format_seconds_to_time(seconds):
    """Format a number of seconds as ``HH:MM:SS`` (fractions of a second are dropped)."""
    components = (seconds // 3600, (seconds % 3600) // 60, seconds % 60)
    return ":".join(f"{int(component):02}" for component in components)

def extract_youtube_id(url):
    """Extract the video ID from a YouTube URL.

    Returns:
        The value of the ``v`` query parameter for ``youtube.com`` links, the
        path without leading slashes for ``youtu.be`` links, and ``None`` for
        any other host or when a ``youtube.com`` link has no ``v`` parameter.
    """
    parts = urlparse(url)
    host = parts.netloc

    if "youtube.com" in host:
        # Standard links carry the ID in the "v" query parameter.
        v_values = parse_qs(parts.query).get("v")
        if v_values is None:
            return None
        return v_values[0]

    if "youtu.be" in host:
        # Short links carry the ID as the path.
        return parts.path.lstrip('/')

    return None

def get_transcript(video_id):
    """Fetch the transcript of a YouTube video in the first available preferred language.

    Languages are tried in the order ``zh-TW``, ``zh-Hant``, ``en``.

    Returns:
        The transcript returned by ``YouTubeTranscriptApi.get_transcript``, or
        ``None`` when none of the languages has a transcript.
    """
    for language in ('zh-TW', 'zh-Hant', 'en'):
        try:
            return YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
        except NoTranscriptFound:
            # Nothing in this language; fall through to the next preference.
            pass
    return None

def process_transcript_and_screenshots(video_id):
    """Load or create a video's transcript on Google Drive and attach screenshots.

    The transcript JSON is read from the video's Drive folder when present,
    otherwise fetched from YouTube and uploaded. Every entry without an
    ``img_file_id`` gets a screenshot taken at its start time, uploaded to
    Drive and made public. The updated transcript is written back to Drive,
    and the Drive folder is copied to GCS when no GCS folder exists yet.

    Args:
        video_id: YouTube video ID; also used as the folder name.

    Returns:
        The transcript as a list of entries, each carrying ``img_file_id``.

    Raises:
        ValueError: If no transcript is available for the video.
    """
    print("====process_transcript_and_screenshots====")

    # Drive
    service = init_drive_service()
    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)

    # Transcript file name, and whether it is already stored on Drive.
    file_name = f'{video_id}_transcript.json'
    exists, file_id = check_file_exists(service, folder_id, file_name)
    if not exists:
        # Fetch the transcript from YouTube and upload it.
        transcript = get_transcript(video_id)
        if not transcript:
            print("沒有找到字幕")
            # Stop here instead of uploading "null" and failing later on iteration.
            raise ValueError(f"No transcript found for video {video_id}")
        print("成功獲取字幕")
        transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
        file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
        print("逐字稿已上传到Google Drive")
    else:
        # The transcript already exists: download its content.
        print("逐字稿已存在于Google Drive中")
        transcript_text = download_file_as_string(service, file_id)
        transcript = json.loads(transcript_text)
        if not transcript:
            # A stored file may hold "null" or an empty list.
            raise ValueError(f"No transcript found for video {video_id}")

    # Take and upload a screenshot for every entry that does not have one yet.
    for entry in transcript:
        if 'img_file_id' not in entry:
            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
            img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
            set_public_permission(service, img_file_id)
            entry['img_file_id'] = img_file_id
            print(f"截图已上传到Google Drive: {img_file_id}")

    # Write the transcript, now including the screenshot references, back to Drive.
    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
    update_file_on_drive(service, file_id, updated_transcript_text)
    print("逐字稿已更新，包括截图链接")

    # Mirror the Drive folder to GCS the first time this video is seen there.
    gcs_client = init_gcs_client(GCS_KEY)
    bucket_name = 'video_ai_assistant'
    is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
    if not is_gcs_exists:
        gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
        copy_all_files_from_drive_to_gcs(service, gcs_client, folder_id, bucket_name, video_id)
        print("Drive file 已上传到GCS")
    else:
        print(f"GCS folder:{video_id} 已存在")

    return transcript

def process_transcript_and_screenshots_on_gcs(video_id):
    """Load or create a video's transcript on GCS and attach public screenshot URLs.

    The transcript JSON is read from the bucket when it is already stored
    there, otherwise fetched from YouTube. Every entry without an
    ``img_file_id`` gets a screenshot taken at its start time, uploaded to GCS
    and made public; the public URL is stored in ``img_file_id``. The updated
    transcript is then written to the bucket as JSON text.

    Args:
        video_id: YouTube video ID; also used as the GCS folder marker name.

    Returns:
        The transcript as a list of entries, each carrying ``img_file_id``.

    Raises:
        ValueError: If no transcript is available for the video.
    """
    print("====process_transcript_and_screenshots_on_gcs====")
    # GCS
    gcs_client = init_gcs_client(GCS_KEY)
    bucket_name = 'video_ai_assistant'
    # Make sure the folder marker exists.
    is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
    if not is_gcs_exists:
        gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
        print(f"GCS folder:{video_id} 已创建")
    else:
        print(f"GCS folder:{video_id} 已存在")

    # Transcript object name, and whether it is already stored in the bucket.
    file_name = f'{video_id}_transcript.json'
    exists = gcs_check_file_exists(gcs_client, bucket_name, file_name)
    if not exists:
        # Fetch the transcript from YouTube.
        transcript = get_transcript(video_id)
        if transcript:
            print("成功獲取字幕")
        else:
            print("沒有找到字幕")
    else:
        # Reuse the stored transcript so existing screenshots are not redone.
        print("逐字稿已存在于GCS中")
        transcript = json.loads(download_blob_to_string(gcs_client, bucket_name, file_name))

    if not transcript:
        raise ValueError(f"No transcript found for video {video_id}")

    # Take a screenshot for every entry that lacks one, upload it to GCS and
    # make it public; the public URL is kept in the entry.
    for entry in transcript:
        if 'img_file_id' not in entry:
            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
            entry['img_file_id'] = img_file_id
            print(f"截图已上传到GCS: {img_file_id}")

    # Store the updated transcript. The JSON lives in memory, so it is uploaded
    # from a string; upload_file_to_gcs expects a path to a local file.
    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
    transcript_blob = gcs_client.bucket(bucket_name).blob(file_name)
    transcript_blob.upload_from_string(updated_transcript_text, content_type='application/json')
    print("逐字稿已更新，包括截图链接")

    return transcript


def process_youtube_link(link):
    """Process a YouTube link into all outputs shown in the UI.

    Downloads the video, builds the transcript with screenshots, and derives
    questions, a summary and a mind map from it. Also updates the module
    globals ``VIDEO_ID`` and ``TRANSCRIPTS``.

    Args:
        link: URL of the YouTube video.

    Returns:
        An 11-tuple: three questions, the formatted transcript as JSON text,
        the summary, the mind map markdown, the mind map HTML, the transcript
        HTML with screenshots, the transcript HTML without screenshots, the
        first screenshot URL, and the first transcript text.

    Raises:
        gr.Error: If no video ID can be extracted from ``link`` or the
            transcript cannot be prepared.
    """
    video_id = extract_youtube_id(link)
    if not video_id:
        # Without an ID, every later step would run against a "None" video.
        raise gr.Error(f"無法從連結取得 YouTube 影片 ID: {link}")
    global VIDEO_ID
    VIDEO_ID = video_id
    download_youtube_video(video_id, output_path=OUTPUT_PATH)

    try:
        # transcript = process_transcript_and_screenshots(video_id)
        transcript = process_transcript_and_screenshots_on_gcs(video_id)
    except Exception as e:
        error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
        print("===process_youtube_link error===")
        print(error_msg)
        raise gr.Error(error_msg) from e

    formatted_transcript = []
    formatted_simple_transcript = []
    for entry in transcript:
        start_time = format_seconds_to_time(entry['start'])
        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
        embed_url = get_embedded_youtube_link(video_id, entry['start'])
        img_file_id = entry['img_file_id']
        if str(img_file_id).startswith(("http://", "https://")):
            # The GCS flow stores the public URL itself in img_file_id.
            screenshot_path = img_file_id
        else:
            # The Drive flow stores a file ID that is turned into an image URL.
            screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
        formatted_transcript.append({
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text'],
            "embed_url": embed_url,
            "screenshot_path": screenshot_path
        })
        # The simple transcript only keeps start_time, end_time and text.
        formatted_simple_transcript.append({
            "start_time": start_time,
            "end_time": end_time,
            "text": entry['text']
        })

    global TRANSCRIPTS
    TRANSCRIPTS = formatted_transcript

    # Derive the remaining outputs from the transcript.
    questions = get_questions(video_id, formatted_simple_transcript)
    formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
    summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
    summary = summary_json["summary"]
    html_content = format_transcript_to_html(formatted_transcript)
    simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
    # Guard the first-slide lookup so an empty transcript does not raise.
    first_image = formatted_transcript[0]['screenshot_path'] if formatted_transcript else None
    first_text = formatted_transcript[0]['text'] if formatted_transcript else ""
    mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
    mind_map = mind_map_json["mind_map"]
    mind_map_html = get_mind_map_html(mind_map)

    # The order must match the outputs wired to this handler in the UI.
    return questions[0] if len(questions) > 0 else "", \
            questions[1] if len(questions) > 1 else "", \
            questions[2] if len(questions) > 2 else "", \
            formatted_transcript_json, \
            summary, \
            mind_map, \
            mind_map_html, \
            html_content, \
            simple_html_content, \
            first_image, \
            first_text,
            

def format_transcript_to_html(formatted_transcript):
    """Render transcript entries as HTML: a time-range heading, the text, and the screenshot.

    Entry values are HTML-escaped so transcript text containing ``<``, ``&``
    or quotes is shown literally instead of being interpreted as markup.

    Args:
        formatted_transcript: Iterable of dicts with the keys ``start_time``,
            ``end_time``, ``text`` and ``screenshot_path``.

    Returns:
        The concatenated HTML, or an empty string when there are no entries.
    """
    from html import escape

    return "".join(
        f"<h3>{escape(str(entry['start_time']))} - {escape(str(entry['end_time']))}</h3>"
        f"<p>{escape(str(entry['text']))}</p>"
        f"<img src='{escape(str(entry['screenshot_path']), quote=True)}' width='500px' />"
        for entry in formatted_transcript
    )

def format_simple_transcript_to_html(formatted_transcript):
    """Render transcript entries as HTML: a time-range heading followed by the text.

    Entry values are HTML-escaped so transcript text containing ``<``, ``&``
    or quotes is shown literally instead of being interpreted as markup.

    Args:
        formatted_transcript: Iterable of dicts with the keys ``start_time``,
            ``end_time`` and ``text``.

    Returns:
        The concatenated HTML, or an empty string when there are no entries.
    """
    from html import escape

    return "".join(
        f"<h3>{escape(str(entry['start_time']))} - {escape(str(entry['end_time']))}</h3>"
        f"<p>{escape(str(entry['text']))}</p>"
        for entry in formatted_transcript
    )

def get_embedded_youtube_link(video_id, start_time):
    """Build an autoplaying YouTube embed URL that starts at ``start_time`` (truncated to whole seconds)."""
    start_seconds = int(start_time)
    return f"https://www.youtube.com/embed/{video_id}?start={start_seconds}&autoplay=1"

def download_youtube_video(youtube_id, output_path=OUTPUT_PATH):
    """Download a YouTube video as ``{youtube_id}.mp4`` into ``output_path``.

    The highest-resolution progressive MP4 stream is chosen.

    Args:
        youtube_id: ID of the video to download.
        output_path: Directory the file is written to; created when missing.

    Raises:
        ValueError: If the video offers no progressive MP4 stream.
    """
    # Construct the full YouTube URL
    youtube_url = f'https://www.youtube.com/watch?v={youtube_id}'

    # exist_ok avoids failing when the directory appears between a check and the creation.
    os.makedirs(output_path, exist_ok=True)

    # Download the video
    yt = YouTube(youtube_url)
    video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
    if video_stream is None:
        # first() yields None when the filter matches no stream.
        raise ValueError(f"No progressive mp4 stream available for video {youtube_id}")
    video_stream.download(output_path=output_path, filename=youtube_id+".mp4")

    print(f"Video downloaded successfully: {output_path}/{youtube_id}.mp4")


def screenshot_youtube_video(youtube_id, snapshot_sec):
    """Save the frame at ``snapshot_sec`` of a downloaded video as a JPEG.

    The video is read from ``{OUTPUT_PATH}/{youtube_id}.mp4`` and the image is
    written next to it as ``{youtube_id}_{snapshot_sec}.jpg``.

    Returns:
        The path of the written screenshot.
    """
    screenshot_path = f'{OUTPUT_PATH}/{youtube_id}_{snapshot_sec}.jpg'
    with VideoFileClip(f'{OUTPUT_PATH}/{youtube_id}.mp4') as clip:
        clip.save_frame(screenshot_path, snapshot_sec)
    return screenshot_path

def process_web_link(link, timeout=30):
    """Fetch a web page and return its visible text.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up; without a
            timeout the request could block indefinitely.

    Returns:
        The text content of the parsed HTML.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status, so an error page is not mistaken for content.
    """
    response = requests.get(link, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()

def get_mind_map(video_id, df_string):
    """Return the mind map for a video, using the copy cached on Google Drive when present.

    The cache file is ``{video_id}_mind_map.json`` in the video's Drive
    folder. When it is missing, a mind map is generated from ``df_string`` and
    uploaded.

    Returns:
        A dict with the single key ``"mind_map"``.
    """
    print("===get_mind_map===")
    service = init_drive_service()
    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
    file_name = f'{video_id}_mind_map.json'

    exists, file_id = check_file_exists(service, folder_id, file_name)
    if exists:
        # Cached copy available: download and parse it.
        print("mind_map已存在于Google Drive中")
        return json.loads(download_file_as_string(service, file_id))

    # Nothing cached yet: generate the mind map and store it for next time.
    mind_map_json = {"mind_map": str(generate_mind_map(df_string))}
    serialized = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
    upload_content_directly(service, file_name, folder_id, serialized)
    print("mind_map已上傳到Google Drive")
    return mind_map_json

def generate_mind_map(df_string):
    """Ask the OpenAI chat model for a markdown mind map of ``df_string``.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    system_prompt = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
    user_prompt = f"""
        請根據 {df_string} 文本建立 markdown 心智圖
        注意：不需要前後文敘述，直接給出 markdown 文本即可
        這對我很重要
    """

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=4000,
    )
    mind_map = completion.choices[0].message.content.strip()

    banner = "=====mind_map====="
    print(banner)
    print(mind_map)
    print(banner)

    return mind_map

def get_mind_map_html(mind_map):
    """Wrap mind map markdown in a ``markmap`` template block.

    Code-fence markers (```` ```markdown ```` and ```` ``` ````) are removed
    from ``mind_map`` before it is embedded.
    """
    markdown_body = mind_map
    for fence in ("```markdown", "```"):
        markdown_body = markdown_body.replace(fence, "")

    return f"""
    <div class="markmap">
        <script type="text/template">
            {markdown_body}
        </script>
    </div>
    """

def processed_video_summary_to_json(summary):
    """Split a generated summary into its six numbered sections.

    The summary is expected to contain these headings, in this order:

        1. 內容類型
        2. 整體摘要
        3. 條列式重點
        4. 關鍵時刻（段落摘要）
        5. 結論反思（為什麼我們要學這個？）
        6. 延伸小問題

    A section's content is everything between its heading and the line holding
    the next heading that was found (or the end of the text), so it does not
    matter whether a heading is followed by one newline or a blank line, or
    whether a heading line starts with an emoji.

    Example input:
        "1. 內容類型：影片類型\\n\\n2. 整體摘要\\n本段影片…\\n\\n3. 條列式重點\\n- …"

    Args:
        summary: The summary text.

    Returns:
        A dict with the keys ``content_type``, ``overall_summary``,
        ``key_points``, ``key_moments``, ``conclusion_reflection`` and
        ``extension_questions``. A section whose heading is missing is an
        empty string. ``content_type`` is the first line after the heading
        with the leading colon removed.
    """
    section_headings = (
        ("content_type", "1. 內容類型"),
        ("overall_summary", "2. 整體摘要"),
        ("key_points", "3. 條列式重點"),
        ("key_moments", "4. 關鍵時刻（段落摘要）"),
        ("conclusion_reflection", "5. 結論反思（為什麼我們要學這個？）"),
        ("extension_questions", "6. 延伸小問題"),
    )

    # Locate each heading in document order; every search starts after the
    # previous hit so an earlier section's text cannot be taken for a heading.
    located = []
    cursor = 0
    for key, heading in section_headings:
        position = summary.find(heading, cursor)
        if position == -1:
            located.append((key, None, None))
            continue
        content_start = position + len(heading)
        located.append((key, position, content_start))
        cursor = content_start

    summary_json = {}
    for index, (key, position, content_start) in enumerate(located):
        if position is None:
            summary_json[key] = ""
            continue
        later_positions = [p for _, p, _ in located[index + 1:] if p is not None]
        if later_positions:
            next_position = later_positions[0]
            # End at the start of the next heading's line, dropping any prefix
            # (such as an emoji) that precedes the heading on that line.
            line_start = summary.rfind("\n", content_start, next_position) + 1
            content_end = line_start if line_start else next_position
        else:
            content_end = len(summary)
        summary_json[key] = summary[content_start:content_end].strip()

    # The content type sits on the heading's own line, after a colon.
    content_type = summary_json["content_type"].lstrip("：:").strip()
    summary_json["content_type"] = content_type.split("\n")[0].strip()

    print("===processed_video_summary_to_json===")
    print(summary_json)
    print("===processed_video_summary_to_json===")

    return summary_json

# get video_id_summary.json content
def get_video_id_summary(video_id, df_string):
    """Return the summary for a video, using the copy cached on Google Drive when present.

    The cache file is ``{video_id}_summary.json`` in the video's Drive folder.
    When it is missing, a summary is generated from ``df_string`` and uploaded;
    an upload failure is only printed, and the generated summary is still
    returned.

    Returns:
        A dict with the single key ``"summary"``.
    """
    print("===get_video_id_summary===")
    service = init_drive_service()
    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
    file_name = f'{video_id}_summary.json'

    exists, file_id = check_file_exists(service, folder_id, file_name)
    if exists:
        # Cached copy available: download and parse it.
        print("summary已存在Google Drive中")
        return json.loads(download_file_as_string(service, file_id))

    summary_json = {"summary": str(generate_summarise(df_string))}
    serialized = json.dumps(summary_json, ensure_ascii=False, indent=2)

    try:
        upload_content_directly(service, file_name, folder_id, serialized)
        print("summary已上傳到Google Drive")
    except Exception as e:
        # Caching is best effort: report the failure and carry on.
        error_msg = f" {video_id} 摘要錯誤: {str(e)}"
        print("===get_video_id_summary error===")
        print(error_msg)
        print("===get_video_id_summary error===")

    return summary_json
    

def generate_summarise(df_string):
    """Ask the OpenAI chat model for a structured zh-TW summary of ``df_string``.

    The prompt asks the model to decide whether the text is tabular data or a
    video transcript and to answer in a fixed six-section layout.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    system_prompt = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
    user_prompt = f"""
        請根據 {df_string}，判斷這份文本
        如果是資料類型，請提估欄位敘述、資料樣態與資料分析，告訴學生這張表的意義，以及可能的結論與對應方式
        
        如果是影片類型，請提估影片內容，告訴學生這部影片的意義，
        小範圍切出不同段落的相對應時間軸的重點摘要，最多不超過五段
        注意不要遺漏任何一段時間軸的內容
        格式為 【start - end】: 摘要
        以及可能的結論與結尾延伸小問題提供學生作反思

        整體格式為：
        🗂️ 1. 內容類型：？
        📚 2. 整體摘要
        🔖 3. 條列式重點
        🔑 4. 關鍵時刻（段落摘要）
        💡 5. 結論反思（為什麼我們要學這個？）
        ❓ 6. 延伸小問題
    """

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=4000,
    )
    summary_text = completion.choices[0].message.content.strip()

    banner = "=====df_summarise====="
    print(banner)
    print(summary_text)
    print(banner)

    return summary_text


def generate_questions(df_string):
    """Ask the OpenAI chat model for three questions a student might ask about ``df_string``.

    The model is asked for a JSON object; the value of its ``questions`` key
    is returned.
    """
    system_prompt = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，並用既有資料為本質猜測用戶可能會問的問題，使用 zh-TW"
    user_prompt = f"請根據 {df_string} 生成三個問題，並用 JSON 格式返回 questions:[q1的敘述text, q2的敘述text, q3的敘述text]"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        max_tokens=4000,
        response_format={"type": "json_object"},
    )
    reply = json.loads(completion.choices[0].message.content)
    questions = reply["questions"]

    print("=====json_response=====")
    print(questions)
    print("=====json_response=====")

    return questions

def get_questions(video_id, df_string):
    """Return three questions for a video, using the copy cached on Google Drive when present.

    The cache file is ``{video_id}_questions.json`` in the video's Drive
    folder. When it is missing, questions are generated from ``df_string`` and
    uploaded.

    Returns:
        A 3-tuple ``(q1, q2, q3)``; positions without a question hold ``""``.
    """
    print("===get_questions===")
    service = init_drive_service()
    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
    file_name = f'{video_id}_questions.json'

    exists, file_id = check_file_exists(service, folder_id, file_name)
    if exists:
        # Cached copy available: download and parse it.
        print("questions已存在于Google Drive中")
        questions = json.loads(download_file_as_string(service, file_id))
    else:
        questions = generate_questions(df_string)
        serialized = json.dumps(questions, ensure_ascii=False, indent=2)
        upload_content_directly(service, file_name, folder_id, serialized)
        print("questions已上傳到Google Drive")

    # Pad to exactly three slots.
    q1, q2, q3 = (questions[i] if len(questions) > i else "" for i in range(3))
    print("=====get_questions=====")
    print(f"q1: {q1}")
    print(f"q2: {q2}")
    print(f"q3: {q3}")
    print("=====get_questions=====")
    return q1, q2, q3

def change_questions(df_string):
    """Generate a fresh set of three questions for ``df_string``.

    Returns:
        A 3-tuple ``(q1, q2, q3)``; positions without a question hold ``""``.
    """
    questions = generate_questions(df_string)
    # Pad to exactly three slots.
    q1, q2, q3 = (questions[i] if len(questions) > i else "" for i in range(3))
    print("=====get_questions=====")
    for label, question in (("q1", q1), ("q2", q2), ("q3", q3)):
        print(f"{label}: {question}")
    print("=====get_questions=====")
    return q1, q2, q3

def _transcript_for_prompt(raw_data):
    """Return the source data with per-entry media URLs removed, leaving the input untouched."""
    data = raw_data
    if isinstance(data, str):
        # The data may arrive as JSON text or as plain text. Only a parsed
        # list is treated as a transcript; anything else is used verbatim.
        try:
            parsed = json.loads(data)
        except ValueError:
            return raw_data
        if not isinstance(parsed, list):
            return raw_data
        data = parsed
    if isinstance(data, list):
        return [
            {key: value for key, value in entry.items() if key not in ('embed_url', 'screenshot_path')}
            if isinstance(entry, dict) else entry
            for entry in data
        ]
    return data


def respond(user_message, df_string_output, chat_history, socratic_mode=False):
    """Answer a student's message about the loaded data using the OpenAI chat model.

    Args:
        user_message: The student's new message.
        df_string_output: The source data. A list of transcript dicts, the
            same list as JSON text, or plain text. ``embed_url`` and
            ``screenshot_path`` are left out of the prompt; the caller's
            objects are not modified.
        chat_history: List of ``(user, assistant)`` pairs, or ``None``. Only
            the last 10 pairs are kept.
        socratic_mode: When true, the assistant guides with questions instead
            of giving direct answers.

    Returns:
        ``("", chat_history)``: an empty string to clear the input box and the
        history including the new exchange.
    """
    print("=== 變數：user_message ===")
    print(user_message)
    print("=== 變數：chat_history ===")
    print(chat_history)

    data = _transcript_for_prompt(df_string_output)

    if socratic_mode:
        sys_content = f"""
            你是一個擅長資料分析跟影片教學的老師，user 為學生
            請用 {data} 為資料文本，自行判斷資料的種類，
            並進行對話，使用 zh-TW

            如果是影片類型，不用解釋逐字稿格式，直接回答學生問題
            請你用蘇格拉底式的提問方式，引導學生思考，並且給予學生一些提示
            不要直接給予答案，讓學生自己思考
            但可以給予一些提示跟引導，例如給予影片的時間軸，讓學生自己去找答案

            如果學生問了一些問題你無法判斷，請告訴學生你無法判斷，並建議學生可以問其他問題
            或者你可以問學生一些問題，幫助學生更好的理解資料

            如果學生的問題與資料文本無關，請告訴學生你無法回答超出範圍的問題

            最後，在你回答的開頭標註【蘇格拉底助教】
        """
    else:
        sys_content = f"""
            你是一個擅長資料分析跟影片教學的老師，user 為學生
            請用 {data} 為資料文本，自行判斷資料的種類，
            並進行對話，使用 zh-TW

            如果是影片類型，不用解釋逐字稿格式，直接回答學生問題
            但可以給予一些提示跟引導，例如給予影片的時間軸，讓學生可以找到相對應的時間點

            如果學生問了一些問題你無法判斷，請告訴學生你無法判斷，並建議學生可以問其他問題
            或者你可以問學生一些問題，幫助學生更好的理解資料

            如果學生的問題與資料文本無關，請告訴學生你無法回答超出範圍的問題
        """

    print("=== socratic_mode ===")
    print(socratic_mode)
    print("=== socratic_mode ===")

    print("=== sys_content ===")
    print(sys_content)
    print("=== sys_content ===")

    messages = [
        {"role": "system", "content": sys_content}
    ]

    # chat_history = [(user, assistant), (user, assistant), ...]
    if chat_history is not None:
        # Keep only the last 10 exchanges.
        if len(chat_history) > 10:
            chat_history = chat_history[-10:]

        for chat in chat_history:
            messages.append({"role": "user", "content": chat[0]})
            messages.append({"role": "assistant", "content": chat[1]})

    messages.append({"role": "user", "content": user_message})

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": messages,
        "max_tokens": 4000  # generous upper bound; adjust as needed
    }

    response = client.chat.completions.create(**request_payload)
    print(response)

    response_text = response.choices[0].message.content.strip()

    # Record the new exchange in the chat history.
    new_chat_history = (user_message, response_text)
    if chat_history is None:
        chat_history = [new_chat_history]
    else:
        chat_history.append(new_chat_history)

    # The empty string clears the input box.
    return "", chat_history

def update_slide(direction):
    """Move the slide cursor by ``direction`` and return the slide to show.

    Reads the module-level ``TRANSCRIPTS`` list and updates the module-level
    ``CURRENT_INDEX``. The new index is clamped to the valid range, so
    stepping past either end stays on the first/last entry.

    Args:
        direction: Signed number of entries to move; the wrappers in this
            file pass -1 (previous) and 1 (next).

    Returns:
        A ``(slide_image, slide_text)`` tuple read from the current entry's
        ``"screenshot_path"`` and ``"text"`` keys. When ``TRANSCRIPTS`` is
        empty there is nothing to show: ``CURRENT_INDEX`` is reset to 0 and
        ``(None, "")`` is returned instead of raising ``IndexError``.
    """
    # TRANSCRIPTS is only read here, so only CURRENT_INDEX needs `global`.
    global CURRENT_INDEX

    print("=== 更新投影片 ===")
    print(f"CURRENT_INDEX: {CURRENT_INDEX}")

    # Guard: with no entries, the clamp below would yield -1 and the lookup
    # would raise IndexError.
    if not TRANSCRIPTS:
        CURRENT_INDEX = 0
        return None, ""

    # Clamp into [0, len - 1] so the cursor never leaves the list.
    last_index = len(TRANSCRIPTS) - 1
    CURRENT_INDEX = max(0, min(CURRENT_INDEX + direction, last_index))

    # Fetch the screenshot path and text of the entry under the cursor.
    current_transcript = TRANSCRIPTS[CURRENT_INDEX]
    slide_image = current_transcript["screenshot_path"]
    slide_text = current_transcript["text"]

    return slide_image, slide_text

def prev_slide():
    """Step the slide cursor one entry backwards.

    Returns:
        The result of ``update_slide`` for a step of -1.
    """
    return update_slide(direction=-1)

# Wrapper used to handle the "Next" button click event.
def next_slide():
    """Step the slide cursor one entry forwards.

    Returns:
        The result of ``update_slide`` for a step of 1.
    """
    return update_slide(direction=1)

def get_video_id():
    """Return the current value of the module-level ``VIDEO_ID``."""
    current_video_id = VIDEO_ID
    return current_video_id

# HTML snippet for the page <head>: meta tags, sizing CSS for the markmap
# SVG, the markmap-autoloader script, and an inline script that re-renders
# the mind map when the element with id "mind_map_tab-button" is clicked.
#
# This is a plain string (not an f-string / str.format template), so CSS
# braces must be single; doubled braces would be emitted literally and
# invalidate the `svg.markmap` rule.
#
# NOTE(review): the inline script looks up "#mind_map_tab-button" once, when
# the script executes, and only attaches the listener if the element already
# exists at that moment — confirm this matches when the snippet is injected.
HEAD = """
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <style>
    svg.markmap {
        width: 100%;
        height: 100vh;
    }
    </style>
    <script src="https://cdn.jsdelivr.net/npm/markmap-autoloader@0.15.2"></script>

    <script>
        const mind_map_tab_button = document.querySelector("#mind_map_tab-button");
        
        if (mind_map_tab_button) {
            mind_map_tab_button.addEventListener('click', function() {
                const mind_map_markdown = document.querySelector("#mind_map_markdown > label > textarea");
                if (mind_map_markdown) {
                // 当按钮被点击时，打印当前的textarea的值
                console.log('Value changed to: ' + mind_map_markdown.value);
                markmap.autoLoader.renderAll();
                }
            });
        }
    </script>
"""


# Build the Gradio UI: source inputs and chat on the left, one tab per
# generated view on the right, then wire the event handlers and launch.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: source inputs and the chat panel.
        with gr.Column(scale=2):
            file_upload = gr.File(
                label="Upload your CSV or Word file",
                visible=False,
            )
            youtube_link = gr.Textbox(label="Enter YouTube Link")
            youtube_link_btn = gr.Button("Submit_YouTube_Link")
            web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
            chatbot = gr.Chatbot()
            socratic_mode_btn = gr.Checkbox(
                label="蘇格拉底家教助理模式",
                value=False,
            )
            msg = gr.Textbox(label="Message")
            send_button = gr.Button("Send")

        # Right column: tabs for each view of the processed material.
        with gr.Column(scale=3):
            with gr.Tab("圖文"):
                transcript_html = gr.HTML(label="YouTube Transcript and Video")
            with gr.Tab("投影片"):
                slide_image = gr.Image()
                slide_text = gr.Textbox()
                with gr.Row():
                    prev_button = gr.Button("Previous")
                    next_button = gr.Button("Next")
                # Both navigation buttons refresh the same two components.
                slide_outputs = [slide_image, slide_text]
                prev_button.click(fn=prev_slide, inputs=[], outputs=slide_outputs)
                next_button.click(fn=next_slide, inputs=[], outputs=slide_outputs)
            with gr.Tab("逐字稿"):
                simple_html_content = gr.HTML(label="Simple Transcript")
            with gr.Tab("本文"):
                df_string_output = gr.Textbox(lines=40, label="Data Text")
            with gr.Tab("重點"):
                df_summarise = gr.Textbox(
                    container=True,
                    show_copy_button=True,
                    lines=40,
                )
            with gr.Tab("問題"):
                gr.Markdown("## 常用問題")
                btn_1 = gr.Button()
                btn_2 = gr.Button()
                btn_3 = gr.Button()
                gr.Markdown("## 重新生成問題")
                btn_create_question = gr.Button("Create Questions")
            with gr.Tab("markdown"):
                gr.Markdown("## 請複製以下 markdown 並貼到你的心智圖工具中，建議使用：https://markmap.js.org/repl")
                mind_map = gr.Textbox(
                    container=True,
                    show_copy_button=True,
                    lines=40,
                    elem_id="mind_map_markdown",
                )
            with gr.Tab("心智圖", elem_id="mind_map_tab"):
                mind_map_html = gr.HTML()

    # Component groups shared by several handlers below.
    question_buttons = [btn_1, btn_2, btn_3]
    chat_outputs = [msg, chatbot]

    # Chat: the "Send" button submits the message box content.
    send_button.click(
        respond,
        inputs=[msg, df_string_output, chatbot, socratic_mode_btn],
        outputs=chat_outputs,
    )

    # Question buttons: each button is passed as the first input, in the
    # position the message box occupies for the "Send" button.
    for question_btn in question_buttons:
        question_btn.click(
            respond,
            inputs=[question_btn, df_string_output, chatbot, socratic_mode_btn],
            outputs=chat_outputs,
        )

    btn_create_question.click(
        change_questions,
        inputs=[df_string_output],
        outputs=question_buttons,
    )

    file_upload.change(
        process_file,
        inputs=file_upload,
        outputs=[*question_buttons, df_summarise, df_string_output],
    )

    # The YouTube link is processed both when the textbox value changes and
    # when the submit button is clicked; both use the same outputs.
    youtube_outputs = [
        *question_buttons,
        df_string_output,
        df_summarise,
        mind_map,
        mind_map_html,
        transcript_html,
        simple_html_content,
        slide_image,
        slide_text,
    ]
    for register_trigger in (youtube_link.change, youtube_link_btn.click):
        register_trigger(
            process_youtube_link,
            inputs=youtube_link,
            outputs=youtube_outputs,
        )

    # Web page link handler (currently disabled):
    # web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])

demo.launch(allowed_paths=["videos"])