import os
from typing import Dict, Iterator, List, Optional, Tuple

import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient

# Inference API client, created once at import time.
# The model id is hard-coded; the access token is read from the HF_TOKEN
# environment variable (os.getenv returns None when it is not set).
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))

def load_code(filename: str) -> str:
    """Return the UTF-8 text content of *filename*.

    This function never raises: when the file is missing, or any other
    error occurs while opening/reading it, a human-readable (Korean) error
    message is returned in place of the file content.
    """
    try:
        with open(filename, mode='r', encoding='utf-8') as source:
            contents = source.read()
    except FileNotFoundError:
        return f"{filename} 파일을 찾을 수 없습니다."
    except Exception as err:
        return f"파일을 읽는 중 오류가 발생했습니다: {err}"
    return contents

def load_parquet(filename: str) -> str:
    """Return a Markdown table previewing the first 10 rows of a Parquet file.

    The file is read with the pyarrow engine and rendered without the index
    column. This function never raises: a missing file or any other failure
    is reported as a (Korean) error message string instead.
    """
    try:
        first_rows = pd.read_parquet(filename, engine='pyarrow').head(10)
        result = first_rows.to_markdown(index=False)
    except FileNotFoundError:
        result = f"{filename} 파일을 찾을 수 없습니다."
    except Exception as err:
        result = f"파일을 읽는 중 오류가 발생했습니다: {err}"
    return result

# Load the code files once at import time.
# load_code() returns an error-message string instead of raising, so each of
# these names is always bound to a str even when the file is absent.
# NOTE(review): none of these three values is referenced elsewhere in the
# visible part of this file — confirm whether they are still needed.
fashion_code = load_code('fashion.cod')
uhdimage_code = load_code('uhdimage.cod')
MixGEN_code = load_code('mgen.cod')

def respond(
    message: str,
    history: List[Dict[str, str]],
    system_message: str = "",
    max_tokens: int = 1000,
    temperature: float = 0.7,
    top_p: float = 0.9,
    parquet_data: Optional[str] = None
) -> Iterator[str]:
    """Stream the model's reply to *message* as a growing string.

    This is a generator: each yielded value is the full response accumulated
    so far, so a UI can simply replace the displayed text on every step.

    Args:
        message: The current user message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            If the last entry is already the current user message it is not
            sent a second time.
        system_message: Extra instructions appended to the built-in system
            prompt (ignored when empty).
        max_tokens: Forwarded to ``chat_completion``.
        temperature: Forwarded to ``chat_completion``.
        top_p: Forwarded to ``chat_completion``.
        parquet_data: JSON serialization of the uploaded DataFrame (as
            produced by ``DataFrame.to_json()``); its first 10 rows are
            embedded into the system prompt as a Markdown table.

    Yields:
        The partial response text, or a single error message if preparing
        the prompt or calling the model fails.
    """
    import io  # local import: only needed to wrap the JSON payload

    # Built-in system prompt
    system_prefix = """반드시 한글로 답변할 것. 너는 주어진 소스코드를 기반으로 "서비스 사용 설명 및 안내, Q&A를 하는 역할이다". 아주 친절하고 자세하게 Markdown 형식으로 작성하라. 너는 코드를 기반으로 사용 설명 및 질의 응답을 진행하며, 이용자에게 도움을 주어야 한다. 이용자가 궁금해할 만한 내용에 친절하게 알려주도록 하라. 코드 전체 내용에 대해서는 보안을 유지하고, 키 값 및 엔드포인트와 구체적인 모델은 공개하지 마라."""

    # The caller-supplied system message used to be silently dropped.
    if system_message:
        system_prefix += f"\n\n{system_message}"

    # Embed a preview of the uploaded Parquet data in the system prompt.
    if parquet_data:
        try:
            # Passing a literal JSON string to read_json is deprecated in
            # recent pandas; a file-like wrapper works on every version.
            source = io.StringIO(parquet_data) if isinstance(parquet_data, str) else parquet_data
            df = pd.read_json(source)
            parquet_content = df.head(10).to_markdown(index=False)
        except Exception as e:
            yield f"추론 중 오류가 발생했습니다: {str(e)}"
            return
        system_prefix += f"\n\n업로드된 Parquet 파일 내용:\n```markdown\n{parquet_content}\n```"
        # The user's message is intentionally left untouched here: replacing
        # it with a canned sentence meant the real question was never asked.

    # Combine the system prompt with the conversation so far.
    messages = [{"role": "system", "content": system_prefix}]
    messages.extend(
        {"role": chat['role'], "content": chat['content']}
        for chat in (history or [])
    )
    # A caller may already have appended the current user turn to history;
    # do not send the same message twice in a row.
    last = messages[-1]
    if not (last["role"] == "user" and last["content"] == message):
        messages.append({"role": "user", "content": message})

    try:
        # Send the conversation to the model and stream the reply back.
        response = ""
        for msg in hf_client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip chunks that carry no choices so indexing cannot fail.
            if not msg.choices:
                continue
            token = msg.choices[0].delta.get('content', None)
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"추론 중 오류가 발생했습니다: {str(e)}"

def upload_csv(file_path: str) -> Tuple[str, str]:
    """Validate a CSV file and convert it to a snappy-compressed Parquet file.

    The CSV must contain the columns ``id``, ``text``, ``label`` and
    ``metadata``. Duplicate rows are dropped, missing values are replaced by
    empty strings and the four required columns are cast to compact dtypes.
    The Parquet file is written to the current working directory under the
    CSV's base name with a ``.parquet`` extension.

    Args:
        file_path: Path of the CSV file to convert.

    Returns:
        ``(status_message, parquet_filename)``. On any failure the message
        describes the problem and the filename is ``""``; this function
        never raises.
    """
    try:
        # Read the CSV file
        df = pd.read_csv(file_path, sep=',')

        # Check that all required columns are present
        required_columns = {'id', 'text', 'label', 'metadata'}
        missing_columns = required_columns - set(df.columns)

        if missing_columns:
            # Sorted so the message does not depend on set iteration order.
            return f"CSV 파일에 다음 필수 컬럼이 누락되었습니다: {', '.join(sorted(missing_columns))}", ""

        # Data cleansing
        df = df.drop_duplicates().fillna('')

        # Compact dtypes
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})

        # Convert to Parquet
        parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
        # index=False: after drop_duplicates the index may no longer be a plain
        # RangeIndex, and pandas would then store it as an extra column.
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy', index=False)

        return f"{parquet_filename} 파일이 성공적으로 업로드되고 변환되었습니다.", parquet_filename
    except Exception as e:
        return f"CSV 파일 업로드 및 변환 중 오류가 발생했습니다: {str(e)}", ""

def upload_parquet(file_path: str) -> Tuple[str, str, str]:
    """Read a Parquet file and return a status, a preview and its JSON form.

    Returns:
        ``(status_message, markdown_preview, dataframe_json)`` where the
        preview is a Markdown table of the first 10 rows (index omitted) and
        the JSON is ``DataFrame.to_json()`` of the whole frame. On any
        failure the message describes the error and the other two items are
        empty strings; this function never raises.
    """
    try:
        frame = pd.read_parquet(file_path, engine='pyarrow')
        # Markdown preview first, then the JSON serialization of the frame
        preview_md = frame.head(10).to_markdown(index=False)
        frame_json = frame.to_json()
    except Exception as err:
        return f"Parquet 파일 업로드 중 오류가 발생했습니다: {err}", "", ""
    return "Parquet 파일이 성공적으로 업로드되었습니다.", preview_md, frame_json

def text_to_parquet(text: str) -> Tuple[str, str, str]:
    """Convert ``id,text,label,metadata`` lines of text into a Parquet file.

    Each non-blank line is parsed as one CSV record, so a field may contain
    commas when it is enclosed in double quotes. Both ``\\n`` and ``\\r\\n``
    line endings are accepted. The result is written to
    ``text_to_parquet.parquet`` in the current working directory.

    Args:
        text: The raw text, one record per line.

    Returns:
        ``(status_message, markdown_preview, parquet_filename)``. On any
        failure the message describes the error and the other two items are
        empty strings; this function never raises.
    """
    import csv  # local import: only this function parses CSV text

    try:
        # splitlines() copes with CRLF input; blank lines are not records.
        lines = [line for line in (text or "").splitlines() if line.strip()]
        if not lines:
            raise ValueError("입력된 텍스트가 없습니다.")

        # csv.reader honours quoting, unlike a plain split(',').
        data = list(csv.reader(lines))
        df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])

        # Compact dtypes
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})

        # Convert to Parquet
        parquet_filename = 'text_to_parquet.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

        # Preview of the file that was just written
        parquet_content = load_parquet(parquet_filename)

        return f"{parquet_filename} 파일이 성공적으로 변환되었습니다.", parquet_content, parquet_filename
    except Exception as e:
        return f"텍스트 변환 중 오류가 발생했습니다: {str(e)}", "", ""

# Custom CSS passed to gr.Blocks below: hides the footer, gives the elements
# with ids "chatbot-container" / "chatbot-data-upload" a fixed 600px scrolling
# height, sets the chat message font size, and forces text inputs to black
# text on a white background.
css = """
footer {
    visibility: hidden;
}
#chatbot-container, #chatbot-data-upload {
    height: 600px;
    overflow-y: scroll;
}
#chatbot-container .message, #chatbot-data-upload .message {
    font-size: 14px;
}
/* 입력창 배경색 및 글자색 변경 */
textarea, input[type="text"] {
    background-color: #ffffff; /* 흰색 배경 */
    color: #000000; /* 검정색 글자 */
}
"""

# Gradio Blocks interface: three tabs (chat over an uploaded Parquet dataset,
# CSV -> Parquet conversion, free text -> Parquet conversion).
with gr.Blocks(css=css) as demo:
    gr.Markdown("# My RAG: LLM이 나만의 데이터로 학습한 콘텐츠 생성/답변")
    gr.Markdown("### 1) 나만의 데이터를 텍스트로 입력하거나 CSV를 업로드하여 Parquet 포맷 데이터셋 자동 변환합니다.")
    gr.Markdown("### 2) Parquet 포맷 데이터셋을 업로드하면, LLM이 맞춤 학습 데이터로 활용하여 응답을 시작합니다.")
    gr.Markdown("### Tip) '예제'를 통해 다양한 활용 방법을 체험하고 응용해 보세요.")

    # First tab: upload a Parquet dataset and chat with the LLM about it
    with gr.Tab("My 데이터셋+LLM"):
        gr.Markdown("### Parquet 파일 업로드 및 질문하기")
        with gr.Row():
            with gr.Column():
                parquet_upload = gr.File(label="Parquet 파일 업로드", type="filepath")
                parquet_upload_button = gr.Button("업로드")
                parquet_upload_status = gr.Textbox(label="업로드 상태", interactive=False)
                parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
                # Holds the JSON serialization of the uploaded DataFrame between events
                parquet_data_state = gr.State()

                def handle_parquet_upload(file_path: str):
                    """Load the uploaded Parquet file via upload_parquet().

                    Returns (status message, Markdown preview, DataFrame JSON);
                    the preview and JSON are empty strings when loading failed.
                    """
                    message, parquet_content, parquet_json = upload_parquet(file_path)
                    if parquet_json:
                        return message, parquet_content, parquet_json
                    else:
                        return message, "", ""

                parquet_upload_button.click(
                    handle_parquet_upload,
                    inputs=parquet_upload,
                    outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
                )

        gr.Markdown("### LLM과 대화하기")
        chatbot_data_upload = gr.Chatbot(label="챗봇", type="messages", elem_id="chatbot-data-upload")
        msg_data_upload = gr.Textbox(label="메시지 입력", placeholder="여기에 메시지를 입력하세요...")
        send_data_upload = gr.Button("전송")

        with gr.Accordion("시스템 프롬프트 및 옵션 설정", open=False):
            system_message = gr.Textbox(label="System Message", value="너는 AI 조언자 역할이다.")
            max_tokens = gr.Slider(minimum=1, maximum=8000, value=1000, label="Max Tokens")
            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

        def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
            """Append the user message to the chat and stream the assistant reply.

            This is a generator used as a Gradio event handler: every yield is
            (updated history, "") — the second item clears the message textbox.
            The user turn is appended to ``history`` before respond() is called
            with that same history and ``message``.
            """
            history = history or []
            history.append({"role": "user", "content": message})
            try:
                # Generate the response
                response_gen = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_data)
                partial_response = ""
                for partial in response_gen:
                    partial_response = partial
                    # Update the assistant's last message in place for a streaming
                    # effect; the first partial creates the assistant entry.
                    if len(history) > 0 and history[-1]['role'] == 'assistant':
                        history[-1]['content'] = partial_response
                    else:
                        history.append({"role": "assistant", "content": partial_response})
                    yield history, ""
            except Exception as e:
                # Surface the failure as an assistant message instead of raising
                response = f"추론 중 오류가 발생했습니다: {str(e)}"
                history.append({"role": "assistant", "content": response})
                yield history, ""

        send_data_upload.click(
            handle_message_data_upload,
            inputs=[msg_data_upload, chatbot_data_upload, system_message, max_tokens, temperature, top_p, parquet_data_state],
            outputs=[chatbot_data_upload, msg_data_upload],
            queue=True
        )

    # Second tab: CSV upload and conversion to Parquet
    with gr.Tab("CSV to My 데이터셋"):
        gr.Markdown("### CSV 파일 업로드 및 Parquet 변환")
        with gr.Row():
            with gr.Column():
                csv_file = gr.File(label="CSV 파일 업로드", type="filepath")
                upload_button = gr.Button("업로드 및 변환")
                upload_status = gr.Textbox(label="업로드 상태", interactive=False)
                parquet_preview = gr.Markdown(label="Parquet 파일 미리보기")
                download_button = gr.File(label="Parquet 파일 다운로드", interactive=False)

                def handle_csv_upload(file_path: str):
                    """Convert the uploaded CSV via upload_csv() and preview the result.

                    Returns (status message, Markdown preview, Parquet filename);
                    on failure the preview is "" and the filename is None.
                    """
                    message, parquet_filename = upload_csv(file_path)
                    if parquet_filename:
                        parquet_content = load_parquet(parquet_filename)
                        return message, parquet_content, parquet_filename
                    else:
                        return message, "", None

                upload_button.click(
                    handle_csv_upload,
                    inputs=csv_file,
                    outputs=[upload_status, parquet_preview, download_button]
                )

    # Third tab: free text -> Parquet conversion
    with gr.Tab("Text to My 데이터셋"):
        gr.Markdown("### 텍스트를 입력하면 CSV로 변환 후 Parquet으로 자동 전환됩니다.")
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="텍스트 입력 (각 행은 `id,text,label,metadata` 형식으로 입력)",
                    lines=10,
                    placeholder="예: 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2"
                )
                convert_button = gr.Button("변환 및 다운로드")
                convert_status = gr.Textbox(label="변환 상태", interactive=False)
                parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
                download_parquet_convert = gr.File(label="Parquet 파일 다운로드", interactive=False)

                def handle_text_to_parquet(text: str):
                    """Convert the entered text via text_to_parquet().

                    Returns (status message, Markdown preview, Parquet filename);
                    on failure the preview is "" and the filename is None.
                    """
                    message, parquet_content, parquet_filename = text_to_parquet(text)
                    if parquet_filename:
                        return message, parquet_content, parquet_filename
                    else:
                        return message, "", None

                convert_button.click(
                    handle_text_to_parquet,
                    inputs=text_input,
                    outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
                )


    gr.Markdown("### Arxivgpt@gmail.com")

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()