RAGOndevice

Running on Zero

File size: 32,034 Bytes

3db2af2
 
 
 
1cfe513
1be852d
 
 
3db2af2
9e9b867
1c47184
9e9b867
3c893d2
1c47184
9e9b867
 
58e272a
50ef49c
0223744
 
 
 
3c893d2
 
 
 
3db2af2
3c893d2
 
 
 
58d9d19
9affa6d
3c893d2
3db2af2
1be852d
 
 
 
 
 
 
 
 
 
 
 
 
 
3db2af2
 
 
 
 
 
3c893d2
2317674
0223744
9e9b867
0cdbe8f
9e9b867
 
 
3c893d2
9a66aa0
9e9b867
58e272a
 
 
9e9b867
50ef49c
 
 
 
 
 
9e9b867
3db2af2
0223744
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3db2af2
0223744
 
 
3db2af2
58e272a
50ef49c
 
58e272a
50ef49c
58e272a
 
 
 
 
0223744
50ef49c
 
 
 
 
58e272a
9e9b867
3db2af2
fcd720a
3db2af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcd720a
0223744
3db2af2
0223744
 
 
 
 
 
fcd720a
0223744
fcd720a
3db2af2
0223744
 
 
3db2af2
0223744
 
 
 
fcd720a
3db2af2
0223744
 
fcd720a
6360699
3db2af2
6adfca3
3db2af2
 
 
 
 
6adfca3
 
 
 
3db2af2
 
6adfca3
3c893d2
 
 
3db2af2
fcd720a
 
 
 
 
3db2af2
fcd720a
3c893d2
 
3db2af2
fcd720a
3c893d2
3db2af2
fcd720a
3c893d2
 
fcd720a
3db2af2
3c893d2
 
fcd720a
3c893d2
 
3db2af2
3c893d2
 
fcd720a
3db2af2
 
3c893d2
 
3db2af2
 
 
 
 
 
 
 
 
 
 
 
3c893d2
 
fcd720a
3db2af2
 
6adfca3
 
 
 
 
fcd720a
 
 
 
 
3db2af2
fcd720a
3c893d2
 
3db2af2
fcd720a
3c893d2
3db2af2
fcd720a
6adfca3
3c893d2
fcd720a
3db2af2
6adfca3
 
 
fcd720a
3db2af2
 
3c893d2
6adfca3
 
 
 
 
3db2af2
3c893d2
 
 
3db2af2
3c893d2
3db2af2
fcd720a
3c893d2
 
 
 
3db2af2
fcd720a
 
 
 
 
3c893d2
 
 
3db2af2
fcd720a
 
 
 
 
3db2af2
3c893d2
6adfca3
 
fcd720a
3db2af2
6adfca3
fcd720a
6adfca3
b03b509
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9a0841
b03b509
 
c9a0841
fd896d4
b03b509
c9a0841
 
 
fd896d4
 
b03b509
c9a0841
b03b509
0b0ac38
 
 
 
 
 
 
 
 
 
 
 
 
6e53936
c9a0841
0b0ac38
6e53936
 
0b0ac38
fd896d4
b03b509
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f1dbfb
c9a0841
7f1dbfb
 
 
c9a0841
7f1dbfb
 
 
 
 
 
c9a0841
7f1dbfb
c9a0841
 
0b0ac38
3db2af2
0b0ac38
 
3db2af2
7f1dbfb
 
fd896d4
6d46b35
 
7f1dbfb
0b0ac38
 
 
 
 
b03b509
 
3c893d2
 
 
 
 
3db2af2
3c893d2
 
 
9affa6d
 
 
3db2af2
3c893d2
 
 
9affa6d
 
3c893d2
3db2af2
3c893d2
 
 
 
3db2af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e468070
3db2af2
 
 
 
 
 
 
 
 
 
3c893d2
3db2af2
e468070
3c893d2
 
3db2af2
e468070
 
3c893d2
e468070
 
 
3db2af2
e468070
 
 
 
3db2af2
 
 
 
58d9d19
e468070
 
 
 
3db2af2
3c893d2
 
 
 
 
3db2af2
3c893d2
 
e468070
9affa6d
 
e468070
 
9affa6d
 
 
 
 
 
 
 
e468070
 
3db2af2
9affa6d
e468070
 
 
 
 
 
 
9affa6d
 
 
 
 
 
 
 
 
e468070
 
9affa6d
3db2af2
9affa6d
 
3db2af2
9affa6d
 
 
 
 
 
 
 
58d9d19
9affa6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3db2af2
9affa6d
3db2af2
 
58d9d19
e468070
9affa6d
3db2af2
9affa6d
3db2af2
e468070
9affa6d
e468070
3db2af2
e468070
 
 
 
3db2af2
 
e468070
9affa6d
e468070
3db2af2
9affa6d
3c893d2
3db2af2
9affa6d
e468070
 
 
9affa6d
e468070
9affa6d
 
 
58d9d19
 
 
9affa6d
 
 
 
 
 
 
 
 
 
 
58d9d19
9affa6d
 
 
 
 
 
 
 
e468070
9affa6d
 
 
 
 
 
3c893d2
 
e468070
1be852d
 
 
e468070
3c893d2
e468070
 
 
7773cb1
 
3c893d2
 
5f4c99e
d6a3ccb
 
3c893d2
3db2af2
c53bcba
 
 
 
 
 
3db2af2
c53bcba
 
 
 
 
 
 
 
 
 
3db2af2
3c893d2
c53bcba
 
d6a3ccb
c53bcba
 
 
 
3db2af2
c53bcba
 
d6a3ccb
c53bcba
 
 
3db2af2
3c893d2
 
 
 
 
 
3db2af2
d6a3ccb
c53bcba
 
 
 
d6a3ccb
c53bcba
 
 
d6a3ccb
0b0ac38
c53bcba
 
 
d6a3ccb
0b0ac38
c53bcba
 
d6a3ccb
b03b509
c53bcba
 
d6a3ccb
c53bcba
 
 
 
d6a3ccb
 
 
 
c53bcba
 
 
 
3c893d2
 
 
d6a3ccb
3c893d2
c53bcba
 
 
 
 
e468070
c53bcba
 
 
 
 
e468070
 
9affa6d
 
 
 
e468070
 
 
 
 
 
 
 
 
 
3c893d2
 
 
 
 
 
7773cb1
2317674
3db2af2
2317674
7773cb1
9affa6d

import os
# Dynamo 완전 비활성화
os.environ["TORCH_DYNAMO_DISABLE"] = "1"

import torch
# 성능 최적화를 위한 설정 (TensorFloat32 연산 활성화)
torch.set_float32_matmul_precision('high')

import torch._dynamo
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

from threading import Thread
import random
from datasets import load_dataset
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from typing import List, Tuple
import json
from datetime import datetime
import pyarrow.parquet as pq
import pypdf
import io
import pyarrow.parquet as pq
from tabulate import tabulate
import platform
import subprocess
import pytesseract
from pdf2image import convert_from_path
import queue  # 추가: queue.Empty 예외 처리를 위해
import time  # 추가: 스트리밍 타이밍을 위해

# -------------------- Extra imports for PDF-to-Markdown conversion --------------------
try:
    import re
    import requests
    from bs4 import BeautifulSoup
    import urllib.request
    import ocrmypdf
    import pytz
    import urllib.parse
    from pypdf import PdfReader
except ModuleNotFoundError as e:
    # Name the module that is actually missing (any of the imports above may
    # fail, not only beautifulsoup4) and keep the original error as the cause.
    raise ModuleNotFoundError(
        f"필수 모듈 '{e.name}'이(가) 누락되었습니다. 해당 패키지를 설치해주세요.\n"
        "예: pip install beautifulsoup4",
        name=e.name,
    ) from e
# ---------------------------------------------------------------------------

# 1) Use Dynamo's suppress_errors option (fall back to eager mode on error)
torch._dynamo.config.suppress_errors = True

# Module-level state: context text of the most recently analysed upload
# (written and read by stream_chat, reset by the Clear button handler).
current_file_context = None

# Environment settings and model identifiers
HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL_ID = "CohereForAI/c4ai-command-r7b-12-2024"
MODELS = os.environ.get("MODELS")
MODEL_NAME = MODEL_ID.split("/")[-1]

model = None  # module-level global; stream_chat loads it on first use
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Load the Wikipedia Q&A dataset
wiki_dataset = load_dataset("lcw99/wikipedia-korean-20240501-1million-qna")
print("Wikipedia dataset loaded:", wiki_dataset)

# Create and fit the TF-IDF vectorizer
print("TF-IDF 벡터화 시작...")
questions = wiki_dataset['train']['question'][:10000]  # use only the first 10000 questions
vectorizer = TfidfVectorizer(max_features=1000)
question_vectors = vectorizer.fit_transform(questions)
print("TF-IDF 벡터화 완료")


class ChatHistory:
    """List of user/assistant exchanges mirrored to a JSON file on disk.

    Each entry is a dict with an ISO ``timestamp`` and a two-element
    ``messages`` list (user turn first, assistant turn second).
    """

    def __init__(self):
        self.history = []
        self.history_file = "/tmp/chat_history.json"
        self.load_history()

    def add_conversation(self, user_msg: str, assistant_msg: str):
        """Append one timestamped exchange and write the history to disk."""
        user_turn = {"role": "user", "content": user_msg}
        assistant_turn = {"role": "assistant", "content": assistant_msg}
        self.history.append({
            "timestamp": datetime.now().isoformat(),
            "messages": [user_turn, assistant_turn],
        })
        self.save_history()

    def format_for_display(self):
        """Return the history as ``[user_text, assistant_text]`` pairs."""
        return [
            [entry["messages"][0]["content"], entry["messages"][1]["content"]]
            for entry in self.history
        ]

    def get_messages_for_api(self):
        """Return the history flattened into role/content message dicts."""
        api_messages = []
        for entry in self.history:
            user_turn, assistant_turn = entry["messages"][0], entry["messages"][1]
            api_messages.append({"role": "user", "content": user_turn["content"]})
            api_messages.append({"role": "assistant", "content": assistant_turn["content"]})
        return api_messages

    def clear_history(self):
        """Drop every stored exchange and persist the empty history."""
        self.history = []
        self.save_history()

    def save_history(self):
        """Write the history to ``history_file``; failures are only printed."""
        try:
            with open(self.history_file, 'w', encoding='utf-8') as out:
                json.dump(self.history, out, ensure_ascii=False, indent=2)
        except Exception as err:
            print(f"히스토리 저장 실패: {err}")

    def load_history(self):
        """Read the history from ``history_file`` if it exists.

        Any failure is printed and leaves the history empty.
        """
        try:
            if not os.path.exists(self.history_file):
                return
            with open(self.history_file, 'r', encoding='utf-8') as src:
                self.history = json.load(src)
        except Exception as err:
            print(f"히스토리 로드 실패: {err}")
            self.history = []


# Create the module-wide ChatHistory instance (stream_chat records finished exchanges in it)
chat_history = ChatHistory()


def find_relevant_context(query, top_k=3):
    """Return up to ``top_k`` indexed Q&A pairs most similar to ``query``.

    The query is vectorised with the module-level TF-IDF ``vectorizer`` and
    scored against ``question_vectors`` with a sparse dot product (the
    original comment describes this as cosine similarity).

    Args:
        query: Free-text query string.
        top_k: Maximum number of matches to return; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with ``question``, ``answer`` and ``similarity`` keys,
        ordered from most to least similar. Entries with a similarity of zero
        or less are left out.
    """
    # Guard: with top_k == 0 the slice below would be [-0:], i.e. everything.
    if top_k <= 0:
        return []

    # Vectorise the query
    query_vector = vectorizer.transform([query])
    # Similarity of the query against every indexed question
    similarities = (query_vector * question_vectors.T).toarray()[0]
    # Indices of the best-scoring questions, highest first
    top_indices = np.argsort(similarities)[-top_k:][::-1]

    train_split = wiki_dataset['train']
    relevant_contexts = []
    for idx in top_indices:
        score = similarities[idx]
        if score <= 0:
            continue
        # Plain int so list and dataset indexing do not depend on numpy types
        row = int(idx)
        relevant_contexts.append({
            'question': questions[row],
            # Fetch one row instead of re-reading the whole 'answer' column
            # on every iteration.
            'answer': train_split[row]['answer'],
            'similarity': score
        })
    return relevant_contexts


def init_msg():
    """Return the fixed status text that is put in the message box after a file upload."""
    status_text = "파일을 분석하고 있습니다..."
    return status_text


# -------------------- Utilities for converting a PDF file to Markdown --------------------
def extract_text_from_pdf(reader: PdfReader) -> str:
    """
    Collect the text of every page of ``reader``.

    Each page that yields text is emitted under a ``---- Page N ----`` header
    (1-based); pages without text are skipped. Returns an empty string when no
    page has any text.
    """
    sections = []
    for page_number, page in enumerate(reader.pages, start=1):
        page_text = page.extract_text() or ""
        if page_text:
            sections.append(f"---- Page {page_number} ----\n" + page_text + "\n\n")
    return "".join(sections).strip()


def convert_pdf_to_markdown(pdf_file: str):
    """
    Read a PDF, extract its text, and fall back to OCR for image-heavy files.

    OCR is attempted when the PDF contains at least one image and fewer than
    1000 characters of text were extracted. The OCR result is written next to
    the input as ``<name>_ocr<ext>``.

    Args:
        pdf_file: Path of the PDF to read.

    Returns:
        ``(text, metadata, pdf_file)`` on success, where ``metadata`` is a dict
        with ``author``, ``creator``, ``producer``, ``subject`` and ``title``.
        If the PDF cannot be opened, ``(error_message, None, None)``.
    """
    try:
        reader = PdfReader(pdf_file)
    except Exception as e:
        return f"PDF 파일을 읽는 중 오류 발생: {e}", None, None

    # Extract metadata (every field is None when the PDF carries none)
    raw_meta = reader.metadata
    metadata = {
        "author": raw_meta.author if raw_meta else None,
        "creator": raw_meta.creator if raw_meta else None,
        "producer": raw_meta.producer if raw_meta else None,
        "subject": raw_meta.subject if raw_meta else None,
        "title": raw_meta.title if raw_meta else None,
    }

    # Extract text
    full_text = extract_text_from_pdf(reader)

    # Try OCR when there are images but very little extractable text
    image_count = sum(len(page.images) for page in reader.pages)

    if image_count > 0 and len(full_text) < 1000:
        try:
            # Build the output name from the real extension. A plain
            # str.replace(".pdf", ...) would touch directory names containing
            # ".pdf" and leave ".PDF" / extension-less paths unchanged, making
            # OCR write over its own input.
            stem, ext = os.path.splitext(pdf_file)
            out_pdf_file = f"{stem}_ocr{ext or '.pdf'}"
            ocrmypdf.ocr(pdf_file, out_pdf_file, force_ocr=True)
            # Re-extract text from the OCR-processed PDF
            reader_ocr = PdfReader(out_pdf_file)
            full_text = extract_text_from_pdf(reader_ocr)
        except Exception as e:
            full_text = f"OCR 처리 중 오류 발생: {e}\n\n원본 PDF 텍스트:\n\n" + full_text

    return full_text, metadata, pdf_file


# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------

def _first_pipe_table_shape(lines):
    """Return ``(columns, data_rows)`` of the first pipe table in ``lines``, or None."""
    for start, line in enumerate(lines):
        if line.lstrip().startswith('|'):
            break
    else:
        return None

    table = []
    for line in lines[start:]:
        if not line.lstrip().startswith('|'):
            break
        table.append(line.strip())

    # A row like "| a | b |" has one more '|' than it has cells.
    columns = table[0].count('|') - 1
    # The first two table lines are the header and the alignment separator.
    rows = max(len(table) - 2, 0)
    return columns, rows


def analyze_file_content(content, file_type):
    """Return a one-line structural summary of ``content``.

    Args:
        content: Text of the file (for datasets, a report containing a
            pipe-formatted Markdown table).
        file_type: ``'parquet'`` / ``'csv'`` select the dataset summary; any
            other value is summarised as code or as a plain document.

    Returns:
        A summary string. For datasets, the column and row counts describe the
        first pipe table found in ``content`` (only the rows present in that
        text); if there is no such table a failure message is returned.
    """
    lines = content.split('\n')

    if file_type in ['parquet', 'csv']:
        # Look for the actual table rather than assuming it starts on the
        # first line: the report text opens with a title, which previously
        # produced "-1 columns".
        shape = _first_pipe_table_shape(lines)
        if shape is None:
            return "❌ Failed to analyze dataset structure"
        columns, rows = shape
        return f"📊 Dataset Structure: {columns} columns, {rows} rows"

    total_lines = len(lines)

    if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
        functions = len([line for line in lines if 'def ' in line])
        classes = len([line for line in lines if 'class ' in line])
        imports = len([line for line in lines if 'import ' in line or 'from ' in line])
        return f"💻 Code Structure: {total_lines} lines (Functions: {functions}, Classes: {classes}, Imports: {imports})"

    paragraphs = content.count('\n\n') + 1
    words = len(content.split())
    return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, approximately {words} words"


# Encodings tried in order when decoding CSV and plain-text uploads.
_UPLOAD_ENCODINGS = ['utf-8', 'cp949', 'euc-kr', 'latin1']


def _upload_path(file):
    """Return the filesystem path of an upload given as a path or an object with ``.name``."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _dataframe_header_report(df, label):
    """Build report sections 1-2 plus the "Data Preview" heading for ``df``."""
    parts = [
        f"📊 {label} File Analysis:\n\n",
        "1. Basic Information:\n",
        f"- Total Rows: {len(df):,}\n",
        f"- Total Columns: {len(df.columns)}\n",
        f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n",
        "2. Column Information:\n",
    ]
    parts.extend(f"- {col} ({df[col].dtype})\n" for col in df.columns)
    parts.append("\n3. Data Preview:\n")
    return "".join(parts)


def _missing_values_report(df):
    """Build report section 4, listing only columns that contain nulls."""
    parts = ["\n\n4. Missing Values:\n"]
    null_counts = df.isnull().sum()
    for col, count in null_counts[null_counts > 0].items():
        parts.append(f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n")
    return "".join(parts)


def _parquet_report(path):
    """Read a Parquet file and return its textual analysis report."""
    df = pq.read_table(path).to_pandas()

    content = _dataframe_header_report(df, "Parquet")
    content += tabulate(df.head(5), headers='keys', tablefmt='pipe', showindex=False)
    content += _missing_values_report(df)

    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
    if len(numeric_cols) > 0:
        content += "\n5. Numeric Column Statistics:\n"
        content += tabulate(df[numeric_cols].describe(), headers='keys', tablefmt='pipe')
    return content


def _csv_report(path):
    """Read a CSV file with the first encoding that decodes it and return its report."""
    for encoding in _UPLOAD_ENCODINGS:
        try:
            df = pd.read_csv(path, encoding=encoding)
        except UnicodeDecodeError:
            continue
        content = _dataframe_header_report(df, "CSV")
        content += df.head(5).to_markdown(index=False)
        content += _missing_values_report(df)
        return content
    raise ValueError(
        f"Unable to read file with supported encodings ({', '.join(_UPLOAD_ENCODINGS)})"
    )


def _text_report(path):
    """Read a text file and return its content followed by a short analysis."""
    for encoding in _UPLOAD_ENCODINGS:
        try:
            with open(path, 'r', encoding=encoding) as f:
                content = f.read()
        except UnicodeDecodeError:
            continue

        lines = content.split('\n')
        total_lines = len(lines)
        # Same keyword heuristic as analyze_file_content uses to spot code
        is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])

        analysis = "\n📝 File Analysis:\n"
        if is_code:
            functions = len([line for line in lines if 'def ' in line])
            classes = len([line for line in lines if 'class ' in line])
            imports = len([line for line in lines if 'import ' in line or 'from ' in line])

            analysis += "- File Type: Code\n"
            analysis += f"- Total Lines: {total_lines:,}\n"
            analysis += f"- Functions: {functions}\n"
            analysis += f"- Classes: {classes}\n"
            analysis += f"- Import Statements: {imports}\n"
        else:
            non_empty_lines = len([line for line in lines if line.strip()])
            words = len(content.split())
            chars = len(content)

            analysis += "- File Type: Text\n"
            analysis += f"- Total Lines: {total_lines:,}\n"
            analysis += f"- Non-empty Lines: {non_empty_lines:,}\n"
            analysis += f"- Word Count: {words:,}\n"
            analysis += f"- Character Count: {chars:,}\n"

        return content + analysis
    raise ValueError(
        f"Unable to read file with supported encodings ({', '.join(_UPLOAD_ENCODINGS)})"
    )


def read_uploaded_file(file):
    """
    Read an uploaded file and return its content together with a type tag.

    Args:
        file: ``None``, a filesystem path (``str`` / ``os.PathLike``), or an
            object exposing the path as ``.name``.

    Returns:
        ``(content, file_type)`` where ``file_type`` is ``"parquet"``,
        ``"pdf"``, ``"csv"``, ``"text"`` or ``"error"`` (``content`` is then
        the error message). ``("", "")`` when ``file`` is ``None``.
        The function does not raise; failures are reported through the
        ``"error"`` tag.
    """
    if file is None:
        return "", ""
    try:
        # Accept both a plain path and a file-like object with .name; the
        # upload widget is configured with type="filepath".
        path = _upload_path(file)
        file_ext = os.path.splitext(path)[1].lower()

        # Parquet
        if file_ext == '.parquet':
            try:
                return _parquet_report(path), "parquet"
            except Exception as e:
                return f"Error reading Parquet file: {str(e)}", "error"

        # PDF (converted to Markdown)
        if file_ext == '.pdf':
            try:
                markdown_text, metadata, _ = convert_pdf_to_markdown(path)
                if metadata is None:
                    return f"PDF 파일 변환 오류 또는 읽기 실패.\n\n원본 메시지:\n{markdown_text}", "error"

                content = "# PDF to Markdown Conversion\n\n"
                content += "## Metadata\n"
                for k, v in metadata.items():
                    content += f"**{k.capitalize()}**: {v}\n\n"

                content += "## Extracted Text\n\n"
                content += markdown_text

                return content, "pdf"
            except Exception as e:
                return f"Error reading PDF file: {str(e)}", "error"

        # CSV
        if file_ext == '.csv':
            return _csv_report(path), "csv"

        # Any other extension is treated as plain text
        return _text_report(path), "text"

    except Exception as e:
        return f"Error reading file: {str(e)}", "error"


# Custom stylesheet handed to gr.Blocks(css=CSS) in create_demo. Several
# selectors (.chat-container, .input-container, .input-textbox, .send-button,
# .custom-button, .file-upload-icon) match elem_classes assigned there.
CSS = """
/* 3D 스타일 CSS */
:root {
    --primary-color: #2196f3;
    --secondary-color: #1976d2;
    --background-color: #f0f2f5;
    --card-background: #ffffff;
    --text-color: #333333;
    --shadow-color: rgba(0, 0, 0, 0.1);
}
body {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    min-height: 100vh;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.container {
    transform-style: preserve-3d;
    perspective: 1000px;
}
.chatbot {
    background: var(--card-background);
    border-radius: 20px;
    box-shadow: 
        0 10px 20px var(--shadow-color),
        0 6px 6px var(--shadow-color);
    transform: translateZ(0);
    transition: transform 0.3s ease;
    backdrop-filter: blur(10px);
}
.chatbot:hover {
    transform: translateZ(10px);
}
/* 메시지 입력 영역 */
.input-area {
    background: var(--card-background);
    border-radius: 15px;
    padding: 15px;
    margin-top: 20px;
    box-shadow: 
        0 5px 15px var(--shadow-color),
        0 3px 3px var(--shadow-color);
    transform: translateZ(0);
    transition: all 0.3s ease;
    display: flex;
    align-items: center;
    gap: 10px;
}
.input-area:hover {
    transform: translateZ(5px);
}
/* 버튼 스타일 */
.custom-button {
    background: linear-gradient(145deg, var(--primary-color), var(--secondary-color));
    color: white;
    border: none;
    border-radius: 10px;
    padding: 10px 20px;
    font-weight: 600;
    cursor: pointer;
    transform: translateZ(0);
    transition: all 0.3s ease;
    box-shadow: 
        0 4px 6px var(--shadow-color),
        0 1px 3px var(--shadow-color);
}
.custom-button:hover {
    transform: translateZ(5px) translateY(-2px);
    box-shadow: 
        0 7px 14px var(--shadow-color),
        0 3px 6px var(--shadow-color);
}
/* 파일 업로드 버튼 */
.file-upload-icon {
    background: linear-gradient(145deg, #64b5f6, #42a5f5);
    color: white;
    border-radius: 8px;
    font-size: 2em;
    cursor: pointer;
    display: flex;
    align-items: center;
    justify-content: center;
    height: 70px;
    width: 70px;
    transition: all 0.3s ease;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.file-upload-icon:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}
/* 파일 업로드 버튼 내부 요소 스타일링 */
.file-upload-icon > .wrap {
    display: flex !important;
    align-items: center;
    justify-content: center;
    width: 100%;
    height: 100%;
}
.file-upload-icon > .wrap > p {
    display: none !important;
}
.file-upload-icon > .wrap::before {
    content: "📁";
    font-size: 2em;
    display: block;
}
/* 메시지 스타일 */
.message {
    background: var(--card-background);
    border-radius: 15px;
    padding: 15px;
    margin: 10px 0;
    box-shadow: 
        0 4px 6px var(--shadow-color),
        0 1px 3px var(--shadow-color);
    transform: translateZ(0);
    transition: all 0.3s ease;
}
.message:hover {
    transform: translateZ(5px);
}
.chat-container {
    height: 600px !important;
    margin-bottom: 10px;
}
.input-container {
    height: 70px !important;
    display: flex;
    align-items: center;
    gap: 10px;
    margin-top: 5px;
}
.input-textbox {
    height: 70px !important;
    border-radius: 8px !important;
    font-size: 1.1em !important;
    padding: 10px 15px !important;
    display: flex !important;
    align-items: flex-start !important;
}
.input-textbox textarea {
    padding-top: 5px !important;
}
.send-button {
    height: 70px !important;
    min-width: 70px !important;
    font-size: 1.1em !important;
}
/* 설정 패널 기본 스타일 */
.settings-panel {
    padding: 20px;
    margin-top: 20px;
}
"""

def clear_cuda_memory():
    """Release cached CUDA memory; do nothing when CUDA is unavailable.

    The availability check replaces a ``hasattr`` test that was always true
    and a ``torch.cuda.device('cuda')`` context that raises on hosts without
    CUDA.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


@spaces.GPU
def load_model():
    """Load the causal language model named by ``MODEL_ID`` and return it.

    Load failures are printed and then re-raised to the caller.
    """
    try:
        # Free cached CUDA memory before the weights are loaded
        clear_cuda_memory()

        load_options = {
            "torch_dtype": torch.bfloat16,
            "device_map": "auto",
            # Setting added to keep CPU memory usage low during loading
            "low_cpu_mem_usage": True,
        }
        return AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_options)
    except Exception as e:
        print(f"모델 로드 오류: {str(e)}")
        raise

def _truncate_tokens_for_context(input_ids_str: str, desired_input_length: int) -> str:
    """
    입력 문자열이 desired_input_length 토큰을 넘으면, 앞부분(오래된 컨텍스트)을 잘라내는 함수.
    """
    tokens = input_ids_str.split()
    if len(tokens) > desired_input_length:
        tokens = tokens[-desired_input_length:]
    return " ".join(tokens)


# build_prompt: flatten the conversation turns into a single prompt string
def build_prompt(conversation: list) -> str:
    """
    Render ``conversation`` as a plain-text prompt.

    ``conversation`` is a list of ``{"role": ..., "content": ...}`` dicts.
    Turns whose role is "user" or "assistant" become one ``"<Speaker>: <content>"``
    line each; turns with any other role are skipped. The prompt ends with an
    open ``"Assistant: "`` cue for the model to continue.
    """
    speaker_prefix = {"user": "User: ", "assistant": "Assistant: "}
    rendered = []
    for turn in conversation:
        prefix = speaker_prefix.get(turn["role"])
        if prefix is None:
            continue
        rendered.append(prefix + turn["content"] + "\n")
    rendered.append("Assistant: ")
    return "".join(rendered)


@spaces.GPU
def stream_chat(
    message: str,
    history: list,
    uploaded_file,
    temperature: float,
    max_new_tokens: int,
    top_p: float,
    top_k: int,
    penalty: float
):
    """Generate a reply to ``message`` and stream it to the chat UI.

    The prompt is assembled from the last 10 history pairs, the analysed
    upload (if any), matching Wikipedia Q&A context, and the message itself.
    It is trimmed to fit an 8192-token context and generated on a worker
    thread while this generator relays partial text.

    Args:
        message: Text from the message box. The status text returned by
            ``init_msg()`` marks a freshly uploaded file to analyse.
        history: Chat history as ``[user_text, assistant_text]`` pairs.
        uploaded_file: Current value of the upload widget, or a falsy value.
        temperature: Sampling temperature; values <= 0 select greedy decoding.
        max_new_tokens: Upper bound on generated tokens (reduced if the prompt
            leaves less room).
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        penalty: Repetition penalty; values <= 0 are treated as 1.0.

    Yields:
        ``("", pairs)`` tuples: an empty string for the message box and the
        history extended with the reply so far (or an error message).
    """
    global model, current_file_context

    try:
        if model is None:
            model = load_model()

        print(f'message is - {message}')
        print(f'history is - {history}')

        # Counts must be integers for generation, in case the UI delivers floats.
        max_new_tokens = int(max_new_tokens)
        top_k = int(top_k)

        # Handle a file upload: analyse it once and remember the result so
        # follow-up questions can reuse it.
        file_context = ""
        if uploaded_file and message == init_msg():
            current_file_context = None
            try:
                content, file_type = read_uploaded_file(uploaded_file)
                if content:
                    file_analysis = analyze_file_content(content, file_type)
                    file_context = (
                        f"\n\n📄 파일 분석 결과:\n{file_analysis}"
                        f"\n\n파일 내용:\n```\n{content}\n```"
                    )
                    current_file_context = file_context
                    message = "업로드된 파일을 분석해주세요."
            except Exception as e:
                print(f"파일 분석 오류: {str(e)}")
                file_context = f"\n\n❌ 파일 분석 중 오류가 발생했습니다: {str(e)}"
        elif current_file_context:
            file_context = current_file_context

        if torch.cuda.is_available():
            print(f"CUDA 메모리 사용량: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")

        max_history_length = 10
        if len(history) > max_history_length:
            history = history[-max_history_length:]

        # Look up related Wikipedia Q&A context
        wiki_context = ""
        try:
            relevant_contexts = find_relevant_context(message)
            if relevant_contexts:  # add the section only when there are hits
                wiki_context = "\n\n관련 위키피디아 정보:\n"
                for ctx in relevant_contexts:
                    wiki_context += (
                        f"Q: {ctx['question']}\n"
                        f"A: {ctx['answer']}\n"
                        f"유사도: {ctx['similarity']:.3f}\n\n"
                    )
        except Exception as e:
            print(f"컨텍스트 검색 오류: {str(e)}")

        # Rebuild the conversation from the history pairs
        conversation = []
        for prompt, answer in history:
            conversation.extend([
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": answer}
            ])

        # Compose the final user message: file context first, then Wikipedia
        # context, then the question. Both contexts are "" when absent.
        extra_context = file_context + wiki_context
        if extra_context:
            final_message = extra_context + "\n현재 질문: " + message
        else:
            final_message = message

        conversation.append({"role": "user", "content": final_message})

        # Build the prompt and tokenize it
        input_ids_str = build_prompt(conversation)

        # Check the prompt length against the context window first
        max_context = 8192
        tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
        input_length = tokenized_input["input_ids"].shape[1]

        # Trim the prompt when it leaves too little room for the reply
        if input_length > max_context - max_new_tokens:
            print(f"입력이 너무 깁니다: {input_length} 토큰. 자르는 중...")
            # Reserve a minimum number of tokens for generation
            min_generation = min(256, max_new_tokens)
            new_desired_input_length = max_context - min_generation

            # Cut at token level, keeping the most recent part of the prompt
            tokens = tokenizer.encode(input_ids_str)
            if len(tokens) > new_desired_input_length:
                tokens = tokens[-new_desired_input_length:]
                input_ids_str = tokenizer.decode(tokens)

            # Tokenize again after trimming
            tokenized_input = tokenizer(input_ids_str, return_tensors="pt")
            input_length = tokenized_input["input_ids"].shape[1]

        print(f"최종 입력 길이: {input_length} 토큰")

        # Move the inputs to CUDA
        inputs = tokenized_input.to("cuda")

        # Shrink max_new_tokens to whatever still fits in the context window
        remaining = max_context - input_length
        if remaining < max_new_tokens:
            print(f"max_new_tokens 조정: {max_new_tokens} -> {remaining}")
            max_new_tokens = remaining

        print(f"입력 텐서 생성 후 CUDA 메모리: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")

        # Streamer that hands generated text from the worker thread to this generator
        streamer = TextIteratorStreamer(
            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )

        # Generation parameters
        generate_kwargs = dict(
            **inputs,
            streamer=streamer,
            # The slider allows 0, which generate() rejects; 1.0 means "no penalty".
            repetition_penalty=penalty if penalty > 0 else 1.0,
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer.eos_token_id,  # explicit EOS token
        )
        if temperature > 0:
            generate_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
            )
        else:
            # Sampling needs a strictly positive temperature; the slider's
            # minimum of 0 is mapped to greedy decoding instead.
            generate_kwargs["do_sample"] = False

        # Free cached memory before generating
        clear_cuda_memory()

        # Run generation on a separate thread
        thread = Thread(target=model.generate, kwargs=generate_kwargs)
        thread.start()

        # Stream the response
        buffer = ""
        partial_message = ""
        last_yield_time = time.time()

        try:
            for new_text in streamer:
                buffer += new_text
                partial_message += new_text

                # Update the UI at intervals or once enough new text has piled up
                current_time = time.time()
                if current_time - last_yield_time > 0.1 or len(partial_message) > 20:
                    yield "", history + [[message, buffer]]
                    partial_message = ""
                    last_yield_time = current_time

            # Make sure the complete reply is shown
            if buffer:
                yield "", history + [[message, buffer]]

            # Store the exchange in the persistent chat history
            chat_history.add_conversation(message, buffer)

        except Exception as e:
            print(f"스트리밍 중 오류 발생: {str(e)}")
            if not buffer:  # show the error only when nothing was generated
                buffer = f"응답 생성 중 오류가 발생했습니다: {str(e)}"
            yield "", history + [[message, buffer]]

        # Give a still-running generation thread a moment to finish
        if thread.is_alive():
            thread.join(timeout=5.0)

        # Free cached memory afterwards
        clear_cuda_memory()

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        error_message = f"오류가 발생했습니다: {str(e)}\n{error_details}"
        print(f"Stream chat 오류: {error_message}")
        clear_cuda_memory()
        yield "", history + [[message, error_message]]


def create_demo():
    """Build and return the Gradio Blocks UI for the chat assistant.

    The layout is a chatbot pane, an input row (file upload, message box,
    Send and Clear buttons), an accordion of generation settings, and example
    prompts. Submitting a message, clicking Send, or uploading a file runs
    ``stream_chat``.
    """
    with gr.Blocks(css=CSS) as demo:
        with gr.Column(elem_classes="markdown-style"):
            gr.Markdown("""
                # 🤖 RAGOndevice
                #### 📊 RAG: Upload and Analyze Files (TXT, CSV, PDF, Parquet files)
                Upload your files for data analysis and learning
            """)

        chatbot = gr.Chatbot(
            value=[],
            height=600,
            label="GiniGEN AI Assistant",
            elem_classes="chat-container"
        )

        with gr.Row(elem_classes="input-container"):
            with gr.Column(scale=1, min_width=70):
                file_upload = gr.File(
                    type="filepath",
                    elem_classes="file-upload-icon",
                    scale=1,
                    container=True,
                    interactive=True,
                    show_label=False
                )

            with gr.Column(scale=3):
                msg = gr.Textbox(
                    show_label=False,
                    placeholder="Type your message here... 💭",
                    container=False,
                    elem_classes="input-textbox",
                    scale=1
                )

            with gr.Column(scale=1, min_width=70):
                send = gr.Button(
                    "Send",
                    elem_classes="send-button custom-button",
                    scale=1
                )

            with gr.Column(scale=1, min_width=70):
                clear = gr.Button(
                    "Clear",
                    elem_classes="clear-button custom-button",
                    scale=1
                )

        with gr.Accordion("🎮 Advanced Settings", open=False):
            with gr.Row():
                with gr.Column(scale=1):
                    temperature = gr.Slider(
                        minimum=0, maximum=1, step=0.1, value=0.8,
                        label="Creativity Level 🎨"
                    )
                    max_new_tokens = gr.Slider(
                        minimum=128, maximum=8000, step=1, value=4000,
                        label="Maximum Token Count 📝"
                    )
                with gr.Column(scale=1):
                    top_p = gr.Slider(
                        minimum=0.0, maximum=1.0, step=0.1, value=0.8,
                        label="Diversity Control 🎯"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=20, step=1, value=20,
                        label="Selection Range 📊"
                    )
                    penalty = gr.Slider(
                        minimum=0.0, maximum=2.0, step=0.1, value=1.0,
                        label="Repetition Penalty 🔄"
                    )

        gr.Examples(
            examples=[
                ["Please analyze this code and suggest improvements:\ndef fibonacci(n):\n    if n <= 1: return n\n    return fibonacci(n-1) + fibonacci(n-2)"],
                ["Please analyze this data and provide insights:\nAnnual Revenue (Million)\n2019: 1200\n2020: 980\n2021: 1450\n2022: 2100\n2023: 1890"],
                ["Please solve this math problem step by step: 'When a circle's area is twice that of its inscribed square, find the relationship between the circle's radius and the square's side length.'"],
                ["Please analyze this marketing campaign's ROI and suggest improvements:\nTotal Cost: $50,000\nReach: 1M users\nClick Rate: 2.3%\nConversion Rate: 0.8%\nAverage Purchase: $35"],
            ],
            inputs=msg
        )

        def clear_conversation():
            """Forget the analysed file and reset the chat, upload and message widgets."""
            global current_file_context
            current_file_context = None
            return [], None, "Start a new conversation..."

        # Every trigger feeds stream_chat the same widgets.
        chat_inputs = [msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty]
        chat_outputs = [msg, chatbot]

        msg.submit(
            stream_chat,
            inputs=chat_inputs,
            outputs=chat_outputs
        )

        send.click(
            stream_chat,
            inputs=chat_inputs,
            outputs=chat_outputs
        )

        # Use the upload event rather than change: change also fires when the
        # Clear button resets the widget to None, which started an analysis
        # run with no file.
        file_upload.upload(
            fn=lambda: ("처리 중...", [["시스템", "파일을 분석 중입니다. 잠시만 기다려주세요..."]]),
            outputs=[msg, chatbot],
            queue=False
        ).then(
            fn=init_msg,
            outputs=msg,
            queue=False
        ).then(
            fn=stream_chat,
            inputs=chat_inputs,
            outputs=chat_outputs,
            queue=True
        )

        clear.click(
            fn=clear_conversation,
            outputs=[chatbot, file_upload, msg],
            queue=False
        )

        return demo


# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()