openfree committed (verified) · Commit 3db2af2 · Parent: 467a8c5

Update app.py

Files changed (1): app.py (+248, -128)

app.py CHANGED
@@ -1,9 +1,13 @@
 import torch
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
-import os
 from threading import Thread
 import random
 from datasets import load_dataset
@@ -17,15 +21,29 @@ import pyarrow.parquet as pq
 import pypdf
 import io
 import pyarrow.parquet as pq
-from pdfminer.high_level import extract_text
-from pdfminer.layout import LAParams
-from tabulate import tabulate  # added tabulate
 import platform
 import subprocess
 import pytesseract
 from pdf2image import convert_from_path
 
-# Added global variable
 current_file_context = None
 
 # Environment variable setup
@@ -48,6 +66,7 @@ vectorizer = TfidfVectorizer(max_features=1000)
 question_vectors = vectorizer.fit_transform(questions)
 print("TF-IDF 벡터화 완료")
 
 class ChatHistory:
     def __init__(self):
         self.history = []
@@ -103,19 +122,18 @@ class ChatHistory:
             print(f"히스토리 로드 실패: {e}")
             self.history = []
 
 # Create the global ChatHistory instance
 chat_history = ChatHistory()
 
 def find_relevant_context(query, top_k=3):
     # Vectorize the query
     query_vector = vectorizer.transform([query])
-
     # Compute cosine similarities
     similarities = (query_vector * question_vectors.T).toarray()[0]
-
     # Indices of the most similar questions
     top_indices = np.argsort(similarities)[-top_k:][::-1]
-
     # Extract the relevant contexts
     relevant_contexts = []
     for idx in top_indices:
@@ -125,14 +143,74 @@ def find_relevant_context(query, top_k=3):
             'answer': wiki_dataset['train']['answer'][idx],
             'similarity': similarities[idx]
         })
-
     return relevant_contexts
 
 def init_msg():
-    return "Analyzing file..."
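Side note: the retrieval above is plain TF-IDF plus cosine similarity over the wiki Q&A pairs. A minimal, self-contained sketch of the same idea (toy questions, not the app's dataset):

```python
# Sketch of the TF-IDF retrieval used in find_relevant_context (toy data).
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

questions = ["What is Python?", "How does OCR work?", "What is TF-IDF?"]
vectorizer = TfidfVectorizer(max_features=1000)
question_vectors = vectorizer.fit_transform(questions)

def find_top_k(query: str, top_k: int = 2):
    query_vector = vectorizer.transform([query])
    # Rows are L2-normalized by default, so the dot product is the cosine similarity.
    similarities = (query_vector * question_vectors.T).toarray()[0]
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [(questions[i], float(similarities[i])) for i in top_indices]

print(find_top_k("explain tf-idf"))
```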
 
 def analyze_file_content(content, file_type):
-    """Analyze file content and return structural summary"""
     if file_type in ['parquet', 'csv']:
         try:
             lines = content.split('\n')
@@ -142,115 +220,87 @@ def analyze_file_content(content, file_type):
             return f"📊 Dataset Structure: {columns} columns, {rows} rows"
         except:
             return "❌ Failed to analyze dataset structure"
-
     lines = content.split('\n')
     total_lines = len(lines)
     non_empty_lines = len([line for line in lines if line.strip()])
-
     if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
         functions = len([line for line in lines if 'def ' in line])
         classes = len([line for line in lines if 'class ' in line])
         imports = len([line for line in lines if 'import ' in line or 'from ' in line])
         return f"💻 Code Structure: {total_lines} lines (Functions: {functions}, Classes: {classes}, Imports: {imports})"
-
     paragraphs = content.count('\n\n') + 1
     words = len(content.split())
     return f"📝 Document Structure: {total_lines} lines, {paragraphs} paragraphs, approximately {words} words"
 
 def read_uploaded_file(file):
     if file is None:
         return "", ""
     try:
         file_ext = os.path.splitext(file.name)[1].lower()
-
-        # Parquet file processing
         if file_ext == '.parquet':
             try:
                 table = pq.read_table(file.name)
                 df = table.to_pandas()
-
                 content = f"📊 Parquet File Analysis:\n\n"
                 content += f"1. Basic Information:\n"
                 content += f"- Total Rows: {len(df):,}\n"
                 content += f"- Total Columns: {len(df.columns)}\n"
                 content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
-
                 content += f"2. Column Information:\n"
                 for col in df.columns:
                     content += f"- {col} ({df[col].dtype})\n"
-
                 content += f"\n3. Data Preview:\n"
                 content += tabulate(df.head(5), headers='keys', tablefmt='pipe', showindex=False)
-
                 content += f"\n\n4. Missing Values:\n"
                 null_counts = df.isnull().sum()
                 for col, count in null_counts[null_counts > 0].items():
                     content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
-
                 numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
                 if len(numeric_cols) > 0:
                     content += f"\n5. Numeric Column Statistics:\n"
                     stats_df = df[numeric_cols].describe()
                     content += tabulate(stats_df, headers='keys', tablefmt='pipe')
-
                 return content, "parquet"
             except Exception as e:
                 return f"Error reading Parquet file: {str(e)}", "error"
-
-        # PDF file processing
         if file_ext == '.pdf':
             try:
-                pdf_reader = pypdf.PdfReader(file.name)
-                total_pages = len(pdf_reader.pages)
-
-                content = f"📑 PDF Document Analysis:\n\n"
-                content += f"1. Basic Information:\n"
-                content += f"- Total Pages: {total_pages}\n"
-
-                if pdf_reader.metadata:
-                    content += "\n2. Metadata:\n"
-                    for key, value in pdf_reader.metadata.items():
-                        if value and str(key).startswith('/'):
-                            content += f"- {key[1:]}: {value}\n"
-
-                try:
-                    text = extract_text(
-                        file.name,
-                        laparams=LAParams(
-                            line_margin=0.5,
-                            word_margin=0.1,
-                            char_margin=2.0,
-                            all_texts=True
-                        )
-                    )
-                except:
-                    text = ""
-
-                if not text.strip():
-                    text = extract_pdf_text_with_ocr(file.name)
-
-                if text:
-                    words = text.split()
-                    lines = text.split('\n')
-                    content += f"\n3. Text Analysis:\n"
-                    content += f"- Total Words: {len(words):,}\n"
-                    content += f"- Unique Words: {len(set(words)):,}\n"
-                    content += f"- Total Lines: {len(lines):,}\n"
-
-                    content += f"\n4. Content Preview:\n"
-                    preview_length = min(2000, len(text))
-                    content += f"--- First {preview_length} characters ---\n"
-                    content += text[:preview_length]
-                    if len(text) > preview_length:
-                        content += f"\n... (Showing partial content of {len(text):,} characters)\n"
-                else:
-                    content += "\n⚠️ Text extraction failed"
-
                 return content, "pdf"
             except Exception as e:
                 return f"Error reading PDF file: {str(e)}", "error"
-
-        # CSV file processing
         elif file_ext == '.csv':
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
@@ -261,44 +311,44 @@ def read_uploaded_file(file):
                     content += f"- Total Rows: {len(df):,}\n"
                     content += f"- Total Columns: {len(df.columns)}\n"
                     content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
-
                     content += f"2. Column Information:\n"
                     for col in df.columns:
                         content += f"- {col} ({df[col].dtype})\n"
-
                     content += f"\n3. Data Preview:\n"
                     content += df.head(5).to_markdown(index=False)
-
                     content += f"\n\n4. Missing Values:\n"
                     null_counts = df.isnull().sum()
                     for col, count in null_counts[null_counts > 0].items():
                         content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
-
                     return content, "csv"
                 except UnicodeDecodeError:
                     continue
             raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
-
-        # Text file processing
         else:
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
                 try:
                     with open(file.name, 'r', encoding=encoding) as f:
                         content = f.read()
-
                     lines = content.split('\n')
                     total_lines = len(lines)
                     non_empty_lines = len([line for line in lines if line.strip()])
-
                     is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])
-
                     analysis = f"\n📝 File Analysis:\n"
                     if is_code:
                         functions = len([line for line in lines if 'def ' in line])
                         classes = len([line for line in lines if 'class ' in line])
                         imports = len([line for line in lines if 'import ' in line or 'from ' in line])
-
                         analysis += f"- File Type: Code\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Functions: {functions}\n"
@@ -307,18 +357,18 @@ def read_uploaded_file(file):
                     else:
                         words = len(content.split())
                         chars = len(content)
-
                         analysis += f"- File Type: Text\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Non-empty Lines: {non_empty_lines:,}\n"
                         analysis += f"- Word Count: {words:,}\n"
                         analysis += f"- Character Count: {chars:,}\n"
-
                     return content + analysis, "text"
                 except UnicodeDecodeError:
                     continue
             raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
-
     except Exception as e:
         return f"Error reading file: {str(e)}", "error"
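Aside: the multi-encoding fallback above is the standard pattern for mixed Korean/Latin text files, but note that `raise UnicodeDecodeError(msg)` with a single string argument actually raises a TypeError at runtime, because that exception type requires five positional arguments. A sketch of the same loop with a safer terminal error (hypothetical helper, not part of the app):

```python
# Encoding-fallback reader mirroring the loop above (hypothetical helper).
def read_text_any_encoding(path: str) -> str:
    """Try common encodings in order and return the first successful decode."""
    for encoding in ('utf-8', 'cp949', 'euc-kr', 'latin1'):
        try:
            with open(path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            continue
    # UnicodeDecodeError itself needs five positional args, so raise ValueError instead.
    raise ValueError("Unable to decode file with utf-8/cp949/euc-kr/latin1")
```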
 
@@ -461,10 +511,10 @@ body {
     font-size: 1.1em !important;
     padding: 10px 15px !important;
     display: flex !important;
-    align-items: flex-start !important; /* align the text input toward the top */
 }
 .input-textbox textarea {
-    padding-top: 5px !important; /* adjust the top text padding */
 }
 .send-button {
     height: 70px !important;
@@ -478,73 +528,118 @@ body {
 }
 """
 
-# Revised GPU memory management function
 def clear_cuda_memory():
     if hasattr(torch.cuda, 'empty_cache'):
         with torch.cuda.device('cuda'):
             torch.cuda.empty_cache()
 
-# Revised model loading function
 @spaces.GPU
 def load_model():
     try:
-        model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
         )
-        return model
     except Exception as e:
         print(f"모델 로드 오류: {str(e)}")
         raise
 
 @spaces.GPU
-def stream_chat(message: str, history: list, uploaded_file, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
     global model, current_file_context
-
     try:
         if model is None:
             model = load_model()
-
         print(f'message is - {message}')
         print(f'history is - {history}')
 
         # Handle file uploads
         file_context = ""
         if uploaded_file and message == "파일을 분석하고 있습니다...":
             try:
                 content, file_type = read_uploaded_file(uploaded_file)
                 if content:
                     file_analysis = analyze_file_content(content, file_type)
-                    file_context = f"\n\n📄 파일 분석 결과:\n{file_analysis}\n\n파일 내용:\n```\n{content}\n```"
                     current_file_context = file_context  # store the file context
                     message = "업로드된 파일을 분석해주세요."
             except Exception as e:
                 print(f"파일 분석 오류: {str(e)}")
                 file_context = f"\n\n❌ 파일 분석 중 오류가 발생했습니다: {str(e)}"
-        elif current_file_context:  # use the saved file context if present
             file_context = current_file_context
-
 
         # Monitor memory usage
         if torch.cuda.is_available():
             print(f"CUDA 메모리 사용량: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
 
         # Trim the conversation history if it grows too long
-        max_history_length = 10  # maximum history length
         if len(history) > max_history_length:
             history = history[-max_history_length:]
 
-        # Find relevant context
         try:
             relevant_contexts = find_relevant_context(message)
             wiki_context = "\n\n관련 위키피디아 정보:\n"
             for ctx in relevant_contexts:
-                wiki_context += f"Q: {ctx['question']}\nA: {ctx['answer']}\n유사도: {ctx['similarity']:.3f}\n\n"
         except Exception as e:
             print(f"컨텍스트 검색 오류: {str(e)}")
             wiki_context = ""
-
         # Build the conversation history
         conversation = []
         for prompt, answer in history:
557
  final_message = file_context + wiki_context + "\nํ˜„์žฌ ์งˆ๋ฌธ: " + message
558
  conversation.append({"role": "user", "content": final_message})
559
 
560
- # ํ† ํฐ ์ˆ˜ ์ œํ•œ
561
- input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
562
- max_length = 4096 # ๋˜๋Š” ๋ชจ๋ธ์˜ ์ตœ๋Œ€ ์ปจํ…์ŠคํŠธ ๊ธธ์ด
563
- if len(input_ids.split()) > max_length:
564
- # ์ปจํ…์ŠคํŠธ๊ฐ€ ๋„ˆ๋ฌด ๊ธธ๋ฉด ์ž˜๋ผ๋‚ด๊ธฐ
565
- input_ids = " ".join(input_ids.split()[-max_length:])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
- inputs = tokenizer(input_ids, return_tensors="pt").to("cuda")
568
-
569
- # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ฒดํฌ
570
  if torch.cuda.is_available():
571
  print(f"์ž…๋ ฅ ํ…์„œ ์ƒ์„ฑ ํ›„ CUDA ๋ฉ”๋ชจ๋ฆฌ: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
572
 
573
- streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
 
 
574
 
575
  generate_kwargs = dict(
576
- inputs,
577
  streamer=streamer,
578
  top_k=top_k,
579
  top_p=top_p,
580
  repetition_penalty=penalty,
581
- max_new_tokens=min(max_new_tokens, 2048), # ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜ ์ œํ•œ
582
- do_sample=True,
583
  temperature=temperature,
584
- eos_token_id=[255001],
585
  )
586
-
587
  # ์ƒ์„ฑ ์‹œ์ž‘ ์ „ ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
588
  clear_cuda_memory()
589
-
590
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
591
  thread.start()
592
 
@@ -601,12 +723,10 @@ def stream_chat(message: str, history: list, uploaded_file, temperature: float,
     except Exception as e:
         error_message = f"오류가 발생했습니다: {str(e)}"
         print(f"Stream chat 오류: {error_message}")
-        # Clean up memory on errors as well
         clear_cuda_memory()
         yield "", history + [[message, error_message]]
 
 
-
 def create_demo():
     with gr.Blocks(css=CSS) as demo:
         with gr.Column(elem_classes="markdown-style"):
@@ -615,14 +735,14 @@ def create_demo():
             #### 📊 RAG: Upload and Analyze Files (TXT, CSV, PDF, Parquet files)
             Upload your files for data analysis and learning
             """)
-
         chatbot = gr.Chatbot(
             value=[],
             height=600,
             label="GiniGEN AI Assistant",
             elem_classes="chat-container"
         )
-
         with gr.Row(elem_classes="input-container"):
             with gr.Column(scale=1, min_width=70):
                 file_upload = gr.File(
@@ -633,7 +753,7 @@ def create_demo():
                     interactive=True,
                     show_label=False
                 )
-
             with gr.Column(scale=3):
                 msg = gr.Textbox(
                     show_label=False,
@@ -642,21 +762,21 @@ def create_demo():
                     elem_classes="input-textbox",
                     scale=1
                 )
-
             with gr.Column(scale=1, min_width=70):
                 send = gr.Button(
                     "Send",
                     elem_classes="send-button custom-button",
                     scale=1
                 )
-
             with gr.Column(scale=1, min_width=70):
                 clear = gr.Button(
                     "Clear",
                     elem_classes="clear-button custom-button",
                     scale=1
                 )
-
         with gr.Accordion("🎮 Advanced Settings", open=False):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -697,7 +817,7 @@ def create_demo():
             current_file_context = None
             return [], None, "Start a new conversation..."
 
-        # Event bindings
         msg.submit(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
@@ -721,7 +841,6 @@ def create_demo():
             queue=True
         )
 
-        # Clear button event binding
         clear.click(
             fn=clear_conversation,
             outputs=[chatbot, file_upload, msg],
@@ -730,6 +849,7 @@ def create_demo():
 
     return demo
 
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch()
+import os
+# Disable Dynamo entirely
+os.environ["TORCH_DYNAMO_DISABLE"] = "1"
+
 import torch
+import torch._dynamo
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 from threading import Thread
 import random
 from datasets import load_dataset
...
 import pypdf
 import io
 import pyarrow.parquet as pq
+from tabulate import tabulate
 import platform
 import subprocess
 import pytesseract
 from pdf2image import convert_from_path
 
+# -------------------- Added: imports for PDF-to-Markdown conversion --------------------
+import re
+import requests
+from bs4 import BeautifulSoup
+import urllib.request
+import ocrmypdf
+import pytz
+import urllib.parse
+from pypdf import PdfReader
+# ---------------------------------------------------------------------------
+
+# --------------------
+# 1) Use Dynamo's suppress_errors option (fall back to eager on errors)
+# --------------------
+torch._dynamo.config.suppress_errors = True
+
+# Global variables
 current_file_context = None
 
 # Environment variable setup
 
...
 question_vectors = vectorizer.fit_transform(questions)
 print("TF-IDF 벡터화 완료")
 
+
 class ChatHistory:
     def __init__(self):
         self.history = []
...
             print(f"히스토리 로드 실패: {e}")
             self.history = []
 
+
 # Create the global ChatHistory instance
 chat_history = ChatHistory()
 
+
 def find_relevant_context(query, top_k=3):
     # Vectorize the query
     query_vector = vectorizer.transform([query])
     # Compute cosine similarities
     similarities = (query_vector * question_vectors.T).toarray()[0]
     # Indices of the most similar questions
     top_indices = np.argsort(similarities)[-top_k:][::-1]
     # Extract the relevant contexts
     relevant_contexts = []
     for idx in top_indices:
...
             'answer': wiki_dataset['train']['answer'][idx],
             'similarity': similarities[idx]
         })
     return relevant_contexts
 
+
 def init_msg():
+    return "파일을 분석하고 있습니다..."
+
+
+# -------------------- Utility functions that convert a PDF file to Markdown --------------------
+def extract_text_from_pdf(reader: PdfReader) -> str:
+    """
+    Extract the text of every page with pypdf.
+    Returns an empty string if there is no text.
+    """
+    full_text = ""
+    for idx, page in enumerate(reader.pages):
+        text = page.extract_text() or ""
+        if len(text) > 0:
+            full_text += f"---- Page {idx+1} ----\n" + text + "\n\n"
+    return full_text.strip()
+
+
+def convert_pdf_to_markdown(pdf_file: str):
+    """
+    Read a PDF file and extract its text; if the file is image-heavy
+    and contains little text, attempt OCR. Returns text that can be
+    rendered as Markdown, together with the metadata.
+    """
+    try:
+        reader = PdfReader(pdf_file)
+    except Exception as e:
+        return f"PDF 파일을 읽는 중 오류 발생: {e}", None, None
+
+    # Extract metadata
+    raw_meta = reader.metadata
+    metadata = {
+        "author": raw_meta.author if raw_meta else None,
+        "creator": raw_meta.creator if raw_meta else None,
+        "producer": raw_meta.producer if raw_meta else None,
+        "subject": raw_meta.subject if raw_meta else None,
+        "title": raw_meta.title if raw_meta else None,
+    }
+
+    # Extract text
+    full_text = extract_text_from_pdf(reader)
+
+    # If the PDF is image-heavy and the text is very short, try OCR
+    image_count = 0
+    for page in reader.pages:
+        image_count += len(page.images)
+
+    if image_count > 0 and len(full_text) < 1000:
+        try:
+            out_pdf_file = pdf_file.replace(".pdf", "_ocr.pdf")
+            ocrmypdf.ocr(pdf_file, out_pdf_file, force_ocr=True)
+            # Re-extract text from the OCR-processed PDF
+            reader_ocr = PdfReader(out_pdf_file)
+            full_text = extract_text_from_pdf(reader_ocr)
+        except Exception as e:
+            full_text = f"OCR 처리 중 오류 발생: {e}\n\n원본 PDF 텍스트:\n\n" + full_text
+
+    return full_text, metadata, pdf_file
+
+
+# ---------------------------------------------------------------------------
212
  def analyze_file_content(content, file_type):
213
+ """ํŒŒ์ผ ๋‚ด์šฉ์„ ๊ฐ„๋‹จํžˆ ๋ถ„์„ํ•œ ํ›„ ๊ตฌ์กฐ ์š”์•ฝ์„ ๋ฐ˜ํ™˜."""
214
  if file_type in ['parquet', 'csv']:
215
  try:
216
  lines = content.split('\n')
 
220
  return f"๐Ÿ“Š Dataset Structure: {columns} columns, {rows} rows"
221
  except:
222
  return "โŒ Failed to analyze dataset structure"
223
+
224
  lines = content.split('\n')
225
  total_lines = len(lines)
226
  non_empty_lines = len([line for line in lines if line.strip()])
227
+
228
  if any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function']):
229
  functions = len([line for line in lines if 'def ' in line])
230
  classes = len([line for line in lines if 'class ' in line])
231
  imports = len([line for line in lines if 'import ' in line or 'from ' in line])
232
  return f"๐Ÿ’ป Code Structure: {total_lines} lines (Functions: {functions}, Classes: {classes}, Imports: {imports})"
233
+
234
  paragraphs = content.count('\n\n') + 1
235
  words = len(content.split())
236
  return f"๐Ÿ“ Document Structure: {total_lines} lines, {paragraphs} paragraphs, approximately {words} words"
237
 
238
+
239
  def read_uploaded_file(file):
240
+ """
241
+ ์—…๋กœ๋“œ๋œ ํŒŒ์ผ์„ ์ฒ˜๋ฆฌํ•˜์—ฌ
242
+ 1) ํŒŒ์ผ ํƒ€์ž…๋ณ„๋กœ ๋‚ด์šฉ์„ ์ฝ๊ณ 
243
+ 2) ๋ถ„์„ ๊ฒฐ๊ณผ์™€ ํ•จ๊ป˜ ๋ฐ˜ํ™˜
244
+ """
245
  if file is None:
246
  return "", ""
247
  try:
248
  file_ext = os.path.splitext(file.name)[1].lower()
249
+
250
+ # Parquet
251
  if file_ext == '.parquet':
252
  try:
253
  table = pq.read_table(file.name)
254
  df = table.to_pandas()
255
+
256
  content = f"๐Ÿ“Š Parquet File Analysis:\n\n"
257
  content += f"1. Basic Information:\n"
258
  content += f"- Total Rows: {len(df):,}\n"
259
  content += f"- Total Columns: {len(df.columns)}\n"
260
  content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
261
+
262
  content += f"2. Column Information:\n"
263
  for col in df.columns:
264
  content += f"- {col} ({df[col].dtype})\n"
265
+
266
  content += f"\n3. Data Preview:\n"
267
  content += tabulate(df.head(5), headers='keys', tablefmt='pipe', showindex=False)
268
+
269
  content += f"\n\n4. Missing Values:\n"
270
  null_counts = df.isnull().sum()
271
  for col, count in null_counts[null_counts > 0].items():
272
  content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
273
+
274
  numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
275
  if len(numeric_cols) > 0:
276
  content += f"\n5. Numeric Column Statistics:\n"
277
  stats_df = df[numeric_cols].describe()
278
  content += tabulate(stats_df, headers='keys', tablefmt='pipe')
279
+
280
  return content, "parquet"
281
  except Exception as e:
282
  return f"Error reading Parquet file: {str(e)}", "error"
283
+
284
+ # PDF (Markdown ๋ณ€ํ™˜)
285
  if file_ext == '.pdf':
286
  try:
287
+ markdown_text, metadata, processed_pdf_path = convert_pdf_to_markdown(file.name)
288
+ if metadata is None:
289
+ return f"PDF ํŒŒ์ผ ๋ณ€ํ™˜ ์˜ค๋ฅ˜ ๋˜๋Š” ์ฝ๊ธฐ ์‹คํŒจ.\n\n์›๋ณธ ๋ฉ”์‹œ์ง€:\n{markdown_text}", "error"
290
+
291
+ content = "# PDF to Markdown Conversion\n\n"
292
+ content += "## Metadata\n"
293
+ for k, v in metadata.items():
294
+ content += f"**{k.capitalize()}**: {v}\n\n"
295
+
296
+ content += "## Extracted Text\n\n"
297
+ content += markdown_text
298
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  return content, "pdf"
300
  except Exception as e:
301
  return f"Error reading PDF file: {str(e)}", "error"
+
+        # CSV
         elif file_ext == '.csv':
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
...
                     content += f"- Total Rows: {len(df):,}\n"
                     content += f"- Total Columns: {len(df.columns)}\n"
                     content += f"- Memory Usage: {df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB\n\n"
+
                     content += f"2. Column Information:\n"
                     for col in df.columns:
                         content += f"- {col} ({df[col].dtype})\n"
+
                     content += f"\n3. Data Preview:\n"
                     content += df.head(5).to_markdown(index=False)
+
                     content += f"\n\n4. Missing Values:\n"
                     null_counts = df.isnull().sum()
                     for col, count in null_counts[null_counts > 0].items():
                         content += f"- {col}: {count:,} ({count/len(df)*100:.1f}%)\n"
+
                     return content, "csv"
                 except UnicodeDecodeError:
                     continue
             raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
+
+        # Plain text files
         else:
             encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
             for encoding in encodings:
                 try:
                     with open(file.name, 'r', encoding=encoding) as f:
                         content = f.read()
+
                     lines = content.split('\n')
                     total_lines = len(lines)
                     non_empty_lines = len([line for line in lines if line.strip()])
+
                     is_code = any(keyword in content.lower() for keyword in ['def ', 'class ', 'import ', 'function'])
+
                     analysis = f"\n📝 File Analysis:\n"
                     if is_code:
                         functions = len([line for line in lines if 'def ' in line])
                         classes = len([line for line in lines if 'class ' in line])
                         imports = len([line for line in lines if 'import ' in line or 'from ' in line])
+
                         analysis += f"- File Type: Code\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Functions: {functions}\n"
...
                     else:
                         words = len(content.split())
                         chars = len(content)
+
                         analysis += f"- File Type: Text\n"
                         analysis += f"- Total Lines: {total_lines:,}\n"
                         analysis += f"- Non-empty Lines: {non_empty_lines:,}\n"
                         analysis += f"- Word Count: {words:,}\n"
                         analysis += f"- Character Count: {chars:,}\n"
+
                     return content + analysis, "text"
                 except UnicodeDecodeError:
                     continue
             raise UnicodeDecodeError(f"Unable to read file with supported encodings ({', '.join(encodings)})")
+
     except Exception as e:
         return f"Error reading file: {str(e)}", "error"
 
...
     font-size: 1.1em !important;
     padding: 10px 15px !important;
     display: flex !important;
+    align-items: flex-start !important;
 }
 .input-textbox textarea {
+    padding-top: 5px !important;
 }
 .send-button {
     height: 70px !important;
...
 }
 """
 
 def clear_cuda_memory():
     if hasattr(torch.cuda, 'empty_cache'):
         with torch.cuda.device('cuda'):
             torch.cuda.empty_cache()
 
+
 @spaces.GPU
 def load_model():
     try:
+        loaded_model = AutoModelForCausalLM.from_pretrained(
             MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
         )
+        return loaded_model
     except Exception as e:
         print(f"모델 로드 오류: {str(e)}")
         raise
 
+def _truncate_tokens_for_context(input_ids_str: str, desired_input_length: int) -> str:
+    """
+    If the input string exceeds desired_input_length whitespace tokens,
+    drop the front (the oldest context).
+    """
+    tokens = input_ids_str.split()
+    if len(tokens) > desired_input_length:
+        # Discard the oldest part and keep only the last desired_input_length tokens
+        tokens = tokens[-desired_input_length:]
+    return " ".join(tokens)
+
+
+# build_prompt: convert the conversation history into a plain string
+def build_prompt(conversation: list) -> str:
+    """
+    conversation is a list of dicts of the form
+    {"role": "user" or "assistant", "content": ...}.
+    Convert it into a simple text prompt.
+    """
+    prompt = ""
+    for msg in conversation:
+        if msg["role"] == "user":
+            prompt += "User: " + msg["content"] + "\n"
+        elif msg["role"] == "assistant":
+            prompt += "Assistant: " + msg["content"] + "\n"
+    # End with an assistant turn so the model continues as the assistant
+    prompt += "Assistant: "
+    return prompt
+
+
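For reference, what build_prompt produces for a small conversation (expected output shown in comments). Note this plain User/Assistant format bypasses the model's own chat template, and the whitespace split in _truncate_tokens_for_context only approximates real tokenizer tokens:

```python
conversation = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi! How can I help?"},
    {"role": "user", "content": "Summarize the uploaded file."},
]
print(build_prompt(conversation))
# User: Hello
# Assistant: Hi! How can I help?
# User: Summarize the uploaded file.
# Assistant:
```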
 @spaces.GPU
+def stream_chat(
+    message: str,
+    history: list,
+    uploaded_file,
+    temperature: float,
+    max_new_tokens: int,
+    top_p: float,
+    top_k: int,
+    penalty: float
+):
     global model, current_file_context
+
     try:
         if model is None:
             model = load_model()
+
         print(f'message is - {message}')
         print(f'history is - {history}')
 
         # Handle file uploads
         file_context = ""
         if uploaded_file and message == "파일을 분석하고 있습니다...":
+            # Reset the stored context whenever a new file is uploaded
+            current_file_context = None
             try:
                 content, file_type = read_uploaded_file(uploaded_file)
                 if content:
                     file_analysis = analyze_file_content(content, file_type)
+                    file_context = (
+                        f"\n\n📄 파일 분석 결과:\n{file_analysis}"
+                        f"\n\n파일 내용:\n```\n{content}\n```"
+                    )
                     current_file_context = file_context  # store the file context
                     message = "업로드된 파일을 분석해주세요."
             except Exception as e:
                 print(f"파일 분석 오류: {str(e)}")
                 file_context = f"\n\n❌ 파일 분석 중 오류가 발생했습니다: {str(e)}"
+        elif current_file_context:
+            # Reuse the previously uploaded file context if one exists
             file_context = current_file_context
 
         # Monitor memory usage
         if torch.cuda.is_available():
             print(f"CUDA 메모리 사용량: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
 
         # Trim the conversation history if it grows too long
+        max_history_length = 10
         if len(history) > max_history_length:
             history = history[-max_history_length:]
 
+        # Look up wiki context
         try:
             relevant_contexts = find_relevant_context(message)
             wiki_context = "\n\n관련 위키피디아 정보:\n"
             for ctx in relevant_contexts:
+                wiki_context += (
+                    f"Q: {ctx['question']}\n"
+                    f"A: {ctx['answer']}\n"
+                    f"유사도: {ctx['similarity']:.3f}\n\n"
+                )
         except Exception as e:
             print(f"컨텍스트 검색 오류: {str(e)}")
             wiki_context = ""
+
         # Build the conversation history
         conversation = []
         for prompt, answer in history:
...
         final_message = file_context + wiki_context + "\n현재 질문: " + message
         conversation.append({"role": "user", "content": final_message})
 
+        # Use build_prompt (instead of the previous tokenizer.apply_chat_template)
+        input_ids_str = build_prompt(conversation)
+        # First truncate to at most 6000 tokens (an arbitrary figure, adjustable as needed)
+        input_ids_str = _truncate_tokens_for_context(input_ids_str, 6000)
+
+        inputs = tokenizer(input_ids_str, return_tensors="pt").to("cuda")
+
+        # Respect the 8192-token maximum context: shrink max_new_tokens if little room remains
+        max_context = 8192
+        input_length = inputs["input_ids"].shape[1]
+        remaining = max_context - input_length
+
+        # To keep roughly 128 tokens of output available,
+        # truncate the input further whenever remaining is below 128.
+        min_generation = 128
+        if remaining < min_generation:
+            # Cut more to secure enough output tokens
+            must_cut = min_generation - remaining  # how many more tokens to cut
+            new_desired_input_length = max(1, input_length - must_cut)
+            print(f"[주의] 입력이 너무 길어 {must_cut}토큰 더 제거하여, input_length={input_length} -> {new_desired_input_length} 재조정")
+            # Rebuild the string and re-tokenize
+            input_ids_str = _truncate_tokens_for_context(input_ids_str, new_desired_input_length)
+            inputs = tokenizer(input_ids_str, return_tensors="pt").to("cuda")
+            input_length = inputs["input_ids"].shape[1]
+            remaining = max_context - input_length
+
+        # Finally ensure (input + max_new_tokens) <= 8192
+        if remaining < max_new_tokens:
+            print(f"[주의] 입력 토큰이 많아 max_new_tokens={max_new_tokens} -> {remaining}로 조정합니다.")
+            max_new_tokens = remaining
+
+        if max_new_tokens < 1:
+            # If it is still below 1, generate just a single token
+            max_new_tokens = 1
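A worked pass through the budget arithmetic above, as pure numbers (the real code re-tokenizes after each cut, so whitespace-based counts are only approximate):

```python
# Worked example of the context-budget logic above (numbers only, no model).
max_context, min_generation = 8192, 128
input_length = 8120                                  # tokens after the first pass
remaining = max_context - input_length               # 72 -> too little room
if remaining < min_generation:
    must_cut = min_generation - remaining            # 56 extra tokens to drop
    input_length = max(1, input_length - must_cut)   # 8064
    remaining = max_context - input_length           # 128
max_new_tokens = min(1000, remaining)                # clamped to 128
print(input_length, remaining, max_new_tokens)       # 8064 128 128
```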
 
 
 
 
         if torch.cuda.is_available():
             print(f"입력 텐서 생성 후 CUDA 메모리: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
 
+        streamer = TextIteratorStreamer(
+            tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True
+        )
 
         generate_kwargs = dict(
+            **inputs,
             streamer=streamer,
             top_k=top_k,
             top_p=top_p,
             repetition_penalty=penalty,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
             temperature=temperature,
+            eos_token_id=255001,  # changed: a plain int instead of a list
         )
+
         # Clean up memory before generation starts
         clear_cuda_memory()
+
         thread = Thread(target=model.generate, kwargs=generate_kwargs)
         thread.start()
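The thread-plus-streamer construction above is the standard Transformers streaming idiom; a minimal standalone version (gpt2 used purely for illustration):

```python
# Minimal TextIteratorStreamer pattern: generate in a thread, consume tokens here.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tok("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs in a worker thread while we consume the stream.
thread = Thread(target=lm.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=20))
thread.start()
for piece in streamer:
    print(piece, end="", flush=True)
thread.join()
```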
 
 
     except Exception as e:
         error_message = f"오류가 발생했습니다: {str(e)}"
         print(f"Stream chat 오류: {error_message}")
         clear_cuda_memory()
         yield "", history + [[message, error_message]]
 
 
 def create_demo():
     with gr.Blocks(css=CSS) as demo:
         with gr.Column(elem_classes="markdown-style"):
...
             #### 📊 RAG: Upload and Analyze Files (TXT, CSV, PDF, Parquet files)
             Upload your files for data analysis and learning
             """)
+
         chatbot = gr.Chatbot(
             value=[],
             height=600,
             label="GiniGEN AI Assistant",
             elem_classes="chat-container"
         )
+
         with gr.Row(elem_classes="input-container"):
             with gr.Column(scale=1, min_width=70):
                 file_upload = gr.File(
...
                     interactive=True,
                     show_label=False
                 )
+
             with gr.Column(scale=3):
                 msg = gr.Textbox(
                     show_label=False,
...
                     elem_classes="input-textbox",
                     scale=1
                 )
+
             with gr.Column(scale=1, min_width=70):
                 send = gr.Button(
                     "Send",
                     elem_classes="send-button custom-button",
                     scale=1
                 )
+
             with gr.Column(scale=1, min_width=70):
                 clear = gr.Button(
                     "Clear",
                     elem_classes="clear-button custom-button",
                     scale=1
                 )
+
         with gr.Accordion("🎮 Advanced Settings", open=False):
             with gr.Row():
                 with gr.Column(scale=1):
...
             current_file_context = None
             return [], None, "Start a new conversation..."
 
+        # Wire up the events
         msg.submit(
             stream_chat,
             inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
...
             queue=True
         )
 
         clear.click(
             fn=clear_conversation,
             outputs=[chatbot, file_upload, msg],
...
 
     return demo
 
+
 if __name__ == "__main__":
     demo = create_demo()
+    demo.launch()