ginipick committed
Commit f013686 • 1 Parent(s): a958a41

Update app.py

Files changed (1)
  1. app.py +55 -43
app.py CHANGED
@@ -31,32 +31,31 @@ def load_parquet(filename: str) -> str:
     except Exception as e:
         return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"

-def respond(
-    message: str,
-    history: List[Dict[str, str]],
-    system_message: str = "",
-    max_tokens: int = 4000,
-    temperature: float = 0.5,
-    top_p: float = 0.9,
-    parquet_data: str = None
-) -> str:
-    # Set up the system prompt
+def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
+    # Add anti-repetition instructions to the system prompt
+    system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€.
+
+    μ€‘μš” κ·œμΉ™:
+    1. 이전 λŒ€ν™”μ—μ„œ 이미 λ‹΅λ³€ν•œ λ‚΄μš©μ„ λ°˜λ³΅ν•˜μ§€ 말 것
+    2. 질문과 직접 κ΄€λ ¨λœ λ‚΄μš©λ§Œ λ‹΅λ³€ν•  것
+    3. λΆˆν•„μš”ν•œ μ˜ˆμ‹œλ‚˜ λΆ€μ—° μ„€λͺ…은 μ΅œμ†Œν™”ν•  것
+    4. 닡변은 λͺ…ν™•ν•˜κ³  κ°„κ²°ν•˜κ²Œ ν•  것
+    5. λ™μΌν•œ λ‚΄μš©μ„ λ‹€λ₯Έ ν‘œν˜„μœΌλ‘œ λ°˜λ³΅ν•˜μ§€ 말 것
+    """
+
     if parquet_data:
-        system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€. 데이터λ₯Ό λΆ„μ„ν•˜μ—¬ μ‚¬μš©μžμ—κ²Œ 도움이 λ˜λŠ” 정보λ₯Ό μ œκ³΅ν•˜λΌ. 데이터λ₯Ό ν™œμš©ν•˜μ—¬ μƒμ„Έν•˜κ³  μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜λ˜, λ―Όκ°ν•œ μ •λ³΄λ‚˜ 개인 정보λ₯Ό λ…ΈμΆœν•˜μ§€ 마라."""
         try:
             df = pd.read_json(io.StringIO(parquet_data))
-            # Build a summary of the data
             data_summary = df.describe(include='all').to_string()
-            system_prefix += f"\n\nμ—…λ‘œλ“œλœ λ°μ΄ν„°μ˜ μš”μ•½ 정보:\n{data_summary}"
+            system_prefix += f"\n\nμ—…λ‘œλ“œλœ 데이터 μš”μ•½:\n{data_summary}"
         except Exception as e:
-            print(f"데이터 λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}\n{traceback.format_exc()}")
-            system_prefix += "\n\n데이터λ₯Ό λ‘œλ“œν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
-    else:
-        system_prefix = system_message or "λ„ˆλŠ” AI μ‘°μ–Έμž 역할이닀."
-
-    # Build the message
+            print(f"데이터 λ‘œλ“œ 였λ₯˜: {str(e)}")
+
+    # Limit the previous conversation context
+    recent_history = history[-3:] if history else []  # keep only the last 3 turns
+
     prompt = system_prefix + "\n\n"
-    for chat in history:
+    for chat in recent_history:
         if chat['role'] == 'user':
             prompt += f"μ‚¬μš©μž: {chat['content']}\n"
         else:
@@ -64,7 +63,6 @@ def respond(
     prompt += f"μ‚¬μš©μž: {message}\nAI:"

     try:
-        # Send the message to the model and receive the response
         response = ""
         stream = hf_client.text_generation(
             prompt=prompt,
@@ -72,16 +70,35 @@
             stream=True,
             temperature=temperature,
             top_p=top_p,
+            repetition_penalty=1.2,  # add a repetition penalty
+            no_repeat_ngram_size=3,  # prevent n-gram repetition
         )
+
         for msg in stream:
             if msg:
                 response += msg
+                # remove duplicate sentences
+                response = remove_duplicates(response)
                 yield response
     except Exception as e:
-        error_message = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}\n{traceback.format_exc()}"
+        error_message = f"μΆ”λ‘  였λ₯˜: {str(e)}"
         print(error_message)
         yield error_message

+def remove_duplicates(text: str) -> str:
+    """Remove duplicate sentences."""
+    sentences = text.split('.')
+    unique_sentences = []
+    seen = set()
+
+    for sentence in sentences:
+        sentence = sentence.strip()
+        if sentence and sentence not in seen:
+            seen.add(sentence)
+            unique_sentences.append(sentence)
+
+    return '. '.join(unique_sentences)
+
 def upload_csv(file_path: str) -> Tuple[str, str]:
     try:
         # Read the CSV file
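For reference, the deduplication helper introduced in this commit can be exercised on its own. Below is a minimal standalone sketch: the function body is copied from the patch above, while the sample text and its repeated sentence are invented for illustration.

# Standalone check of the remove_duplicates helper added above.
def remove_duplicates(text: str) -> str:
    """Remove duplicate sentences."""
    sentences = text.split('.')
    unique_sentences = []
    seen = set()

    for sentence in sentences:
        sentence = sentence.strip()
        if sentence and sentence not in seen:
            seen.add(sentence)
            unique_sentences.append(sentence)

    return '. '.join(unique_sentences)

# Hypothetical streamed output containing a repeated sentence.
sample = "데이터셋에 100개의 행이 μžˆμŠ΅λ‹ˆλ‹€. 데이터셋에 100개의 행이 μžˆμŠ΅λ‹ˆλ‹€. 열은 3κ°œμž…λ‹ˆλ‹€."
print(remove_duplicates(sample))
# Prints "데이터셋에 100개의 행이 μžˆμŠ΅λ‹ˆλ‹€. 열은 3κ°œμž…λ‹ˆλ‹€": the exact repeat is dropped,
# and the trailing period is not restored by the '. '.join.

Only exact, whole-sentence repeats are caught; paraphrased repetition is left to the prompt rules and the repetition penalty.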
@@ -312,38 +329,33 @@ with gr.Blocks(css=css) as demo:

     parquet_data_state = gr.State()

-    def handle_message_data_upload(
-        message: str,
-        history: List[Dict[str, str]],
-        system_message: str,
-        max_tokens: int,
-        temperature: float,
-        top_p: float,
-        parquet_data: str
-    ):
+    def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
         history = history or []
+
+        # Check for duplicate questions
+        if history and any(chat['role'] == 'user' and chat['content'].strip() == message.strip() for chat in history[-3:]):
+            yield history + [{"role": "assistant", "content": "λ™μΌν•œ 질문이 μ΅œκ·Όμ— μžˆμ—ˆμŠ΅λ‹ˆλ‹€. λ‹€λ₯Έ μ§ˆλ¬Έμ„ ν•΄μ£Όμ„Έμš”."}], ""
+            return
+
         try:
-            # Add the user's message to the history
             history.append({"role": "user", "content": message})
-            # Generate the response
-            response_gen = respond(
-                message, history, system_message, max_tokens, temperature, top_p, parquet_data
-            )
+            response_gen = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_data)
+
             partial_response = ""
             for partial in response_gen:
                 partial_response = partial
-                # Update the conversation history
-                display_history = history + [
-                    {"role": "assistant", "content": partial_response}
-                ]
+                # Update with the deduplicated response
+                display_history = history + [{"role": "assistant", "content": remove_duplicates(partial_response)}]
                 yield display_history, ""
-            # Add the assistant's response to the history
-            history.append({"role": "assistant", "content": partial_response})
+
+            history.append({"role": "assistant", "content": remove_duplicates(partial_response)})
         except Exception as e:
-            response = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
+            response = f"였λ₯˜ λ°œμƒ: {str(e)}"
             history.append({"role": "assistant", "content": response})
             yield history, ""

+
+
     send_data_upload.click(
         handle_message_data_upload,
         inputs=[
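The repeated-question guard added to handle_message_data_upload can be illustrated the same way. In the sketch below, is_repeated_question is a hypothetical helper that mirrors the inline any(...) check from the patch, and the history entries are made up for the example.

from typing import Dict, List

def is_repeated_question(message: str, history: List[Dict[str, str]]) -> bool:
    """Mirror of the inline check: True if the same user message appears in the last three history entries."""
    return bool(history) and any(
        chat['role'] == 'user' and chat['content'].strip() == message.strip()
        for chat in history[-3:]
    )

# Invented conversation history for the example.
history = [
    {"role": "user", "content": "μ»¬λŸΌ λͺ©λ‘μ„ μ•Œλ €μ€˜"},
    {"role": "assistant", "content": "컬럼은 id, name, price μž…λ‹ˆλ‹€."},
]
print(is_repeated_question("μ»¬λŸΌ λͺ©λ‘μ„ μ•Œλ €μ€˜", history))  # True: the handler would short-circuit
print(is_repeated_question("행은 λͺ‡ κ°œμΈκ°€μš”?", history))   # False: a new question goes through to respond()

Because the window is history[-3:], an identical question asked much earlier in a long conversation is not blocked.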
 
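Finally, a rough sketch of the streaming generation loop that respond() builds on, reduced to core huggingface_hub InferenceClient.text_generation parameters. The model id and token are placeholders rather than what the Space actually uses (hf_client is configured elsewhere in app.py), and the no_repeat_ngram_size option from the patch is left out here since whether it is accepted depends on the client version and the inference backend.

from huggingface_hub import InferenceClient

# Placeholder model id and token for the sketch.
hf_client = InferenceClient(model="some-org/some-model", token="hf_...")

prompt = "λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것.\n\nμ‚¬μš©μž: 데이터 μš”μ•½μ„ λ³΄μ—¬μ€˜\nAI:"
response = ""
for chunk in hf_client.text_generation(
    prompt=prompt,
    max_new_tokens=512,
    stream=True,
    temperature=0.5,
    top_p=0.9,
    repetition_penalty=1.2,
):
    if chunk:
        response += chunk  # chunks arrive incrementally, as in the patched respond() loop
print(response)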