Update app.py
Browse files
app.py
CHANGED
@@ -32,27 +32,27 @@ def load_parquet(filename: str) -> str:
|
|
32 |
return f"ํ์ผ์ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
33 |
|
34 |
def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
|
35 |
-
# ์์คํ
|
36 |
system_prefix = """๋ฐ๋์ ํ๊ธ๋ก ๋ต๋ณํ ๊ฒ. ๋๋ ์
๋ก๋๋ ๋ฐ์ดํฐ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ง๋ฌธ์ ๋ต๋ณํ๋ ์ญํ ์ ํ๋ค.
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
if parquet_data:
|
47 |
try:
|
48 |
df = pd.read_json(io.StringIO(parquet_data))
|
49 |
data_summary = df.describe(include='all').to_string()
|
50 |
-
system_prefix += f"\n\n
|
51 |
except Exception as e:
|
52 |
print(f"๋ฐ์ดํฐ ๋ก๋ ์ค๋ฅ: {str(e)}")
|
53 |
|
54 |
-
#
|
55 |
-
recent_history = history[-3:] if history else []
|
56 |
|
57 |
prompt = system_prefix + "\n\n"
|
58 |
for chat in recent_history:
|
@@ -68,23 +68,45 @@ def respond(message: str, history: List[Dict[str, str]], system_message: str = "
|
|
68 |
prompt=prompt,
|
69 |
max_new_tokens=max_tokens,
|
70 |
stream=True,
|
71 |
-
temperature=temperature,
|
72 |
top_p=top_p,
|
73 |
-
repetition_penalty=1.2, # ๋ฐ๋ณต
|
74 |
-
no_repeat_ngram_size=3, # n-gram ๋ฐ๋ณต ๋ฐฉ์ง
|
75 |
)
|
76 |
|
77 |
for msg in stream:
|
78 |
if msg:
|
79 |
response += msg
|
80 |
-
#
|
81 |
-
|
82 |
-
yield
|
83 |
except Exception as e:
|
84 |
error_message = f"์ถ๋ก ์ค๋ฅ: {str(e)}"
|
85 |
print(error_message)
|
86 |
yield error_message
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
def remove_duplicates(text: str) -> str:
|
89 |
"""์ค๋ณต ๋ฌธ์ฅ ์ ๊ฑฐ ํจ์"""
|
90 |
sentences = text.split('.')
|
@@ -332,28 +354,37 @@ with gr.Blocks(css=css) as demo:
|
|
332 |
def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
|
333 |
history = history or []
|
334 |
|
335 |
-
|
336 |
-
|
|
|
337 |
yield history + [{"role": "assistant", "content": "๋์ผํ ์ง๋ฌธ์ด ์ต๊ทผ์ ์์์ต๋๋ค. ๋ค๋ฅธ ์ง๋ฌธ์ ํด์ฃผ์ธ์."}], ""
|
338 |
return
|
339 |
|
340 |
try:
|
341 |
history.append({"role": "user", "content": message})
|
342 |
-
response_gen = respond(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
|
344 |
partial_response = ""
|
345 |
for partial in response_gen:
|
346 |
partial_response = partial
|
347 |
-
|
348 |
-
display_history = history + [{"role": "assistant", "content": remove_duplicates(partial_response)}]
|
349 |
yield display_history, ""
|
350 |
|
351 |
-
history.append({"role": "assistant", "content":
|
352 |
except Exception as e:
|
353 |
response = f"์ค๋ฅ ๋ฐ์: {str(e)}"
|
354 |
history.append({"role": "assistant", "content": response})
|
355 |
yield history, ""
|
356 |
|
|
|
357 |
|
358 |
|
359 |
send_data_upload.click(
|
|
|
32 |
return f"ํ์ผ์ ์ฝ๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
33 |
|
34 |
def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
|
35 |
+
# ์์คํ
ํ๋กฌํํธ ๊ฐํ
|
36 |
system_prefix = """๋ฐ๋์ ํ๊ธ๋ก ๋ต๋ณํ ๊ฒ. ๋๋ ์
๋ก๋๋ ๋ฐ์ดํฐ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ง๋ฌธ์ ๋ต๋ณํ๋ ์ญํ ์ ํ๋ค.
|
37 |
+
|
38 |
+
์ฃผ์ ์ง์นจ:
|
39 |
+
1. ์ง๋ฌธ๊ณผ ์ง์ ๊ด๋ จ๋ ๋ด์ฉ๋ง ๊ฐ๋จ๋ช
๋ฃํ๊ฒ ๋ต๋ณํ ๊ฒ
|
40 |
+
2. ์ด์ ๋ต๋ณ๊ณผ ์ค๋ณต๋๋ ๋ด์ฉ์ ์ ์ธํ ๊ฒ
|
41 |
+
3. ๋ถํ์ํ ์์๋ ๋ถ์ฐ ์ค๋ช
์ ํ์ง ๋ง ๊ฒ
|
42 |
+
4. ๋์ผํ ๋ด์ฉ์ ๋ค๋ฅธ ํํ์ผ๋ก ๋ฐ๋ณตํ์ง ๋ง ๊ฒ
|
43 |
+
5. ํต์ฌ ์ ๋ณด๋ง ์ ๋ฌํ ๊ฒ
|
44 |
+
"""
|
45 |
+
|
46 |
if parquet_data:
|
47 |
try:
|
48 |
df = pd.read_json(io.StringIO(parquet_data))
|
49 |
data_summary = df.describe(include='all').to_string()
|
50 |
+
system_prefix += f"\n\n๋ฐ์ดํฐ ์์ฝ:\n{data_summary}"
|
51 |
except Exception as e:
|
52 |
print(f"๋ฐ์ดํฐ ๋ก๋ ์ค๋ฅ: {str(e)}")
|
53 |
|
54 |
+
# ์ต๊ทผ ๋ํ ์ปจํ
์คํธ๋ง ์ ์ง
|
55 |
+
recent_history = history[-3:] if history else []
|
56 |
|
57 |
prompt = system_prefix + "\n\n"
|
58 |
for chat in recent_history:
|
|
|
68 |
prompt=prompt,
|
69 |
max_new_tokens=max_tokens,
|
70 |
stream=True,
|
71 |
+
temperature=temperature, # ๋ฎ์ temperature๋ก ์ผ๊ด์ฑ ์ ์ง
|
72 |
top_p=top_p,
|
73 |
+
repetition_penalty=1.2, # ๋ฐ๋ณต ํ๋ํฐ๋ง ์ ์ฉ
|
|
|
74 |
)
|
75 |
|
76 |
for msg in stream:
|
77 |
if msg:
|
78 |
response += msg
|
79 |
+
# ์๋ต ์ ์
|
80 |
+
cleaned_response = clean_response(response)
|
81 |
+
yield cleaned_response
|
82 |
except Exception as e:
|
83 |
error_message = f"์ถ๋ก ์ค๋ฅ: {str(e)}"
|
84 |
print(error_message)
|
85 |
yield error_message
|
86 |
|
87 |
+
def clean_response(text: str) -> str:
    """Remove repeated sentences from a model response.

    The text is split into sentences at a terminator (``.``, ``!`` or ``?``)
    that is followed by whitespace. Splitting only at such boundaries keeps
    decimals ("3.14"), URLs and similar dotted tokens intact, which a plain
    ``text.split('.')`` would break apart.

    Two sentences count as duplicates when they match after trailing
    terminators are dropped, case is folded and runs of whitespace are
    collapsed. The first occurrence is kept, in its original form.

    Args:
        text: Raw response text. May be empty, ``None`` or a partial
            (still-streaming) response.

    Returns:
        The de-duplicated sentences joined by single spaces. A trailing
        ``.`` is appended when the result does not already end with a
        sentence terminator. Returns ``""`` for empty or blank input.
    """
    import re  # local import: the file's top-level imports are not visible here

    if not text or not text.strip():
        return ""

    terminators = ".!?"
    # Split only where a terminator is followed by whitespace, so that
    # "3.14" or "example.com" stay in one piece.
    fragments = re.split(r"(?<=[.!?])\s+", text.strip())

    seen = set()
    unique_sentences = []
    for fragment in fragments:
        # Comparison key: ignore trailing punctuation, case and spacing.
        key = " ".join(fragment.rstrip(terminators).casefold().split())
        if not key or key in seen:
            # Skip pure-punctuation fragments and repeated sentences.
            continue
        seen.add(key)
        unique_sentences.append(fragment)

    cleaned_text = " ".join(unique_sentences)
    # Keep the original contract of always ending on a terminator, without
    # turning "Is it?" into "Is it?.".
    if cleaned_text and not cleaned_text.endswith(tuple(terminators)):
        cleaned_text += "."

    return cleaned_text
|
109 |
+
|
110 |
def remove_duplicates(text: str) -> str:
|
111 |
"""์ค๋ณต ๋ฌธ์ฅ ์ ๊ฑฐ ํจ์"""
|
112 |
sentences = text.split('.')
|
|
|
354 |
def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
    """Stream a chat reply for the data-upload tab as (history, "") pairs.

    A message that repeats one of the user turns found in the last three
    history entries (compared after strip + lower) is rejected with a fixed
    notice and no model call. Otherwise the message is appended to
    ``history``, ``respond`` is iterated, and each partial answer is yielded
    as a provisional assistant turn. The last partial answer is appended to
    ``history`` once the stream ends.

    Note: the ``temperature`` argument is accepted but not forwarded;
    ``respond`` is always called with ``temperature=0.3``.

    Yields:
        Tuples of (chat history list, ""). The empty string is presumably
        used to clear the input box; confirm against the ``.click`` wiring.
    """
    history = history or []

    # Duplicate-question check against the user turns in the last three entries.
    wanted = message.strip().lower()
    recent_user_turns = {
        entry['content'].strip().lower()
        for entry in history[-3:]
        if entry['role'] == 'user'
    }
    if wanted in recent_user_turns:
        notice = {"role": "assistant", "content": "๋์ผํ ์ง๋ฌธ์ด ์ต๊ทผ์ ์์์ต๋๋ค. ๋ค๋ฅธ ์ง๋ฌธ์ ํด์ฃผ์ธ์."}
        yield history + [notice], ""
        return

    try:
        history.append({"role": "user", "content": message})
        stream = respond(
            message,
            history,
            system_message,
            max_tokens,
            temperature=0.3,  # fixed low temperature; the caller's value is not used
            top_p=top_p,
            parquet_data=parquet_data,
        )

        latest = ""
        for latest in stream:
            # Show the in-progress answer without committing it to history yet.
            yield history + [{"role": "assistant", "content": latest}], ""

        history.append({"role": "assistant", "content": latest})
    except Exception as e:
        failure = f"์ค๋ฅ ๋ฐ์: {str(e)}"
        history.append({"role": "assistant", "content": failure})
        yield history, ""
|
386 |
|
387 |
+
|
388 |
|
389 |
|
390 |
send_data_upload.click(
|