Update app.py
Browse files
app.py
CHANGED
@@ -31,32 +31,31 @@ def load_parquet(filename: str) -> str:
|
|
31 |
except Exception as e:
|
32 |
return f"νμΌμ μ½λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
33 |
|
34 |
-
def respond(
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
if parquet_data:
|
45 |
-
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ
λ‘λλ λ°μ΄ν°λ₯Ό κΈ°λ°μΌλ‘ μ§λ¬Έμ λ΅λ³νλ μν μ νλ€. λ°μ΄ν°λ₯Ό λΆμνμ¬ μ¬μ©μμκ² λμμ΄ λλ μ 보λ₯Ό μ 곡νλΌ. λ°μ΄ν°λ₯Ό νμ©νμ¬ μμΈνκ³ μ νν λ΅λ³μ μ 곡νλ, λ―Όκ°ν μ 보λ κ°μΈ μ 보λ₯Ό λ
ΈμΆνμ§ λ§λΌ."""
|
46 |
try:
|
47 |
df = pd.read_json(io.StringIO(parquet_data))
|
48 |
-
# λ°μ΄ν°μ μμ½ μ 보 μμ±
|
49 |
data_summary = df.describe(include='all').to_string()
|
50 |
-
system_prefix += f"\n\nμ
λ‘λλ
|
51 |
except Exception as e:
|
52 |
-
print(f"λ°μ΄ν° λ‘λ
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
# λ©μμ§ μμ±
|
58 |
prompt = system_prefix + "\n\n"
|
59 |
-
for chat in
|
60 |
if chat['role'] == 'user':
|
61 |
prompt += f"μ¬μ©μ: {chat['content']}\n"
|
62 |
else:
|
@@ -64,7 +63,6 @@ def respond(
|
|
64 |
prompt += f"μ¬μ©μ: {message}\nAI:"
|
65 |
|
66 |
try:
|
67 |
-
# λͺ¨λΈμ λ©μμ§ μ μ‘ λ° μλ΅ λ°κΈ°
|
68 |
response = ""
|
69 |
stream = hf_client.text_generation(
|
70 |
prompt=prompt,
|
@@ -72,16 +70,35 @@ def respond(
|
|
72 |
stream=True,
|
73 |
temperature=temperature,
|
74 |
top_p=top_p,
|
|
|
|
|
75 |
)
|
|
|
76 |
for msg in stream:
|
77 |
if msg:
|
78 |
response += msg
|
|
|
|
|
79 |
yield response
|
80 |
except Exception as e:
|
81 |
-
error_message = f"μΆλ‘
|
82 |
print(error_message)
|
83 |
yield error_message
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
def upload_csv(file_path: str) -> Tuple[str, str]:
|
86 |
try:
|
87 |
# CSV νμΌ μ½κΈ°
|
@@ -312,38 +329,33 @@ with gr.Blocks(css=css) as demo:
|
|
312 |
|
313 |
parquet_data_state = gr.State()
|
314 |
|
315 |
-
def handle_message_data_upload(
|
316 |
-
message: str,
|
317 |
-
history: List[Dict[str, str]],
|
318 |
-
system_message: str,
|
319 |
-
max_tokens: int,
|
320 |
-
temperature: float,
|
321 |
-
top_p: float,
|
322 |
-
parquet_data: str
|
323 |
-
):
|
324 |
history = history or []
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
try:
|
326 |
-
# μ¬μ©μμ λ©μμ§λ₯Ό νμ€ν 리μ μΆκ°
|
327 |
history.append({"role": "user", "content": message})
|
328 |
-
|
329 |
-
|
330 |
-
message, history, system_message, max_tokens, temperature, top_p, parquet_data
|
331 |
-
)
|
332 |
partial_response = ""
|
333 |
for partial in response_gen:
|
334 |
partial_response = partial
|
335 |
-
#
|
336 |
-
display_history = history + [
|
337 |
-
{"role": "assistant", "content": partial_response}
|
338 |
-
]
|
339 |
yield display_history, ""
|
340 |
-
|
341 |
-
history.append({"role": "assistant", "content": partial_response})
|
342 |
except Exception as e:
|
343 |
-
response = f"
|
344 |
history.append({"role": "assistant", "content": response})
|
345 |
yield history, ""
|
346 |
|
|
|
|
|
347 |
send_data_upload.click(
|
348 |
handle_message_data_upload,
|
349 |
inputs=[
|
|
|
31 |
except Exception as e:
|
32 |
return f"νμΌμ μ½λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
33 |
|
34 |
+
def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
|
35 |
+
# μμ€ν
ν둬ννΈμ μ€λ³΅ λ°©μ§ μ§μ μΆκ°
|
36 |
+
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ
λ‘λλ λ°μ΄ν°λ₯Ό κΈ°λ°μΌλ‘ μ§λ¬Έμ λ΅λ³νλ μν μ νλ€.
|
37 |
+
|
38 |
+
μ€μ κ·μΉ:
|
39 |
+
1. μ΄μ λνμμ μ΄λ―Έ λ΅λ³ν λ΄μ©μ λ°λ³΅νμ§ λ§ κ²
|
40 |
+
2. μ§λ¬Έκ³Ό μ§μ κ΄λ ¨λ λ΄μ©λ§ λ΅λ³ν κ²
|
41 |
+
3. λΆνμν μμλ λΆμ° μ€λͺ
μ μ΅μνν κ²
|
42 |
+
4. λ΅λ³μ λͺ
ννκ³ κ°κ²°νκ² ν κ²
|
43 |
+
5. λμΌν λ΄μ©μ λ€λ₯Έ ννμΌλ‘ λ°λ³΅νμ§ λ§ κ²
|
44 |
+
"""
|
45 |
+
|
46 |
if parquet_data:
|
|
|
47 |
try:
|
48 |
df = pd.read_json(io.StringIO(parquet_data))
|
|
|
49 |
data_summary = df.describe(include='all').to_string()
|
50 |
+
system_prefix += f"\n\nμ
λ‘λλ λ°μ΄ν° μμ½:\n{data_summary}"
|
51 |
except Exception as e:
|
52 |
+
print(f"λ°μ΄ν° λ‘λ μ€λ₯: {str(e)}")
|
53 |
+
|
54 |
+
# μ΄μ λν 컨ν
μ€νΈ μ΅μ ν
|
55 |
+
recent_history = history[-3:] if history else [] # μ΅κ·Ό 3κ° λνλ§ μ μ§
|
56 |
+
|
|
|
57 |
prompt = system_prefix + "\n\n"
|
58 |
+
for chat in recent_history:
|
59 |
if chat['role'] == 'user':
|
60 |
prompt += f"μ¬μ©μ: {chat['content']}\n"
|
61 |
else:
|
|
|
63 |
prompt += f"μ¬μ©μ: {message}\nAI:"
|
64 |
|
65 |
try:
|
|
|
66 |
response = ""
|
67 |
stream = hf_client.text_generation(
|
68 |
prompt=prompt,
|
|
|
70 |
stream=True,
|
71 |
temperature=temperature,
|
72 |
top_p=top_p,
|
73 |
+
repetition_penalty=1.2, # λ°λ³΅ νλν° μΆκ°
|
74 |
+
no_repeat_ngram_size=3, # n-gram λ°λ³΅ λ°©μ§
|
75 |
)
|
76 |
+
|
77 |
for msg in stream:
|
78 |
if msg:
|
79 |
response += msg
|
80 |
+
# μ€λ³΅ λ¬Έμ₯ μ κ±°
|
81 |
+
response = remove_duplicates(response)
|
82 |
yield response
|
83 |
except Exception as e:
|
84 |
+
error_message = f"μΆλ‘ μ€λ₯: {str(e)}"
|
85 |
print(error_message)
|
86 |
yield error_message
|
87 |
|
88 |
+
def remove_duplicates(text: str) -> str:
    """Drop repeated sentences from *text*, keeping the first occurrence.

    A sentence boundary is a ``.``, ``!`` or ``?`` that is followed by
    whitespace, so decimals such as ``3.14`` are never split. Two sentences
    count as duplicates when they are equal after trimming surrounding
    whitespace and trailing periods. Every kept sentence retains its own
    punctuation and the whitespace (including newlines) that originally
    preceded it.

    Args:
        text: Text to clean. Empty or ``None`` input is accepted.

    Returns:
        The text without later duplicate sentences, or ``""`` for empty input.
    """
    import re  # function-scope import keeps this block self-contained

    if not text:
        return ""

    # The capturing group makes re.split return the separators as well:
    # [sentence, separator, sentence, separator, ..., sentence].
    pieces = re.split(r"(?<=[.!?])(\s+)", text.strip())
    sentences = pieces[0::2]
    separators = [""] + pieces[1::2]

    seen = set()
    kept = []
    for separator, sentence in zip(separators, sentences):
        # Ignore trailing periods so a still-streaming fragment ("A") matches
        # its already-completed twin ("A.").
        key = sentence.rstrip(".").strip()
        if not key or key in seen:
            continue
        seen.add(key)
        if kept:
            kept.append(separator)
        kept.append(sentence)
    return "".join(kept)
|
101 |
+
|
102 |
def upload_csv(file_path: str) -> Tuple[str, str]:
|
103 |
try:
|
104 |
# CSV νμΌ μ½κΈ°
|
|
|
329 |
|
330 |
parquet_data_state = gr.State()
|
331 |
|
332 |
+
def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
    """Stream a chat reply for *message* as ``(history, "")`` tuples.

    Generator. Each yielded tuple holds the chat history (a list of
    ``{"role", "content"}`` dicts) with the assistant's current text appended,
    followed by an empty string. A non-empty *history* list is mutated in
    place: the user turn and the final assistant turn are appended to it.

    If one of the last three history entries is a user turn whose stripped
    content equals the stripped *message*, a single notice is yielded and
    ``respond`` is not called.
    """
    history = history or []

    # Duplicate-question check over the three most recent history entries.
    for turn in history[-3:]:
        if turn['role'] != 'user':
            continue
        if turn['content'].strip() == message.strip():
            # NOTE(review): this literal looks like mis-decoded UTF-8 Korean;
            # it is reproduced as found - confirm against the real file.
            notice = {"role": "assistant", "content": "λμΌν μ§λ¬Έμ΄ μ΅κ·Όμ μμμ΅λλ€. λ€λ₯Έ μ§λ¬Έμ ν΄μ£ΌμΈμ."}
            yield history + [notice], ""
            return

    try:
        history.append({"role": "user", "content": message})

        # remove_duplicates is deterministic and maps "" to "", so the last
        # cleaned partial is also the final answer stored in the history.
        cleaned = ""
        for partial in respond(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
            # Show the de-duplicated text without committing it to history yet.
            cleaned = remove_duplicates(partial)
            yield history + [{"role": "assistant", "content": cleaned}], ""

        history.append({"role": "assistant", "content": cleaned})
    except Exception as e:
        # Surface the failure as an assistant turn instead of raising.
        history.append({"role": "assistant", "content": f"μ€λ₯ λ°μ: {e!s}"})
        yield history, ""
|
356 |
|
357 |
+
|
358 |
+
|
359 |
send_data_upload.click(
|
360 |
handle_message_data_upload,
|
361 |
inputs=[
|