Spaces:
Sleeping
Sleeping
seawolf2357
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ from datasets import load_dataset
|
|
8 |
import pandas as pd
|
9 |
from fuzzywuzzy import process
|
10 |
|
|
|
11 |
# 현재 작업 디렉토리 출력
|
12 |
print("Current Working Directory:", os.getcwd())
|
13 |
|
@@ -28,16 +29,28 @@ else:
|
|
28 |
def load_optimized_dataset(data_files):
|
29 |
data_frames = [pd.read_csv(file) for file in data_files]
|
30 |
full_data = pd.concat(data_frames, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
31 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
32 |
name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
|
|
|
33 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
34 |
-
|
|
|
35 |
|
36 |
-
name_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
37 |
print("Dataset loaded successfully.")
|
38 |
|
39 |
-
# 사건명 리스트 생성
|
40 |
all_case_names = list(name_to_number.keys())
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
# 로깅 설정
|
43 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
|
@@ -79,9 +92,10 @@ class MyClient(discord.Client):
|
|
79 |
|
80 |
self.is_processing = True
|
81 |
try:
|
82 |
-
|
83 |
-
if
|
84 |
-
|
|
|
85 |
else:
|
86 |
await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
|
87 |
finally:
|
@@ -98,16 +112,24 @@ async def generate_response(message):
|
|
98 |
user_input = message.content.strip()
|
99 |
user_mention = message.author.mention
|
100 |
|
101 |
-
# 유사한 사건명 찾기
|
102 |
-
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
104 |
if matched_case_names:
|
105 |
-
case_numbers = []
|
106 |
for case_name, score in matched_case_names:
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
111 |
elif user_input in number_to_fulltext:
|
112 |
full_text = number_to_fulltext[user_input]
|
113 |
system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
|
@@ -116,15 +138,14 @@ async def generate_response(message):
|
|
116 |
|
117 |
# 메시지 길이 제한 처리
|
118 |
max_length = 2000
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
return response_parts[0] if response_parts else "죄송합니다, 제공할 수 있는 정보가 없습니다."
|
126 |
-
return system_message
|
127 |
|
128 |
if __name__ == "__main__":
|
129 |
discord_client = MyClient(intents=intents)
|
130 |
-
discord_client.run(os.getenv('DISCORD_TOKEN'))
|
|
|
|
8 |
import pandas as pd
|
9 |
from fuzzywuzzy import process
|
10 |
|
11 |
+
|
12 |
# 현재 작업 디렉토리 출력
|
13 |
print("Current Working Directory:", os.getcwd())
|
14 |
|
|
|
29 |
def load_optimized_dataset(data_files):
|
30 |
data_frames = [pd.read_csv(file) for file in data_files]
|
31 |
full_data = pd.concat(data_frames, ignore_index=True)
|
32 |
+
|
33 |
+
# NaN 값 처리
|
34 |
+
full_data['판시사항'] = full_data['판시사항'].fillna('')
|
35 |
+
full_data['사건명'] = full_data['사건명'].fillna('')
|
36 |
+
|
37 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
38 |
name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
|
39 |
+
summary_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
|
40 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
41 |
+
|
42 |
+
return name_to_number, summary_to_number, number_to_fulltext
|
43 |
|
44 |
+
name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
45 |
print("Dataset loaded successfully.")
|
46 |
|
47 |
+
# 사건명 및 판시사항 리스트 생성
|
48 |
all_case_names = list(name_to_number.keys())
|
49 |
+
all_case_summaries = list(summary_to_number.keys())
|
50 |
+
|
51 |
+
# 디버깅용 로깅
|
52 |
+
logging.debug(f"Sample all_case_names: {all_case_names[:3]}")
|
53 |
+
logging.debug(f"Sample all_case_summaries: {all_case_summaries[:3]}")
|
54 |
|
55 |
# 로깅 설정
|
56 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
|
|
|
92 |
|
93 |
self.is_processing = True
|
94 |
try:
|
95 |
+
response_parts = await generate_response(message)
|
96 |
+
if response_parts:
|
97 |
+
for part in response_parts:
|
98 |
+
await message.channel.send(part)
|
99 |
else:
|
100 |
await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
|
101 |
finally:
|
|
|
112 |
user_input = message.content.strip()
|
113 |
user_mention = message.author.mention
|
114 |
|
115 |
+
# 유사한 사건명 및 판시사항 각각 찾기
|
116 |
+
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
|
117 |
+
matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
|
118 |
+
|
119 |
+
logging.debug(f"Matched case names: {matched_case_names}")
|
120 |
+
logging.debug(f"Matched case summaries: {matched_case_summaries}")
|
121 |
+
|
122 |
+
case_numbers_set = set()
|
123 |
if matched_case_names:
|
|
|
124 |
for case_name, score in matched_case_names:
|
125 |
+
case_numbers_set.update(name_to_number.get(case_name, []))
|
126 |
+
if matched_case_summaries:
|
127 |
+
for case_summary, score in matched_case_summaries:
|
128 |
+
case_numbers_set.update(summary_to_number.get(case_summary, []))
|
129 |
+
|
130 |
+
if case_numbers_set:
|
131 |
+
case_numbers_str = "\n".join(case_numbers_set)
|
132 |
+
system_message = f"{user_mention}, '{user_input}'와 유사한 사건의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
133 |
elif user_input in number_to_fulltext:
|
134 |
full_text = number_to_fulltext[user_input]
|
135 |
system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
|
|
|
138 |
|
139 |
# 메시지 길이 제한 처리
|
140 |
max_length = 2000
|
141 |
+
response_parts = []
|
142 |
+
for i in range(0, len(system_message), max_length):
|
143 |
+
part_response = system_message[i:i + max_length]
|
144 |
+
response_parts.append(part_response)
|
145 |
+
|
146 |
+
return response_parts
|
|
|
|
|
147 |
|
148 |
if __name__ == "__main__":
|
149 |
discord_client = MyClient(intents=intents)
|
150 |
+
discord_client.run(os.getenv('DISCORD_TOKEN'))
|
151 |
+
|