seawolf2357 committed on
Commit
1a116fa
·
verified ·
1 Parent(s): 97ea916

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -23
app.py CHANGED
@@ -8,6 +8,7 @@ from datasets import load_dataset
8
  import pandas as pd
9
  from fuzzywuzzy import process
10
 
 
11
  # 현재 작업 디렉토리 출력
12
  print("Current Working Directory:", os.getcwd())
13
 
@@ -28,16 +29,28 @@ else:
28
  def load_optimized_dataset(data_files):
29
  data_frames = [pd.read_csv(file) for file in data_files]
30
  full_data = pd.concat(data_frames, ignore_index=True)
 
 
 
 
 
31
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
32
  name_to_number = full_data.groupby('์‚ฌ๊ฑด๋ช…')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
 
33
  number_to_fulltext = full_data.set_index('์‚ฌ๊ฑด๋ฒˆํ˜ธ')['์ „๋ฌธ'].to_dict()
34
- return name_to_number, number_to_fulltext
 
35
 
36
- name_to_number, number_to_fulltext = load_optimized_dataset(data_files)
37
  print("Dataset loaded successfully.")
38
 
39
- # 사건명 리스트 생성
40
  all_case_names = list(name_to_number.keys())
 
 
 
 
 
41
 
42
  # 로깅 설정
43
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
@@ -79,9 +92,10 @@ class MyClient(discord.Client):
79
 
80
  self.is_processing = True
81
  try:
82
- response = await generate_response(message)
83
- if response and response.strip():
84
- await message.channel.send(response)
 
85
  else:
86
  await message.channel.send("์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค, ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
87
  finally:
@@ -98,16 +112,24 @@ async def generate_response(message):
98
  user_input = message.content.strip()
99
  user_mention = message.author.mention
100
 
101
- # 유사한 사건명 찾기
102
- matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=80)
103
-
 
 
 
 
 
104
  if matched_case_names:
105
- case_numbers = []
106
  for case_name, score in matched_case_names:
107
- case_numbers.extend(name_to_number[case_name])
108
- case_numbers = list(set(case_numbers)) # ์ค‘๋ณต ์ œ๊ฑฐ
109
- case_numbers_str = "\n".join(case_numbers)
110
- system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ์‚ฌ๊ฑด๋ช…์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
 
 
 
 
111
  elif user_input in number_to_fulltext:
112
  full_text = number_to_fulltext[user_input]
113
  system_message = f"{user_mention}, ์‚ฌ๊ฑด๋ฒˆํ˜ธ '{user_input}'์˜ ์ „๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n\n{full_text}"
@@ -116,15 +138,14 @@ async def generate_response(message):
116
 
117
  # 메시지 길이 제한 처리
118
  max_length = 2000
119
- if len(system_message) > max_length:
120
- response_parts = []
121
- for i in range(0, len(system_message), max_length):
122
- part_response = system_message[i:i + max_length]
123
- await message.channel.send(part_response)
124
- response_parts.append(part_response)
125
- return response_parts[0] if response_parts else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค, ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
126
- return system_message
127
 
128
  if __name__ == "__main__":
129
  discord_client = MyClient(intents=intents)
130
- discord_client.run(os.getenv('DISCORD_TOKEN'))
 
 
8
  import pandas as pd
9
  from fuzzywuzzy import process
10
 
11
+
12
  # 현재 작업 디렉토리 출력
13
  print("Current Working Directory:", os.getcwd())
14
 
 
29
  def load_optimized_dataset(data_files):
30
  data_frames = [pd.read_csv(file) for file in data_files]
31
  full_data = pd.concat(data_frames, ignore_index=True)
32
+
33
+ # NaN 값 처리
34
+ full_data['ํŒ์‹œ์‚ฌํ•ญ'] = full_data['ํŒ์‹œ์‚ฌํ•ญ'].fillna('')
35
+ full_data['์‚ฌ๊ฑด๋ช…'] = full_data['์‚ฌ๊ฑด๋ช…'].fillna('')
36
+
37
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
38
  name_to_number = full_data.groupby('์‚ฌ๊ฑด๋ช…')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
39
+ summary_to_number = full_data.groupby('ํŒ์‹œ์‚ฌํ•ญ')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
40
  number_to_fulltext = full_data.set_index('์‚ฌ๊ฑด๋ฒˆํ˜ธ')['์ „๋ฌธ'].to_dict()
41
+
42
+ return name_to_number, summary_to_number, number_to_fulltext
43
 
44
+ name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
45
  print("Dataset loaded successfully.")
46
 
47
+ # 사건명 및 판시사항 리스트 생성
48
  all_case_names = list(name_to_number.keys())
49
+ all_case_summaries = list(summary_to_number.keys())
50
+
51
+ # 디버깅용 로깅
52
+ logging.debug(f"Sample all_case_names: {all_case_names[:3]}")
53
+ logging.debug(f"Sample all_case_summaries: {all_case_summaries[:3]}")
54
 
55
  # 로깅 설정
56
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
 
92
 
93
  self.is_processing = True
94
  try:
95
+ response_parts = await generate_response(message)
96
+ if response_parts:
97
+ for part in response_parts:
98
+ await message.channel.send(part)
99
  else:
100
  await message.channel.send("์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค, ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋Š” ์ •๋ณด๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
101
  finally:
 
112
  user_input = message.content.strip()
113
  user_mention = message.author.mention
114
 
115
+ # 유사한 사건명 및 판시사항 각각 찾기
116
+ matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
117
+ matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
118
+
119
+ logging.debug(f"Matched case names: {matched_case_names}")
120
+ logging.debug(f"Matched case summaries: {matched_case_summaries}")
121
+
122
+ case_numbers_set = set()
123
  if matched_case_names:
 
124
  for case_name, score in matched_case_names:
125
+ case_numbers_set.update(name_to_number.get(case_name, []))
126
+ if matched_case_summaries:
127
+ for case_summary, score in matched_case_summaries:
128
+ case_numbers_set.update(summary_to_number.get(case_summary, []))
129
+
130
+ if case_numbers_set:
131
+ case_numbers_str = "\n".join(case_numbers_set)
132
+ system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ์‚ฌ๊ฑด์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
133
  elif user_input in number_to_fulltext:
134
  full_text = number_to_fulltext[user_input]
135
  system_message = f"{user_mention}, ์‚ฌ๊ฑด๋ฒˆํ˜ธ '{user_input}'์˜ ์ „๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n\n{full_text}"
 
138
 
139
  # 메시지 길이 제한 처리
140
  max_length = 2000
141
+ response_parts = []
142
+ for i in range(0, len(system_message), max_length):
143
+ part_response = system_message[i:i + max_length]
144
+ response_parts.append(part_response)
145
+
146
+ return response_parts
 
 
147
 
148
  if __name__ == "__main__":
149
  discord_client = MyClient(intents=intents)
150
+ discord_client.run(os.getenv('DISCORD_TOKEN'))
151
+