seawolf2357 committed on
Commit
a8450bf
·
verified ·
1 Parent(s): be78b1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -30,13 +30,9 @@ def load_optimized_dataset(data_files):
30
  data_frames = [pd.read_csv(file) for file in data_files]
31
  full_data = pd.concat(data_frames, ignore_index=True)
32
 
33
- # 확인을 위한 데이터 샘플 출력
34
- print(full_data[['사건λͺ…', 'μ‚¬κ±΄λ²ˆν˜Έ', 'νŒμ‹œμ‚¬ν•­']].head()) # 'νŒμ‹œμ‚¬ν•­' ν•„λ“œμ˜ 데이터 μƒ˜ν”Œ 좜λ ₯
35
- logging.debug(f"Columns in dataset: {full_data.columns}")
36
- logging.debug(f"Sample data from 'νŒμ‹œμ‚¬ν•­': {full_data['νŒμ‹œμ‚¬ν•­'].dropna().head()}")
37
-
38
  # NaN 값 처리
39
  full_data['νŒμ‹œμ‚¬ν•­'] = full_data['νŒμ‹œμ‚¬ν•­'].fillna('')
 
40
 
41
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
42
  name_to_number = full_data.groupby('사건λͺ…')['μ‚¬κ±΄λ²ˆν˜Έ'].apply(list).to_dict()
@@ -116,27 +112,24 @@ async def generate_response(message):
116
  user_input = message.content.strip()
117
  user_mention = message.author.mention
118
 
119
- # 유사한 사건명 및 판시사항 찾기
120
  matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
121
  matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
122
 
123
  logging.debug(f"Matched case names: {matched_case_names}")
124
  logging.debug(f"Matched case summaries: {matched_case_summaries}")
125
 
 
126
  if matched_case_names:
127
- case_numbers = []
128
  for case_name, score in matched_case_names:
129
- case_numbers.extend(name_to_number.get(case_name, []))
130
- case_numbers = list(set(case_numbers))
131
- case_numbers_str = "\n".join(case_numbers)
132
- system_message = f"{user_mention}, '{user_input}'와 μœ μ‚¬ν•œ 사건λͺ…μ˜ μ‚¬κ±΄λ²ˆν˜ΈλŠ” λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€:\n{case_numbers_str}"
133
- elif matched_case_summaries:
134
- case_numbers = []
135
  for case_summary, score in matched_case_summaries:
136
- case_numbers.extend(summary_to_number.get(case_summary, []))
137
- case_numbers = list(set(case_numbers))
138
- case_numbers_str = "\n".join(case_numbers)
139
- system_message = f"{user_mention}, '{user_input}'와 μœ μ‚¬ν•œ νŒμ‹œμ‚¬ν•­μ˜ μ‚¬κ±΄λ²ˆν˜ΈλŠ” λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€:\n{case_numbers_str}"
 
140
  elif user_input in number_to_fulltext:
141
  full_text = number_to_fulltext[user_input]
142
  system_message = f"{user_mention}, μ‚¬κ±΄λ²ˆν˜Έ '{user_input}'의 전문은 λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€:\n\n{full_text}"
 
30
  data_frames = [pd.read_csv(file) for file in data_files]
31
  full_data = pd.concat(data_frames, ignore_index=True)
32
 
 
 
 
 
 
33
  # NaN 값 처리
34
  full_data['νŒμ‹œμ‚¬ν•­'] = full_data['νŒμ‹œμ‚¬ν•­'].fillna('')
35
+ full_data['사건λͺ…'] = full_data['사건λͺ…'].fillna('')
36
 
37
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
38
  name_to_number = full_data.groupby('사건λͺ…')['μ‚¬κ±΄λ²ˆν˜Έ'].apply(list).to_dict()
 
112
  user_input = message.content.strip()
113
  user_mention = message.author.mention
114
 
115
+ # 유사한 사건명 및 판시사항 각각 찾기
116
  matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
117
  matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
118
 
119
  logging.debug(f"Matched case names: {matched_case_names}")
120
  logging.debug(f"Matched case summaries: {matched_case_summaries}")
121
 
122
+ case_numbers_set = set()
123
  if matched_case_names:
 
124
  for case_name, score in matched_case_names:
125
+ case_numbers_set.update(name_to_number.get(case_name, []))
126
+ if matched_case_summaries:
 
 
 
 
127
  for case_summary, score in matched_case_summaries:
128
+ case_numbers_set.update(summary_to_number.get(case_summary, []))
129
+
130
+ if case_numbers_set:
131
+ case_numbers_str = "\n".join(case_numbers_set)
132
+ system_message = f"{user_mention}, '{user_input}'와 μœ μ‚¬ν•œ μ‚¬κ±΄μ˜ μ‚¬κ±΄λ²ˆν˜ΈλŠ” λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€:\n{case_numbers_str}"
133
  elif user_input in number_to_fulltext:
134
  full_text = number_to_fulltext[user_input]
135
  system_message = f"{user_mention}, μ‚¬κ±΄λ²ˆν˜Έ '{user_input}'의 전문은 λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€:\n\n{full_text}"