ginipick committed on
Commit
0c18059
•
1 Parent(s): 7b8d8d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -2
app.py CHANGED
@@ -156,14 +156,95 @@ def upload_parquet(file_path: str) -> Tuple[str, str, str]:
156
  try:
157
  # Parquet 파일 읽기
158
  df = pd.read_parquet(file_path, engine='pyarrow')
 
 
 
 
 
 
 
 
 
 
 
 
159
  # Markdown์œผ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
160
- parquet_content = df.head(10).to_markdown(index=False)
161
- # DataFrame์„ JSON ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜
 
 
 
 
 
 
 
162
  parquet_json = df.to_json(orient='records', force_ascii=False)
 
163
  return "Parquet 파일이 성공적으로 업로드되었습니다.", parquet_content, parquet_json
164
  except Exception as e:
165
  return f"Parquet 파일 업로드 중 오류가 발생했습니다: {str(e)}", "", ""
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  def text_to_parquet(text: str) -> Tuple[str, str, str]:
168
  try:
169
  from io import StringIO
 
156
  try:
157
  # Parquet 파일 읽기
158
  df = pd.read_parquet(file_path, engine='pyarrow')
159
+
160
+ # 데이터 검증 및 전처리
161
+ if 'text' not in df.columns or 'label' not in df.columns:
162
+ return "필수 컬럼(text, label)이 없습니다.", "", ""
163
+
164
+ # 데이터 요약 정보 생성
165
+ data_info = {
166
+ "์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜": len(df),
167
+ "๋ ˆ์ด๋ธ” ๋ถ„ํฌ": df['label'].value_counts().to_dict(),
168
+ "์ปฌ๋Ÿผ ๋ชฉ๋ก": list(df.columns)
169
+ }
170
+
171
  # Markdown์œผ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
172
+ parquet_content = f"""### ๋ฐ์ดํ„ฐ์…‹ ์ •๋ณด:
173
+ - ์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜: {data_info['์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜']}
174
+ - ๋ ˆ์ด๋ธ” ๋ถ„ํฌ: {data_info['๋ ˆ์ด๋ธ” ๋ถ„ํฌ']}
175
+ - ์ปฌ๋Ÿผ: {', '.join(data_info['์ปฌ๋Ÿผ ๋ชฉ๋ก'])}
176
+
177
+ ### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:
178
+ {df.head(10).to_markdown(index=False)}
179
+ """
180
+ # DataFrame์„ JSON ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜ (Q&A์—์„œ ์‚ฌ์šฉ)
181
  parquet_json = df.to_json(orient='records', force_ascii=False)
182
+
183
  return "Parquet 파일이 성공적으로 업로드되었습니다.", parquet_content, parquet_json
184
  except Exception as e:
185
  return f"Parquet 파일 업로드 중 오류가 발생했습니다: {str(e)}", "", ""
186
 
187
+ def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
188
+ try:
189
+ if parquet_data:
190
+ # JSON ๋ฌธ์ž์—ด์„ DataFrame์œผ๋กœ ๋ณ€ํ™˜
191
+ df = pd.read_json(io.StringIO(parquet_data))
192
+
193
+ # ๋ฐ์ดํ„ฐ์…‹ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
194
+ data_context = f"""
195
+ ํ˜„์žฌ ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ์ •๋ณด:
196
+ - ์ด {len(df)} ๊ฐœ์˜ ๋ ˆ์ฝ”๋“œ
197
+ - ๋ ˆ์ด๋ธ” ์ข…๋ฅ˜: {', '.join(df['label'].unique())}
198
+ - ๋ฐ์ดํ„ฐ ํ•„๋“œ: {', '.join(df.columns)}
199
+
200
+ ๋‹ค์Œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜์„ธ์š”:
201
+ {df.head(50).to_string()} # ์ฒ˜์Œ 50๊ฐœ ๋ ˆ์ฝ”๋“œ๋งŒ ์ปจํ…์ŠคํŠธ๋กœ ์ œ๊ณต
202
+ """
203
+ system_prompt = f"""๋‹น์‹ ์€ ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹์„ ๋ถ„์„ํ•˜๊ณ  ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
204
+
205
+ ์ฃผ์š” ์ง€์นจ:
206
+ 1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ
207
+ 2. ๋ฐ์ดํ„ฐ์…‹์˜ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•  ๊ฒƒ
208
+ 3. ๋ฐ์ดํ„ฐ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ถ”์ธกํ•˜์ง€ ๋ง ๊ฒƒ
209
+ 4. ๋‹ต๋ณ€์€ ๊ฐ„๋‹จ๋ช…๋ฃŒํ•˜๊ฒŒ ํ•  ๊ฒƒ
210
+ 5. ๋ฐ์ดํ„ฐ ํ”„๋ผ์ด๋ฒ„์‹œ๋ฅผ ๊ณ ๋ คํ•˜์—ฌ ๋‹ต๋ณ€ํ•  ๊ฒƒ
211
+
212
+ ๋ฐ์ดํ„ฐ์…‹ ์ปจํ…์ŠคํŠธ:
213
+ {data_context}
214
+ """
215
+ else:
216
+ system_prompt = system_message or "๋„ˆ๋Š” AI ์กฐ์–ธ์ž ์—ญํ• ์ด๋‹ค."
217
+
218
+ # OpenAI API ํ˜ธ์ถœ
219
+ messages = [{"role": "system", "content": system_prompt}]
220
+
221
+ # ์ตœ๊ทผ ๋Œ€ํ™” ๊ธฐ๋ก ์ถ”๊ฐ€
222
+ recent_history = history[-3:] if history else []
223
+ for chat in recent_history:
224
+ messages.append({"role": chat["role"], "content": chat["content"]})
225
+
226
+ messages.append({"role": "user", "content": message})
227
+
228
+ response = client.chat.completions.create(
229
+ model="gpt-4-0125-preview",
230
+ messages=messages,
231
+ max_tokens=max_tokens,
232
+ temperature=temperature,
233
+ top_p=top_p,
234
+ stream=True
235
+ )
236
+
237
+ full_response = ""
238
+ for chunk in response:
239
+ if chunk.choices[0].delta.content:
240
+ full_response += chunk.choices[0].delta.content
241
+ yield clean_response(full_response)
242
+
243
+ except Exception as e:
244
+ error_message = f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
245
+ print(f"{error_message}\n{traceback.format_exc()}")
246
+ yield error_message
247
+
248
  def text_to_parquet(text: str) -> Tuple[str, str, str]:
249
  try:
250
  from io import StringIO