ginipick committed on
Commit
b2e08df
·
verified ·
1 Parent(s): d2a7d2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -284
app.py CHANGED
@@ -7,9 +7,49 @@ import json
7
  import io
8
  import traceback
9
  import csv
10
- # HuggingFace ํด๋ผ์ด์–ธํŠธ ๋Œ€์‹  OpenAI ํด๋ผ์ด์–ธํŠธ ์‚ฌ์šฉ
11
  from openai import OpenAI
12
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # ์ถ”๋ก  API ํด๋ผ์ด์–ธํŠธ ์„ค์ •
15
  hf_client = InferenceClient(
@@ -34,88 +74,24 @@ def load_parquet(filename: str) -> str:
34
  except Exception as e:
35
  return f"ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
36
 
37
-
38
- # OpenAI ํด๋ผ์ด์–ธํŠธ ์„ค์ •
39
- client = OpenAI(api_key=os.getenv("OPEN_AI"))
40
-
41
- # respond ํ•จ์ˆ˜ ์ˆ˜์ •
42
- def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
43
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
44
- system_prefix = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ. ๋„ˆ๋Š” ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” ์—ญํ• ์„ ํ•œ๋‹ค.
45
-
46
- ์ฃผ์š” ์ง€์นจ:
47
- 1. ์งˆ๋ฌธ๊ณผ ์ง์ ‘ ๊ด€๋ จ๋œ ๋‚ด์šฉ๋งŒ ๊ฐ„๋‹จ๋ช…๋ฃŒํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•  ๊ฒƒ
48
- 2. ์ด์ „ ๋‹ต๋ณ€๊ณผ ์ค‘๋ณต๋˜๋Š” ๋‚ด์šฉ์€ ์ œ์™ธํ•  ๊ฒƒ
49
- 3. ๋ถˆํ•„์š”ํ•œ ์˜ˆ์‹œ๋‚˜ ๋ถ€์—ฐ ์„ค๋ช…์€ ํ•˜์ง€ ๋ง ๊ฒƒ
50
- 4. ๋™์ผํ•œ ๋‚ด์šฉ์„ ๋‹ค๋ฅธ ํ‘œํ˜„์œผ๋กœ ๋ฐ˜๋ณตํ•˜์ง€ ๋ง ๊ฒƒ
51
- 5. ํ•ต์‹ฌ ์ •๋ณด๋งŒ ์ „๋‹ฌํ•  ๊ฒƒ
52
- """
53
-
54
- if parquet_data:
55
- try:
56
- df = pd.read_json(io.StringIO(parquet_data))
57
- data_summary = df.describe(include='all').to_string()
58
- system_prefix += f"\n\n๋ฐ์ดํ„ฐ ์š”์•ฝ:\n{data_summary}"
59
- except Exception as e:
60
- print(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}")
61
-
62
- # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ๊ตฌ์„ฑ
63
- messages = [{"role": "system", "content": system_prefix}]
64
-
65
- # ์ตœ๊ทผ ๋Œ€ํ™” ์ปจํ…์ŠคํŠธ๋งŒ ์œ ์ง€
66
- recent_history = history[-3:] if history else []
67
- for chat in recent_history:
68
- messages.append({"role": chat["role"], "content": chat["content"]})
69
-
70
- messages.append({"role": "user", "content": message})
71
-
72
- try:
73
- # OpenAI API ํ˜ธ์ถœ
74
- response = client.chat.completions.create(
75
- model="gpt-4o-mini", # GPT-4-mini ๋ชจ๋ธ ์‚ฌ์šฉ
76
- messages=messages,
77
- max_tokens=max_tokens,
78
- temperature=temperature,
79
- top_p=top_p,
80
- stream=True
81
- )
82
-
83
- full_response = ""
84
- for chunk in response:
85
- if chunk.choices[0].delta.content:
86
- full_response += chunk.choices[0].delta.content
87
- # ์‘๋‹ต ์ •์ œ
88
- cleaned_response = clean_response(full_response)
89
- yield cleaned_response
90
-
91
- except Exception as e:
92
- error_message = f"์ถ”๋ก  ์˜ค๋ฅ˜: {str(e)}"
93
- print(error_message)
94
- yield error_message
95
-
96
  def clean_response(text: str) -> str:
97
  """์‘๋‹ต ํ…์ŠคํŠธ ์ •์ œ ํ•จ์ˆ˜"""
98
- # ๋ฌธ์žฅ ๋‹จ์œ„๋กœ ๋ถ„๋ฆฌ
99
  sentences = [s.strip() for s in text.split('.') if s.strip()]
100
-
101
- # ์ค‘๋ณต ์ œ๊ฑฐ
102
  unique_sentences = []
103
  seen = set()
104
 
105
  for sentence in sentences:
106
- # ๋ฌธ์žฅ ์ •๊ทœํ™” (๊ณต๋ฐฑ ์ œ๊ฑฐ, ์†Œ๋ฌธ์ž ๋ณ€ํ™˜)
107
  normalized = ' '.join(sentence.lower().split())
108
  if normalized not in seen:
109
  seen.add(normalized)
110
  unique_sentences.append(sentence)
111
 
112
- # ์ •์ œ๋œ ๋ฌธ์žฅ ๊ฒฐํ•ฉ
113
  cleaned_text = '. '.join(unique_sentences)
114
  if cleaned_text and not cleaned_text.endswith('.'):
115
  cleaned_text += '.'
116
 
117
  return cleaned_text
118
-
119
  def remove_duplicates(text: str) -> str:
120
  """์ค‘๋ณต ๋ฌธ์žฅ ์ œ๊ฑฐ ํ•จ์ˆ˜"""
121
  sentences = text.split('.')
@@ -132,20 +108,17 @@ def remove_duplicates(text: str) -> str:
132
 
133
  def upload_csv(file_path: str) -> Tuple[str, str]:
134
  try:
135
- # CSV ํŒŒ์ผ ์ฝ๊ธฐ
136
  df = pd.read_csv(file_path, sep=',')
137
- # ํ•„์ˆ˜ ์ปฌ๋Ÿผ ํ™•์ธ
138
  required_columns = {'id', 'text', 'label', 'metadata'}
139
  available_columns = set(df.columns)
140
  missing_columns = required_columns - available_columns
141
  if missing_columns:
142
  return f"CSV ํŒŒ์ผ์— ๋‹ค์Œ ํ•„์ˆ˜ ์ปฌ๋Ÿผ์ด ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {', '.join(missing_columns)}", ""
143
- # ๋ฐ์ดํ„ฐ ํด๋ Œ์ง•
144
  df.drop_duplicates(inplace=True)
145
  df.fillna('', inplace=True)
146
- # ๋ฐ์ดํ„ฐ ์œ ํ˜• ์ตœ์ ํ™”
147
  df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
148
- # Parquet ํŒŒ์ผ๋กœ ๋ณ€ํ™˜
149
  parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
150
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
151
  return f"{parquet_filename} ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜๊ณ  ๋ณ€ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_filename
@@ -154,10 +127,8 @@ def upload_csv(file_path: str) -> Tuple[str, str]:
154
 
155
  def upload_parquet(file_path: str) -> Tuple[str, str, str]:
156
  try:
157
- # Parquet ํŒŒ์ผ ์ฝ๊ธฐ
158
  df = pd.read_parquet(file_path, engine='pyarrow')
159
 
160
- # ๋ฐ์ดํ„ฐ ๊ธฐ๋ณธ ์ •๋ณด ์ˆ˜์ง‘
161
  data_info = {
162
  "์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜": len(df),
163
  "์ปฌ๋Ÿผ ๋ชฉ๋ก": list(df.columns),
@@ -165,143 +136,53 @@ def upload_parquet(file_path: str) -> Tuple[str, str, str]:
165
  "๊ฒฐ์ธก์น˜ ์ •๋ณด": df.isnull().sum().to_dict()
166
  }
167
 
168
- # ๋ฐ์ดํ„ฐ ์š”์•ฝ ์ •๋ณด ์ƒ์„ฑ
169
  summary = []
170
  summary.append(f"### ๋ฐ์ดํ„ฐ์…‹ ๊ธฐ๋ณธ ์ •๋ณด:")
171
  summary.append(f"- ์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜: {data_info['์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜']}")
172
  summary.append(f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(data_info['์ปฌ๋Ÿผ ๋ชฉ๋ก'])}")
173
 
174
- # ๊ฐ ์ปฌ๋Ÿผ๋ณ„ ํ†ต๊ณ„ ์ •๋ณด ์ƒ์„ฑ
175
  summary.append("\n### ์ปฌ๋Ÿผ๋ณ„ ์ •๋ณด:")
176
  for col in df.columns:
177
  if df[col].dtype in ['int64', 'float64']:
178
- # ์ˆ˜์น˜ํ˜• ๋ฐ์ดํ„ฐ
179
  stats = df[col].describe()
180
  summary.append(f"\n{col} (์ˆ˜์น˜ํ˜•):")
181
  summary.append(f"- ํ‰๊ท : {stats['mean']:.2f}")
182
  summary.append(f"- ์ตœ์†Œ: {stats['min']}")
183
  summary.append(f"- ์ตœ๋Œ€: {stats['max']}")
184
  elif df[col].dtype == 'object' or df[col].dtype == 'string':
185
- # ๋ฌธ์ž์—ด ๋ฐ์ดํ„ฐ
186
  unique_count = df[col].nunique()
187
  summary.append(f"\n{col} (ํ…์ŠคํŠธ):")
188
  summary.append(f"- ๊ณ ์œ ๊ฐ’ ์ˆ˜: {unique_count}")
189
- if unique_count < 10: # ๊ณ ์œ ๊ฐ’์ด ์ ์€ ๊ฒฝ์šฐ๋งŒ ํ‘œ์‹œ
190
  value_counts = df[col].value_counts().head(5)
191
  summary.append("- ์ƒ์œ„ 5๊ฐœ ๊ฐ’:")
192
  for val, count in value_counts.items():
193
  summary.append(f" โ€ข {val}: {count}๊ฐœ")
194
 
195
- # ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ
196
  preview = df.head(10).to_markdown(index=False)
197
  summary.append("\n### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:")
198
  summary.append(preview)
199
 
200
  parquet_content = "\n".join(summary)
201
-
202
- # DataFrame์„ JSON ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜ (Q&A์—์„œ ์‚ฌ์šฉ)
203
  parquet_json = df.to_json(orient='records', force_ascii=False)
204
 
205
  return "Parquet ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_content, parquet_json
206
  except Exception as e:
207
  return f"Parquet ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", "", ""
208
 
209
-
210
- def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
211
- try:
212
- if parquet_data:
213
- # JSON ๋ฌธ์ž์—ด์„ DataFrame์œผ๋กœ ๋ณ€ํ™˜
214
- df = pd.read_json(io.StringIO(parquet_data))
215
-
216
- # ๋ฐ์ดํ„ฐ์…‹ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
217
- columns_info = []
218
- for col in df.columns:
219
- if df[col].dtype in ['int64', 'float64']:
220
- col_type = "์ˆ˜์น˜ํ˜•"
221
- stats = df[col].describe()
222
- col_info = f"- {col} ({col_type}): ํ‰๊ท ={stats['mean']:.2f}, ์ตœ์†Œ={stats['min']}, ์ตœ๋Œ€={stats['max']}"
223
- else:
224
- col_type = "ํ…์ŠคํŠธ"
225
- unique_count = df[col].nunique()
226
- col_info = f"- {col} ({col_type}): ๊ณ ์œ ๊ฐ’ {unique_count}๊ฐœ"
227
- columns_info.append(col_info)
228
-
229
- data_context = f"""
230
- ํ˜„์žฌ ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹ ์ •๋ณด:
231
- - ์ด {len(df)} ๊ฐœ์˜ ๋ ˆ์ฝ”๋“œ
232
- - ์ปฌ๋Ÿผ ์ •๋ณด:
233
- {chr(10).join(columns_info)}
234
-
235
- ์ƒ˜ํ”Œ ๋ฐ์ดํ„ฐ:
236
- {df.head(20).to_string()}
237
- """
238
- system_prompt = f"""๋‹น์‹ ์€ ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ์…‹์„ ๋ถ„์„ํ•˜๊ณ  ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
239
-
240
- ์ฃผ์š” ์ง€์นจ:
241
- 1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ
242
- 2. ๋ฐ์ดํ„ฐ์…‹์˜ ์‹ค์ œ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•  ๊ฒƒ
243
- 3. ๋ฐ์ดํ„ฐ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ถ”์ธกํ•˜์ง€ ๋ง ๊ฒƒ
244
- 4. ๋‹ต๋ณ€์€ ๊ฐ„๋‹จ๋ช…๋ฃŒํ•˜๊ฒŒ ํ•  ๊ฒƒ
245
- 5. ๋ฐ์ดํ„ฐ ํ”„๋ผ์ด๋ฒ„์‹œ๋ฅผ ๊ณ ๋ คํ•˜์—ฌ ๋‹ต๋ณ€ํ•  ๊ฒƒ
246
-
247
- ๋ฐ์ดํ„ฐ์…‹ ๊ตฌ์กฐ ์„ค๋ช…:
248
- {chr(10).join(columns_info)}
249
-
250
- ์ฐธ๊ณ ํ•  ๋ฐ์ดํ„ฐ ์ƒ˜ํ”Œ:
251
- {data_context}
252
- """
253
- else:
254
- system_prompt = system_message or "๋„ˆ๋Š” AI ์กฐ์–ธ์ž ์—ญํ• ์ด๋‹ค."
255
-
256
- # OpenAI API ํ˜ธ์ถœ
257
- messages = [{"role": "system", "content": system_prompt}]
258
-
259
- # ์ตœ๊ทผ ๋Œ€ํ™” ๊ธฐ๋ก ์ถ”๊ฐ€
260
- recent_history = history[-3:] if history else []
261
- for chat in recent_history:
262
- messages.append({"role": chat["role"], "content": chat["content"]})
263
-
264
- messages.append({"role": "user", "content": message})
265
-
266
- response = client.chat.completions.create(
267
- model="gpt-4-0125-preview",
268
- messages=messages,
269
- max_tokens=max_tokens,
270
- temperature=temperature,
271
- top_p=top_p,
272
- stream=True
273
- )
274
-
275
- full_response = ""
276
- for chunk in response:
277
- if chunk.choices[0].delta.content:
278
- full_response += chunk.choices[0].delta.content
279
- yield clean_response(full_response)
280
-
281
- except Exception as e:
282
- error_message = f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
283
- print(f"{error_message}\n{traceback.format_exc()}")
284
- yield error_message
285
-
286
  def text_to_parquet(text: str) -> Tuple[str, str, str]:
287
  try:
288
- # ์ž…๋ ฅ ํ…์ŠคํŠธ๋ฅผ ์ค„ ๋‹จ์œ„๋กœ ๋ถ„๋ฆฌ
289
  lines = [line.strip() for line in text.split('\n') if line.strip()]
290
-
291
- # ๋ฐ์ดํ„ฐ๋ฅผ ์ €์žฅํ•  ๋ฆฌ์ŠคํŠธ
292
  data = []
293
 
294
  for line in lines:
295
  try:
296
- # ์ •๊ทœ์‹์„ ์‚ฌ์šฉํ•˜์—ฌ CSV ํ˜•์‹ ํŒŒ์‹ฑ
297
  import re
298
  pattern = r'(\d+),([^,]+),([^,]+),(.+)'
299
  match = re.match(pattern, line)
300
 
301
  if match:
302
  id_val, text_val, label_val, metadata_val = match.groups()
303
-
304
- # ์Œ๋”ฐ์˜ดํ‘œ ์ œ๊ฑฐ ๋ฐ ์ •์ œ
305
  text_val = text_val.strip().strip('"')
306
  label_val = label_val.strip().strip('"')
307
  metadata_val = metadata_val.strip().strip('"')
@@ -319,10 +200,7 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
319
  if not data:
320
  return "๋ณ€ํ™˜ํ•  ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", ""
321
 
322
- # DataFrame ์ƒ์„ฑ
323
  df = pd.DataFrame(data)
324
-
325
- # ๋ฐ์ดํ„ฐ ํƒ€์ž… ์„ค์ •
326
  df = df.astype({
327
  'id': 'int32',
328
  'text': 'string',
@@ -330,11 +208,8 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
330
  'metadata': 'string'
331
  })
332
 
333
- # Parquet ํŒŒ์ผ๋กœ ๋ณ€ํ™˜
334
  parquet_filename = 'text_to_parquet.parquet'
335
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
336
-
337
- # ๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ƒ์„ฑ
338
  preview = df.to_markdown(index=False)
339
 
340
  return (
@@ -348,34 +223,46 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
348
  print(f"{error_message}\n{traceback.format_exc()}")
349
  return error_message, "", ""
350
 
351
- # preprocess_text_with_llm ํ•จ์ˆ˜๋„ ์ˆ˜์ •
352
- def preprocess_text_with_llm(input_text: str) -> str:
353
- if not input_text.strip():
354
- return "์ž…๋ ฅ ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค."
355
-
356
- system_prompt = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ๋‹ต๋ณ€ํ•˜์‹œ์˜ค. ๋‹น์‹ ์€ ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž…๋ ฅ๋œ ํ…์ŠคํŠธ๋ฅผ CSV ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜์„ธ์š”.
357
 
358
- ๊ทœ์น™:
359
- 1. ์ถœ๋ ฅ ํ˜•์‹: id,text,label,metadata
360
- 2. id: 1๋ถ€ํ„ฐ ์‹œ์ž‘ํ•˜๋Š” ์ˆœ์ฐจ์  ๋ฒˆํ˜ธ
361
- 3. text: ์˜๋ฏธ ์žˆ๋Š” ๋‹จ์œ„๋กœ ๋ถ„๋ฆฌ๋œ ํ…์ŠคํŠธ
362
- 4. label: ํ…์ŠคํŠธ์˜ ์ฃผ์ œ๋‚˜ ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ์•„๋ž˜ ๊ธฐ์ค€์œผ๋กœ ์ •ํ™•ํ•˜๊ฒŒ ํ•œ ๊ฐœ๋งŒ ์„ ํƒ
363
- - Historical_Figure (์—ญ์‚ฌ์  ์ธ๋ฌผ)
364
- - Military_History (๊ตฐ์‚ฌ ์—ญ์‚ฌ)
365
- - Technology (๊ธฐ์ˆ )
366
- - Politics (์ •์น˜)
367
- - Culture (๋ฌธํ™”)
368
- 5. metadata: ๋‚ ์งœ, ์ถœ์ฒ˜ ๋“ฑ ์ถ”๊ฐ€ ์ •๋ณด"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
  try:
371
  response = client.chat.completions.create(
372
  model="gpt-4-0125-preview",
373
- messages=[
374
- {"role": "system", "content": system_prompt},
375
- {"role": "user", "content": input_text}
376
- ],
377
- max_tokens=4000,
378
- temperature=0.1,
379
  stream=True
380
  )
381
 
@@ -383,26 +270,19 @@ def preprocess_text_with_llm(input_text: str) -> str:
383
  for chunk in response:
384
  if chunk.choices[0].delta.content:
385
  full_response += chunk.choices[0].delta.content
 
386
 
387
- # ์‘๋‹ต ์ •์ œ
388
- processed_text = clean_response(full_response)
389
-
390
- # CSV ํ˜•์‹ ๊ฒ€์ฆ
391
- try:
392
- from io import StringIO
393
- import csv
394
- csv.reader(StringIO(processed_text))
395
- return processed_text
396
- except csv.Error:
397
- return "LLM์ด ์˜ฌ๋ฐ”๋ฅธ CSV ํ˜•์‹์„ ์ƒ์„ฑํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."
398
-
399
  except Exception as e:
400
- error_message = f"์ „์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
401
- print(error_message)
402
- return error_message# preprocess_text_with_llm ํ•จ์ˆ˜๋„ ์ˆ˜์ •
403
- def preprocess_text_with_llm(input_text: str) -> str:
404
- if not input_text.strip():
405
- return "์ž…๋ ฅ ํ…์ŠคํŠธ๊ฐ€ ๋น„์–ด์žˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
406
 
407
  system_prompt = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ๋‹ต๋ณ€ํ•˜์‹œ์˜ค. ๋‹น์‹ ์€ ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž…๋ ฅ๋œ ํ…์ŠคํŠธ๋ฅผ CSV ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜์„ธ์š”.
408
 
@@ -420,7 +300,7 @@ def preprocess_text_with_llm(input_text: str) -> str:
420
 
421
  try:
422
  response = client.chat.completions.create(
423
- model="gpt-4o-mini",
424
  messages=[
425
  {"role": "system", "content": system_prompt},
426
  {"role": "user", "content": input_text}
@@ -435,10 +315,8 @@ def preprocess_text_with_llm(input_text: str) -> str:
435
  if chunk.choices[0].delta.content:
436
  full_response += chunk.choices[0].delta.content
437
 
438
- # ์‘๋‹ต ์ •์ œ
439
  processed_text = clean_response(full_response)
440
 
441
- # CSV ํ˜•์‹ ๊ฒ€์ฆ
442
  try:
443
  from io import StringIO
444
  import csv
@@ -452,46 +330,50 @@ def preprocess_text_with_llm(input_text: str) -> str:
452
  print(error_message)
453
  return error_message
454
 
455
- # CSS ์„ค์ •
456
- css = """
457
- footer {
458
- visibility: hidden;
459
- }
460
- #chatbot-container, #chatbot-data-upload {
461
- height: 700px;
462
- overflow-y: scroll;
463
- }
464
- #chatbot-container .message, #chatbot-data-upload .message {
465
- font-size: 14px;
466
- }
467
- /* ์ž…๋ ฅ์ฐฝ ๋ฐฐ๊ฒฝ์ƒ‰ ๋ฐ ๊ธ€์ž์ƒ‰ ๋ณ€๊ฒฝ */
468
- textarea, input[type="text"] {
469
- background-color: #ffffff; /* ํฐ์ƒ‰ ๋ฐฐ๊ฒฝ */
470
- color: #000000; /* ๊ฒ€์ •์ƒ‰ ๊ธ€์ž */
471
- }
472
- /* ํŒŒ์ผ ์—…๋กœ๋“œ ์˜์—ญ ๋†’์ด ์กฐ์ ˆ */
473
- #parquet-upload-area {
474
- max-height: 150px;
475
- overflow-y: auto;
476
- }
477
- /* ์ดˆ๊ธฐ ์„ค๋ช… ๊ธ€์”จ ํฌ๊ธฐ ์กฐ์ ˆ */
478
- #initial-description {
479
- font-size: 14px;
480
- }
481
- """
482
 
483
  # Gradio Blocks ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
484
  with gr.Blocks(css=css) as demo:
 
 
485
  gr.Markdown("# MyEzRAG: LLM์ด ๋‚˜๋งŒ์˜ ๋ฐ์ดํ„ฐ๋กœ ํ•™์Šตํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ/๋‹ต๋ณ€", elem_id="initial-description")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  gr.Markdown(
487
  "### '์‚ฌ์šฉ ๋ฐฉ๋ฒ•' ํƒญ์„ ํ†ตํ•ด ์ž์„ธํ•œ ์ด์šฉ ๋ฐฉ๋ฒ•์„ ์ฐธ๊ณ ํ•˜์„ธ์š”.\n"
488
  "### Tip) '์˜ˆ์ œ'๋ฅผ ํ†ตํ•ด ๋‹ค์–‘ํ•œ ํ™œ์šฉ ๋ฐฉ๋ฒ•์„ ์ฒดํ—˜ํ•˜๊ณ  ์‘์šฉํ•ด ๋ณด์„ธ์š”, ๋ฐ์ดํ„ฐ์…‹ ์—…๋กœ๋“œ์‹œ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋Š” 10๊ฑด๋งŒ ์ถœ๋ ฅ",
489
  elem_id="initial-description"
490
  )
491
 
492
-
493
-
494
- # ์ฒซ ๋ฒˆ์งธ ํƒญ: ์ฑ—๋ด‡ ๋ฐ์ดํ„ฐ ์—…๋กœ๋“œ (ํƒญ ์ด๋ฆ„ ๋ณ€๊ฒฝ: "My ๋ฐ์ดํ„ฐ์…‹+LLM")
495
  with gr.Tab("My ๋ฐ์ดํ„ฐ์…‹+LLM"):
496
  gr.Markdown("### LLM๊ณผ ๋Œ€ํ™”ํ•˜๊ธฐ")
497
  chatbot_data_upload = gr.Chatbot(label="์ฑ—๋ด‡", type="messages", elem_id="chatbot-data-upload")
@@ -506,10 +388,14 @@ with gr.Blocks(css=css) as demo:
506
 
507
  parquet_data_state = gr.State()
508
 
509
- def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str):
 
 
 
 
 
 
510
  history = history or []
511
-
512
- # ์ค‘๋ณต ์งˆ๋ฌธ ๊ฒ€์‚ฌ
513
  recent_questions = [chat['content'].strip().lower() for chat in history[-3:] if chat['role'] == 'user']
514
  if message.strip().lower() in recent_questions:
515
  yield history + [{"role": "assistant", "content": "๋™์ผํ•œ ์งˆ๋ฌธ์ด ์ตœ๊ทผ์— ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์งˆ๋ฌธ์„ ํ•ด์ฃผ์„ธ์š”."}], ""
@@ -522,9 +408,10 @@ with gr.Blocks(css=css) as demo:
522
  history,
523
  system_message,
524
  max_tokens,
525
- temperature=0.3, # ๋‚ฎ์€ temperature ์‚ฌ์šฉ
526
  top_p=top_p,
527
- parquet_data=parquet_data
 
528
  )
529
 
530
  partial_response = ""
@@ -539,9 +426,6 @@ with gr.Blocks(css=css) as demo:
539
  history.append({"role": "assistant", "content": response})
540
  yield history, ""
541
 
542
-
543
-
544
-
545
  send_data_upload.click(
546
  handle_message_data_upload,
547
  inputs=[
@@ -551,13 +435,14 @@ with gr.Blocks(css=css) as demo:
551
  max_tokens,
552
  temperature,
553
  top_p,
554
- parquet_data_state, # parquet_data_state๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ๋ฅผ ์ „๋‹ฌ
 
555
  ],
556
  outputs=[chatbot_data_upload, msg_data_upload],
557
  queue=True
558
  )
559
 
560
- # ์˜ˆ์ œ ์ถ”๊ฐ€
561
  with gr.Accordion("์˜ˆ์ œ", open=False):
562
  gr.Examples(
563
  examples=[
@@ -572,7 +457,7 @@ with gr.Blocks(css=css) as demo:
572
  label="์˜ˆ์ œ ์„ ํƒ",
573
  )
574
 
575
- # Parquet ํŒŒ์ผ ์—…๋กœ๋“œ๋ฅผ ํ™”๋ฉด ํ•˜๋‹จ์œผ๋กœ ์ด๋™
576
  gr.Markdown("### Parquet ํŒŒ์ผ ์—…๋กœ๋“œ")
577
  with gr.Row():
578
  with gr.Column():
@@ -596,7 +481,7 @@ with gr.Blocks(css=css) as demo:
596
  outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
597
  )
598
 
599
- # ๋‘ ๋ฒˆ์งธ ํƒญ: ๋ฐ์ดํ„ฐ ๋ณ€ํ™˜ (ํƒญ ์ด๋ฆ„ ๋ณ€๊ฒฝ: "CSV to My ๋ฐ์ดํ„ฐ์…‹")
600
  with gr.Tab("CSV to My ๋ฐ์ดํ„ฐ์…‹"):
601
  gr.Markdown("### CSV ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ Parquet ๋ณ€ํ™˜")
602
  with gr.Row():
@@ -621,7 +506,7 @@ with gr.Blocks(css=css) as demo:
621
  outputs=[upload_status, parquet_preview, download_button]
622
  )
623
 
624
- # ์„ธ ๋ฒˆ์งธ ํƒญ: ํ…์ŠคํŠธ to csv to parquet ๋ณ€ํ™˜ (ํƒญ ์ด๋ฆ„ ๋ณ€๊ฒฝ: "Text to My ๋ฐ์ดํ„ฐ์…‹")
625
  with gr.Tab("Text to My ๋ฐ์ดํ„ฐ์…‹"):
626
  gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด CSV๋กœ ๋ณ€ํ™˜ ํ›„ Parquet์œผ๋กœ ์ž๋™ ์ „ํ™˜๋ฉ๋‹ˆ๋‹ค.")
627
  with gr.Row():
@@ -649,7 +534,7 @@ with gr.Blocks(css=css) as demo:
649
  outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
650
  )
651
 
652
- # ๋„ค๋ฒˆ์งธ ํƒญ์˜ UI ๋ถ€๋ถ„ ์ˆ˜์ •
653
  with gr.Tab("Text Preprocessing with LLM"):
654
  gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด LLM์ด ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์— ๋งž๊ฒŒ ์ „์ฒ˜๋ฆฌํ•˜์—ฌ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
655
  with gr.Row():
@@ -676,33 +561,29 @@ with gr.Blocks(css=css) as demo:
676
  interactive=False
677
  )
678
 
679
- # Parquet ๋ณ€ํ™˜ ๋ฐ ๋‹ค์šด๋กœ๋“œ ์„น์…˜
680
  convert_to_parquet_button = gr.Button("Parquet์œผ๋กœ ๋ณ€ํ™˜")
681
  download_parquet = gr.File(label="๋ณ€ํ™˜๋œ Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ")
682
 
683
-
684
-
685
-
686
- def handle_text_preprocessing(input_text: str):
 
687
  if not input_text.strip():
688
- return "์ž…๋ ฅ ํ…์ŠคํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
 
689
 
690
  try:
691
- preprocess_status_msg = "์ „์ฒ˜๋ฆฌ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค..."
692
- yield preprocess_status_msg, ""
693
-
694
- processed_text = preprocess_text_with_llm(input_text)
695
 
696
  if processed_text:
697
- preprocess_status_msg = "์ „์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
698
- yield preprocess_status_msg, processed_text
699
  else:
700
- preprocess_status_msg = "์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
701
- yield preprocess_status_msg, ""
702
 
703
  except Exception as e:
704
- error_msg = f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
705
- yield error_msg, ""
706
 
707
  def clear_inputs():
708
  return "", "๋Œ€๊ธฐ ์ค‘...", ""
@@ -719,10 +600,9 @@ with gr.Blocks(css=css) as demo:
719
  except Exception as e:
720
  return f"Parquet ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", None
721
 
722
- # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ ์—ฐ๊ฒฐ
723
  preprocess_button.click(
724
  handle_text_preprocessing,
725
- inputs=[raw_text_input],
726
  outputs=[preprocess_status, processed_text_output],
727
  queue=True
728
  )
@@ -738,7 +618,6 @@ with gr.Blocks(css=css) as demo:
738
  outputs=[preprocess_status, download_parquet]
739
  )
740
 
741
- # ์˜ˆ์ œ ํ…์ŠคํŠธ ์ถ”๊ฐ€
742
  with gr.Accordion("์˜ˆ์ œ ํ…์ŠคํŠธ", open=False):
743
  gr.Examples(
744
  examples=[
@@ -749,12 +628,17 @@ with gr.Blocks(css=css) as demo:
749
  label="์˜ˆ์ œ ์„ ํƒ"
750
  )
751
 
 
752
  with gr.Tab("๐Ÿ“š ์‚ฌ์šฉ ๋ฐฉ๋ฒ•"):
753
  gr.Markdown("""
754
  # MyEzRAG ์‚ฌ์šฉ ๊ฐ€์ด๋“œ
755
 
 
 
 
 
 
756
  ## 1๏ธโƒฃ My ๋ฐ์ดํ„ฐ์…‹+LLM ํƒญ
757
- ![Tab1](https://your-image-url.com/tab1.png)
758
  ### ๊ธฐ๋Šฅ
759
  - ์—…๋กœ๋“œ๋œ Parquet ๋ฐ์ดํ„ฐ์…‹์„ ๊ธฐ๋ฐ˜์œผ๋กœ LLM๊ณผ ๋Œ€ํ™”
760
  - ๋ฐ์ดํ„ฐ์…‹์˜ ๋‚ด์šฉ์„ ํ™œ์šฉํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ
@@ -771,7 +655,6 @@ with gr.Blocks(css=css) as demo:
771
  ---
772
 
773
  ## 2๏ธโƒฃ CSV to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
774
- ![Tab2](https://your-image-url.com/tab2.png)
775
  ### ๊ธฐ๋Šฅ
776
  - CSV ํŒŒ์ผ์„ Parquet ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
777
  - ๋ฐ์ดํ„ฐ ์ตœ์ ํ™” ๋ฐ ์ •์ œ
@@ -788,7 +671,6 @@ with gr.Blocks(css=css) as demo:
788
  ---
789
 
790
  ## 3๏ธโƒฃ Text to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
791
- ![Tab3](https://your-image-url.com/tab3.png)
792
  ### ๊ธฐ๋Šฅ
793
  - ํ…์ŠคํŠธ ํ˜•์‹์˜ ๋ฐ์ดํ„ฐ๋ฅผ Parquet์œผ๋กœ ๋ณ€ํ™˜
794
  - ์ˆ˜๋™ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ ์ง€์›
@@ -811,7 +693,6 @@ with gr.Blocks(css=css) as demo:
811
  ---
812
 
813
  ## 4๏ธโƒฃ Text Preprocessing with LLM ํƒญ
814
- ![Tab4](https://your-image-url.com/tab4.png)
815
  ### ๊ธฐ๋Šฅ
816
  - LLM์„ ํ™œ์šฉํ•œ ์ž๋™ ํ…์ŠคํŠธ ์ „์ฒ˜๋ฆฌ
817
  - ๊ตฌ์กฐํ™”๋œ ๋ฐ์ดํ„ฐ์…‹ ์ƒ์„ฑ
@@ -828,26 +709,28 @@ with gr.Blocks(css=css) as demo:
828
  - ๋ฐ์ดํ„ฐ ์ •๊ทœํ™”
829
 
830
  ## ๐Ÿ’ก ์ผ๋ฐ˜์ ์ธ ํŒ
 
831
  - ๊ฐ ํƒญ์˜ ์˜ˆ์ œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ์‚ฌ์šฉ๋ฒ• ๏ฟฝ๏ฟฝํžˆ๊ธฐ
832
  - ๋ฐ์ดํ„ฐ ํ’ˆ์งˆ์ด ์ข‹์„์ˆ˜๋ก ๋” ๋‚˜์€ ๊ฒฐ๊ณผ ์ œ๊ณต
833
  - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
834
  - ๋Œ€์šฉ๋Ÿ‰ ์ฒ˜๋ฆฌ ์‹œ ์ ์ ˆํ•œ ์ฒญํฌ ํฌ๊ธฐ๋กœ ๋ถ„ํ•  ์ฒ˜๋ฆฌ
835
 
836
  ## โš ๏ธ ์ฃผ์˜์‚ฌํ•ญ
 
837
  - ๋ฏผ๊ฐํ•œ ๊ฐœ์ธ์ •๋ณด ํฌํ•จํ•˜์ง€ ์•Š๊ธฐ
838
  - ๋ฐ์ดํ„ฐ ๋ฐฑ์—… ๊ถŒ์žฅ
839
  - ๋„คํŠธ์›Œํฌ ์ƒํƒœ ํ™•์ธ
840
  - ๋ธŒ๋ผ์šฐ์ € ์บ์‹œ ์ฃผ๊ธฐ์  ์ •๋ฆฌ
841
 
842
  ## ๐Ÿ” ๋ฌธ์ œ ํ•ด๊ฒฐ
 
843
  - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
844
  - ํŒŒ์ผ ์—…๋กœ๋“œ ์‹คํŒจ ์‹œ ํŒŒ์ผ ํฌ๊ธฐ ๋ฐ ํ˜•์‹ ํ™•์ธ
845
  - ๋ณ€ํ™˜ ์‹คํŒจ ์‹œ ๋ฐ์ดํ„ฐ ์ธ์ฝ”๋”ฉ ํ™•์ธ
846
  - ์‘๋‹ต์ด ๋Š๋ฆด ๊ฒฝ์šฐ ๋ฐ์ดํ„ฐ ํฌ๊ธฐ ์กฐ์ •
847
  """)
848
 
849
-
850
  gr.Markdown("### [email protected]", elem_id="initial-description")
851
 
852
  if __name__ == "__main__":
853
- demo.launch(share=True)
 
7
  import io
8
  import traceback
9
  import csv
 
10
  from openai import OpenAI
11
+ from functools import lru_cache
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ import math
14
+
15
+ # CSS ์„ค์ •
16
+ css = """
17
+ footer {
18
+ visibility: hidden;
19
+ }
20
+ #chatbot-container, #chatbot-data-upload {
21
+ height: 700px;
22
+ overflow-y: scroll;
23
+ }
24
+ #chatbot-container .message, #chatbot-data-upload .message {
25
+ font-size: 14px;
26
+ }
27
+ /* ์ž…๋ ฅ์ฐฝ ๋ฐฐ๊ฒฝ์ƒ‰ ๋ฐ ๊ธ€์ž์ƒ‰ ๋ณ€๊ฒฝ */
28
+ textarea, input[type="text"] {
29
+ background-color: #ffffff;
30
+ color: #000000;
31
+ }
32
+ /* ํŒŒ์ผ ์—…๋กœ๋“œ ์˜์—ญ ๋†’์ด ์กฐ์ ˆ */
33
+ #parquet-upload-area {
34
+ max-height: 150px;
35
+ overflow-y: auto;
36
+ }
37
+ /* ์ดˆ๊ธฐ ์„ค๋ช… ๊ธ€์”จ ํฌ๊ธฐ ์กฐ์ ˆ */
38
+ #initial-description {
39
+ font-size: 14px;
40
+ }
41
+ /* API Key ์ž…๋ ฅ ์„น์…˜ ์Šคํƒ€์ผ */
42
+ .api-key-section {
43
+ margin: 10px 0;
44
+ padding: 10px;
45
+ border: 1px solid #ddd;
46
+ border-radius: 5px;
47
+ }
48
+ .api-key-status {
49
+ margin-top: 5px;
50
+ font-weight: bold;
51
+ }
52
+ """
53
 
54
  # ์ถ”๋ก  API ํด๋ผ์ด์–ธํŠธ ์„ค์ •
55
  hf_client = InferenceClient(
 
74
  except Exception as e:
75
  return f"ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def clean_response(text: str) -> str:
78
  """์‘๋‹ต ํ…์ŠคํŠธ ์ •์ œ ํ•จ์ˆ˜"""
 
79
  sentences = [s.strip() for s in text.split('.') if s.strip()]
 
 
80
  unique_sentences = []
81
  seen = set()
82
 
83
  for sentence in sentences:
 
84
  normalized = ' '.join(sentence.lower().split())
85
  if normalized not in seen:
86
  seen.add(normalized)
87
  unique_sentences.append(sentence)
88
 
 
89
  cleaned_text = '. '.join(unique_sentences)
90
  if cleaned_text and not cleaned_text.endswith('.'):
91
  cleaned_text += '.'
92
 
93
  return cleaned_text
94
+
95
  def remove_duplicates(text: str) -> str:
96
  """์ค‘๋ณต ๋ฌธ์žฅ ์ œ๊ฑฐ ํ•จ์ˆ˜"""
97
  sentences = text.split('.')
 
108
 
109
  def upload_csv(file_path: str) -> Tuple[str, str]:
110
  try:
 
111
  df = pd.read_csv(file_path, sep=',')
 
112
  required_columns = {'id', 'text', 'label', 'metadata'}
113
  available_columns = set(df.columns)
114
  missing_columns = required_columns - available_columns
115
  if missing_columns:
116
  return f"CSV ํŒŒ์ผ์— ๋‹ค์Œ ํ•„์ˆ˜ ์ปฌ๋Ÿผ์ด ๋ˆ„๋ฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค: {', '.join(missing_columns)}", ""
117
+
118
  df.drop_duplicates(inplace=True)
119
  df.fillna('', inplace=True)
 
120
  df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
121
+
122
  parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
123
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
124
  return f"{parquet_filename} ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜๊ณ  ๋ณ€ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_filename
 
127
 
128
  def upload_parquet(file_path: str) -> Tuple[str, str, str]:
129
  try:
 
130
  df = pd.read_parquet(file_path, engine='pyarrow')
131
 
 
132
  data_info = {
133
  "์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜": len(df),
134
  "์ปฌ๋Ÿผ ๋ชฉ๋ก": list(df.columns),
 
136
  "๊ฒฐ์ธก์น˜ ์ •๋ณด": df.isnull().sum().to_dict()
137
  }
138
 
 
139
  summary = []
140
  summary.append(f"### ๋ฐ์ดํ„ฐ์…‹ ๊ธฐ๋ณธ ์ •๋ณด:")
141
  summary.append(f"- ์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜: {data_info['์ด ๋ ˆ์ฝ”๋“œ ์ˆ˜']}")
142
  summary.append(f"- ์ปฌ๋Ÿผ ๋ชฉ๋ก: {', '.join(data_info['์ปฌ๋Ÿผ ๋ชฉ๋ก'])}")
143
 
 
144
  summary.append("\n### ์ปฌ๋Ÿผ๋ณ„ ์ •๋ณด:")
145
  for col in df.columns:
146
  if df[col].dtype in ['int64', 'float64']:
 
147
  stats = df[col].describe()
148
  summary.append(f"\n{col} (์ˆ˜์น˜ํ˜•):")
149
  summary.append(f"- ํ‰๊ท : {stats['mean']:.2f}")
150
  summary.append(f"- ์ตœ์†Œ: {stats['min']}")
151
  summary.append(f"- ์ตœ๋Œ€: {stats['max']}")
152
  elif df[col].dtype == 'object' or df[col].dtype == 'string':
 
153
  unique_count = df[col].nunique()
154
  summary.append(f"\n{col} (ํ…์ŠคํŠธ):")
155
  summary.append(f"- ๊ณ ์œ ๊ฐ’ ์ˆ˜: {unique_count}")
156
+ if unique_count < 10:
157
  value_counts = df[col].value_counts().head(5)
158
  summary.append("- ์ƒ์œ„ 5๊ฐœ ๊ฐ’:")
159
  for val, count in value_counts.items():
160
  summary.append(f" โ€ข {val}: {count}๊ฐœ")
161
 
 
162
  preview = df.head(10).to_markdown(index=False)
163
  summary.append("\n### ๋ฐ์ดํ„ฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:")
164
  summary.append(preview)
165
 
166
  parquet_content = "\n".join(summary)
 
 
167
  parquet_json = df.to_json(orient='records', force_ascii=False)
168
 
169
  return "Parquet ํŒŒ์ผ์ด ์„ฑ๊ณต์ ์œผ๋กœ ์—…๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", parquet_content, parquet_json
170
  except Exception as e:
171
  return f"Parquet ํŒŒ์ผ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", "", ""
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  def text_to_parquet(text: str) -> Tuple[str, str, str]:
174
  try:
 
175
  lines = [line.strip() for line in text.split('\n') if line.strip()]
 
 
176
  data = []
177
 
178
  for line in lines:
179
  try:
 
180
  import re
181
  pattern = r'(\d+),([^,]+),([^,]+),(.+)'
182
  match = re.match(pattern, line)
183
 
184
  if match:
185
  id_val, text_val, label_val, metadata_val = match.groups()
 
 
186
  text_val = text_val.strip().strip('"')
187
  label_val = label_val.strip().strip('"')
188
  metadata_val = metadata_val.strip().strip('"')
 
200
  if not data:
201
  return "๋ณ€ํ™˜ํ•  ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", ""
202
 
 
203
  df = pd.DataFrame(data)
 
 
204
  df = df.astype({
205
  'id': 'int32',
206
  'text': 'string',
 
208
  'metadata': 'string'
209
  })
210
 
 
211
  parquet_filename = 'text_to_parquet.parquet'
212
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
 
 
213
  preview = df.to_markdown(index=False)
214
 
215
  return (
 
223
  print(f"{error_message}\n{traceback.format_exc()}")
224
  return error_message, "", ""
225
 
226
+ def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None, api_key: str = None) -> str:
227
+ if not api_key:
228
+ yield "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
229
+ return
 
 
230
 
231
+ # OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
232
+ client = OpenAI(api_key=api_key)
233
+
234
+ system_prefix = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ. ๋„ˆ๋Š” ์—…๋กœ๋“œ๋œ ๋ฐ์ดํ„ฐ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜๋Š” ์—ญํ• ์„ ํ•œ๋‹ค.
235
+
236
+ ์ฃผ์š” ์ง€์นจ:
237
+ 1. ์งˆ๋ฌธ๊ณผ ์ง์ ‘ ๊ด€๋ จ๋œ ๋‚ด์šฉ๋งŒ ๊ฐ„๋‹จ๋ช…๋ฃŒํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•  ๊ฒƒ
238
+ 2. ์ด์ „ ๋‹ต๋ณ€๊ณผ ์ค‘๋ณต๋˜๋Š” ๋‚ด์šฉ์€ ์ œ์™ธํ•  ๊ฒƒ
239
+ 3. ๋ถˆํ•„์š”ํ•œ ์˜ˆ์‹œ๋‚˜ ๋ถ€์—ฐ ์„ค๋ช…์€ ํ•˜์ง€ ๋ง ๊ฒƒ
240
+ 4. ๋™์ผํ•œ ๋‚ด์šฉ์„ ๋‹ค๋ฅธ ํ‘œํ˜„์œผ๋กœ ๋ฐ˜๋ณตํ•˜์ง€ ๋ง ๊ฒƒ
241
+ 5. ํ•ต์‹ฌ ์ •๋ณด๋งŒ ์ „๋‹ฌํ•  ๊ฒƒ
242
+ """
243
+
244
+ if parquet_data:
245
+ try:
246
+ df = pd.read_json(io.StringIO(parquet_data))
247
+ data_summary = df.describe(include='all').to_string()
248
+ system_prefix += f"\n\n๋ฐ์ดํ„ฐ ์š”์•ฝ:\n{data_summary}"
249
+ except Exception as e:
250
+ print(f"๋ฐ์ดํ„ฐ ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}")
251
+
252
+ messages = [{"role": "system", "content": system_prefix}]
253
+ recent_history = history[-3:] if history else []
254
+ for chat in recent_history:
255
+ messages.append({"role": chat["role"], "content": chat["content"]})
256
+
257
+ messages.append({"role": "user", "content": message})
258
 
259
  try:
260
  response = client.chat.completions.create(
261
  model="gpt-4-0125-preview",
262
+ messages=messages,
263
+ max_tokens=max_tokens,
264
+ temperature=temperature,
265
+ top_p=top_p,
 
 
266
  stream=True
267
  )
268
 
 
270
  for chunk in response:
271
  if chunk.choices[0].delta.content:
272
  full_response += chunk.choices[0].delta.content
273
+ yield clean_response(full_response)
274
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  except Exception as e:
276
+ error_message = f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
277
+ print(f"{error_message}\n{traceback.format_exc()}")
278
+ yield error_message
279
+
280
+ def preprocess_text_with_llm(input_text: str, api_key: str = None) -> str:
281
+ if not api_key:
282
+ return "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
283
+
284
+ # OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
285
+ client = OpenAI(api_key=api_key)
286
 
287
  system_prompt = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ๋‹ต๋ณ€ํ•˜์‹œ์˜ค. ๋‹น์‹ ์€ ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. ์ž…๋ ฅ๋œ ํ…์ŠคํŠธ๋ฅผ CSV ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜์„ธ์š”.
288
 
 
300
 
301
  try:
302
  response = client.chat.completions.create(
303
+ model="gpt-4-0125-preview",
304
  messages=[
305
  {"role": "system", "content": system_prompt},
306
  {"role": "user", "content": input_text}
 
315
  if chunk.choices[0].delta.content:
316
  full_response += chunk.choices[0].delta.content
317
 
 
318
  processed_text = clean_response(full_response)
319
 
 
320
  try:
321
  from io import StringIO
322
  import csv
 
330
  print(error_message)
331
  return error_message
332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  # Gradio Blocks ์ธํ„ฐํŽ˜์ด์Šค ์„ค์ •
335
  with gr.Blocks(css=css) as demo:
336
+ api_key_state = gr.State("") # API ํ‚ค๋ฅผ ์ €์žฅํ•  State ์ถ”๊ฐ€
337
+
338
  gr.Markdown("# MyEzRAG: LLM์ด ๋‚˜๋งŒ์˜ ๋ฐ์ดํ„ฐ๋กœ ํ•™์Šตํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ/๋‹ต๋ณ€", elem_id="initial-description")
339
+
340
+ # API ํ‚ค ์ž…๋ ฅ ์„น์…˜ ์ถ”๊ฐ€
341
+ with gr.Row(elem_classes="api-key-section"):
342
+ with gr.Column(scale=3):
343
+ api_key_input = gr.Textbox(
344
+ label="OpenAI API Key",
345
+ placeholder="sk-...",
346
+ type="password",
347
+ show_label=True
348
+ )
349
+ with gr.Column(scale=1):
350
+ api_key_button = gr.Button("API Key ์„ค์ •", variant="primary")
351
+
352
+ # API ํ‚ค ์ƒํƒœ ํ‘œ์‹œ
353
+ api_key_status = gr.Markdown("โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.", elem_classes="api-key-status")
354
+
355
+ # API ํ‚ค ์„ค์ • ํ•จ์ˆ˜
356
+ def set_api_key(api_key: str):
357
+ if not api_key.strip():
358
+ return "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.", ""
359
+ if not api_key.startswith("sk-"):
360
+ return "โŒ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์€ API Key ํ˜•๏ฟฝ๏ฟฝ๏ฟฝ์ž…๋‹ˆ๋‹ค. ๋‹ค์‹œ ํ™•์ธํ•ด์ฃผ์„ธ์š”.", ""
361
+ return "โœ… API Key๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์„ค์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", api_key
362
+
363
+ # API ํ‚ค ์„ค์ • ์ด๋ฒคํŠธ ์—ฐ๊ฒฐ
364
+ api_key_button.click(
365
+ set_api_key,
366
+ inputs=[api_key_input],
367
+ outputs=[api_key_status, api_key_state]
368
+ )
369
+
370
  gr.Markdown(
371
  "### '์‚ฌ์šฉ ๋ฐฉ๋ฒ•' ํƒญ์„ ํ†ตํ•ด ์ž์„ธํ•œ ์ด์šฉ ๋ฐฉ๋ฒ•์„ ์ฐธ๊ณ ํ•˜์„ธ์š”.\n"
372
  "### Tip) '์˜ˆ์ œ'๋ฅผ ํ†ตํ•ด ๋‹ค์–‘ํ•œ ํ™œ์šฉ ๋ฐฉ๋ฒ•์„ ์ฒดํ—˜ํ•˜๊ณ  ์‘์šฉํ•ด ๋ณด์„ธ์š”, ๋ฐ์ดํ„ฐ์…‹ ์—…๋กœ๋“œ์‹œ ๋ฏธ๋ฆฌ๋ณด๊ธฐ๋Š” 10๊ฑด๋งŒ ์ถœ๋ ฅ",
373
  elem_id="initial-description"
374
  )
375
 
376
+ # ์ฒซ ๋ฒˆ์งธ ํƒญ: My ๋ฐ์ดํ„ฐ์…‹+LLM
 
 
377
  with gr.Tab("My ๋ฐ์ดํ„ฐ์…‹+LLM"):
378
  gr.Markdown("### LLM๊ณผ ๋Œ€ํ™”ํ•˜๊ธฐ")
379
  chatbot_data_upload = gr.Chatbot(label="์ฑ—๋ด‡", type="messages", elem_id="chatbot-data-upload")
 
388
 
389
  parquet_data_state = gr.State()
390
 
391
+ def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: str, api_key: str):
392
+ if not api_key:
393
+ history = history or []
394
+ history.append({"role": "assistant", "content": "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋น„์Šค ์ด์šฉ์„ ์œ„ํ•ด API Key๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."})
395
+ yield history, ""
396
+ return
397
+
398
  history = history or []
 
 
399
  recent_questions = [chat['content'].strip().lower() for chat in history[-3:] if chat['role'] == 'user']
400
  if message.strip().lower() in recent_questions:
401
  yield history + [{"role": "assistant", "content": "๋™์ผํ•œ ์งˆ๋ฌธ์ด ์ตœ๊ทผ์— ์žˆ์—ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค๋ฅธ ์งˆ๋ฌธ์„ ํ•ด์ฃผ์„ธ์š”."}], ""
 
408
  history,
409
  system_message,
410
  max_tokens,
411
+ temperature=0.3,
412
  top_p=top_p,
413
+ parquet_data=parquet_data,
414
+ api_key=api_key
415
  )
416
 
417
  partial_response = ""
 
426
  history.append({"role": "assistant", "content": response})
427
  yield history, ""
428
 
 
 
 
429
  send_data_upload.click(
430
  handle_message_data_upload,
431
  inputs=[
 
435
  max_tokens,
436
  temperature,
437
  top_p,
438
+ parquet_data_state,
439
+ api_key_state,
440
  ],
441
  outputs=[chatbot_data_upload, msg_data_upload],
442
  queue=True
443
  )
444
 
445
+ # ์˜ˆ์ œ ์ถ”๊ฐ€
446
  with gr.Accordion("์˜ˆ์ œ", open=False):
447
  gr.Examples(
448
  examples=[
 
457
  label="์˜ˆ์ œ ์„ ํƒ",
458
  )
459
 
460
+ # Parquet ํŒŒ์ผ ์—…๋กœ๋“œ
461
  gr.Markdown("### Parquet ํŒŒ์ผ ์—…๋กœ๋“œ")
462
  with gr.Row():
463
  with gr.Column():
 
481
  outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
482
  )
483
 
484
+ # ๋‘ ๋ฒˆ์งธ ํƒญ: CSV to My ๋ฐ์ดํ„ฐ์…‹
485
  with gr.Tab("CSV to My ๋ฐ์ดํ„ฐ์…‹"):
486
  gr.Markdown("### CSV ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ Parquet ๋ณ€ํ™˜")
487
  with gr.Row():
 
506
  outputs=[upload_status, parquet_preview, download_button]
507
  )
508
 
509
+ # ์„ธ ๋ฒˆ์งธ ํƒญ: Text to My ๋ฐ์ดํ„ฐ์…‹
510
  with gr.Tab("Text to My ๋ฐ์ดํ„ฐ์…‹"):
511
  gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด CSV๋กœ ๋ณ€ํ™˜ ํ›„ Parquet์œผ๋กœ ์ž๋™ ์ „ํ™˜๋ฉ๋‹ˆ๋‹ค.")
512
  with gr.Row():
 
534
  outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
535
  )
536
 
537
+ # ๋„ค ๋ฒˆ์งธ ํƒญ: Text Preprocessing with LLM
538
  with gr.Tab("Text Preprocessing with LLM"):
539
  gr.Markdown("### ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด LLM์ด ๋ฐ์ดํ„ฐ์…‹ ํ˜•์‹์— ๋งž๊ฒŒ ์ „์ฒ˜๋ฆฌํ•˜์—ฌ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
540
  with gr.Row():
 
561
  interactive=False
562
  )
563
 
 
564
  convert_to_parquet_button = gr.Button("Parquet์œผ๋กœ ๋ณ€ํ™˜")
565
  download_parquet = gr.File(label="๋ณ€ํ™˜๋œ Parquet ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ")
566
 
567
+ def handle_text_preprocessing(input_text: str, api_key: str):
568
+ if not api_key:
569
+ yield "โš ๏ธ API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.", ""
570
+ return
571
+
572
  if not input_text.strip():
573
+ yield "์ž…๋ ฅ ํ…์ŠคํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
574
+ return
575
 
576
  try:
577
+ yield "์ „์ฒ˜๋ฆฌ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค...", ""
578
+ processed_text = preprocess_text_with_llm(input_text, api_key)
 
 
579
 
580
  if processed_text:
581
+ yield "์ „์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.", processed_text
 
582
  else:
583
+ yield "์ „์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
 
584
 
585
  except Exception as e:
586
+ yield f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}", ""
 
587
 
588
  def clear_inputs():
589
  return "", "๋Œ€๊ธฐ ์ค‘...", ""
 
600
  except Exception as e:
601
  return f"Parquet ๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", None
602
 
 
603
  preprocess_button.click(
604
  handle_text_preprocessing,
605
+ inputs=[raw_text_input, api_key_state],
606
  outputs=[preprocess_status, processed_text_output],
607
  queue=True
608
  )
 
618
  outputs=[preprocess_status, download_parquet]
619
  )
620
 
 
621
  with gr.Accordion("์˜ˆ์ œ ํ…์ŠคํŠธ", open=False):
622
  gr.Examples(
623
  examples=[
 
628
  label="์˜ˆ์ œ ์„ ํƒ"
629
  )
630
 
631
+ # ์‚ฌ์šฉ ๋ฐฉ๋ฒ• ํƒญ
632
  with gr.Tab("๐Ÿ“š ์‚ฌ์šฉ ๋ฐฉ๋ฒ•"):
633
  gr.Markdown("""
634
  # MyEzRAG ์‚ฌ์šฉ ๊ฐ€์ด๋“œ
635
 
636
+ ## ๐Ÿ”‘ API Key ์„ค์ •
637
+ 1. OpenAI API Key๋ฅผ ์ƒ๋‹จ ์ž…๋ ฅ์ฐฝ์— ์ž…๋ ฅ
638
+ 2. 'API Key ์„ค์ •' ๋ฒ„ํŠผ ํด๋ฆญ
639
+ 3. ์„ค์ • ์„ฑ๊ณต ๋ฉ”์‹œ์ง€ ํ™•์ธ
640
+
641
  ## 1๏ธโƒฃ My ๋ฐ์ดํ„ฐ์…‹+LLM ํƒญ
 
642
  ### ๊ธฐ๋Šฅ
643
  - ์—…๋กœ๋“œ๋œ Parquet ๋ฐ์ดํ„ฐ์…‹์„ ๊ธฐ๋ฐ˜์œผ๋กœ LLM๊ณผ ๋Œ€ํ™”
644
  - ๋ฐ์ดํ„ฐ์…‹์˜ ๋‚ด์šฉ์„ ํ™œ์šฉํ•œ ์ฝ˜ํ…์ธ  ์ƒ์„ฑ
 
655
  ---
656
 
657
  ## 2๏ธโƒฃ CSV to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
 
658
  ### ๊ธฐ๋Šฅ
659
  - CSV ํŒŒ์ผ์„ Parquet ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜
660
  - ๋ฐ์ดํ„ฐ ์ตœ์ ํ™” ๋ฐ ์ •์ œ
 
671
  ---
672
 
673
  ## 3๏ธโƒฃ Text to My ๋ฐ์ดํ„ฐ์…‹ ํƒญ
 
674
  ### ๊ธฐ๋Šฅ
675
  - ํ…์ŠคํŠธ ํ˜•์‹์˜ ๋ฐ์ดํ„ฐ๋ฅผ Parquet์œผ๋กœ ๋ณ€ํ™˜
676
  - ์ˆ˜๋™ ๋ฐ์ดํ„ฐ ์ž…๋ ฅ ์ง€์›
 
693
  ---
694
 
695
  ## 4๏ธโƒฃ Text Preprocessing with LLM ํƒญ
 
696
  ### ๊ธฐ๋Šฅ
697
  - LLM์„ ํ™œ์šฉํ•œ ์ž๋™ ํ…์ŠคํŠธ ์ „์ฒ˜๋ฆฌ
698
  - ๊ตฌ์กฐํ™”๋œ ๋ฐ์ดํ„ฐ์…‹ ์ƒ์„ฑ
 
709
  - ๋ฐ์ดํ„ฐ ์ •๊ทœํ™”
710
 
711
  ## ๐Ÿ’ก ์ผ๋ฐ˜์ ์ธ ํŒ
712
+ - API Key๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๋ณด๊ด€ํ•˜๊ณ  ์ฃผ๊ธฐ์ ์œผ๋กœ ๊ฐฑ์‹ 
713
  - ๊ฐ ํƒญ์˜ ์˜ˆ์ œ๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ ์‚ฌ์šฉ๋ฒ• ๏ฟฝ๏ฟฝํžˆ๊ธฐ
714
  - ๋ฐ์ดํ„ฐ ํ’ˆ์งˆ์ด ์ข‹์„์ˆ˜๋ก ๋” ๋‚˜์€ ๊ฒฐ๊ณผ ์ œ๊ณต
715
  - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
716
  - ๋Œ€์šฉ๋Ÿ‰ ์ฒ˜๋ฆฌ ์‹œ ์ ์ ˆํ•œ ์ฒญํฌ ํฌ๊ธฐ๋กœ ๋ถ„ํ•  ์ฒ˜๋ฆฌ
717
 
718
  ## โš ๏ธ ์ฃผ์˜์‚ฌํ•ญ
719
+ - API Key๋ฅผ ํƒ€์ธ๊ณผ ๊ณต์œ ํ•˜์ง€ ์•Š๊ธฐ
720
  - ๋ฏผ๊ฐํ•œ ๊ฐœ์ธ์ •๋ณด ํฌํ•จํ•˜์ง€ ์•Š๊ธฐ
721
  - ๋ฐ์ดํ„ฐ ๋ฐฑ์—… ๊ถŒ์žฅ
722
  - ๋„คํŠธ์›Œํฌ ์ƒํƒœ ํ™•์ธ
723
  - ๋ธŒ๋ผ์šฐ์ € ์บ์‹œ ์ฃผ๊ธฐ์  ์ •๋ฆฌ
724
 
725
  ## ๐Ÿ” ๋ฌธ์ œ ํ•ด๊ฒฐ
726
+ - API Key ์˜ค๋ฅ˜: ํ‚ค ํ˜•์‹ ๋ฐ ์œ ํšจ์„ฑ ํ™•์ธ
727
  - ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ
728
  - ํŒŒ์ผ ์—…๋กœ๋“œ ์‹คํŒจ ์‹œ ํŒŒ์ผ ํฌ๊ธฐ ๋ฐ ํ˜•์‹ ํ™•์ธ
729
  - ๋ณ€ํ™˜ ์‹คํŒจ ์‹œ ๋ฐ์ดํ„ฐ ์ธ์ฝ”๋”ฉ ํ™•์ธ
730
  - ์‘๋‹ต์ด ๋Š๋ฆด ๊ฒฝ์šฐ ๋ฐ์ดํ„ฐ ํฌ๊ธฐ ์กฐ์ •
731
  """)
732
 
 
733
  gr.Markdown("### [email protected]", elem_id="initial-description")
734
 
735
  if __name__ == "__main__":
736
+ demo.launch(share=True)