ginipick committed on
Commit
2bb387f
•
1 Parent(s): f6e3821

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -11
app.py CHANGED
@@ -135,10 +135,13 @@ def upload_parquet(file_path):
135
 
136
  def text_to_parquet(text):
137
  try:
138
- # 텍스트를 DataFrame으로 변환 (예시: 각 줄을 새로운 행으로)
139
  data = [line.split(',') for line in text.strip().split('\n')]
140
  df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
141
 
 
 
 
142
  # Parquet 파일로 변환
143
  parquet_filename = 'text_to_parquet.parquet'
144
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
@@ -150,7 +153,7 @@ def text_to_parquet(text):
150
  with open(parquet_filename, "rb") as f:
151
  data = f.read()
152
 
153
- return f"{parquet_filename} 파일이 μ„±κ³΅μ μœΌλ‘œ λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_content, (parquet_filename, data)
154
  except Exception as e:
155
  return f"ν…μŠ€νŠΈ λ³€ν™˜ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", "", None
156
 
@@ -158,10 +161,18 @@ css = """
158
  footer {
159
  visibility: hidden;
160
  }
161
- .chatbot-container {
162
  height: 600px;
 
163
  }
164
- .chatbot-container .message {
 
 
 
 
 
 
 
165
  font-size: 14px;
166
  }
167
  """
@@ -173,7 +184,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
173
  with gr.Tab("챗봇"):
174
  gr.Markdown("### LLMκ³Ό λŒ€ν™”ν•˜κΈ°")
175
  chatbot = gr.Chatbot(label="챗봇", elem_id="chatbot-container")
176
- msg = gr.Textbox(label="λ©”μ‹œμ§€ μž…λ ₯")
177
  send = gr.Button("전솑")
178
 
179
  with gr.Accordion("μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ 및 μ˜΅μ…˜ μ„€μ •", open=False):
@@ -204,6 +215,25 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
204
  outputs=[chatbot, msg]
205
  )
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  with gr.Tab("데이터 λ³€ν™˜"):
208
  gr.Markdown("### CSV 파일 μ—…λ‘œλ“œ 및 Parquet λ³€ν™˜")
209
  with gr.Row():
@@ -221,7 +251,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
221
  # 파일을 다운로드할 수 있도록 바이너리 데이터로 읽기
222
  with open(parquet_filename, "rb") as f:
223
  data = f.read()
224
- return message, load_parquet(parquet_filename), (parquet_filename, data)
225
  else:
226
  return message, "", None
227
 
@@ -248,9 +278,9 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
248
  def handle_parquet_upload(file_path):
249
  message, parquet_content, parquet_json = upload_parquet(file_path)
250
  if parquet_json:
251
- return message, parquet_preview_chat.update(value=parquet_content), parquet_data_state.update(value=parquet_json)
252
  else:
253
- return message, gr.Markdown.update(value=""), parquet_data_state.update(value=None)
254
 
255
  parquet_upload_button.click(
256
  handle_parquet_upload,
@@ -260,12 +290,13 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
260
 
261
  gr.Markdown("### LLMκ³Ό λŒ€ν™”ν•˜κΈ°")
262
  chatbot_data_upload = gr.Chatbot(label="챗봇 데이터 μ—…λ‘œλ“œ", elem_id="chatbot-data-upload")
263
- msg_data_upload = gr.Textbox(label="λ©”μ‹œμ§€ μž…λ ₯")
264
  send_data_upload = gr.Button("전솑")
265
 
266
  # 챗봇 메시지 처리 함수 (데이터 업로드 버전)
267
  def handle_message_data_upload(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
268
- # Update history with user message
 
269
  history = history or []
270
  history.append({"role": "user", "content": message})
271
  # Generate response
@@ -289,12 +320,13 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
289
  gr.Markdown("### ν…μŠ€νŠΈλ₯Ό μž…λ ₯ν•˜λ©΄ CSV둜 λ³€ν™˜ ν›„ Parquet으둜 μžλ™ μ „ν™˜λ©λ‹ˆλ‹€.")
290
  with gr.Row():
291
  with gr.Column():
292
- text_input = gr.Textbox(label="ν…μŠ€νŠΈ μž…λ ₯ (각 행은 `id,text,label,metadata` ν˜•μ‹μœΌλ‘œ μž…λ ₯)", lines=10)
293
  convert_button = gr.Button("λ³€ν™˜ 및 λ‹€μš΄λ‘œλ“œ")
294
  convert_status = gr.Textbox(label="λ³€ν™˜ μƒνƒœ", interactive=False)
295
  parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
296
  download_parquet_convert = gr.File(label="Parquet 파일 λ‹€μš΄λ‘œλ“œ", type="binary", interactive=False)
297
 
 
298
  def handle_text_to_parquet(text):
299
  message, parquet_content, file_data = text_to_parquet(text)
300
  if file_data:
@@ -322,3 +354,4 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
322
 
323
  if __name__ == "__main__":
324
  demo.launch()
 
 
135
 
136
  def text_to_parquet(text):
137
  try:
138
+ # 텍스트를 DataFrame으로 변환 (예시: 각 줄을 새로운 행으로, 콤마로 구분)
139
  data = [line.split(',') for line in text.strip().split('\n')]
140
  df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
141
 
142
+ # 데이터 유형 최적화
143
+ df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
144
+
145
  # Parquet 파일로 변환
146
  parquet_filename = 'text_to_parquet.parquet'
147
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
 
153
  with open(parquet_filename, "rb") as f:
154
  data = f.read()
155
 
156
+ return f"{parquet_filename} 파일이 μ„±κ³΅μ μœΌλ‘œ λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_content, data
157
  except Exception as e:
158
  return f"ν…μŠ€νŠΈ λ³€ν™˜ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", "", None
159
 
 
161
  footer {
162
  visibility: hidden;
163
  }
164
+ #chatbot-container {
165
  height: 600px;
166
+ overflow-y: scroll;
167
  }
168
+ #chatbot-container .message {
169
+ font-size: 14px;
170
+ }
171
+ #chatbot-data-upload {
172
+ height: 600px;
173
+ overflow-y: scroll;
174
+ }
175
+ #chatbot-data-upload .message {
176
  font-size: 14px;
177
  }
178
  """
 
184
  with gr.Tab("챗봇"):
185
  gr.Markdown("### LLMκ³Ό λŒ€ν™”ν•˜κΈ°")
186
  chatbot = gr.Chatbot(label="챗봇", elem_id="chatbot-container")
187
+ msg = gr.Textbox(label="λ©”μ‹œμ§€ μž…λ ₯", placeholder="여기에 λ©”μ‹œμ§€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...")
188
  send = gr.Button("전솑")
189
 
190
  with gr.Accordion("μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ 및 μ˜΅μ…˜ μ„€μ •", open=False):
 
215
  outputs=[chatbot, msg]
216
  )
217
 
218
+ # 예제 버튼 (복원된 examples)
219
+ with gr.Accordion("예제", open=False):
220
+ example_buttons = gr.Examples(
221
+ examples=[
222
+ ["νŒ¨μ…˜ μ½”λ“œ μ‹€ν–‰"],
223
+ ["UHD 이미지 μ½”λ“œ μ‹€ν–‰"],
224
+ ["MixGEN μ½”λ“œ μ‹€ν–‰"],
225
+ ["test.parquet μ‹€ν–‰"],
226
+ ["μƒμ„Έν•œ μ‚¬μš© 방법을 마치 화면을 λ³΄λ©΄μ„œ μ„€λͺ…ν•˜λ“―이 4000 토큰 이상 μžμ„Ένžˆ μ„€λͺ…ν•˜λΌ"],
227
+ ["FAQ 20건을 μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜λΌ. 4000토큰 이상 μ‚¬μš©ν•˜λΌ."],
228
+ ["μ‚¬μš© 방법과 차별점, νŠΉμ§•, 강점을 μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 유튜브 μ˜μƒ 슀크립트 ν˜•νƒœλ‘œ μž‘μ„±ν•˜λΌ"],
229
+ ["λ³Έ μ„œλΉ„μŠ€λ₯Ό SEO μ΅œμ ν™”ν•˜μ—¬ λΈ”λ‘œκ·Έ 포슀트(λ°°κ²½ 및 ν•„μš”μ„±, κΈ°μ‘΄ μœ μ‚¬ μ„œλΉ„μŠ€μ™€ λΉ„κ΅ν•˜μ—¬ 특μž₯점, ν™œμš©μ²˜, κ°€μΉ˜, κΈ°λŒ€νš¨κ³Ό, 결둠을 포함)둜 4000 토큰 이상 μž‘μ„±ν•˜λΌ"],
230
+ ["νŠΉν—ˆ μΆœμ›μ— ν™œμš©ν•  기술 및 λΉ„μ¦ˆλ‹ˆμŠ€λͺ¨λΈ 츑면을 ν¬ν•¨ν•˜μ—¬ νŠΉν—ˆ μΆœμ›μ„œ ꡬ성에 맞게 ν˜μ‹ μ μΈ 창의 발λͺ… λ‚΄μš©μ„ μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 μž‘μ„±ν•˜λΌ."],
231
+ ["계속 μ΄μ–΄μ„œ λ‹΅λ³€ν•˜λΌ"],
232
+ ],
233
+ inputs=msg,
234
+ label="예제 선택",
235
+ )
236
+
237
  with gr.Tab("데이터 λ³€ν™˜"):
238
  gr.Markdown("### CSV 파일 μ—…λ‘œλ“œ 및 Parquet λ³€ν™˜")
239
  with gr.Row():
 
251
  # 파일을 다운로드할 수 있도록 바이너리 데이터로 읽기
252
  with open(parquet_filename, "rb") as f:
253
  data = f.read()
254
+ return message, load_parquet(parquet_filename), data
255
  else:
256
  return message, "", None
257
 
 
278
  def handle_parquet_upload(file_path):
279
  message, parquet_content, parquet_json = upload_parquet(file_path)
280
  if parquet_json:
281
+ return message, parquet_content, parquet_json
282
  else:
283
+ return message, "", None
284
 
285
  parquet_upload_button.click(
286
  handle_parquet_upload,
 
290
 
291
  gr.Markdown("### LLMκ³Ό λŒ€ν™”ν•˜κΈ°")
292
  chatbot_data_upload = gr.Chatbot(label="챗봇 데이터 μ—…λ‘œλ“œ", elem_id="chatbot-data-upload")
293
+ msg_data_upload = gr.Textbox(label="λ©”μ‹œμ§€ μž…λ ₯", placeholder="여기에 λ©”μ‹œμ§€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...")
294
  send_data_upload = gr.Button("전솑")
295
 
296
  # 챗봇 메시지 처리 함수 (데이터 업로드 버전)
297
  def handle_message_data_upload(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
298
+ # Parquet 데이터가 업로드된 경우, 추가적인 로직을 구현할 수 있음
299
+ # 현재는 기본적으로 메시지를 처리
300
  history = history or []
301
  history.append({"role": "user", "content": message})
302
  # Generate response
 
320
  gr.Markdown("### ν…μŠ€νŠΈλ₯Ό μž…λ ₯ν•˜λ©΄ CSV둜 λ³€ν™˜ ν›„ Parquet으둜 μžλ™ μ „ν™˜λ©λ‹ˆλ‹€.")
321
  with gr.Row():
322
  with gr.Column():
323
+ text_input = gr.Textbox(label="ν…μŠ€νŠΈ μž…λ ₯ (각 행은 `id,text,label,metadata` ν˜•μ‹μœΌλ‘œ μž…λ ₯)", lines=10, placeholder="예: 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2")
324
  convert_button = gr.Button("λ³€ν™˜ 및 λ‹€μš΄λ‘œλ“œ")
325
  convert_status = gr.Textbox(label="λ³€ν™˜ μƒνƒœ", interactive=False)
326
  parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
327
  download_parquet_convert = gr.File(label="Parquet 파일 λ‹€μš΄λ‘œλ“œ", type="binary", interactive=False)
328
 
329
+ # 변환 버튼 클릭 시 실행할 함수
330
  def handle_text_to_parquet(text):
331
  message, parquet_content, file_data = text_to_parquet(text)
332
  if file_data:
 
354
 
355
  if __name__ == "__main__":
356
  demo.launch()
357
+