ginipick committed on
Commit
749529a
•
1 Parent(s): 2442eca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import InferenceClient
3
  import os
4
  import pandas as pd
5
  from typing import List, Dict, Tuple
 
6
 
7
  # 추론 API 클라이언트 설정
8
  hf_client = InferenceClient(
@@ -27,11 +28,6 @@ def load_parquet(filename: str) -> str:
27
  except Exception as e:
28
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
29
 
30
- # 코드 파일 로드
31
- fashion_code = load_code('fashion.cod')
32
- uhdimage_code = load_code('uhdimage.cod')
33
- MixGEN_code = load_code('mgen.cod')
34
-
35
  def respond(
36
  message: str,
37
  history: List[Dict[str, str]],
@@ -42,19 +38,23 @@ def respond(
42
  parquet_data: Dict = None
43
  ) -> str:
44
  # 시스템 프롬프트 설정
45
- system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” 주어진 데이터λ₯Ό 기반으둜 "μ„œλΉ„μŠ€ μ‚¬μš© μ„€λͺ… 및 μ•ˆλ‚΄, Q&Aλ₯Ό ν•˜λŠ” 역할이닀". μ•„μ£Ό μΉœμ ˆν•˜κ³  μžμ„Έν•˜κ²Œ Markdown ν˜•μ‹μœΌλ‘œ μž‘μ„±ν•˜λΌ. λ„ˆλŠ” 데이터λ₯Ό 기반으둜 μ‚¬μš© μ„€λͺ… 및 질의 응닡을 μ§„ν–‰ν•˜λ©°, μ΄μš©μžμ—κ²Œ 도움을 μ£Όμ–΄μ•Ό ν•œλ‹€. μ΄μš©μžκ°€ κΆκΈˆν•΄ν•  λ§Œν•œ λ‚΄μš©μ— μΉœμ ˆν•˜κ²Œ μ•Œλ €μ£Όλ„λ‘ ν•˜λΌ. 데이터 전체 λ‚΄μš©μ— λŒ€ν•΄μ„œλŠ” λ³΄μ•ˆμ„ μœ μ§€ν•˜κ³ , λ―Όκ°ν•œ μ •λ³΄λŠ” κ³΅κ°œν•˜μ§€ 마라."""
46
-
47
- # Parquet 데이터 포함
48
  if parquet_data:
 
 
 
 
49
  df = pd.read_json(parquet_data)
50
- parquet_content = df.head(10).to_markdown(index=False)
51
- system_prefix += f"\n\nμ—…λ‘œλ“œλœ Parquet 파일 λ‚΄μš©:\n```markdown\n{parquet_content}\n```"
 
 
 
 
52
 
53
  # 시스템 메시지와 대화 기록 결합
54
  messages = [{"role": "system", "content": system_prefix}]
55
  for chat in history:
56
  messages.append({"role": chat['role'], "content": chat['content']})
57
- # λ§ˆμ§€λ§‰ λ©”μ‹œμ§€λŠ” 이미 history에 μžˆμœΌλ―€λ‘œ μΆ”κ°€ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.
58
 
59
  try:
60
  # 모델에 메시지 전송 및 응답 받기
@@ -66,7 +66,7 @@ def respond(
66
  temperature=temperature,
67
  top_p=top_p,
68
  ):
69
- token = msg.choices[0].delta.get('content', None)
70
  if token:
71
  response += token
72
  yield response
@@ -101,8 +101,8 @@ def upload_parquet(file_path: str) -> Tuple[str, str, str]:
101
  df = pd.read_parquet(file_path, engine='pyarrow')
102
  # Markdown으로 변환하여 미리보기
103
  parquet_content = df.head(10).to_markdown(index=False)
104
- # DataFrame을 JSON으로 변환
105
- parquet_json = df.to_json()
106
  return "Parquet 파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_content, parquet_json
107
  except Exception as e:
108
  return f"Parquet 파일 μ—…λ‘œλ“œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", "", ""
@@ -113,7 +113,7 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
113
  data = [line.split(',') for line in text.strip().split('\n')]
114
  df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
115
  # 데이터 유형 최적화
116
- df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
117
  # Parquet 파일로 변환
118
  parquet_filename = 'text_to_parquet.parquet'
119
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
@@ -173,6 +173,8 @@ with gr.Blocks(css=css) as demo:
173
  temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
174
  top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
175
 
 
 
176
  def handle_message_data_upload(
177
  message: str,
178
  history: List[Dict[str, str]],
@@ -214,7 +216,7 @@ with gr.Blocks(css=css) as demo:
214
  max_tokens,
215
  temperature,
216
  top_p,
217
- gr.State(), # parquet_data_stateλŠ” μ•„λž˜μ—μ„œ μ—…λ°μ΄νŠΈλ©λ‹ˆλ‹€.
218
  ],
219
  outputs=[chatbot_data_upload, msg_data_upload],
220
  queue=True
@@ -245,7 +247,6 @@ with gr.Blocks(css=css) as demo:
245
  parquet_upload_button = gr.Button("μ—…λ‘œλ“œ")
246
  parquet_upload_status = gr.Textbox(label="μ—…λ‘œλ“œ μƒνƒœ", interactive=False)
247
  parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
248
- parquet_data_state = gr.State()
249
 
250
  def handle_parquet_upload(file_path: str):
251
  message, parquet_content, parquet_json = upload_parquet(file_path)
@@ -293,7 +294,7 @@ with gr.Blocks(css=css) as demo:
293
  text_input = gr.Textbox(
294
  label="ν…μŠ€νŠΈ μž…λ ₯ (각 행은 `id,text,label,metadata` ν˜•μ‹μœΌλ‘œ μž…λ ₯)",
295
  lines=10,
296
- placeholder="예: 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2"
297
  )
298
  convert_button = gr.Button("λ³€ν™˜ 및 λ‹€μš΄λ‘œλ“œ")
299
  convert_status = gr.Textbox(label="λ³€ν™˜ μƒνƒœ", interactive=False)
@@ -317,3 +318,4 @@ with gr.Blocks(css=css) as demo:
317
 
318
  if __name__ == "__main__":
319
  demo.launch()
 
 
3
  import os
4
  import pandas as pd
5
  from typing import List, Dict, Tuple
6
+ import json
7
 
8
  # 추론 API 클라이언트 설정
9
  hf_client = InferenceClient(
 
28
  except Exception as e:
29
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
30
 
 
 
 
 
 
31
  def respond(
32
  message: str,
33
  history: List[Dict[str, str]],
 
38
  parquet_data: Dict = None
39
  ) -> str:
40
  # 시스템 프롬프트 설정
 
 
 
41
  if parquet_data:
42
+ # Parquet 데이터가 업로드된 경우
43
+ system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€. 데이터λ₯Ό λΆ„μ„ν•˜μ—¬ μ‚¬μš©μžμ—κ²Œ 도움이 λ˜λŠ” 정보λ₯Ό μ œκ³΅ν•˜λΌ. 데이터λ₯Ό ν™œμš©ν•˜μ—¬ μƒμ„Έν•˜κ³  μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜λ˜, λ―Όκ°ν•œ μ •λ³΄λ‚˜ 개인 정보λ₯Ό λ…ΈμΆœν•˜μ§€ 마라."""
44
+ # Parquet 데이터를 문자열로 변환하여 시스템 메시지에 포함
45
+ # 데이터가 λ„ˆλ¬΄ 클 경우 토큰 μ œν•œμ΄ μžˆμœΌλ―€λ‘œ, μ£Όμš” λ‚΄μš©μ„ μš”μ•½ν•˜μ—¬ ν¬ν•¨ν•˜κ±°λ‚˜ λ°μ΄ν„°μ˜ ꡬ쑰와 νŠΉμ§•μ„ μ„€λͺ…ν•˜λ„λ‘ ν•œλ‹€.
46
  df = pd.read_json(parquet_data)
47
+ # λ°μ΄ν„°μ˜ μš”μ•½ 정보 생성
48
+ data_summary = df.describe(include='all').to_string()
49
+ system_prefix += f"\n\nμ—…λ‘œλ“œλœ λ°μ΄ν„°μ˜ μš”μ•½ 정보:\n{data_summary}"
50
+ else:
51
+ # Parquet 데이터가 업로드되지 않은 경우
52
+ system_prefix = system_message or "λ„ˆλŠ” AI μ‘°μ–Έμž 역할이닀."
53
 
54
  # 시스템 메시지와 대화 기록 결합
55
  messages = [{"role": "system", "content": system_prefix}]
56
  for chat in history:
57
  messages.append({"role": chat['role'], "content": chat['content']})
 
58
 
59
  try:
60
  # 모델에 메시지 전송 및 응답 받기
 
66
  temperature=temperature,
67
  top_p=top_p,
68
  ):
69
+ token = msg.get('delta', {}).get('content', '')
70
  if token:
71
  response += token
72
  yield response
 
101
  df = pd.read_parquet(file_path, engine='pyarrow')
102
  # Markdown으로 변환하여 미리보기
103
  parquet_content = df.head(10).to_markdown(index=False)
104
+ # DataFrame을 JSON 문자열로 변환
105
+ parquet_json = df.to_json(orient='records', force_ascii=False)
106
  return "Parquet 파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_content, parquet_json
107
  except Exception as e:
108
  return f"Parquet 파일 μ—…λ‘œλ“œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", "", ""
 
113
  data = [line.split(',') for line in text.strip().split('\n')]
114
  df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
115
  # 데이터 유형 최적화
116
+ df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
117
  # Parquet 파일로 변환
118
  parquet_filename = 'text_to_parquet.parquet'
119
  df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
 
173
  temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
174
  top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
175
 
176
+ parquet_data_state = gr.State()
177
+
178
  def handle_message_data_upload(
179
  message: str,
180
  history: List[Dict[str, str]],
 
216
  max_tokens,
217
  temperature,
218
  top_p,
219
+ parquet_data_state, # parquet_data_stateλ₯Ό μ‚¬μš©ν•˜μ—¬ μ—…λ‘œλ“œλœ 데이터λ₯Ό 전달
220
  ],
221
  outputs=[chatbot_data_upload, msg_data_upload],
222
  queue=True
 
247
  parquet_upload_button = gr.Button("μ—…λ‘œλ“œ")
248
  parquet_upload_status = gr.Textbox(label="μ—…λ‘œλ“œ μƒνƒœ", interactive=False)
249
  parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
 
250
 
251
  def handle_parquet_upload(file_path: str):
252
  message, parquet_content, parquet_json = upload_parquet(file_path)
 
294
  text_input = gr.Textbox(
295
  label="ν…μŠ€νŠΈ μž…λ ₯ (각 행은 `id,text,label,metadata` ν˜•μ‹μœΌλ‘œ μž…λ ₯)",
296
  lines=10,
297
+ placeholder="예: 1,μ΄μˆœμ‹ ,μž₯κ΅°,거뢁선\n2,원균,μž₯κ΅°,λͺ¨ν•¨\n3,μ„ μ‘°,μ™•,μ‹œκΈ°\n4,λ„μš”ν† λ―Έ νžˆλ°μš”μ‹œ,μ™•,침랡"
298
  )
299
  convert_button = gr.Button("λ³€ν™˜ 및 λ‹€μš΄λ‘œλ“œ")
300
  convert_status = gr.Textbox(label="λ³€ν™˜ μƒνƒœ", interactive=False)
 
318
 
319
  if __name__ == "__main__":
320
  demo.launch()
321
+