ginipick committed on
Commit
dc6bd76
•
1 Parent(s): 5fd7b47

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +299 -80
app-backup.py CHANGED
@@ -2,13 +2,17 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
  import pandas as pd
5
- from typing import List, Tuple
 
 
6
 
 
7
  # μΆ”λ‘  API ν΄λΌμ΄μ–ΈνŠΈ μ„€μ •
8
- hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
9
- # hf_client = InferenceClient("CohereForAI/aya-23-35B", token=os.getenv("HF_TOKEN"))
 
10
 
11
- def load_code(filename):
12
  try:
13
  with open(filename, 'r', encoding='utf-8') as file:
14
  return file.read()
@@ -17,97 +21,312 @@ def load_code(filename):
17
  except Exception as e:
18
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
19
 
20
- def load_parquet(filename):
21
  try:
22
  df = pd.read_parquet(filename, engine='pyarrow')
23
- # λ°μ΄ν„°ν”„λ ˆμž„μ˜ 첫 λͺ‡ 행을 λ¬Έμžμ—΄λ‘œ λ³€ν™˜ (Markdown ν‘œ ν˜•μ‹)
24
  return df.head(10).to_markdown(index=False)
25
  except FileNotFoundError:
26
  return f"{filename} νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
27
  except Exception as e:
28
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
29
 
30
- # μ½”λ“œ 파일 λ‘œλ“œ
31
- fashion_code = load_code('fashion.cod')
32
- uhdimage_code = load_code('uhdimage.cod')
33
- MixGEN_code = load_code('mgen.cod')
34
-
35
- # Parquet 파일 λ‘œλ“œ
36
- test_parquet_content = load_parquet('test.parquet')
37
 
38
  def respond(
39
- message,
40
- history: List[Tuple[str, str]],
41
- system_message="", # κΈ°λ³Έκ°’ μΆ”κ°€
42
- max_tokens=1024, # κΈ°λ³Έκ°’ μΆ”κ°€
43
- temperature=0.7, # κΈ°λ³Έκ°’ μΆ”κ°€
44
- top_p=0.9, # κΈ°λ³Έκ°’ μΆ”κ°€
45
- ):
46
- global fashion_code, uhdimage_code, MixGEN_code, test_parquet_content
47
- system_message = system_message or ""
48
- system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ 닡변할것. λ„ˆλŠ” 주어진 μ†ŒμŠ€μ½”λ“œλ₯Ό 기반으둜 "μ„œλΉ„μŠ€ μ‚¬μš© μ„€λͺ… 및 μ•ˆλ‚΄, Q&Aλ₯Ό ν•˜λŠ” 역할이닀". μ•„μ£Ό μΉœμ ˆν•˜κ³  μžμ„Έν•˜κ²Œ 4000토큰 이상 Markdown ν˜•μ‹μœΌλ‘œ μž‘μ„±ν•˜λΌ. λ„ˆλŠ” μ½”λ“œλ₯Ό 기반으둜 μ‚¬μš© μ„€λͺ… 및 질의 응닡을 μ§„ν–‰ν•˜λ©°, μ΄μš©μžμ—κ²Œ 도움을 μ£Όμ–΄μ•Ό ν•œλ‹€. μ΄μš©μžκ°€ κΆκΈˆν•΄ ν•  λ§Œν•œ λ‚΄μš©μ— μΉœμ ˆν•˜κ²Œ μ•Œλ €μ£Όλ„λ‘ ν•˜λΌ. μ½”λ“œ 전체 λ‚΄μš©μ— λŒ€ν•΄μ„œλŠ” λ³΄μ•ˆμ„ μœ μ§€ν•˜κ³ , ν‚€ κ°’ 및 μ—”λ“œν¬μΈνŠΈμ™€ ꡬ체적인 λͺ¨λΈμ€ κ³΅κ°œν•˜μ§€ 마라."""
49
-
50
- if message.lower() == "νŒ¨μ…˜ μ½”λ“œ μ‹€ν–‰":
51
- system_message += f"\n\nνŒ¨μ…˜ μ½”λ“œ λ‚΄μš©:\n```python\n{fashion_code}\n```"
52
- message = "νŒ¨μ…˜ κ°€μƒν”ΌνŒ…μ— λŒ€ν•œ λ‚΄μš©μ„ ν•™μŠ΅ν•˜μ˜€κ³ , μ„€λͺ…ν•  μ€€λΉ„κ°€ λ˜μ–΄μžˆλ‹€κ³  μ•Œλ¦¬κ³  μ„œλΉ„μŠ€ URL(https://aiqcamp-fash.hf.space)을 톡해 ν…ŒμŠ€νŠΈ 해보라고 좜λ ₯ν•˜λΌ."
53
- elif message.lower() == "uhd 이미지 μ½”λ“œ μ‹€ν–‰":
54
- system_message += f"\n\nUHD 이미지 μ½”λ“œ λ‚΄μš©:\n```python\n{uhdimage_code}\n```"
55
- message = "UHD 이미지 생성에 λŒ€ν•œ λ‚΄μš©μ„ ν•™μŠ΅ν•˜μ˜€κ³ , μ„€λͺ…ν•  μ€€λΉ„κ°€ λ˜μ–΄μžˆλ‹€κ³  μ•Œλ¦¬κ³  μ„œλΉ„μŠ€ URL(https://openfree-ultpixgen.hf.space)을 톡해 ν…ŒμŠ€νŠΈ 해보라고 좜λ ₯ν•˜λΌ."
56
- elif message.lower() == "mixgen μ½”λ“œ μ‹€ν–‰":
57
- system_message += f"\n\nMixGEN μ½”λ“œ λ‚΄μš©:\n```python\n{MixGEN_code}\n```"
58
- message = "MixGEN3 이미지 생성에 λŒ€ν•œ λ‚΄μš©μ„ ν•™μŠ΅ν•˜μ˜€κ³ , μ„€λͺ…ν•  μ€€λΉ„κ°€ λ˜μ–΄μžˆλ‹€κ³  μ•Œλ¦¬κ³  μ„œλΉ„μŠ€ URL(https://openfree-mixgen3.hf.space)을 톡해 ν…ŒμŠ€νŠΈ 해보라고 좜λ ₯ν•˜λΌ."
59
- elif message.lower() == "test.parquet μ‹€ν–‰":
60
- system_message += f"\n\ntest.parquet 파일 λ‚΄μš©:\n```markdown\n{test_parquet_content}\n```"
61
- message = "test.parquet νŒŒμΌμ— λŒ€ν•œ λ‚΄μš©μ„ ν•™μŠ΅ν•˜μ˜€κ³ , κ΄€λ ¨ μ„€λͺ… 및 Q&Aλ₯Ό 진행할 μ€€λΉ„κ°€ λ˜μ–΄μžˆλ‹€. κΆκΈˆν•œ 점이 있으면 물어보라."
62
-
63
- messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
64
- for val in history:
65
- if val[0]:
66
- messages.append({"role": "user", "content": val[0]})
67
- if val[1]:
68
- messages.append({"role": "assistant", "content": val[1]})
69
- messages.append({"role": "user", "content": message})
70
-
71
- response = ""
72
  try:
73
- for message in hf_client.chat_completion(
74
- messages,
75
- max_tokens=max_tokens,
 
 
76
  stream=True,
77
  temperature=temperature,
78
  top_p=top_p,
79
- ):
80
- token = message.choices[0].delta.get('content', None)
81
- if token:
82
- response += token
83
  yield response
84
  except Exception as e:
85
- yield f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
86
-
87
- # Gradio μΈν„°νŽ˜μ΄μŠ€ μ„€μ • 뢀뢄도 μˆ˜μ •
88
- demo = gr.ChatInterface(
89
- respond,
90
- additional_inputs=[
91
- gr.Textbox(label="System Message", value=""),
92
- gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens"),
93
- gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature"),
94
- gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P"),
95
- ],
96
- examples=[
97
- ["νŒ¨μ…˜ μ½”λ“œ μ‹€ν–‰"],
98
- ["UHD 이미지 μ½”λ“œ μ‹€ν–‰"],
99
- ["MixGEN μ½”λ“œ μ‹€ν–‰"],
100
- ["test.parquet μ‹€ν–‰"], # μƒˆλ‘œμš΄ 예제 μΆ”κ°€
101
- ["μƒμ„Έν•œ μ‚¬μš© 방법을 마치 화면을 λ³΄λ©΄μ„œ μ„€λͺ…ν•˜λ“―이 4000 토큰 이상 μžμ„Ένžˆ μ„€λͺ…ν•˜λΌ"],
102
- ["FAQ 20건을 μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜λΌ. 4000토큰 이상 μ‚¬μš©ν•˜λΌ."],
103
- ["μ‚¬μš© 방법과 차별점, νŠΉμ§•, 강점을 μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 유튜브 μ˜μƒ 슀크립트 ν˜•νƒœλ‘œ μž‘μ„±ν•˜λΌ"],
104
- ["λ³Έ μ„œλΉ„μŠ€λ₯Ό SEO μ΅œμ ν™”ν•˜μ—¬ λΈ”λ‘œκ·Έ 포슀트(λ°°κ²½ 및 ν•„μš”μ„±, κΈ°μ‘΄ μœ μ‚¬ μ„œλΉ„μŠ€μ™€ λΉ„κ΅ν•˜μ—¬ 특μž₯점, ν™œμš©μ²˜, κ°€μΉ˜, κΈ°λŒ€νš¨κ³Ό, 결둠을 포함)둜 4000 토큰 이상 μž‘μ„±ν•˜λΌ"],
105
- ["νŠΉν—ˆ μΆœμ›μ— ν™œμš©ν•  기술 및 λΉ„μ¦ˆλ‹ˆμŠ€λͺ¨λΈ 츑면을 ν¬ν•¨ν•˜μ—¬ νŠΉν—ˆ μΆœμ›μ„œ ꡬ성에 맞게 ν˜μ‹ μ μΈ 창의 발λͺ… λ‚΄μš©μ„ μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 μž‘μ„±ν•˜λΌ."],
106
- ["계속 μ΄μ–΄μ„œ λ‹΅λ³€ν•˜λΌ"],
107
- ],
108
- theme="Nymbo/Nymbo_Theme",
109
- cache_examples=False, # 캐싱 λΉ„ν™œμ„±ν™” μ„€μ •
110
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  if __name__ == "__main__":
113
- demo.launch()
 
 
2
import csv
import io
import json
import os
import traceback
from typing import Dict, Iterator, List, Optional, Tuple

import pandas as pd
from huggingface_hub import InferenceClient
10
# Inference API client setup: a shared InferenceClient bound to the
# Command R+ model, authenticated with the token read from the HF_TOKEN
# environment variable (os.getenv returns None when it is unset).
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
)
14
 
15
+ def load_code(filename: str) -> str:
16
  try:
17
  with open(filename, 'r', encoding='utf-8') as file:
18
  return file.read()
 
21
  except Exception as e:
22
  return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
23
 
24
def load_parquet(filename: str) -> str:
    """Return a Markdown table previewing the first 10 rows of a Parquet file.

    The file is read with the pyarrow engine. Errors are never raised to the
    caller; instead a human-readable message string is returned, with a
    dedicated message when the file does not exist.
    """
    try:
        frame = pd.read_parquet(filename, engine='pyarrow')
        preview = frame.head(10)
        return preview.to_markdown(index=False)
    except FileNotFoundError:
        return f"{filename} νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
    except Exception as e:
        return f"νŒŒμΌμ„ μ½λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e!s}"
32
 
 
 
 
 
 
 
 
33
 
34
def respond(
    message: str,
    history: List[Dict[str, str]],
    system_message: str = "",
    max_tokens: int = 4000,
    temperature: float = 0.5,
    top_p: float = 0.9,
    parquet_data: Optional[str] = None,
) -> Iterator[str]:
    """Stream the model's answer to ``message`` as a growing string.

    Args:
        message: The user's current message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            A trailing user turn identical to ``message`` is ignored so the
            message is not sent to the model twice.
        system_message: System prompt used when no dataset is supplied.
        max_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        parquet_data: Optional JSON string of the uploaded dataset. When
            given, a fixed data-analysis system prompt plus a
            ``DataFrame.describe`` summary replaces ``system_message``.

    Yields:
        The accumulated response text after each streamed chunk, or a single
        error message if inference fails. The traceback is printed to the
        server log only and is not included in the yielded text.
    """
    # Build the system prompt.
    if parquet_data:
        system_prefix = """λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•  것. λ„ˆλŠ” μ—…λ‘œλ“œλœ 데이터λ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” 역할을 ν•œλ‹€. 데이터λ₯Ό λΆ„μ„ν•˜μ—¬ μ‚¬μš©μžμ—κ²Œ 도움이 λ˜λŠ” 정보λ₯Ό μ œκ³΅ν•˜λΌ. 데이터λ₯Ό ν™œμš©ν•˜μ—¬ μƒμ„Έν•˜κ³  μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜λ˜, λ―Όκ°ν•œ μ •λ³΄λ‚˜ 개인 정보λ₯Ό λ…ΈμΆœν•˜μ§€ 마라."""
        try:
            df = pd.read_json(io.StringIO(parquet_data))
            # Summarise the data instead of sending every row to the model.
            data_summary = df.describe(include='all').to_string()
            system_prefix += f"\n\nμ—…λ‘œλ“œλœ λ°μ΄ν„°μ˜ μš”μ•½ 정보:\n{data_summary}"
        except Exception as e:
            print(f"데이터 λ‘œλ“œ 쀑 였λ₯˜ λ°œμƒ: {str(e)}\n{traceback.format_exc()}")
            system_prefix += "\n\n데이터λ₯Ό λ‘œλ“œν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
    else:
        system_prefix = system_message or "λ„ˆλŠ” AI μ‘°μ–Έμž 역할이닀."

    # Callers may already have appended the current message to the history;
    # drop that trailing copy so it does not appear twice in the prompt.
    turns = list(history or [])
    if (
        turns
        and turns[-1].get('role') == 'user'
        and turns[-1].get('content') == message
    ):
        turns.pop()

    # Assemble the plain-text prompt from the transcript.
    parts = [system_prefix + "\n\n"]
    for chat in turns:
        if chat['role'] == 'user':
            parts.append(f"μ‚¬μš©μž: {chat['content']}\n")
        else:
            parts.append(f"AI: {chat['content']}\n")
    parts.append(f"μ‚¬μš©μž: {message}\nAI:")
    prompt = "".join(parts)

    try:
        # Send the prompt and stream the reply back chunk by chunk.
        response = ""
        stream = hf_client.text_generation(
            prompt=prompt,
            max_new_tokens=int(max_tokens),  # UI sliders may deliver floats
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for msg in stream:
            if msg:
                response += msg
            yield response
    except Exception as e:
        error_message = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
        # Keep the stack trace in the server log; do not expose it to users.
        print(f"{error_message}\n{traceback.format_exc()}")
        yield error_message
84
+
85
+
86
def upload_csv(file_path: str) -> Tuple[str, str]:
    """Validate a CSV file and convert it to a snappy-compressed Parquet file.

    The CSV must contain the columns ``id``, ``text``, ``label`` and
    ``metadata``. Duplicate rows are dropped, missing values are replaced by
    empty strings, and the columns are cast to compact dtypes before writing.
    The Parquet file is written to the current working directory under the
    CSV's base name.

    Args:
        file_path: Path of the uploaded CSV file.

    Returns:
        ``(status_message, parquet_filename)``. ``parquet_filename`` is ``""``
        when validation or conversion fails; errors are reported through the
        message rather than raised.
    """
    try:
        df = pd.read_csv(file_path, sep=',')

        # Check required columns; sort so the message is deterministic
        # (set iteration order is not).
        required_columns = {'id', 'text', 'label', 'metadata'}
        missing_columns = sorted(required_columns - set(df.columns))
        if missing_columns:
            return f"CSV νŒŒμΌμ— λ‹€μŒ ν•„μˆ˜ 컬럼이 λˆ„λ½λ˜μ—ˆμŠ΅λ‹ˆλ‹€: {', '.join(missing_columns)}", ""

        # Data cleansing, without mutating the frame in place.
        df = df.drop_duplicates().fillna('')

        # Compact dtypes.
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})

        # Convert to Parquet.
        parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
        return f"{parquet_filename} 파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜κ³  λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_filename
    except Exception as e:
        return f"CSV 파일 μ—…λ‘œλ“œ 및 λ³€ν™˜ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", ""
107
+
108
def upload_parquet(file_path: str) -> Tuple[str, str, str]:
    """Load an uploaded Parquet file for preview and for use by the chatbot.

    Returns:
        ``(status_message, markdown_preview, json_records)``: a Markdown
        table of the first 10 rows and the whole frame serialised as a JSON
        records string. On any error the last two items are empty strings
        and the message describes the failure.
    """
    try:
        frame = pd.read_parquet(file_path, engine='pyarrow')
        # Markdown preview of the first rows.
        preview_md = frame.head(10).to_markdown(index=False)
        # Full dataset as a JSON string, keeping non-ASCII text readable.
        records_json = frame.to_json(orient='records', force_ascii=False)
    except Exception as e:
        return f"Parquet 파일 μ—…λ‘œλ“œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e!s}", "", ""
    return "Parquet 파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.", preview_md, records_json
119
+
120
def text_to_parquet(text: str) -> Tuple[str, str, str]:
    """Convert ``id,text,label,metadata`` lines of text into a Parquet file.

    The text is parsed with the ``csv`` module, so a field may contain commas
    when it is wrapped in double quotes, Windows line endings are handled,
    and blank lines are skipped. The result is written to
    ``text_to_parquet.parquet`` in the current working directory.

    Args:
        text: One record per line, four comma-separated fields each.

    Returns:
        ``(status_message, markdown_preview, parquet_filename)``. On any
        error the last two items are empty strings and the message describes
        the failure; nothing is raised.
    """
    try:
        # Parse rows; ignore lines that are empty or whitespace only.
        reader = csv.reader(io.StringIO(text.strip()))
        data = [row for row in reader if any(cell.strip() for cell in row)]
        if not data:
            raise ValueError("no data rows found in input")

        df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
        # Compact dtypes.
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})

        # Convert to Parquet.
        parquet_filename = 'text_to_parquet.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

        # Preview what was written.
        parquet_content = load_parquet(parquet_filename)
        return f"{parquet_filename} 파일이 μ„±κ³΅μ μœΌλ‘œ λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€.", parquet_content, parquet_filename
    except Exception as e:
        return f"ν…μŠ€νŠΈ λ³€ν™˜ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}", "", ""
135
+
136
# Custom CSS passed to gr.Blocks: hides the footer, fixes the height of the
# chatbot panes and makes them scrollable, sets the message font size, forces
# a white background with black text on text inputs, caps the height of the
# Parquet upload area, and sets the font size of the intro text.
css = """
footer {
    visibility: hidden;
}
#chatbot-container, #chatbot-data-upload {
    height: 700px;
    overflow-y: scroll;
}
#chatbot-container .message, #chatbot-data-upload .message {
    font-size: 14px;
}
/* μž…λ ₯μ°½ 배경색 및 κΈ€μžμƒ‰ λ³€κ²½ */
textarea, input[type="text"] {
    background-color: #ffffff; /* 흰색 λ°°κ²½ */
    color: #000000; /* 검정색 κΈ€μž */
}
/* 파일 μ—…λ‘œλ“œ μ˜μ—­ 높이 쑰절 */
#parquet-upload-area {
    max-height: 150px;
    overflow-y: auto;
}
/* 초기 μ„€λͺ… 글씨 크기 쑰절 */
#initial-description {
    font-size: 14px;
}
"""
163
+
164
+
165
+ # Gradio Blocks μΈν„°νŽ˜μ΄μŠ€ μ„€μ •
166
+ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
167
+ gr.Markdown("# My RAG: LLM이 λ‚˜λ§Œμ˜ λ°μ΄ν„°λ‘œ ν•™μŠ΅ν•œ μ½˜ν…μΈ  생성/λ‹΅λ³€", elem_id="initial-description")
168
+ gr.Markdown(
169
+ "### 1) λ‚˜λ§Œμ˜ 데이터λ₯Ό μž…λ ₯ λ˜λŠ” CSV μ—…λ‘œλ“œλ‘œ Parquet 데이터셋 μžλ™ λ³€ν™˜ 2) Parquet 데이터셋을 μ—…λ‘œλ“œν•˜λ©΄, LLM이 맞좀 ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬ 응닡\n"
170
+ "### Tip) '예제'λ₯Ό 톡해 λ‹€μ–‘ν•œ ν™œμš© 방법을 μ²΄ν—˜ν•˜κ³  μ‘μš©ν•΄ λ³΄μ„Έμš”, 데이터셋 μ—…λ‘œλ“œμ‹œ λ―Έλ¦¬λ³΄κΈ°λŠ” 10건만 좜λ ₯",
171
+ elem_id="initial-description"
172
+ )
173
+
174
+ # 첫 번째 νƒ­: 챗봇 데이터 μ—…λ‘œλ“œ (νƒ­ 이름 λ³€κ²½: "My 데이터셋+LLM")
175
+ with gr.Tab("My 데이터셋+LLM"):
176
+ gr.Markdown("### LLMκ³Ό λŒ€ν™”ν•˜κΈ°")
177
+ chatbot_data_upload = gr.Chatbot(label="챗봇", type="messages", elem_id="chatbot-data-upload")
178
+ msg_data_upload = gr.Textbox(label="λ©”μ‹œμ§€ μž…λ ₯", placeholder="여기에 λ©”μ‹œμ§€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...")
179
+ send_data_upload = gr.Button("전솑")
180
+
181
+ with gr.Accordion("μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ 및 μ˜΅μ…˜ μ„€μ •", open=False):
182
+ system_message = gr.Textbox(label="System Message", value="λ„ˆλŠ” AI μ‘°μ–Έμž 역할이닀.")
183
+ max_tokens = gr.Slider(minimum=1, maximum=8000, value=1000, label="Max Tokens")
184
+ temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
185
+ top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
186
+
187
+ parquet_data_state = gr.State()
188
+
189
def handle_message_data_upload(
    message: str,
    history: List[Dict[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    parquet_data: str
):
    """Stream a chatbot reply for the "send" button.

    Yields ``(chat_history, "")`` pairs: the updated message list for the
    Chatbot component and an empty string that clears the input textbox.

    The model is given only the turns that preceded ``message``; the new
    user turn is passed separately, so it reaches the prompt exactly once.
    """
    # Copy so the list handed in by the UI is not mutated.
    prior_turns = list(history or [])
    # Transcript shown to the user, including the message just sent.
    history = prior_turns + [{"role": "user", "content": message}]
    try:
        # Generate the response from the earlier turns plus the new message.
        response_gen = respond(
            message, prior_turns, system_message, max_tokens, temperature, top_p, parquet_data
        )
        partial_response = ""
        for partial in response_gen:
            partial_response = partial
            # Show the in-progress answer without committing it yet.
            display_history = history + [
                {"role": "assistant", "content": partial_response}
            ]
            yield display_history, ""
        # Commit the final assistant answer to the transcript.
        history.append({"role": "assistant", "content": partial_response})
    except Exception as e:
        response = f"μΆ”λ‘  쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
        history.append({"role": "assistant", "content": response})
        yield history, ""
220
+
221
+ send_data_upload.click(
222
+ handle_message_data_upload,
223
+ inputs=[
224
+ msg_data_upload,
225
+ chatbot_data_upload,
226
+ system_message,
227
+ max_tokens,
228
+ temperature,
229
+ top_p,
230
+ parquet_data_state, # parquet_data_stateλ₯Ό μ‚¬μš©ν•˜μ—¬ μ—…λ‘œλ“œλœ 데이터λ₯Ό 전달
231
+ ],
232
+ outputs=[chatbot_data_upload, msg_data_upload],
233
+ queue=True
234
+ )
235
+
236
+ # 예제 μΆ”κ°€
237
+ with gr.Accordion("예제", open=False):
238
+ gr.Examples(
239
+ examples=[
240
+ ["μ—…λ‘œλ“œλœ 데이터셋에 λŒ€ν•΄ μš”μ•½ μ„€λͺ…ν•˜λΌ."],
241
+ ["μ—…λ‘œλ“œλœ 데이터셋 νŒŒμΌμ„ ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬, λ³Έ μ„œλΉ„μŠ€λ₯Ό SEO μ΅œμ ν™”ν•˜μ—¬ λΈ”λ‘œκ·Έ 포슀트(κ°œμš”, λ°°κ²½ 및 ν•„μš”μ„±, κΈ°μ‘΄ μœ μ‚¬ μ œν’ˆ/μ„œλΉ„μŠ€μ™€ λΉ„κ΅ν•˜μ—¬ 특μž₯점, ν™œμš©μ²˜, κ°€μΉ˜, κΈ°λŒ€νš¨κ³Ό, 결둠을 포함)둜 4000 토큰 이상 μž‘μ„±ν•˜λΌ"],
242
+ ["μ—…λ‘œλ“œλœ 데이터셋 νŒŒμΌμ„ ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬, μ‚¬μš© 방법과 차별점, νŠΉμ§•, 강점을 μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 유튜브 μ˜μƒ 슀크립트 ν˜•νƒœλ‘œ μž‘μ„±ν•˜λΌ"],
243
+ ["μ—…λ‘œλ“œλœ 데이터셋 νŒŒμΌμ„ ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬, μ œν’ˆ 상세 νŽ˜μ΄μ§€ ν˜•μ‹μ˜ λ‚΄μš©μ„ 4000 토큰 이상 μžμ„Ένžˆ μ„€λͺ…ν•˜λΌ"],
244
+ ["μ—…λ‘œλ“œλœ 데이터셋 νŒŒμΌμ„ ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬, FAQ 20건을 μƒμ„Έν•˜κ²Œ μž‘μ„±ν•˜λΌ. 4000토큰 이상 μ‚¬μš©ν•˜λΌ."],
245
+ ["μ—…λ‘œλ“œλœ 데이터셋 νŒŒμΌμ„ ν•™μŠ΅ λ°μ΄ν„°λ‘œ ν™œμš©ν•˜μ—¬, νŠΉν—ˆ μΆœμ›μ— ν™œμš©ν•  기술 및 λΉ„μ¦ˆλ‹ˆμŠ€ λͺ¨λΈ 츑면을 ν¬ν•¨ν•˜μ—¬ νŠΉν—ˆ μΆœμ›μ„œ ꡬ성에 맞게 ν˜μ‹ μ μΈ 창의 발λͺ… λ‚΄μš©μ„ μ€‘μ‹¬μœΌλ‘œ 4000 토큰 이상 μž‘μ„±ν•˜λΌ."],
246
+ ],
247
+ inputs=msg_data_upload,
248
+ label="예제 선택",
249
+ )
250
+
251
+ # Parquet 파일 μ—…λ‘œλ“œλ₯Ό ν™”λ©΄ ν•˜λ‹¨μœΌλ‘œ 이동
252
+ gr.Markdown("### Parquet 파일 μ—…λ‘œλ“œ")
253
+ with gr.Row():
254
+ with gr.Column():
255
+ parquet_upload = gr.File(
256
+ label="Parquet 파일 μ—…λ‘œλ“œ", type="filepath", elem_id="parquet-upload-area"
257
+ )
258
+ parquet_upload_button = gr.Button("μ—…λ‘œλ“œ")
259
+ parquet_upload_status = gr.Textbox(label="μ—…λ‘œλ“œ μƒνƒœ", interactive=False)
260
+ parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
261
+
262
def handle_parquet_upload(file_path: str):
    """Run ``upload_parquet`` and return (status, preview, JSON state) for the UI.

    When no JSON payload was produced, the preview and state are blanked.
    """
    status, preview, payload = upload_parquet(file_path)
    if not payload:
        return status, "", ""
    return status, preview, payload
268
+
269
+ parquet_upload_button.click(
270
+ handle_parquet_upload,
271
+ inputs=parquet_upload,
272
+ outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
273
+ )
274
+
275
+ # 두 번째 νƒ­: 데이터 λ³€ν™˜ (νƒ­ 이름 λ³€κ²½: "CSV to My 데이터셋")
276
+ with gr.Tab("CSV to My 데이터셋"):
277
+ gr.Markdown("### CSV 파일 μ—…λ‘œλ“œ 및 Parquet λ³€ν™˜")
278
+ with gr.Row():
279
+ with gr.Column():
280
+ csv_file = gr.File(label="CSV 파일 μ—…λ‘œλ“œ", type="filepath")
281
+ upload_button = gr.Button("μ—…λ‘œλ“œ 및 λ³€ν™˜")
282
+ upload_status = gr.Textbox(label="μ—…λ‘œλ“œ μƒνƒœ", interactive=False)
283
+ parquet_preview = gr.Markdown(label="Parquet 파일 미리보기")
284
+ download_button = gr.File(label="Parquet 파일 λ‹€μš΄λ‘œλ“œ", interactive=False)
285
+
286
def handle_csv_upload(file_path: str):
    """Convert an uploaded CSV and return (status, preview, download path).

    On failure the preview is empty and the download slot is ``None``.
    """
    status, converted_name = upload_csv(file_path)
    if not converted_name:
        return status, "", None
    preview = load_parquet(converted_name)
    return status, preview, converted_name
293
+
294
+ upload_button.click(
295
+ handle_csv_upload,
296
+ inputs=csv_file,
297
+ outputs=[upload_status, parquet_preview, download_button]
298
+ )
299
+
300
+ # μ„Έ 번째 νƒ­: ν…μŠ€νŠΈ to csv to parquet λ³€ν™˜ (νƒ­ 이름 λ³€κ²½: "Text to My 데이터셋")
301
+ with gr.Tab("Text to My 데이터셋"):
302
+ gr.Markdown("### ν…μŠ€νŠΈλ₯Ό μž…λ ₯ν•˜λ©΄ CSV둜 λ³€ν™˜ ν›„ Parquet으둜 μžλ™ μ „ν™˜λ©λ‹ˆλ‹€.")
303
+ with gr.Row():
304
+ with gr.Column():
305
+ text_input = gr.Textbox(
306
+ label="ν…μŠ€νŠΈ μž…λ ₯ (각 행은 `id,text,label,metadata` ν˜•μ‹μœΌλ‘œ μž…λ ₯)",
307
+ lines=10,
308
+ placeholder="예: 1,μ΄μˆœμ‹ ,μž₯κ΅°,거뢁선\n2,원균,μž₯κ΅°,λͺ¨ν•¨\n3,μ„ μ‘°,μ™•,μ‹œκΈ°\n4,λ„μš”ν† λ―Έ νžˆλ°μš”μ‹œ,μ™•,침랡"
309
+ )
310
+ convert_button = gr.Button("λ³€ν™˜ 및 λ‹€μš΄λ‘œλ“œ")
311
+ convert_status = gr.Textbox(label="λ³€ν™˜ μƒνƒœ", interactive=False)
312
+ parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
313
+ download_parquet_convert = gr.File(label="Parquet 파일 λ‹€μš΄λ‘œλ“œ", interactive=False)
314
+
315
def handle_text_to_parquet(text: str):
    """Convert typed text and return (status, preview, download path).

    On failure the preview is empty and the download slot is ``None``.
    """
    status, preview, converted_name = text_to_parquet(text)
    if not converted_name:
        return status, "", None
    return status, preview, converted_name
321
+
322
+ convert_button.click(
323
+ handle_text_to_parquet,
324
+ inputs=text_input,
325
+ outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
326
+ )
327
+
328
+ gr.Markdown("### [email protected]", elem_id="initial-description")
329
 
330
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
332
+