Update app-backup.py
app-backup.py  +299 -80
CHANGED
@@ -2,13 +2,17 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import pandas as pd
-from typing import List, Tuple
+from typing import List, Dict, Tuple
+import json
+import io
 
+import traceback
 # Inference API client setup
-hf_client = InferenceClient(
+hf_client = InferenceClient(
+    "CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
+)
 
-def load_code(filename):
+def load_code(filename: str) -> str:
     try:
         with open(filename, 'r', encoding='utf-8') as file:
             return file.read()
@@ -17,97 +21,312 @@ def load_code(filename):
     except Exception as e:
         return f"An error occurred while reading the file: {str(e)}"
 
-def load_parquet(filename):
+def load_parquet(filename: str) -> str:
     try:
         df = pd.read_parquet(filename, engine='pyarrow')
-        # Convert the first few rows of the DataFrame to a string (Markdown table format)
         return df.head(10).to_markdown(index=False)
     except FileNotFoundError:
         return f"{filename} could not be found."
     except Exception as e:
         return f"An error occurred while reading the file: {str(e)}"
 
-# Load code files
-fashion_code = load_code('fashion.cod')
-uhdimage_code = load_code('uhdimage.cod')
-MixGEN_code = load_code('mgen.cod')
-
-# Load the Parquet file
-test_parquet_content = load_parquet('test.parquet')
 
 def respond(
-    message,
-    history: List[
-    system_message="",
-    max_tokens=
-    temperature=0.
-    top_p=0.9,
-    response = ""
+    message: str,
+    history: List[Dict[str, str]],
+    system_message: str = "",
+    max_tokens: int = 4000,
+    temperature: float = 0.5,
+    top_p: float = 0.9,
+    parquet_data: str = None
+) -> str:
+    # Set up the system prompt
+    if parquet_data:
+        system_prefix = """You must answer in Korean. Your role is to answer questions based on the uploaded data. Analyze the data and provide information that helps the user. Use the data to give detailed and accurate answers, but do not expose sensitive or personal information."""
+        try:
+            df = pd.read_json(io.StringIO(parquet_data))
+            # Generate summary information for the data
+            data_summary = df.describe(include='all').to_string()
+            system_prefix += f"\n\nSummary of the uploaded data:\n{data_summary}"
+        except Exception as e:
+            print(f"Error while loading the data: {str(e)}\n{traceback.format_exc()}")
+            system_prefix += "\n\nAn error occurred while loading the data."
+    else:
+        system_prefix = system_message or "You are an AI advisor."
+
+    # Build the prompt
+    prompt = system_prefix + "\n\n"
+    for chat in history:
+        if chat['role'] == 'user':
+            prompt += f"User: {chat['content']}\n"
+        else:
+            prompt += f"AI: {chat['content']}\n"
+    prompt += f"User: {message}\nAI:"
+
     try:
+        # Send the prompt to the model and receive a streamed response
+        response = ""
+        stream = hf_client.text_generation(
+            prompt=prompt,
+            max_new_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
+        )
+        for msg in stream:
+            if msg:
+                response += msg
                 yield response
     except Exception as e:
+        error_message = f"An error occurred during inference: {str(e)}\n{traceback.format_exc()}"
+        print(error_message)
+        yield error_message
+
+
+def upload_csv(file_path: str) -> Tuple[str, str]:
+    try:
+        # Read the CSV file
+        df = pd.read_csv(file_path, sep=',')
+        # Check for required columns
+        required_columns = {'id', 'text', 'label', 'metadata'}
+        available_columns = set(df.columns)
+        missing_columns = required_columns - available_columns
+        if missing_columns:
+            return f"The CSV file is missing required columns: {', '.join(missing_columns)}", ""
+        # Clean the data
+        df.drop_duplicates(inplace=True)
+        df.fillna('', inplace=True)
+        # Optimize data types
+        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
+        # Convert to a Parquet file
+        parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
+        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
+        return f"{parquet_filename} was uploaded and converted successfully.", parquet_filename
+    except Exception as e:
+        return f"An error occurred while uploading and converting the CSV file: {str(e)}", ""
+
+def upload_parquet(file_path: str) -> Tuple[str, str, str]:
+    try:
+        # Read the Parquet file
+        df = pd.read_parquet(file_path, engine='pyarrow')
+        # Convert to Markdown for a preview
+        parquet_content = df.head(10).to_markdown(index=False)
+        # Convert the DataFrame to a JSON string
+        parquet_json = df.to_json(orient='records', force_ascii=False)
+        return "The Parquet file was uploaded successfully.", parquet_content, parquet_json
+    except Exception as e:
+        return f"An error occurred while uploading the Parquet file: {str(e)}", "", ""
+
+def text_to_parquet(text: str) -> Tuple[str, str, str]:
+    try:
+        # Convert the text to a DataFrame (each row is comma-separated)
+        data = [line.split(',') for line in text.strip().split('\n')]
+        df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
+        # Optimize data types
+        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
+        # Convert to a Parquet file
+        parquet_filename = 'text_to_parquet.parquet'
+        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
+        # Preview the Parquet file contents
+        parquet_content = load_parquet(parquet_filename)
+        return f"{parquet_filename} was converted successfully.", parquet_content, parquet_filename
+    except Exception as e:
+        return f"An error occurred while converting the text: {str(e)}", "", ""
+
+# CSS settings
+css = """
+footer {
+    visibility: hidden;
+}
+#chatbot-container, #chatbot-data-upload {
+    height: 700px;
+    overflow-y: scroll;
+}
+#chatbot-container .message, #chatbot-data-upload .message {
+    font-size: 14px;
+}
+/* Input field background and text color */
+textarea, input[type="text"] {
+    background-color: #ffffff; /* white background */
+    color: #000000; /* black text */
+}
+/* File upload area height */
+#parquet-upload-area {
+    max-height: 150px;
+    overflow-y: auto;
+}
+/* Initial description font size */
+#initial-description {
+    font-size: 14px;
+}
+"""
+
+
+# Set up the Gradio Blocks interface
+with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
+    gr.Markdown("# My RAG: content generation and answers from an LLM trained on your own data", elem_id="initial-description")
+    gr.Markdown(
+        "### 1) Enter your own data, or upload a CSV to convert it automatically into a Parquet dataset 2) Upload the Parquet dataset and the LLM uses it as custom training data for its responses\n"
+        "### Tip) Try the 'Examples' to explore and adapt the different ways to use it; dataset upload previews show only the first 10 rows",
+        elem_id="initial-description"
+    )
+
+    # First tab: chatbot with data upload (tab renamed to "My Dataset+LLM")
+    with gr.Tab("My Dataset+LLM"):
+        gr.Markdown("### Chat with the LLM")
+        chatbot_data_upload = gr.Chatbot(label="Chatbot", type="messages", elem_id="chatbot-data-upload")
+        msg_data_upload = gr.Textbox(label="Message input", placeholder="Type your message here...")
+        send_data_upload = gr.Button("Send")
+
+        with gr.Accordion("System prompt and option settings", open=False):
+            system_message = gr.Textbox(label="System Message", value="You are an AI advisor.")
+            max_tokens = gr.Slider(minimum=1, maximum=8000, value=1000, label="Max Tokens")
+            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
+            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
+
+        parquet_data_state = gr.State()
+
+        def handle_message_data_upload(
+            message: str,
+            history: List[Dict[str, str]],
+            system_message: str,
+            max_tokens: int,
+            temperature: float,
+            top_p: float,
+            parquet_data: str
+        ):
+            history = history or []
+            try:
+                # Add the user's message to the history
+                history.append({"role": "user", "content": message})
+                # Generate the response
+                response_gen = respond(
+                    message, history, system_message, max_tokens, temperature, top_p, parquet_data
+                )
+                partial_response = ""
+                for partial in response_gen:
+                    partial_response = partial
+                    # Update the conversation display
+                    display_history = history + [
+                        {"role": "assistant", "content": partial_response}
+                    ]
+                    yield display_history, ""
+                # Add the assistant's response to the history
+                history.append({"role": "assistant", "content": partial_response})
+            except Exception as e:
+                response = f"An error occurred during inference: {str(e)}"
+                history.append({"role": "assistant", "content": response})
+                yield history, ""
+
+        send_data_upload.click(
+            handle_message_data_upload,
+            inputs=[
+                msg_data_upload,
+                chatbot_data_upload,
+                system_message,
+                max_tokens,
+                temperature,
+                top_p,
+                parquet_data_state,  # pass the uploaded data via parquet_data_state
+            ],
+            outputs=[chatbot_data_upload, msg_data_upload],
+            queue=True
+        )
+
+        # Add examples
+        with gr.Accordion("Examples", open=False):
+            gr.Examples(
+                examples=[
+                    ["Summarize and describe the uploaded dataset."],
+                    ["Using the uploaded dataset file as training data, write an SEO-optimized blog post about this service of at least 4000 tokens (including an overview, background and necessity, advantages over existing similar products/services, use cases, value, expected effects, and a conclusion)"],
+                    ["Using the uploaded dataset file as training data, write a YouTube video script of at least 4000 tokens focusing on how to use it, its differentiators, features, and strengths"],
+                    ["Using the uploaded dataset file as training data, describe the content in the form of a product detail page in at least 4000 tokens"],
+                    ["Using the uploaded dataset file as training data, write 20 detailed FAQ entries. Use at least 4000 tokens."],
+                    ["Using the uploaded dataset file as training data, write at least 4000 tokens centered on innovative, creative invention content structured like a patent application, including the technology and business-model aspects to be used in the filing."],
+                ],
+                inputs=msg_data_upload,
+                label="Select an example",
+            )
+
+        # Move the Parquet file upload to the bottom of the screen
+        gr.Markdown("### Upload a Parquet file")
+        with gr.Row():
+            with gr.Column():
+                parquet_upload = gr.File(
+                    label="Upload Parquet file", type="filepath", elem_id="parquet-upload-area"
+                )
+                parquet_upload_button = gr.Button("Upload")
+                parquet_upload_status = gr.Textbox(label="Upload status", interactive=False)
+                parquet_preview_chat = gr.Markdown(label="Parquet file preview")
+
+        def handle_parquet_upload(file_path: str):
+            message, parquet_content, parquet_json = upload_parquet(file_path)
+            if parquet_json:
+                return message, parquet_content, parquet_json
+            else:
+                return message, "", ""
+
+        parquet_upload_button.click(
+            handle_parquet_upload,
+            inputs=parquet_upload,
+            outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state]
+        )
+
+    # Second tab: data conversion (tab renamed to "CSV to My Dataset")
+    with gr.Tab("CSV to My Dataset"):
+        gr.Markdown("### Upload a CSV file and convert it to Parquet")
+        with gr.Row():
+            with gr.Column():
+                csv_file = gr.File(label="Upload CSV file", type="filepath")
+                upload_button = gr.Button("Upload and convert")
+                upload_status = gr.Textbox(label="Upload status", interactive=False)
+                parquet_preview = gr.Markdown(label="Parquet file preview")
+                download_button = gr.File(label="Download Parquet file", interactive=False)
+
+        def handle_csv_upload(file_path: str):
+            message, parquet_filename = upload_csv(file_path)
+            if parquet_filename:
+                parquet_content = load_parquet(parquet_filename)
+                return message, parquet_content, parquet_filename
+            else:
+                return message, "", None
+
+        upload_button.click(
+            handle_csv_upload,
+            inputs=csv_file,
+            outputs=[upload_status, parquet_preview, download_button]
+        )
+
+    # Third tab: text to CSV to Parquet conversion (tab renamed to "Text to My Dataset")
+    with gr.Tab("Text to My Dataset"):
+        gr.Markdown("### Enter text and it is converted to CSV and then automatically to Parquet.")
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Text input (enter each row in `id,text,label,metadata` format)",
+                    lines=10,
+                    placeholder="e.g.: 1,Yi Sun-sin,general,turtle ship\n2,Won Gyun,general,slander\n3,Seonjo,king,crisis\n4,Toyotomi Hideyoshi,king,invasion"
+                )
+                convert_button = gr.Button("Convert and download")
+                convert_status = gr.Textbox(label="Conversion status", interactive=False)
+                parquet_preview_convert = gr.Markdown(label="Parquet file preview")
+                download_parquet_convert = gr.File(label="Download Parquet file", interactive=False)
+
+        def handle_text_to_parquet(text: str):
+            message, parquet_content, parquet_filename = text_to_parquet(text)
+            if parquet_filename:
+                return message, parquet_content, parquet_filename
+            else:
+                return message, "", None
+
+        convert_button.click(
+            handle_text_to_parquet,
+            inputs=text_input,
+            outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
+        )
+
+    gr.Markdown("### [email protected]", elem_id="initial-description")
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
+
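For a quick sanity check of the conversion path added in this commit, the sketch below replays the same pandas steps that text_to_parquet and load_parquet perform, outside of Gradio. It is a minimal standalone approximation, not part of the commit: the sample rows and the sample.parquet filename are made up, and it assumes pandas, pyarrow, and tabulate (needed by to_markdown) are installed.

# Standalone sketch: text -> DataFrame -> Parquet -> Markdown preview,
# mirroring text_to_parquet() and load_parquet() from the file above.
import pandas as pd

# Hypothetical rows in id,text,label,metadata form
raw_text = "1,hello world,greeting,demo\n2,see you later,farewell,demo"

data = [line.split(',') for line in raw_text.strip().split('\n')]
df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})

df.to_parquet('sample.parquet', engine='pyarrow', compression='snappy')

# Preview the first rows as a Markdown table, as the app's preview panes do.
print(pd.read_parquet('sample.parquet', engine='pyarrow').head(10).to_markdown(index=False))

Note that, like the app code, this simple comma split assumes the text field itself contains no commas.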