Update app.py
Browse files
app.py
CHANGED
@@ -135,10 +135,13 @@ def upload_parquet(file_path):
|
|
135 |
|
136 |
def text_to_parquet(text):
|
137 |
try:
|
138 |
-
# 텍스트를 DataFrame으로 변환 (예시: 각 줄은 새로운
|
139 |
data = [line.split(',') for line in text.strip().split('\n')]
|
140 |
df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
|
141 |
|
|
|
|
|
|
|
142 |
# Parquet 파일로 변환
|
143 |
parquet_filename = 'text_to_parquet.parquet'
|
144 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
@@ -150,7 +153,7 @@ def text_to_parquet(text):
|
|
150 |
with open(parquet_filename, "rb") as f:
|
151 |
data = f.read()
|
152 |
|
153 |
-
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ λ³νλμμ΅λλ€.", parquet_content,
|
154 |
except Exception as e:
|
155 |
return f"ν
μ€νΈ λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", "", None
|
156 |
|
@@ -158,10 +161,18 @@ css = """
|
|
158 |
footer {
|
159 |
visibility: hidden;
|
160 |
}
|
161 |
-
|
162 |
height: 600px;
|
|
|
163 |
}
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
font-size: 14px;
|
166 |
}
|
167 |
"""
|
@@ -173,7 +184,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
173 |
with gr.Tab("μ±λ΄"):
|
174 |
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
175 |
chatbot = gr.Chatbot(label="μ±λ΄", elem_id="chatbot-container")
|
176 |
-
msg = gr.Textbox(label="λ©μμ§ μ
λ ₯")
|
177 |
send = gr.Button("μ μ‘")
|
178 |
|
179 |
with gr.Accordion("μμ€ν
ν둬ννΈ λ° μ΅μ
μ€μ ", open=False):
|
@@ -204,6 +215,25 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
204 |
outputs=[chatbot, msg]
|
205 |
)
|
206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
with gr.Tab("λ°μ΄ν° λ³ν"):
|
208 |
gr.Markdown("### CSV νμΌ μ
λ‘λ λ° Parquet λ³ν")
|
209 |
with gr.Row():
|
@@ -221,7 +251,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
221 |
# 파일을 다운로드할 수 있도록 바이너리 데이터로 읽기
|
222 |
with open(parquet_filename, "rb") as f:
|
223 |
data = f.read()
|
224 |
-
return message, load_parquet(parquet_filename),
|
225 |
else:
|
226 |
return message, "", None
|
227 |
|
@@ -248,9 +278,9 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
248 |
def handle_parquet_upload(file_path):
|
249 |
message, parquet_content, parquet_json = upload_parquet(file_path)
|
250 |
if parquet_json:
|
251 |
-
return message,
|
252 |
else:
|
253 |
-
return message,
|
254 |
|
255 |
parquet_upload_button.click(
|
256 |
handle_parquet_upload,
|
@@ -260,12 +290,13 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
260 |
|
261 |
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
262 |
chatbot_data_upload = gr.Chatbot(label="μ±λ΄ λ°μ΄ν° μ
λ‘λ", elem_id="chatbot-data-upload")
|
263 |
-
msg_data_upload = gr.Textbox(label="λ©μμ§ μ
λ ₯")
|
264 |
send_data_upload = gr.Button("μ μ‘")
|
265 |
|
266 |
# 챗봇 메시지 처리 함수 (데이터 업로드 버전)
|
267 |
def handle_message_data_upload(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
|
268 |
-
#
|
|
|
269 |
history = history or []
|
270 |
history.append({"role": "user", "content": message})
|
271 |
# Generate response
|
@@ -289,12 +320,13 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
289 |
gr.Markdown("### ν
μ€νΈλ₯Ό μ
λ ₯νλ©΄ CSVλ‘ λ³ν ν ParquetμΌλ‘ μλ μ νλ©λλ€.")
|
290 |
with gr.Row():
|
291 |
with gr.Column():
|
292 |
-
text_input = gr.Textbox(label="ν
μ€νΈ μ
λ ₯ (κ° νμ `id,text,label,metadata` νμμΌλ‘ μ
λ ₯)", lines=10)
|
293 |
convert_button = gr.Button("λ³ν λ° λ€μ΄λ‘λ")
|
294 |
convert_status = gr.Textbox(label="λ³ν μν", interactive=False)
|
295 |
parquet_preview_convert = gr.Markdown(label="Parquet νμΌ λ―Έλ¦¬λ³΄κΈ°")
|
296 |
download_parquet_convert = gr.File(label="Parquet νμΌ λ€μ΄λ‘λ", type="binary", interactive=False)
|
297 |
|
|
|
298 |
def handle_text_to_parquet(text):
|
299 |
message, parquet_content, file_data = text_to_parquet(text)
|
300 |
if file_data:
|
@@ -322,3 +354,4 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
|
|
322 |
|
323 |
if __name__ == "__main__":
|
324 |
demo.launch()
|
|
|
|
135 |
|
136 |
def text_to_parquet(text):
|
137 |
try:
|
138 |
+
# 텍스트를 DataFrame으로 변환 (예시: 각 줄은 새로운 행으로, 콤마로 구분)
|
139 |
data = [line.split(',') for line in text.strip().split('\n')]
|
140 |
df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])
|
141 |
|
142 |
+
# 데이터 유형 최적화
|
143 |
+
df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})
|
144 |
+
|
145 |
# Parquet 파일로 변환
|
146 |
parquet_filename = 'text_to_parquet.parquet'
|
147 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
|
|
153 |
with open(parquet_filename, "rb") as f:
|
154 |
data = f.read()
|
155 |
|
156 |
+
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ λ³νλμμ΅λλ€.", parquet_content, data
|
157 |
except Exception as e:
|
158 |
return f"ν
μ€νΈ λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", "", None
|
159 |
|
|
|
161 |
footer {
|
162 |
visibility: hidden;
|
163 |
}
|
164 |
+
#chatbot-container {
|
165 |
height: 600px;
|
166 |
+
overflow-y: scroll;
|
167 |
}
|
168 |
+
#chatbot-container .message {
|
169 |
+
font-size: 14px;
|
170 |
+
}
|
171 |
+
#chatbot-data-upload {
|
172 |
+
height: 600px;
|
173 |
+
overflow-y: scroll;
|
174 |
+
}
|
175 |
+
#chatbot-data-upload .message {
|
176 |
font-size: 14px;
|
177 |
}
|
178 |
"""
|
|
|
184 |
with gr.Tab("μ±λ΄"):
|
185 |
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
186 |
chatbot = gr.Chatbot(label="μ±λ΄", elem_id="chatbot-container")
|
187 |
+
msg = gr.Textbox(label="λ©μμ§ μ
λ ₯", placeholder="μ¬κΈ°μ λ©μμ§λ₯Ό μ
λ ₯νμΈμ...")
|
188 |
send = gr.Button("μ μ‘")
|
189 |
|
190 |
with gr.Accordion("μμ€ν
ν둬ννΈ λ° μ΅μ
μ€μ ", open=False):
|
|
|
215 |
outputs=[chatbot, msg]
|
216 |
)
|
217 |
|
218 |
+
# 예제 버튼 (복원된 examples)
|
219 |
+
with gr.Accordion("μμ ", open=False):
|
220 |
+
example_buttons = gr.Examples(
|
221 |
+
examples=[
|
222 |
+
["ν¨μ
μ½λ μ€ν"],
|
223 |
+
["UHD μ΄λ―Έμ§ μ½λ μ€ν"],
|
224 |
+
["MixGEN μ½λ μ€ν"],
|
225 |
+
["test.parquet μ€ν"],
|
226 |
+
["μμΈν μ¬μ© λ°©λ²μ λ§μΉ νλ©΄μ 보면μ μ€λͺ
νλ―μ΄ 4000 ν ν° μ΄μ μμΈν μ€λͺ
νλΌ"],
|
227 |
+
["FAQ 20건μ μμΈνκ² μμ±νλΌ. 4000ν ν° μ΄μ μ¬μ©νλΌ."],
|
228 |
+
["μ¬μ© λ°©λ²κ³Ό μ°¨λ³μ , νΉμ§, κ°μ μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μ νλΈ μμ μ€ν¬λ¦½νΈ ννλ‘ μμ±νλΌ"],
|
229 |
+
["λ³Έ μλΉμ€λ₯Ό SEO μ΅μ ννμ¬ λΈλ‘κ·Έ ν¬μ€νΈ(λ°°κ²½ λ° νμμ±, κΈ°μ‘΄ μ μ¬ μλΉμ€μ λΉκ΅νμ¬ νΉμ₯μ , νμ©μ², κ°μΉ, κΈ°λν¨κ³Ό, κ²°λ‘ μ ν¬ν¨)λ‘ 4000 ν ν° μ΄μ μμ±νλΌ"],
|
230 |
+
["νΉν μΆμμ νμ©ν κΈ°μ λ° λΉμ¦λμ€λͺ¨λΈ μΈ‘λ©΄μ ν¬ν¨νμ¬ νΉν μΆμμ ꡬμ±μ λ§κ² νμ μ μΈ μ°½μ λ°λͺ
λ΄μ©μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μμ±νλΌ."],
|
231 |
+
["κ³μ μ΄μ΄μ λ΅λ³νλΌ"],
|
232 |
+
],
|
233 |
+
inputs=msg,
|
234 |
+
label="μμ μ ν",
|
235 |
+
)
|
236 |
+
|
237 |
with gr.Tab("λ°μ΄ν° λ³ν"):
|
238 |
gr.Markdown("### CSV νμΌ μ
λ‘λ λ° Parquet λ³ν")
|
239 |
with gr.Row():
|
|
|
251 |
# 파일을 다운로드할 수 있도록 바이너리 데이터로 읽기
|
252 |
with open(parquet_filename, "rb") as f:
|
253 |
data = f.read()
|
254 |
+
return message, load_parquet(parquet_filename), data
|
255 |
else:
|
256 |
return message, "", None
|
257 |
|
|
|
278 |
def handle_parquet_upload(file_path):
|
279 |
message, parquet_content, parquet_json = upload_parquet(file_path)
|
280 |
if parquet_json:
|
281 |
+
return message, parquet_content, parquet_json
|
282 |
else:
|
283 |
+
return message, "", None
|
284 |
|
285 |
parquet_upload_button.click(
|
286 |
handle_parquet_upload,
|
|
|
290 |
|
291 |
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
292 |
chatbot_data_upload = gr.Chatbot(label="μ±λ΄ λ°μ΄ν° μ
λ‘λ", elem_id="chatbot-data-upload")
|
293 |
+
msg_data_upload = gr.Textbox(label="λ©μμ§ μ
λ ₯", placeholder="μ¬κΈ°μ λ©μμ§λ₯Ό μ
λ ₯νμΈμ...")
|
294 |
send_data_upload = gr.Button("μ μ‘")
|
295 |
|
296 |
# 챗봇 메시지 처리 함수 (데이터 업로드 버전)
|
297 |
def handle_message_data_upload(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
|
298 |
+
# Parquet 데이터가 업로드된 경우, 추가적인 로직을 구현할 수 있음
|
299 |
+
# 현재는 기본적으로 메시지를 처리
|
300 |
history = history or []
|
301 |
history.append({"role": "user", "content": message})
|
302 |
# Generate response
|
|
|
320 |
gr.Markdown("### ν
μ€νΈλ₯Ό μ
λ ₯νλ©΄ CSVλ‘ λ³ν ν ParquetμΌλ‘ μλ μ νλ©λλ€.")
|
321 |
with gr.Row():
|
322 |
with gr.Column():
|
323 |
+
text_input = gr.Textbox(label="ν
μ€νΈ μ
λ ₯ (κ° νμ `id,text,label,metadata` νμμΌλ‘ μ
λ ₯)", lines=10, placeholder="μ: 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2")
|
324 |
convert_button = gr.Button("λ³ν λ° λ€μ΄λ‘λ")
|
325 |
convert_status = gr.Textbox(label="λ³ν μν", interactive=False)
|
326 |
parquet_preview_convert = gr.Markdown(label="Parquet νμΌ λ―Έλ¦¬λ³΄κΈ°")
|
327 |
download_parquet_convert = gr.File(label="Parquet νμΌ λ€μ΄λ‘λ", type="binary", interactive=False)
|
328 |
|
329 |
+
# 변환 버튼 클릭 시 실행할 함수
|
330 |
def handle_text_to_parquet(text):
|
331 |
message, parquet_content, file_data = text_to_parquet(text)
|
332 |
if file_data:
|
|
|
354 |
|
355 |
if __name__ == "__main__":
|
356 |
demo.launch()
|
357 |
+
|