Update app.py
app.py
CHANGED
@@ -2,13 +2,12 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import pandas as pd
-from typing import List, Dict
+from typing import List, Dict, Tuple

 # Inference API client setup
 hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
-# hf_client = InferenceClient("CohereForAI/aya-23-35B", token=os.getenv("HF_TOKEN"))

-def load_code(filename):
+def load_code(filename: str) -> str:
     try:
         with open(filename, 'r', encoding='utf-8') as file:
             return file.read()
@@ -17,10 +16,9 @@ def load_code(filename):
     except Exception as e:
         return f"An error occurred while reading the file: {str(e)}"

-def load_parquet(filename):
+def load_parquet(filename: str) -> str:
     try:
         df = pd.read_parquet(filename, engine='pyarrow')
-        # Convert the first few rows of the DataFrame to a string (Markdown table format)
         return df.head(10).to_markdown(index=False)
     except FileNotFoundError:
         return f"{filename} file not found."
@@ -36,28 +34,28 @@ MixGEN_code = load_code('mgen.cod')
 test_parquet_content = load_parquet('test.parquet')

 def respond(
-    message,
+    message: str,
     history: List[Dict[str, str]],
-    system_message="",
-    max_tokens=4000,
-    temperature=0.7,
-    top_p=0.9,
-):
+    system_message: str = "",
+    max_tokens: int = 4000,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+) -> str:
     # System prompt setup
     system_prefix = """You must answer in Korean. Based on the given source code, your role is to "explain how to use the service, provide guidance, and handle Q&A". Write very kindly and in detail, in Markdown format, using at least 4,000 tokens. You explain usage and answer questions based on the code, and you must help the user. Kindly cover anything the user is likely to be curious about. Keep the full code content confidential, and do not reveal key values, endpoints, or the specific model."""
-
-    # Command handling
+
+    # Handle specific commands
     if message.lower() == "run fashion code":
-
+        system_prefix += f"\n\nFashion code contents:\n```python\n{fashion_code}\n```"
         message = "State that you have learned about the fashion virtual try-on feature and are ready to explain it, and tell the user to test it via the service URL (https://aiqcamp-fash.hf.space)."
     elif message.lower() == "run uhd image code":
-
+        system_prefix += f"\n\nUHD image code contents:\n```python\n{uhdimage_code}\n```"
         message = "State that you have learned about UHD image generation and are ready to explain it, and tell the user to test it via the service URL (https://openfree-ultpixgen.hf.space)."
    elif message.lower() == "run mixgen code":
-
+        system_prefix += f"\n\nMixGEN code contents:\n```python\n{MixGEN_code}\n```"
         message = "State that you have learned about MixGEN3 image generation and are ready to explain it, and tell the user to test it via the service URL (https://openfree-mixgen3.hf.space)."
     elif message.lower() == "run test.parquet":
-        #
+        # Find parquet_content in the history
         parquet_content = ""
         for item in history:
             if item['role'] == 'assistant' and 'test.parquet file contents' in item['content']:
@@ -66,19 +64,20 @@ def respond(
                 except IndexError:
                     parquet_content = ""
                 break
-
+        system_prefix += f"\n\ntest.parquet file contents:\n```markdown\n{parquet_content}\n```"
         message = "State that you have learned the contents of the test.parquet file and are ready to provide related explanations and Q&A. Tell the user to ask if they have any questions."
     elif message.lower() == "csv upload":
         message = "To upload a CSV file, use the second tab."

-    # System message and
-    messages = [{"role": "system", "content":
+    # Combine the system message with the conversation history
+    messages = [{"role": "system", "content": system_prefix}]
     for chat in history:
         messages.append({"role": chat['role'], "content": chat['content']})
     messages.append({"role": "user", "content": message})

     response = ""
     try:
+        # Send the messages to the model and receive the response
         for msg in hf_client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -93,18 +92,18 @@ def respond(
     except Exception as e:
         yield f"An error occurred during inference: {str(e)}"

-def upload_csv(file_path):
+def upload_csv(file_path: str) -> Tuple[str, str]:
     try:
-        # Read the CSV file
+        # Read the CSV file
         df = pd.read_csv(file_path, sep=',')

-        #
+        # Check required columns
         required_columns = {'id', 'text', 'label', 'metadata'}
         available_columns = set(df.columns)
         missing_columns = required_columns - available_columns

         if missing_columns:
-            return f"The CSV file is missing the following required columns: {', '.join(missing_columns)}",
+            return f"The CSV file is missing the following required columns: {', '.join(missing_columns)}", ""

         # Data cleansing
         df.drop_duplicates(inplace=True)
@@ -117,28 +116,28 @@ def upload_csv(file_path):
         parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
         df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

-        # Parquet file
+        # Preview the Parquet file contents
         parquet_content = load_parquet(parquet_filename)

-        return f"{parquet_filename} was uploaded and converted successfully.",
+        return f"{parquet_filename} was uploaded and converted successfully.", parquet_content
     except Exception as e:
-        return f"An error occurred while uploading and converting the CSV file: {str(e)}",
+        return f"An error occurred while uploading and converting the CSV file: {str(e)}", ""

-def upload_parquet(file_path):
+def upload_parquet(file_path: str) -> Tuple[str, str, Dict]:
     try:
         # Read the Parquet file
         df = pd.read_parquet(file_path, engine='pyarrow')

-        #
+        # Convert to Markdown for preview
         parquet_content = df.to_markdown(index=False)

-        return "The Parquet file was uploaded successfully.", parquet_content, df.to_json()
+        return "The Parquet file was uploaded successfully.", parquet_content, df.to_json()
     except Exception as e:
-        return f"An error occurred while uploading the Parquet file: {str(e)}",
+        return f"An error occurred while uploading the Parquet file: {str(e)}", "", {}

-def text_to_parquet(text):
+def text_to_parquet(text: str) -> Tuple[str, str, bytes]:
     try:
-        # Convert the text to a DataFrame (
+        # Convert the text to a DataFrame (each row separated by commas)
         data = [line.split(',') for line in text.strip().split('\n')]
         df = pd.DataFrame(data, columns=['id', 'text', 'label', 'metadata'])

@@ -149,17 +148,18 @@ def text_to_parquet(text):
         parquet_filename = 'text_to_parquet.parquet'
         df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

-        # Parquet file
+        # Preview the Parquet file contents
         parquet_content = load_parquet(parquet_filename)

-        # File data
+        # Read the file data as binary
         with open(parquet_filename, "rb") as f:
             data = f.read()

-        return f"{parquet_filename} was converted successfully.", parquet_content,
+        return f"{parquet_filename} was converted successfully.", parquet_content, data
     except Exception as e:
-        return f"An error occurred during text conversion: {str(e)}", "",
+        return f"An error occurred during text conversion: {str(e)}", "", b""

+# CSS settings
 css = """
 footer {
     visibility: hidden;
@@ -177,6 +177,7 @@ footer {
 with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
     gr.Markdown("# LLM Service Interface")

+    # First tab: chatbot
     with gr.Tab("Chatbot"):
         gr.Markdown("### Chat with the LLM")
         chatbot = gr.Chatbot(label="Chatbot", type="messages", elem_id="chatbot-container")
@@ -191,18 +192,18 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:

         # Chatbot message handling function
         def handle_message(message, history, system_message, max_tokens, temperature, top_p):
-            # Update history with user message
             history = history or []
             history.append({"role": "user", "content": message})
-            # Generate response
-            response = ""
             try:
-
-
-
+                # Generate the response
+                response_gen = respond(message, history, system_message, max_tokens, temperature, top_p)
+                response = ""
+                for partial in response_gen:
+                    response = partial  # final response
                 history.append({"role": "assistant", "content": response})
             except Exception as e:
-
+                response = f"An error occurred during inference: {str(e)}"
+                history.append({"role": "assistant", "content": response})
             return history, ""

         send.click(
@@ -211,9 +212,9 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             outputs=[chatbot, msg]
         )

-        # Examples
+        # Examples
         with gr.Accordion("Examples", open=False):
-
+            gr.Examples(
                 examples=[
                     ["run fashion code"],
                     ["run UHD image code"],
@@ -230,6 +231,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
                 label="Select an example",
             )

+    # Second tab: data conversion
     with gr.Tab("Data Conversion"):
         gr.Markdown("### Upload a CSV file and convert it to Parquet")
         with gr.Row():
@@ -238,16 +240,16 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             upload_button = gr.Button("Upload and Convert")
             upload_status = gr.Textbox(label="Upload status", interactive=False)
             parquet_preview = gr.Markdown(label="Parquet file preview")
-            download_button = gr.
+            download_button = gr.Download(label="Download Parquet file")

-
-        def handle_csv_upload(file_path):
+        def handle_csv_upload(file_path: str) -> Tuple[str, str, Tuple[bytes, str]]:
             message, parquet_filename = upload_csv(file_path)
             if parquet_filename:
-                #
+                # Read the Parquet file
                 with open(parquet_filename, "rb") as f:
                     data = f.read()
-
+                filename = os.path.basename(parquet_filename)
+                return message, parquet_filename, (data, filename)
             else:
                 return message, "", None

@@ -260,6 +262,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
         gr.Markdown("### Existing Parquet file")
         gr.Markdown(f"**test.parquet file contents:**\n```markdown\n{test_parquet_content}\n```")

+    # Third tab: chatbot data upload
     with gr.Tab("Chatbot Data Upload"):
         gr.Markdown("### Upload a Parquet file and ask questions")
         with gr.Row():
@@ -268,15 +271,14 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             parquet_upload_button = gr.Button("Upload")
             parquet_upload_status = gr.Textbox(label="Upload status", interactive=False)
             parquet_preview_chat = gr.Markdown(label="Parquet file preview")
-        # Hidden State for storing the state
         parquet_data_state = gr.State()

-        def handle_parquet_upload(file_path):
+        def handle_parquet_upload(file_path: str) -> Tuple[str, str, Dict]:
             message, parquet_content, parquet_json = upload_parquet(file_path)
             if parquet_json:
                 return message, parquet_content, parquet_json
             else:
-                return message, "",
+                return message, "", {}

         parquet_upload_button.click(
             handle_parquet_upload,
@@ -289,21 +291,19 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             msg_data_upload = gr.Textbox(label="Message input", placeholder="Type your message here...")
             send_data_upload = gr.Button("Send")

-
-        def handle_message_data_upload(message, history, system_message, max_tokens, temperature, top_p, parquet_data):
-            # Add logic that makes use of the Parquet data (e.g., data analysis, Q&A)
-            # For now, simply process the message
+        def handle_message_data_upload(message: str, history: List[Dict[str, str]], system_message: str, max_tokens: int, temperature: float, top_p: float, parquet_data: Dict) -> Tuple[List[Dict[str, str]], str]:
             history = history or []
             history.append({"role": "user", "content": message})
-            # Generate response
-            response = ""
             try:
-
-
-
+                # Logic that makes use of the Parquet data can be added here
+                response_gen = respond(message, history, system_message, max_tokens, temperature, top_p)
+                response = ""
+                for partial in response_gen:
+                    response = partial
                 history.append({"role": "assistant", "content": response})
             except Exception as e:
-
+                response = f"An error occurred during inference: {str(e)}"
+                history.append({"role": "assistant", "content": response})
             return history, ""

         send_data_upload.click(
@@ -312,21 +312,26 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             outputs=[chatbot_data_upload, msg_data_upload]
         )

+    # Fourth tab: text-to-CSV-to-Parquet conversion
     with gr.Tab("Text to CSV to Parquet Conversion"):
         gr.Markdown("### Enter text and it will be converted to CSV and then automatically to Parquet.")
         with gr.Row():
             with gr.Column():
-                text_input = gr.Textbox(
+                text_input = gr.Textbox(
+                    label="Text input (each row in the format `id,text,label,metadata`)",
+                    lines=10,
+                    placeholder="e.g. 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2"
+                )
                 convert_button = gr.Button("Convert and Download")
                 convert_status = gr.Textbox(label="Conversion status", interactive=False)
                 parquet_preview_convert = gr.Markdown(label="Parquet file preview")
-                download_parquet_convert = gr.
+                download_parquet_convert = gr.Download(label="Download Parquet file")

-
-        def handle_text_to_parquet(text):
+        def handle_text_to_parquet(text: str) -> Tuple[str, str, Tuple[bytes, str]]:
             message, parquet_content, file_data = text_to_parquet(text)
             if file_data:
-
+                filename = 'text_to_parquet.parquet'
+                return message, parquet_content, (file_data, filename)
             else:
                 return message, "", None

@@ -336,6 +341,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
             outputs=[convert_status, parquet_preview_convert, download_parquet_convert]
         )

+    # Notes
     gr.Markdown("## Notes")
     gr.Markdown("""
     - **CSV upload**: When you upload a CSV file, it is automatically converted to a Parquet file. The CSV file must be **comma (`,`)** separated.
@@ -349,4 +355,5 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
     gr.Markdown("### Use the Gradio interface to interact with the LLM model!")

 if __name__ == "__main__":
-
+    # share=True is not supported on Hugging Face Spaces, so it is omitted
+    demo.launch()
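The body of the `hf_client.chat_completion(...)` streaming loop in `respond` falls between the hunks above, so it is not visible in this diff. A minimal sketch of how such a loop is commonly written with `huggingface_hub`'s `InferenceClient` (assuming `stream=True` and the usual `choices[0].delta.content` chunk format) is shown below; `stream_chat` is an illustrative stand-in, not a name taken from the commit.

```python
# Illustrative sketch only; the actual loop body lies outside the hunks shown above.
# Assumes the huggingface_hub InferenceClient streaming API (stream=True), where each
# chunk exposes newly generated text at choices[0].delta.content.
import os
from huggingface_hub import InferenceClient

hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))

def stream_chat(messages, max_tokens=4000, temperature=0.7, top_p=0.9):
    response = ""
    for msg in hf_client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = msg.choices[0].delta.content
        if token is not None:
            response += token
            yield response  # callers such as handle_message keep only the last yielded value
```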