Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ fashion_code = load_code('fashion.cod')
|
|
32 |
uhdimage_code = load_code('uhdimage.cod')
|
33 |
MixGEN_code = load_code('mgen.cod')
|
34 |
|
35 |
-
# Parquet 파일 로드
|
36 |
test_parquet_content = load_parquet('test.parquet')
|
37 |
|
38 |
def respond(
|
@@ -42,11 +42,13 @@ def respond(
|
|
42 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
43 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
44 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
|
|
45 |
):
|
46 |
global fashion_code, uhdimage_code, MixGEN_code, test_parquet_content
|
47 |
system_message = system_message or ""
|
48 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
49 |
|
|
|
50 |
if message.lower() == "ν¨μ
μ½λ μ€ν":
|
51 |
system_message += f"\n\nν¨μ
μ½λ λ΄μ©:\n```python\n{fashion_code}\n```"
|
52 |
message = "ν¨μ
κ°μνΌν
μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://aiqcamp-fash.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
@@ -59,7 +61,10 @@ def respond(
|
|
59 |
elif message.lower() == "test.parquet μ€ν":
|
60 |
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{test_parquet_content}\n```"
|
61 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
62 |
-
|
|
|
|
|
|
|
63 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
64 |
for val in history:
|
65 |
if val[0]:
|
@@ -70,44 +75,127 @@ def respond(
|
|
70 |
|
71 |
response = ""
|
72 |
try:
|
73 |
-
for
|
74 |
messages,
|
75 |
max_tokens=max_tokens,
|
76 |
stream=True,
|
77 |
temperature=temperature,
|
78 |
top_p=top_p,
|
79 |
):
|
80 |
-
token =
|
81 |
if token:
|
82 |
response += token
|
83 |
yield response
|
84 |
except Exception as e:
|
85 |
yield f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
[
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
if __name__ == "__main__":
|
113 |
demo.launch()
|
|
|
32 |
uhdimage_code = load_code('uhdimage.cod')
|
33 |
MixGEN_code = load_code('mgen.cod')
|
34 |
|
35 |
+
# 초기 Parquet 파일 로드 (기존 test.parquet)
|
36 |
test_parquet_content = load_parquet('test.parquet')
|
37 |
|
38 |
def respond(
|
|
|
42 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
43 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
44 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
45 |
+
parquet_content="", # Parquet νμΌ λ΄μ© μν
|
46 |
):
|
47 |
global fashion_code, uhdimage_code, MixGEN_code, test_parquet_content
|
48 |
system_message = system_message or ""
|
49 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
50 |
|
51 |
+
# λͺ
λ Ήμ΄ μ²λ¦¬
|
52 |
if message.lower() == "ν¨μ
μ½λ μ€ν":
|
53 |
system_message += f"\n\nν¨μ
μ½λ λ΄μ©:\n```python\n{fashion_code}\n```"
|
54 |
message = "ν¨μ
κ°μνΌν
μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://aiqcamp-fash.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
|
|
61 |
elif message.lower() == "test.parquet μ€ν":
|
62 |
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{test_parquet_content}\n```"
|
63 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
64 |
+
elif message.lower() == "csv μ
λ‘λ":
|
65 |
+
message = "CSV νμΌμ μ
λ‘λνλ €λ©΄ μλμ μ
λ‘λ λ²νΌμ μ¬μ©νμΈμ."
|
66 |
+
|
67 |
+
# μμ€ν
λ©μμ§μ μ¬μ©μ λ©μμ§ κ²°ν©
|
68 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
69 |
for val in history:
|
70 |
if val[0]:
|
|
|
75 |
|
76 |
response = ""
|
77 |
try:
|
78 |
+
for msg in hf_client.chat_completion(
|
79 |
messages,
|
80 |
max_tokens=max_tokens,
|
81 |
stream=True,
|
82 |
temperature=temperature,
|
83 |
top_p=top_p,
|
84 |
):
|
85 |
+
token = msg.choices[0].delta.get('content', None)
|
86 |
if token:
|
87 |
response += token
|
88 |
yield response
|
89 |
except Exception as e:
|
90 |
yield f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
91 |
|
92 |
+
def upload_csv(file):
    """Convert an uploaded CSV file to Parquet and report the outcome.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` is reported as an
            error instead of being dereferenced.

    Returns:
        A ``(status, preview)`` tuple of strings. On success ``preview`` is
        the value ``load_parquet`` returns for the freshly written Parquet
        file; on any failure it is ``""`` and ``status`` carries the error
        text.
    """
    if file is None:
        return "CSV 파일 업로드 및 변환 중 오류가 발생했습니다: 업로드된 파일이 없습니다.", ""

    # Target dtypes for the columns this app knows about. A CSV is not
    # required to contain all of them; only the ones present are cast.
    column_types = {'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'}

    try:
        # Accept both a file-like object with ``.name`` and a bare path.
        csv_path = getattr(file, 'name', file)
        df = pd.read_csv(csv_path).drop_duplicates()

        # Blank out gaps only in text-like columns. Writing '' into a numeric
        # column would turn it into a mixed-type object column, which breaks
        # both the integer cast of ``id`` and the Parquet write.
        text_like = [
            col for col in df.columns
            if col != 'id'
            and (col in column_types or not pd.api.types.is_numeric_dtype(df[col]))
        ]
        if text_like:
            df = df.fillna({col: '' for col in text_like})

        casts = {col: dtype for col, dtype in column_types.items() if col in df.columns}
        if 'id' in casts and df['id'].isna().any():
            # Plain int32 cannot hold missing values; use the nullable variant.
            casts['id'] = 'Int32'
        df = df.astype(casts)

        # The Parquet file is written next to the uploaded CSV.
        parquet_filename = os.path.splitext(csv_path)[0] + '.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

        parquet_content = load_parquet(parquet_filename)

        return f"{parquet_filename} 파일이 성공적으로 업로드되고 변환되었습니다.", parquet_content
    except Exception as e:
        # UI boundary: surface every failure as a status message rather than
        # letting the event handler crash.
        return f"CSV 파일 업로드 및 변환 중 오류가 발생했습니다: {str(e)}", ""
|
114 |
+
|
115 |
+
# Gradio Blocks 인터페이스 설정
|
116 |
+
with gr.Blocks() as demo:
    gr.Markdown("# LLM 서비스 인터페이스")

    # Per-session state, created up front so both columns can reference it.
    chat_history = gr.State([])  # list of [user_message, assistant_reply] turns
    parquet_content_state = gr.State(test_parquet_content)  # Parquet text handed to respond()

    with gr.Row():
        with gr.Column():
            # CSV upload section
            gr.Markdown("### CSV 파일 업로드 및 Parquet 변환")
            # ``type`` is left at the component default because the accepted
            # values differ between Gradio major versions.
            # NOTE(review): upload_csv reads the path from ``.name``; confirm
            # the default payload of the pinned Gradio version exposes it.
            csv_file = gr.File(label="CSV 파일 업로드")
            upload_button = gr.Button("업로드 및 변환")
            upload_status = gr.Textbox(label="업로드 상태", interactive=False)
            parquet_preview = gr.Markdown(label="Parquet 파일 미리보기")

            def store_upload(file, current_content):
                """Run upload_csv and remember the converted content for the chat.

                Returns the status text, the preview text, and the content to
                keep in session state (the previous content if conversion
                produced nothing).
                """
                status, content = upload_csv(file)
                return status, content, content or current_content

            # Convert on click; update the status, the preview and the state.
            upload_button.click(
                store_upload,
                inputs=[csv_file, parquet_content_state],
                outputs=[upload_status, parquet_preview, parquet_content_state],
            )

            gr.Markdown("### 기존 Parquet 파일")
            gr.Markdown(f"**test.parquet 파일 내용:**\n```markdown\n{test_parquet_content}\n```")

        with gr.Column():
            # Chat section
            gr.Markdown("### LLM과 대화하기")

            def chat_respond(
                message,
                history,
                system_message,
                max_tokens,
                temperature,
                top_p,
                parquet_content,
            ):
                """Stream the reply for ``message`` into the chat window.

                ``respond`` yields the reply accumulated so far, so each step
                re-renders the conversation as the previous turns plus the
                current partial turn. The same list is yielded for both
                outputs (chat display and stored history).
                """
                previous_turns = list(history or [])
                for partial in respond(
                    message,
                    previous_turns,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                    parquet_content,
                ):
                    turns = previous_turns + [[message, partial]]
                    yield turns, turns

            chat = gr.Chatbot()
            system_message = gr.Textbox(label="System Message", value="")
            max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens")
            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

            with gr.Row():
                user_message = gr.Textbox(label="메시지 입력")
                send_button = gr.Button("보내기")

            send_button.click(
                chat_respond,
                inputs=[
                    user_message,
                    chat_history,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                    parquet_content_state,
                ],
                outputs=[chat, chat_history],
            )

    gr.Markdown("## 사용 예제")
    gr.Examples(
        examples=[
            ["패션 코드 실행"],
            ["UHD 이미지 코드 실행"],
            ["MixGEN 코드 실행"],
            ["test.parquet 실행"],  # newly added example
            ["상세한 사용 방법을 마치 화면을 보면서 설명하듯이 4000 토큰 이상 자세히 설명하라"],
            ["FAQ 20건을 상세하게 작성하라. 4000토큰 이상 사용하라."],
            ["사용 방법과 차별점, 특징, 강점을 중심으로 4000 토큰 이상 유튜브 영상 스크립트 형태로 작성하라"],
            ["본 서비스를 SEO 최적화하여 블로그 포스트(배경 및 필요성, 기존 유사 서비스와 비교하여 특장점, 활용처, 가치, 기대효과, 결론을 포함)로 4000 토큰 이상 작성하라"],
            ["특허 출원에 활용할 기술 및 비즈니스모델 측면을 포함하여 특허 출원서 구성에 맞게 혁신적인 창의 발명 내용을 중심으로 4000 토큰 이상 작성하라."],
            ["계속 이어서 답변하라"],
        ],
        inputs=user_message,
        label="예제 선택",
    )

    gr.Markdown("## 주의 사항")
    gr.Markdown(
        "\n"
        "- **CSV 업로드**: CSV 파일을 업로드하면 자동으로 Parquet 파일로 변환됩니다.\n"
        "- **Parquet 미리보기**: 업로드된 Parquet 파일의 첫 10개 행이 미리보기로 표시됩니다.\n"
        "- **LLM과의 대화**: 변환된 Parquet 파일 내용을 기반으로 LLM이 응답을 생성합니다.\n"
    )

    gr.Markdown("### Gradio 인터페이스를 사용하여 LLM 모델과 상호작용하세요!")
|
199 |
|
200 |
if __name__ == "__main__":
|
201 |
demo.launch()
|