Update app.py
Browse files
app.py
CHANGED
@@ -35,6 +35,9 @@ MixGEN_code = load_code('mgen.cod')
|
|
35 |
# 초기 Parquet 파일 로드 (기존 test.parquet)
|
36 |
test_parquet_content = load_parquet('test.parquet')
|
37 |
|
|
|
|
|
|
|
38 |
def respond(
|
39 |
message,
|
40 |
history: List[Tuple[str, str]],
|
@@ -42,9 +45,8 @@ def respond(
|
|
42 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
43 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
44 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
45 |
-
parquet_content="", # Parquet νμΌ λ΄μ© μν
|
46 |
):
|
47 |
-
global fashion_code, uhdimage_code, MixGEN_code,
|
48 |
system_message = system_message or ""
|
49 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
50 |
|
@@ -59,10 +61,10 @@ def respond(
|
|
59 |
system_message += f"\n\nMixGEN μ½λ λ΄μ©:\n```python\n{MixGEN_code}\n```"
|
60 |
message = "MixGEN3 μ΄λ―Έμ§ μμ±μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://openfree-mixgen3.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
61 |
elif message.lower() == "test.parquet μ€ν":
|
62 |
-
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{
|
63 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
64 |
elif message.lower() == "csv μ
λ‘λ":
|
65 |
-
message = "CSV νμΌμ μ
λ‘λνλ €λ©΄
|
66 |
|
67 |
# 시스템 메시지와 사용자 메시지 결합
|
68 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
@@ -94,6 +96,14 @@ def upload_csv(file):
|
|
94 |
# CSV 파일 읽기 (filepath 타입이므로 file은 경로 문자열)
|
95 |
df = pd.read_csv(file)
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# 데이터 클렌징
|
98 |
df.drop_duplicates(inplace=True)
|
99 |
df.fillna('', inplace=True)
|
@@ -108,93 +118,70 @@ def upload_csv(file):
|
|
108 |
# Parquet 파일 로드
|
109 |
parquet_content = load_parquet(parquet_filename)
|
110 |
|
|
|
|
|
|
|
|
|
111 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_content
|
112 |
except Exception as e:
|
113 |
return f"CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", ""
|
114 |
|
115 |
# Gradio Blocks 인터페이스 설정
|
116 |
-
with gr.Blocks() as demo:
|
117 |
gr.Markdown("# LLM μλΉμ€ μΈν°νμ΄μ€")
|
118 |
-
|
119 |
-
with gr.
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
chat_history = gr.State([]) # λν νμ€ν 리 μ μ₯
|
142 |
-
parquet_content_state = gr.State(test_parquet_content) # μ΄κΈ° Parquet λ΄μ© μν
|
143 |
-
|
144 |
-
def chat_respond(
|
145 |
-
message,
|
146 |
-
history,
|
147 |
-
system_message,
|
148 |
-
max_tokens,
|
149 |
-
temperature,
|
150 |
-
top_p,
|
151 |
-
parquet_content
|
152 |
-
):
|
153 |
-
# 현재 Parquet 내용 상태를 업데이트
|
154 |
-
response = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_content)
|
155 |
-
return response
|
156 |
-
|
157 |
-
chat = gr.Chatbot()
|
158 |
-
system_message = gr.Textbox(label="System Message", value="")
|
159 |
-
max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens")
|
160 |
-
temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
|
161 |
-
top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
|
162 |
-
|
163 |
-
with gr.Row():
|
164 |
-
user_message = gr.Textbox(label="λ©μμ§ μ
λ ₯")
|
165 |
-
send_button = gr.Button("보λ΄κΈ°")
|
166 |
-
|
167 |
-
send_button.click(
|
168 |
-
chat_respond,
|
169 |
-
inputs=[user_message, chat_history, system_message, max_tokens, temperature, top_p, parquet_preview],
|
170 |
-
outputs=[chat, chat_history]
|
171 |
-
)
|
172 |
-
|
173 |
-
gr.Markdown("## μ¬μ© μμ ")
|
174 |
-
gr.Examples(
|
175 |
-
examples=[
|
176 |
-
["ν¨μ
μ½λ μ€ν"],
|
177 |
-
["UHD μ΄λ―Έμ§ μ½λ μ€ν"],
|
178 |
-
["MixGEN μ½λ μ€ν"],
|
179 |
-
["test.parquet μ€ν"], # μλ‘μ΄ μμ μΆκ°
|
180 |
-
["μμΈν μ¬μ© λ°©λ²μ λ§μΉ νλ©΄μ 보면μ μ€λͺ
νλ―μ΄ 4000 ν ν° μ΄μ μμΈν μ€λͺ
νλΌ"],
|
181 |
-
["FAQ 20건μ μμΈνκ² μμ±νλΌ. 4000ν ν° μ΄μ μ¬μ©νλΌ."],
|
182 |
-
["μ¬μ© λ°©λ²κ³Ό μ°¨λ³μ , νΉμ§, κ°μ μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μ νλΈ μμ μ€ν¬λ¦½νΈ ννλ‘ μμ±νλΌ"],
|
183 |
-
["λ³Έ μλΉμ€λ₯Ό SEO μ΅μ ννμ¬ λΈλ‘κ·Έ ν¬μ€νΈ(λ°°κ²½ λ° νμμ±, κΈ°μ‘΄ μ μ¬ μλΉμ€μ λΉκ΅νμ¬ νΉμ₯μ , νμ©μ², κ°μΉ, κΈ°λν¨κ³Ό, κ²°λ‘ μ ν¬ν¨)λ‘ 4000 ν ν° μ΄μ μμ±νλΌ"],
|
184 |
-
["νΉν μΆμμ νμ©ν κΈ°μ λ° λΉμ¦λμ€λͺ¨λΈ μΈ‘λ©΄μ ν¬ν¨νμ¬ νΉν μΆμμ ꡬμ±μ λ§κ² νμ μ μΈ μ°½μ λ°λͺ
λ΄μ©μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μμ±νλΌ."],
|
185 |
-
["κ³μ μ΄μ΄μ λ΅λ³νλΌ"],
|
186 |
-
],
|
187 |
-
inputs=user_message,
|
188 |
-
label="μμ μ ν",
|
189 |
-
)
|
190 |
-
|
191 |
gr.Markdown("## μ£Όμ μ¬ν")
|
192 |
gr.Markdown("""
|
193 |
- **CSV μ
λ‘λ**: CSV νμΌμ μ
λ‘λνλ©΄ μλμΌλ‘ Parquet νμΌλ‘ λ³νλ©λλ€.
|
194 |
- **Parquet 미리보기**: μ
λ‘λλ Parquet νμΌμ 첫 10κ° νμ΄ λ―Έλ¦¬λ³΄κΈ°λ‘ νμλ©λλ€.
|
195 |
- **LLMκ³Όμ λν**: λ³νλ Parquet νμΌ λ΄μ©μ κΈ°λ°μΌλ‘ LLMμ΄ μλ΅μ μμ±ν©λλ€.
|
196 |
""")
|
197 |
-
|
198 |
gr.Markdown("### Gradio μΈν°νμ΄μ€λ₯Ό μ¬μ©νμ¬ LLM λͺ¨λΈκ³Ό μνΈμμ©νμΈμ!")
|
199 |
|
200 |
if __name__ == "__main__":
|
|
|
35 |
# 초기 Parquet 파일 로드 (기존 test.parquet)
|
36 |
test_parquet_content = load_parquet('test.parquet')
|
37 |
|
38 |
+
# 글로벌 변수로 Parquet 내용 저장
|
39 |
+
current_parquet_content = test_parquet_content
|
40 |
+
|
41 |
def respond(
|
42 |
message,
|
43 |
history: List[Tuple[str, str]],
|
|
|
45 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
46 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
47 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
|
|
48 |
):
|
49 |
+
global fashion_code, uhdimage_code, MixGEN_code, current_parquet_content
|
50 |
system_message = system_message or ""
|
51 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
52 |
|
|
|
61 |
system_message += f"\n\nMixGEN μ½λ λ΄μ©:\n```python\n{MixGEN_code}\n```"
|
62 |
message = "MixGEN3 μ΄λ―Έμ§ μμ±μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://openfree-mixgen3.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
63 |
elif message.lower() == "test.parquet μ€ν":
|
64 |
+
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{current_parquet_content}\n```"
|
65 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
66 |
elif message.lower() == "csv μ
λ‘λ":
|
67 |
+
message = "CSV νμΌμ μ
λ‘λνλ €λ©΄ λ λ²μ§Έ νμ μ¬μ©νμΈμ."
|
68 |
|
69 |
# 시스템 메시지와 사용자 메시지 결합
|
70 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
|
|
96 |
# CSV 파일 읽기 (filepath 타입이므로 file은 경로 문자열)
|
97 |
df = pd.read_csv(file)
|
98 |
|
99 |
+
# CSV 파일의 컬럼 확인
|
100 |
+
required_columns = {'id', 'text', 'label', 'metadata'}
|
101 |
+
available_columns = set(df.columns)
|
102 |
+
missing_columns = required_columns - available_columns
|
103 |
+
|
104 |
+
if missing_columns:
|
105 |
+
return f"CSV νμΌμ λ€μ νμ 컬λΌμ΄ λλ½λμμ΅λλ€: {', '.join(missing_columns)}", ""
|
106 |
+
|
107 |
# 데이터 클렌징
|
108 |
df.drop_duplicates(inplace=True)
|
109 |
df.fillna('', inplace=True)
|
|
|
118 |
# Parquet 파일 로드
|
119 |
parquet_content = load_parquet(parquet_filename)
|
120 |
|
121 |
+
# 글로벌 변수 업데이트
|
122 |
+
global current_parquet_content
|
123 |
+
current_parquet_content = parquet_content
|
124 |
+
|
125 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_content
|
126 |
except Exception as e:
|
127 |
return f"CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", ""
|
128 |
|
129 |
# Gradio Blocks 인터페이스 설정
|
130 |
+
with gr.Blocks(theme="Nymbo/Nymbo_Theme",) as demo:
|
131 |
gr.Markdown("# LLM μλΉμ€ μΈν°νμ΄μ€")
|
132 |
+
|
133 |
+
with gr.Tab("μ±λ΄"):
|
134 |
+
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
135 |
+
chat = gr.ChatInterface(
|
136 |
+
respond,
|
137 |
+
additional_inputs=[
|
138 |
+
gr.Textbox(label="System Message", value=""),
|
139 |
+
gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens"),
|
140 |
+
gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature"),
|
141 |
+
gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P"),
|
142 |
+
],
|
143 |
+
examples=[
|
144 |
+
["ν¨μ
μ½λ μ€ν"],
|
145 |
+
["UHD μ΄λ―Έμ§ μ½λ μ€ν"],
|
146 |
+
["MixGEN μ½λ μ€ν"],
|
147 |
+
["test.parquet μ€ν"], # μλ‘μ΄ μμ μΆκ°
|
148 |
+
["μμΈν μ¬μ© λ°©λ²μ λ§μΉ νλ©΄μ 보면μ μ€λͺ
νλ―μ΄ 4000 ν ν° μ΄μ μμΈν μ€λͺ
νλΌ"],
|
149 |
+
["FAQ 20건μ μμΈνκ² μμ±νλΌ. 4000ν ν° μ΄μ μ¬μ©νλΌ."],
|
150 |
+
["μ¬μ© λ°©λ²κ³Ό μ°¨λ³μ , νΉμ§, κ°μ μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μ νλΈ μμ μ€ν¬λ¦½νΈ ννλ‘ μμ±νλΌ"],
|
151 |
+
["λ³Έ μλΉμ€λ₯Ό SEO μ΅μ ννμ¬ λΈλ‘κ·Έ ν¬μ€νΈ(λ°°κ²½ λ° νμμ±, κΈ°μ‘΄ μ μ¬ μλΉμ€μ λΉκ΅νμ¬ νΉμ₯μ , νμ©μ², κ°μΉ, κΈ°λν¨κ³Ό, κ²°λ‘ μ ν¬ν¨)λ‘ 4000 ν ν° μ΄μ μμ±νλΌ"],
|
152 |
+
["νΉν μΆμμ νμ©ν κΈ°μ λ° λΉμ¦λμ€λͺ¨λΈ μΈ‘λ©΄μ ν¬ν¨νμ¬ νΉν μΆμμ ꡬμ±μ λ§κ² νμ μ μΈ μ°½μ λ°λͺ
λ΄μ©μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μμ±νλΌ."],
|
153 |
+
["κ³μ μ΄μ΄μ λ΅λ³νλΌ"],
|
154 |
+
],
|
155 |
+
theme="Nymbo/Nymbo_Theme",
|
156 |
+
cache_examples=False, # μΊμ± λΉνμ±ν μ€μ
|
157 |
+
)
|
158 |
+
|
159 |
+
with gr.Tab("λ°μ΄ν° λ³ν"):
|
160 |
+
gr.Markdown("### CSV νμΌ μ
λ‘λ λ° Parquet λ³ν")
|
161 |
+
with gr.Row():
|
162 |
+
with gr.Column():
|
163 |
+
csv_file = gr.File(label="CSV νμΌ μ
λ‘λ", type="filepath")
|
164 |
+
upload_button = gr.Button("μ
λ‘λ λ° λ³ν")
|
165 |
+
upload_status = gr.Textbox(label="μ
λ‘λ μν", interactive=False)
|
166 |
+
parquet_preview = gr.Markdown(label="Parquet νμΌ λ―Έλ¦¬λ³΄κΈ°")
|
167 |
+
|
168 |
+
# 업로드 버튼 클릭 시 실행할 함수
|
169 |
+
upload_button.click(
|
170 |
+
upload_csv,
|
171 |
+
inputs=csv_file,
|
172 |
+
outputs=[upload_status, parquet_preview]
|
173 |
+
)
|
174 |
|
175 |
+
gr.Markdown("### κΈ°μ‘΄ Parquet νμΌ")
|
176 |
+
gr.Markdown(f"**test.parquet νμΌ λ΄μ©:**\n```markdown\n{test_parquet_content}\n```")
|
177 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
gr.Markdown("## μ£Όμ μ¬ν")
|
179 |
gr.Markdown("""
|
180 |
- **CSV μ
λ‘λ**: CSV νμΌμ μ
λ‘λνλ©΄ μλμΌλ‘ Parquet νμΌλ‘ λ³νλ©λλ€.
|
181 |
- **Parquet 미리보기**: μ
λ‘λλ Parquet νμΌμ 첫 10κ° νμ΄ λ―Έλ¦¬λ³΄κΈ°λ‘ νμλ©λλ€.
|
182 |
- **LLMκ³Όμ λν**: λ³νλ Parquet νμΌ λ΄μ©μ κΈ°λ°μΌλ‘ LLMμ΄ μλ΅μ μμ±ν©λλ€.
|
183 |
""")
|
184 |
+
|
185 |
gr.Markdown("### Gradio μΈν°νμ΄μ€λ₯Ό μ¬μ©νμ¬ LLM λͺ¨λΈκ³Ό μνΈμμ©νμΈμ!")
|
186 |
|
187 |
if __name__ == "__main__":
|