Update app.py
Browse files
app.py
CHANGED
|
@@ -35,6 +35,9 @@ MixGEN_code = load_code('mgen.cod')
|
|
| 35 |
# 초기 Parquet 파일 로드 (기존 test.parquet)
|
| 36 |
test_parquet_content = load_parquet('test.parquet')
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
def respond(
|
| 39 |
message,
|
| 40 |
history: List[Tuple[str, str]],
|
|
@@ -42,9 +45,8 @@ def respond(
|
|
| 42 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
| 43 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
| 44 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
| 45 |
-
parquet_content="", # Parquet νμΌ λ΄μ© μν
|
| 46 |
):
|
| 47 |
-
global fashion_code, uhdimage_code, MixGEN_code,
|
| 48 |
system_message = system_message or ""
|
| 49 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
| 50 |
|
|
@@ -59,10 +61,10 @@ def respond(
|
|
| 59 |
system_message += f"\n\nMixGEN μ½λ λ΄μ©:\n```python\n{MixGEN_code}\n```"
|
| 60 |
message = "MixGEN3 μ΄λ―Έμ§ μμ±μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://openfree-mixgen3.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
| 61 |
elif message.lower() == "test.parquet μ€ν":
|
| 62 |
-
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{
|
| 63 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
| 64 |
elif message.lower() == "csv μ
λ‘λ":
|
| 65 |
-
message = "CSV νμΌμ μ
λ‘λνλ €λ©΄
|
| 66 |
|
| 67 |
# 시스템 메시지와 사용자 메시지 결합
|
| 68 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
|
@@ -94,6 +96,14 @@ def upload_csv(file):
|
|
| 94 |
# CSV 파일 읽기 (filepath 타입이므로 file은 경로 문자열)
|
| 95 |
df = pd.read_csv(file)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# 데이터 클렌징
|
| 98 |
df.drop_duplicates(inplace=True)
|
| 99 |
df.fillna('', inplace=True)
|
|
@@ -108,93 +118,70 @@ def upload_csv(file):
|
|
| 108 |
# Parquet 파일 로드
|
| 109 |
parquet_content = load_parquet(parquet_filename)
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_content
|
| 112 |
except Exception as e:
|
| 113 |
return f"CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", ""
|
| 114 |
|
| 115 |
# Gradio Blocks 인터페이스 설정
|
| 116 |
-
with gr.Blocks() as demo:
|
| 117 |
gr.Markdown("# LLM μλΉμ€ μΈν°νμ΄μ€")
|
| 118 |
-
|
| 119 |
-
with gr.
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
chat_history = gr.State([]) # λν νμ€ν 리 μ μ₯
|
| 142 |
-
parquet_content_state = gr.State(test_parquet_content) # μ΄κΈ° Parquet λ΄μ© μν
|
| 143 |
-
|
| 144 |
-
def chat_respond(
|
| 145 |
-
message,
|
| 146 |
-
history,
|
| 147 |
-
system_message,
|
| 148 |
-
max_tokens,
|
| 149 |
-
temperature,
|
| 150 |
-
top_p,
|
| 151 |
-
parquet_content
|
| 152 |
-
):
|
| 153 |
-
# 현재 Parquet 내용 상태를 업데이트
|
| 154 |
-
response = respond(message, history, system_message, max_tokens, temperature, top_p, parquet_content)
|
| 155 |
-
return response
|
| 156 |
-
|
| 157 |
-
chat = gr.Chatbot()
|
| 158 |
-
system_message = gr.Textbox(label="System Message", value="")
|
| 159 |
-
max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens")
|
| 160 |
-
temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
|
| 161 |
-
top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")
|
| 162 |
-
|
| 163 |
-
with gr.Row():
|
| 164 |
-
user_message = gr.Textbox(label="λ©μμ§ μ
λ ₯")
|
| 165 |
-
send_button = gr.Button("보λ΄κΈ°")
|
| 166 |
-
|
| 167 |
-
send_button.click(
|
| 168 |
-
chat_respond,
|
| 169 |
-
inputs=[user_message, chat_history, system_message, max_tokens, temperature, top_p, parquet_preview],
|
| 170 |
-
outputs=[chat, chat_history]
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
gr.Markdown("## μ¬μ© μμ ")
|
| 174 |
-
gr.Examples(
|
| 175 |
-
examples=[
|
| 176 |
-
["ν¨μ
μ½λ μ€ν"],
|
| 177 |
-
["UHD μ΄λ―Έμ§ μ½λ μ€ν"],
|
| 178 |
-
["MixGEN μ½λ μ€ν"],
|
| 179 |
-
["test.parquet μ€ν"], # μλ‘μ΄ μμ μΆκ°
|
| 180 |
-
["μμΈν μ¬μ© λ°©λ²μ λ§μΉ νλ©΄μ 보면μ μ€λͺ
νλ―μ΄ 4000 ν ν° μ΄μ μμΈν μ€λͺ
νλΌ"],
|
| 181 |
-
["FAQ 20건μ μμΈνκ² μμ±νλΌ. 4000ν ν° μ΄μ μ¬μ©νλΌ."],
|
| 182 |
-
["μ¬μ© λ°©λ²κ³Ό μ°¨λ³μ , νΉμ§, κ°μ μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μ νλΈ μμ μ€ν¬λ¦½νΈ ννλ‘ μμ±νλΌ"],
|
| 183 |
-
["λ³Έ μλΉμ€λ₯Ό SEO μ΅μ ννμ¬ λΈλ‘κ·Έ ν¬μ€νΈ(λ°°κ²½ λ° νμμ±, κΈ°μ‘΄ μ μ¬ μλΉμ€μ λΉκ΅νμ¬ νΉμ₯μ , νμ©μ², κ°μΉ, κΈ°λν¨κ³Ό, κ²°λ‘ μ ν¬ν¨)λ‘ 4000 ν ν° μ΄μ μμ±νλΌ"],
|
| 184 |
-
["νΉν μΆμμ νμ©ν κΈ°μ λ° λΉμ¦λμ€λͺ¨λΈ μΈ‘λ©΄μ ν¬ν¨νμ¬ νΉν μΆμμ ꡬμ±μ λ§κ² νμ μ μΈ μ°½μ λ°λͺ
λ΄μ©μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μμ±νλΌ."],
|
| 185 |
-
["κ³μ μ΄μ΄μ λ΅λ³νλΌ"],
|
| 186 |
-
],
|
| 187 |
-
inputs=user_message,
|
| 188 |
-
label="μμ μ ν",
|
| 189 |
-
)
|
| 190 |
-
|
| 191 |
gr.Markdown("## μ£Όμ μ¬ν")
|
| 192 |
gr.Markdown("""
|
| 193 |
- **CSV μ
λ‘λ**: CSV νμΌμ μ
λ‘λνλ©΄ μλμΌλ‘ Parquet νμΌλ‘ λ³νλ©λλ€.
|
| 194 |
- **Parquet 미리보기**: μ
λ‘λλ Parquet νμΌμ 첫 10κ° νμ΄ λ―Έλ¦¬λ³΄κΈ°λ‘ νμλ©λλ€.
|
| 195 |
- **LLMκ³Όμ λν**: λ³νλ Parquet νμΌ λ΄μ©μ κΈ°λ°μΌλ‘ LLMμ΄ μλ΅μ μμ±ν©λλ€.
|
| 196 |
""")
|
| 197 |
-
|
| 198 |
gr.Markdown("### Gradio μΈν°νμ΄μ€λ₯Ό μ¬μ©νμ¬ LLM λͺ¨λΈκ³Ό μνΈμμ©νμΈμ!")
|
| 199 |
|
| 200 |
if __name__ == "__main__":
|
|
|
|
| 35 |
# 초기 Parquet 파일 로드 (기존 test.parquet)
|
| 36 |
test_parquet_content = load_parquet('test.parquet')
|
| 37 |
|
| 38 |
+
# 글로벌 변수로 Parquet 내용 저장
|
| 39 |
+
current_parquet_content = test_parquet_content
|
| 40 |
+
|
| 41 |
def respond(
|
| 42 |
message,
|
| 43 |
history: List[Tuple[str, str]],
|
|
|
|
| 45 |
max_tokens=1024, # κΈ°λ³Έκ° μΆκ°
|
| 46 |
temperature=0.7, # κΈ°λ³Έκ° μΆκ°
|
| 47 |
top_p=0.9, # κΈ°λ³Έκ° μΆκ°
|
|
|
|
| 48 |
):
|
| 49 |
+
global fashion_code, uhdimage_code, MixGEN_code, current_parquet_content
|
| 50 |
system_message = system_message or ""
|
| 51 |
system_prefix = """λ°λμ νκΈλ‘ λ΅λ³ν κ². λλ μ£Όμ΄μ§ μμ€μ½λλ₯Ό κΈ°λ°μΌλ‘ "μλΉμ€ μ¬μ© μ€λͺ
λ° μλ΄, Q&Aλ₯Ό νλ μν μ΄λ€". μμ£Ό μΉμ νκ³ μμΈνκ² 4000ν ν° μ΄μ Markdown νμμΌλ‘ μμ±νλΌ. λλ μ½λλ₯Ό κΈ°λ°μΌλ‘ μ¬μ© μ€λͺ
λ° μ§μ μλ΅μ μ§ννλ©°, μ΄μ©μμκ² λμμ μ£Όμ΄μΌ νλ€. μ΄μ©μκ° κΆκΈν΄ ν λ§ν λ΄μ©μ μΉμ νκ² μλ €μ£Όλλ‘ νλΌ. μ½λ μ 체 λ΄μ©μ λν΄μλ 보μμ μ μ§νκ³ , ν€ κ° λ° μλν¬μΈνΈμ ꡬ체μ μΈ λͺ¨λΈμ 곡κ°νμ§ λ§λΌ."""
|
| 52 |
|
|
|
|
| 61 |
system_message += f"\n\nMixGEN μ½λ λ΄μ©:\n```python\n{MixGEN_code}\n```"
|
| 62 |
message = "MixGEN3 μ΄λ―Έμ§ μμ±μ λν λ΄μ©μ νμ΅νμκ³ , μ€λͺ
ν μ€λΉκ° λμ΄μλ€κ³ μλ¦¬κ³ μλΉμ€ URL(https://openfree-mixgen3.hf.space)μ ν΅ν΄ ν
μ€νΈ ν΄λ³΄λΌκ³ μΆλ ₯νλΌ."
|
| 63 |
elif message.lower() == "test.parquet μ€ν":
|
| 64 |
+
system_message += f"\n\ntest.parquet νμΌ λ΄μ©:\n```markdown\n{current_parquet_content}\n```"
|
| 65 |
message = "test.parquet νμΌμ λν λ΄μ©μ νμ΅νμκ³ , κ΄λ ¨ μ€λͺ
λ° Q&Aλ₯Ό μ§νν μ€λΉκ° λμ΄μλ€. κΆκΈν μ μ΄ μμΌλ©΄ λ¬Όμ΄λ³΄λΌ."
|
| 66 |
elif message.lower() == "csv μ
λ‘λ":
|
| 67 |
+
message = "CSV νμΌμ μ
λ‘λνλ €λ©΄ λ λ²μ§Έ νμ μ¬μ©νμΈμ."
|
| 68 |
|
| 69 |
# 시스템 메시지와 사용자 메시지 결합
|
| 70 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
|
|
|
| 96 |
# CSV 파일 읽기 (filepath 타입이므로 file은 경로 문자열)
|
| 97 |
df = pd.read_csv(file)
|
| 98 |
|
| 99 |
+
# CSV 파일의 컬럼 확인
|
| 100 |
+
required_columns = {'id', 'text', 'label', 'metadata'}
|
| 101 |
+
available_columns = set(df.columns)
|
| 102 |
+
missing_columns = required_columns - available_columns
|
| 103 |
+
|
| 104 |
+
if missing_columns:
|
| 105 |
+
return f"CSV νμΌμ λ€μ νμ 컬λΌμ΄ λλ½λμμ΅λλ€: {', '.join(missing_columns)}", ""
|
| 106 |
+
|
| 107 |
# 데이터 클렌징
|
| 108 |
df.drop_duplicates(inplace=True)
|
| 109 |
df.fillna('', inplace=True)
|
|
|
|
| 118 |
# Parquet 파일 로드
|
| 119 |
parquet_content = load_parquet(parquet_filename)
|
| 120 |
|
| 121 |
+
# 글로벌 변수 업데이트
|
| 122 |
+
global current_parquet_content
|
| 123 |
+
current_parquet_content = parquet_content
|
| 124 |
+
|
| 125 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_content
|
| 126 |
except Exception as e:
|
| 127 |
return f"CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}", ""
|
| 128 |
|
| 129 |
# Gradio Blocks 인터페이스 설정
|
| 130 |
+
with gr.Blocks(theme="Nymbo/Nymbo_Theme",) as demo:
|
| 131 |
gr.Markdown("# LLM μλΉμ€ μΈν°νμ΄μ€")
|
| 132 |
+
|
| 133 |
+
with gr.Tab("μ±λ΄"):
|
| 134 |
+
gr.Markdown("### LLMκ³Ό λννκΈ°")
|
| 135 |
+
chat = gr.ChatInterface(
|
| 136 |
+
respond,
|
| 137 |
+
additional_inputs=[
|
| 138 |
+
gr.Textbox(label="System Message", value=""),
|
| 139 |
+
gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens"),
|
| 140 |
+
gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature"),
|
| 141 |
+
gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P"),
|
| 142 |
+
],
|
| 143 |
+
examples=[
|
| 144 |
+
["ν¨μ
μ½λ μ€ν"],
|
| 145 |
+
["UHD μ΄λ―Έμ§ μ½λ μ€ν"],
|
| 146 |
+
["MixGEN μ½λ μ€ν"],
|
| 147 |
+
["test.parquet μ€ν"], # μλ‘μ΄ μμ μΆκ°
|
| 148 |
+
["μμΈν μ¬μ© λ°©λ²μ λ§μΉ νλ©΄μ 보면μ μ€λͺ
νλ―μ΄ 4000 ν ν° μ΄μ μμΈν μ€λͺ
νλΌ"],
|
| 149 |
+
["FAQ 20건μ μμΈνκ² μμ±νλΌ. 4000ν ν° μ΄μ μ¬μ©νλΌ."],
|
| 150 |
+
["μ¬μ© λ°©λ²κ³Ό μ°¨λ³μ , νΉμ§, κ°μ μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μ νλΈ μμ μ€ν¬λ¦½νΈ ννλ‘ μμ±νλΌ"],
|
| 151 |
+
["λ³Έ μλΉμ€λ₯Ό SEO μ΅μ ννμ¬ λΈλ‘κ·Έ ν¬μ€νΈ(λ°°κ²½ λ° νμμ±, κΈ°μ‘΄ μ μ¬ μλΉμ€μ λΉκ΅νμ¬ νΉμ₯μ , νμ©μ², κ°μΉ, κΈ°λν¨κ³Ό, κ²°λ‘ μ ν¬ν¨)λ‘ 4000 ν ν° μ΄μ μμ±νλΌ"],
|
| 152 |
+
["νΉν μΆμμ νμ©ν κΈ°μ λ° λΉμ¦λμ€λͺ¨λΈ μΈ‘λ©΄μ ν¬ν¨νμ¬ νΉν μΆμμ ꡬμ±μ λ§κ² νμ μ μΈ μ°½μ λ°λͺ
λ΄μ©μ μ€μ¬μΌλ‘ 4000 ν ν° μ΄μ μμ±νλΌ."],
|
| 153 |
+
["κ³μ μ΄μ΄μ λ΅λ³νλΌ"],
|
| 154 |
+
],
|
| 155 |
+
theme="Nymbo/Nymbo_Theme",
|
| 156 |
+
cache_examples=False, # μΊμ± λΉνμ±ν μ€μ
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
with gr.Tab("λ°μ΄ν° λ³ν"):
|
| 160 |
+
gr.Markdown("### CSV νμΌ μ
λ‘λ λ° Parquet λ³ν")
|
| 161 |
+
with gr.Row():
|
| 162 |
+
with gr.Column():
|
| 163 |
+
csv_file = gr.File(label="CSV νμΌ μ
λ‘λ", type="filepath")
|
| 164 |
+
upload_button = gr.Button("μ
λ‘λ λ° λ³ν")
|
| 165 |
+
upload_status = gr.Textbox(label="μ
λ‘λ μν", interactive=False)
|
| 166 |
+
parquet_preview = gr.Markdown(label="Parquet νμΌ λ―Έλ¦¬λ³΄κΈ°")
|
| 167 |
+
|
| 168 |
+
# 업로드 버튼 클릭 시 실행할 함수
|
| 169 |
+
upload_button.click(
|
| 170 |
+
upload_csv,
|
| 171 |
+
inputs=csv_file,
|
| 172 |
+
outputs=[upload_status, parquet_preview]
|
| 173 |
+
)
|
| 174 |
|
| 175 |
+
gr.Markdown("### κΈ°μ‘΄ Parquet νμΌ")
|
| 176 |
+
gr.Markdown(f"**test.parquet νμΌ λ΄μ©:**\n```markdown\n{test_parquet_content}\n```")
|
| 177 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
gr.Markdown("## μ£Όμ μ¬ν")
|
| 179 |
gr.Markdown("""
|
| 180 |
- **CSV μ
λ‘λ**: CSV νμΌμ μ
λ‘λνλ©΄ μλμΌλ‘ Parquet νμΌλ‘ λ³νλ©λλ€.
|
| 181 |
- **Parquet 미리보기**: μ
λ‘λλ Parquet νμΌμ 첫 10κ° νμ΄ λ―Έλ¦¬λ³΄κΈ°λ‘ νμλ©λλ€.
|
| 182 |
- **LLMκ³Όμ λν**: λ³νλ Parquet νμΌ λ΄μ©μ κΈ°λ°μΌλ‘ LLMμ΄ μλ΅μ μμ±ν©λλ€.
|
| 183 |
""")
|
| 184 |
+
|
| 185 |
gr.Markdown("### Gradio μΈν°νμ΄μ€λ₯Ό μ¬μ©νμ¬ LLM λͺ¨λΈκ³Ό μνΈμμ©νμΈμ!")
|
| 186 |
|
| 187 |
if __name__ == "__main__":
|