|
import csv
import os
from typing import Dict, Iterator, List, Optional, Tuple, Union

import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
|
|
|
|
|
# Module-wide inference client shared by every chat request.
# The access token comes from the HF_TOKEN environment variable; None is
# passed when the variable is not set.
hf_client = InferenceClient(
    model="CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.environ.get("HF_TOKEN"),
)
|
|
|
def load_code(filename: str) -> str:
    """Return the UTF-8 text of ``filename``, or a readable error message.

    The function never raises: failures are reported through the returned
    string so that the result can be embedded directly into a prompt.

    Args:
        filename: Path of the text file to read.

    Returns:
        The file content on success. If the file does not exist, a
        "file not found" message that includes ``filename``; for any other
        failure, a generic read-error message that includes the exception text.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        return f"{filename} 파일을 찾을 수 없습니다."
    except Exception as e:
        # Deliberately broad: callers expect a string, not an exception.
        return f"파일을 읽는 중 오류가 발생했습니다: {e}"
|
|
|
def load_parquet(filename: str) -> str:
    """Return a Markdown preview of a Parquet file, or a readable error message.

    The file is read with the ``pyarrow`` engine and its first 10 rows are
    rendered as a Markdown table without the index column. The function never
    raises: failures are reported through the returned string.

    Args:
        filename: Path of the Parquet file to preview.

    Returns:
        The Markdown table on success. If the file does not exist, a
        "file not found" message that includes ``filename``; for any other
        failure, a generic read-error message that includes the exception text.
    """
    try:
        df = pd.read_parquet(filename, engine='pyarrow')
        return df.head(10).to_markdown(index=False)
    except FileNotFoundError:
        return f"{filename} 파일을 찾을 수 없습니다."
    except Exception as e:
        # Deliberately broad: callers expect a string, not an exception.
        return f"파일을 읽는 중 오류가 발생했습니다: {e}"
|
|
|
|
|
# Source listings of the three demo services, read once at import time.
# load_code returns a message string instead of raising, so each name is
# always bound to text even when a file is missing.
fashion_code, uhdimage_code, MixGEN_code = (
    load_code(path) for path in ('fashion.cod', 'uhdimage.cod', 'mgen.cod')
)

# Markdown preview (first rows) of the bundled sample dataset.
test_parquet_content = load_parquet('test.parquet')
|
|
|
def _parquet_block_from_history(history: List[Dict[str, str]]) -> str:
    """Return the fenced ``markdown`` block of the first assistant turn that shows test.parquet, else ""."""
    marker = 'test.parquet 파일 내용'
    for item in history:
        content = item.get('content')
        if item.get('role') == 'assistant' and isinstance(content, str) and marker in content:
            try:
                return content.split("```markdown\n")[1].split("\n```")[0]
            except IndexError:
                # Marker present but no fenced block: treat as "nothing found".
                return ""
    return ""


def respond(
    message: str,
    history: List[Dict[str, str]],
    system_message: str = "",
    max_tokens: int = 4000,
    temperature: float = 0.7,
    top_p: float = 0.9,
) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    This is a generator: every yielded value is the reply accumulated so far,
    so the last yielded value is the complete answer.

    A few exact (case-insensitive) messages act as commands:

    * "패션 코드 실행", "UHD 이미지 코드 실행", "MixGEN 코드 실행" attach the
      corresponding service source code to the system prompt and replace the
      user message with an instruction to announce readiness.
    * "test.parquet 실행" attaches the test.parquet preview. The preview is
      taken from an earlier assistant turn when one contains it, otherwise
      from the module-level ``test_parquet_content``.
    * "csv 업로드" replaces the message with a hint to use the second tab.

    Args:
        message: The latest user message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            It should NOT already contain ``message``; it is appended here.
        system_message: Optional extra system instructions, appended after the
            built-in system prompt.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated reply text. If inference fails, a single error message
        string is yielded instead of raising.
    """
    history = history or []

    system_prefix = (
        "반드시 한글로 답변할 것. 너는 주어진 소스코드를 기반으로 "
        "\"서비스 사용 설명 및 안내, Q&A를 하는 역할이다\". "
        "아주 친절하고 자세하게 4000토큰 이상 Markdown 형식으로 작성하라. "
        "너는 코드를 기반으로 사용 설명 및 질의 응답을 진행하며, 이용자에게 도움을 주어야 한다. "
        "이용자가 궁금해 할 만한 내용에 친절하게 알려주도록 하라. "
        "코드 전체 내용에 대해서는 보안을 유지하고, 키 값 및 엔드포인트와 구체적인 모델을 공개하지 마라."
    )

    # Honour the caller-supplied system message (it was previously dropped).
    if system_message and system_message.strip():
        system_prefix += f"\n\n{system_message.strip()}"

    # command -> (context title, source code, replacement user message)
    ready_suffix = "을 통해 테스트 해보라고 출력하라."
    code_commands = {
        "패션 코드 실행": (
            "패션 코드 내용",
            fashion_code,
            "패션 가상피팅에 대한 내용을 학습하였고, 설명할 준비가 되어있다고 알리고 "
            "서비스 URL(https://aiqcamp-fash.hf.space)" + ready_suffix,
        ),
        "uhd 이미지 코드 실행": (
            "UHD 이미지 코드 내용",
            uhdimage_code,
            "UHD 이미지 생성에 대한 내용을 학습하였고, 설명할 준비가 되어있다고 알리고 "
            "서비스 URL(https://openfree-ultpixgen.hf.space)" + ready_suffix,
        ),
        "mixgen 코드 실행": (
            "MixGEN 코드 내용",
            MixGEN_code,
            "MixGEN3 이미지 생성에 대한 내용을 학습하였고, 설명할 준비가 되어있다고 알리고 "
            "서비스 URL(https://openfree-mixgen3.hf.space)" + ready_suffix,
        ),
    }

    command = message.lower()
    if command in code_commands:
        title, code, message = code_commands[command]
        system_prefix += f"\n\n{title}:\n```python\n{code}\n```"
    elif command == "test.parquet 실행":
        # Fall back to the preview loaded at import time; otherwise the model
        # would be told it "learned" an empty table.
        parquet_content = _parquet_block_from_history(history) or test_parquet_content
        system_prefix += f"\n\ntest.parquet 파일 내용:\n```markdown\n{parquet_content}\n```"
        message = (
            "test.parquet 파일에 대한 내용을 학습하였고, 관련 설명 및 Q&A를 진행할 준비가 되어있다. "
            "궁금한 점이 있으면 물어보라."
        )
    elif command == "csv 업로드":
        message = "CSV 파일을 업로드하려면 두 번째 탭을 사용하세요."

    # Only role/content are forwarded; any extra keys on history items are dropped.
    messages = [{"role": "system", "content": system_prefix}]
    messages.extend({"role": chat['role'], "content": chat['content']} for chat in history)
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in hf_client.chat_completion(
            messages,
            max_tokens=int(max_tokens),  # UI sliders may deliver a float
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                # Some stream events carry no choices; nothing to append.
                continue
            token = chunk.choices[0].delta.get('content', None)
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"추론 중 오류가 발생했습니다: {e}"
|
|
|
def upload_csv(file_path: str) -> Tuple[str, str]:
    """Convert an uploaded comma-separated CSV file to Parquet.

    The CSV must contain the columns ``id``, ``text``, ``label`` and
    ``metadata``. Duplicate rows are dropped, missing values are replaced by
    empty strings and the columns are cast to int32 / string / category /
    string. The Parquet file is written to the current working directory
    under the CSV's base name with a ``.parquet`` extension.

    Args:
        file_path: Path of the uploaded CSV file.

    Returns:
        ``(status_message, preview)``. ``preview`` is the Markdown preview of
        the written Parquet file, or ``""`` when validation or conversion
        failed (the reason is in ``status_message``). Never raises.
    """
    try:
        df = pd.read_csv(file_path, sep=',')

        required_columns = {'id', 'text', 'label', 'metadata'}
        missing_columns = required_columns - set(df.columns)
        if missing_columns:
            # sorted(): set order is not stable across runs, the message should be.
            return f"CSV 파일에 다음 필수 컬럼이 누락되었습니다: {', '.join(sorted(missing_columns))}", ""

        # Clean, then enforce the storage schema.
        df = df.drop_duplicates().fillna('')
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})

        parquet_filename = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

        parquet_content = load_parquet(parquet_filename)
        return f"{parquet_filename} 파일이 성공적으로 업로드되고 변환되었습니다.", parquet_content
    except Exception as e:
        # Deliberately broad: the UI shows the message instead of crashing.
        return f"CSV 파일 업로드 및 변환 중 오류가 발생했습니다: {e}", ""
|
|
|
def upload_parquet(file_path: str) -> Tuple[str, str, Union[str, Dict]]:
    """Load an uploaded Parquet file for preview and for chat context.

    Args:
        file_path: Path of the uploaded Parquet file.

    Returns:
        ``(status_message, preview, data)``:

        * ``preview`` is a Markdown table of the first 10 rows, the same
          preview size ``load_parquet`` uses, so a large upload cannot flood
          the page.
        * ``data`` is the whole frame serialised with ``DataFrame.to_json()``
          (a JSON string) on success, and an empty dict on failure.

        On failure ``preview`` is ``""`` and the reason is in
        ``status_message``. Never raises.
    """
    try:
        df = pd.read_parquet(file_path, engine='pyarrow')
        parquet_content = df.head(10).to_markdown(index=False)
        return "Parquet 파일이 성공적으로 업로드되었습니다.", parquet_content, df.to_json()
    except Exception as e:
        # Deliberately broad: the UI shows the message instead of crashing.
        return f"Parquet 파일 업로드 중 오류가 발생했습니다: {e}", "", {}
|
|
|
def text_to_parquet(text: str) -> Tuple[str, str, bytes]:
    """Convert ``id,text,label,metadata`` lines to a Parquet file.

    Each non-blank line is parsed as one CSV record, so quoted fields may
    contain commas and Windows line endings are handled. The result is
    written to ``text_to_parquet.parquet`` in the current working directory
    and also returned as raw bytes.

    Args:
        text: The records, one per line.

    Returns:
        ``(status_message, preview, file_bytes)``. On failure (wrong number
        of fields, non-integer id, write error, ...) ``preview`` is ``""``,
        ``file_bytes`` is ``b""`` and the reason is in ``status_message``.
        Never raises.
    """
    try:
        # csv.reader instead of str.split(','): honours quoting; blank lines are skipped.
        lines = [line for line in text.splitlines() if line.strip()]
        rows = list(csv.reader(lines))
        df = pd.DataFrame(rows, columns=['id', 'text', 'label', 'metadata'])

        # Enforce the storage schema.
        df = df.astype({'id': 'int32', 'text': 'string', 'label': 'category', 'metadata': 'string'})

        parquet_filename = 'text_to_parquet.parquet'
        df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')

        parquet_content = load_parquet(parquet_filename)

        with open(parquet_filename, "rb") as f:
            file_bytes = f.read()

        return f"{parquet_filename} 파일이 성공적으로 변환되었습니다.", parquet_content, file_bytes
    except Exception as e:
        # Deliberately broad: the UI shows the message instead of crashing.
        return f"텍스트 변환 중 오류가 발생했습니다: {e}", "", b""
|
|
|
|
|
# Custom stylesheet handed to gr.Blocks below: hides the `footer` element and
# gives both chat panels (#chatbot-container, #chatbot-data-upload) a fixed
# 600px scrollable height with 14px message text.
css = """
footer {
    visibility: hidden;
}
#chatbot-container, #chatbot-data-upload {
    height: 600px;
    overflow-y: scroll;
}
#chatbot-container .message, #chatbot-data-upload .message {
    font-size: 14px;
}
"""
|
|
|
|
|
# Upper bound on the characters of uploaded Parquet JSON forwarded to the
# model, so that a large file cannot dominate the prompt.
_PARQUET_CONTEXT_LIMIT = 8000


def _stream_chat(prompt, shown_message, history, system_message, max_tokens, temperature, top_p):
    """Stream one chat turn as ``(chat_history, textbox_value)`` updates.

    ``shown_message`` is what appears in the chat window; ``prompt`` is what
    is sent to the model. ``respond`` appends the prompt to the history it
    receives, so it is given the turns *before* this one; passing the updated
    history would send the user message twice.
    """
    prior = list(history or [])
    conversation = prior + [
        {"role": "user", "content": shown_message},
        {"role": "assistant", "content": ""},
    ]
    # Show the user's message and clear the textbox before the first token arrives.
    yield conversation, ""
    try:
        for partial in respond(prompt, prior, system_message, max_tokens, temperature, top_p):
            conversation[-1]["content"] = partial
            yield conversation, ""
    except Exception as e:
        conversation[-1]["content"] = f"추론 중 오류가 발생했습니다: {e}"
        yield conversation, ""


def _with_parquet_context(message, parquet_data):
    """Prefix ``message`` with the (truncated) uploaded Parquet data, if any."""
    if not parquet_data:
        return message
    snippet = str(parquet_data)[:_PARQUET_CONTEXT_LIMIT]
    return f"업로드된 Parquet 데이터(JSON):\n```json\n{snippet}\n```\n\n질문: {message}"


with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
    gr.Markdown("# LLM 서비스 인터페이스")

    # ---- Tab 1: plain chat -------------------------------------------------
    with gr.Tab("챗봇"):
        gr.Markdown("### LLM과 대화하기")
        chatbot = gr.Chatbot(label="챗봇", type="messages", elem_id="chatbot-container")
        msg = gr.Textbox(label="메시지 입력", placeholder="여기에 메시지를 입력하세요...")
        send = gr.Button("전송")

        # These generation settings are shared with the data-upload chat tab.
        with gr.Accordion("시스템 프롬프트 및 옵션 설정", open=False):
            system_message = gr.Textbox(label="System Message", value="너는 AI 조언자 역할이다.")
            max_tokens = gr.Slider(minimum=1, maximum=8000, value=4000, label="Max Tokens")
            temperature = gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
            top_p = gr.Slider(minimum=0, maximum=1, value=0.9, label="Top P")

        def handle_message(message, history, system_message, max_tokens, temperature, top_p):
            """Stream the reply to ``message`` into the chat window and clear the textbox."""
            yield from _stream_chat(
                message, message, history, system_message, max_tokens, temperature, top_p
            )

        send.click(
            handle_message,
            inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p],
            outputs=[chatbot, msg],
        )

        # The first four examples are the exact command strings recognised by respond().
        with gr.Accordion("예제", open=False):
            gr.Examples(
                examples=[
                    ["패션 코드 실행"],
                    ["UHD 이미지 코드 실행"],
                    ["MixGEN 코드 실행"],
                    ["test.parquet 실행"],
                    ["상세한 사용 방법을 마치 화면을 보면서 설명하듯이 4000 토큰 이상 자세히 설명하라"],
                    ["FAQ 20건을 상세하게 작성하라. 4000토큰 이상 사용하라."],
                    ["사용 방법과 차별점, 특징, 강점을 중심으로 4000 토큰 이상 유튜브 영상 스크립트 형태로 작성하라"],
                    ["본 서비스를 SEO 최적화하여 블로그 포스트(배경 및 필요성, 기존 유사 서비스와 비교하여 특장점, 활용처, 가치, 기대효과, 결론을 포함)로 4000 토큰 이상 작성하라"],
                    ["특허 출원에 활용할 기술 및 비즈니스모델 측면을 포함하여 특허 출원서 구성에 맞게 혁신적인 창의 발명 내용을 중심으로 4000 토큰 이상 작성하라."],
                    ["계속 이어서 답변하라"],
                ],
                inputs=msg,
                label="예제 선택",
            )

    # ---- Tab 2: CSV -> Parquet ---------------------------------------------
    with gr.Tab("데이터 변환"):
        gr.Markdown("### CSV 파일 업로드 및 Parquet 변환")
        with gr.Row():
            with gr.Column():
                csv_file = gr.File(label="CSV 파일 업로드", type="filepath")
                upload_button = gr.Button("업로드 및 변환")
                upload_status = gr.Textbox(label="업로드 상태", interactive=False)
                parquet_preview = gr.Markdown(label="Parquet 파일 미리보기")
                # A read-only File component offers the converted file for download.
                download_button = gr.File(label="Parquet 파일 다운로드", interactive=False)

        def handle_csv_upload(file_path: str) -> Tuple[str, str, Optional[str]]:
            """Convert the uploaded CSV; return status, preview and the Parquet path (or None)."""
            message, parquet_content = upload_csv(file_path)
            if not parquet_content:
                return message, "", None
            # upload_csv returns the preview, not the file name: rebuild the
            # name it wrote to (CSV base name + ".parquet" in the working directory).
            parquet_path = os.path.splitext(os.path.basename(file_path))[0] + '.parquet'
            if not os.path.isfile(parquet_path):
                return message, parquet_content, None
            return message, parquet_content, os.path.abspath(parquet_path)

        upload_button.click(
            handle_csv_upload,
            inputs=csv_file,
            outputs=[upload_status, parquet_preview, download_button],
        )

        gr.Markdown("### 기존 Parquet 파일")
        gr.Markdown(f"**test.parquet 파일 내용:**\n```markdown\n{test_parquet_content}\n```")

    # ---- Tab 3: chat about an uploaded Parquet file ------------------------
    with gr.Tab("챗봇 데이터 업로드"):
        gr.Markdown("### Parquet 파일 업로드 및 질문하기")
        with gr.Row():
            with gr.Column():
                parquet_upload = gr.File(label="Parquet 파일 업로드", type="filepath")
                parquet_upload_button = gr.Button("업로드")
                parquet_upload_status = gr.Textbox(label="업로드 상태", interactive=False)
                parquet_preview_chat = gr.Markdown(label="Parquet 파일 미리보기")
                # Holds the uploaded data (JSON) for the chat handler below.
                parquet_data_state = gr.State()

        def handle_parquet_upload(file_path: str) -> Tuple[str, str, Union[str, Dict]]:
            """Load the uploaded Parquet file; return status, preview and its JSON for the session state."""
            message, parquet_content, parquet_json = upload_parquet(file_path)
            if parquet_json:
                return message, parquet_content, parquet_json
            return message, "", {}

        parquet_upload_button.click(
            handle_parquet_upload,
            inputs=parquet_upload,
            outputs=[parquet_upload_status, parquet_preview_chat, parquet_data_state],
        )

        gr.Markdown("### LLM과 대화하기")
        chatbot_data_upload = gr.Chatbot(label="챗봇 데이터 업로드", type="messages", elem_id="chatbot-data-upload")
        msg_data_upload = gr.Textbox(label="메시지 입력", placeholder="여기에 메시지를 입력하세요...")
        send_data_upload = gr.Button("전송")

        def handle_message_data_upload(
            message: str,
            history: List[Dict[str, str]],
            system_message: str,
            max_tokens: int,
            temperature: float,
            top_p: float,
            parquet_data: Dict,
        ):
            """Stream a reply that is grounded in the uploaded Parquet data, when present."""
            # The chat window shows the plain question; the model additionally
            # receives the uploaded data so it can actually answer about it.
            prompt = _with_parquet_context(message, parquet_data)
            yield from _stream_chat(
                prompt, message, history, system_message, max_tokens, temperature, top_p
            )

        send_data_upload.click(
            handle_message_data_upload,
            inputs=[msg_data_upload, chatbot_data_upload, system_message, max_tokens, temperature, top_p, parquet_data_state],
            outputs=[chatbot_data_upload, msg_data_upload],
        )

    # ---- Tab 4: pasted text -> Parquet -------------------------------------
    with gr.Tab("텍스트 to csv to parquet 변환"):
        gr.Markdown("### 텍스트를 입력하면 CSV로 변환 후 Parquet으로 자동 전환됩니다.")
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="텍스트 입력 (각 행은 `id,text,label,metadata` 형식으로 입력)",
                    lines=10,
                    placeholder="예: 1,Sample Text,Label1,Metadata1\n2,Another Text,Label2,Metadata2",
                )
                convert_button = gr.Button("변환 및 다운로드")
                convert_status = gr.Textbox(label="변환 상태", interactive=False)
                parquet_preview_convert = gr.Markdown(label="Parquet 파일 미리보기")
                download_parquet_convert = gr.File(label="Parquet 파일 다운로드", interactive=False)

        def handle_text_to_parquet(text: str) -> Tuple[str, str, Optional[str]]:
            """Convert pasted text; return status, preview and the Parquet path (or None)."""
            message, parquet_content, file_data = text_to_parquet(text)
            if file_data:
                # text_to_parquet always writes to this fixed file name.
                return message, parquet_content, os.path.abspath('text_to_parquet.parquet')
            return message, "", None

        convert_button.click(
            handle_text_to_parquet,
            inputs=text_input,
            outputs=[convert_status, parquet_preview_convert, download_parquet_convert],
        )

    gr.Markdown("## 주의 사항")
    gr.Markdown("""
- **CSV 업로드**: CSV 파일을 업로드하면 자동으로 Parquet 파일로 변환됩니다. CSV 파일은 반드시 **콤마(`,`)**로 구분되어야 합니다.
- **Parquet 미리보기**: 업로드된 Parquet 파일의 첫 10개 행이 미리보기로 표시됩니다.
- **LLM과의 대화**: 변환된 Parquet 파일 내용을 기반으로 LLM이 응답을 생성합니다.
- **Parquet 다운로드**: 변환된 Parquet 파일을 다운로드하려면 변환된 파일 옆의 다운로드 링크를 클릭하세요.
- **챗봇 데이터 업로드**: 챗봇 데이터 업로드 탭에서 Parquet 파일을 업로드하면 해당 데이터를 기반으로 질문과 답변을 진행할 수 있습니다.
- **텍스트 to csv to parquet**: 네 번째 탭에서 텍스트를 입력하면 자동으로 CSV로 변환되고, 다시 Parquet 파일로 전환되어 다운로드할 수 있습니다.
""")

    gr.Markdown("### Gradio 인터페이스를 사용하여 LLM 모델과 상호작용하세요!")
|
|
|
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()
|
|