|
import gradio as gr |
|
from llama_cpp import Llama |
|
import requests |
|
|
|
|
|
# Registry of selectable models: UI label -> Hugging Face repo id and a glob
# pattern selecting the GGUF file inside that repo.
# Labels mirror the model name used in the repo id, so the UI never advertises
# a size that differs from the file that is actually downloaded.
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
    # Previously labelled "Llama-3.2-1.5B", although the repo holds the 1B model.
    "Llama-3.2-1B": {
        "repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
}
|
|
|
|
|
# The Llama instance currently held in memory; rebound by load_model().
current_model = None
|
|
|
def load_model(model_name):
    """Load the model registered under *model_name* and make it the active one.

    The model is fetched with ``Llama.from_pretrained`` using the repo id and
    file pattern stored in ``MODELS``.

    Args:
        model_name: A key of ``MODELS``.

    Returns:
        The new ``Llama`` instance, which is also stored in the module-level
        ``current_model``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    global current_model

    model_info = MODELS[model_name]
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        # Llama 3.2 Instruct does not use the ChatML prompt layout. Leaving
        # chat_format unset (None) lets llama-cpp-python pick the chat template
        # embedded in the GGUF metadata. A MODELS entry can still force a
        # specific format through an optional "chat_format" key.
        chat_format=model_info.get("chat_format"),
    )
    return current_model
|
|
|
|
|
# Eagerly load the first model listed in MODELS when the module is imported.
current_model = load_model(next(iter(MODELS)))
|
|
|
def _is_active_model(model, model_name):
    """Return True when *model* is the instance loaded for *model_name*."""
    if model is None:
        return False
    tagged_name = getattr(model, "_ui_model_name", None)
    if tagged_name is not None:
        return tagged_name == model_name
    # Untagged instance (loaded outside respond): fall back to checking
    # whether the UI label occurs in the model file path.
    return model_name in str(model.model_path)


def _history_to_messages(history):
    """Convert chat history into a list of role/content message dicts.

    Accepts (user, assistant) pairs as well as entries that already are
    ``{"role": ..., "content": ...}`` dicts. Empty parts are skipped, and so
    are dict entries whose content is not plain text.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to *message*.

    Args:
        message: The new user message.
        history: Earlier turns, either as (user, assistant) pairs or as
            role/content dicts.
        model_name: Key of ``MODELS`` selected in the UI; a different model
            than the active one triggers ``load_model``.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``create_chat_completion``.
        temperature: Forwarded to ``create_chat_completion``.
        top_p: Forwarded to ``create_chat_completion``.

    Yields:
        The reply text accumulated so far, once per streamed content chunk.
    """
    global current_model

    if not _is_active_model(current_model, model_name):
        current_model = load_model(model_name)
        # Remember which UI selection this instance serves, so that the next
        # request does not depend on the label appearing in the file name.
        current_model._ui_model_name = model_name
    model = current_model

    messages = [{"role": "system", "content": system_message}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    stream = model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    reply = ""
    for chunk in stream:
        # Chunks whose delta carries no "content" (or a None one) add no text.
        piece = chunk["choices"][0]["delta"].get("content")
        if piece is None:
            continue
        reply += piece
        yield reply
|
|
|
def get_chat_title(model_name: str) -> str:
    """Return the chat title shown for *model_name*."""
    return f"{model_name} - load other model in advanced settings"
|
|
|
# Model labels in registry order; the first one is the default selection.
_MODEL_NAMES = list(MODELS)

# Chat UI. The additional inputs are passed to respond() after
# (message, history), in the order listed here.
demo = gr.ChatInterface(
    respond,
    title=get_chat_title(_MODEL_NAMES[0]),
    # NOTE(review): the iOS link below reuses the Google Play URL; replace it
    # with the App Store link once confirmed.
    description=(
        "GGUF is a popular format, try models locally in: "
        "[LM Studio AI for PC](https://lmstudio.ai) | PocketPal AI "
        "([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & "
        "[iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) "
        "on Tablet or Mobile"
    ),
    additional_inputs=[
        gr.Dropdown(
            choices=_MODEL_NAMES,
            value=_MODEL_NAMES[0],
            label="Select Model",
            interactive=True,
            allow_custom_value=False,
            elem_id="model_selector",
            show_label=True,
        ),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
    # NOTE(review): #component-0 / #component-1 look like auto-assigned element
    # ids; confirm they still target the intended elements.
    css="""
    .message-wrap {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 8px;
        margin: 8px 0;
    }
    #component-0, #component-1 {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    #component-0:focus-within, #component-1:focus-within {
        border-color: #1976D2;
        box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
        background-color: #BBDEFB;
    }
    .input-container, .gradio-container .input-container {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    .input-container textarea, .input-container input[type="text"] {
        background-color: #E3F2FD;
        border: 2px solid #2196F3;
        border-radius: 8px;
        padding: 10px;
    }
    .input-container textarea:focus, .input-container input[type="text"]:focus {
        background-color: #BBDEFB;
        border-color: #1976D2;
        outline: none;
    }
    """,
)
|
|
|
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()