# --- Hosting-page residue captured together with the file (not program code) ---
# expandme's picture
# Fixing model size to 1B ? - What wind.surf will do ?
# 154242e
# raw
# history blame
# 4.59 kB
import gradio as gr
from llama_cpp import Llama
import requests
# Models selectable in the UI.
#
# Each key is the label shown in the "Select Model" dropdown and is also the
# name passed to load_model()/respond(). respond() compares this name with the
# path of the loaded model file, so the key must mirror the size token used in
# the repository name ("3B", "1B").
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
    # Previously labelled "Llama-3.2-1.5B", although the repository below holds
    # the 1B model; the mismatched label could never be found in the model path.
    "Llama-3.2-1B": {
        "repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
}
# The single model instance shared by every chat request; it is replaced
# whenever load_model() is called.
current_model = None


def load_model(model_name):
    """Load the model registered under ``model_name`` and make it current.

    The repository id and filename pattern are taken from ``MODELS`` and handed
    to ``Llama.from_pretrained`` together with the fixed settings used by this
    app (``n_ctx=32768``, ``n_threads=2``, ``chat_format="chatml"``).

    Args:
        model_name: A key of ``MODELS``.

    Returns:
        The newly loaded ``Llama`` instance, which is also stored in the
        module-level ``current_model``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS``. The lookup
            happens before the current model is released, so an unknown name
            leaves the loaded model untouched.
    """
    global current_model
    model_info = MODELS[model_name]

    # Drop our reference to the previous model before loading the next one so
    # that both are not kept alive at the same time (its memory can only be
    # reclaimed if nothing else still references it).
    current_model = None

    # NOTE(review): chat_format is forced to "chatml" although both configured
    # repositories are Llama 3.2 Instruct builds - confirm this is intended.
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        chat_format="chatml",
    )
    return current_model


# Eagerly load the first configured model when the module is imported.
current_model = load_model(next(iter(MODELS)))
def _is_model_loaded(model, model_name):
    """Return True when ``model`` is the model selected as ``model_name``."""
    if model is None:
        return False
    # Models loaded by respond() carry the name they were selected under.
    tagged_name = getattr(model, "_ui_model_name", None)
    if tagged_name is not None:
        return tagged_name == model_name
    # Untagged model (e.g. the one loaded at import time): fall back to
    # looking for the selected name inside the model file path.
    return model_name in str(model.model_path)


def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` turns; empty/None entries of a
            turn are skipped.
        model_name: Name of the model to answer with. If it differs from the
            currently loaded model, that model is loaded via ``load_model``.
        system_message: Content of the leading system message.
        max_tokens: Passed through to ``create_chat_completion``.
        temperature: Passed through to ``create_chat_completion``.
        top_p: Passed through to ``create_chat_completion``.

    Yields:
        The reply text accumulated so far, once for every streamed chunk whose
        delta contains a ``"content"`` entry.
    """
    global current_model

    # Switch models only when a different one is requested. The selected name
    # is remembered on the model object itself, so detection does not depend on
    # the UI label happening to appear in the downloaded file's path.
    if not _is_model_loaded(current_model, model_name):
        current_model = load_model(model_name)
        current_model._ui_model_name = model_name

    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    stream = current_model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    reply = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # Some chunks carry only a role or an empty delta; skip those.
        if "content" in delta:
            reply += delta["content"]
            yield reply
def get_chat_title(model_name):
    """Build the chat window title: the model name followed by a usage hint."""
    hint = " - load other model in advanced settings"
    return f"{model_name}{hint}"
# Chat UI. The additional inputs are passed to respond() after (message,
# history) in the order listed here: model name, system message, max tokens,
# temperature, top-p.
demo = gr.ChatInterface(
    respond,
    # The title is computed once from the first configured model.
    title=get_chat_title(next(iter(MODELS))),
    # NOTE(review): the iOS link below reuses the Google Play URL of the
    # Android link - confirm the intended App Store URL.
    description=(
        "GGUF is a popular format, try models locally in: "
        "[LM Studio AI for PC](https://lmstudio.ai) | "
        "PocketPal AI "
        "([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & "
        "[iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) "
        "on Tablet or Mobile"
    ),
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS),
            value=next(iter(MODELS)),
            label="Select Model",
            interactive=True,
            allow_custom_value=False,
            elem_id="model_selector",
            show_label=True,
        ),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
    # NOTE(review): the rules below target #component-0/#component-1 rather
    # than the "model_selector" elem_id set on the dropdown - confirm those ids
    # select the intended elements.
    css="""
.message-wrap {
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 8px;
margin: 8px 0;
}
#component-0, #component-1 {
border: 4px solid #2196F3;
border-radius: 12px;
padding: 15px;
background-color: #E3F2FD;
box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
margin: 10px 0;
}
#component-0:focus-within, #component-1:focus-within {
border-color: #1976D2;
box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
background-color: #BBDEFB;
}
.input-container, .gradio-container .input-container {
border: 4px solid #2196F3;
border-radius: 12px;
padding: 15px;
background-color: #E3F2FD;
box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
margin: 10px 0;
}
.input-container textarea, .input-container input[type="text"] {
background-color: #E3F2FD;
border: 2px solid #2196F3;
border-radius: 8px;
padding: 10px;
}
.input-container textarea:focus, .input-container input[type="text"]:focus {
background-color: #BBDEFB;
border-color: #1976D2;
outline: none;
}
""",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()