import gradio as gr
from llama_cpp import Llama
import requests
# Define available models
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
"Llama-3.2-5B": {
"repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
"filename": "*Q4_K_M.gguf",
"chat_format": "chatml"
}
}
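
# Note: "filename" above is a glob pattern; Llama.from_pretrained matches it
# against the GGUF files published in each repo.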
# Initialize with default model
current_model = None

def load_model(model_name):
    global current_model
    model_info = MODELS[model_name]
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        chat_format=model_info["chat_format"]
    )
    return current_model

# Initialize with first model
current_model = load_model(list(MODELS.keys())[0])
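
# Pre-loading here (at import time) downloads the GGUF from the Hugging Face Hub
# and caches it, so the first chat request does not also pay the download cost.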

def respond(
    message,
    history,
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global current_model
    # Load the requested model if it differs from the one currently in memory
    if current_model is None or model_name not in str(current_model.model_path):
        current_model = load_model(model_name)
    # Start with the system message, if any
    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})
    # Convert history to the messages format (handles both tuple and dict entries)
    for msg in history:
        if isinstance(msg, tuple):
            if msg[0]:
                messages.append({"role": "user", "content": msg[0]})
            if msg[1]:
                messages.append({"role": "assistant", "content": msg[1]})
        else:
            messages.append(msg)
    # Add the current user message
    messages.append({"role": "user", "content": message})
    # Generate the response as a stream of chunks
    response = current_model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )
    # Yield the accumulated text so gr.ChatInterface can stream partial output
    message_repl = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if delta and "content" in delta:
            message_repl += delta["content"]
            yield message_repl

def get_chat_title(model_name):
    return f"{model_name} <- Load a different model under Additional Inputs"

demo = gr.ChatInterface(
    respond,
    title=get_chat_title(list(MODELS.keys())[0]),
    description="GGUF is a popular format; try models locally in [LM Studio AI for PC](https://lmstudio.ai) | PocketPal AI ([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & [iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) on tablet or mobile",
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Select Model",
            interactive=True,
            allow_custom_value=False,
            elem_id="model_selector",
            show_label=True
        ),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    chatbot=gr.Chatbot(
        value=[],
        type="messages",
        label="Chat Messages"
    ),
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
css="""
.message-wrap {
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 8px;
margin: 8px 0;
}
#component-0, #component-1 {
border: 4px solid #2196F3;
border-radius: 12px;
padding: 15px;
background-color: #E3F2FD;
box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
margin: 10px 0;
}
#component-0:focus-within, #component-1:focus-within {
border-color: #1976D2;
box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
background-color: #BBDEFB;
}
.input-container, .gradio-container .input-container {
border: 4px solid #2196F3;
border-radius: 12px;
padding: 15px;
background-color: #E3F2FD;
box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
margin: 10px 0;
}
.input-container textarea, .input-container input[type="text"] {
background-color: #E3F2FD;
border: 2px solid #2196F3;
border-radius: 8px;
padding: 10px;
}
.input-container textarea:focus, .input-container input[type="text"]:focus {
background-color: #BBDEFB;
border-color: #1976D2;
outline: none;
}
"""
)

if __name__ == "__main__":
    demo.launch()