import gradio as gr
from llama_cpp import Llama
# Available models (Llama 3.2 ships in 1B and 3B instruct variants)
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
    "Llama-3.2-1B": {
        "repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
    },
}
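# "filename" is a glob pattern: Llama.from_pretrained matches it against the
# files in the repo and downloads the Q4_K_M quantization it resolves to.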
# Globals tracking the currently loaded model and the dropdown name it was loaded under
current_model = None
current_model_name = None


def load_model(model_name):
    """Download (if needed) and load the selected GGUF model."""
    global current_model, current_model_name
    model_info = MODELS[model_name]
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        # Llama 3.2 is not a ChatML model; leaving chat_format unset lets
        # llama-cpp-python use the chat template embedded in the GGUF.
    )
    current_model_name = model_name
    return current_model


# Eagerly load the default model so the app is ready on the first message
load_model(list(MODELS.keys())[0])
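# The first call downloads the GGUF weights from the Hub; huggingface_hub
# caches them locally, so later loads and model switches reuse the download.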
def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Reload only when the dropdown selection changed; comparing against the
    # stored model name (not model_path) avoids reloading on every message.
    if current_model is None or model_name != current_model_name:
        load_model(model_name)

    # Rebuild the conversation as OpenAI-style chat messages
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Stream the completion so the UI can update token by token
    response = current_model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    partial = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial
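# Note: yielding a growing string is how gr.ChatInterface streams; each yield
# replaces the displayed assistant message with the partial response so far.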
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    title="GGUF is a popular format for running models locally: LM Studio on PC, PocketPal AI on tablet/mobile",
    description="Try these models locally in 🖥️ [LM Studio for PC](https://lmstudio.ai) | 📱 PocketPal AI ([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & [iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) on tablet or mobile",
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Select Model",
        ),
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
    css="""
    .message-wrap {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 8px;
        margin: 8px 0;
    }
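    /* #component-0 and #component-1 are gradio's auto-generated element IDs
       for the chat and input areas; they can change between gradio versions,
       so treat these selectors as best-effort styling, not a stable API. */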
    #component-0, #component-1 {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    #component-0:focus-within, #component-1:focus-within {
        border-color: #1976D2;
        box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
        background-color: #BBDEFB;
    }
    .input-container, .gradio-container .input-container {
        border: 4px solid #2196F3;
        border-radius: 12px;
        padding: 15px;
        background-color: #E3F2FD;
        box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
        margin: 10px 0;
    }
    .input-container textarea, .input-container input[type="text"] {
        background-color: #E3F2FD;
        border: 2px solid #2196F3;
        border-radius: 8px;
        padding: 10px;
    }
    .input-container textarea:focus, .input-container input[type="text"]:focus {
        background-color: #BBDEFB;
        border-color: #1976D2;
        outline: none;
    }
    """,
)
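# Streaming works because respond is a generator; recent gradio versions
# enable queuing by default, which generator-based handlers rely on.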
if __name__ == "__main__":
    demo.launch()