|
import gradio as gr |
|
from llama_cpp import Llama |
|
import requests |
|
|
|
# Load the quantized Llama 3.2 3B Instruct model once at import time; the
# single instance is reused by every chat request.
llm = Llama.from_pretrained(
    repo_id="lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
    # Glob pattern selecting the Q4_K_M quantization file in the repo.
    filename="*Q4_K_M.gguf",
    verbose=True,
    n_ctx=32768,
    n_threads=2,
    # Llama 3.x Instruct models use the <|start_header_id|>/<|eot_id|> prompt
    # template, not ChatML's <|im_start|>/<|im_end|>; the chat format has to
    # match the template the model was trained with.
    chat_format="llama-3",
)
|
|
|
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` from the module-level ``llm``.

    Args:
        message: The new user message to answer.
        history: Earlier turns of the conversation. Each entry is normally a
            ``(user_text, assistant_text)`` pair; entries that are dicts with
            ``"role"`` and ``"content"`` keys (messages-style history) are
            accepted as well.
        system_message: Content of the system message placed first in the
            prompt.
        max_tokens: Forwarded to ``llm.create_chat_completion`` as
            ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The reply text accumulated so far, once for every streamed chunk that
        carries content.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if isinstance(turn, dict):
            # Messages-style entry: forward only role and content, dropping
            # any extra keys the UI may attach.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue

        # Pair-style entry: either side may be empty and is then skipped.
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    reply = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # Some chunks carry no text (empty or content-less deltas); a missing
        # or None content must not be concatenated onto the reply.
        piece = delta.get("content")
        if piece is None:
            continue
        reply += piece
        yield reply
|
""" |
|
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface |
|
""" |
|
# Custom styling for the chat UI: outlined chat messages and a highlighted
# blue input area.
# NOTE(review): "#component-0" / "#component-1" look like auto-generated
# element ids and may not be stable across Gradio versions - confirm they
# still target the intended components.
_CUSTOM_CSS = """
.message-wrap {
    border: 1px solid #e0e0e0;
    border-radius: 8px;
    padding: 8px;
    margin: 8px 0;
}
#component-0, #component-1 {
    border: 4px solid #2196F3;
    border-radius: 12px;
    padding: 15px;
    background-color: #E3F2FD;
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
    margin: 10px 0;
}
#component-0:focus-within, #component-1:focus-within {
    border-color: #1976D2;
    box-shadow: 0 0 15px rgba(33, 150, 243, 0.5);
    background-color: #BBDEFB;
}
.input-container, .gradio-container .input-container {
    border: 4px solid #2196F3;
    border-radius: 12px;
    padding: 15px;
    background-color: #E3F2FD;
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.3);
    margin: 10px 0;
}
.input-container textarea, .input-container input[type="text"] {
    background-color: #E3F2FD;
    border: 2px solid #2196F3;
    border-radius: 8px;
    padding: 10px;
}
.input-container textarea:focus, .input-container input[type="text"]:focus {
    background-color: #BBDEFB;
    border-color: #1976D2;
    outline: none;
}
"""

# Chat UI wired to `respond`. The additional inputs are passed to `respond`
# after (message, history), in the order listed here: system message,
# max tokens, temperature, top-p.
demo = gr.ChatInterface(
    respond,
    title="GGUF is popular format on PC in LM Studio or on Tablet/Mobile in PocketPal APPs",
    # Fixed the "locclay" typo; the iOS link previously duplicated the Google
    # Play URL and now points at the App Store listing.
    description=(
        "Try models locally in: 🖥️ [LM Studio AI for PC](https://lmstudio.ai) | "
        "📱 PocketPal AI ("
        "[Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & "
        "[iOS](https://apps.apple.com/us/app/pocketpal-ai/id6502579498))"
    ),
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    ),
    css=_CUSTOM_CSS,
)
|
|
|
|
|
def _main():
    """Launch the chat interface defined by the module-level ``demo``."""
    demo.launch()


# Launch only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    _main()