# NOTE: file-viewer metadata (size header, git blame hashes, line-number
# gutter) was fused into this file during extraction; converted to a comment
# so the module parses.
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# --- Model artifacts --------------------------------------------------------
# LoRA adapter (GGUF) fine-tuned on top of the base model below.
adapter_repo = "Mat17892/lora_llama_gguf_g14"  # Hugging Face model ID
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")

# Base model weights: 8-bit quantized GGUF of Llama-3.2-3B-Instruct.
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")

# Load the base model with the LoRA adapter applied.
# NOTE(review): llama_cpp.Llama exposes no `load_adapter` method — the
# original `llm.load_adapter(...)` call would raise AttributeError. The
# adapter is supplied via the `lora_path` constructor argument instead.
print("Loading base model with LoRA adapter...")
llm = Llama(
    model_path=base_model_path,
    lora_path=lora_adapter_path,
    n_ctx=2048,      # context window (tokens)
    n_threads=8,     # CPU threads for inference
)
print("Model ready with LoRA adapter!")
# Chat function
def chat_with_model(user_input, chat_history):
    """
    Generate a model reply for the latest user message.

    :param user_input: The user's newest message (str).
    :param chat_history: List of (user_message, ai_response) pairs so far.
    :return: Tuple of (updated history for the Chatbot widget,
             updated history for the State component) — same list twice,
             matching the two Gradio outputs.
    """
    # Rebuild the full prompt from every previous turn, then append the new
    # user message and leave the cursor right after "AI:" for the model.
    turns = [f"User: {user}\nAI: {ai}\n" for user, ai in chat_history]
    prompt = "".join(turns) + f"User: {user_input}\nAI:"

    # max_tokens: llama-cpp-python's default of 16 truncates every reply
    # mid-sentence. stop: end generation as soon as the model starts
    # inventing the next "User:" turn, instead of relying only on the
    # post-hoc split below.
    completion = llm(prompt, max_tokens=256, stop=["User:"])
    raw_response = completion["choices"][0]["text"].strip()

    # Defensive cleanup in case a "User:" tag slipped through anyway.
    response = raw_response.split("User:")[0].strip()

    # Record the new turn and return it for both Chatbot and State outputs.
    chat_history.append((user_input, response))
    return chat_history, chat_history
# Gradio UI
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA GGUF Chatbot")

    # Conversation display pane.
    chat_display = gr.Chatbot(label="Chat with the GGUF Model")

    # Input row: wide textbox beside a narrow send button.
    with gr.Row():
        with gr.Column(scale=4):
            message_box = gr.Textbox(
                label="Your Message",
                placeholder="Type a message...",
            )
        with gr.Column(scale=1):
            send_button = gr.Button("Send")

    # Server-side conversation state: list of (user, ai) message pairs.
    history_state = gr.State([])

    # Clicking Send feeds the message plus history into the model and pushes
    # the updated history to both the chat display and the state store.
    send_button.click(
        chat_with_model,
        inputs=[message_box, history_state],
        outputs=[chat_display, history_state],
        show_progress=True,
    )

# Launch the app
demo.launch()