import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# LoRA adapter repository on Hugging Face
adapter_repo = "Mat17892/lora_llama_gguf_g14"

# Download the LoRA adapter GGUF file from the Hub
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")
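# hf_hub_download caches the file locally (by default under
# ~/.cache/huggingface/hub) and returns the path to the cached copy.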

# Download the base model GGUF file
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")
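# Q8_0 is an 8-bit GGUF quantization; if memory is tight, a smaller quant
# from the same repo (e.g. a Q4_K_M file, if published there) would also work.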


# Load the base model and apply the LoRA adapter. llama-cpp-python has no
# load_adapter() method on Llama; the adapter path is passed to the
# constructor via lora_path instead.
print("Loading base model with LoRA adapter...")
llm = Llama(
    model_path=base_model_path,
    lora_path=lora_adapter_path,
    n_ctx=2048,   # context window in tokens
    n_threads=8,  # CPU threads used for inference
)

print("Model ready with LoRA adapter!")

# Chat function
def chat_with_model(user_input, chat_history):
    """
    Process user input and generate a response from the model.
    :param user_input: User's input string
    :param chat_history: List of [user_message, ai_response] pairs
    :return: Updated chat history
    """
    # Construct the prompt from chat history
    prompt = ""
    for user, ai in chat_history:
        prompt += f"User: {user}\nAI: {ai}\n"
    prompt += f"User: {user_input}\nAI:"  # Add the latest user input

    # Generate a response. Without max_tokens the llama-cpp-python default
    # (16 tokens) truncates the reply; the stop sequence keeps the model from
    # continuing the dialogue as the user.
    raw_response = llm(prompt, max_tokens=256, stop=["User:"])["choices"][0]["text"].strip()

    # Defensive cleanup in case the stop sequence was not applied
    response = raw_response.split("User:")[0].strip()

    # Update chat history with the new turn
    chat_history.append((user_input, response))
    return chat_history, chat_history
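
# Alternative sketch (not wired into the UI below): llama-cpp-python also
# exposes create_chat_completion(), which applies the chat template embedded
# in the GGUF file instead of the hand-rolled "User:/AI:" prompt above.
def chat_with_model_templated(user_input, chat_history):
    messages = []
    for user, ai in chat_history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_input})
    result = llm.create_chat_completion(messages=messages, max_tokens=256)
    response = result["choices"][0]["message"]["content"].strip()
    chat_history.append((user_input, response))
    return chat_history, chat_history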


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA GGUF Chatbot")
    chatbot = gr.Chatbot(label="Chat with the GGUF Model")

    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
        with gr.Column(scale=1):
            submit_btn = gr.Button("Send")

    chat_history = gr.State([])

    # Link components
    submit_btn.click(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )
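
    # Optional addition (not in the original wiring): let the Enter key
    # submit the message as well, mirroring the button handler.
    user_input.submit(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )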

# Launch the app
demo.launch()
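
# A possible variant for running outside a managed host (assumption, not part
# of the original app): bind all interfaces and create a public share link.
# demo.launch(server_name="0.0.0.0", share=True)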