File size: 1,164 Bytes
4ade08c
d5c72cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ade08c
d5c72cc
 
 
 
4ade08c
 
 
d5c72cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

def load_model(
    repo_id: str = "forestav/gguf_lora_model",
    model_file: str = "unsloth.F16.gguf",
    n_ctx: int = 2048,
    n_threads: int = 8,
):
    """Fetch a GGUF model file from the Hugging Face Hub and load it.

    All parameters default to the values this app was originally
    hard-coded with, so ``load_model()`` behaves exactly as before; the
    parameters exist so a deployment can point at a different model or
    match the thread count to the host without editing this function.

    Args:
        repo_id: Hub repository that hosts the model file.
        model_file: Name of the GGUF file inside ``repo_id``.
        n_ctx: Context size passed to ``Llama`` as ``n_ctx``.
        n_threads: Thread count passed to ``Llama`` as ``n_threads``.

    Returns:
        The initialized ``Llama`` instance.
    """
    # hf_hub_download hands back the local path of the fetched file,
    # which is what Llama needs as model_path.
    local_path = hf_hub_download(repo_id=repo_id, filename=model_file)

    return Llama(
        model_path=local_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
    )

def _history_to_messages(history):
    """Convert a Gradio chat history into a list of role/content dicts.

    Two layouts are accepted: a sequence of ``[user, assistant]`` pairs,
    and a sequence of ``{"role": ..., "content": ...}`` dicts. Entries
    whose content is not a non-empty string (e.g. ``None`` for a missing
    reply, or file attachments) are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                if isinstance(content, str) and content:
                    messages.append({"role": role, "content": content})
    return messages


def _drop_oldest_turn(messages):
    """Return ``messages`` without its oldest exchange.

    The result is trimmed further until it starts with a user message, so
    the conversation never opens on a dangling assistant reply. The final
    element (the current user message) is always kept.
    """
    trimmed = messages[1:]
    while len(trimmed) > 1 and trimmed[0]["role"] != "user":
        trimmed = trimmed[1:]
    return trimmed


def generate_response(message, history):
    """Produce the assistant's reply to ``message`` given the prior chat.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation as supplied by the chat
            UI (pair-style or role/content-dict style); may be empty or
            ``None``.

    Returns:
        The text content of the first choice returned by
        ``model.create_chat_completion``.

    Raises:
        ValueError: Propagated from the model when even the current
            message alone is rejected.
    """
    # Replay earlier turns so the model actually sees the conversation,
    # then append the new user message last.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    while True:
        try:
            response = model.create_chat_completion(
                messages=messages,
                max_tokens=512,
                temperature=0.7,
                top_p=0.95,
            )
        except ValueError:
            # NOTE(review): llama-cpp-python is expected to raise ValueError
            # when the prompt no longer fits the context window - confirm.
            # Shed the oldest exchange and retry; with nothing left to shed,
            # surface the error unchanged.
            if len(messages) == 1:
                raise
            messages = _drop_oldest_turn(messages)
        else:
            return response['choices'][0]['message']['content']

# Instantiate the model once at import time; generate_response reads this
# module-level name on every request.
model = load_model()

# Chat UI that routes each submitted message through generate_response.
demo = gr.ChatInterface(
    generate_response,
    examples=["Continue the fibonacci sequence: 1, 1, 2, 3, 5, 8,"],
    title="Your GGUF Model Chat",
    description="A conversational AI model using GGUF format",
)

# Only start the web server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()