File size: 953 Bytes
2c29d2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr  
from llama_cpp import Llama  
  
# Load the Llama model once, at import time; generate_response() below reuses
# this single module-level instance for every call.
# NOTE(review): "GGUF_FILE" looks like an unfilled placeholder rather than a
# real file name -- confirm it against the files actually published in the
# repo named by `repo_id` before deploying.
llm = Llama.from_pretrained(
    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
    filename="GGUF_FILE",
)
  
def generate_response(user_input: str) -> str:
    """Answer a single question with the module-level Llama model.

    Args:
        user_input: The question text entered by the user.

    Returns:
        The message content of the model's first completion choice, or a
        short prompt asking for a question when ``user_input`` is empty or
        contains only whitespace.
    """
    # A blank message gives the model nothing to answer, so skip the
    # (expensive) inference call entirely and tell the user what to do.
    if not user_input or not user_input.strip():
        return "Please enter a question."

    # Perform inference: the question is sent as a one-message chat with no
    # system prompt and no conversation history.
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_input}],
    )

    # Extract the model's reply from the first returned choice.
    return response["choices"][0]["message"]["content"]
  
# Create a Gradio interface: a two-line text box in, plain text out.
# `gr.Textbox` is used instead of the legacy `gr.inputs.Textbox`, because the
# `gr.inputs` namespace was deprecated in Gradio 3.x and removed in Gradio 4,
# where referencing it raises AttributeError at import time.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model.",
)

# Launch the app
iface.launch()