# AIML_QA_Demo / app.py
# GSridhar1982's picture
# Update app.py
# a858e38 verified
# raw
# history blame
# 974 Bytes
import gradio as gr
from llama_cpp import Llama
# Location of the quantized GGUF checkpoint: repository id and file name
# handed to Llama.from_pretrained below.
_MODEL_REPO_ID = "GSridhar1982/QA_Llama31_Quantized_GGUF"
_MODEL_FILENAME = "QA_llama31_unsloth.Q4_K_M.gguf"

# Build the shared Llama instance once at import time; generate_response
# reuses it for every request.
llm = Llama.from_pretrained(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)
def generate_response(user_input):
    """Answer a single question using the module-level ``llm`` chat model.

    Args:
        user_input: The question text entered by the user. ``None``, an
            empty string, or a whitespace-only string counts as "no
            question".

    Returns:
        The ``content`` of the first choice's message in the chat
        completion, or a short prompt asking for a question when the
        input is blank.
    """
    # Guard clause: do not spend an inference call on blank input.
    if not user_input or not user_input.strip():
        return "Please enter a question."

    # Single-turn chat: only the current question is sent as a user message.
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": user_input,
            },
        ],
    )

    # Extract the model's reply from the first returned choice.
    return response["choices"][0]["message"]["content"]
# Create the Gradio interface: one text box in, plain text out, with
# generate_response as the handler.
# NOTE: the legacy ``gr.inputs`` namespace was deprecated in Gradio 3.x and
# removed in 4.x; ``gr.Textbox`` is the supported component and accepts the
# same ``lines`` / ``placeholder`` arguments.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model.",
)

# Launch the app
iface.launch()