Spaces:
Sleeping
Sleeping
File size: 953 Bytes
2c29d2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import gradio as gr
from llama_cpp import Llama
# Load the Llama model.
# Downloads the quantized GGUF weights from the Hugging Face Hub the first
# time the module is imported (network I/O happens here, at import time).
llm = Llama.from_pretrained(
    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
    # NOTE(review): "GGUF_FILE" looks like an unfilled placeholder — confirm
    # the actual *.gguf filename published in the repo before deploying.
    filename="GGUF_FILE",
)
def generate_response(user_input):
    """Run single-turn chat inference and return the model's reply text.

    Parameters
    ----------
    user_input : str
        The question typed by the user; sent as one "user" chat message.

    Returns
    -------
    str
        The assistant's reply extracted from the completion response.
    """
    chat_messages = [{"role": "user", "content": user_input}]
    completion = llm.create_chat_completion(messages=chat_messages)
    # The completion dict carries a list of choices; the reply text lives
    # on the first choice's message.
    return completion['choices'][0]['message']['content']
# Create a Gradio interface.
# FIX: `gr.inputs.Textbox` was deprecated in Gradio 2.x and removed in
# Gradio 3.0 — component classes now live at the top level of the package,
# so use `gr.Textbox` directly. Behavior (a 2-line text box with the same
# placeholder) is unchanged.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model.",
)

# Launch the app (starts the web server and blocks until shut down).
iface.launch()
|