# Hugging Face Spaces app (scraped page header removed; Space status was "Sleeping")
import gradio as gr

from llama_cpp import Llama

# Load the quantized Llama model from the Hugging Face Hub.
# NOTE(review): "GGUF_FILE" looks like a placeholder — replace with the actual
# .gguf filename inside the repo (e.g. "model-Q4_K_M.gguf"); verify on the Hub.
llm = Llama.from_pretrained(
    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
    filename="GGUF_FILE",
)
def generate_response(user_input):
    """Send *user_input* to the Llama model and return its text reply.

    Args:
        user_input: The question typed by the user.

    Returns:
        The assistant message content from the model's first completion choice.
    """
    # Perform inference via the chat-completion API (single-turn conversation).
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": user_input
            }
        ]
    )
    # Extract the model's reply from the OpenAI-style response structure.
    model_reply = response['choices'][0]['message']['content']
    return model_reply
# Create a Gradio interface wired to the inference function.
# Fix: `gr.inputs.Textbox` belongs to the pre-3.x namespace that was removed
# in Gradio 4.x — components are now accessed directly as `gr.Textbox`.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)

# Launch the app
iface.launch()