import gradio as gr
from llama_cpp import Llama

# Load the quantized fine-tuned Llama 3.1 model from the Hugging Face Hub.
# Downloaded once and cached locally by llama-cpp-python on first run.
llm = Llama.from_pretrained(
    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
    filename="QA_llama31_unsloth.Q4_K_M.gguf",
)


def generate_response(user_input):
    """Run one chat-completion turn against the loaded model.

    Args:
        user_input: The question text entered by the user.

    Returns:
        The model's reply as a plain string.
    """
    # Perform inference with a single-turn user message.
    response = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": user_input,
            }
        ]
    )
    # Extract the model's reply from the OpenAI-style response dict.
    model_reply = response['choices'][0]['message']['content']
    return model_reply


# Create a Gradio interface.
# NOTE: the original used gr.inputs.Textbox — the `gr.inputs` namespace was
# deprecated in Gradio 3.x and removed in 4.x; components now live directly
# on the `gr` module.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)

# Launch the app (blocks and serves the web UI).
iface.launch()