import gradio as gr
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM
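# Note: 4-bit loading below relies on bitsandbytes and accelerate being
# installed alongside gradio, transformers, and peft (e.g. via requirements.txt).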
#from llama_cpp import Llama
# Load the Llama model
#llm = Llama.from_pretrained(
#    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
#    filename="QA_llama31_unsloth.Q4_K_M.gguf",
#)
#def generate_response(user_input):
#    # Perform inference
#    response = llm.create_chat_completion(
#        messages=[
#            {
#                "role": "user",
#                "content": user_input
#            }
#        ]
#    )
#    # Extract the model's reply
#    model_reply = response['choices'][0]['message']['content']
#    return model_reply
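# The commented-out block above is an alternative, CPU-friendly inference path
# using the llama-cpp-python package with a quantized GGUF build of the model;
# it is kept for reference and would additionally require llama-cpp-python.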
# Load the fine-tuned LoRA model and its tokenizer once at startup so each
# request only runs inference instead of reloading the weights.
load_in_4bit = True  # assumed to mirror the 4-bit setting used during fine-tuning
model = AutoPeftModelForCausalLM.from_pretrained(
    "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora",  # the fine-tuned model
    load_in_4bit=load_in_4bit,
)
tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")

# Create a text generation pipeline
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_answer(user_input):
    # Generate a completion for the user's question
    predictions = generator(
        user_input,
        max_new_tokens=100,
        num_beams=1,
    )
    # The pipeline returns one dict per generated sequence; extract the text of
    # the first (and only) sequence. Note that 'generated_text' also echoes the
    # prompt unless return_full_text=False is passed.
    return predictions[0]['generated_text']
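# A minimal alternative sketch, assuming the fine-tune was trained on Llama 3.1's
# chat format and the tokenizer ships a chat template (both assumptions, not
# confirmed here): format the question with apply_chat_template and return only
# the newly generated tokens via return_full_text=False.
def generate_answer_chat(user_input):
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_input}],
        tokenize=False,
        add_generation_prompt=True,  # append the assistant header so the model answers
    )
    output = generator(prompt, max_new_tokens=100, return_full_text=False)
    return output[0]['generated_text']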
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_answer,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)
# Launch the app
iface.launch()
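# Note: on a Hugging Face Space, launch() with defaults is enough; when running
# locally, iface.launch(share=True) would also expose a temporary public URL.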