import gradio as gr
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM
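# Note: 4-bit loading below relies on bitsandbytes and accelerate being
# installed alongside gradio, transformers, and peft (e.g. via requirements.txt).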
#from llama_cpp import Llama
# Load the Llama model
#llm = Llama.from_pretrained(
#    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
#    filename="QA_llama31_unsloth.Q4_K_M.gguf",
#)
#def generate_response(user_input):
#    # Perform inference
#    response = llm.create_chat_completion(
#        messages=[
#            {
#                "role": "user",
#                "content": user_input
#            }
#        ]
#    )
#    # Extract the model's reply
#    model_reply = response['choices'][0]['message']['content']
#    return model_reply
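# The commented-out block above is an alternative, CPU-friendly inference path
# using the llama-cpp-python package with a quantized GGUF build of the model;
# it is kept for reference and would additionally require llama-cpp-python.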
# Load the fine-tuned LoRA model and its tokenizer once at startup so each
# request only runs inference instead of reloading the weights.
load_in_4bit = True  # assumed to mirror the 4-bit setting used during fine-tuning
model = AutoPeftModelForCausalLM.from_pretrained(
    "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora",  # the fine-tuned model
    load_in_4bit=load_in_4bit,
)
tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")

# Create a text generation pipeline
generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_answer(user_input):
    # Generate a completion for the user's question
    predictions = generator(
        user_input,
        max_new_tokens=100,
        num_beams=1,
    )
    # The pipeline returns one dict per generated sequence; extract the text of
    # the first (and only) sequence. Note that 'generated_text' also echoes the
    # prompt unless return_full_text=False is passed.
    return predictions[0]['generated_text']
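# A minimal alternative sketch, assuming the fine-tune was trained on Llama 3.1's
# chat format and the tokenizer ships a chat template (both assumptions, not
# confirmed here): format the question with apply_chat_template and return only
# the newly generated tokens via return_full_text=False.
def generate_answer_chat(user_input):
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_input}],
        tokenize=False,
        add_generation_prompt=True,  # append the assistant header so the model answers
    )
    output = generator(prompt, max_new_tokens=100, return_full_text=False)
    return output[0]['generated_text']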
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_answer,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
)
# Launch the app
iface.launch()
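# Note: on a Hugging Face Space, launch() with defaults is enough; when running
# locally, iface.launch(share=True) would also expose a temporary public URL.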