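"""Gradio app: AIML Q&A chatbot backed by a LoRA fine-tuned Llama 3.1 model."""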
import gradio as gr
from transformers import pipeline
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# from llama_cpp import Llama
#
# Alternative path (kept for reference): load the quantized GGUF model with llama-cpp-python
# llm = Llama.from_pretrained(
#     repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
#     filename="QA_llama31_unsloth.Q4_K_M.gguf",
# )
#
# def generate_response(user_input):
#     # Perform inference
#     response = llm.create_chat_completion(
#         messages=[
#             {"role": "user", "content": user_input}
#         ]
#     )
#     # Extract the model's reply
#     model_reply = response['choices'][0]['message']['content']
#     return model_reply
def generate_answer(user_input):
    # Note: the model and tokenizer are reloaded on every request, which is slow;
    # loading them once at module scope would avoid the repeated cost.
    model = AutoPeftModelForCausalLM.from_pretrained(
        "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora",  # the LoRA fine-tuned model
        load_in_4bit=True,  # load base weights in 4-bit to fit in limited memory
    )
    tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")

    # Create a text-generation pipeline
    generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    # Generate an answer for the user's question
    predictions = generator(
        user_input,
        max_new_tokens=100,
        num_beams=1,
    )

    # Extract the generated text from the pipeline output
    return predictions[0]['generated_text']
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_answer,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model.",
)
# Launch the app
iface.launch()