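"""Gradio app: AIML Q&A chatbot backed by a LoRA fine-tuned Llama 3.1 model."""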
import gradio as gr
from transformers import pipeline
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# from llama_cpp import Llama
#
# Alternative path (kept for reference): load the quantized GGUF model with llama-cpp-python
# llm = Llama.from_pretrained(
#     repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
#     filename="QA_llama31_unsloth.Q4_K_M.gguf",
# )
#
# def generate_response(user_input):
#     # Perform inference
#     response = llm.create_chat_completion(
#         messages=[
#             {"role": "user", "content": user_input}
#         ]
#     )
#     # Extract the model's reply
#     model_reply = response['choices'][0]['message']['content']
#     return model_reply
def generate_answer(user_input):
    # Note: the model and tokenizer are reloaded on every request, which is slow;
    # loading them once at module scope would avoid the repeated cost.
    model = AutoPeftModelForCausalLM.from_pretrained(
        "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora",  # the LoRA fine-tuned model
        load_in_4bit=True,  # load base weights in 4-bit to fit in limited memory
    )
    tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")

    # Create a text-generation pipeline
    generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    # Generate an answer for the user's question
    predictions = generator(
        user_input,
        max_new_tokens=100,
        num_beams=1,
    )

    # Extract the generated text from the pipeline output
    return predictions[0]['generated_text']
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_answer,
    inputs="textbox",
    outputs="text",
    title="AIML Q&A Chatbot",
    description="Ask questions related to AIML and get answers from the fine-tuned Llama model.",
)
# Launch the app
iface.launch()