GSridhar1982 committed
Commit e7bda41 · verified · 1 parent: 0629758

Update app.py

Files changed (1): app.py (+22 -54)
app.py CHANGED
@@ -1,58 +1,26 @@
- import gradio as gr
- from transformers import pipeline
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer

- max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
- load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
- #from llama_cpp import Llama
-
- # Load the Llama model
- #llm = Llama.from_pretrained(
- #    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
- #    filename="QA_llama31_unsloth.Q4_K_M.gguf",
- #)
-
- #def generate_response(user_input):
-     # Perform inference
-     # response = llm.create_chat_completion(
-     #     messages=[
-     #         {
-     #             "role": "user",
-     #             "content": user_input
-     #         }
-     #     ]
-     # )
-
-     # Extract the model's reply
-     # model_reply = response['choices'][0]['message']['content']
-     # return model_reply
- def generate_answer(user_input):
-     model = AutoPeftModelForCausalLM.from_pretrained(
-         "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora", # YOUR MODEL YOU USED FOR TRAINING
-         load_in_4bit = load_in_4bit,
-     )
-     tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")
-     # Create a text generation pipeline
-     generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

-     # Generate predictions on the test dataset
-     # Access the input column of the dataset using the column name
-     predictions = generator(user_input)[0]['generated_text']

-     # Extract the generated text from the pipeline output
-     #predictions = [pred[0]['generated_text'] for pred in predictions]
-     return predictions

- # Create a Gradio interface
- iface = gr.Interface(
-     fn=generate_answer,
-     inputs="textbox",
-     outputs="text",
-     title="AIML Q&A Chatbot",
-     description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
- )
-
- # Launch the app
- iface.launch()

+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import nltk
+ nltk.download('punkt')

+ def generate_answer(question):
+     model_name = "anukvma/bart-aiml-question-answer-v2"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+     inputs = ["Answer this AIML Question: " + question]
+     inputs = tokenizer(inputs, max_length=256, truncation=True, return_tensors="pt")
+     output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=1, max_length=256)
+     decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+     predicted_title = nltk.sent_tokenize(decoded_output.strip())[0]
+     return predicted_title


+ iface = gr.Interface(
+     fn=generate_answer,
+     inputs=[
+         gr.Textbox(lines=5, label="Question")
+     ],
+     outputs=gr.Textbox(label="Answer")
+ )

+ iface.launch(debug=True)
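
For reference, a minimal standalone sketch of the new inference path, mirroring generate_answer above. The model name, prompt prefix, and generation parameters come from the diff; the sample question is illustrative only.

    # Standalone smoke test for the new BART-based answer path (mirrors app.py).
    import nltk
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    nltk.download('punkt')

    model_name = "anukvma/bart-aiml-question-answer-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    question = "What is overfitting?"  # illustrative question, not from the commit
    inputs = tokenizer(["Answer this AIML Question: " + question],
                       max_length=256, truncation=True, return_tensors="pt")
    output = model.generate(**inputs, num_beams=8, do_sample=True,
                            min_length=1, max_length=256)
    answer = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    # As in app.py, keep only the first sentence of the decoded output.
    print(nltk.sent_tokenize(answer.strip())[0])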