# zainnobody's picture
# Create app.py
# c9fb0e9 verified
# raw
# history blame
# 16.2 kB
import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import traceback
import torch
import os
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import pandas as pd
import json
# Preprocessing text by lowercasing, removing punctuation, and extra spaces
def optimized_preprocess_text(text):
    """Normalize text before similarity comparison.

    The input is lowercased, every character that is neither a word
    character nor whitespace is removed, and each run of whitespace is
    collapsed to a single space with no leading or trailing padding.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return re.sub(r'\s+', ' ', without_punctuation).strip()
# Compute cosine similarity between two texts using TF-IDF
def optimized_compute_text_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    Both texts are vectorized together with unigram TF-IDF (English stop
    words removed) and the similarity of the two resulting rows is returned
    as a single score.
    """
    vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 1))
    vectors = vectorizer.fit_transform([text1, text2])
    first_row, second_row = vectors[0:1], vectors[1:2]
    return cosine_similarity(first_row, second_row).flatten()[0]
# Compute SBERT similarity between question and context
def compute_sbert_similarity(question, context, model):
    """Return the cosine similarity of the question and context embeddings.

    `model` must expose `encode(texts, convert_to_tensor=True)`; the question
    and the context are encoded in one call and compared with
    `util.pytorch_cos_sim`.
    """
    encoded_pair = model.encode([question, context], convert_to_tensor=True)
    score = util.pytorch_cos_sim(encoded_pair[0], encoded_pair[1])
    return score.item()
# Use hybrid approach: TF-IDF to narrow down top N contexts, then SBERT for refined similarity
def hybrid_sbert_approach(question, filtered_contexts, model, top_n=10):
    """Rank contexts for a question with TF-IDF pre-filtering and SBERT re-ranking.

    Args:
        question: The question text.
        filtered_contexts: Candidate context strings (any iterable).
        model: Sentence-embedding model forwarded to compute_sbert_similarity.
        top_n: How many of the best TF-IDF matches are kept for re-ranking.

    Returns:
        At most `top_n` contexts ordered by descending SBERT similarity.
        An empty list is returned when there are no candidate contexts.
    """
    candidates = list(filtered_contexts)
    if not candidates:
        # Without this guard the zero-row slice below makes cosine_similarity raise.
        return []

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform([question] + candidates)

    # Row 0 is the question; the remaining rows line up with `candidates`.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    shortlist = [candidates[i] for i in similarity_scores.argsort()[::-1][:top_n]]

    # Refine the shortlist using SBERT; the sort is stable, so ties keep TF-IDF order.
    scored = [(compute_sbert_similarity(question, context, model), context) for context in shortlist]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [context for _, context in scored]
# RAG with optimized SBERT function
def optimized_generate_rag_context(question, filtered_contexts, selected_context_window=2):
    """Build a RAG context string from the best hybrid-ranked contexts.

    Args:
        question: The question text.
        filtered_contexts: Candidate context strings.
        selected_context_window: Number of contexts to include; anything
            accepted by `int()` (for example 2 or "2").

    Returns:
        The selected contexts joined with newlines.
    """
    # Convert once so the retrieval size and the slice below always agree;
    # slicing with an unconverted string would raise TypeError.
    window = int(selected_context_window)
    retrieved = hybrid_sbert_approach(question, filtered_contexts, sbert_model, top_n=window)
    return "\n".join(retrieved[:window])
# Extract unique contexts and filter them by length
def extract_and_filter_contexts(data, min_length=151, max_length=3706):
    """Return the unique contexts whose length lies within the given bounds.

    Args:
        data: Object indexable by 'context' whose result exposes `.unique()`
            (the DataFrame built by create_dataframe in this file).
        min_length: Smallest accepted context length, inclusive.
        max_length: Largest accepted context length, inclusive.

    Returns:
        A list of context strings in order of first appearance. Missing or
        non-string entries (e.g. None/NaN) are skipped instead of raising.
    """
    return [
        context
        for context in data['context'].unique()
        if isinstance(context, str) and min_length <= len(context) <= max_length
    ]
# Compute the TF-IDF matrix for the question and contexts
def compute_tfidf_and_similarity_scores(question, contexts):
    """Vectorize the question with the contexts and score each context.

    Returns a `(tfidf_matrix, similarity_scores)` pair: the matrix has the
    question in row 0 followed by one row per context, and the scores are the
    flattened cosine similarities of the question against every context row.
    """
    documents = [question] + contexts
    matrix = TfidfVectorizer(stop_words='english').fit_transform(documents)
    question_row, context_rows = matrix[0:1], matrix[1:]
    scores = cosine_similarity(question_row, context_rows).flatten()
    return matrix, scores
# Rank contexts based on similarity scores
def rank_contexts_by_similarity(contexts, similarity_scores):
    """Order contexts from most to least similar.

    `similarity_scores` must support `.argsort()` and index-array selection.
    Returns `(ranked_contexts, ranked_scores)` where both follow the
    descending score order.
    """
    descending_order = similarity_scores.argsort()[::-1]
    reordered_contexts = [contexts[position] for position in descending_order]
    return reordered_contexts, similarity_scores[descending_order]
# Select the top contexts based on the selected window
def select_top_contexts(selected_context_window, ranked_contexts, ranked_scores):
    """Keep the leading entries of the ranked contexts and scores.

    `selected_context_window` is converted with `int()` and used as the
    number of leading items taken from each sequence. Returns
    `(top_contexts, top_scores)`.
    """
    head = slice(int(selected_context_window))
    return ranked_contexts[head], ranked_scores[head]
# Helper function to maintain chat history and generate the response
def maintain_chat_history(message, chat_history):
    """Append a user turn to the chat history and return the history.

    A new list is started when `chat_history` is None; otherwise the list
    that was passed in is extended in place and returned.
    """
    history = [] if chat_history is None else chat_history
    history.append({"role": "user", "content": message})
    return history
def generate_rag_context(question, filtered_contexts, selected_context_window=3):
    """Build a RAG context string from the best TF-IDF-ranked contexts.

    Args:
        question: The question text.
        filtered_contexts: Candidate context strings.
        selected_context_window: Number of contexts to include; anything
            accepted by `int()` (for example 3, 3.0 or "3").

    Returns:
        The top-ranked contexts joined with newlines.
    """
    _, similarity_scores = compute_tfidf_and_similarity_scores(question, filtered_contexts)
    ranked_contexts, ranked_scores = rank_contexts_by_similarity(filtered_contexts, similarity_scores)
    # Convert to int directly: a str() round-trip turns 3.0 into "3.0",
    # which int() then rejects inside select_top_contexts.
    top_contexts, _ = select_top_contexts(int(selected_context_window), ranked_contexts, ranked_scores)
    return "\n".join(top_contexts)
def load_squad_data(filepath):
    """Load a SQuAD-style JSON file and return the parsed object.

    The file is read as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)
# Preprocess the data: extract contexts, questions, and answers from the SQuAD data
def raw_preprocess_data(squad_data):
    """Flatten SQuAD-style data into parallel context/question/answer lists.

    One entry is produced per answer, so a question with several answers
    appears several times and a question without answers does not appear.
    Each answer is a fresh dict holding only 'text' and 'answer_start', which
    leaves the source answer objects untouched.

    Returns:
        A `(contexts, questions, answers)` tuple of equally long lists.
    """
    def iter_rows():
        for article in squad_data['data']:
            for paragraph in article['paragraphs']:
                passage_text = paragraph['context']
                for qa in paragraph['qas']:
                    question_text = qa['question']
                    for gold in qa['answers']:
                        answer_copy = {
                            'text': gold['text'],
                            'answer_start': gold['answer_start'],
                        }
                        yield passage_text, question_text, answer_copy

    rows = list(iter_rows())
    contexts = [row[0] for row in rows]
    questions = [row[1] for row in rows]
    answers = [row[2] for row in rows]
    return contexts, questions, answers
# Add the end index of the answer in the context
def add_end_idx(answers, contexts):
    """Add 'answer_end' to each answer, repairing slightly misplaced starts.

    For every (answer, context) pair the answer text is looked up at its
    recorded 'answer_start'. When it is not there, positions up to 29
    characters to the left and right are probed, nearest first and left
    before right. On success 'answer_start'/'answer_end' are set to the
    matching span; otherwise both are set to -1.

    Candidate positions below zero are never probed: a negative slice start
    would wrap around to the end of the context and could report a bogus
    match with a negative 'answer_start'.

    The answer dicts are modified in place; nothing is returned.
    """
    # Probe order relative to the recorded start: 0, -1, +1, -2, +2, ...
    offsets = [0] + [sign * n for n in range(1, 30) for sign in (-1, 1)]

    for answer, context in zip(answers, contexts):
        gold_text = answer['text']
        start_idx = answer['answer_start']
        answer_len = len(gold_text)

        match_start = next(
            (
                start_idx + offset
                for offset in offsets
                if start_idx + offset >= 0
                and context[start_idx + offset:start_idx + offset + answer_len] == gold_text
            ),
            None,
        )

        if match_start is None:
            answer['answer_start'] = -1
            answer['answer_end'] = -1
        else:
            answer['answer_start'] = match_start
            answer['answer_end'] = match_start + answer_len
# Create a DataFrame from the contexts, questions, and answers
def create_dataframe(contexts, questions, answers):
    """Assemble the QA samples into a DataFrame and drop unusable rows.

    Columns are 'context', 'question', 'answer_text', 'answer_start' and
    'answer_end' (which falls back to -1 when an answer has no 'answer_end').
    Rows whose 'answer_start' equals -1 are removed and the index is
    renumbered from zero.
    """
    columns = {
        'context': contexts,
        'question': questions,
        'answer_text': [entry['text'] for entry in answers],
        'answer_start': [entry['answer_start'] for entry in answers],
        'answer_end': [entry.get('answer_end', -1) for entry in answers],
    }
    frame = pd.DataFrame(columns)
    has_valid_start = frame['answer_start'] != -1
    return frame[has_valid_start].reset_index(drop=True)
# Check if a GPU (CUDA) is available; otherwise, use the CPU
# `device` is handed to the question-answering pipeline and is where
# generate_response moves the causal-LM model and its input tensors.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Loading the pre-trained SBERT model globally for efficiency
# (optimized_generate_rag_context reads it through the `sbert_model` global).
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
# Available models
# Paths containing "electra_model" are the ones generate_response routes
# through the "question-answering" pipeline.
electra_models = [
"./models/fine_tuned_electra_model_1000",
"./models/fine_tuned_electra_model_20000",
"./models/fine_tuned_electra_model_5000",
"./models/fine_tuned_electra_model_all"
]
# Every other path is loaded by generate_response with AutoModelForCausalLM
# and used for free-form text generation.
other_models = [
"./models/fine_tuned_bert_base_cased_1000",
"./models/fine_tuned_bert_base_cased_all",
"./models/fine_tuned_distilbert_base_uncased_10000",
"./models/fine_tuned_distilgpt2_10000",
"./models/fine_tuned_retro-reader_intensive_1000",
"./models/fine_tuned_retro-reader_intensive_5000",
"./models/fine_tuned_retro-reader_sketchy_1000"
]
# Directory containing the SQuAD JSON file read below.
DATA_DIR = './data'
# Load and preprocess data
# This runs once at import time; `data` is the DataFrame that
# generate_response filters for RAG contexts.
squad_data = load_squad_data(DATA_DIR+ '/train-v1.1.json')
contexts, questions, answers = raw_preprocess_data(squad_data)
add_end_idx(answers, contexts)
data = create_dataframe(contexts, questions, answers)
# Function to generate a response with logging and custom content
def generate_response(message, chat_history, model_name, debug, rag, selected_context_window):
    """Answer one user message and return the updated conversation.

    Models whose name contains "electra_model" are run through the
    "question-answering" pipeline; the context is either retrieved from the
    SQuAD data (`rag` truthy) or made of all user messages so far. Any other
    model is loaded as a causal language model and asked to continue a
    "User:/Assistant:" transcript.

    Args:
        message: The new user message.
        chat_history: List of {"role", "content"} dicts, or None to start a
            new conversation. The list is extended in place.
        model_name: Model path or identifier to load.
        debug: When truthy, print diagnostic information.
        rag: When truthy, retrieve the QA context with generate_rag_context.
        selected_context_window: Number of contexts used for RAG.

    Returns:
        `(display_history, chat_history)`, where `display_history` is a list
        of `[user_text, reply_text]` pairs. Failures are not raised: the
        error text is appended as the assistant's reply instead.

    NOTE(review): the model is loaded again on every call, and the BERT-style
    checkpoints offered in `other_models` go through AutoModelForCausalLM;
    both look worth revisiting.
    """
    if chat_history is None:
        chat_history = []
    try:
        # Record the user's turn before anything that can fail, so an error
        # while loading the model is still shown as the reply to this message.
        chat_history.append({"role": "user", "content": message})

        # Determine if the model is for question answering based on its name
        is_question_answering = "electra_model" in model_name

        if is_question_answering:
            model = pipeline("question-answering", model=model_name, tokenizer=model_name, device=device)
            if rag:
                filtered_contexts = extract_and_filter_contexts(data, min_length=100, max_length=4000)
                context = generate_rag_context(message, filtered_contexts, selected_context_window)
            else:
                context = "\n".join([turn["content"] for turn in chat_history if turn["role"] == "user"])
            if debug:
                print("context:\n" + context)
                print("message:\n" + message)
            # Call the pipeline for question-answering
            answer = model(question=message, context=context)
            response = answer['answer']
        else:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)
            model.to(device)

            # Prepare the conversation history for a regular chatbot
            conversation = "".join(
                f"User: {turn['content']}\n" if turn["role"] == "user" else f"Assistant: {turn['content']}\n"
                for turn in chat_history
            )
            if debug:
                print("Conversation being sent to the model:\n", conversation)

            # Encode the input and generate a response
            inputs = tokenizer.encode(conversation + "Assistant:", return_tensors='pt').to(device)
            outputs = model.generate(
                inputs,
                max_length=inputs.shape[1] + 100,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_p=0.95,
                top_k=50,
                temperature=0.7,
                eos_token_id=tokenizer.eos_token_id,
            )
            # Decode only the newly generated tokens. Slicing the decoded text
            # by len(conversation) left the "Assistant:" prompt label in the
            # reply and relied on decoding reproducing the prompt exactly.
            new_tokens = outputs[0][inputs.shape[1]:]
            response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
            # Cut the reply off if the model starts writing the next user turn
            response = response.split("User:")[0].strip()

        # Append the assistant's response to the chat history
        chat_history.append({"role": "assistant", "content": response})
        if debug:
            print("Generated response:", response)
            print("Configurations:")
            print(f"Model Name: {model_name}")
            print(f"Is Question Answering: {is_question_answering}")
            print(f"RAG Enabled: {rag}")
            print(f"Selected Context Window: {selected_context_window}")
        return _build_display_history(chat_history), chat_history
    except Exception as e:
        # Surface the failure to the user as the assistant's reply
        error_message = f"An error occurred: {str(e)}"
        chat_history.append({"role": "assistant", "content": error_message})
        if debug:
            print("Error Details:\n", traceback.format_exc())
        # Ensure safe generation of the display history
        try:
            display_history = _build_display_history(chat_history)
        except Exception as history_error:
            if debug:
                print("Error while generating display history:", str(history_error))
            display_history = []
        return display_history, chat_history


def _build_display_history(chat_history):
    """Pair every user turn with the content of the turn that follows it."""
    return [
        [turn["content"], chat_history[i + 1]["content"]]
        for i, turn in enumerate(chat_history[:-1])
        if turn["role"] == "user"
    ]
# Gradio Interface Configuration
def run_prod_chatbot(local=True):
    """Build the Gradio chat UI and launch it.

    The page has a header, a settings row (model selector, RAG and debug
    checkboxes, context-window selector), the chat display, and a message
    box with a send button. Clicking the button and pressing Enter in the
    message box both forward the current settings to generate_response.

    With `local` truthy the app is launched with `share=True`; otherwise it
    is served on 0.0.0.0 with no explicit port.
    """
    header_html = """
<div style="text-align: center;">
<h1><strong>SQuAD Q&A ChatBot</strong></h1>
<h3>Authors: <a href="https://github.com/zainnobody">Zain Ali</a> & <a href="https://github.com/AIBenHopwood/">Ben Hopwood</a></h3>
<p>
<a href="https://github.com/zainnobody/AAI-520-Final-Project" target="_blank">Code: GitHub link</a> &nbsp;|&nbsp;
<a href="https://huggingface.co/zainnobody/AAI-520-Final-Project-Models" target="_blank">Models: Huggingface link</a>
</p>
</div>
<div style="text-align: center;">
<p>
This project aims to develop a chatbot capable of multi-turn, context-adaptive conversations across various topics, using the Stanford Question Answering Dataset (SQuAD) as the primary source for training.
</p>
</div>
<div style="text-align: center;">
<h4>University of San Diego - AAI 520</h4>
</div>
"""
    with gr.Blocks() as demo:
        gr.Markdown(header_html)

        with gr.Row(variant="compact"):
            model_dropdown = gr.Dropdown(
                choices=electra_models + other_models,
                label="Select Model",
                value="./models/fine_tuned_electra_model_all",
            )
            # The two checkboxes share one column of the settings row
            with gr.Column():
                rag_checkbox = gr.Checkbox(label="Use RAG", value=True, interactive=True)
                debug_checkbox = gr.Checkbox(label="Debug Mode", value=False)
            context_window_dropdown = gr.Dropdown(
                choices=[1, 2, 3],
                label="Select Context Window",
                value=1,
            )
            # There is deliberately no "question answering" checkbox: the mode is
            # detected from the model name, and only the Electra models use the pipeline.

        chatbot = gr.Chatbot()
        state = gr.State([])

        with gr.Row():
            # scale=3 against scale=1: the textbox gets 75% of the row, the button 25%
            msg = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter", scale=3)
            send_btn = gr.Button("Send", scale=1)

        handler_inputs = [msg, state, model_dropdown, debug_checkbox, rag_checkbox, context_window_dropdown]
        handler_outputs = [chatbot, state]
        # Register the same forwarding handler for the button click and for Enter
        for register in (send_btn.click, msg.submit):
            register(
                lambda message, chat_history, model_name, debug, rag, selected_context_window: generate_response(message, chat_history, model_name, debug, rag, selected_context_window),
                inputs=handler_inputs,
                outputs=handler_outputs,
            )

    launch_options = {"share": True} if local else {"server_name": "0.0.0.0", "server_port": None}
    demo.launch(**launch_options)
# Launch the Gradio app
# Called unconditionally at module level (no `__main__` guard), so the UI is
# built and launched whenever this file is executed or imported; the default
# `local=True` makes run_prod_chatbot launch with `share=True`.
run_prod_chatbot()