import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import traceback
import torch
import os
import pandas as pd
import json


# Preprocess text by lowercasing, removing punctuation, and collapsing extra whitespace
def optimized_preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


# Compute cosine similarity between two texts using TF-IDF
def optimized_compute_text_similarity(text1, text2):
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 1))
    tfidf_matrix = tfidf.fit_transform([text1, text2])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2]).flatten()
    return cosine_sim[0]


# Compute SBERT similarity between a question and a context
def compute_sbert_similarity(question, context, model):
    embeddings = model.encode([question, context], convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
    return similarity


# Hybrid approach: use TF-IDF to narrow down the top N contexts, then SBERT for refined similarity
def hybrid_sbert_approach(question, filtered_contexts, model, top_n=10):
    tfidf = TfidfVectorizer(stop_words='english')
    contexts_combined = [question] + filtered_contexts
    tfidf_matrix = tfidf.fit_transform(contexts_combined)

    # Calculate TF-IDF similarity and rank contexts
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    ranked_contexts = [filtered_contexts[i] for i in similarity_scores.argsort()[::-1][:top_n]]

    # Refine the ranking using SBERT
    sbert_similarities = [compute_sbert_similarity(question, context, model) for context in ranked_contexts]
    ranked_by_sbert = sorted(zip(ranked_contexts, sbert_similarities), key=lambda x: x[1], reverse=True)
    return [context for context, _ in ranked_by_sbert]


# RAG context builder using the optimized SBERT hybrid retrieval
def optimized_generate_rag_context(question, filtered_contexts, selected_context_window=2):
    hybrid_retrieved_contexts = hybrid_sbert_approach(question, filtered_contexts, sbert_model, top_n=int(selected_context_window))
    rag_context = "\n".join(hybrid_retrieved_contexts[:selected_context_window])
    return rag_context


# Extract unique contexts and filter them by length
def extract_and_filter_contexts(data, min_length=151, max_length=3706):
    unique_contexts = data['context'].unique()
    filtered_contexts = [context for context in unique_contexts if min_length <= len(context) <= max_length]
    return filtered_contexts


# Compute the TF-IDF matrix and similarity scores for the question and contexts
def compute_tfidf_and_similarity_scores(question, contexts):
    tfidf = TfidfVectorizer(stop_words='english')
    contexts_combined = [question] + contexts
    tfidf_matrix = tfidf.fit_transform(contexts_combined)

    # Calculate the cosine similarity scores
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    return tfidf_matrix, similarity_scores


# Rank contexts based on similarity scores
def rank_contexts_by_similarity(contexts, similarity_scores):
    ranked_indices = similarity_scores.argsort()[::-1]
    ranked_contexts = [contexts[i] for i in ranked_indices]
    ranked_scores = similarity_scores[ranked_indices]
    return ranked_contexts, ranked_scores
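
# --- Illustrative sketch (not part of the original app flow) ---
# A minimal example of the TF-IDF retrieval step above: score a few made-up contexts
# against a question and print them in ranked order. The helper name and the toy
# strings are assumptions added purely for illustration; it is defined here but never
# called automatically.
def _example_tfidf_retrieval():
    question = "Who wrote the play Hamlet?"
    toy_contexts = [
        "Hamlet is a tragedy written by William Shakespeare.",
        "The Eiffel Tower is located in Paris, France.",
        "Shakespeare wrote many plays, including Macbeth and Othello.",
    ]
    _, scores = compute_tfidf_and_similarity_scores(question, toy_contexts)
    ranked_contexts, ranked_scores = rank_contexts_by_similarity(toy_contexts, scores)
    for context, score in zip(ranked_contexts, ranked_scores):
        print(f"{score:.3f}  {context}")

# Uncomment to run a quick local sanity check:
# _example_tfidf_retrieval()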
# Select the top contexts based on the selected context window
def select_top_contexts(selected_context_window, ranked_contexts, ranked_scores):
    count = int(selected_context_window)
    top_contexts = ranked_contexts[:count]
    top_scores = ranked_scores[:count]
    return top_contexts, top_scores


# Helper function to maintain the chat history
def maintain_chat_history(message, chat_history):
    if chat_history is None:
        chat_history = []
    chat_history.append({"role": "user", "content": message})
    return chat_history


# Build the RAG context from the TF-IDF ranking alone
def generate_rag_context(question, filtered_contexts, selected_context_window=3):
    tfidf_matrix, similarity_scores = compute_tfidf_and_similarity_scores(question, filtered_contexts)
    ranked_contexts, ranked_scores = rank_contexts_by_similarity(filtered_contexts, similarity_scores)
    top_contexts, top_scores = select_top_contexts(selected_context_window, ranked_contexts, ranked_scores)
    rag_context = "\n".join(top_contexts)
    return rag_context


# Load the raw SQuAD JSON file
def load_squad_data(filepath):
    with open(filepath, 'r') as f:
        squad_data = json.load(f)
    return squad_data


# Preprocess the data: extract contexts, questions, and answers from the SQuAD data
def raw_preprocess_data(squad_data):
    contexts = []
    questions = []
    answers = []
    for group in squad_data['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    contexts.append(context)
                    questions.append(question)
                    # Make a copy to avoid modifying the original answer
                    answers.append({
                        'text': answer['text'],
                        'answer_start': answer['answer_start']
                    })
    return contexts, questions, answers


# Add the end index of each answer in its context, correcting small offset mismatches
def add_end_idx(answers, contexts):
    for answer, context in zip(answers, contexts):
        gold_text = answer['text']
        start_idx = answer['answer_start']
        end_idx = start_idx + len(gold_text)

        if context[start_idx:end_idx] == gold_text:
            answer['answer_end'] = end_idx
        else:
            # Try to find the correct position if there is a mismatch
            for n in range(1, 30):
                if context[start_idx - n:end_idx - n] == gold_text:
                    answer['answer_start'] = start_idx - n
                    answer['answer_end'] = end_idx - n
                    break
                elif context[start_idx + n:end_idx + n] == gold_text:
                    answer['answer_start'] = start_idx + n
                    answer['answer_end'] = end_idx + n
                    break
            else:
                # No match found within the search window; mark the sample as invalid
                answer['answer_start'] = -1
                answer['answer_end'] = -1


# Create a DataFrame from the contexts, questions, and answers
def create_dataframe(contexts, questions, answers):
    data = pd.DataFrame({
        'context': contexts,
        'question': questions,
        'answer_text': [answer['text'] for answer in answers],
        'answer_start': [answer['answer_start'] for answer in answers],
        'answer_end': [answer.get('answer_end', -1) for answer in answers]
    })
    # Remove samples whose answer span could not be located (-1 start index)
    data = data[data['answer_start'] != -1].reset_index(drop=True)
    return data


# Check if a GPU (CUDA) is available; otherwise, use the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pre-trained SBERT model globally for efficiency
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Available models
electra_models = [
    "./models/fine_tuned_electra_model_1000",
    "./models/fine_tuned_electra_model_20000",
    "./models/fine_tuned_electra_model_5000",
    "./models/fine_tuned_electra_model_all"
]
other_models = [
    "./models/fine_tuned_bert_base_cased_1000",
    "./models/fine_tuned_bert_base_cased_all",
    "./models/fine_tuned_distilbert_base_uncased_10000",
    "./models/fine_tuned_distilgpt2_10000",
    "./models/fine_tuned_retro-reader_intensive_1000",
    "./models/fine_tuned_retro-reader_intensive_5000",
    "./models/fine_tuned_retro-reader_sketchy_1000"
]
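
# --- Illustrative sketch (not part of the original app flow) ---
# Shows how add_end_idx() fills in 'answer_end' and repairs an 'answer_start' that is
# off by a couple of characters, as happens in some SQuAD annotations. The sample
# context and answer are made up for illustration; the helper is never called
# automatically.
def _example_add_end_idx():
    sample_contexts = ["The quick brown fox jumps over the lazy dog."]
    sample_answers = [{'text': 'brown fox', 'answer_start': 8}]  # true start index is 10
    add_end_idx(sample_answers, sample_contexts)
    print(sample_answers)  # expected: answer_start corrected to 10, answer_end set to 19

# Uncomment to run a quick local sanity check:
# _example_add_end_idx()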
"./models/fine_tuned_retro-reader_intensive_5000", "./models/fine_tuned_retro-reader_sketchy_1000" ] DATA_DIR = './data' # Load and preprocess data squad_data = load_squad_data(DATA_DIR+ '/train-v1.1.json') contexts, questions, answers = raw_preprocess_data(squad_data) add_end_idx(answers, contexts) data = create_dataframe(contexts, questions, answers) # Function to generate a response with logging and custom content def generate_response(message, chat_history, model_name, debug, rag, selected_context_window): try: if chat_history is None: chat_history = [] context = message # Determine if the model is for question answering based on its name is_question_answering = "electra_model" in model_name # Initialize the tokenizer and model if is_question_answering: model = pipeline("question-answering", model=model_name, tokenizer=model_name, device=device) else: tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForCausalLM.from_pretrained(model_name) model.to(device) # Append the new user message to the chat history chat_history.append({"role": "user", "content": message}) if is_question_answering: if rag: filtered_contexts = extract_and_filter_contexts(data, min_length=100, max_length=4000) context = generate_rag_context(message, filtered_contexts, selected_context_window) else: context = "\n".join([turn["content"] for turn in chat_history if turn["role"] == "user"]) if debug: print("context:\n" + context) print("message:\n" + message) # Call the pipeline for question-answering answer = model(question=message, context=context) response = answer['answer'] else: # Prepare the conversation history for a regular chatbot conversation = "" for turn in chat_history: if turn["role"] == "user": conversation += f"User: {turn['content']}\n" else: conversation += f"Assistant: {turn['content']}\n" if debug: print("Conversation being sent to the model:\n", conversation) # Encode the input and generate a response inputs = tokenizer.encode(conversation + "Assistant:", return_tensors='pt').to(device) outputs = model.generate( inputs, max_length=inputs.shape[1] + 100, pad_token_id=tokenizer.eos_token_id, do_sample=True, top_p=0.95, top_k=50, temperature=0.7, eos_token_id=tokenizer.eos_token_id, ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract the assistant's reply response = response[len(conversation):].strip() if "User:" in response: response = response.split("User:")[0].strip() # Append the assistant's response to the chat history chat_history.append({"role": "assistant", "content": response}) if debug: print("Generated response:", response) print("Configurations:") print(f"Model Name: {model_name}") print(f"Is Question Answering: {is_question_answering}") print(f"RAG Enabled: {rag}") print(f"Selected Context Window: {selected_context_window}") # Return the updated chat history and the assistant's response display_history = [[turn["content"], chat_history[i + 1]["content"]] for i, turn in enumerate(chat_history[:-1]) if turn["role"] == "user" and i + 1 < len(chat_history)] return display_history, chat_history except Exception as e: # Capture the traceback details error_message = f"An error occurred: {str(e)}" detailed_error = traceback.format_exc() chat_history.append({"role": "assistant", "content": error_message}) if debug: print("Error Details:\n", detailed_error) # Ensure safe generation of the display history try: display_history = [[turn["content"], chat_history[i + 1]["content"]] for i, turn in enumerate(chat_history[:-1]) if turn["role"] == "user" and i 

# Gradio Interface Configuration
def run_prod_chatbot(local=True):
    with gr.Blocks() as demo:
        gr.Markdown("""

SQuAD Q&A ChatBot

Authors: Zain Ali & Ben Hopwood

Code: GitHub link  |  Models: Hugging Face link

This project aims to develop a chatbot capable of multi-turn, context-adaptive conversations across various topics, using the Stanford Question Answering Dataset (SQuAD) as the primary source for training.

University of San Diego - AAI 520

""") with gr.Row(variant="compact"): model_dropdown = gr.Dropdown( choices=electra_models + other_models, label="Select Model", value="./models/fine_tuned_electra_model_all" ) # Column for Use RAG and Debug Mode checkboxes with gr.Column(): rag_checkbox = gr.Checkbox( label="Use RAG", value=True, interactive=True ) debug_checkbox = gr.Checkbox( label="Debug Mode", value=False ) context_window_dropdown = gr.Dropdown( choices=[1, 2, 3], label="Select Context Window", value=1 ) # Commented out the is_question_answering_checkbox, making it auto detectable. Leaving this as a reminder that other models do not use pipeline # is_question_answering_checkbox = gr.Checkbox( # label="Use Question Answering (Electra Only)", # value=True # ) chatbot = gr.Chatbot() state = gr.State([]) with gr.Row(): # Textbox taking 75% of the space msg = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter", scale=3) # Send button taking 25% of the space and stretching full width send_btn = gr.Button("Send", scale=1) send_btn.click(lambda message, chat_history, model_name, debug, rag, selected_context_window: generate_response(message, chat_history, model_name, debug, rag, selected_context_window), inputs=[msg, state, model_dropdown, debug_checkbox, rag_checkbox, context_window_dropdown], outputs=[chatbot, state]) msg.submit(lambda message, chat_history, model_name, debug, rag, selected_context_window: generate_response(message, chat_history, model_name, debug, rag, selected_context_window), inputs=[msg, state, model_dropdown, debug_checkbox, rag_checkbox, context_window_dropdown], outputs=[chatbot, state]) if local: demo.launch(share=True) else: demo.launch(server_name="0.0.0.0", server_port=None) # Launch the Gradio app run_prod_chatbot()