import json
import os
import re
import traceback

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Preprocess text by lowercasing, removing punctuation, and collapsing extra whitespace
def optimized_preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
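
# Illustrative usage (hypothetical input, not part of the original app):
#   optimized_preprocess_text("  Who wrote Hamlet?! ")  ->  "who wrote hamlet"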

# Compute cosine similarity between two texts using TF-IDF
def optimized_compute_text_similarity(text1, text2):
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 1))
    tfidf_matrix = tfidf.fit_transform([text1, text2])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2]).flatten()
    return cosine_sim[0]
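
# Illustrative sketch (hypothetical strings): the returned value is the TF-IDF cosine
# similarity in [0, 1]; identical texts score ~1.0 and texts sharing no non-stopword
# terms score 0.0, e.g.
#   optimized_compute_text_similarity("the cat sat", "the cat sat")        -> ~1.0
#   optimized_compute_text_similarity("quantum physics", "banana bread")   -> 0.0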

# Compute SBERT similarity between question and context
def compute_sbert_similarity(question, context, model):
    embeddings = model.encode([question, context], convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
    return similarity
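
# Illustrative sketch (hypothetical inputs): SBERT similarity captures semantic overlap
# even when few words are shared, so a pair such as
#   compute_sbert_similarity("Who wrote Hamlet?",
#                            "Hamlet is a tragedy by William Shakespeare.", sbert_model)
# would typically score noticeably higher than an unrelated sentence pair.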

# Use hybrid approach: TF-IDF to narrow down top N contexts, then SBERT for refined similarity
def hybrid_sbert_approach(question, filtered_contexts, model, top_n=10):
    tfidf = TfidfVectorizer(stop_words='english')
    contexts_combined = [question] + filtered_contexts
    tfidf_matrix = tfidf.fit_transform(contexts_combined)
    # Calculate TF-IDF similarity and rank contexts
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    ranked_contexts = [filtered_contexts[i] for i in similarity_scores.argsort()[::-1][:top_n]]
    # Refine the shortlist using SBERT
    sbert_similarities = [compute_sbert_similarity(question, context, model) for context in ranked_contexts]
    ranked_by_sbert = sorted(zip(ranked_contexts, sbert_similarities), key=lambda x: x[1], reverse=True)
    return [context for context, _ in ranked_by_sbert]
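
# Illustrative usage (hypothetical candidate passages; the real app passes filtered SQuAD contexts):
#   candidates = ["Hamlet is a tragedy written by William Shakespeare ...",
#                 "The mitochondrion is the powerhouse of the cell ..."]
#   best_first = hybrid_sbert_approach("Who wrote Hamlet?", candidates, sbert_model, top_n=10)
# TF-IDF cheaply prunes the pool to top_n candidates, so the more expensive SBERT encoding
# only runs on that shortlist rather than on every context.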

# RAG retrieval using the hybrid TF-IDF + SBERT approach (relies on the globally loaded sbert_model)
def optimized_generate_rag_context(question, filtered_contexts, selected_context_window=2):
    selected_context_window = int(selected_context_window)
    hybrid_retrieved_contexts = hybrid_sbert_approach(question, filtered_contexts, sbert_model, top_n=selected_context_window)
    rag_context = "\n".join(hybrid_retrieved_contexts[:selected_context_window])
    return rag_context

# Extract unique contexts and filter them by length
def extract_and_filter_contexts(data, min_length=151, max_length=3706):
    unique_contexts = data['context'].unique()
    filtered_contexts = [context for context in unique_contexts if min_length <= len(context) <= max_length]
    return filtered_contexts

# Compute the TF-IDF matrix for the question and contexts
def compute_tfidf_and_similarity_scores(question, contexts):
    tfidf = TfidfVectorizer(stop_words='english')
    contexts_combined = [question] + contexts
    tfidf_matrix = tfidf.fit_transform(contexts_combined)
    # Calculate the cosine similarity scores
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    return tfidf_matrix, similarity_scores

# Rank contexts based on similarity scores
def rank_contexts_by_similarity(contexts, similarity_scores):
    ranked_indices = similarity_scores.argsort()[::-1]
    ranked_contexts = [contexts[i] for i in ranked_indices]
    ranked_scores = similarity_scores[ranked_indices]
    return ranked_contexts, ranked_scores

# Select the top contexts based on the selected window
def select_top_contexts(selected_context_window, ranked_contexts, ranked_scores):
    count = int(selected_context_window)
    top_contexts = ranked_contexts[:count]
    top_scores = ranked_scores[:count]
    return top_contexts, top_scores
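
# Illustrative sketch tying the three helpers together (hypothetical candidate list):
#   _, scores = compute_tfidf_and_similarity_scores("Who wrote Hamlet?", candidates)
#   ranked, ranked_scores = rank_contexts_by_similarity(candidates, scores)
#   top, top_scores = select_top_contexts(2, ranked, ranked_scores)
# top then holds the two passages with the highest TF-IDF cosine similarity to the question.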

# Helper to append the user's message to the chat history
def maintain_chat_history(message, chat_history):
    if chat_history is None:
        chat_history = []
    chat_history.append({"role": "user", "content": message})
    return chat_history
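
# Illustrative usage: history entries use the same dict shape consumed by generate_response below, e.g.
#   maintain_chat_history("Who wrote Hamlet?", None)
#   -> [{"role": "user", "content": "Who wrote Hamlet?"}]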

# RAG retrieval using TF-IDF ranking only (no SBERT re-ranking)
def generate_rag_context(question, filtered_contexts, selected_context_window=3):
    tfidf_matrix, similarity_scores = compute_tfidf_and_similarity_scores(question, filtered_contexts)
    ranked_contexts, ranked_scores = rank_contexts_by_similarity(filtered_contexts, similarity_scores)
    top_contexts, top_scores = select_top_contexts(selected_context_window, ranked_contexts, ranked_scores)
    rag_context = "\n".join(top_contexts)
    return rag_context
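
# Illustrative note: with RAG enabled, the retrieved passages are joined with newlines and handed
# to the question-answering pipeline as its context argument (see generate_response below).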

# Load the raw SQuAD JSON file
def load_squad_data(filepath):
    with open(filepath, 'r') as f:
        squad_data = json.load(f)
    return squad_data

# Preprocess the data: extract contexts, questions, and answers from the SQuAD data
def raw_preprocess_data(squad_data):
    contexts = []
    questions = []
    answers = []
    for group in squad_data['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    contexts.append(context)
                    questions.append(question)
                    # Make a copy to avoid modifying the original answer
                    answers.append({
                        'text': answer['text'],
                        'answer_start': answer['answer_start']
                    })
    return contexts, questions, answers

# Add the end index of each answer in its context, correcting slightly misaligned start offsets
def add_end_idx(answers, contexts):
    for answer, context in zip(answers, contexts):
        gold_text = answer['text']
        start_idx = answer['answer_start']
        end_idx = start_idx + len(gold_text)
        if context[start_idx:end_idx] == gold_text:
            answer['answer_end'] = end_idx
        else:
            # Try to find the correct position if there's a mismatch, sliding up to 29 characters
            for n in range(1, 30):
                if context[start_idx - n:end_idx - n] == gold_text:
                    answer['answer_start'] = start_idx - n
                    answer['answer_end'] = end_idx - n
                    break
                elif context[start_idx + n:end_idx + n] == gold_text:
                    answer['answer_start'] = start_idx + n
                    answer['answer_end'] = end_idx + n
                    break
            else:
                # for-else: no shifted span matched, so flag the sample for removal
                answer['answer_start'] = -1
                answer['answer_end'] = -1
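
# Illustrative sketch (hypothetical answer dict): an off-by-one start offset is recovered
# by the sliding window above, e.g.
#   context = "Hamlet was written by William Shakespeare."
#   answer = {'text': 'William Shakespeare', 'answer_start': 21}   # correct start is 22
#   add_end_idx([answer], [context])
#   # answer -> {'text': 'William Shakespeare', 'answer_start': 22, 'answer_end': 41}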

# Create a DataFrame from the contexts, questions, and answers
def create_dataframe(contexts, questions, answers):
    data = pd.DataFrame({
        'context': contexts,
        'question': questions,
        'answer_text': [answer['text'] for answer in answers],
        'answer_start': [answer['answer_start'] for answer in answers],
        'answer_end': [answer.get('answer_end', -1) for answer in answers]
    })
    # Remove samples with -1 start index
    data = data[data['answer_start'] != -1].reset_index(drop=True)
    return data
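
# Illustrative sketch of a resulting row (hypothetical values):
#   context                                        question             answer_text            answer_start  answer_end
#   "Hamlet was written by William Shakespeare."   "Who wrote Hamlet?"  "William Shakespeare"            22          41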

# Check if a GPU (CUDA) is available; otherwise, use the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pre-trained SBERT model once, globally, for efficiency
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')

# Available models
electra_models = [
    "./models/fine_tuned_electra_model_1000",
    "./models/fine_tuned_electra_model_20000",
    "./models/fine_tuned_electra_model_5000",
    "./models/fine_tuned_electra_model_all"
]
other_models = [
    "./models/fine_tuned_bert_base_cased_1000",
    "./models/fine_tuned_bert_base_cased_all",
    "./models/fine_tuned_distilbert_base_uncased_10000",
    "./models/fine_tuned_distilgpt2_10000",
    "./models/fine_tuned_retro-reader_intensive_1000",
    "./models/fine_tuned_retro-reader_intensive_5000",
    "./models/fine_tuned_retro-reader_sketchy_1000"
]

DATA_DIR = './data'

# Load and preprocess the SQuAD training data
squad_data = load_squad_data(os.path.join(DATA_DIR, 'train-v1.1.json'))
contexts, questions, answers = raw_preprocess_data(squad_data)
add_end_idx(answers, contexts)
data = create_dataframe(contexts, questions, answers)

# Generate a response for the current message, with optional RAG retrieval and debug logging
def generate_response(message, chat_history, model_name, debug, rag, selected_context_window):
    try:
        if chat_history is None:
            chat_history = []
        context = message
        # Determine if the model is for question answering based on its name
        is_question_answering = "electra_model" in model_name
        # Initialize the tokenizer and model (reloaded on every call)
        if is_question_answering:
            model = pipeline("question-answering", model=model_name, tokenizer=model_name, device=device)
        else:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(model_name)
            model.to(device)
        # Append the new user message to the chat history
        chat_history.append({"role": "user", "content": message})
        if is_question_answering:
            if rag:
                filtered_contexts = extract_and_filter_contexts(data, min_length=100, max_length=4000)
                context = generate_rag_context(message, filtered_contexts, selected_context_window)
            else:
                context = "\n".join([turn["content"] for turn in chat_history if turn["role"] == "user"])
            if debug:
                print("context:\n" + context)
                print("message:\n" + message)
            # Call the question-answering pipeline
            answer = model(question=message, context=context)
            response = answer['answer']
        else:
            # Prepare the conversation history for a regular causal-LM chatbot
            conversation = ""
            for turn in chat_history:
                if turn["role"] == "user":
                    conversation += f"User: {turn['content']}\n"
                else:
                    conversation += f"Assistant: {turn['content']}\n"
            if debug:
                print("Conversation being sent to the model:\n", conversation)
            # Encode the input and generate a response
            inputs = tokenizer.encode(conversation + "Assistant:", return_tensors='pt').to(device)
            outputs = model.generate(
                inputs,
                max_length=inputs.shape[1] + 100,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_p=0.95,
                top_k=50,
                temperature=0.7,
                eos_token_id=tokenizer.eos_token_id,
            )
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract only the assistant's reply from the decoded text
            response = response[len(conversation):].strip()
            if "User:" in response:
                response = response.split("User:")[0].strip()
        # Append the assistant's response to the chat history
        chat_history.append({"role": "assistant", "content": response})
        if debug:
            print("Generated response:", response)
            print("Configurations:")
            print(f"Model Name: {model_name}")
            print(f"Is Question Answering: {is_question_answering}")
            print(f"RAG Enabled: {rag}")
            print(f"Selected Context Window: {selected_context_window}")
        # Build the (user, assistant) display pairs for the Chatbot widget and return them with the full history
        display_history = [
            [turn["content"], chat_history[i + 1]["content"]]
            for i, turn in enumerate(chat_history[:-1])
            if turn["role"] == "user" and i + 1 < len(chat_history)
        ]
        return display_history, chat_history
    except Exception as e:
        # Capture the traceback details
        error_message = f"An error occurred: {str(e)}"
        detailed_error = traceback.format_exc()
        chat_history.append({"role": "assistant", "content": error_message})
        if debug:
            print("Error Details:\n", detailed_error)
        # Ensure safe generation of the display history
        try:
            display_history = [
                [turn["content"], chat_history[i + 1]["content"]]
                for i, turn in enumerate(chat_history[:-1])
                if turn["role"] == "user" and i + 1 < len(chat_history)
            ]
        except Exception as history_error:
            if debug:
                print("Error while generating display history:", str(history_error))
            display_history = []
        return display_history, chat_history

# Gradio Interface Configuration
def run_prod_chatbot(local=True):
    with gr.Blocks() as demo:
        gr.Markdown("""
        <div style="text-align: center;">
            <h1><strong>SQuAD Q&A ChatBot</strong></h1>
            <h3>Authors: <a href="https://github.com/zainnobody">Zain Ali</a> & <a href="https://github.com/AIBenHopwood/">Ben Hopwood</a></h3>
            <p>
                <a href="https://github.com/zainnobody/AAI-520-Final-Project" target="_blank">Code: GitHub link</a> |
                <a href="https://huggingface.co/zainnobody/AAI-520-Final-Project-Models" target="_blank">Models: Huggingface link</a>
            </p>
        </div>
        <div style="text-align: center;">
            <p>
                This project aims to develop a chatbot capable of multi-turn, context-adaptive conversations across various topics, using the Stanford Question Answering Dataset (SQuAD) as the primary source for training.
            </p>
        </div>
        <div style="text-align: center;">
            <h4>University of San Diego - AAI 520</h4>
        </div>
        """)
        with gr.Row(variant="compact"):
            model_dropdown = gr.Dropdown(
                choices=electra_models + other_models,
                label="Select Model",
                value="./models/fine_tuned_electra_model_all"
            )
            # Column for the Use RAG and Debug Mode checkboxes
            with gr.Column():
                rag_checkbox = gr.Checkbox(
                    label="Use RAG",
                    value=True,
                    interactive=True
                )
                debug_checkbox = gr.Checkbox(
                    label="Debug Mode",
                    value=False
                )
            context_window_dropdown = gr.Dropdown(
                choices=[1, 2, 3],
                label="Select Context Window",
                value=1
            )
            # The is_question_answering_checkbox was commented out in favour of auto-detection;
            # kept as a reminder that the non-Electra models do not go through the QA pipeline.
            # is_question_answering_checkbox = gr.Checkbox(
            #     label="Use Question Answering (Electra Only)",
            #     value=True
            # )
        chatbot = gr.Chatbot()
        state = gr.State([])
        with gr.Row():
            # Textbox taking 75% of the space
            msg = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter", scale=3)
            # Send button taking 25% of the space and stretching full width
            send_btn = gr.Button("Send", scale=1)
        send_btn.click(
            generate_response,
            inputs=[msg, state, model_dropdown, debug_checkbox, rag_checkbox, context_window_dropdown],
            outputs=[chatbot, state]
        )
        msg.submit(
            generate_response,
            inputs=[msg, state, model_dropdown, debug_checkbox, rag_checkbox, context_window_dropdown],
            outputs=[chatbot, state]
        )
    if local:
        demo.launch(share=True)
    else:
        demo.launch(server_name="0.0.0.0", server_port=None)

# Launch the Gradio app
run_prod_chatbot()