# Module-level setup: dependencies, API key, device probe, and eager loading of
# all six extractive-QA models (English + Spanish) used by the Gradio app below.
import os
import openai
import torch
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering
import gradio as gr
import re

# Set your OpenAI API key here temporarily for testing
openai.api_key = os.getenv("OPENAI_API_KEY")

# Check if GPU is available and use it if possible
# NOTE(review): `device` is derived from torch, but every model loaded below is
# a TensorFlow model — confirm this value is actually consumed somewhere later
# in the file; TF picks its own devices independently of torch.device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the English models and tokenizers
# ConfliBERT fine-tuned for extractive QA (English).
qa_model_name_v1 = 'salsarra/ConfliBERT-QA'
qa_model_v1 = TFAutoModelForQuestionAnswering.from_pretrained(qa_model_name_v1)
qa_tokenizer_v1 = AutoTokenizer.from_pretrained(qa_model_name_v1)

# Baseline BERT fine-tuned on SQuAD v1 (English) for side-by-side comparison.
bert_model_name_v1 = 'salsarra/BERT-base-cased-SQuAD-v1'
bert_qa_model_v1 = TFAutoModelForQuestionAnswering.from_pretrained(bert_model_name_v1)
bert_qa_tokenizer_v1 = AutoTokenizer.from_pretrained(bert_model_name_v1)

# Load Spanish models and tokenizers
# ConfliBERT (Beto-based) fine-tuned on NewsQA (Spanish).
confli_model_spanish = 'salsarra/ConfliBERT-Spanish-Beto-Cased-NewsQA'
confli_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(confli_model_spanish)
confli_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_model_spanish)

# Baseline Beto fine-tuned on NewsQA (Spanish).
beto_model_spanish = 'salsarra/Beto-Spanish-Cased-NewsQA'
beto_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(beto_model_spanish)
beto_tokenizer_spanish = AutoTokenizer.from_pretrained(beto_model_spanish)

# Load the newly added models for Spanish (Beto and ConfliBERT SQAC)
confli_sqac_model_spanish = 'salsarra/ConfliBERT-Spanish-Beto-Cased-SQAC'
confli_sqac_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(confli_sqac_model_spanish)
confli_sqac_tokenizer_spanish = AutoTokenizer.from_pretrained(confli_sqac_model_spanish)

beto_sqac_model_spanish = 'salsarra/Beto-Spanish-Cased-SQAC'
beto_sqac_model_spanish_qa = TFAutoModelForQuestionAnswering.from_pretrained(beto_sqac_model_spanish)
beto_sqac_tokenizer_spanish = AutoTokenizer.from_pretrained(beto_sqac_model_spanish)

# Define error handling to separate input size errors from other issues
def handle_error_message(e, default_limit=512): error_message = str(e) pattern = re.compile(r"The size of tensor a \\((\\d+)\\) must match the size of tensor b \\((\\d+)\\)") match = pattern.search(error_message) if match: number_1, number_2 = match.groups() return f"Error: Text Input is over limit where inserted text size {number_1} is larger than model limits of {number_2}" pattern_qa = re.compile(r"indices\\[0,(\\d+)\\] = \\d+ is not in \\[0, (\\d+)\\)") match_qa = pattern_qa.search(error_message) if match_qa: number_1, number_2 = match_qa.groups() return f"Error: Text Input is over limit where inserted text size {number_1} is larger than model limits of {number_2}" return f"Error: {error_message}" # Main comparison function with language selection def compare_question_answering(language, context, question): if language == "English": confli_answer_v1 = question_answering_v1(context, question) bert_answer_v1 = bert_question_answering_v1(context, question) chatgpt_answer = chatgpt_question_answering(context, question) return f"""