"""Answer questions against a local JSON document collection using a BERT QA model."""
import json

import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load the document collection.
# Assumes the JSON maps doc-id -> {"text": ...} (see get_answer) — TODO confirm.
with open("your_docs.json", "r") as f:
    docs = json.load(f)

# BUG FIX: "AutoModelForQuestionAnsweringRegression" does not exist in
# transformers — the auto class for extractive QA is AutoModelForQuestionAnswering.
# BUG FIX: torch was used below (torch.cat) but never imported.
MODEL_NAME = "bert-large-uncased-whole-word-masking"
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def get_answer(question):
    """Assemble a batched model input from *question* and every loaded document.

    Parameters
    ----------
    question : str
        The natural-language question to tokenize alongside the documents.

    NOTE(review): this function appears truncated in the source — ``inputs``
    is built but never passed to ``model`` and nothing is returned; confirm
    the missing continuation against the original.

    NOTE(review): ``torch.cat(..., dim=0)`` requires every row to have the
    same sequence length; tokenizing each text independently without padding
    will raise for differing lengths. Tokenizing the whole batch at once with
    ``tokenizer([...], padding=True, return_tensors="pt")`` would be safer —
    left unchanged here to preserve the visible behavior.
    """
    # Tokenize the question and each document separately.
    question_tokens = tokenizer(question, return_tensors="pt")
    docs_tokens = [
        tokenizer(doc["text"], return_tensors="pt") for doc in docs.values()
    ]

    # Documents first, question last — preserves the original ordering.
    batch = docs_tokens + [question_tokens]
    inputs = {
        "input_ids": torch.cat([t["input_ids"] for t in batch], dim=0),
        "attention_mask": torch.cat([t["attention_mask"] for t in batch], dim=0),
    }