File size: 1,038 Bytes
3197d2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import json

import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Load your documents into a dictionary.
# Expected schema (inferred from usage below): {doc_id: {"text": ...}, ...}
# — TODO confirm against the producer of your_docs.json.
with open("your_docs.json", "r", encoding="utf-8") as f:
    docs = json.load(f)

# Load the pre-trained extractive question-answering model.
# FIX: "AutoModelForQuestionAnsweringRegression" does not exist in the
# transformers library; AutoModelForQuestionAnswering is the correct auto
# class for span-extraction QA checkpoints such as this one.
model = AutoModelForQuestionAnswering.from_pretrained(
    "bert-large-uncased-whole-word-masking"
)
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking")

# Define a function to retrieve the answer to a question based on your documents
def get_answer(question: str):
    """Build a combined model input from *question* and the loaded documents.

    NOTE(review): the function body continues beyond this excerpt — only the
    input-construction half is visible here; the answer extraction/return
    logic is elsewhere.
    """
    # Tokenize the question and the documents
    question_tokens = tokenizer(question, return_tensors="pt")
    # Each value in `docs` is assumed to be a mapping with a "text" field —
    # TODO confirm against the JSON file's schema.
    docs_tokens = [tokenizer(doc["text"], return_tensors="pt") for doc in docs.values()]

    # Combine the documents and the question tokens into a single input
    # NOTE(review): torch.cat along dim=0 requires every tokenized sequence
    # to have identical length; without padding/truncation in the tokenizer
    # calls above, differently-sized documents will make this raise. Also
    # relies on a module-level `torch` import being present — verify.
    inputs = {
        "input_ids": torch.cat([q["input_ids"] for q in docs_tokens + [question_tokens]], dim=0),
        "attention_mask": torch.cat([q["attention_mask"] for q in docs_tokens + [question_tokens]], dim=0),
    }