import json

import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Load your documents into a dictionary
with open("your_docs.json", "r") as f:
    docs = json.load(f)

# Load a pre-trained extractive question answering model and its tokenizer
# (the SQuAD-finetuned checkpoint ships with a trained QA head)
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Define a function to retrieve the answer to a question based on your documents
def get_answer(question):
    best_answer, best_score = "", float("-inf")
    for doc in docs.values():
        # Tokenize the question together with one document as a question/context pair
        inputs = tokenizer(question, doc["text"], return_tensors="pt",
                           truncation=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
        # Pick the most likely start/end token positions for the answer span
        start = torch.argmax(outputs.start_logits)
        end = torch.argmax(outputs.end_logits)
        score = (outputs.start_logits[0, start] + outputs.end_logits[0, end]).item()
        # Keep the highest-scoring valid span across all documents
        if end >= start and score > best_score:
            answer_ids = inputs["input_ids"][0, start:end + 1]
            best_answer = tokenizer.decode(answer_ids, skip_special_tokens=True)
            best_score = score
    return best_answer
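A quick usage sketch follows. It assumes your_docs.json maps document IDs to objects with a "text" field (the same layout the loop above relies on); the file contents and the question string are illustrative, not part of any fixed API.

# Hypothetical example: your_docs.json might contain
# {"doc1": {"text": "Alan Turing was born in 1912 in London."}}
print(get_answer("When was Alan Turing born?"))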