Spaces:
No application file
No application file
testing something
Browse files- requirements.txt +22 -0
requirements.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Load the documents into a dictionary. Each value is expected to carry a
# "text" field, which is what get_answer() reads.
# The encoding is explicit so that parsing does not depend on the platform's
# default locale (JSON interchange is UTF-8).
with open("your_docs.json", "r", encoding="utf-8") as f:
    docs = json.load(f)

# Load the question answering model and its matching tokenizer.
# transformers does not export "AutoModelForQuestionAnsweringRegression";
# the auto class for extractive question answering is
# AutoModelForQuestionAnswering.
# NOTE(review): this checkpoint name has no "-finetuned-squad" suffix, so it
# is presumably the base pre-trained model and the QA head would be newly
# initialised -- confirm that a fine-tuned checkpoint is not intended.
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking")
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking")
|
11 |
+
|
12 |
+
# Define a function to retrieve the answer to a question based on your documents
|
13 |
+
def get_answer(question):
    """Assemble the model inputs for a question and all loaded documents.

    Every document's "text" field and the question are tokenized together as
    one padded batch: documents come first (in ``docs.values()`` order) and
    the question is the last row.

    Args:
        question: The question text to tokenize.

    Returns:
        None. NOTE(review): the function ends after building ``inputs``; it
        neither runs the model nor returns an answer yet.
    """
    # Documents first, question last.
    texts = [doc["text"] for doc in docs.values()]
    texts.append(question)

    # Tokenize the question and the documents as a single batch. Padding
    # brings every row to the same length; tokenizing each text separately
    # yields (1, seq_len) tensors of differing lengths, which cannot be
    # concatenated along dim 0.
    encoded = tokenizer(texts, padding=True, return_tensors="pt")

    # Combine the documents and the question tokens into a single input
    inputs = {
        "input_ids": encoded["input_ids"],
        "attention_mask": encoded["attention_mask"],
    }
    # TODO: pass `inputs` to `model` and extract/return the answer span.
|