rsss0 committed on
Commit
2f7f9fa
·
1 Parent(s): 4e00d37

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -0
app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Extractive question-answering demo: run BigBirdForQuestionAnswering on a
# single SQuAD v2 example and print the answer span the model selects.
import torch
from transformers import AutoTokenizer, BigBirdForQuestionAnswering
from datasets import load_dataset

CHECKPOINT = "google/bigbird-roberta-base"
# Fixed (not random) row of the SQuAD v2 train split used as the demo input.
EXAMPLE_INDEX = 81514

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
# NOTE(review): this looks like the base (not QA-fine-tuned) checkpoint, in
# which case the span-prediction head may be freshly initialised and the
# predicted answer not meaningful -- confirm whether a fine-tuned checkpoint
# was intended.
model = BigBirdForQuestionAnswering.from_pretrained(CHECKPOINT)
squad_ds = load_dataset("squad_v2", split="train")

# Fetch the row once and read both fields from it.
example = squad_ds[EXAMPLE_INDEX]
LONG_ARTICLE = example["context"]
QUESTION = example["question"]
# A bare `QUESTION` expression only echoes in a REPL; print it so the script
# actually shows the question.
print(QUESTION)

# Encode the question and the article together as one sequence pair.
inputs = tokenizer(QUESTION, LONG_ARTICLE, return_tensors="pt")
# Shape of the encoded question + article input.
print(list(inputs["input_ids"].shape))

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)

# Highest-scoring start and end positions, chosen independently.
# NOTE(review): nothing forces end >= start; when end < start the slice below
# is empty and the decoded answer is an empty string.
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()
predict_answer_token_ids = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
predict_answer_token = tokenizer.decode(predict_answer_token_ids)
# The original computed the answer but never displayed it.
print(predict_answer_token)