owaiskha9654
committed on
Commit
•
cf8a311
1
Parent(s):
0058211
Update app.py
Browse files
app.py
CHANGED
@@ -14,57 +14,28 @@ num_labels=14
|
|
14 |
|
15 |
def Multi_Label_Classification_of_Pubmed_Articles(model_input: str) -> Dict[str, float]:
|
16 |
|
17 |
-
# tokenized = tokenizer.tokenize_and_pad(model_input)
|
18 |
# Encoding input data
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
test_attention_masks = test_encodings['attention_mask']
|
24 |
-
# Make tensors out of data
|
25 |
-
test_inputs = torch.tensor(test_input_ids)
|
26 |
-
#test_labels = torch.tensor(test_labels)
|
27 |
-
test_masks = torch.tensor(test_attention_masks)
|
28 |
-
# Create test dataloader
|
29 |
-
test_data = TensorDataset(test_inputs, test_masks, )#test_labels, test_token_types)
|
30 |
-
test_sampler = SequentialSampler(test_data)
|
31 |
-
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)
|
32 |
|
33 |
-
# Put model in evaluation mode to evaluate loss on the validation set
|
34 |
-
model.eval()
|
35 |
-
|
36 |
-
#track variables
|
37 |
-
logit_preds,pred_labels,tokenized_texts = [],[],[]
|
38 |
-
|
39 |
-
# Predict
|
40 |
-
for i, batch in enumerate(test_dataloader):
|
41 |
-
batch = tuple(t.to(device) for t in batch)
|
42 |
-
# Unpack the inputs from our dataloader
|
43 |
-
b_input_ids, b_input_mask, b_labels, = batch
|
44 |
-
with torch.no_grad():
|
45 |
-
# Forward pass
|
46 |
-
outs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
|
47 |
-
b_logit_pred = outs[0]
|
48 |
-
pred_label = torch.sigmoid(b_logit_pred)
|
49 |
-
|
50 |
-
b_logit_pred = b_logit_pred.detach().cpu().numpy()
|
51 |
-
pred_label = pred_label.to('cpu').numpy()
|
52 |
-
b_labels = b_labels.to('cpu').numpy()
|
53 |
-
|
54 |
-
tokenized_texts.append(b_input_ids)
|
55 |
-
logit_preds.append(b_logit_pred)
|
56 |
-
#true_labels.append(b_labels)
|
57 |
-
pred_labels.append(pred_label)
|
58 |
-
|
59 |
-
# Flatten outputs
|
60 |
-
tokenized_texts = [item for sublist in tokenized_texts for item in sublist]
|
61 |
-
pred_labels = [item for sublist in pred_labels for item in sublist]
|
62 |
-
# true_labels = [item for sublist in true_labels for item in sublist]
|
63 |
-
# Converting flattened binary values to boolean values
|
64 |
-
# true_bools = [tl==1 for tl in true_labels]
|
65 |
-
|
66 |
-
|
67 |
#prediction = model.predict(tokenized)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
#ret = {
|
69 |
# "negative": float(prediction[0]),
|
70 |
# "positive": float(prediction[1])
|
|
|
14 |
|
15 |
def Multi_Label_Classification_of_Pubmed_Articles(model_input: str) -> Dict[str, float]:
|
16 |
|
|
|
17 |
# Encoding input data
|
18 |
+
encodings = tokenizer.batch_encode_plus(Article_train[0],max_length=1024,padding=True,truncation=True) # tokenizer's encoding method
|
19 |
+
outs = model(torch.tensor(encodings['input_ids']).cuda(), token_type_ids=None, attention_mask=torch.tensor(encodings['attention_mask']).cuda())
|
20 |
+
b_logit_pred = outs[0]
|
21 |
+
pred_label = torch.sigmoid(b_logit_pred)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
#prediction = model.predict(tokenized)[0]
|
24 |
+
ret ={
|
25 |
+
"Anatomy [A]": float(pred_label[0][0]),
|
26 |
+
"Organisms [B]": float(pred_label[0][1]),
|
27 |
+
"Diseases [C]": float(pred_label[0][2]),
|
28 |
+
"Chemicals and Drugs [D]": float(pred_label[0][3]),
|
29 |
+
"Analytical, Diagnostic and Therapeutic Techniques, and Equipment [E]": float(pred_label[0][4]),
|
30 |
+
"Psychiatry and Psychology [F]": float(pred_label[0][5]),
|
31 |
+
"Phenomena and Processes [G]": float(pred_label[0][6]),
|
32 |
+
"Disciplines and Occupations [H]": float(pred_label[0][7]),
|
33 |
+
"Anthropology, Education, Sociology, and Social Phenomena [I]": float(pred_label[0][8]),
|
34 |
+
"Technology, Industry, and Agriculture [J]": float(pred_label[0][9]),
|
35 |
+
"Information Science [L]": float(pred_label[0][10]),
|
36 |
+
"Named Groups [M]": float(pred_label[0][11]),
|
37 |
+
"Health Care [N]": float(pred_label[0][12]),
|
38 |
+
"Geographicals [Z]": float(pred_label[0][13])}
|
39 |
#ret = {
|
40 |
# "negative": float(prediction[0]),
|
41 |
# "positive": float(prediction[1])
|