owaiskha9654 committed on
Commit
cf8a311
•
1 Parent(s): 0058211

Update app.py

Files changed (1)
  1. app.py +19 -48
app.py CHANGED
@@ -14,57 +14,28 @@ num_labels=14
 
 def Multi_Label_Classification_of_Pubmed_Articles(model_input: str) -> Dict[str, float]:
 
-    # tokenized = tokenizer.tokenize_and_pad(model_input)
     # Encoding input data
-    max_length = 128
-    Articles_test = list(model_input)
-    test_encodings = tokenizer.batch_encode_plus(Articles_test,max_length=max_length,padding=True,truncation=True)
-    test_input_ids = test_encodings['input_ids']
-    test_attention_masks = test_encodings['attention_mask']
-    # Make tensors out of data
-    test_inputs = torch.tensor(test_input_ids)
-    #test_labels = torch.tensor(test_labels)
-    test_masks = torch.tensor(test_attention_masks)
-    # Create test dataloader
-    test_data = TensorDataset(test_inputs, test_masks, )#test_labels, test_token_types)
-    test_sampler = SequentialSampler(test_data)
-    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)
+    encodings = tokenizer.batch_encode_plus(Article_train[0],max_length=1024,padding=True,truncation=True) # tokenizer's encoding method
+    outs = model(torch.tensor(encodings['input_ids']).cuda(), token_type_ids=None, attention_mask=torch.tensor(encodings['attention_mask']).cuda())
+    b_logit_pred = outs[0]
+    pred_label = torch.sigmoid(b_logit_pred)
 
-    # Put model in evaluation mode to evaluate loss on the validation set
-    model.eval()
-
-    #track variables
-    logit_preds,pred_labels,tokenized_texts = [],[],[]
-
-    # Predict
-    for i, batch in enumerate(test_dataloader):
-        batch = tuple(t.to(device) for t in batch)
-        # Unpack the inputs from our dataloader
-        b_input_ids, b_input_mask, b_labels, = batch
-        with torch.no_grad():
-            # Forward pass
-            outs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
-            b_logit_pred = outs[0]
-            pred_label = torch.sigmoid(b_logit_pred)
-
-            b_logit_pred = b_logit_pred.detach().cpu().numpy()
-            pred_label = pred_label.to('cpu').numpy()
-            b_labels = b_labels.to('cpu').numpy()
-
-        tokenized_texts.append(b_input_ids)
-        logit_preds.append(b_logit_pred)
-        #true_labels.append(b_labels)
-        pred_labels.append(pred_label)
-
-    # Flatten outputs
-    tokenized_texts = [item for sublist in tokenized_texts for item in sublist]
-    pred_labels = [item for sublist in pred_labels for item in sublist]
-    # true_labels = [item for sublist in true_labels for item in sublist]
-    # Converting flattened binary values to boolean values
-    # true_bools = [tl==1 for tl in true_labels]
-
-
     #prediction = model.predict(tokenized)[0]
+    ret ={
+        "Anatomy [A]": float(pred_label[0][0]),
+        "Organisms [B]": float(pred_label[0][1]),
+        "Diseases [C]": float(pred_label[0][2]),
+        "Chemicals and Drugs [D]": float(pred_label[0][3]),
+        "Analytical, Diagnostic and Therapeutic Techniques, and Equipment [E]": float(pred_label[0][4]),
+        "Psychiatry and Psychology [F]": float(pred_label[0][5]),
+        "Phenomena and Processes [G]": float(pred_label[0][6]),
+        "Disciplines and Occupations [H]": float(pred_label[0][7]),
+        "Anthropology, Education, Sociology, and Social Phenomena [I]": float(pred_label[0][8]),
+        "Technology, Industry, and Agriculture [J]": float(pred_label[0][9]),
+        "Information Science [L]": float(pred_label[0][10]),
+        "Named Groups [M]": float(pred_label[0][11]),
+        "Health Care [N]": float(pred_label[0][12]),
+        "Geographicals [Z]": float(pred_label[0][13])}
     #ret = {
     #    "negative": float(prediction[0]),
     #    "positive": float(prediction[1])