Spaces:

CesarLeblanc
/

plantbert_space

Running

CesarLeblanc committed on Dec 1, 2023

Commit

20742e4

1 Parent(s): 94ce85f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,8 +3,9 @@ from transformers import pipeline
 from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
-classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
 mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
 dataset = load_dataset("CesarLeblanc/text_classification_dataset")
@@ -86,9 +87,12 @@ def classification(text, typology, confidence):
 def masking(text):
     text = gbif_normalization(text)
     masked_text = text + ', [MASK] [MASK]'
-    pred_genus = mask_model(masked_text, top_k=3)
     for i in range(3):
-        new_genus = pred_genus[0][i]['token_str']
         masked_text = text + f', {new_genus} [MASK]'
         pred_epithet = mask_model(masked_text, top_k=3)
         for j in range(3):

 from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
+import random
+classification_model = pipeline("text-classification", model="CesarLeblanc/text_classification_model")
 mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
 dataset = load_dataset("CesarLeblanc/text_classification_dataset")
 def masking(text):
     text = gbif_normalization(text)
     masked_text = text + ', [MASK] [MASK]'
+    pred_genus = mask_model(masked_text, top_k=10)[0]
+    for d in pred_genus:
+        d["score"] += random.uniform(0, 0.1)
+    pred_genus.sort(key=lambda x: x["score"], reverse=True)
     for i in range(3):
+        new_genus = pred_genus[i]['token_str']
         masked_text = text + f', {new_genus} [MASK]'
         pred_epithet = mask_model(masked_text, top_k=3)
         for j in range(3):