Spaces:

CesarLeblanc
/

plantbert_space

Running

App Files Files Community

CesarLeblanc committed on Feb 9, 2024

Commit

24390e2

verified ·

1 Parent(s): 4270278

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -1

app.py CHANGED Viewed

@@ -82,10 +82,50 @@ def classification(text, typology, confidence):
 def masking(text):
     text = gbif_normalization(text)
     masked_text = text + ', [MASK]'
     pred = mask_model(masked_text)[0]
     new_species = pred['token_str']
-    text = f"The last species from this vegetation plot is probably {new_species}."
     image = return_species_image(new_species)
     return text, image

 def masking(text):
     text = gbif_normalization(text)
+    max_score = 0
+    best_prediction = None
+    best_position = None
+    # Mask placed before the first species (position 0)
+    masked_text = "[MASK], " + ', '.join(text.split(', '))
+    prediction = mask_model(masked_text)[0]
+    species = prediction['token_str']
+    score = prediction['score']
+    if score > max_score:
+        max_score = score
+        best_prediction = species
+        best_position = 0
+    # Mask placed in every gap between two consecutive species (position i)
+    for i in range(1, len(text.split(', '))):
+        masked_text = ', '.join(text.split(', ')[:i]) + ', [MASK], ' + ', '.join(text.split(', ')[i:])
+        prediction = mask_model(masked_text)[0]
+        species = prediction['token_str']
+        score = prediction['score']
+        # Keep the candidate with the highest score seen so far
+        if score > max_score:
+            max_score = score
+            best_prediction = species
+            best_position = i
+    # Mask placed after the last species (position == number of species)
+    masked_text = ', '.join(text.split(', ')) + ', [MASK]'
+    prediction = mask_model(masked_text)[0]
+    species = prediction['token_str']
+    score = prediction['score']
+    if score > max_score:
+        max_score = score
+        best_prediction = species
+        best_position = len(text.split(', '))
     masked_text = text + ', [MASK]'
     pred = mask_model(masked_text)[0]
     new_species = pred['token_str']
+    text = f"The most likely missing species in position {best_position} is: {best_prediction}."
     image = return_species_image(new_species)
     return text, image