Spaces:

CesarLeblanc
/

plantbert_space

Running

App Files Community

CesarLeblanc committed on Feb 7, 2024

Commit

edd2cf0

verified ·

1 Parent(s): f9dd18b

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -25

app.py CHANGED Viewed

@@ -5,10 +5,8 @@ import requests
 from bs4 import BeautifulSoup
 import random
-classification_model = pipeline("text-classification", model="CesarLeblanc/plantbert_text_classification_model")
-mask_model = pipeline("fill-mask", model="CesarLeblanc/plantbert_fill_mask_model")
-dataset = load_dataset("CesarLeblanc/plantbert_text_classification_dataset")
 def return_text(habitat_label, habitat_score, confidence):
     if habitat_score*100 > confidence:
@@ -78,7 +76,6 @@ def classification(text, typology, confidence):
     text = gbif_normalization(text)
     result = classification_model(text)
     habitat_label = result[0]['label']
-    habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
     habitat_score = result[0]['score']
     formatted_output = return_text(habitat_label, habitat_score, confidence)
     image_output = return_habitat_image(habitat_label, habitat_score, confidence)
@@ -86,26 +83,9 @@ def classification(text, typology, confidence):
 def masking(text):
     text = gbif_normalization(text)
-    masked_text = text + ', [MASK] [MASK]'
-    pred_genus = mask_model(masked_text, top_k=10)[0]
-    for d in pred_genus:
-        d["score"] += random.uniform(0, 0.1)
-    pred_genus.sort(key=lambda x: x["score"], reverse=True)
-    for i in range(3):
-        new_genus = pred_genus[i]['token_str']
-        masked_text = text + f', {new_genus} [MASK]'
-        pred_epithet = mask_model(masked_text, top_k=3)
-        for j in range(3):
-            new_epithet = pred_epithet[j]['token_str']
-            new_species = new_genus + ' ' + new_epithet
-            url_species = f"https://api.gbif.org/v1/species/match?name={new_species}"
-            r = requests.get(url_species)
-            r = r.json()
-            if new_species not in text and r["matchType"] != "NONE":
-                text = f"The last species from this vegetation plot is probably {new_species}."
-                image = return_species_image(new_species)
-                return text, image
-    text = f"We can't find the last species from this vegetation plot."
     image = return_species_image(new_species)
     return text, image

 from bs4 import BeautifulSoup
 import random
+classification_model = pipeline("text-classification", model="plantbert_text_classification_model", tokenizer="plantbert_text_classification_model")
+mask_model = pipeline("fill-mask", model="plantbert_fill_mask_model", tokenizer="plantbert_fill_mask_model")
 def return_text(habitat_label, habitat_score, confidence):
     if habitat_score*100 > confidence:
     text = gbif_normalization(text)
     result = classification_model(text)
     habitat_label = result[0]['label']
     habitat_score = result[0]['score']
     formatted_output = return_text(habitat_label, habitat_score, confidence)
     image_output = return_habitat_image(habitat_label, habitat_score, confidence)
 def masking(text):
     text = gbif_normalization(text)
+    masked_text = text + ', [MASK]'
+    pred = mask_model(masked_text)[0]
+    text = f"The last species from this vegetation plot is probably {pred}."
     image = return_species_image(new_species)
     return text, image