Spaces:
Running
Running
CesarLeblanc
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -5,10 +5,8 @@ import requests
|
|
5 |
from bs4 import BeautifulSoup
|
6 |
import random
|
7 |
|
8 |
-
classification_model = pipeline("text-classification", model="
|
9 |
-
mask_model = pipeline("fill-mask", model="
|
10 |
-
|
11 |
-
dataset = load_dataset("CesarLeblanc/plantbert_text_classification_dataset")
|
12 |
|
13 |
def return_text(habitat_label, habitat_score, confidence):
|
14 |
if habitat_score*100 > confidence:
|
@@ -78,7 +76,6 @@ def classification(text, typology, confidence):
|
|
78 |
text = gbif_normalization(text)
|
79 |
result = classification_model(text)
|
80 |
habitat_label = result[0]['label']
|
81 |
-
habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
|
82 |
habitat_score = result[0]['score']
|
83 |
formatted_output = return_text(habitat_label, habitat_score, confidence)
|
84 |
image_output = return_habitat_image(habitat_label, habitat_score, confidence)
|
@@ -86,26 +83,9 @@ def classification(text, typology, confidence):
|
|
86 |
|
87 |
def masking(text):
|
88 |
text = gbif_normalization(text)
|
89 |
-
masked_text = text + ', [MASK]
|
90 |
-
|
91 |
-
|
92 |
-
d["score"] += random.uniform(0, 0.1)
|
93 |
-
pred_genus.sort(key=lambda x: x["score"], reverse=True)
|
94 |
-
for i in range(3):
|
95 |
-
new_genus = pred_genus[i]['token_str']
|
96 |
-
masked_text = text + f', {new_genus} [MASK]'
|
97 |
-
pred_epithet = mask_model(masked_text, top_k=3)
|
98 |
-
for j in range(3):
|
99 |
-
new_epithet = pred_epithet[j]['token_str']
|
100 |
-
new_species = new_genus + ' ' + new_epithet
|
101 |
-
url_species = f"https://api.gbif.org/v1/species/match?name={new_species}"
|
102 |
-
r = requests.get(url_species)
|
103 |
-
r = r.json()
|
104 |
-
if new_species not in text and r["matchType"] != "NONE":
|
105 |
-
text = f"The last species from this vegetation plot is probably {new_species}."
|
106 |
-
image = return_species_image(new_species)
|
107 |
-
return text, image
|
108 |
-
text = f"We can't find the last species from this vegetation plot."
|
109 |
image = return_species_image(new_species)
|
110 |
return text, image
|
111 |
|
|
|
5 |
from bs4 import BeautifulSoup
|
6 |
import random
|
7 |
|
8 |
+
classification_model = pipeline("text-classification", model="plantbert_text_classification_model", tokenizer="plantbert_text_classification_model")
|
9 |
+
mask_model = pipeline("fill-mask", model="plantbert_fill_mask_model", tokenizer="plantbert_fill_mask_model")
|
|
|
|
|
10 |
|
11 |
def return_text(habitat_label, habitat_score, confidence):
|
12 |
if habitat_score*100 > confidence:
|
|
|
76 |
text = gbif_normalization(text)
|
77 |
result = classification_model(text)
|
78 |
habitat_label = result[0]['label']
|
|
|
79 |
habitat_score = result[0]['score']
|
80 |
formatted_output = return_text(habitat_label, habitat_score, confidence)
|
81 |
image_output = return_habitat_image(habitat_label, habitat_score, confidence)
|
|
|
83 |
|
84 |
def masking(text):
    """Predict the missing last species of a vegetation plot.

    The input is normalized with ``gbif_normalization``, a single ``[MASK]``
    token is appended after a comma, and the first candidate returned by the
    fill-mask pipeline is used as the predicted species.

    Args:
        text: Species list describing the vegetation plot.

    Returns:
        A ``(text, image)`` tuple: the sentence announcing the predicted
        species, and the result of ``return_species_image`` for that species.
    """
    text = gbif_normalization(text)
    masked_text = text + ', [MASK]'
    # Each fill-mask candidate is a dict; the predicted string lives under
    # 'token_str'. Formatting the whole dict would leak raw pipeline output
    # into the user-facing sentence.
    new_species = mask_model(masked_text)[0]['token_str']
    text = f"The last species from this vegetation plot is probably {new_species}."
    # new_species must be bound here: the image lookup below depends on it.
    image = return_species_image(new_species)
    return text, image
|
91 |
|