Spaces:
Running
Running
CesarLeblanc
committed on
Commit
·
b1a0d53
1
Parent(s):
d5ff4e3
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,6 @@ from datasets import load_dataset
|
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
|
7 |
-
|
8 |
classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
|
9 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
10 |
|
@@ -52,7 +51,30 @@ def return_species_image(species):
|
|
52 |
image = gr.Image(value=image_url)
|
53 |
return image
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def classification(text, typology, confidence):
|
|
|
56 |
result = classification_model(text)
|
57 |
habitat_label = result[0]['label']
|
58 |
habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
|
@@ -62,6 +84,7 @@ def classification(text, typology, confidence):
|
|
62 |
return formatted_output, image_output
|
63 |
|
64 |
def masking(text):
|
|
|
65 |
masked_text = text + ', [MASK] [MASK]'
|
66 |
pred = mask_model(masked_text, top_k=1)
|
67 |
new_species = [pred[i][0]['token_str'] for i in range(len(pred))]
|
|
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
|
|
|
7 |
classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
|
8 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
9 |
|
|
|
51 |
image = gr.Image(value=image_url)
|
52 |
return image
|
53 |
|
54 |
+
def gbif_normalization(text):
    """Normalize a comma-separated list of species names with the GBIF match API.

    Each comma-separated entry is stripped and looked up at
    ``https://api.gbif.org/v1/species/match?name=<entry>``. When the JSON
    response carries a ``species`` field, that value replaces the entry;
    otherwise the entry is kept as typed.

    Args:
        text: Species names separated by commas.

    Returns:
        The (possibly replaced) names joined with ``", "`` and lowercased.
    """
    match_url = "https://api.gbif.org/v1/species/match"
    normalized = []
    for name in (part.strip() for part in text.split(',')):
        if not name:
            # Nothing to look up: keep the empty entry so the output layout
            # is the same as before, but skip the pointless HTTP round trip.
            normalized.append(name)
            continue
        try:
            # `params` lets requests URL-encode the name (spaces, '&', '#', ...)
            # instead of splicing it into the URL by hand; the timeout keeps a
            # stalled GBIF call from blocking the caller indefinitely.
            response = requests.get(match_url, params={"name": name}, timeout=10)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or non-JSON body: fall back to the raw name,
            # the same fallback used when GBIF finds no match.
            normalized.append(name)
            continue
        matched = payload.get("species") if isinstance(payload, dict) else None
        normalized.append(matched or name)
    return ", ".join(normalized).lower()
|
75 |
+
|
76 |
def classification(text, typology, confidence):
|
77 |
+
text = gbif_normalization(text)
|
78 |
result = classification_model(text)
|
79 |
habitat_label = result[0]['label']
|
80 |
habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
|
|
|
84 |
return formatted_output, image_output
|
85 |
|
86 |
def masking(text):
|
87 |
+
text = gbif_normalization(text)
|
88 |
masked_text = text + ', [MASK] [MASK]'
|
89 |
pred = mask_model(masked_text, top_k=1)
|
90 |
new_species = [pred[i][0]['token_str'] for i in range(len(pred))]
|