CesarLeblanc committed on
Commit b1a0d53
1 Parent(s): d5ff4e3

Update app.py

Files changed (1)
  1. app.py +24 -1
app.py CHANGED
@@ -4,7 +4,6 @@ from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
 
-
 classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
 mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
 
@@ -52,7 +51,30 @@ def return_species_image(species):
     image = gr.Image(value=image_url)
     return image
 
+def gbif_normalization(text):
+    base = "https://api.gbif.org/v1"
+    api = "species"
+    function = "match"
+    parameter = "name"
+    url = f"{base}/{api}/{function}?{parameter}="
+    all_species = text.split(',')
+    all_species = [species.strip() for species in all_species]
+    species_gbif = []
+    for species in all_species:
+        url = url.replace(url.partition('name')[2], f'={species}')
+        r = requests.get(url)
+        r = r.json()
+        if 'species' in r:
+            r = r["species"]
+        else:
+            r = species
+        species_gbif.append(r)
+    text = ", ".join(species_gbif)
+    text = text.lower()
+    return text
+
 def classification(text, typology, confidence):
+    text = gbif_normalization(text)
     result = classification_model(text)
     habitat_label = result[0]['label']
     habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
@@ -62,6 +84,7 @@ def classification(text, typology, confidence):
     return formatted_output, image_output
 
 def masking(text):
+    text = gbif_normalization(text)
     masked_text = text + ', [MASK] [MASK]'
     pred = mask_model(masked_text, top_k=1)
     new_species = [pred[i][0]['token_str'] for i in range(len(pred))]
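For context, the added gbif_normalization step sends each comma-separated species name to GBIF's species match endpoint (https://api.gbif.org/v1/species/match?name=...) and keeps the returned canonical "species" value when a match is found, falling back to the raw input otherwise. Below is a minimal standalone sketch of that behaviour; the helper name normalize_species and the example inputs are illustrative and not part of the commit, and it passes the name through requests' params argument rather than rebuilding the query string by hand as the committed code does.

import requests

def normalize_species(names):
    # Hypothetical helper mirroring gbif_normalization; not the committed code.
    url = "https://api.gbif.org/v1/species/match"
    normalized = []
    for name in (n.strip() for n in names.split(',')):
        # GBIF's species match API returns a "species" field when the name
        # resolves at species rank or below; otherwise keep the raw input.
        response = requests.get(url, params={"name": name}).json()
        normalized.append(response.get("species", name))
    return ", ".join(normalized).lower()

# Example (species names chosen for illustration only):
# print(normalize_species("Quercus robur, Fagus sylvatica"))

As in the committed function, the result is lower-cased and re-joined with ", " so it stays in the comma-separated species format that the text-classification and fill-mask pipelines receive.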