CesarLeblanc committed on
Commit
edd2cf0
·
verified ·
1 Parent(s): f9dd18b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -25
app.py CHANGED
@@ -5,10 +5,8 @@ import requests
5
  from bs4 import BeautifulSoup
6
  import random
7
 
8
- classification_model = pipeline("text-classification", model="CesarLeblanc/plantbert_text_classification_model")
9
- mask_model = pipeline("fill-mask", model="CesarLeblanc/plantbert_fill_mask_model")
10
-
11
- dataset = load_dataset("CesarLeblanc/plantbert_text_classification_dataset")
12
 
13
  def return_text(habitat_label, habitat_score, confidence):
14
  if habitat_score*100 > confidence:
@@ -78,7 +76,6 @@ def classification(text, typology, confidence):
78
  text = gbif_normalization(text)
79
  result = classification_model(text)
80
  habitat_label = result[0]['label']
81
- habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
82
  habitat_score = result[0]['score']
83
  formatted_output = return_text(habitat_label, habitat_score, confidence)
84
  image_output = return_habitat_image(habitat_label, habitat_score, confidence)
@@ -86,26 +83,9 @@ def classification(text, typology, confidence):
86
 
87
  def masking(text):
88
  text = gbif_normalization(text)
89
- masked_text = text + ', [MASK] [MASK]'
90
- pred_genus = mask_model(masked_text, top_k=10)[0]
91
- for d in pred_genus:
92
- d["score"] += random.uniform(0, 0.1)
93
- pred_genus.sort(key=lambda x: x["score"], reverse=True)
94
- for i in range(3):
95
- new_genus = pred_genus[i]['token_str']
96
- masked_text = text + f', {new_genus} [MASK]'
97
- pred_epithet = mask_model(masked_text, top_k=3)
98
- for j in range(3):
99
- new_epithet = pred_epithet[j]['token_str']
100
- new_species = new_genus + ' ' + new_epithet
101
- url_species = f"https://api.gbif.org/v1/species/match?name={new_species}"
102
- r = requests.get(url_species)
103
- r = r.json()
104
- if new_species not in text and r["matchType"] != "NONE":
105
- text = f"The last species from this vegetation plot is probably {new_species}."
106
- image = return_species_image(new_species)
107
- return text, image
108
- text = f"We can't find the last species from this vegetation plot."
109
  image = return_species_image(new_species)
110
  return text, image
111
 
 
5
  from bs4 import BeautifulSoup
6
  import random
7
 
8
+ classification_model = pipeline("text-classification", model="plantbert_text_classification_model", tokenizer="plantbert_text_classification_model")
9
+ mask_model = pipeline("fill-mask", model="plantbert_fill_mask_model", tokenizer="plantbert_fill_mask_model")
 
 
10
 
11
  def return_text(habitat_label, habitat_score, confidence):
12
  if habitat_score*100 > confidence:
 
76
  text = gbif_normalization(text)
77
  result = classification_model(text)
78
  habitat_label = result[0]['label']
 
79
  habitat_score = result[0]['score']
80
  formatted_output = return_text(habitat_label, habitat_score, confidence)
81
  image_output = return_habitat_image(habitat_label, habitat_score, confidence)
 
83
 
84
  def masking(text):
85
  text = gbif_normalization(text)
86
+ masked_text = text + ', [MASK]'
87
+ pred = mask_model(masked_text)[0]
88
+ text = f"The last species from this vegetation plot is probably {pred}."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  image = return_species_image(new_species)
90
  return text, image
91