CesarLeblanc committed on
Commit
24390e2
1 Parent(s): 4270278

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -1
app.py CHANGED
@@ -82,10 +82,50 @@ def classification(text, typology, confidence):
82
 
83
  def masking(text):
84
  text = gbif_normalization(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  masked_text = text + ', [MASK]'
86
  pred = mask_model(masked_text)[0]
87
  new_species = pred['token_str']
88
- text = f"The last species from this vegetation plot is probably {new_species}."
89
  image = return_species_image(new_species)
90
  return text, image
91
 
 
82
 
83
  def masking(text):
84
  text = gbif_normalization(text)
85
+
86
+ max_score = 0
87
+ best_prediction = None
88
+ best_position = None
89
+
90
+ # Case for the first position
91
+ masked_text = "[MASK], " + ', '.join(text.split(', '))
92
+ prediction = mask_model(masked_text)[0]
93
+ species = prediction['token_str']
94
+ score = prediction['score']
95
+
96
+ if score > max_score:
97
+ max_score = score
98
+ best_prediction = species
99
+ best_position = 0
100
+
101
+ # Loop through each position in the middle of the sentence
102
+ for i in range(1, len(text.split(', '))):
103
+ masked_text = ', '.join(text.split(', ')[:i]) + ', [MASK], ' + ', '.join(text.split(', ')[i:])
104
+ prediction = mask_model(masked_text)[0]
105
+ species = prediction['token_str']
106
+ score = prediction['score']
107
+
108
+ # Update best prediction and position if score is higher
109
+ if score > max_score:
110
+ max_score = score
111
+ best_prediction = species
112
+ best_position = i
113
+
114
+ # Case for the last position
115
+ masked_text = ', '.join(text.split(', ')) + ', [MASK]'
116
+ prediction = mask_model(masked_text)[0]
117
+ species = prediction['token_str']
118
+ score = prediction['score']
119
+
120
+ if score > max_score:
121
+ max_score = score
122
+ best_prediction = species
123
+ best_position = len(text.split(', '))
124
+
125
  masked_text = text + ', [MASK]'
126
  pred = mask_model(masked_text)[0]
127
  new_species = pred['token_str']
128
+ text = f"The most likely missing species in position {best_position} is: {best_species}".
129
  image = return_species_image(new_species)
130
  return text, image
131