CesarLeblanc committed on
Commit
20742e4
1 Parent(s): 94ce85f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -3,8 +3,9 @@ from transformers import pipeline
3
  from datasets import load_dataset
4
  import requests
5
  from bs4 import BeautifulSoup
 
6
 
7
- classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
8
  mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
9
 
10
  dataset = load_dataset("CesarLeblanc/text_classification_dataset")
@@ -86,9 +87,12 @@ def classification(text, typology, confidence):
86
  def masking(text):
87
  text = gbif_normalization(text)
88
  masked_text = text + ', [MASK] [MASK]'
89
- pred_genus = mask_model(masked_text, top_k=3)
 
 
 
90
  for i in range(3):
91
- new_genus = pred_genus[0][i]['token_str']
92
  masked_text = text + f', {new_genus} [MASK]'
93
  pred_epithet = mask_model(masked_text, top_k=3)
94
  for j in range(3):
 
3
  from datasets import load_dataset
4
  import requests
5
  from bs4 import BeautifulSoup
6
+ import random
7
 
8
+ classification_model = pipeline("text-classification", model="CesarLeblanc/text_classification_model")
9
  mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
10
 
11
  dataset = load_dataset("CesarLeblanc/text_classification_dataset")
 
87
  def masking(text):
88
  text = gbif_normalization(text)
89
  masked_text = text + ', [MASK] [MASK]'
90
+ pred_genus = mask_model(masked_text, top_k=10)[0]
91
+ for d in pred_genus:
92
+ d["score"] += random.uniform(0, 0.1)
93
+ pred_genus.sort(key=lambda x: x["score"], reverse=True)
94
  for i in range(3):
95
+ new_genus = pred_genus[i]['token_str']
96
  masked_text = text + f', {new_genus} [MASK]'
97
  pred_epithet = mask_model(masked_text, top_k=3)
98
  for j in range(3):