Spaces:
Runtime error
Runtime error
CesarLeblanc
committed on
Commit
•
20742e4
1
Parent(s):
94ce85f
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,9 @@ from transformers import pipeline
|
|
3 |
from datasets import load_dataset
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
|
|
6 |
|
7 |
-
classification_model = pipeline("text-classification", model="CesarLeblanc/
|
8 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
9 |
|
10 |
dataset = load_dataset("CesarLeblanc/text_classification_dataset")
|
@@ -86,9 +87,12 @@ def classification(text, typology, confidence):
|
|
86 |
def masking(text):
|
87 |
text = gbif_normalization(text)
|
88 |
masked_text = text + ', [MASK] [MASK]'
|
89 |
-
pred_genus = mask_model(masked_text, top_k=
|
|
|
|
|
|
|
90 |
for i in range(3):
|
91 |
-
new_genus = pred_genus[
|
92 |
masked_text = text + f', {new_genus} [MASK]'
|
93 |
pred_epithet = mask_model(masked_text, top_k=3)
|
94 |
for j in range(3):
|
|
|
3 |
from datasets import load_dataset
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
+
import random
|
7 |
|
8 |
+
classification_model = pipeline("text-classification", model="CesarLeblanc/text_classification_model")
|
9 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
10 |
|
11 |
dataset = load_dataset("CesarLeblanc/text_classification_dataset")
|
|
|
87 |
def masking(text):
|
88 |
text = gbif_normalization(text)
|
89 |
masked_text = text + ', [MASK] [MASK]'
|
90 |
+
pred_genus = mask_model(masked_text, top_k=10)[0]
|
91 |
+
for d in pred_genus:
|
92 |
+
d["score"] += random.uniform(0, 0.1)
|
93 |
+
pred_genus.sort(key=lambda x: x["score"], reverse=True)
|
94 |
for i in range(3):
|
95 |
+
new_genus = pred_genus[i]['token_str']
|
96 |
masked_text = text + f', {new_genus} [MASK]'
|
97 |
pred_epithet = mask_model(masked_text, top_k=3)
|
98 |
for j in range(3):
|