vives's picture
Create app.py
cc3cff0
raw
history blame
622 Bytes
from transformers import AutoModelForMaskedLM
from transformers import AutoTokenizer
import spacy
import pytextrank
model_checkpoint = "vives/distilbert-base-uncased-finetuned-cvent-2019_2022"
model = AutoModelForMaskedLM.from_pretrained(model_checkpoint, output_hidden_states=True)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
FILT_GROUPS = ["CARDINAL", "TIME", "DATE", "PERCENT", "MONEY", "QUANTITY", "ORDINAL"]
POS = ["NOUN", "PROPN", "VERB"]
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textrank", last=True, config={"pos_kept": POS, "token_lookback": 3})
all_stopwords = nlp.Defaults.stop_words