ak5005 committed on
Commit
c1e64ce
·
1 Parent(s): edb1927

Remove spacy download step; use pip-installed model

Browse files
Files changed (2) hide show
  1. categories/accuracy.py +1 -1
  2. categories/fluency.py +1 -1
categories/accuracy.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoModel, AutoTokenizer
10
  # setup global variables on import (bad practice, but whatever)
11
  # --------------------------------------------------------------
12
 
13
- aligner = SentenceAligner(model="bert-base-multilingual-cased", layer=6)
14
 
15
  de_encoder = LaserEncoderPipeline(lang="deu_Latn")
16
  en_encoder = LaserEncoderPipeline(lang="eng_Latn")
 
10
  # setup global variables on import (bad practice, but whatever)
11
  # --------------------------------------------------------------
12
 
13
+ aligner = SentenceAligner(model="xlm-roberta-base", layer=6)
14
 
15
  de_encoder = LaserEncoderPipeline(lang="deu_Latn")
16
  en_encoder = LaserEncoderPipeline(lang="eng_Latn")
categories/fluency.py CHANGED
@@ -12,7 +12,7 @@ from transformers import AutoModelForMaskedLM, AutoTokenizer
12
  tool = language_tool_python.LanguageTool("en-US")
13
 
14
  # masked language model and tokenizer from huggingface
15
- model_name = "distilbert-base-multilingual-cased"
16
  model = AutoModelForMaskedLM.from_pretrained(model_name)
17
  model.eval()
18
  tokenizer = AutoTokenizer.from_pretrained(model_name) # tokenizer
 
12
  tool = language_tool_python.LanguageTool("en-US")
13
 
14
  # masked language model and tokenizer from huggingface
15
+ model_name = "xlm-roberta-base"
16
  model = AutoModelForMaskedLM.from_pretrained(model_name)
17
  model.eval()
18
  tokenizer = AutoTokenizer.from_pretrained(model_name) # tokenizer