wzkariampuzha committed
Commit • 5516b92
1 Parent(s): 30a4f12
specifying batch_size did not work - using training argument specification per HF docs and ChatGPT
epi_pipeline.py CHANGED  (+9 -3)
@@ -426,7 +426,7 @@ from torch import nn
 import numpy as np
 from unidecode import unidecode
 import re
-from transformers import BertConfig, AutoModelForTokenClassification, BertTokenizer, Trainer
+from transformers import BertConfig, AutoModelForTokenClassification, BertTokenizer, Trainer, TrainingArguments
 from unidecode import unidecode
 from collections import OrderedDict
 import json
@@ -655,7 +655,13 @@ class NER_Pipeline:
         #model = AutoModelForTokenClassification.from_pretrained(name_or_path_to_model_folder)
         self.config = BertConfig.from_pretrained(name_or_path_to_model_folder)
         self.labels = {re.sub(".-","",label) for label in self.config.label2id.keys() if label != "O"}
-
+        # Create the TrainingArguments object and set the batch size
+        training_args = TrainingArguments(
+            per_device_eval_batch_size=16,
+            per_gpu_eval_batch_size=16,
+            # other training arguments...
+        )
+        self.trainer = Trainer(model=AutoModelForTokenClassification.from_pretrained(name_or_path_to_model_folder),args=training_args)
 
     def __str__(self):
         return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
@@ -669,7 +675,7 @@ class NER_Pipeline:
         output_dict = {label:[] for label in self.labels}
 
         dataset = NerDataset(text, self.bert_tokenizer, self.config)
-        predictions, label_ids, _ = self.trainer.predict(dataset
+        predictions, label_ids, _ = self.trainer.predict(dataset)
         preds_list, _ = self.align_predictions(predictions, label_ids)
         #dataset.ner_inputs.labels = preds_list
         for ner_input, sent_pred_list in zip(dataset.ner_inputs, preds_list):
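For context, a minimal standalone sketch of the pattern this commit adopts: the evaluation/prediction batch size is set through a TrainingArguments object passed to Trainer, rather than being handed to Trainer or predict() directly. The model path and dataset names below are hypothetical placeholders and are not part of epi_pipeline.py; output_dir is included only because TrainingArguments expects one.

# Sketch only: control Trainer's prediction batch size via TrainingArguments.
# "model_dir" and "eval_dataset" are hypothetical placeholders standing in for
# name_or_path_to_model_folder and the pipeline's NerDataset.
from transformers import AutoModelForTokenClassification, Trainer, TrainingArguments

model_dir = "path/to/model_folder"  # hypothetical checkpoint directory
model = AutoModelForTokenClassification.from_pretrained(model_dir)

training_args = TrainingArguments(
    output_dir="tmp_trainer",        # TrainingArguments requires an output directory
    per_device_eval_batch_size=16,   # this argument controls batching in evaluate()/predict()
)

trainer = Trainer(model=model, args=training_args)

# eval_dataset must be a torch Dataset of tokenized inputs; Trainer.predict
# iterates over it in batches of per_device_eval_batch_size.
# predictions, label_ids, metrics = trainer.predict(eval_dataset)

Note that per_gpu_eval_batch_size has been deprecated in transformers in favor of per_device_eval_batch_size, so the latter alone should be sufficient; the commit's version passes both.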