Spaces:
Runtime error
Runtime error
import json | |
from pathlib import Path | |
import spacy | |
from spacy.training import Example | |
def make_training_doc(nlp: "spacy.language.Language", data: list) -> list:
    """
    Convert raw annotated data into spaCy Example objects usable for training.

    parameters:
    nlp: spacy pipeline, only used to tokenize each text via nlp.make_doc
    data: training data, an iterable of (text, annotations) pairs
    returns:
    training_data: list of spacy Example objects, one per (text, annotations)
    pair, in the same order as data
    """
    # make_doc only tokenizes the text; the annotations are attached when the
    # Example is built from the tokenized doc and the annotation dict.
    return [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in data
    ]
def load_model(model: str = None):
    """
    Load the model indicated by model, or create a blank English one.

    parameters:
    model: str or None, name of the model to load; when None a blank 'en'
    model is created instead
    returns:
    nlp: spacy model object
    optimizer: the optimizer to be used in training
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print(f"Loaded model '{model}'")
        # Existing model: obtain an optimizer to continue training it.
        optimizer = nlp.resume_training()
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
        # initialize() is the spaCy v3 name for the deprecated
        # begin_training(); it likewise returns the optimizer.
        optimizer = nlp.initialize()
    return nlp, optimizer
def save_model(model: "spacy.language.Language", output_dir: str):
    """
    Save the model to output_dir, creating the directory when it is missing.

    parameters:
    model: spacy model (any object exposing to_disk(path))
    output_dir: path of the target directory, or None to skip saving
    returns:
    None
    """
    if output_dir is None:
        return None

    target = Path(output_dir)
    # parents=True lets nested output paths be created in one go;
    # exist_ok=True replaces the separate exists() check, so an already
    # existing directory is simply reused.
    target.mkdir(parents=True, exist_ok=True)
    model.to_disk(target)
    print("Saved model to", target)
    return None
def _read_json(path):
    """Parse and return the JSON document stored at path."""
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def load_data(args):
    """
    Load training data, evaluation data as well as entities dictionary

    Entities are looked up first in the training file under args['ent_key'];
    only when they are absent there is the file at args['ent_dir'] consulted.

    parameters:
    args: dict, configuration from the config file; reads the keys
    'train_dir', 'ent_key', 'ent_dir' and 'eval_dir'
    returns:
    train_dict, entities_dict, eval_dict (eval_dict is None when
    args['eval_dir'] is None)
    raises:
    AssertionError: when args['train_dir'] is None, or when no entities are
    found in the training data and args['ent_dir'] is None
    """
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`, while keeping the exception type callers already see.
    if args['train_dir'] is None:
        raise AssertionError('indicate path for training directory')

    # Load the training data
    train_dict = _read_json(args['train_dir'])
    print('Loaded Training Data')

    # The entity classes may be embedded in the training file itself.
    try:
        entities_dict = train_dict[args['ent_key']]
        print('Loaded Entities from Training Data')
    except KeyError:
        entities_dict = None
        print('No classes for entities found in data loaded. Proceed to check in ent_dir')

    # Load entities from the dedicated file only when the training data had none
    if entities_dict is None:
        if args['ent_dir'] is None:
            raise AssertionError('No entities found from training_dir & ent_dir')
        entities_dict = _read_json(args['ent_dir'])[args['ent_key']]
        print('Loaded Entities from ent_dir')

    # Load eval data (optional)
    eval_dict = None
    if args['eval_dir'] is not None:
        eval_dict = _read_json(args['eval_dir'])
        print('Loaded Evaluating Data')

    return train_dict, entities_dict, eval_dict