# ner-analyzer / src / model_utils.py
# Utilities for preparing spaCy training examples, loading/saving spaCy
# models, and loading training/evaluation data from a config dict.
import json
from pathlib import Path
import spacy
from spacy.training import Example
def make_training_doc(nlp, data: list) -> list:
    """
    Convert raw annotated data into spaCy Example objects usable for training.

    parameters:
        nlp: spaCy Language object (only its tokenizer is used, via make_doc)
        data: iterable of (text, annotations) pairs, where annotations is the
              dict format accepted by spacy.training.Example.from_dict
    returns:
        training_data: list of spacy.training.Example
    """
    # Tokenize each text with the model's tokenizer, then pair it with its
    # gold annotations as a training Example.
    return [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in data
    ]
def load_model(model: str=None):
    """
    Load a named spaCy model, or create a blank English pipeline.

    parameters:
        model: name (or path) of the spaCy model to load; None means start
               from a blank 'en' pipeline instead
    returns:
        nlp: the spaCy Language object
        optimizer: optimizer for training — resumed for an existing model,
                   freshly initialized for a blank one
    """
    # Guard clause: no model name means we start from scratch.
    if model is None:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
        return nlp, nlp.begin_training()
    nlp = spacy.load(model)  # load existing spaCy model
    print("Loaded model '%s'" % model)
    return nlp, nlp.resume_training()
def save_model(model, output_dir: str):
    """
    Persist a trained spaCy model to disk.

    parameters:
        model: spaCy Language object (anything exposing to_disk)
        output_dir: directory path to write the model into; created
                    (including missing parents) if it does not exist.
                    If None, nothing is saved.
    """
    if output_dir is not None:
        output_dir = Path(output_dir)
        if not output_dir.exists():
            # parents=True so a nested path like "out/models/v1" works even
            # when intermediate directories are missing.
            output_dir.mkdir(parents=True)
        model.to_disk(output_dir)
        print("Saved model to", output_dir)
def load_data(args):
    """
    Load training data, evaluation data, and the entities dictionary.

    parameters:
        args: dict-like config with keys 'train_dir' (required, path to a
              JSON file), 'ent_key' (key under which entity classes are
              stored), 'ent_dir' (optional path to a separate entities JSON
              file), and 'eval_dir' (optional path to an evaluation JSON file)
    returns:
        (train_dict, entities_dict, eval_dict); eval_dict is None when no
        evaluation file is configured
    """
    assert args['train_dir'] is not None, 'indicate path for training directory'
    # Load the training data
    with open(args['train_dir']) as f:
        train_dict = json.load(f)
    print('Loaded Training Data')
    # Entities may ship inside the training file under ent_key...
    try:
        entities_dict = train_dict[args['ent_key']]
        print('Loaded Entities from Training Data')
    except KeyError:
        entities_dict = None
        print('No classes for entities found in data loaded. Proceed to check in ent_dir')
    # ...otherwise fall back to the dedicated entities file.
    if args['ent_dir'] is not None and entities_dict is None:
        with open(args['ent_dir']) as f:
            entities_dict = json.load(f)[args['ent_key']]
        print('Loaded Entities from ent_dir')
    elif args['ent_dir'] is None and entities_dict is None:
        # Neither source provided entities: fail loudly.
        assert entities_dict is not None, 'No entities found from training_dir & ent_dir'
    # Evaluation data is optional.
    eval_dict = None
    if args['eval_dir'] is not None:
        with open(args['eval_dir']) as f:
            eval_dict = json.load(f)
        print('Loaded Evaluating Data')
    return train_dict, entities_dict, eval_dict