NamedEntityRecognitionTool

Running

App Files Files Community

NamedEntityRecognitionTool / ner_tool.py

Chris4K

Update ner_tool.py

e607aa8 over 1 year ago

raw

history blame

2.52 kB

	from transformers import pipeline
	from transformers import Tool

	class NamedEntityRecognitionTool(Tool):
	    """Tool that runs a ``"ner"`` pipeline over a text and groups the results.

	    Calling the tool returns ``{"entities": {...}}`` where the inner dict maps
	    each category name (``"persons"``, ``"organizations"``, ...) to the list
	    of matched text fragments, in the order the pipeline reported them.
	    """

	    name = "ner_tool"
	    description = "Identifies and labels various entities in a given text."
	    inputs = ["text"]
	    outputs = ["text"]

	    # Output buckets, in the order they appear in the returned dictionary.
	    _CATEGORIES = (
	        "persons",
	        "organizations",
	        "locations",
	        "dates",
	        "times",
	        "money",
	        "percentages",
	        "numbers",
	        "ordinals",
	        "miscellaneous",
	    )

	    # Entity type (label without its chunk tag) -> output bucket.  Lookup is
	    # exact, so "PERCENT" can no longer be swallowed by a "PER" prefix test.
	    _LABEL_TO_CATEGORY = {
	        "PER": "persons",
	        "PERSON": "persons",
	        "ORG": "organizations",
	        "ORGANIZATION": "organizations",
	        "LOC": "locations",
	        "LOCATION": "locations",
	        "DATE": "dates",
	        "TIME": "times",
	        "MONEY": "money",
	        "PERCENT": "percentages",
	        "CARDINAL": "numbers",
	        "ORDINAL": "ordinals",
	    }

	    # Chunk tags that may precede the entity type (BIO / BIOES / BILOU schemes).
	    _TAG_PREFIXES = ("B-", "I-", "E-", "S-", "L-", "U-")

	    # Lazily created pipeline; stays None until the first call.
	    _ner_analyzer = None

	    def _entity_type(self, label):
	        """Return *label* upper-cased with a leading chunk tag (e.g. ``I-``) removed."""
	        normalized = str(label).upper()
	        if normalized.startswith(self._TAG_PREFIXES):
	            return normalized[2:]
	        return normalized

	    def _entity_text(self, text, entity):
	        """Return the stripped text of *entity*, preferring its character offsets.

	        Falls back to the entity's ``"word"`` value when ``"start"``/``"end"``
	        are missing, ``None`` or not a valid range.  Slicing with ``None``
	        offsets would otherwise yield the whole input text.
	        """
	        start = entity.get("start")
	        end = entity.get("end")
	        if start is not None and end is not None and 0 <= start <= end:
	            return text[start:end].strip()
	        return str(entity.get("word") or "").strip()

	    def __call__(self, text: str):
	        """Run named entity recognition on *text* and categorize the results.

	        Args:
	            text: The input text to analyze.

	        Returns:
	            A dictionary ``{"entities": categorized}`` where ``categorized``
	            maps every name in ``_CATEGORIES`` to a list of entity strings.
	            Labels with no known category go to ``"miscellaneous"``.
	        """
	        # Building the pipeline is the expensive step, so do it once per
	        # instance and reuse it on later calls.
	        if self._ner_analyzer is None:
	            self._ner_analyzer = pipeline("ner")

	        # Perform named entity recognition on the input text
	        entities = self._ner_analyzer(text)

	        # Categorize entities based on labels into different types
	        categorized_entities = {category: [] for category in self._CATEGORIES}

	        for entity in entities:
	            # Grouped results carry the label under "entity_group" rather
	            # than "entity"; accept either.
	            label = entity.get("entity") or entity.get("entity_group") or "UNKNOWN"

	            entity_text = self._entity_text(text, entity)
	            if not entity_text:
	                # Nothing usable to report for this entry.
	                continue

	            category = self._LABEL_TO_CATEGORY.get(
	                self._entity_type(label), "miscellaneous"
	            )
	            categorized_entities[category].append(entity_text)

	        # Print the identified entities
	        print(f"Categorized Entities: {categorized_entities}")

	        # Return a dictionary with the specified output component
	        return {"entities": categorized_entities}