Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
from transformers import Tool | |
class NamedEntityRecognitionTool(Tool):
    """Tool that tags entities in a text and groups them by entity type.

    The text is run through a ``transformers`` ``pipeline("ner")`` and every
    returned token-level entity is placed in one of a fixed set of buckets
    (persons, organizations, locations, ...). Labels that do not map to a
    known bucket end up under ``"miscellaneous"``.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # Entity type (label with its "B-"/"I-" prefix removed) -> result bucket.
    # Matching is exact so that "PERCENT" is not mistaken for "PER". The
    # long-form spellings are kept because the previous prefix-based check
    # accepted them as well.
    _CATEGORY_BY_TYPE = {
        "PER": "persons",
        "PERSON": "persons",
        "ORG": "organizations",
        "ORGANIZATION": "organizations",
        "LOC": "locations",
        "LOCATION": "locations",
        "DATE": "dates",
        "TIME": "times",
        "MONEY": "money",
        "PERCENT": "percentages",
        "CARDINAL": "numbers",
        "ORDINAL": "ordinals",
    }

    # Lazily created pipeline; building it is expensive, so it is created on
    # first use and then reused by later calls on the same instance.
    _ner_analyzer = None

    def __call__(self, text: str):
        """Run named entity recognition on ``text`` and categorize the hits.

        Args:
            text: The text to analyze.

        Returns:
            A dict with a single key ``"entities"`` whose value maps each
            category name (``"persons"``, ``"organizations"``,
            ``"locations"``, ``"dates"``, ``"times"``, ``"money"``,
            ``"percentages"``, ``"numbers"``, ``"ordinals"``,
            ``"miscellaneous"``) to a list of entity strings.
        """
        # Initialize the named entity recognition pipeline only once.
        if self._ner_analyzer is None:
            self._ner_analyzer = pipeline("ner")

        # Perform named entity recognition on the input text
        entities = self._ner_analyzer(text)

        # Categorize entities based on labels into different types
        categorized_entities = {
            "persons": [],
            "organizations": [],
            "locations": [],
            "dates": [],
            "times": [],
            "money": [],
            "percentages": [],
            "numbers": [],
            "ordinals": [],
            "miscellaneous": [],
        }

        for entity in entities:
            label = entity.get("entity") or "UNKNOWN"

            # Drop the IOB position prefix so that "B-PER" and "I-PER" are
            # both treated as the entity type "PER".
            if label[:2] in ("B-", "I-"):
                entity_type = label[2:]
            else:
                entity_type = label
            category = self._CATEGORY_BY_TYPE.get(entity_type, "miscellaneous")

            # Extract the entity text from the character offsets when they
            # are usable. Missing or None offsets must not be used to slice:
            # text[None:None] would return the whole input.
            start = entity.get("start")
            end = entity.get("end")
            if start is not None and end is not None and 0 <= start < end:
                entity_text = text[start:end].strip()
            else:
                # Fall back to the token string reported by the pipeline.
                entity_text = (entity.get("word") or "").strip()

            # Nothing meaningful to record for this entry.
            if not entity_text:
                continue

            categorized_entities[category].append(entity_text)

        # Print the identified entities
        print(f"Categorized Entities: {categorized_entities}")

        # Return a dictionary with the specified output component
        return {"entities": categorized_entities}