|
from transformers import pipeline |
|
from transformers import Tool |
|
|
|
class NamedEntityRecognitionTool(Tool):
    """Tool that runs a ``"ner"`` pipeline on a text and groups the hits by type.

    Calling the tool returns ``{"entities": categories}`` where ``categories``
    maps each category name (``"persons"``, ``"organizations"``, ...,
    ``"miscellaneous"``) to the list of text snippets tagged with that type.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # (entity-type prefix, output category), checked in order.
    # "PERCENT" must be tested before "PER": every "PERCENT" label also starts
    # with "PER", so the reverse order would file percentages under persons.
    _LABEL_CATEGORIES = (
        ("PERCENT", "percentages"),
        ("PER", "persons"),
        ("ORG", "organizations"),
        ("LOC", "locations"),
        ("DATE", "dates"),
        ("TIME", "times"),
        ("MONEY", "money"),
        ("CARDINAL", "numbers"),
        ("ORDINAL", "ordinals"),
    )

    # Lazily created pipeline; stays None until the first call.
    _ner_analyzer = None

    def _get_analyzer(self):
        """Return the NER pipeline, building it once and reusing it afterwards."""
        if self._ner_analyzer is None:
            # Building the pipeline is the expensive step, so do it only once
            # per tool instance instead of on every call.
            self._ner_analyzer = pipeline("ner")
        return self._ner_analyzer

    @classmethod
    def _category_for(cls, label):
        """Map a raw tag such as ``"I-PER"`` or ``"B-ORG"`` to a category name."""
        prefix, sep, entity_type = label.partition("-")
        if not (sep and prefix in ("B", "I")):
            # No B-/I- marker in front: match on the label as given.
            entity_type = label
        for type_prefix, category in cls._LABEL_CATEGORIES:
            if entity_type.startswith(type_prefix):
                return category
        return "miscellaneous"

    @staticmethod
    def _entity_text(text, entity):
        """Return the snippet of ``text`` covered by ``entity`` (may be empty).

        Uses the ``start``/``end`` offsets when they describe a non-empty span
        and falls back to the ``word`` field otherwise.
        """
        start = entity.get("start")
        end = entity.get("end")
        if start is not None and end is not None and 0 <= start < end:
            return text[start:end].strip()
        # Offsets missing or None: slicing would yield "" or the whole text,
        # so use the token string reported by the pipeline instead.
        # NOTE(review): "word" may be a sub-word piece depending on the
        # tokenizer - confirm this fallback is acceptable for callers.
        return (entity.get("word") or "").strip()

    def __call__(self, text: str):
        """Extract entities from ``text`` and return them grouped by category.

        Args:
            text: The input text to analyse.

        Returns:
            ``{"entities": categories}`` where ``categories`` maps each
            category name to a list of entity strings, in pipeline order.
        """
        entities = self._get_analyzer()(text)

        # Key order is kept stable because it is visible in the printed output.
        categorized_entities = {
            "persons": [],
            "organizations": [],
            "locations": [],
            "dates": [],
            "times": [],
            "money": [],
            "percentages": [],
            "numbers": [],
            "ordinals": [],
            "miscellaneous": [],
        }

        for entity in entities:
            entity_text = self._entity_text(text, entity)
            if not entity_text:
                # Nothing usable to report for this hit.
                continue
            label = entity.get("entity", "UNKNOWN")
            categorized_entities[self._category_for(label)].append(entity_text)

        print(f"Categorized Entities: {categorized_entities}")

        return {"entities": categorized_entities}
|
|