from transformers import pipeline
from transformers import Tool


class NamedEntityRecognitionTool(Tool):
    """Tool that runs a ``transformers`` "ner" pipeline and groups its hits.

    Calling the tool with a string returns ``{"entities": {...}}`` where the
    inner dictionary maps a fixed set of category names (``"persons"``,
    ``"organizations"``, ...) to the list of entity texts found for that
    category. Labels that match no known category are collected under
    ``"miscellaneous"``.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # Result keys, in the order they appear in the returned dictionary.
    _CATEGORIES = (
        "persons",
        "organizations",
        "locations",
        "dates",
        "times",
        "money",
        "percentages",
        "numbers",
        "ordinals",
        "miscellaneous",
    )

    # Ordered (label prefix, category) pairs, matched with ``startswith``.
    # "PERCENT" must be tested before "PER": "PERCENT" itself starts with
    # "PER", so the opposite order would file percentages under persons.
    _CATEGORY_BY_PREFIX = (
        ("PERCENT", "percentages"),
        ("PER", "persons"),
        ("ORG", "organizations"),
        ("LOC", "locations"),
        ("DATE", "dates"),
        ("TIME", "times"),
        ("MONEY", "money"),
        ("CARDINAL", "numbers"),
        ("ORDINAL", "ordinals"),
    )

    def __call__(self, text: str):
        """Run named entity recognition on ``text`` and categorize the hits.

        Args:
            text: The text to analyze.

        Returns:
            A dictionary ``{"entities": categorized}`` where ``categorized``
            maps every category name to a (possibly empty) list of entity
            strings, in the order the pipeline reported them.
        """
        # NOTE(review): the pipeline is created with its default settings, as
        # before. If it reports one item per token rather than per entity,
        # multi-token names arrive as separate pieces; consider an
        # aggregation strategy if whole entities are wanted - TODO confirm.
        entities = self._get_analyzer()(text)

        categorized_entities = {category: [] for category in self._CATEGORIES}
        for entity in entities:
            category = self._categorize(entity.get("entity", "UNKNOWN"))
            categorized_entities[category].append(self._entity_text(text, entity))

        # Print the identified entities
        print(f"Categorized Entities: {categorized_entities}")

        # Return a dictionary with the specified output component
        return {"entities": categorized_entities}

    def _get_analyzer(self):
        """Return the NER pipeline, creating it on first use only."""
        # getattr keeps this independent of how the base class initializes
        # instances; building the pipeline on every call would repeat the
        # expensive construction for each input.
        analyzer = getattr(self, "_ner_analyzer", None)
        if analyzer is None:
            analyzer = pipeline("ner")
            self._ner_analyzer = analyzer
        return analyzer

    @classmethod
    def _categorize(cls, label):
        """Map a pipeline label such as ``"B-PER"`` to a result category."""
        label = label or "UNKNOWN"
        # Accept both "B-" and "I-" tags, and bare labels as well.
        tag = label[2:] if label[:2] in ("B-", "I-") else label
        for prefix, category in cls._CATEGORY_BY_PREFIX:
            if tag.startswith(prefix):
                return category
        return "miscellaneous"

    @staticmethod
    def _entity_text(text, entity):
        """Return the entity's text, preferring the character offsets."""
        start = entity.get("start")
        end = entity.get("end")
        if start is not None and end is not None and 0 <= start <= end:
            return text[start:end].strip()
        # Offsets can be absent or None; slicing with None would return the
        # whole input, so fall back to the token text the pipeline reported.
        return (entity.get("word") or "").strip()