NamedEntityRecognitionTool

Running

App Files Community

Chris4K committed on Jan 9, 2024

Commit

d25649c

1 Parent(s): 507724b

Update ner_tool.py

Browse files

Files changed (1) hide show

ner_tool.py +44 -7

ner_tool.py CHANGED Viewed

@@ -5,7 +5,7 @@ from transformers import Tool
 class NamedEntityRecognitionTool(Tool):
     name = "ner_tool"
-    description = "Identifies and labels entities such as persons, organizations, and locations in a given text."
     inputs = ["text"]
     outputs = ["text"]
@@ -16,13 +16,50 @@ class NamedEntityRecognitionTool(Tool):
         # Perform named entity recognition on the input text
         entities = ner_analyzer(text)
-        # Extract relevant information for each identified entity
-        entity_info = [{"entity": entity.get("entity", "UNKNOWN"), "word": entity.get("word", ""), "start": entity.get("start", -1), "end": entity.get("end", -1)} for entity in entities]
-        # Extract the actual text span for each identified location entity
-        location_entities = [text[start:end] for entity in entity_info if entity["entity"] == "I-LOC" for start, end in [(entity["start"], entity["end"])]]
         # Print the identified entities
-        print(f"Identified Location Entities: {location_entities}")
-        return {"entities": location_entities}  # Return a dictionary with the specified output component

 class NamedEntityRecognitionTool(Tool):
     name = "ner_tool"
+    description = "Identifies and labels various entities in a given text."
     inputs = ["text"]
     outputs = ["text"]
         # Perform named entity recognition on the input text
         entities = ner_analyzer(text)
+        # Categorize entities based on labels into different types
+        categorized_entities = {
+            "persons": [],
+            "organizations": [],
+            "locations": [],
+            "dates": [],
+            "times": [],
+            "money": [],
+            "percentages": [],
+            "numbers": [],
+            "ordinals": [],
+            "miscellaneous": [],
+        }
+        for entity in entities:
+            label = entity.get("entity", "UNKNOWN")
+            word = entity.get("word", "")
+            start = entity.get("start", -1)
+            end = entity.get("end", -1)
+            entity_text = text[start:end].strip()
+            if label.startswith("I-PER"):
+                categorized_entities["persons"].append(entity_text)
+            elif label.startswith("I-ORG"):
+                categorized_entities["organizations"].append(entity_text)
+            elif label.startswith("I-LOC"):
+                categorized_entities["locations"].append(entity_text)
+            elif label.startswith("I-DATE"):
+                categorized_entities["dates"].append(entity_text)
+            elif label.startswith("I-TIME"):
+                categorized_entities["times"].append(entity_text)
+            elif label.startswith("I-MONEY"):
+                categorized_entities["money"].append(entity_text)
+            elif label.startswith("I-PERCENT"):
+                categorized_entities["percentages"].append(entity_text)
+            elif label.startswith("I-CARDINAL"):
+                categorized_entities["numbers"].append(entity_text)
+            elif label.startswith("I-ORDINAL"):
+                categorized_entities["ordinals"].append(entity_text)
+            else:
+                categorized_entities["miscellaneous"].append(entity_text)
         # Print the identified entities
+        print(f"Categorized Entities: {categorized_entities}")
+        return {"entities": categorized_entities}  # Return a dictionary with the specified output component