NamedEntityRecognitionTool

Running

App Files Files Community

Chris4K committed on Jan 9, 2024

Commit

cf00609

1 Parent(s): e607aa8

Update ner_tool.py

Browse files

Files changed (1) hide show

ner_tool.py +15 -35

ner_tool.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from transformers import pipeline
 from transformers import Tool
@@ -14,19 +16,8 @@ class NamedEntityRecognitionTool(Tool):
         # Perform named entity recognition on the input text
         entities = ner_analyzer(text)
-        # Categorize entities based on labels into different types
-        categorized_entities = {
-            "persons": [],
-            "organizations": [],
-            "locations": [],
-            "dates": [],
-            "times": [],
-            "money": [],
-            "percentages": [],
-            "numbers": [],
-            "ordinals": [],
-            "miscellaneous": [],
-        }
         for entity in entities:
             label = entity.get("entity", "UNKNOWN")
@@ -37,28 +28,17 @@ class NamedEntityRecognitionTool(Tool):
             # Extract the complete entity text
             entity_text = text[start:end].strip()
-            if label.startswith("I-PER"):
-                categorized_entities["persons"].append(entity_text)
-            elif label.startswith("I-ORG"):
-                categorized_entities["organizations"].append(entity_text)
-            elif label.startswith("I-LOC"):
-                categorized_entities["locations"].append(entity_text)
-            elif label.startswith("I-DATE"):
-                categorized_entities["dates"].append(entity_text)
-            elif label.startswith("I-TIME"):
-                categorized_entities["times"].append(entity_text)
-            elif label.startswith("I-MONEY"):
-                categorized_entities["money"].append(entity_text)
-            elif label.startswith("I-PERCENT"):
-                categorized_entities["percentages"].append(entity_text)
-            elif label.startswith("I-CARDINAL"):
-                categorized_entities["numbers"].append(entity_text)
-            elif label.startswith("I-ORDINAL"):
-                categorized_entities["ordinals"].append(entity_text)
             else:
-                categorized_entities["miscellaneous"].append(entity_text)
-        # Print the identified entities
-        print(f"Categorized Entities: {categorized_entities}")
-        return {"entities": categorized_entities}  # Return a dictionary with the specified output component

+# Updated NamedEntityRecognitionTool in ner_tool.py
 from transformers import pipeline
 from transformers import Tool
         # Perform named entity recognition on the input text
         entities = ner_analyzer(text)
+        # Prepare a list to store token-level entities
+        token_entities = []
         for entity in entities:
             label = entity.get("entity", "UNKNOWN")
             # Extract the complete entity text
             entity_text = text[start:end].strip()
+            # Check for multi-token entities
+            if "##" in word:
+                # For multi-token entities, add each sub-token with its label
+                sub_tokens = word.split("##")
+                for i, sub_token in enumerate(sub_tokens):
+                    token_entities.append({"token": sub_token, "label": label, "entity_text": entity_text})
             else:
+                # For single-token entities, add the token with its label
+                token_entities.append({"token": word, "label": label, "entity_text": entity_text})
+        # Print the identified token-level entities
+        print(f"Token-level Entities: {token_entities}")
+        return {"entities": token_entities}  # Return a dictionary with the specified output component