Chris4K committed on
Commit
d25649c
·
1 Parent(s): 507724b

Update ner_tool.py

Browse files
Files changed (1) hide show
  1. ner_tool.py +44 -7
ner_tool.py CHANGED
@@ -5,7 +5,7 @@ from transformers import Tool
5
 
6
  class NamedEntityRecognitionTool(Tool):
7
  name = "ner_tool"
8
- description = "Identifies and labels entities such as persons, organizations, and locations in a given text."
9
  inputs = ["text"]
10
  outputs = ["text"]
11
 
@@ -16,13 +16,50 @@ class NamedEntityRecognitionTool(Tool):
16
  # Perform named entity recognition on the input text
17
  entities = ner_analyzer(text)
18
 
19
- # Extract relevant information for each identified entity
20
- entity_info = [{"entity": entity.get("entity", "UNKNOWN"), "word": entity.get("word", ""), "start": entity.get("start", -1), "end": entity.get("end", -1)} for entity in entities]
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Extract the actual text span for each identified location entity
23
- location_entities = [text[start:end] for entity in entity_info if entity["entity"] == "I-LOC" for start, end in [(entity["start"], entity["end"])]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Print the identified entities
26
- print(f"Identified Location Entities: {location_entities}")
27
 
28
- return {"entities": location_entities} # Return a dictionary with the specified output component
 
5
 
6
  class NamedEntityRecognitionTool(Tool):
7
  name = "ner_tool"
8
+ description = "Identifies and labels various entities in a given text."
9
  inputs = ["text"]
10
  outputs = ["text"]
11
 
 
16
  # Perform named entity recognition on the input text
17
  entities = ner_analyzer(text)
18
 
19
+ # Categorize entities based on labels into different types
20
+ categorized_entities = {
21
+ "persons": [],
22
+ "organizations": [],
23
+ "locations": [],
24
+ "dates": [],
25
+ "times": [],
26
+ "money": [],
27
+ "percentages": [],
28
+ "numbers": [],
29
+ "ordinals": [],
30
+ "miscellaneous": [],
31
+ }
32
 
33
+ for entity in entities:
34
+ label = entity.get("entity", "UNKNOWN")
35
+ word = entity.get("word", "")
36
+ start = entity.get("start", -1)
37
+ end = entity.get("end", -1)
38
+
39
+ entity_text = text[start:end].strip()
40
+
41
+ if label.startswith("I-PER"):
42
+ categorized_entities["persons"].append(entity_text)
43
+ elif label.startswith("I-ORG"):
44
+ categorized_entities["organizations"].append(entity_text)
45
+ elif label.startswith("I-LOC"):
46
+ categorized_entities["locations"].append(entity_text)
47
+ elif label.startswith("I-DATE"):
48
+ categorized_entities["dates"].append(entity_text)
49
+ elif label.startswith("I-TIME"):
50
+ categorized_entities["times"].append(entity_text)
51
+ elif label.startswith("I-MONEY"):
52
+ categorized_entities["money"].append(entity_text)
53
+ elif label.startswith("I-PERCENT"):
54
+ categorized_entities["percentages"].append(entity_text)
55
+ elif label.startswith("I-CARDINAL"):
56
+ categorized_entities["numbers"].append(entity_text)
57
+ elif label.startswith("I-ORDINAL"):
58
+ categorized_entities["ordinals"].append(entity_text)
59
+ else:
60
+ categorized_entities["miscellaneous"].append(entity_text)
61
 
62
  # Print the identified entities
63
+ print(f"Categorized Entities: {categorized_entities}")
64
 
65
+ return {"entities": categorized_entities} # Return a dictionary with the specified output component