Chris4K committed on
Commit
cf00609
·
1 Parent(s): e607aa8

Update ner_tool.py

Browse files
Files changed (1) hide show
  1. ner_tool.py +15 -35
ner_tool.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from transformers import pipeline
2
  from transformers import Tool
3
 
@@ -14,19 +16,8 @@ class NamedEntityRecognitionTool(Tool):
14
  # Perform named entity recognition on the input text
15
  entities = ner_analyzer(text)
16
 
17
- # Categorize entities based on labels into different types
18
- categorized_entities = {
19
- "persons": [],
20
- "organizations": [],
21
- "locations": [],
22
- "dates": [],
23
- "times": [],
24
- "money": [],
25
- "percentages": [],
26
- "numbers": [],
27
- "ordinals": [],
28
- "miscellaneous": [],
29
- }
30
 
31
  for entity in entities:
32
  label = entity.get("entity", "UNKNOWN")
@@ -37,28 +28,17 @@ class NamedEntityRecognitionTool(Tool):
37
  # Extract the complete entity text
38
  entity_text = text[start:end].strip()
39
 
40
- if label.startswith("I-PER"):
41
- categorized_entities["persons"].append(entity_text)
42
- elif label.startswith("I-ORG"):
43
- categorized_entities["organizations"].append(entity_text)
44
- elif label.startswith("I-LOC"):
45
- categorized_entities["locations"].append(entity_text)
46
- elif label.startswith("I-DATE"):
47
- categorized_entities["dates"].append(entity_text)
48
- elif label.startswith("I-TIME"):
49
- categorized_entities["times"].append(entity_text)
50
- elif label.startswith("I-MONEY"):
51
- categorized_entities["money"].append(entity_text)
52
- elif label.startswith("I-PERCENT"):
53
- categorized_entities["percentages"].append(entity_text)
54
- elif label.startswith("I-CARDINAL"):
55
- categorized_entities["numbers"].append(entity_text)
56
- elif label.startswith("I-ORDINAL"):
57
- categorized_entities["ordinals"].append(entity_text)
58
  else:
59
- categorized_entities["miscellaneous"].append(entity_text)
 
60
 
61
- # Print the identified entities
62
- print(f"Categorized Entities: {categorized_entities}")
63
 
64
- return {"entities": categorized_entities} # Return a dictionary with the specified output component
 
1
+ # Updated NamedEntityRecognitionTool in ner_tool.py
2
+
3
  from transformers import pipeline
4
  from transformers import Tool
5
 
 
16
  # Perform named entity recognition on the input text
17
  entities = ner_analyzer(text)
18
 
19
+ # Prepare a list to store token-level entities
20
+ token_entities = []
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  for entity in entities:
23
  label = entity.get("entity", "UNKNOWN")
 
28
  # Extract the complete entity text
29
  entity_text = text[start:end].strip()
30
 
31
+ # Check for multi-token entities
32
+ if "##" in word:
33
+ # For multi-token entities, add each sub-token with its label
34
+ sub_tokens = word.split("##")
35
+ for i, sub_token in enumerate(sub_tokens):
36
+ token_entities.append({"token": sub_token, "label": label, "entity_text": entity_text})
 
 
 
 
 
 
 
 
 
 
 
 
37
  else:
38
+ # For single-token entities, add the token with its label
39
+ token_entities.append({"token": word, "label": label, "entity_text": entity_text})
40
 
41
+ # Print the identified token-level entities
42
+ print(f"Token-level Entities: {token_entities}")
43
 
44
+ return {"entities": token_entities} # Return a dictionary with the specified output component