Spaces:
Sleeping
Sleeping
File size: 2,522 Bytes
c5922b9 d25649c c5922b9 507724b c5922b9 d25649c fb510e6 d25649c e607aa8 d25649c c5922b9 fb510e6 d25649c c5922b9 d25649c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from transformers import pipeline
from transformers import Tool
class NamedEntityRecognitionTool(Tool):
    """Run a named-entity-recognition pipeline and group the hits by type.

    Calling the tool with a string returns ``{"entities": buckets}`` where
    ``buckets`` maps each category name ("persons", "organizations",
    "locations", "dates", "times", "money", "percentages", "numbers",
    "ordinals", "miscellaneous") to a list of entity strings in the order
    the pipeline reported them.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # Ordered (label prefix, bucket) pairs. Matching is by prefix, so the
    # longer "PERCENT" must be tested before "PER"; otherwise percentage
    # labels would be filed under "persons".
    _LABEL_BUCKETS = (
        ("PERCENT", "percentages"),
        ("PER", "persons"),
        ("ORG", "organizations"),
        ("LOC", "locations"),
        ("DATE", "dates"),
        ("TIME", "times"),
        ("MONEY", "money"),
        ("CARDINAL", "numbers"),
        ("ORDINAL", "ordinals"),
    )
    _DEFAULT_BUCKET = "miscellaneous"

    # Lazily created pipeline, cached so the model is loaded once per tool
    # instance instead of on every call.
    _ner_analyzer = None

    @classmethod
    def _bucket_for(cls, label):
        """Return the category name for a raw pipeline label."""
        label = str(label)
        # Drop the tagging-scheme marker so begin ("B-") and inside ("I-")
        # tokens of the same entity type land in the same bucket.
        if label[:2] in ("B-", "I-"):
            label = label[2:]
        for prefix, bucket in cls._LABEL_BUCKETS:
            if label.startswith(prefix):
                return bucket
        return cls._DEFAULT_BUCKET

    @staticmethod
    def _entity_text(text, entity):
        """Return the surface text of one pipeline result, or "" if unknown."""
        start = entity.get("start")
        end = entity.get("end")
        # Offsets may be absent or None; slicing with None would return the
        # whole input, so only slice when both form a valid, non-empty span.
        if start is not None and end is not None and 0 <= start < end <= len(text):
            return text[start:end].strip()
        # Fall back to the token string reported by the pipeline.
        return str(entity.get("word") or "").strip()

    def __call__(self, text: str):
        """Recognize entities in ``text`` and return them grouped by category.

        Args:
            text: The input string to analyze.

        Returns:
            A dict ``{"entities": buckets}``; see the class docstring for the
            bucket names. The grouped result is also printed to stdout.
        """
        # Initialize the named entity recognition pipeline on first use only
        if self._ner_analyzer is None:
            self._ner_analyzer = pipeline("ner")

        # Perform named entity recognition on the input text
        entities = self._ner_analyzer(text)

        # Categorize entities based on labels into different types
        categorized_entities = {
            "persons": [],
            "organizations": [],
            "locations": [],
            "dates": [],
            "times": [],
            "money": [],
            "percentages": [],
            "numbers": [],
            "ordinals": [],
            "miscellaneous": [],
        }

        for entity in entities:
            label = entity.get("entity") or "UNKNOWN"
            entity_text = self._entity_text(text, entity)
            if not entity_text:
                # Nothing usable to report for this hit; skip it rather than
                # polluting the buckets with empty strings.
                continue
            categorized_entities[self._bucket_for(label)].append(entity_text)

        # Print the identified entities
        print(f"Categorized Entities: {categorized_entities}")
        return {"entities": categorized_entities}  # Return a dictionary with the specified output component
|