NamedEntityRecognitionTool

Running

File size: 1,699 Bytes

# Updated NamedEntityRecognitionTool in ner_tool.py

from transformers import pipeline
from transformers import Tool

class NamedEntityRecognitionTool(Tool):
    """Tool that runs named entity recognition and reports token-level labels.

    Calling the tool with a text returns ``{"entities": [...]}``, where each
    list item is a dict with three keys:

    * ``"token"`` - the token (or sub-token piece) reported by the pipeline,
    * ``"label"`` - the entity label reported for it (``"UNKNOWN"`` if absent),
    * ``"entity_text"`` - the slice of the input text covered by the token's
      character offsets, or ``""`` when no offsets are available.
    """

    name = "ner_tool"
    description = "Identifies and labels various entities in a given text."
    inputs = ["text"]
    outputs = ["text"]

    # Cached NER pipeline. It is created on first use and then reused, so the
    # pipeline is not rebuilt on every call.
    _ner_analyzer = None

    def _get_analyzer(self):
        """Return the NER pipeline, creating it on first use."""
        if self._ner_analyzer is None:
            self._ner_analyzer = pipeline("ner")
        return self._ner_analyzer

    @staticmethod
    def _covered_text(text, start, end):
        """Return ``text[start:end]`` stripped, or ``""`` without usable offsets."""
        # Offsets may be missing or None (e.g. when the tokenizer does not
        # provide them). Slicing with None would return the whole input, which
        # is not the text of this entity.
        if start is None or end is None:
            return ""
        return text[start:end].strip()

    def __call__(self, text: str):
        """Run NER on ``text`` and return the token-level entities.

        Args:
            text: The input text to analyse.

        Returns:
            A dict ``{"entities": token_entities}`` where ``token_entities`` is
            a list of ``{"token", "label", "entity_text"}`` dicts, in the order
            the pipeline reported them.
        """
        entities = self._get_analyzer()(text)

        token_entities = []
        for entity in entities:
            label = entity.get("entity", "UNKNOWN")
            word = entity.get("word", "")
            entity_text = self._covered_text(
                text, entity.get("start"), entity.get("end")
            )

            if "##" in word:
                # "##" marks sub-token pieces (WordPiece-style continuation
                # marker). A word such as "##son" splits into ["", "son"];
                # drop the empty pieces so no blank token is reported.
                pieces = [piece for piece in word.split("##") if piece]
            else:
                pieces = [word]

            for piece in pieces:
                token_entities.append(
                    {"token": piece, "label": label, "entity_text": entity_text}
                )

        # Print the identified token-level entities
        print(f"Token-level Entities: {token_entities}")

        return {"entities": token_entities}  # Return a dictionary with the specified output component