# NER helpers and constants
from typing import List

# Standard NER entity types with descriptions.
# Maps each entity label to a short human-readable description. Every label is
# listed exactly once: a repeated key in a dict literal is silently overwritten
# by the later value, so duplicate entries would only be dead text.
NER_ENTITY_TYPES = {
    "PERSON": "People, including fictional",
    "ORG": "Companies, agencies, institutions, etc.",
    "GPE": "Countries, cities, states",
    "LOC": "Non-GPE locations, mountain ranges, bodies of water",
    "PRODUCT": "Objects, vehicles, foods, etc. (not services)",
    "EVENT": "Named hurricanes, battles, wars, sports events, etc.",
    "WORK_OF_ART": "Titles of books, songs, etc.",
    "LAW": "Named documents made into laws",
    "LANGUAGE": "Any named language",
    "DATE": "Absolute or relative dates or periods",
    "TIME": "Times smaller than a day",
    "PERCENT": "Percentage (including '%')",
    "MONEY": "Monetary values, including unit",
    "QUANTITY": "Measurements, as of weight or distance",
    "ORDINAL": "'first', 'second', etc.",
    "CARDINAL": "Numerals that do not fall under another type",
    "NORP": "Nationalities or religious or political groups",
    "FAC": "Buildings, airports, highways, bridges, etc.",
}

# Default selected entity types (first 5 by default).
# Dicts iterate in insertion order, so this is the first five labels above.
DEFAULT_SELECTED_ENTITIES: List[str] = list(NER_ENTITY_TYPES)[:5]

# Substrings that mark a model identifier as an LLM-based model.
LLM_MODELS: List[str] = ["gemini", "gpt", "claude"]


def is_llm_model(model_id: str) -> bool:
    """Check if the model is an LLM-based model.

    The check is a case-insensitive substring match of *model_id* against
    every entry of ``LLM_MODELS``.

    Args:
        model_id: Model identifier to classify.

    Returns:
        True if any ``LLM_MODELS`` entry occurs in the lower-cased
        identifier, False otherwise (including for an empty string).
    """
    # Lower-case once instead of once per candidate.
    lowered_id = model_id.lower()
    return any(llm_model in lowered_id for llm_model in LLM_MODELS)


# NOTE(review): the text below is the start of render_ner_html, which is cut
# off mid-string in the file as reviewed. It is kept verbatim and untouched;
# restore the complete definition from the original source.
# Render NER HTML for tagged view def render_ner_html(text, entities, selected_entity_types=None): import html as html_lib import re if not text.strip() or not entities: return "