"""Initialization and lazy accessors for the NLTK and spaCy resources."""

from pathlib import Path

import en_core_web_sm
import nltk
import nltk.downloader
import spacy

from core.config import settings


def initialize_nlp() -> None:
    """Eagerly load the spaCy model and download the NLTK resources.

    Each NLTK resource is requested exactly once, using NLTK's default
    download location. Progress is reported on stdout via ``print``.
    """
    print("Initializing NLP resources...")

    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]

    # NOTE(review): both calls load the en_core_web_sm pipeline and discard
    # the result; presumably a fail-fast check that the model is installed.
    # Confirm whether one of them can be dropped.
    spacy.load("en_core_web_sm")
    en_core_web_sm.load()

    # Download one resource per iteration. Passing the whole list inside the
    # loop would repeat the full download once per list entry.
    for resource in nltk_resources:
        nltk.download(resource)

    print("NLP resources initialized successfully.")


# Module-level cache for the lazily initialized resources.
nlp = None  # spaCy pipeline, created on the first get_nlp() call
nltk_initialized = False  # True once get_nltk() has requested its downloads


def get_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The model name is read from ``settings.SPACY_MODEL``; the loaded
    pipeline is stored in the module-level ``nlp`` and reused afterwards.
    """
    global nlp
    if nlp is None:
        nlp = spacy.load(settings.SPACY_MODEL)
    return nlp


def get_nltk():
    """Return the ``nltk`` module, downloading its resources on first use.

    The first call requests 'punkt' and 'averaged_perceptron_tagger'
    quietly; later calls skip the download and return ``nltk`` directly.
    """
    global nltk_initialized
    if not nltk_initialized:
        for resource in ('punkt', 'averaged_perceptron_tagger'):
            nltk.download(resource, quiet=True)
        nltk_initialized = True
    return nltk