geo_location_api / core /init_nlp.py
Omkar008's picture
Update core/init_nlp.py
3b52a97 verified
import nltk
import nltk.downloader
import spacy
from core.config import settings
from pathlib import Path
import en_core_web_sm
def initialize_nlp():
    """Warm up the spaCy model and download the NLTK resources.

    The spaCy model ``en_core_web_sm`` is loaded twice, once by name through
    ``spacy.load`` and once through its own package; both pipelines are
    discarded, so this only checks that the model can be loaded. Each NLTK
    resource listed below is then requested exactly once. No ``download_dir``
    is passed, so NLTK picks its own download location.

    Progress is reported on stdout via ``print``. Nothing is returned.
    """
    print("Initializing NLP resources...")

    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]

    # NOTE: the model name is hard-coded here, whereas get_nlp() reads it
    # from settings.SPACY_MODEL.
    spacy.load("en_core_web_sm")
    en_core_web_sm.load()

    # Request one resource per call. Passing the whole list inside the loop
    # (as the previous version did) re-processed every resource on each
    # iteration.
    for resource in nltk_resources:
        nltk.download(resource)

    print("NLP resources initialized successfully.")
# Module-level cache state, filled in lazily by get_nlp() / get_nltk():
#   nltk_initialized - flips to True once get_nltk() has run its downloads
#   nlp              - holds the spaCy pipeline after the first get_nlp() call
nltk_initialized = False
nlp = None
def get_nlp():
    """Return the shared spaCy pipeline, loading it on the first call.

    The pipeline named by ``settings.SPACY_MODEL`` is loaded once and kept in
    the module-level ``nlp`` variable; later calls return that same object.
    """
    global nlp
    if nlp is not None:
        # Already loaded by an earlier call.
        return nlp
    nlp = spacy.load(settings.SPACY_MODEL)
    return nlp
def get_nltk():
    """Return the ``nltk`` module, fetching its required resources once.

    On the first call the 'punkt' and 'averaged_perceptron_tagger' resources
    are downloaded quietly and the module-level ``nltk_initialized`` flag is
    set; later calls skip the downloads and return ``nltk`` directly.
    """
    global nltk_initialized
    if nltk_initialized:
        return nltk
    for resource_id in ('punkt', 'averaged_perceptron_tagger'):
        nltk.download(resource_id, quiet=True)
    nltk_initialized = True
    return nltk