Spaces:
Running
Running
import nltk | |
import nltk.downloader | |
import spacy | |
from core.config import settings | |
from pathlib import Path | |
import en_core_web_sm | |
def initialize_nlp():
    """Eagerly fetch the NLTK resources and load the spaCy model.

    Intended as a warm-up step: it downloads every NLTK resource listed
    below (no ``download_dir`` is passed, so NLTK picks the location) and
    loads the ``en_core_web_sm`` spaCy pipeline so that a missing or broken
    model surfaces here. The loaded pipeline is discarded; nothing is cached
    by this function.
    """
    print("Initializing NLP resources...")

    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]

    # Load the model once. The previous code loaded the same package twice
    # (spacy.load("en_core_web_sm") and en_core_web_sm.load()) and threw
    # both results away.
    spacy.load("en_core_web_sm")

    # Download each resource exactly once. The previous loop passed the
    # whole list to nltk.download() on every iteration, so the full set was
    # processed len(nltk_resources) times.
    for resource in nltk_resources:
        nltk.download(resource)

    print("NLP resources initialized successfully.")
# Module-level caches for the lazily initialized NLP resources.
# `nlp` holds the spaCy pipeline once get_nlp() has loaded it; None until then.
nlp = None
# `nltk_initialized` records whether get_nltk() has already run its downloads.
nltk_initialized = False
def get_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The pipeline named by ``settings.SPACY_MODEL`` is loaded the first time
    this is called and stored in the module-level ``nlp`` variable; later
    calls return that same object without reloading.

    Returns:
        The cached object produced by ``spacy.load(settings.SPACY_MODEL)``.
    """
    global nlp
    if nlp is None:
        nlp = spacy.load(settings.SPACY_MODEL)
    # Returned outside the `if` so cached calls also get the pipeline.
    return nlp
def get_nltk():
    """Return the ``nltk`` module, downloading required resources on first use.

    On the first call the 'punkt' and 'averaged_perceptron_tagger' resources
    are downloaded quietly and the module-level ``nltk_initialized`` flag is
    set, so later calls skip the download step.

    Returns:
        The imported ``nltk`` module.
    """
    global nltk_initialized
    if not nltk_initialized:
        # Use the same entry point for both downloads; the original mixed
        # nltk.downloader.download and nltk.download for the same job.
        for resource in ('punkt', 'averaged_perceptron_tagger'):
            nltk.download(resource, quiet=True)
        nltk_initialized = True
    # Returned outside the `if` so already-initialized calls get the module.
    return nltk