Spaces:
Running
Running
File size: 1,396 Bytes
2542be6 27537e8 b748bd2 2542be6 b8d1cbb b748bd2 2542be6 b748bd2 3b52a97 2542be6 b748bd2 2542be6 b748bd2 3b52a97 2542be6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import nltk
import nltk.downloader
import spacy
from core.config import settings
from pathlib import Path
import en_core_web_sm
def initialize_nlp() -> None:
    """Eagerly load the spaCy model and download the NLTK resources.

    Loads the ``en_core_web_sm`` spaCy model once and discards the returned
    pipeline, then downloads each required NLTK resource into NLTK's default
    data directory. Start and finish messages are printed to stdout.
    """
    print("Initializing NLP resources...")

    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]

    # spacy.load("en_core_web_sm") and en_core_web_sm.load() resolve to the
    # same installed package, so one load is enough; the result is discarded.
    # NOTE(review): get_nlp() loads settings.SPACY_MODEL while this hard-codes
    # "en_core_web_sm" -- confirm the two are meant to differ.
    spacy.load("en_core_web_sm")

    # Download each resource exactly once. Passing the whole list on every
    # iteration made NLTK process all six resources six times over.
    for resource in nltk_resources:
        nltk.download(resource)

    print("NLP resources initialized successfully.")
# Module-level state shared by the lazy accessors defined below.
nltk_initialized = False  # set to True by get_nltk() after its downloads run
nlp = None  # assigned by get_nlp() the first time it is called
def get_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The pipeline named by ``settings.SPACY_MODEL`` is loaded the first time
    this is called and stored in the module-level ``nlp`` variable; later
    calls return that stored object.
    """
    global nlp
    # Fast path: a pipeline has already been loaded and stored.
    if nlp is not None:
        return nlp
    nlp = spacy.load(settings.SPACY_MODEL)
    return nlp
def get_nltk():
    """Return the ``nltk`` module, downloading its data on the first call.

    The first call downloads the ``punkt`` and ``averaged_perceptron_tagger``
    resources quietly and sets the module-level ``nltk_initialized`` flag;
    later calls skip the downloads.
    """
    global nltk_initialized
    # Fast path: the downloads were already attempted in this process.
    if nltk_initialized:
        return nltk
    nltk.downloader.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk_initialized = True
    return nltk