# Source: Hugging Face Space "Omkar008/geo_location_api" (status: Running)
# File size: 1,396 bytes

# Commit hashes shown in the file viewer's blame gutter:
# 2542be6, 27537e8, b748bd2, b8d1cbb, 3b52a97

import nltk
import nltk.downloader
import spacy
from core.config import settings
from pathlib import Path
import en_core_web_sm

def initialize_nlp():
    """Warm up the spaCy model and fetch the NLTK resources used by the app.

    Loads the ``en_core_web_sm`` spaCy pipeline once (the returned pipeline
    is discarded) and then asks NLTK to download each required resource
    exactly once. Progress is reported with ``print``; nothing is returned.
    """
    print("Initializing NLP resources...")

    nltk_resources = (
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    )

    # Load the spaCy model a single time. The previous code followed this
    # with ``en_core_web_sm.load()``, which loads the same pipeline again and
    # also threw the result away.
    spacy.load("en_core_web_sm")

    # Request one resource per iteration. The previous loop passed the whole
    # list to ``nltk.download`` on every pass, so every resource was requested
    # once per list entry instead of once overall.
    for resource in nltk_resources:
        nltk.download(resource)
    print("NLP resources initialized successfully.")

# Global variables to store initialized resources
# Cached spaCy pipeline; stays None until get_nlp() loads it on first use.
nlp = None
# Set to True by get_nltk() once it has issued its NLTK downloads, so later
# calls skip them.
nltk_initialized = False

def get_nlp():
    """Return the shared spaCy pipeline, loading it on the first call.

    The pipeline named by ``settings.SPACY_MODEL`` is loaded once and cached
    in the module-level ``nlp`` variable; subsequent calls return the cached
    object.
    """
    global nlp
    # Fast path: the pipeline was already loaded by an earlier call.
    if nlp is not None:
        return nlp
    nlp = spacy.load(settings.SPACY_MODEL)
    return nlp

def get_nltk():
    """Return the ``nltk`` module after requesting its data on first use.

    On the first call, quietly requests the ``punkt`` and
    ``averaged_perceptron_tagger`` resources and records that in the
    module-level ``nltk_initialized`` flag; later calls return ``nltk``
    straight away.
    """
    global nltk_initialized
    # Downloads were already requested by an earlier call.
    if nltk_initialized:
        return nltk
    nltk.downloader.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
    nltk_initialized = True
    return nltk