Omkar008 committed on
Commit
b748bd2
·
verified ·
1 Parent(s): 55d5cbb

Update core/init_nlp.py

Browse files
Files changed (1) hide show
  1. core/init_nlp.py +18 -15
core/init_nlp.py CHANGED
@@ -3,29 +3,32 @@ import nltk.downloader
3
  import spacy
4
  from core.config import settings
5
  from pathlib import Path
 
6
 
7
  def initialize_nlp():
8
  print("Initializing NLP resources...")
9
 
10
- nltk_data_path = Path("/tmp/nltk_data")
11
- nltk_data_path.mkdir(parents=True, exist_ok=True)
12
- nltk.data.path.append(str(nltk_data_path))
13
 
14
- # Download NLTK resources
15
- nltk_resources = [
16
- 'maxent_ne_chunker',
17
- 'words',
18
- 'treebank',
19
- 'maxent_treebank_pos_tagger',
20
- 'punkt',
21
- 'averaged_perceptron_tagger'
22
- ]
23
 
24
- for resource in nltk_resources:
25
- nltk.downloader.download(resource, download_dir=str(nltk_data_path) ,quiet=True)
26
 
27
  # Load spaCy model
28
- spacy.load(settings.SPACY_MODEL)
 
 
29
 
30
  print("NLP resources initialized successfully.")
31
 
 
3
  import spacy
4
  from core.config import settings
5
  from pathlib import Path
6
+ import en_core_web_sm
7
 
8
  def initialize_nlp():
9
  print("Initializing NLP resources...")
10
 
11
+ # nltk_data_path = Path("/tmp/nltk_data")
12
+ # nltk_data_path.mkdir(parents=True, exist_ok=True)
13
+ # nltk.data.path.append(str(nltk_data_path))
14
 
15
+ # # Download NLTK resources
16
+ # nltk_resources = [
17
+ # 'maxent_ne_chunker',
18
+ # 'words',
19
+ # 'treebank',
20
+ # 'maxent_treebank_pos_tagger',
21
+ # 'punkt',
22
+ # 'averaged_perceptron_tagger'
23
+ # ]
24
 
25
+ # for resource in nltk_resources:
26
+ # nltk.downloader.download(resource, download_dir=str(nltk_data_path) ,quiet=True)
27
 
28
  # Load spaCy model
29
+ # spacy.load(settings.SPACY_MODEL)
30
+ spacy.load("en_core_web_sm")
31
+ en_core_web_sm.load()
32
 
33
  print("NLP resources initialized successfully.")
34