SantanuBanerjee commited on
Commit
39dbf03
·
verified ·
1 Parent(s): b71a177

Update app.py

Browse files

Adding https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz to requirements.txt

Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -78,7 +78,15 @@ from transformers import AutoTokenizer, AutoModel
78
  import torch
79
 
80
  # Load SpaCy model
81
- nlp = spacy.load('en_core_web_sm')
 
 
 
 
 
 
 
 
82
 
83
  # Load Hugging Face Transformers model
84
  tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
@@ -100,6 +108,9 @@ model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
100
  import re
101
  from nltk.corpus import stopwords
102
  from nltk.tokenize import word_tokenize
 
 
 
103
 
104
  def combined_text_processing(text):
105
  # Remove punctuation, numbers, URLs, and special characters
 
78
  import torch
79
 
80
  # Load SpaCy model
81
+ # Install the 'en_core_web_sm' model if it isn't already installed
82
+ try:
83
+ nlp = spacy.load('en_core_web_sm')
84
+ except OSError:
85
+ # Instead of this try/except, we could also add < https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz > to requirements.txt so the model is installed directly
86
+ from spacy.cli import download
87
+ download('en_core_web_sm')
88
+ nlp = spacy.load('en_core_web_sm')
89
+
90
 
91
  # Load Hugging Face Transformers model
92
  tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
 
108
  import re
109
  from nltk.corpus import stopwords
110
  from nltk.tokenize import word_tokenize
111
+ # Download necessary NLTK data
112
+ nltk.download('punkt')
113
+ nltk.download('stopwords')
114
 
115
  def combined_text_processing(text):
116
  # Remove punctuation, numbers, URLs, and special characters