Spaces:
Sleeping
Sleeping
Update app.py
Browse files
Adding https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz to the requirements
app.py
CHANGED
@@ -78,7 +78,15 @@ from transformers import AutoTokenizer, AutoModel
|
|
78 |
import torch
|
79 |
|
80 |
# Load SpaCy model
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
# Load Hugging Face Transformers model
|
84 |
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
@@ -100,6 +108,9 @@ model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
|
100 |
import re
|
101 |
from nltk.corpus import stopwords
|
102 |
from nltk.tokenize import word_tokenize
|
|
|
|
|
|
|
103 |
|
104 |
def combined_text_processing(text):
|
105 |
# Remove punctuation, numbers, URLs, and special characters
|
|
|
78 |
import torch
|
79 |
|
80 |
# Load SpaCy model
|
81 |
+
# Install the 'en_core_web_sm' model if it isn't already installed
|
82 |
+
try:
|
83 |
+
nlp = spacy.load('en_core_web_sm')
|
84 |
+
except OSError:
|
85 |
+
# Instead of this try/except, we could also add https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz to requirements.txt so that the model is installed up front and can be loaded directly
|
86 |
+
from spacy.cli import download
|
87 |
+
download('en_core_web_sm')
|
88 |
+
nlp = spacy.load('en_core_web_sm')
|
89 |
+
|
90 |
|
91 |
# Load Hugging Face Transformers model
|
92 |
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
|
|
108 |
import re
|
109 |
from nltk.corpus import stopwords
|
110 |
from nltk.tokenize import word_tokenize
|
111 |
+
# Download necessary NLTK data
|
112 |
+
nltk.download('punkt')
|
113 |
+
nltk.download('stopwords')
|
114 |
|
115 |
def combined_text_processing(text):
|
116 |
# Remove punctuation, numbers, URLs, and special characters
|