Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,17 +3,15 @@ from minivectordb.embedding_model import EmbeddingModel
|
|
3 |
from minivectordb.vector_database import VectorDatabase
|
4 |
from multiprocessing import cpu_count
|
5 |
from functools import lru_cache
|
6 |
-
import fasttext, random, nltk, tiktoken, os
|
7 |
import concurrent.futures
|
8 |
-
nltk.download('stopwords')
|
9 |
-
from nltk.corpus import stopwords
|
10 |
|
11 |
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
|
12 |
|
13 |
langdetect_model = fasttext.load_model('lid.176.ftz')
|
14 |
embedding_model = EmbeddingModel(onnx_model_cpu_core_count=1)
|
15 |
-
en_stop_words = stopwords.words('english')
|
16 |
-
pt_stop_words = stopwords.words('portuguese')
|
17 |
tokenizer = tiktoken.encoding_for_model("gpt-4")
|
18 |
|
19 |
def count_tokens_tiktoken(text):
|
|
|
3 |
from minivectordb.vector_database import VectorDatabase
|
4 |
from multiprocessing import cpu_count
|
5 |
from functools import lru_cache
|
6 |
+
import fasttext, random, tiktoken, os, pickle
|
7 |
import concurrent.futures
|
|
|
|
|
8 |
|
9 |
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
|
10 |
|
11 |
langdetect_model = fasttext.load_model('lid.176.ftz')
|
12 |
embedding_model = EmbeddingModel(onnx_model_cpu_core_count=1)
|
13 |
+
en_stop_words = pickle.load(open("en_stopwords.pkl", "rb"))
|
14 |
+
pt_stop_words = pickle.load(open("pt_stopwords.pkl", "rb"))
|
15 |
tokenizer = tiktoken.encoding_for_model("gpt-4")
|
16 |
|
17 |
def count_tokens_tiktoken(text):
|