Spaces:
Runtime error
Runtime error
Shchushch
committed on
Commit
·
50872cb
1
Parent(s):
e2a5546
nltk
Browse files
find.py
CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
|
|
3 |
from transformers import AutoTokenizer, AutoModel,BertTokenizer,BertModel
|
4 |
import numpy as np
|
5 |
import pickle
|
6 |
-
|
7 |
from nltk.stem import WordNetLemmatizer
|
8 |
from nltk.tag import pos_tag
|
9 |
from nltk.corpus import stopwords
|
@@ -14,9 +14,13 @@ import faiss
|
|
14 |
from tqdm import tqdm
|
15 |
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
16 |
model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
|
|
|
|
|
|
|
17 |
eng_stop_words = stopwords.words('english')
|
18 |
with open('russian.txt', 'r') as f:
|
19 |
ru_stop_words = f.read()
|
|
|
20 |
ru_stop_words=ru_stop_words.split('\n')
|
21 |
allow="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя0123456789-' \n\t"
|
22 |
#Задаём стеммер
|
|
|
3 |
from transformers import AutoTokenizer, AutoModel,BertTokenizer,BertModel
|
4 |
import numpy as np
|
5 |
import pickle
|
6 |
+
import nltk
|
7 |
from nltk.stem import WordNetLemmatizer
|
8 |
from nltk.tag import pos_tag
|
9 |
from nltk.corpus import stopwords
|
|
|
14 |
from tqdm import tqdm
|
15 |
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
|
16 |
model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
|
17 |
+
|
18 |
+
nltk.download('stopwords')
|
19 |
+
|
20 |
eng_stop_words = stopwords.words('english')
|
21 |
with open('russian.txt', 'r') as f:
|
22 |
ru_stop_words = f.read()
|
23 |
+
|
24 |
ru_stop_words=ru_stop_words.split('\n')
|
25 |
allow="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя0123456789-' \n\t"
|
26 |
#Задаём стеммер
|