images
- .gitignore +3 -0
- images/2_rubert_metrics.png +0 -0
- images/log_reg_metrics.png +0 -0
- images/nlp.jpg +0 -0
- models/model1/lstm_model.py +71 -0
- models/model1/lstm_preprocessor.py +61 -0
- models/model1/lstm_vocab_to_int.pkl +3 -0
- models/model1/lstm_weights +0 -0
- models/model1/word2vec_model.bin +3 -0
- pages/1_policlinic.py +121 -0
- pages/2_comments.py +94 -0
- requirements.txt +86 -0
- space.yaml +2 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+venv/
+# Ignore Python cache files
+__pycache__/
images/2_rubert_metrics.png
ADDED
Binary file (image).
images/log_reg_metrics.png
ADDED
Binary file (image).
images/nlp.jpg
ADDED
Binary file (image).
models/model1/lstm_model.py
ADDED
@@ -0,0 +1,71 @@
+import torch
+from torch import nn
+import numpy as np
+import torch.nn.functional as F
+import joblib
+from gensim.models import Word2Vec
+
+vocab_to_int = joblib.load('models/model1/lstm_vocab_to_int.pkl')
+
+wv = Word2Vec.load("models/model1/word2vec_model.bin")
+
+# Build the embedding matrix from the Word2Vec vectors and define embedding_layer
+embedding_matrix = np.zeros((3379, 32))
+for word, i in vocab_to_int.items():
+    try:
+        embedding_vector = wv.wv[word]
+        embedding_matrix[i] = embedding_vector
+    except KeyError as e:
+        print(f'{e}: word: {word}')
+
+embedding_layer = nn.Embedding.from_pretrained(torch.FloatTensor(embedding_matrix))
+
+class BahdanauAttention(nn.Module):
+    def __init__(self, hidden_size=32):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.linear_1 = nn.Linear(self.hidden_size, self.hidden_size)
+        self.linear_2 = nn.Linear(self.hidden_size, self.hidden_size)
+        # Note: 'alogn' (sic) is kept so the saved state_dict still loads
+        self.alogn = nn.Linear(self.hidden_size, 1)
+        self.tanh = nn.Tanh()
+
+    def forward(self, lstm_outputs, final_hidden):
+        keys = self.linear_1(lstm_outputs)   # keys.shape: [batch_size, seq_len, hidden_size]
+        query = self.linear_2(final_hidden)  # query.shape: [batch_size, hidden_size]
+
+        query = query.unsqueeze(1).expand(-1, lstm_outputs.size(1), -1)  # query.shape: [batch_size, seq_len, hidden_size]
+
+        keys_query = keys + query                  # keys_query.shape: [batch_size, seq_len, hidden_size]
+        att_weights = self.tanh(keys_query)        # att_weights.shape: [batch_size, seq_len, hidden_size]
+        att_weights = self.alogn(att_weights)      # att_weights.shape: [batch_size, seq_len, 1]
+        att_weights = F.softmax(att_weights.squeeze(2), dim=1)  # att_weights.shape: [batch_size, seq_len]
+
+        # Compute the context vector as the attention-weighted sum of LSTM outputs
+        context = torch.bmm(lstm_outputs.transpose(1, 2), att_weights.unsqueeze(2))  # context.shape: [batch_size, hidden_size, 1]
+        context = context.squeeze(2)  # context.shape: [batch_size, hidden_size]
+
+        return context, att_weights
+
+
+# Model class definition
+class LSTMConcatAttention(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.embedding = embedding_layer
+        self.lstm = nn.LSTM(32, 32, batch_first=True)
+        self.attn = BahdanauAttention(32)
+        self.clf = nn.Sequential(
+            nn.Linear(32, 128),
+            nn.Dropout(),
+            nn.Tanh(),
+            nn.Linear(128, 1)
+        )
+
+    def forward(self, x):
+        embeddings = self.embedding(x)
+        outputs, (h_n, _) = self.lstm(embeddings)
+        context, att_weights = self.attn(outputs, h_n.squeeze(0))
+        out = self.clf(context)
+        return out, att_weights
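For orientation, here is a minimal sketch of exercising the model above with a dummy batch (it assumes the pickled vocabulary and Word2Vec binary exist at the paths the module loads at import time; shapes follow the inline comments):

import torch
from models.model1.lstm_model import LSTMConcatAttention

model = LSTMConcatAttention()
model.eval()
dummy_ids = torch.randint(0, 3379, (2, 64))  # 2 sequences of 64 token ids
with torch.inference_mode():
    logits, att_weights = model(dummy_ids)
print(logits.shape)       # torch.Size([2, 1]) -- one raw logit per review
print(att_weights.shape)  # torch.Size([2, 64]) -- one attention weight per token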
models/model1/lstm_preprocessor.py
ADDED
@@ -0,0 +1,61 @@
+import string
+import numpy as np
+import torch
+from sklearn.base import BaseEstimator, TransformerMixin
+from nltk.corpus import stopwords
+import joblib
+import re
+
+class TextPreprocessorWord2Vec(BaseEstimator, TransformerMixin):
+    def __init__(self):
+        self.stop_words = set(stopwords.words('russian'))
+        self.vocab_to_int = joblib.load('models/model1/lstm_vocab_to_int.pkl')
+
+    def preprocess_text(self, text):
+        # Convert to lower case
+        text = text.lower()
+        # Remove HTML tags
+        text = re.sub('<.*?>', '', text)
+        # Remove punctuation
+        text = ''.join([c for c in text if c not in string.punctuation])
+        # Remove stop words
+        text = ' '.join([word for word in text.split() if word not in self.stop_words])
+        # Remove digits
+        text = ' '.join([word for word in text.split() if not word.isdigit()])
+        return text
+
+    @staticmethod
+    def padding(review_int: list, seq_len: int) -> np.ndarray:
+        # Left-pad (or truncate) each sequence of token ids to seq_len
+        features = np.zeros((len(review_int), seq_len), dtype=int)
+        for i, review in enumerate(review_int):
+            if len(review) <= seq_len:
+                zeros = list(np.zeros(seq_len - len(review)))
+                new = zeros + review
+            else:
+                new = review[:seq_len]
+            features[i, :] = np.array(new)
+        return features
+
+    @staticmethod
+    def preprocess_single_string(
+            input_string: str,
+            seq_len: int,
+            vocab_to_int: dict,
+            verbose: bool = False
+    ) -> torch.Tensor:
+        preprocessed_string = TextPreprocessorWord2Vec().preprocess_text(input_string)
+        result_list = []
+        for word in preprocessed_string.split():
+            try:
+                result_list.append(vocab_to_int[word])
+            except KeyError as e:
+                if verbose:
+                    print(f'{e}: not in dictionary!')
+        result_padded = TextPreprocessorWord2Vec.padding([result_list], seq_len)[0]
+        return torch.tensor(result_padded)
+
+    def fit(self, X, y=None):
+        return self
+
+    def transform(self, X, y=None):
+        return self.preprocess_single_string(X, 64, self.vocab_to_int)
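A minimal usage sketch of this preprocessor (the review text is a hypothetical example; it assumes the NLTK Russian stopwords corpus has been downloaded and the pickled vocabulary is on disk):

import nltk
nltk.download('stopwords')  # required once for stopwords.words('russian')

from models.model1.lstm_preprocessor import TextPreprocessorWord2Vec

prep = TextPreprocessorWord2Vec()
token_ids = prep.transform('Отличная клиника, врачи внимательные')
print(token_ids.shape)  # torch.Size([64]) -- left-padded with zeros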
models/model1/lstm_vocab_to_int.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47b13b436bb7c4727517cbff2b50e2e8f60f8bd944b963ce9af7ba07d936804d
+size 66010
models/model1/lstm_weights
ADDED
Binary file (498 kB).
models/model1/word2vec_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd6fbff4f54327bdf67ee352c0ed9cfbd63b2707348015f2d6d696b3231c7f10
+size 993284
pages/1_policlinic.py
ADDED
@@ -0,0 +1,121 @@
+import streamlit as st
+import joblib
+import pandas as pd
+# TextPreprocessor must be importable so joblib can unpickle the pipeline below
+from models.model1.Custom_class import TextPreprocessor
+from pathlib import Path
+import sys
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+import time
+
+project_root = Path(__file__).resolve().parents[1]
+models_path = project_root / 'models'
+sys.path.append(str(models_path))
+from models.model1.lstm_preprocessor import TextPreprocessorWord2Vec
+from models.model1.lstm_model import LSTMConcatAttention
+
+# Load the trained pipeline
+pipeline = joblib.load('models/model1/logistic_regression_pipeline.pkl')
+
+# Streamlit application
+st.title('Классификация отзывов на русском языке')
+
+input_text = st.text_area('Введите текст отзыва')
+
+device = 'cpu'
+
+# Load the LSTM model and the vocabulary
+@st.cache_resource
+def load_lstm_model():
+    model = LSTMConcatAttention()
+    weights_path = models_path / 'model1' / 'lstm_weights'
+    state_dict = torch.load(weights_path, map_location=device)
+    model.load_state_dict(state_dict)
+    model.to(device)
+    model.eval()
+    return model
+
+lstm_model = load_lstm_model()
+
+@st.cache_resource
+def load_int_to_vocab():
+    vocab_path = models_path / 'model1' / 'lstm_vocab_to_int.pkl'
+    vocab_to_int = joblib.load(vocab_path)
+    int_to_vocab = {j: i for i, j in vocab_to_int.items()}
+    return int_to_vocab
+
+int_to_vocab = load_int_to_vocab()
+
+def plot_and_predict_lstm(input_text):
+    preprocessor_lstm = TextPreprocessorWord2Vec()
+    preprocessed = preprocessor_lstm.transform(input_text)
+    lstm_model.eval()
+    with torch.inference_mode():
+        pred, att_scores = lstm_model(preprocessed.long().unsqueeze(0))
+
+    lstm_pred = pred.sigmoid().item()
+
+    # Keep only the indices of real words (skip zero padding and <pad> tokens)
+    valid_indices = [i for i, x in enumerate(preprocessed) if x.item() != 0 and int_to_vocab[x.item()] != "<pad>"]
+
+    # Collect the matching attention scores and word labels
+    valid_att_scores = att_scores.detach().cpu().numpy()[0][valid_indices]
+    valid_labels = [int_to_vocab[preprocessed[i].item()] for i in valid_indices]
+
+    # Order labels and attention scores by attention weight
+    sorted_indices = np.argsort(valid_att_scores)
+    sorted_labels = [valid_labels[i] for i in sorted_indices]
+    sorted_att_scores = valid_att_scores[sorted_indices]
+
+    # Plot only the valid labels
+    plt.figure(figsize=(4, 8))
+    plt.barh(np.arange(len(sorted_indices)), sorted_att_scores)
+    plt.yticks(ticks=np.arange(len(sorted_indices)), labels=sorted_labels)
+
+    return lstm_pred, plt
+
+if st.button('Предсказать'):
+    start_time_lr = time.time()
+    prediction = pipeline.predict(pd.Series([input_text]))
+    pred_proba = pipeline.predict_proba(pd.Series([input_text]))
+    pred_proba_rounded = np.round(pred_proba, 2).flatten()
+    if prediction[0] == 0:
+        predicted_class = "POSITIVE"
+    else:
+        predicted_class = "NEGATIVE"
+    # Stop the timer before rendering, so only the computation is measured
+    end_time_lr = time.time()
+    time_lr = end_time_lr - start_time_lr
+    st.subheader('Предсказанный класс с помощью логистической регрессии и tf-idf')
+    st.write(f'**{predicted_class}** с вероятностью {pred_proba_rounded[0]}')
+    st.write(f'Время выполнения расчетов {time_lr:.4f} секунд')
+
+    start_time_lstm = time.time()
+    lstm_pred, lstm_plot = plot_and_predict_lstm(input_text)
+    if lstm_pred > 0.5:
+        predicted_lstm_class = "POSITIVE"
+    else:
+        predicted_lstm_class = "NEGATIVE"
+    end_time_lstm = time.time()
+    time_lstm = end_time_lstm - start_time_lstm
+    st.subheader('Предсказанный класс с помощью LSTM + Word2Vec + BahdanauAttention:')
+    st.write(f'**{predicted_lstm_class}** с вероятностью {round(lstm_pred, 3)}')
+    st.write(f'Время выполнения расчетов {time_lstm:.4f} секунд')
+    st.pyplot(lstm_plot)
+
+
+st.write("# Информация об обучении модели логистической регрессии и tf-idf:")
+st.image(str(project_root / 'images/pipeline_logreg.png'))
+st.write("Модель обучалась на предсказание 1 класса")
+st.write("Размер датасета - 70597 текстов отзывов")
+st.write("Проведена предобработка текста")
+
+st.write("Метрики:")
+st.image(str(project_root / 'images/log_reg_metrics.png'))
+
+st.write("# Информация об обучении модели LSTM + Word2Vec + BahdanauAttention:")
+st.write("Время обучения модели - 10 эпох")
+st.write("Метрики на 10 эпохе:")
+st.write("Train f1: 0.95, Val f1: 0.93")
+st.write("Train accuracy: 0.94, Val accuracy: 0.92")
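The same two predictors can be sanity-checked outside Streamlit with a short script (a sketch, assuming all artifacts referenced above are present on disk; the review text is hypothetical):

import joblib
import pandas as pd
import torch

from models.model1.lstm_preprocessor import TextPreprocessorWord2Vec
from models.model1.lstm_model import LSTMConcatAttention

text = 'Очень долго ждали приёма'

# Logistic regression + tf-idf pipeline
pipeline = joblib.load('models/model1/logistic_regression_pipeline.pkl')
print(pipeline.predict_proba(pd.Series([text])))

# LSTM + Word2Vec + Bahdanau attention
model = LSTMConcatAttention()
model.load_state_dict(torch.load('models/model1/lstm_weights', map_location='cpu'))
model.eval()
ids = TextPreprocessorWord2Vec().transform(text)
with torch.inference_mode():
    logit, _ = model(ids.long().unsqueeze(0))
print(logit.sigmoid().item())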
pages/2_comments.py
ADDED
@@ -0,0 +1,94 @@
+import streamlit as st
+import torch
+import sys
+from pathlib import Path
+import time
+from transformers import AutoTokenizer
+
+
+st.write("# Оценка степени токсичности пользовательского сообщения")
+
+# Add the project and model paths
+project_root = Path(__file__).resolve().parents[1]
+models_path = project_root / 'models'
+sys.path.append(str(models_path))
+from models.model2.preprocess_text import TextPreprocessorBERT
+from models.model2.model import BERTClassifier
+
+device = 'cpu'
+
+# Load the model and the tokenizer
+@st.cache_resource
+def load_model():
+    model = BERTClassifier()
+    weights_path = models_path / 'model2' / 'model_weights_new.pth'
+    state_dict = torch.load(weights_path, map_location=device)
+    model.load_state_dict(state_dict)
+    model.to(device)
+    model.eval()
+    return model
+
+@st.cache_resource
+def load_tokenizer():
+    return AutoTokenizer.from_pretrained('cointegrated/rubert-tiny-toxicity')
+
+model = load_model()
+tokenizer = load_tokenizer()
+
+input_text = st.text_area('Введите текст сообщения')
+
+if st.button('Предсказать'):
+    start_time = time.time()
+    # Apply preprocessing
+    preprocessor = TextPreprocessorBERT()
+    preprocessed_text = preprocessor.transform(input_text)
+
+    # Tokenization
+    tokens = tokenizer.encode_plus(
+        preprocessed_text,
+        add_special_tokens=True,
+        truncation=True,
+        max_length=100,
+        padding='max_length',
+        return_tensors='pt'
+    )
+
+    # Extract input_ids and attention_mask from the tokens
+    input_ids = tokens['input_ids'].to(device)
+    attention_mask = tokens['attention_mask'].to(device)
+
+    # Prediction
+    with torch.no_grad():
+        output = model(input_ids, attention_mask=attention_mask)
+
+    # Interpret the result
+    prediction = torch.sigmoid(output).item()
+    end_time = time.time()  # Stop the timer
+    execution_time = end_time - start_time
+    if prediction > 0.5:
+        class_pred = 'TOXIC'
+    else:
+        class_pred = 'HEALTHY'
+    st.subheader(f'Предсказанный класс токсичности: **{class_pred}** с вероятностью {prediction:.4f}')
+    st.write(f'Время выполнения: {execution_time:.4f} секунд')
+
+
+# Information about the model
+st.write("# Информация об обучении модели rubert-tiny-toxicity:")
+st.write("Модель обучалась на предсказание 1 класса")
+st.write("Размер датасета - 14412 текстов сообщений")
+st.write("Проведена предобработка текста")
+
+st.image(str(project_root / 'images/2_rubert_metrics.png'), width=1000)
+st.write("Время обучения модели - 50 эпох")
+st.write("Метрики на 50 эпохе:")
+st.write("Train f1: 0.73, Val f1: 0.77")
+st.write("Train acc: 0.73, Val acc: 0.74")
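The tokenize → forward → sigmoid flow above can be factored into a reusable helper for testing outside the page (a sketch; BERTClassifier and model_weights_new.pth come from models/model2, which is not part of this commit):

import torch
from transformers import AutoTokenizer

from models.model2.model import BERTClassifier

tokenizer = AutoTokenizer.from_pretrained('cointegrated/rubert-tiny-toxicity')

def toxicity_score(model, text: str) -> float:
    # Same settings as the page: truncate/pad to 100 tokens
    tokens = tokenizer(
        text,
        add_special_tokens=True,
        truncation=True,
        max_length=100,
        padding='max_length',
        return_tensors='pt',
    )
    with torch.no_grad():
        output = model(tokens['input_ids'], attention_mask=tokens['attention_mask'])
    return torch.sigmoid(output).item()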
requirements.txt
ADDED
@@ -0,0 +1,86 @@
+altair==5.3.0
+attrs==23.2.0
+blinker==1.8.2
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
+DAWG-Python==0.7.2
+docopt==0.6.2
+filelock==3.14.0
+fonttools==4.52.4
+fsspec==2024.5.0
+gensim==4.3.1
+gitdb==4.0.11
+GitPython==3.1.43
+huggingface-hub==0.23.2
+idna==3.7
+Jinja2==3.1.4
+joblib==1.4.2
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.3
+nltk==3.8.1
+numpy==1.24.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.5.40
+nvidia-nvtx-cu12==12.1.105
+packaging==24.0
+pandas==2.2.2
+pathlib==1.0.1
+pillow==10.3.0
+protobuf==4.25.3
+pyarrow==16.1.0
+pydeck==0.9.1
+Pygments==2.18.0
+pymorphy2==0.9.1
+pymorphy2-dicts-ru==2.4.417127.4579844
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.18.1
+safetensors==0.4.3
+scikit-learn==1.5.0
+scipy==1.8.1
+six==1.16.0
+smart-open==7.0.4
+smmap==5.0.1
+streamlit==1.35.0
+sympy==1.12.1
+tenacity==8.3.0
+threadpoolctl==3.5.0
+tokenizers==0.19.1
+toml==0.10.2
+toolz==0.12.1
+torch==2.3.0
+tornado==6.4
+tqdm==4.66.4
+transformers==4.41.2
+triton==2.3.0
+typing_extensions==4.12.0
+tzdata==2024.1
+urllib3==2.2.1
+watchdog==4.0.1
+wrapt==1.16.0
space.yaml
ADDED
@@ -0,0 +1,2 @@
+title: Nlp Bert Team
+app_file: Hello.py
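space.yaml declares Hello.py as the Space entry point, but that file is not part of this commit. A minimal landing page under that assumption could look like:

import streamlit as st

st.set_page_config(page_title='Nlp Bert Team')
st.title('NLP Bert Team')
st.write('Use the sidebar to open the 1_policlinic or 2_comments page.')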