import streamlit as st
import torch
import torch.nn as nn
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
from collections import Counter
import numpy as np
from typing import List
import time


# Loading the pretrained model
class BahdanauAttention(nn.Module):
    """Additive (Bahdanau) attention over the LSTM outputs."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)  # projects the query (last hidden state)
        self.Wk = nn.Linear(hidden_size, hidden_size)  # projects the keys (LSTM outputs)
        self.Wv = nn.Linear(hidden_size, 1)            # scores each timestep

    def forward(self, query, keys):
        query = query.unsqueeze(1)  # (batch_size, 1, hidden_size)
        # Additive attention score for every timestep: (batch_size, seq_len)
        scores = self.Wv(torch.tanh(self.Wa(query) + self.Wk(keys))).squeeze(2)
        attention_weights = torch.softmax(scores, dim=1)  # (batch_size, seq_len)
        # Weighted sum of the keys: (batch_size, hidden_size)
        context = torch.bmm(attention_weights.unsqueeze(1), keys).squeeze(1)
        return context, attention_weights


class LSTM_Word2Vec_Attention(nn.Module):
    """Embedding -> LSTM -> Bahdanau attention -> classifier head with 3 outputs."""

    def __init__(self, hidden_size: int, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.attn = BahdanauAttention(hidden_size)
        self.clf = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.Dropout(),
            nn.Tanh(),
            nn.Linear(128, 3),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        embedded = self.embedding(x)               # (batch_size, seq_len, embedding_dim)
        output, (hidden, _) = self.lstm(embedded)  # output: (batch_size, seq_len, hidden_size)
        # Attend over all timesteps, using the last hidden state as the query
        context, attention_weights = self.attn(hidden[-1], output)
        output = self.clf(context)                 # (batch_size, 3)
        output = self.sigmoid(output)
        return output, attention_weights
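
# The section header above refers to loading the pretrained model. Below is a minimal sketch of
# how the checkpoint could be restored and cached for the Streamlit app, plus a dummy forward
# pass to check output shapes. The file name "lstm_attention.pt" and the hyperparameter values
# (HIDDEN_SIZE, VOCAB_SIZE, EMBEDDING_DIM) are assumptions for illustration; they must match
# whatever the model was actually trained with.
HIDDEN_SIZE = 128      # assumed LSTM hidden size
VOCAB_SIZE = 20_000    # assumed vocabulary size
EMBEDDING_DIM = 300    # assumed Word2Vec embedding dimension


@st.cache_resource
def load_model(weights_path: str = "lstm_attention.pt") -> LSTM_Word2Vec_Attention:
    """Build the model and load pretrained weights onto the CPU (cached across app reruns)."""
    model = LSTM_Word2Vec_Attention(HIDDEN_SIZE, VOCAB_SIZE, EMBEDDING_DIM)
    state_dict = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()  # disable dropout for inference
    return model


# Quick shape check on a dummy batch: probabilities are (batch_size, 3),
# attention weights are (batch_size, seq_len).
if __name__ == "__main__":
    model = LSTM_Word2Vec_Attention(HIDDEN_SIZE, VOCAB_SIZE, EMBEDDING_DIM)
    dummy_ids = torch.randint(0, VOCAB_SIZE, (2, 50))  # batch of 2 sequences, 50 token ids each
    with torch.no_grad():
        probs, attention_weights = model(dummy_ids)
    print(probs.shape, attention_weights.shape)  # torch.Size([2, 3]) torch.Size([2, 50])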