import streamlit as st
import torch
import torch.nn as nn
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
from collections import Counter
import numpy as np
from typing import List
import time
# Loading the pretrained model
class BahdanauAttention(nn.Module):
    """Additive (Bahdanau) attention over the LSTM outputs."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)  # projects the query (last hidden state)
        self.Wk = nn.Linear(hidden_size, hidden_size)  # projects the keys (per-timestep outputs)
        self.Wv = nn.Linear(hidden_size, 1)            # scores each timestep

    def forward(self, query, keys):
        query = query.unsqueeze(1)  # (batch_size, 1, hidden_size)
        scores = self.Wv(torch.tanh(self.Wa(query) + self.Wk(keys))).squeeze(2)  # (batch_size, seq_len)
        attention_weights = torch.softmax(scores, dim=1)  # (batch_size, seq_len)
        context = torch.bmm(attention_weights.unsqueeze(1), keys).squeeze(1)  # (batch_size, hidden_size)
        return context, attention_weights
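# Shape sketch (illustrative values, not from the original app):
# with hidden_size=256, batch_size=4, seq_len=20,
#   attn = BahdanauAttention(256)
#   context, weights = attn(torch.zeros(4, 256), torch.zeros(4, 20, 256))
#   context.shape == (4, 256); weights.shape == (4, 20)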
class LSTM_Word2Vec_Attention(nn.Module):
    """LSTM classifier with a trainable embedding layer and Bahdanau attention."""

    def __init__(self, hidden_size: int, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.attn = BahdanauAttention(hidden_size)
        self.clf = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.Dropout(),
            nn.Tanh(),
            nn.Linear(128, 3)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        embedded = self.embedding(x)               # (batch_size, seq_len, embedding_dim)
        output, (hidden, _) = self.lstm(embedded)  # output: (batch_size, seq_len, hidden_size)
        context, attention_weights = self.attn(hidden[-1], output)  # attend over all timesteps
        output = self.clf(context)                 # (batch_size, 3) class scores
        output = self.sigmoid(output)              # per-class probabilities
        return output, attention_weights
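# A hedged usage sketch (not part of the original app): the hyperparameters,
# vocabulary size and checkpoint filename below are assumptions for illustration.
#   model = LSTM_Word2Vec_Attention(hidden_size=128, vocab_size=10_000, embedding_dim=64)
#   model.load_state_dict(torch.load("lstm_attention.pt", map_location="cpu"))
#   model.eval()
#   with torch.no_grad():
#       dummy_ids = torch.randint(0, 10_000, (1, 32))  # one padded sequence of 32 token ids
#       probs, attn = model(dummy_ids)                 # probs: (1, 3), attn: (1, 32)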