import streamlit as st
import torch
import torch.nn as nn
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
from collections import Counter
import numpy as np
from typing import List
import time
# Loading the pretrained model
class BahdanauAttention(nn.Module):
    """Additive (Bahdanau) attention over the LSTM outputs."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.Wa = nn.Linear(hidden_size, hidden_size)  # projects the query (last hidden state)
        self.Wk = nn.Linear(hidden_size, hidden_size)  # projects the keys (per-timestep outputs)
        self.Wv = nn.Linear(hidden_size, 1)            # scores each timestep

    def forward(self, query, keys):
        query = query.unsqueeze(1)  # (batch_size, 1, hidden_size)
        scores = self.Wv(torch.tanh(self.Wa(query) + self.Wk(keys))).squeeze(2)  # (batch_size, seq_len)
        attention_weights = torch.softmax(scores, dim=1)  # (batch_size, seq_len)
        context = torch.bmm(attention_weights.unsqueeze(1), keys).squeeze(1)  # (batch_size, hidden_size)
        return context, attention_weights
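# Shape sketch (illustrative values, not from the original app):
# with hidden_size=256, batch_size=4, seq_len=20,
#   attn = BahdanauAttention(256)
#   context, weights = attn(torch.zeros(4, 256), torch.zeros(4, 20, 256))
#   context.shape == (4, 256); weights.shape == (4, 20)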
class LSTM_Word2Vec_Attention(nn.Module):
    """LSTM classifier with a trainable embedding layer and Bahdanau attention."""

    def __init__(self, hidden_size: int, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.attn = BahdanauAttention(hidden_size)
        self.clf = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.Dropout(),
            nn.Tanh(),
            nn.Linear(128, 3)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        embedded = self.embedding(x)               # (batch_size, seq_len, embedding_dim)
        output, (hidden, _) = self.lstm(embedded)  # output: (batch_size, seq_len, hidden_size)
        context, attention_weights = self.attn(hidden[-1], output)  # attend over all timesteps
        output = self.clf(context)                 # (batch_size, 3) class scores
        output = self.sigmoid(output)              # per-class probabilities
        return output, attention_weights
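# A hedged usage sketch (not part of the original app): the hyperparameters,
# vocabulary size and checkpoint filename below are assumptions for illustration.
#   model = LSTM_Word2Vec_Attention(hidden_size=128, vocab_size=10_000, embedding_dim=64)
#   model.load_state_dict(torch.load("lstm_attention.pt", map_location="cpu"))
#   model.eval()
#   with torch.no_grad():
#       dummy_ids = torch.randint(0, 10_000, (1, 32))  # one padded sequence of 32 token ids
#       probs, attn = model(dummy_ids)                 # probs: (1, 3), attn: (1, 32)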