import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision import models
class ForkliftFrameClassifier_V0(nn.Module):
    def __init__(self, n_classes=2, dropout=0.15):
        super(ForkliftFrameClassifier_V0, self).__init__()
        self.dropout = dropout
        # N x 3 x 480 x 640
        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(8, 8), stride=(3, 5), padding=(3, 1))
        self.Bn1 = nn.BatchNorm2d(32)
        # N x 32 x 160 x 127
        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(8, 8), stride=(5, 5), padding=(0, 0))
        self.Bn2 = nn.BatchNorm2d(64)
        # N x 64 x 31 x 24
        self.Maxpool1 = nn.MaxPool2d(kernel_size=(5, 5), stride=(3, 3), padding=(0, 2))
        # N x 64 x 9 x 8
        self.Conv3 = nn.Conv2d(64, 64, kernel_size=(5, 5), stride=(3, 3), padding=(1, 2))
        self.Bn3 = nn.BatchNorm2d(64)
        # N x 64 x 3 x 3
        self.Maxpool2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(0, 0))
        # N x 64 x 1 x 1
        self.Linear1 = nn.Linear(64, 16)
        # Linear1 outputs 16 features, so the multi-class head must also take 16 inputs.
        self.FC_out = nn.Linear(16, 1) if n_classes == 2 else nn.Linear(16, n_classes)

    def forward(self, x):
        x = self.Conv1(x)
        x = self.Bn1(x)
        x = F.relu(x)
        # training=self.training keeps dropout inactive in eval mode
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Conv2(x)
        x = self.Bn2(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool1(x)
        x = self.Conv3(x)
        x = self.Bn3(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool2(x)
        x = x.reshape(x.shape[0], -1)
        x = self.Linear1(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.FC_out(x)
        return x
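# Quick shape check (sketch, not part of the original file): with the 480x640 frames
# documented above, ForkliftFrameClassifier_V0 should return one logit per frame.
# _m0 = ForkliftFrameClassifier_V0()
# _m0.eval()
# print(_m0(torch.rand(2, 3, 480, 640)).shape)  # expected: torch.Size([2, 1])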
class ForkliftFrameClassifier_V1(nn.Module):
    def __init__(self, n_classes=2, dropout=0.15):
        super(ForkliftFrameClassifier_V1, self).__init__()
        self.dropout = dropout
        # N x 3 x 240 x 240
        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
        self.Bn1 = nn.BatchNorm2d(32)
        # N x 32 x 80 x 80
        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
        self.Bn2 = nn.BatchNorm2d(64)
        # N x 64 x 26 x 26
        self.Maxpool1 = nn.MaxPool2d(kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
        # N x 64 x 8 x 8
        self.Conv3 = nn.Conv2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.Bn3 = nn.BatchNorm2d(32)
        # N x 32 x 4 x 4
        self.Maxpool2 = nn.MaxPool2d(kernel_size=(4, 4), stride=(1, 1), padding=(0, 0))
        # N x 32 x 1 x 1
        #self.Linear1 = nn.Linear(64, 16)
        # The flattened features have 32 channels, so the multi-class head takes 32 inputs.
        self.FC_out = nn.Linear(32, 1) if n_classes == 2 else nn.Linear(32, n_classes)

    def forward(self, x):
        x = self.Conv1(x)
        x = self.Bn1(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Conv2(x)
        x = self.Bn2(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool1(x)
        x = self.Conv3(x)
        x = self.Bn3(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool2(x)
        x = x.reshape(x.shape[0], -1)
        #x = self.Linear1(x)
        #x = F.dropout(x, self.dropout, training=self.training)
        x = self.FC_out(x)
        return x
class ForkliftFrameClassifier_V2(nn.Module):
    def __init__(self, n_classes=2, dropout=0.15):
        super(ForkliftFrameClassifier_V2, self).__init__()
        self.dropout = dropout
        # N x 3 x 150 x 150
        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
        self.Bn1 = nn.BatchNorm2d(32)
        # N x 32 x 50 x 50
        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
        self.Bn2 = nn.BatchNorm2d(64)
        # N x 64 x 16 x 16
        self.Maxpool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        # N x 64 x 8 x 8
        self.Conv3 = nn.Conv2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.Bn3 = nn.BatchNorm2d(32)
        # N x 32 x 4 x 4
        self.Maxpool2 = nn.MaxPool2d(kernel_size=(4, 4), stride=(1, 1), padding=(0, 0))
        # N x 32 x 1 x 1
        #self.Linear1 = nn.Linear(64, 16)
        # The flattened features have 32 channels, so the multi-class head takes 32 inputs.
        self.FC_out = nn.Linear(32, 1) if n_classes == 2 else nn.Linear(32, n_classes)

    def forward(self, x):
        x = self.Conv1(x)
        x = self.Bn1(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Conv2(x)
        x = self.Bn2(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool1(x)
        x = self.Conv3(x)
        x = self.Bn3(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.Maxpool2(x)
        x = x.reshape(x.shape[0], -1)
        #x = self.Linear1(x)
        #x = F.dropout(x, self.dropout, training=self.training)
        x = self.FC_out(x)
        return x
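# Analogous checks for the smaller-input variants (sketch, not part of the original file):
# V1 expects 240x240 frames and V2 expects 150x150 frames, each returning one logit per frame.
# print(ForkliftFrameClassifier_V1().eval()(torch.rand(2, 3, 240, 240)).shape)  # torch.Size([2, 1])
# print(ForkliftFrameClassifier_V2().eval()(torch.rand(2, 3, 150, 150)).shape)  # torch.Size([2, 1])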
class ForkliftFrameClassifier_PT1(nn.Module):
    def __init__(self, pretrained_model, n_out_last_layer, n_classes=2):
        super(ForkliftFrameClassifier_PT1, self).__init__()
        self.pt_model = pretrained_model
        # 25088 = 512*7*7, the flattened feature size fed to the classifier of torchvision's VGG models.
        self.pt_model.classifier = nn.Linear(25088, 1) if n_classes == 2 else nn.Linear(n_out_last_layer, n_classes)
        # Note: this freezes the replacement classifier head; the backbone parameters stay trainable.
        for param in self.pt_model.classifier.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.pt_model(x)
        return x
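# Usage sketch (assumption, not part of the original file): given the hard-coded 25088,
# a VGG backbone is presumably the intended pretrained_model:
# _backbone = torchvision.models.vgg16(pretrained=True)
# _pt1 = ForkliftFrameClassifier_PT1(_backbone, n_out_last_layer=25088, n_classes=2)
# print(_pt1(torch.rand(2, 3, 224, 224)).shape)  # expected: torch.Size([2, 1])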
class CNN_Feature_Extractor(nn.Module):
    def __init__(self):
        super(CNN_Feature_Extractor, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=(5, 5), stride=(3, 3))
        self.conv2 = nn.Conv2d(10, 20, kernel_size=(5, 5), stride=(2, 2))
        self.conv3 = nn.Conv2d(20, 30, kernel_size=(5, 5), stride=(2, 2))

    def forward(self, i):
        # i: (B, L, C, H, W) -> merge batch and sequence dims before the convolutions
        x = i.view(-1, i.shape[2], i.shape[3], i.shape[4])
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = nn.AvgPool2d(3)(x)
        # restore the (B, L, features) layout expected by the LSTM
        x = x.view(i.shape[0], i.shape[1], -1)
        return x
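# Shape walk-through (sketch): for 224x224 frames, conv1 -> 74x74, conv2 -> 35x35,
# conv3 -> 16x16, AvgPool2d(3) -> 5x5, so each frame yields 30*5*5 = 750 features,
# matching the LSTM input size below.
# print(CNN_Feature_Extractor()(torch.rand(2, 15, 3, 224, 224)).shape)  # torch.Size([2, 15, 750])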
class LSTM(nn.Module):
    def __init__(self, seq_len, hidden_size, out_size):
        super(LSTM, self).__init__()
        # batch_first=True so the (B, L, 750) features from CNN_Feature_Extractor are
        # read along the sequence dimension rather than the batch dimension.
        self.lstm = nn.LSTM(750, hidden_size, batch_first=True)
        self.fc = nn.Linear(seq_len * hidden_size, out_size)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)
        return x
class Full_LSTM(nn.Module):
    def __init__(self, seq_len=15, hidden_size=100, out_size=512):
        super(Full_LSTM, self).__init__()
        self.net_cnn = CNN_Feature_Extractor()
        self.net_lstm = LSTM(seq_len, hidden_size, out_size)
        self.classifier = nn.Sequential(nn.Linear(out_size, 16),
                                        nn.Dropout(0.3),
                                        nn.ReLU(),
                                        nn.Linear(16, 1))

    def forward(self, x):
        # x.size() == (B, L, C, H, W)
        # B : Batch size
        # L : Sequence Length = 15
        # C : Channels = 3
        # H : Height = 224
        # W : Width = 224
        x = self.net_cnn(x)
        x = self.net_lstm(x)
        x = self.classifier(x)
        return x
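# Smoke-test sketch (not part of the original file): the CNN extractor plus the LSTM head
# map a clip of 15 frames of 224x224 to a single logit per clip.
# print(Full_LSTM()(torch.rand(2, 15, 3, 224, 224)).shape)  # expected: torch.Size([2, 1])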
class Full_CNN(nn.Module):
    def __init__(self):
        super(Full_CNN, self).__init__()
        self.model = torchvision.models.resnet18(pretrained=True)
        #for param in self.model.parameters():
        #    param.requires_grad = False
        self.model.fc = nn.Sequential(nn.Linear(512, 16),
                                      nn.Dropout(0.3),
                                      nn.ReLU(),
                                      nn.Linear(16, 1))

    def forward(self, x):
        # x.size() == (B, L, C, H, W)
        # B : Batch size
        # L : Sequence Length = 15
        # C : Channels = 3
        # H : Height = 224
        # W : Width = 224
        x = self.model(x[:, 0])  # classify only the first frame of each clip
        return x
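# Smoke-test sketch (not part of the original file): Full_CNN ignores the temporal dimension
# and classifies only the first frame of each clip with the ResNet-18 backbone.
# print(Full_CNN()(torch.rand(2, 15, 3, 224, 224)).shape)  # expected: torch.Size([2, 1])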
class Full_Model(nn.Module):
    def __init__(self, seq_len=15, hidden_size=100, classifier_size=512, dropout=0.4, cnn_model_path=None, lstm_model_path=None):
        super(Full_Model, self).__init__()
        self.CNN_Part = Full_CNN()
        if cnn_model_path is not None:
            self.CNN_Part.model.load_state_dict(torch.load(cnn_model_path)['model_state_dict'])
            self.CNN_classifier = copy.deepcopy(self.CNN_Part.model.fc)
        else:
            self.CNN_classifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                                nn.Dropout(dropout),
                                                nn.ReLU(),
                                                nn.Linear(16, 1))
        # The ResNet head is always replaced so that CNN_Part outputs a 512-d feature vector;
        # the loaded (or freshly built) head lives on in CNN_classifier.
        self.CNN_Part.model.fc = nn.Sequential(nn.Linear(classifier_size, classifier_size),
                                               nn.Dropout(dropout),
                                               nn.ReLU())
        self.LSTM_Part = Full_LSTM(seq_len, hidden_size, classifier_size)
        if lstm_model_path is not None:
            self.LSTM_Part.load_state_dict(torch.load(lstm_model_path)['model_state_dict'])
            self.LSTM_classifier = copy.deepcopy(self.LSTM_Part.classifier)
        else:
            self.LSTM_classifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                                 nn.Dropout(dropout),
                                                 nn.ReLU(),
                                                 nn.Linear(16, 1))
        self.Finalclassifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                             nn.Dropout(dropout),
                                             nn.ReLU(),
                                             nn.Linear(16, 1))

    def forward(self, x):
        # x.size() == (B, L, C, H, W)
        # B : Batch size
        # L : Sequence Length = 15
        # C : Channels = 3
        # H : Height = 224
        # W : Width = 224
        cnn_out = self.CNN_Part(x)
        # cnn_out : (B, 512)
        lstm_out = self.LSTM_Part.net_cnn(x)
        lstm_out = self.LSTM_Part.net_lstm(lstm_out)
        # lstm_out : (B, 512)
        out = cnn_out + lstm_out
        cnn_out = self.CNN_classifier(cnn_out)
        lstm_out = self.LSTM_classifier(lstm_out)
        out = self.Finalclassifier(out)
        return (cnn_out, lstm_out, out)
# The final (deployment) model loads a Full model but uses only the output and forward pass of the
# CNN component. It is built this way (the LSTM weights are loaded as well) in case those weights
# turn out to be useful in the future.
class Final_CNN_Model(nn.Module):
    def __init__(self, seq_len=15, hidden_size=100, classifier_size=512, dropout=0.4, cnn_model_path=None, lstm_model_path=None):
        super(Final_CNN_Model, self).__init__()
        self.CNN_Part = Full_CNN()
        if cnn_model_path is not None:
            self.CNN_Part.model.load_state_dict(torch.load(cnn_model_path)['model_state_dict'])
            self.CNN_classifier = copy.deepcopy(self.CNN_Part.model.fc)
        else:
            self.CNN_classifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                                nn.Dropout(dropout),
                                                nn.ReLU(),
                                                nn.Linear(16, 1))
        # As in Full_Model, the ResNet head is replaced so CNN_Part outputs 512-d features.
        self.CNN_Part.model.fc = nn.Sequential(nn.Linear(classifier_size, classifier_size),
                                               nn.Dropout(dropout),
                                               nn.ReLU())
        self.LSTM_Part = Full_LSTM(seq_len, hidden_size, classifier_size)
        if lstm_model_path is not None:
            self.LSTM_Part.load_state_dict(torch.load(lstm_model_path)['model_state_dict'])
            self.LSTM_classifier = copy.deepcopy(self.LSTM_Part.classifier)
        else:
            self.LSTM_classifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                                 nn.Dropout(dropout),
                                                 nn.ReLU(),
                                                 nn.Linear(16, 1))
        self.Finalclassifier = nn.Sequential(nn.Linear(classifier_size, 16),
                                             nn.Dropout(dropout),
                                             nn.ReLU(),
                                             nn.Linear(16, 1))

    def forward(self, x):
        # x.size() == (B, L, C, H, W)
        # B : Batch size
        # L : Sequence Length = 15
        # C : Channels = 3
        # H : Height = 224
        # W : Width = 224
        cnn_out = self.CNN_Part(x)
        cnn_out = self.CNN_classifier(cnn_out)
        return cnn_out
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.cuda.empty_cache()
# x1 = torch.rand((64, 15, 3, 224, 224))
# model = Full_Model(cnn_model_path='Best_model_4.pt', lstm_model_path='Best_model_10.pt')
# model.to(device)
# x1 = x1.to(device)
# out1 = model(x1)
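# A similar check for the deployment model (sketch, reusing the checkpoint names above):
# model_final = Final_CNN_Model(cnn_model_path='Best_model_4.pt', lstm_model_path='Best_model_10.pt')
# model_final.to(device)
# out_final = model_final(x1)  # expected shape: torch.Size([64, 1])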