Spaces:

forklift-app
/

forklift-images

Runtime error

App Files Files Community

Stepp1 committed on Dec 19, 2022

Commit

756da91

1 Parent(s): c0f8ae8

[app] model and predict update

Browse files

Files changed (2) hide show

app.py +24 -17
models.py +458 -0

app.py CHANGED Viewed

@@ -1,42 +1,49 @@
 import gradio as gr
 import torch
-import torch.nn as nn
 from torchvision import transforms
-from torchvision.models import resnet18
 from transferwee import download
-model = resnet18(pretrained=True)
-model.fc = nn.Sequential(
-    nn.Linear(512, 16),
-    nn.ReLU(),
-    nn.Linear(16,1)
-)
 # download latest model
-# download("https://we.tl/t-bbgc3gXROZ", "best.pt") # 1
-# download("https://we.tl/t-25s74dahjU", "best.pt") # 4 --> 0.92
-# checkpoint = torch.load("best.pt", map_location=torch.device('cpu'))
-# model.load_state_dict(checkpoint['model_state_dict'])
 model.eval()
 labels_to_class = {
     0: "normal",
     1: "risk"
 }
 def predict(inp):
-    inp = transforms.ToTensor()(inp).unsqueeze(0) # [1, C, H, W]
     with torch.no_grad():
         prediction = torch.sigmoid(model(inp)[0])
-        if prediction > 0.7:
             confidences = {
-                "Normal": float(prediction[0])
             }
         else:
             confidences = {
-                "Riesgo": float(prediction[0])
             }
     print(confidences)
     return confidences

 import gradio as gr
 import torch
 from torchvision import transforms
 from transferwee import download
+from models import Final_CNN_Model
 # download latest model
+download("https://we.tl/t-uc4MWbAzIJ", "best.pt")
+model = Final_CNN_Model()
+# Map every stored tensor to the CPU so that a checkpoint saved from a GPU
+# session can still be loaded on a host without CUDA.
+checkpoint = torch.load("best.pt", map_location=torch.device("cpu"))
+model.load_state_dict(checkpoint['model_state_dict'])
 model.eval()
 labels_to_class = {
     0: "normal",
     1: "risk"
 }
 def predict(inp):
+    """Classify one image as "Riesgo" (risk) or "Normal".
+
+    The image is converted to a tensor, resized to 224 x 224 and wrapped
+    in batch and sequence axes of length one before being passed to the
+    module-level ``model``. The model output is squashed with a sigmoid
+    and read as the probability of the "risk" class.
+
+    Args:
+        inp: the input image, in any form ``transforms.ToTensor`` accepts.
+
+    Returns:
+        A one-entry dict mapping the predicted label to the confidence of
+        that label.
+    """
+    transforms_pipe = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Resize((224,224))
+    ])
+    # [C, H, W] -> [B=1, L=1, C, H, W]. The image itself must be reshaped;
+    # allocating a fresh torch.Tensor of that size would hand the model
+    # uninitialised memory instead of the picture.
+    inp = transforms_pipe(inp).unsqueeze(0).unsqueeze(0)
     with torch.no_grad():
         prediction = torch.sigmoid(model(inp)[0])
+        print(prediction)
+        risk = float(prediction[0])
+        if risk > 0.5:
             confidences = {
+                "Riesgo": risk
             }
         else:
             confidences = {
+                # confidence of the reported class, i.e. 1 - P(risk)
+                "Normal": 1.0 - risk
             }
     print(confidences)
     return confidences

models.py ADDED Viewed

	@@ -0,0 +1,458 @@

+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+from torchvision import models
+import torchvision
+import torch
+import copy
+class ForkliftFrameClassifier_V0(nn.Module):
+    """Small CNN that classifies a single frame.
+
+    Three strided convolutions (each followed by batch norm, ReLU and
+    dropout) and two max-pool layers reduce the image to a flat feature
+    vector, which a two-layer linear head maps to the output logits. The
+    shape comments below assume a 480 x 640 input, for which the flattened
+    vector has 64 entries.
+
+    Args:
+        n_classes: number of classes. With 2 the head emits a single logit,
+            otherwise it emits ``n_classes`` logits.
+        dropout: dropout probability applied after every hidden layer.
+    """
+    def __init__(self, n_classes = 2, dropout = 0.15):
+        super(ForkliftFrameClassifier_V0, self).__init__()
+        self.dropout = dropout
+        # N x 3 x 480 x 640
+        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(8,8), stride=(3,5), padding=(3,1))
+        self.Bn1 = nn.BatchNorm2d(32)
+        # N x 32 x 160 x 127
+        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(8,8), stride=(5,5), padding=(0,0))
+        self.Bn2 = nn.BatchNorm2d(64)
+        # N x 64 x 31 x 24
+        self.Maxpool1 = nn.MaxPool2d(kernel_size=(5,5), stride=(3,3), padding=(0,2))
+        # N x 64 x 9 x 8
+        self.Conv3 = nn.Conv2d(64, 64, kernel_size=(5,5), stride=(3,3), padding=(1,2))
+        self.Bn3 = nn.BatchNorm2d(64)
+        # N x 64 x 3 x 3
+        self.Maxpool2 = nn.MaxPool2d(kernel_size=(3,3), stride=(1,1), padding=(0,0))
+        # N x 64 x 1 x 1
+        self.Linear1 = nn.Linear(64, 16)
+        # Linear1 emits 16 features, so the output layer takes 16 inputs
+        # whatever the number of classes.
+        self.FC_out = nn.Linear(16, 1 if n_classes == 2 else n_classes)
+    def _drop(self, x):
+        # F.dropout defaults to training=True; tie it to the module mode so
+        # that eval() really switches dropout off.
+        return F.dropout(x, self.dropout, training=self.training)
+    def forward(self, x):
+        """Return the logits for a batch of images shaped (N, 3, H, W)."""
+        x = self._drop(F.relu(self.Bn1(self.Conv1(x))))
+        x = self._drop(F.relu(self.Bn2(self.Conv2(x))))
+        x = self.Maxpool1(x)
+        x = self._drop(F.relu(self.Bn3(self.Conv3(x))))
+        x = self.Maxpool2(x)
+        # flatten everything but the batch axis
+        x = x.reshape(x.shape[0], -1)
+        x = self._drop(self.Linear1(x))
+        return self.FC_out(x)
+class ForkliftFrameClassifier_V1(nn.Module):
+    """Small CNN that classifies a single frame.
+
+    Three strided convolutions (each followed by batch norm, ReLU and
+    dropout) and two max-pool layers reduce the image to a flat feature
+    vector that a single linear layer maps to the output logits. The shape
+    comments below assume a 240 x 240 input, for which the flattened vector
+    has 32 entries.
+
+    Args:
+        n_classes: number of classes. With 2 the head emits a single logit,
+            otherwise it emits ``n_classes`` logits.
+        dropout: dropout probability applied after every convolution.
+    """
+    def __init__(self, n_classes = 2, dropout = 0.15):
+        super(ForkliftFrameClassifier_V1, self).__init__()
+        self.dropout = dropout
+        # N x 3 x 240 x 240
+        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(5,5), stride=(3,3), padding=(1,1))
+        self.Bn1 = nn.BatchNorm2d(32)
+        # N x 32 x 80 x 80
+        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(5,5), stride=(3,3), padding=(1,1))
+        self.Bn2 = nn.BatchNorm2d(64)
+        # N x 64 x 26 x 26
+        self.Maxpool1 = nn.MaxPool2d(kernel_size=(5,5), stride=(3,3), padding=(1,1))
+        # N x 64 x 8 x 8
+        self.Conv3 = nn.Conv2d(64, 32, kernel_size=(3,3), stride=(2,2), padding=(1,1))
+        self.Bn3 = nn.BatchNorm2d(32)
+        # N x 32 x 4 x 4
+        self.Maxpool2 = nn.MaxPool2d(kernel_size=(4,4), stride=(1,1), padding=(0,0))
+        # N x 32 x 1 x 1
+        # Conv3 leaves 32 channels, so the output layer takes 32 inputs
+        # whatever the number of classes.
+        self.FC_out = nn.Linear(32, 1 if n_classes == 2 else n_classes)
+    def _drop(self, x):
+        # F.dropout defaults to training=True; tie it to the module mode so
+        # that eval() really switches dropout off.
+        return F.dropout(x, self.dropout, training=self.training)
+    def forward(self, x):
+        """Return the logits for a batch of images shaped (N, 3, H, W)."""
+        x = self._drop(F.relu(self.Bn1(self.Conv1(x))))
+        x = self._drop(F.relu(self.Bn2(self.Conv2(x))))
+        x = self.Maxpool1(x)
+        x = self._drop(F.relu(self.Bn3(self.Conv3(x))))
+        x = self.Maxpool2(x)
+        # flatten everything but the batch axis
+        x = x.reshape(x.shape[0], -1)
+        return self.FC_out(x)
+class ForkliftFrameClassifier_V2(nn.Module):
+    """Small CNN that classifies a single frame.
+
+    Three strided convolutions (each followed by batch norm, ReLU and
+    dropout) and two max-pool layers reduce the image to a flat feature
+    vector that a single linear layer maps to the output logits. The shape
+    comments below assume a 150 x 150 input, for which the flattened vector
+    has 32 entries.
+
+    Args:
+        n_classes: number of classes. With 2 the head emits a single logit,
+            otherwise it emits ``n_classes`` logits.
+        dropout: dropout probability applied after every convolution.
+    """
+    def __init__(self, n_classes = 2, dropout = 0.15):
+        super(ForkliftFrameClassifier_V2, self).__init__()
+        self.dropout = dropout
+        # N x 3 x 150 x 150
+        self.Conv1 = nn.Conv2d(3, 32, kernel_size=(5,5), stride=(3,3), padding=(1,1))
+        self.Bn1 = nn.BatchNorm2d(32)
+        # N x 32 x 50 x 50
+        self.Conv2 = nn.Conv2d(32, 64, kernel_size=(5,5), stride=(3,3), padding=(1,1))
+        self.Bn2 = nn.BatchNorm2d(64)
+        # N x 64 x 16 x 16
+        self.Maxpool1 = nn.MaxPool2d(kernel_size=(3,3), stride=(2,2), padding=(1,1))
+        # N x 64 x 8 x 8
+        self.Conv3 = nn.Conv2d(64, 32, kernel_size=(3,3), stride=(2,2), padding=(1,1))
+        self.Bn3 = nn.BatchNorm2d(32)
+        # N x 32 x 4 x 4
+        self.Maxpool2 = nn.MaxPool2d(kernel_size=(4,4), stride=(1,1), padding=(0,0))
+        # N x 32 x 1 x 1
+        # Conv3 leaves 32 channels, so the output layer takes 32 inputs
+        # whatever the number of classes.
+        self.FC_out = nn.Linear(32, 1 if n_classes == 2 else n_classes)
+    def _drop(self, x):
+        # F.dropout defaults to training=True; tie it to the module mode so
+        # that eval() really switches dropout off.
+        return F.dropout(x, self.dropout, training=self.training)
+    def forward(self, x):
+        """Return the logits for a batch of images shaped (N, 3, H, W)."""
+        x = self._drop(F.relu(self.Bn1(self.Conv1(x))))
+        x = self._drop(F.relu(self.Bn2(self.Conv2(x))))
+        x = self.Maxpool1(x)
+        x = self._drop(F.relu(self.Bn3(self.Conv3(x))))
+        x = self.Maxpool2(x)
+        # flatten everything but the batch axis
+        x = x.reshape(x.shape[0], -1)
+        return self.FC_out(x)
+class ForkliftFrameClassifier_PT1(nn.Module):
+    """Wrap a pretrained network and swap its ``classifier`` for a linear head.
+
+    Args:
+        pretrained_model: module whose ``classifier`` attribute is replaced.
+        n_out_last_layer: input width of the new head in the multi-class
+            case; it is not used when ``n_classes`` is 2.
+        n_classes: number of classes. With 2 the head is ``Linear(25088, 1)``,
+            otherwise ``Linear(n_out_last_layer, n_classes)``.
+    """
+    def __init__(self, pretrained_model,n_out_last_layer, n_classes = 2):
+        super().__init__()
+        self.pt_model = pretrained_model
+        if n_classes == 2:
+            # NOTE(review): 25088 is presumably the flattened feature size
+            # of the backbone this was written for - confirm.
+            head = nn.Linear(25088, 1)
+        else:
+            head = nn.Linear(n_out_last_layer, n_classes)
+        self.pt_model.classifier = head
+        # NOTE(review): this freezes the freshly created head rather than
+        # the pretrained layers - confirm that this is intended.
+        for weight in head.parameters():
+            weight.requires_grad = False
+    def forward(self, x):
+        """Run the wrapped network on ``x`` and return its output."""
+        return self.pt_model(x)
+class CNN_Feature_Extractor(nn.Module):
+    """Per-frame convolutional encoder for a batch of frame sequences.
+
+    Every frame goes through three un-padded 5 x 5 convolutions with ReLU
+    and a 3 x 3 average pool; the result is flattened to one feature vector
+    per frame (30 * 5 * 5 = 750 values for 224 x 224 frames).
+    """
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(3, 10, kernel_size=(5,5), stride=(3,3))
+        self.conv2 = nn.Conv2d(10, 20, kernel_size=(5,5), stride=(2,2))
+        self.conv3 = nn.Conv2d(20, 30, kernel_size=(5,5), stride=(2,2))
+    def forward(self, i):
+        """Map ``i`` of shape (B, L, C, H, W) to features of shape (B, L, F)."""
+        # fold the sequence axis into the batch axis so the 2-d convolutions
+        # see one frame per row
+        frames = i.view(-1, *i.shape[2:5])
+        for conv in (self.conv1, self.conv2, self.conv3):
+            frames = F.relu(conv(frames))
+        pooled = nn.AvgPool2d(3)(frames)
+        # restore (B, L, ...) and flatten each frame's feature map
+        return pooled.view(i.shape[0], i.shape[1], -1)
+class LSTM(nn.Module):
+    """LSTM over per-frame feature vectors followed by a linear projection.
+
+    Args:
+        seq_len: number of frames per sequence; fixes the input width of
+            the final linear layer.
+        hidden_size: LSTM hidden state size.
+        out_size: size of the projected output vector.
+    """
+    def __init__(self, seq_len, hidden_size,out_size):
+        super(LSTM, self).__init__()
+        # Inputs arrive as (B, L, 750). batch_first=True makes the
+        # recurrence run along the frame axis L; with the default layout it
+        # would run along the batch axis and mix unrelated samples.
+        # NOTE(review): parameter names and shapes are unchanged, but
+        # checkpoints trained with the previous layout should be re-validated.
+        self.lstm = nn.LSTM(750, hidden_size, batch_first=True)
+        self.fc = nn.Linear(seq_len*hidden_size, out_size)
+    def forward(self, x):
+        """Map ``x`` of shape (B, seq_len, 750) to a (B, out_size) tensor."""
+        x, _ = self.lstm(x)
+        # concatenate the hidden states of all time steps per sample
+        x = x.reshape(x.shape[0], -1)
+        x = self.fc(x)
+        return x
+class Full_LSTM(nn.Module):
+    """Sequence classifier: per-frame CNN features, an LSTM, then a small head.
+
+    Args:
+        seq_len: number of frames per input sequence.
+        hidden_size: hidden size handed to the LSTM stage.
+        out_size: width of the LSTM stage output fed to the classifier.
+    """
+    def __init__(self,seq_len = 15, hidden_size = 100, out_size = 512):
+        super().__init__()
+        self.net_cnn = CNN_Feature_Extractor()
+        self.net_lstm = LSTM(seq_len, hidden_size, out_size)
+        head_layers = [
+            nn.Linear(out_size, 16),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Linear(16,1),
+        ]
+        self.classifier = nn.Sequential(*head_layers)
+    def forward(self, x):
+        """Return one logit per sequence.
+
+        ``x`` is laid out as (B, L, C, H, W): batch size, sequence length
+        (15), channels (3), height (224) and width (224).
+        """
+        frame_features = self.net_cnn(x)
+        sequence_embedding = self.net_lstm(frame_features)
+        return self.classifier(sequence_embedding)
+class Full_CNN(nn.Module):
+    """ResNet-18 with its final layer replaced by a 512 -> 16 -> 1 head.
+
+    Only the first frame of every input sequence is looked at.
+    """
+    def __init__(self):
+        super().__init__()
+        backbone = torchvision.models.resnet18(pretrained=True)
+        backbone.fc = nn.Sequential(
+            nn.Linear(512, 16),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Linear(16,1),
+        )
+        self.model = backbone
+    def forward(self, x):
+        """Return the network output for the first frame of each sequence.
+
+        ``x`` is laid out as (B, L, C, H, W): batch size, sequence length
+        (15), channels (3), height (224) and width (224).
+        """
+        first_frame = x[:,0,:]
+        return self.model(first_frame)
+class Full_Model(nn.Module):
+    """Two-branch classifier that fuses a CNN branch and an LSTM branch.
+
+    Each branch yields a ``classifier_size``-wide embedding. The branch
+    embeddings are scored by their own heads and their sum is scored by a
+    final head.
+
+    Args:
+        seq_len: number of frames per input sequence.
+        hidden_size: hidden size of the LSTM branch.
+        classifier_size: width of the branch embeddings.
+        dropout: dropout probability used in the freshly built layers.
+        cnn_model_path: optional checkpoint whose ``'model_state_dict'``
+            entry is loaded into the ResNet of the CNN branch.
+        lstm_model_path: optional checkpoint whose ``'model_state_dict'``
+            entry is loaded into the LSTM branch.
+    """
+    def __init__(self,seq_len = 15, hidden_size = 100, classifier_size = 512, dropout = 0.4, cnn_model_path = None, lstm_model_path= None):
+        super(Full_Model, self).__init__()
+        self.CNN_Part = Full_CNN()
+        if cnn_model_path is not None:
+            self.CNN_Part.model.load_state_dict(self._read_state_dict(cnn_model_path))
+            # keep the loaded head before ``fc`` is replaced below
+            self.CNN_classifier = copy.deepcopy(self.CNN_Part.model.fc)
+        else:
+            self.CNN_classifier = self._make_head(classifier_size, dropout)
+        # the backbone now ends in an embedding layer instead of a classifier
+        self.CNN_Part.model.fc = nn.Sequential(nn.Linear(classifier_size, classifier_size),
+                                        nn.Dropout(dropout),
+                                        nn.ReLU())
+        self.LSTM_Part = Full_LSTM(seq_len, hidden_size, classifier_size)
+        if lstm_model_path is not None:
+            self.LSTM_Part.load_state_dict(self._read_state_dict(lstm_model_path))
+            self.LSTM_classifier = copy.deepcopy(self.LSTM_Part.classifier)
+        else:
+            self.LSTM_classifier = self._make_head(classifier_size, dropout)
+        self.Finalclassifier = self._make_head(classifier_size, dropout)
+    @staticmethod
+    def _read_state_dict(path):
+        # Map tensors to the CPU so that a checkpoint saved from a GPU
+        # session also loads on a host without CUDA; load_state_dict then
+        # copies the values into the existing parameters.
+        return torch.load(path, map_location=torch.device('cpu'))['model_state_dict']
+    @staticmethod
+    def _make_head(in_features, dropout):
+        # in_features -> 16 -> 1 scoring head shared by all three outputs
+        return nn.Sequential(nn.Linear(in_features, 16),
+                             nn.Dropout(dropout),
+                             nn.ReLU(),
+                             nn.Linear(16,1))
+    def forward(self, x):
+        """Return ``(cnn_out, lstm_out, out)`` logits for a batch of sequences.
+
+        ``x`` is laid out as (B, L, C, H, W): batch size, sequence length
+        (15), channels (3), height (224) and width (224).
+        """
+        cnn_out = self.CNN_Part(x)
+        # cnn_out : (B, 512)
+        lstm_out = self.LSTM_Part.net_cnn(x)
+        lstm_out = self.LSTM_Part.net_lstm(lstm_out)
+        # lstm_out : (B, 512)
+        out = cnn_out + lstm_out
+        cnn_out = self.CNN_classifier(cnn_out)
+        lstm_out = self.LSTM_classifier(lstm_out)
+        out = self.Finalclassifier(out)
+        return (cnn_out, lstm_out, out)
+# The final model is built by loading a full model but using only the output and the forward pass of a single
+# component (the original note names the LSTM component, although Final_CNN_Model.forward runs only the CNN branch). It is done this way (loading even the LSTM weights) in case those weights are found to be useful in the future.
+class Final_CNN_Model(nn.Module):
+    """Full two-branch model whose forward pass uses only the CNN branch.
+
+    The LSTM branch and the fusion head are still constructed, so they
+    remain part of this module's state dict, but ``forward`` does not run
+    them.
+
+    Args:
+        seq_len: number of frames per input sequence.
+        hidden_size: hidden size of the LSTM branch.
+        classifier_size: width of the branch embeddings.
+        dropout: dropout probability used in the freshly built layers.
+        cnn_model_path: optional checkpoint whose ``'model_state_dict'``
+            entry is loaded into the ResNet of the CNN branch.
+        lstm_model_path: optional checkpoint whose ``'model_state_dict'``
+            entry is loaded into the LSTM branch.
+    """
+    def __init__(self,seq_len = 15, hidden_size = 100, classifier_size = 512, dropout = 0.4, cnn_model_path = None, lstm_model_path= None):
+        super(Final_CNN_Model, self).__init__()
+        self.CNN_Part = Full_CNN()
+        if cnn_model_path is not None:
+            self.CNN_Part.model.load_state_dict(self._read_state_dict(cnn_model_path))
+            # keep the loaded head before ``fc`` is replaced below
+            self.CNN_classifier = copy.deepcopy(self.CNN_Part.model.fc)
+        else:
+            self.CNN_classifier = self._make_head(classifier_size, dropout)
+        # the backbone now ends in an embedding layer instead of a classifier
+        self.CNN_Part.model.fc = nn.Sequential(nn.Linear(classifier_size, classifier_size),
+                                        nn.Dropout(dropout),
+                                        nn.ReLU())
+        self.LSTM_Part = Full_LSTM(seq_len, hidden_size, classifier_size)
+        if lstm_model_path is not None:
+            self.LSTM_Part.load_state_dict(self._read_state_dict(lstm_model_path))
+            self.LSTM_classifier = copy.deepcopy(self.LSTM_Part.classifier)
+        else:
+            self.LSTM_classifier = self._make_head(classifier_size, dropout)
+        self.Finalclassifier = self._make_head(classifier_size, dropout)
+    @staticmethod
+    def _read_state_dict(path):
+        # Map tensors to the CPU so that a checkpoint saved from a GPU
+        # session also loads on a host without CUDA; load_state_dict then
+        # copies the values into the existing parameters.
+        return torch.load(path, map_location=torch.device('cpu'))['model_state_dict']
+    @staticmethod
+    def _make_head(in_features, dropout):
+        # in_features -> 16 -> 1 scoring head
+        return nn.Sequential(nn.Linear(in_features, 16),
+                             nn.Dropout(dropout),
+                             nn.ReLU(),
+                             nn.Linear(16,1))
+    def forward(self, x):
+        """Return the CNN-branch logit for each sequence in the batch.
+
+        ``x`` is laid out as (B, L, C, H, W): batch size, sequence length
+        (15), channels (3), height (224) and width (224).
+        """
+        cnn_out = self.CNN_Part(x)
+        cnn_out = self.CNN_classifier(cnn_out)
+        return cnn_out
+# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# torch.cuda.empty_cache()
+# x1 = torch.rand((64, 15, 3, 224 , 224))
+# model = Full_Model(cnn_model_path = 'Best_model_4.pt', lstm_model_path= 'Best_model_10.pt')
+# model.to(device)
+# x1 = x1.to(device)
+# out1 = model(x1)