first commit
- .gitignore +2 -0
- app.py +50 -0
- imagenes/tt0084058.jpg +0 -0
- imagenes/tt0084867.jpg +0 -0
- imagenes/tt0085121.jpg +0 -0
- model.pth +3 -0
- models.py +16 -0
- requirements.txt +8 -0
- training/dataset.py +65 -0
- training/engine.py +44 -0
- training/inference.py +57 -0
- training/models.py +16 -0
- training/predict_single.py +42 -0
- training/train.py +81 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+venv/
+__pycache__/
app.py
ADDED
@@ -0,0 +1,50 @@
+import gradio as gr
+import models
+import torch
+import torchvision.transforms as transforms
+import cv2
+import numpy as np
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint (map_location lets a CPU-only Space load a GPU-trained checkpoint)
+checkpoint = torch.load('model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+transform = transforms.Compose([
+    transforms.ToPILImage(),
+    transforms.ToTensor(),
+])
+
+genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
+          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
+          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
+          'Sport', 'Thriller', 'War', 'Western']
+
+
+def segment(image):
+    image = np.asarray(image)
+    # the Gradio image input already arrives as an RGB array, so no color conversion is needed
+    image = transform(image)
+    image = image.float()  # ToTensor already returns float32; avoid re-wrapping with torch.tensor
+    image = image.to(device)
+    image = torch.unsqueeze(image, dim=0)
+    # get the predictions by passing the image through the model
+    outputs = model(image)
+    outputs = torch.sigmoid(outputs)
+    outputs = outputs.detach().cpu()
+
+    out_dict = {k: v for k, v in zip(genres, outputs.tolist()[0])}
+    return out_dict
+
+
+iface = gr.Interface(fn=segment,
+                     inputs="image",
+                     outputs="label",
+                     title="Poster classification",
+                     description="Classify the genre of your poster by uploading an image",
+                     examples=[["imagenes/tt0084058.jpg"], ["imagenes/tt0084867.jpg"], ["imagenes/tt0085121.jpg"]]).launch()
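Returning a {genre: probability} dict to a "label" output lets Gradio render the scores as ranked confidence bars. To cap how many genres appear, the output component can be constructed explicitly; a minimal sketch, assuming Gradio 3.x (older releases spell it gr.outputs.Label):

    # show only the five highest-scoring genres instead of the full 25-entry list
    outputs=gr.Label(num_top_classes=5)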
imagenes/tt0084058.jpg
ADDED
imagenes/tt0084867.jpg
ADDED
imagenes/tt0085121.jpg
ADDED
model.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5ec1109b461ee19772daeec5c78c3af9cb28f42854b81338f2ab7ef8d0e52d
+size 94965817
models.py
ADDED
@@ -0,0 +1,16 @@
+from torchvision import models as models
+import torch.nn as nn
+def model(pretrained, requires_grad):
+    model = models.resnet50(progress=True, pretrained=pretrained)
+    # freeze the hidden layers
+    if not requires_grad:
+        for param in model.parameters():
+            param.requires_grad = False
+    # train the hidden layers
+    else:
+        for param in model.parameters():
+            param.requires_grad = True
+    # make the classification layer learnable
+    # we have 25 genre classes in total
+    model.fc = nn.Linear(2048, 25)
+    return model
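Recent torchvision releases (0.13 and later) deprecate the pretrained flag in favor of a weights argument. A minimal equivalent sketch, an assumption about the environment rather than part of this commit:

    from torchvision.models import resnet50, ResNet50_Weights

    # weights=None matches pretrained=False; ResNet50_Weights.DEFAULT matches pretrained=True
    backbone = resnet50(weights=ResNet50_Weights.DEFAULT)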
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+torch
+pandas
+matplotlib
+tqdm
+opencv-python
+torchvision
+gradio
+jinja2
training/dataset.py
ADDED
@@ -0,0 +1,65 @@
+import torch
+import cv2
+import numpy as np
+import torchvision.transforms as transforms
+from torch.utils.data import Dataset
+
+
+class ImageDataset(Dataset):
+    def __init__(self, csv, train, test):
+        self.csv = csv
+        self.train = train
+        self.test = test
+        self.all_image_names = self.csv[:]['Id']
+        self.all_labels = np.array(self.csv.drop(['Id', 'Genre'], axis=1))
+        self.train_ratio = int(0.85 * len(self.csv))
+        self.valid_ratio = len(self.csv) - self.train_ratio
+        # set the training data images and labels
+        if self.train:
+            print(f"Number of training images: {self.train_ratio}")
+            self.image_names = list(self.all_image_names[:self.train_ratio])
+            self.labels = list(self.all_labels[:self.train_ratio])
+            # define the training transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.Resize((400, 400)),
+                transforms.RandomHorizontalFlip(p=0.5),
+                transforms.RandomRotation(degrees=45),
+                transforms.ToTensor(),
+            ])
+        # set the validation data images and labels (the last 10 rows are held out for testing)
+        elif not self.train and not self.test:
+            print(f"Number of validation images: {self.valid_ratio - 10}")
+            self.image_names = list(self.all_image_names[-self.valid_ratio:-10])
+            # slice the labels exactly like the names so the two lists stay aligned
+            self.labels = list(self.all_labels[-self.valid_ratio:-10])
+            # define the validation transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.Resize((400, 400)),
+                transforms.ToTensor(),
+            ])
+        # set the test data images and labels, only the last 10 images;
+        # these are used in a separate inference script
+        elif self.test and not self.train:
+            self.image_names = list(self.all_image_names[-10:])
+            self.labels = list(self.all_labels[-10:])
+            # define the test transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.ToTensor(),
+            ])
+
+    def __len__(self):
+        return len(self.image_names)
+
+    def __getitem__(self, index):
+        image = cv2.imread(f"../input/movie-classifier/Multi_Label_dataset/Images/{self.image_names[index]}.jpg")
+        # convert the image from BGR to RGB color format
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # apply image transforms
+        image = self.transform(image)
+        targets = self.labels[index]
+
+        return {
+            'image': image.float(),
+            'label': torch.tensor(targets, dtype=torch.float32)
+        }
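The split arithmetic: for a CSV with N rows, the first int(0.85 * N) rows are training data, the last 10 are held out for the inference script, and everything in between is validation. A minimal check with a hypothetical N = 1000 (not a figure from the dataset):

    N = 1000
    train_ratio = int(0.85 * N)      # rows 0..849 -> training
    valid_ratio = N - train_ratio    # 150
    # rows 850..989 -> validation (the slice stops 10 short of the end)
    # rows 990..999 -> test, consumed by training/inference.py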
training/engine.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+from tqdm import tqdm
+
+# training function
+def train(model, dataloader, optimizer, criterion, train_data, device):
+    print('Training')
+    model.train()
+    counter = 0
+    train_running_loss = 0.0
+    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):
+        counter += 1
+        data, target = data['image'].to(device), data['label'].to(device)
+        optimizer.zero_grad()
+        outputs = model(data)
+        # apply sigmoid activation to get all the outputs between 0 and 1
+        outputs = torch.sigmoid(outputs)
+        loss = criterion(outputs, target)
+        train_running_loss += loss.item()
+        # backpropagation
+        loss.backward()
+        # update optimizer parameters
+        optimizer.step()
+
+    train_loss = train_running_loss / counter
+    return train_loss
+
+# validation function
+def validate(model, dataloader, criterion, val_data, device):
+    print('Validating')
+    model.eval()
+    counter = 0
+    val_running_loss = 0.0
+    with torch.no_grad():
+        for i, data in tqdm(enumerate(dataloader), total=int(len(val_data)/dataloader.batch_size)):
+            counter += 1
+            data, target = data['image'].to(device), data['label'].to(device)
+            outputs = model(data)
+            # apply sigmoid activation to get all the outputs between 0 and 1
+            outputs = torch.sigmoid(outputs)
+            loss = criterion(outputs, target)
+            val_running_loss += loss.item()
+
+    val_loss = val_running_loss / counter
+    return val_loss
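Both loops apply torch.sigmoid before handing the outputs to the criterion, which is why train.py pairs them with nn.BCELoss. An equivalent, numerically more stable variant feeds raw logits to nn.BCEWithLogitsLoss instead; a sketch of that swap, not what this commit does:

    criterion = nn.BCEWithLogitsLoss()  # fuses sigmoid + binary cross-entropy
    outputs = model(data)               # keep the raw logits; no explicit sigmoid
    loss = criterion(outputs, target)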
training/inference.py
ADDED
@@ -0,0 +1,57 @@
+import models
+import torch
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from dataset import ImageDataset
+from torch.utils.data import DataLoader
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint
+checkpoint = torch.load('../outputs/model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
+genres = train_csv.columns.values[2:]
+print(genres)
+# prepare the test dataset and dataloader
+test_data = ImageDataset(
+    train_csv, train=False, test=True
+)
+test_loader = DataLoader(
+    test_data,
+    batch_size=1,
+    shuffle=False
+)
+
+for counter, data in enumerate(test_loader):
+    image, target = data['image'].to(device), data['label']
+    # get all the index positions where value == 1
+    target_indices = [i for i in range(len(target[0])) if target[0][i] == 1]
+    # get the predictions by passing the image through the model
+    print(image.shape)
+    outputs = model(image)
+    outputs = torch.sigmoid(outputs)
+    outputs = outputs.detach().cpu()
+    # keep the three highest-scoring genres
+    sorted_indices = np.argsort(outputs[0])
+    best = sorted_indices[-3:]
+    string_predicted = ''
+    string_actual = ''
+    for i in range(len(best)):
+        string_predicted += f"{genres[best[i]]} "
+    for i in range(len(target_indices)):
+        string_actual += f"{genres[target_indices[i]]} "
+    image = image.squeeze(0)
+    image = image.detach().cpu().numpy()
+    image = np.transpose(image, (1, 2, 0))
+    plt.imshow(image)
+    plt.axis('off')
+    plt.title(f"PREDICTED: {string_predicted}\nACTUAL: {string_actual}")
+    plt.savefig(f"../outputs/inference_{counter}.jpg")
+    plt.show()
training/models.py
ADDED
@@ -0,0 +1,16 @@
+from torchvision import models as models
+import torch.nn as nn
+def model(pretrained, requires_grad):
+    model = models.resnet50(progress=True, pretrained=pretrained)
+    # freeze the hidden layers
+    if not requires_grad:
+        for param in model.parameters():
+            param.requires_grad = False
+    # train the hidden layers
+    else:
+        for param in model.parameters():
+            param.requires_grad = True
+    # make the classification layer learnable
+    # we have 25 genre classes in total
+    model.fc = nn.Linear(2048, 25)
+    return model
training/predict_single.py
ADDED
@@ -0,0 +1,42 @@
+import models
+import torch
+import torchvision.transforms as transforms
+import cv2
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint
+checkpoint = torch.load('../outputs/model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+transform = transforms.Compose([
+    transforms.ToPILImage(),
+    transforms.ToTensor(),
+])
+
+genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
+          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
+          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
+          'Sport', 'Thriller', 'War', 'Western']
+
+
+image = cv2.imread("../input/movie-classifier/Multi_Label_dataset/Images/tt0084058.jpg")
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+image = transform(image)
+image = image.float()  # ToTensor already returns float32; avoid re-wrapping with torch.tensor
+image = image.to(device)
+image = torch.unsqueeze(image, dim=0)
+# get the predictions by passing the image through the model
+outputs = model(image)
+outputs = torch.sigmoid(outputs)
+outputs = outputs.detach().cpu()
+
+
+out_dict = {k: v for k, v in zip(genres, outputs.tolist()[0])}
+print(out_dict)
ADDED
@@ -0,0 +1,81 @@
|
+import models
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib
+from engine import train, validate
+from dataset import ImageDataset
+from torch.utils.data import DataLoader
+
+matplotlib.style.use('ggplot')
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(device)
+
+# initialize the model
+model = models.model(pretrained=True, requires_grad=False).to(device)
+# learning parameters
+lr = 0.0001
+epochs = 10
+batch_size = 32
+optimizer = optim.Adam(model.parameters(), lr=lr)
+criterion = nn.BCELoss()
+
+# read the training csv file
+train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
+# train dataset
+train_data = ImageDataset(
+    train_csv, train=True, test=False
+)
+# validation dataset
+valid_data = ImageDataset(
+    train_csv, train=False, test=False
+)
+# train data loader
+train_loader = DataLoader(
+    train_data,
+    batch_size=batch_size,
+    shuffle=True
+)
+# validation data loader
+valid_loader = DataLoader(
+    valid_data,
+    batch_size=batch_size,
+    shuffle=False
+)
+
+# start the training and validation
+train_loss = []
+valid_loss = []
+for epoch in range(epochs):
+    print(f"Epoch {epoch+1} of {epochs}")
+    train_epoch_loss = train(
+        model, train_loader, optimizer, criterion, train_data, device
+    )
+    valid_epoch_loss = validate(
+        model, valid_loader, criterion, valid_data, device
+    )
+    train_loss.append(train_epoch_loss)
+    valid_loss.append(valid_epoch_loss)
+    print(f"Train Loss: {train_epoch_loss:.4f}")
+    print(f"Val Loss: {valid_epoch_loss:.4f}")
+
+
+# save the trained model to disk
+torch.save({
+    'epoch': epochs,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'loss': criterion,
+}, '../outputs/model.pth')
+# plot and save the train and validation line graphs
+plt.figure(figsize=(10, 7))
+plt.plot(train_loss, color='orange', label='train loss')
+plt.plot(valid_loss, color='red', label='validation loss')
+plt.xlabel('Epochs')
+plt.ylabel('Loss')
+plt.legend()
+plt.savefig('../outputs/loss.png')
+plt.show()