first commit
- .gitignore +2 -0
- app.py +50 -0
- imagenes/tt0084058.jpg +0 -0
- imagenes/tt0084867.jpg +0 -0
- imagenes/tt0085121.jpg +0 -0
- model.pth +3 -0
- models.py +16 -0
- requirements.txt +8 -0
- training/dataset.py +65 -0
- training/engine.py +44 -0
- training/inference.py +57 -0
- training/models.py +16 -0
- training/predict_single.py +42 -0
- training/train.py +81 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+venv/
+__pycache__/
app.py
ADDED
@@ -0,0 +1,50 @@
+import gradio as gr
+import models
+import torch
+import torchvision.transforms as transforms
+import cv2
+import numpy as np
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint (map_location lets a CPU-only Space load a GPU-trained checkpoint)
+checkpoint = torch.load('model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+transform = transforms.Compose([
+    transforms.ToPILImage(),
+    transforms.ToTensor(),
+])
+
+genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
+          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
+          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
+          'Sport', 'Thriller', 'War', 'Western']
+
+
+def segment(image):
+    image = np.asarray(image)
+    # the Gradio image input already arrives as an RGB array, so no color conversion is needed
+    image = transform(image)
+    image = image.float()  # ToTensor already returns float32; avoid re-wrapping with torch.tensor
+    image = image.to(device)
+    image = torch.unsqueeze(image, dim=0)
+    # get the predictions by passing the image through the model
+    outputs = model(image)
+    outputs = torch.sigmoid(outputs)
+    outputs = outputs.detach().cpu()
+
+    out_dict = {k: v for k, v in zip(genres, outputs.tolist()[0])}
+    return out_dict
+
+
+iface = gr.Interface(fn=segment,
+                     inputs="image",
+                     outputs="label",
+                     title="Poster classification",
+                     description="Classify the genre of your poster by uploading an image",
+                     examples=[["imagenes/tt0084058.jpg"], ["imagenes/tt0084867.jpg"], ["imagenes/tt0085121.jpg"]]).launch()
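Returning a {genre: probability} dict to a "label" output lets Gradio render the scores as ranked confidence bars. To cap how many genres appear, the output component can be constructed explicitly; a minimal sketch, assuming Gradio 3.x (older releases spell it gr.outputs.Label):

    # show only the five highest-scoring genres instead of the full 25-entry list
    outputs=gr.Label(num_top_classes=5)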
imagenes/tt0084058.jpg
ADDED
imagenes/tt0084867.jpg
ADDED
imagenes/tt0085121.jpg
ADDED
model.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5ec1109b461ee19772daeec5c78c3af9cb28f42854b81338f2ab7ef8d0e52d
+size 94965817
models.py
ADDED
@@ -0,0 +1,16 @@
+from torchvision import models as models
+import torch.nn as nn
+def model(pretrained, requires_grad):
+    model = models.resnet50(progress=True, pretrained=pretrained)
+    # freeze the hidden layers
+    if not requires_grad:
+        for param in model.parameters():
+            param.requires_grad = False
+    # train the hidden layers
+    else:
+        for param in model.parameters():
+            param.requires_grad = True
+    # make the classification layer learnable
+    # we have 25 genre classes in total
+    model.fc = nn.Linear(2048, 25)
+    return model
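Recent torchvision releases (0.13 and later) deprecate the pretrained flag in favor of a weights argument. A minimal equivalent sketch, an assumption about the environment rather than part of this commit:

    from torchvision.models import resnet50, ResNet50_Weights

    # weights=None matches pretrained=False; ResNet50_Weights.DEFAULT matches pretrained=True
    backbone = resnet50(weights=ResNet50_Weights.DEFAULT)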
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+torch
+pandas
+matplotlib
+tqdm
+opencv-python
+torchvision
+gradio
+jinja2
training/dataset.py
ADDED
@@ -0,0 +1,65 @@
+import torch
+import cv2
+import numpy as np
+import torchvision.transforms as transforms
+from torch.utils.data import Dataset
+
+
+class ImageDataset(Dataset):
+    def __init__(self, csv, train, test):
+        self.csv = csv
+        self.train = train
+        self.test = test
+        self.all_image_names = self.csv[:]['Id']
+        self.all_labels = np.array(self.csv.drop(['Id', 'Genre'], axis=1))
+        self.train_ratio = int(0.85 * len(self.csv))
+        self.valid_ratio = len(self.csv) - self.train_ratio
+        # set the training data images and labels
+        if self.train:
+            print(f"Number of training images: {self.train_ratio}")
+            self.image_names = list(self.all_image_names[:self.train_ratio])
+            self.labels = list(self.all_labels[:self.train_ratio])
+            # define the training transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.Resize((400, 400)),
+                transforms.RandomHorizontalFlip(p=0.5),
+                transforms.RandomRotation(degrees=45),
+                transforms.ToTensor(),
+            ])
+        # set the validation data images and labels (the last 10 rows are held out for testing)
+        elif not self.train and not self.test:
+            print(f"Number of validation images: {self.valid_ratio - 10}")
+            self.image_names = list(self.all_image_names[-self.valid_ratio:-10])
+            # slice the labels exactly like the names so the two lists stay aligned
+            self.labels = list(self.all_labels[-self.valid_ratio:-10])
+            # define the validation transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.Resize((400, 400)),
+                transforms.ToTensor(),
+            ])
+        # set the test data images and labels, only the last 10 images;
+        # these are used in a separate inference script
+        elif self.test and not self.train:
+            self.image_names = list(self.all_image_names[-10:])
+            self.labels = list(self.all_labels[-10:])
+            # define the test transforms
+            self.transform = transforms.Compose([
+                transforms.ToPILImage(),
+                transforms.ToTensor(),
+            ])
+
+    def __len__(self):
+        return len(self.image_names)
+
+    def __getitem__(self, index):
+        image = cv2.imread(f"../input/movie-classifier/Multi_Label_dataset/Images/{self.image_names[index]}.jpg")
+        # convert the image from BGR to RGB color format
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # apply image transforms
+        image = self.transform(image)
+        targets = self.labels[index]
+
+        return {
+            'image': image.float(),
+            'label': torch.tensor(targets, dtype=torch.float32)
+        }
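The split arithmetic: for a CSV with N rows, the first int(0.85 * N) rows are training data, the last 10 are held out for the inference script, and everything in between is validation. A minimal check with a hypothetical N = 1000 (not a figure from the dataset):

    N = 1000
    train_ratio = int(0.85 * N)      # rows 0..849 -> training
    valid_ratio = N - train_ratio    # 150
    # rows 850..989 -> validation (the slice stops 10 short of the end)
    # rows 990..999 -> test, consumed by training/inference.py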
training/engine.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+from tqdm import tqdm
+
+# training function
+def train(model, dataloader, optimizer, criterion, train_data, device):
+    print('Training')
+    model.train()
+    counter = 0
+    train_running_loss = 0.0
+    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):
+        counter += 1
+        data, target = data['image'].to(device), data['label'].to(device)
+        optimizer.zero_grad()
+        outputs = model(data)
+        # apply sigmoid activation to get all the outputs between 0 and 1
+        outputs = torch.sigmoid(outputs)
+        loss = criterion(outputs, target)
+        train_running_loss += loss.item()
+        # backpropagation
+        loss.backward()
+        # update optimizer parameters
+        optimizer.step()
+
+    train_loss = train_running_loss / counter
+    return train_loss
+
+# validation function
+def validate(model, dataloader, criterion, val_data, device):
+    print('Validating')
+    model.eval()
+    counter = 0
+    val_running_loss = 0.0
+    with torch.no_grad():
+        for i, data in tqdm(enumerate(dataloader), total=int(len(val_data)/dataloader.batch_size)):
+            counter += 1
+            data, target = data['image'].to(device), data['label'].to(device)
+            outputs = model(data)
+            # apply sigmoid activation to get all the outputs between 0 and 1
+            outputs = torch.sigmoid(outputs)
+            loss = criterion(outputs, target)
+            val_running_loss += loss.item()
+
+    val_loss = val_running_loss / counter
+    return val_loss
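Both loops apply torch.sigmoid before handing the outputs to the criterion, which is why train.py pairs them with nn.BCELoss. An equivalent, numerically more stable variant feeds raw logits to nn.BCEWithLogitsLoss instead; a sketch of that swap, not what this commit does:

    criterion = nn.BCEWithLogitsLoss()  # fuses sigmoid + binary cross-entropy
    outputs = model(data)               # keep the raw logits; no explicit sigmoid
    loss = criterion(outputs, target)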
training/inference.py
ADDED
@@ -0,0 +1,57 @@
+import models
+import torch
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from dataset import ImageDataset
+from torch.utils.data import DataLoader
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint
+checkpoint = torch.load('../outputs/model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
+genres = train_csv.columns.values[2:]
+print(genres)
+# prepare the test dataset and dataloader
+test_data = ImageDataset(
+    train_csv, train=False, test=True
+)
+test_loader = DataLoader(
+    test_data,
+    batch_size=1,
+    shuffle=False
+)
+
+for counter, data in enumerate(test_loader):
+    image, target = data['image'].to(device), data['label']
+    # get all the index positions where value == 1
+    target_indices = [i for i in range(len(target[0])) if target[0][i] == 1]
+    # get the predictions by passing the image through the model
+    print(image.shape)
+    outputs = model(image)
+    outputs = torch.sigmoid(outputs)
+    outputs = outputs.detach().cpu()
+    # keep the three highest-scoring genres
+    sorted_indices = np.argsort(outputs[0])
+    best = sorted_indices[-3:]
+    string_predicted = ''
+    string_actual = ''
+    for i in range(len(best)):
+        string_predicted += f"{genres[best[i]]} "
+    for i in range(len(target_indices)):
+        string_actual += f"{genres[target_indices[i]]} "
+    image = image.squeeze(0)
+    image = image.detach().cpu().numpy()
+    image = np.transpose(image, (1, 2, 0))
+    plt.imshow(image)
+    plt.axis('off')
+    plt.title(f"PREDICTED: {string_predicted}\nACTUAL: {string_actual}")
+    plt.savefig(f"../outputs/inference_{counter}.jpg")
+    plt.show()
training/models.py
ADDED
@@ -0,0 +1,16 @@
+from torchvision import models as models
+import torch.nn as nn
+def model(pretrained, requires_grad):
+    model = models.resnet50(progress=True, pretrained=pretrained)
+    # freeze the hidden layers
+    if not requires_grad:
+        for param in model.parameters():
+            param.requires_grad = False
+    # train the hidden layers
+    else:
+        for param in model.parameters():
+            param.requires_grad = True
+    # make the classification layer learnable
+    # we have 25 genre classes in total
+    model.fc = nn.Linear(2048, 25)
+    return model
training/predict_single.py
ADDED
@@ -0,0 +1,42 @@
+import models
+import torch
+import torchvision.transforms as transforms
+import cv2
+
+
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# initialize the model
+model = models.model(pretrained=False, requires_grad=False).to(device)
+# load the model checkpoint
+checkpoint = torch.load('../outputs/model.pth', map_location=device)
+# load model weights state_dict
+model.load_state_dict(checkpoint['model_state_dict'])
+model.eval()
+
+transform = transforms.Compose([
+    transforms.ToPILImage(),
+    transforms.ToTensor(),
+])
+
+genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
+          'Documentary', 'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
+          'Musical', 'Mystery', 'N/A', 'News', 'Reality-TV', 'Romance', 'Sci-Fi', 'Short',
+          'Sport', 'Thriller', 'War', 'Western']
+
+
+image = cv2.imread("../input/movie-classifier/Multi_Label_dataset/Images/tt0084058.jpg")
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+image = transform(image)
+image = image.float()  # ToTensor already returns float32; avoid re-wrapping with torch.tensor
+image = image.to(device)
+image = torch.unsqueeze(image, dim=0)
+# get the predictions by passing the image through the model
+outputs = model(image)
+outputs = torch.sigmoid(outputs)
+outputs = outputs.detach().cpu()
+
+
+out_dict = {k: v for k, v in zip(genres, outputs.tolist()[0])}
+print(out_dict)
ADDED
@@ -0,0 +1,81 @@
|
+import models
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib
+from engine import train, validate
+from dataset import ImageDataset
+from torch.utils.data import DataLoader
+
+matplotlib.style.use('ggplot')
+# initialize the computation device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(device)
+
+# initialize the model
+model = models.model(pretrained=True, requires_grad=False).to(device)
+# learning parameters
+lr = 0.0001
+epochs = 10
+batch_size = 32
+optimizer = optim.Adam(model.parameters(), lr=lr)
+criterion = nn.BCELoss()
+
+# read the training csv file
+train_csv = pd.read_csv('../input/movie-classifier/Multi_Label_dataset/train.csv')
+# train dataset
+train_data = ImageDataset(
+    train_csv, train=True, test=False
+)
+# validation dataset
+valid_data = ImageDataset(
+    train_csv, train=False, test=False
+)
+# train data loader
+train_loader = DataLoader(
+    train_data,
+    batch_size=batch_size,
+    shuffle=True
+)
+# validation data loader
+valid_loader = DataLoader(
+    valid_data,
+    batch_size=batch_size,
+    shuffle=False
+)
+
+# start the training and validation
+train_loss = []
+valid_loss = []
+for epoch in range(epochs):
+    print(f"Epoch {epoch+1} of {epochs}")
+    train_epoch_loss = train(
+        model, train_loader, optimizer, criterion, train_data, device
+    )
+    valid_epoch_loss = validate(
+        model, valid_loader, criterion, valid_data, device
+    )
+    train_loss.append(train_epoch_loss)
+    valid_loss.append(valid_epoch_loss)
+    print(f"Train Loss: {train_epoch_loss:.4f}")
+    print(f"Val Loss: {valid_epoch_loss:.4f}")
+
+
+# save the trained model to disk
+torch.save({
+    'epoch': epochs,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'loss': criterion,
+}, '../outputs/model.pth')
+# plot and save the train and validation line graphs
+plt.figure(figsize=(10, 7))
+plt.plot(train_loss, color='orange', label='train loss')
+plt.plot(valid_loss, color='red', label='validation loss')
+plt.xlabel('Epochs')
+plt.ylabel('Loss')
+plt.legend()
+plt.savefig('../outputs/loss.png')
+plt.show()