|
import ast |
|
|
|
import keras |
|
from keras.models import Sequential |
|
from keras.layers import Dense, Dropout, Flatten |
|
from keras.layers import Conv2D, MaxPooling2D |
|
from keras.preprocessing import image |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from sklearn.model_selection import train_test_split |
|
from tqdm import tqdm |
|
from keras.layers import BatchNormalization |
|
import json |
|
|
|
def label_map(category, n_classes=290):
    """Convert 1-based category ids into a multi-hot label vector.

    Args:
        category: Either the textual form of a Python literal sequence of
            category ids (for example ``"[1, 5]"``), which is parsed with
            ``ast.literal_eval``, or an already-parsed iterable of ids.
        n_classes: Length of the returned vector.

    Returns:
        A list of ``n_classes`` ints in which position ``id - 1`` is 1 for
        every id in ``category`` and every other position is 0.

    Raises:
        IndexError: If an id falls outside ``1..n_classes``.
    """
    # Only textual / AST input needs parsing; an iterable of ids is used as is.
    if isinstance(category, (str, bytes, ast.AST)):
        category = ast.literal_eval(category)

    labels = [0] * n_classes
    for category_id in category:
        index = int(category_id) - 1
        # Plain list indexing would let id 0 (or a negative id) wrap around
        # and silently flag a class at the end of the vector.
        if not 0 <= index < n_classes:
            raise IndexError(
                f"category id {category_id!r} is outside 1..{n_classes}"
            )
        labels[index] = 1
    return labels
|
|
|
def _load_image(path, size):
    """Load the image at ``path`` resized to ``size`` x ``size``.

    Returns the array produced by ``keras.utils.img_to_array`` with every
    value divided by 255.
    """
    # target_size is (height, width); the channel count is not part of it.
    img = keras.utils.load_img(path, target_size=(size, size))
    return keras.utils.img_to_array(img) / 255.


if __name__ == "__main__":
    # Train a small CNN on the multi-label training set, then write
    # thresholded predictions for the evaluation images to submission.csv.

    image_dir = "images/"

    SIZE = 256          # images are resized to SIZE x SIZE
    N_CLASSES = 290     # width of the label vectors and of the output layer
    EPOCH = 1
    BATCH_SIZE = 64
    THRESHOLD = 0.5     # minimum sigmoid output for a class to be reported

    # ---- training data -------------------------------------------------
    train_df = pd.read_csv("multilabel_classification/train.csv")
    train_df["categories"] = train_df["categories"].apply(
        label_map, n_classes=N_CLASSES
    )
    train_df["file_name"] = [
        image_dir + image_id + ".png" for image_id in train_df["id"]
    ]

    X = np.array([_load_image(path, SIZE) for path in train_df["file_name"]])
    y = np.array(train_df["categories"].to_list())

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=20, test_size=0.3
    )

    # ---- model ---------------------------------------------------------
    # NOTE(review): the first conv stage normalizes before pooling while the
    # later stages pool first; kept as found -- confirm this is intentional.
    model = Sequential()

    model.add(Conv2D(filters=16, kernel_size=(5, 5), activation="relu",
                     input_shape=(SIZE, SIZE, 3)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=32, kernel_size=(5, 5), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))
    # One independent sigmoid per class: an image may carry several labels.
    model.add(Dense(N_CLASSES, activation="sigmoid"))

    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["accuracy"])

    model.fit(X_train, y_train, epochs=EPOCH,
              validation_data=(X_test, y_test), batch_size=BATCH_SIZE)

    # ---- inference on the evaluation images ----------------------------
    with open("object_detection/eval.json", encoding="utf-8") as eval_file:
        valid_df = pd.DataFrame(json.load(eval_file)["images"])

    predict_list = []
    for eval_name in valid_df["file_name"]:
        # Load the image belonging to *this* row; model.predict expects a
        # leading batch axis, hence expand_dims.
        batch = np.expand_dims(_load_image(image_dir + eval_name, SIZE), axis=0)
        proba = model.predict(batch)[0]

        # Category ids are 1-based; enumerate walks the classes in ascending
        # order, so the resulting list is already sorted.
        predict_list.append(
            [class_idx + 1 for class_idx, p in enumerate(proba) if p >= THRESHOLD]
        )

    # ---- submission file -----------------------------------------------
    # Drop the last four characters of each file name (e.g. ".png") to get
    # the id; the "osd" column is filled with the constant 1.
    submit_data = [
        [eval_name[:-4], predicted, 1]
        for eval_name, predicted in zip(valid_df["file_name"], predict_list)
    ]
    pd.DataFrame(data=submit_data, columns=["id", "categories", "osd"]).to_csv(
        "submission.csv", index=False
    )
|
|
|
|