In [38]:
pip install transformers datasets evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [39]:
# Authenticate with the Hugging Face Hub through the interactive login widget.
# A logged-in session is presumably what lets the push-to-hub callback used
# during training upload the model -- confirm against your Hub setup.
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [40]:
from datasets import load_dataset

# Load the first 800 rows of the "train" split of the emotion image dataset.
emotions_df = load_dataset(
    "FastJobs/Visual_Emotional_Analysis",
    split="train[:800]",
)



In [41]:
len(emotions_df)

800

In [42]:
# Hold out 20% of the rows for evaluation. The seed is fixed so the same
# train/test partition (and therefore comparable metrics) is produced on
# every Restart & Run All.
emotions_df = emotions_df.train_test_split(test_size=0.2, seed=42)

In [43]:
# size of the train dataset
len(emotions_df['train'])

640

In [44]:
# size of the test dataset
len(emotions_df['test'])

160

In [45]:
# Build the two lookup tables the model config needs:
#   label2id: label name -> class id (stored as a string, e.g. "0")
#   id2label: class id (as a string) -> label name
labels = emotions_df["train"].features["label"].names
label2id = {name: str(index) for index, name in enumerate(labels)}
id2label = {str(index): name for index, name in enumerate(labels)}


In [46]:
label2id

{'anger': '0',
 'contempt': '1',
 'disgust': '2',
 'fear': '3',
 'happy': '4',
 'neutral': '5',
 'sad': '6',
 'surprise': '7'}

In [47]:
id2label

{'0': 'anger',
 '1': 'contempt',
 '2': 'disgust',
 '3': 'fear',
 '4': 'happy',
 '5': 'neutral',
 '6': 'sad',
 '7': 'surprise'}

In [48]:
from transformers import AutoImageProcessor

# Name of the pretrained checkpoint; the same string is reused later to load
# the model weights that get fine-tuned.
checkpoint = "google/vit-base-patch16-224-in21k"
# Image processor published with that checkpoint; its `size` entry is read
# later to choose the crop resolution for the augmentation pipelines.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [49]:
import numpy as np
import tensorflow as tf
from PIL import Image

# convert image to a tensor
def convert_to_tf_tensor(image: Image.Image) -> tf.Tensor:
    """Convert a PIL image into a TensorFlow tensor with a leading batch axis.

    Args:
        image: The PIL image to convert.

    Returns:
        A tensor holding the image's array data with one extra dimension of
        size 1 inserted at position 0.
    """
    np_image = np.array(image)
    tf_image = tf.convert_to_tensor(np_image)
    # `expand_dims()` adds a batch dimension because the Keras augmentation
    # layers operate on batched inputs.
    return tf.expand_dims(tf_image, 0)


def _apply_image_pipeline(example_batch, pipeline):
    """Run `pipeline` on every image of a batch and store channels-first tensors.

    Each image is converted to RGB, turned into a (1, H, W, C) tensor by
    `convert_to_tf_tensor`, passed through `pipeline`, and written to
    `example_batch["pixel_values"]` as a (C, H, W) tensor.
    """
    pixel_values = []
    for image in example_batch["image"]:
        transformed = pipeline(convert_to_tf_tensor(image.convert("RGB")))
        # Remove only the batch axis, then move channels to the front with an
        # explicit permutation. A bare `tf.transpose` reverses *all* axes,
        # giving (C, W, H): the shape looks right for square crops, but every
        # image is mirrored across its diagonal, unlike the images the image
        # processor produces at inference time.
        hwc_image = tf.squeeze(transformed, axis=0)
        pixel_values.append(tf.transpose(hwc_image, perm=[2, 0, 1]))
    example_batch["pixel_values"] = pixel_values
    return example_batch


def preprocess_train(example_batch):
    """Apply the training augmentation pipeline across a batch.

    Args:
        example_batch: Batch mapping with an "image" entry holding PIL images.

    Returns:
        The same mapping with a "pixel_values" entry added.
    """
    # `train_data_augmentation` is resolved at call time; it must be defined
    # before the transformed dataset is first accessed.
    return _apply_image_pipeline(example_batch, train_data_augmentation)


def preprocess_val(example_batch):
    """Apply the validation pipeline across a batch.

    Args:
        example_batch: Batch mapping with an "image" entry holding PIL images.

    Returns:
        The same mapping with a "pixel_values" entry added.
    """
    # `val_data_augmentation` is resolved at call time; it must be defined
    # before the transformed dataset is first accessed.
    return _apply_image_pipeline(example_batch, val_data_augmentation)

In [50]:
# apply transform to the training and testing dataset

# Attach the on-the-fly preprocessing to each split: augmentation for the
# training split, the deterministic validation pipeline for the test split.
for split_name, transform in (("train", preprocess_train), ("test", preprocess_val)):
    emotions_df[split_name].set_transform(transform)

In [51]:
# Collator used to assemble individual examples into batches; it is configured
# to return TensorFlow tensors and is passed to `to_tf_dataset` further down.
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator(return_tensors="tf")

In [52]:
# Load the "accuracy" metric from the `evaluate` library; `compute_metrics`
# calls it to score the model's predictions.
import evaluate

accuracy = evaluate.load("accuracy")

In [53]:
import numpy as np

def compute_metrics(eval_pred):
    """Score a `(predictions, labels)` pair with the loaded accuracy metric.

    Args:
        eval_pred: Two-element sequence: per-class scores for each example,
            followed by the reference class ids.

    Returns:
        Whatever `accuracy.compute` returns for the arg-max predictions.
    """
    class_scores, references = eval_pred
    # The predicted class is the index of the highest score in each row.
    predicted_ids = np.asarray(class_scores).argmax(axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

Training

In [54]:
from transformers import create_optimizer

batch_size = 16
num_epochs = 20
# The optimizer takes one step per *batch*, not per example, so the schedule
# length is (batches per epoch) * (epochs). Sizing it by the example count
# stretches the learning-rate decay over `batch_size` times more steps than
# are ever run, so the rate would stay close to its initial value all run.
# Floor division matches `to_tf_dataset(shuffle=True)`, whose `drop_remainder`
# defaults to the value of `shuffle`.
steps_per_epoch = len(emotions_df["train"]) // batch_size
num_train_steps = steps_per_epoch * num_epochs
learning_rate = 3e-4
weight_decay_rate = 0.01

# Optimizer with weight decay plus its learning-rate schedule (no warmup).
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=0,
)

In [55]:
from transformers import TFAutoModelForImageClassification

# Load the pretrained checkpoint as an image classifier and attach the label
# mappings built earlier. The load log printed below this cell reports which
# layers are newly initialized and therefore need training.
model = TFAutoModelForImageClassification.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/kernel:0', 'vit/pooler/dense/bias:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [56]:
# avoiding overfitting 

from tensorflow import keras
from tensorflow.keras import layers

# Crop resolution, as (height, width), taken from the image processor config.
size = (image_processor.size["height"], image_processor.size["width"])
crop_height, crop_width = size

# Training pipeline: random crop to the target resolution, rescale pixel
# values from [0, 255] to [-1, 1], then random flip / rotation / zoom to make
# the model more robust and reduce overfitting.
train_layers = [
    layers.RandomCrop(crop_height, crop_width),
    layers.Rescaling(scale=1.0 / 127.5, offset=-1),
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(factor=0.02),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2),
]
train_data_augmentation = keras.Sequential(train_layers, name="train_data_augmentation")

# Validation pipeline: center crop and the same rescaling, with no random
# layers.
val_layers = [
    layers.CenterCrop(crop_height, crop_width),
    layers.Rescaling(scale=1.0 / 127.5, offset=-1),
]
val_data_augmentation = keras.Sequential(val_layers, name="val_data_augmentation")

In [57]:
# Convert the training split to a batched `tf.data.Dataset`, reshuffled for
# training.
tf_train_dataset = emotions_df["train"].to_tf_dataset(
    columns="pixel_values",
    label_cols="label",
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)

# Convert the test split the same way, but without shuffling. Evaluation does
# not benefit from a random order, and because `drop_remainder` defaults to the
# value of `shuffle`, shuffling would also silently drop a final partial batch
# of evaluation examples.
tf_eval_dataset = emotions_df["test"].to_tf_dataset(
    columns="pixel_values",
    label_cols="label",
    shuffle=False,
    batch_size=batch_size,
    collate_fn=data_collator,
)

In [58]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# The model outputs raw logits, so the loss applies the softmax itself
# (`from_logits=True`); labels are integer class ids.
loss = SparseCategoricalCrossentropy(from_logits=True)
model.compile(
    optimizer=optimizer,
    loss=loss,
)

In [59]:
from transformers.keras_callbacks import KerasMetricCallback, PushToHubCallback

# Callback that runs `compute_metrics` on the evaluation dataset.
metric_callback = KerasMetricCallback(
    metric_fn=compute_metrics,
    eval_dataset=tf_eval_dataset,
)
# Callback that pushes the model (and the image processor, passed through the
# `tokenizer` argument) to the Hub from the local "emotions_classifier" folder.
push_to_hub_callback = PushToHubCallback(
    output_dir="emotions_classifier", tokenizer=image_processor, save_strategy="no"
)
callbacks = [metric_callback, push_to_hub_callback]

/content/emotions_classifier is already a clone of https://huggingface.co/CynthiaCR/emotions_classifier. Make sure you pull the latest changes with `repo.git_pull()`.


In [60]:
# Fine-tune for `num_epochs` epochs, validating on the held-out split and
# running the metric / push-to-hub callbacks.
model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    epochs=num_epochs,
    callbacks=callbacks,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Upload file tf_model.h5:   0%|          | 1.00/328M [00:00<?, ?B/s]

To https://huggingface.co/CynthiaCR/emotions_classifier
   6794f2e..9552b39  main -> main

   6794f2e..9552b39  main -> main



<keras.callbacks.History at 0x7f131a6aae60>

Prediction

In [61]:
# Load a handful of images to try the trained model on.
# NOTE(review): these are the first 10 rows of the same "train" split whose
# first 800 rows were used above for training/evaluation, so they are not
# unseen data -- treat the prediction below as a smoke test only.
ds = load_dataset("FastJobs/Visual_Emotional_Analysis", split="train[:10]")
ds



Dataset({
    features: ['image', 'label'],
    num_rows: 10
})

In [62]:
# Pick the first example's image (row-first indexing selects the same image).
image = ds[0]["image"]

In [63]:
from transformers import pipeline

# High-level inference: build an image-classification pipeline from the
# fine-tuned model on the Hub and run it on the sample image.
classifier = pipeline(
    task="image-classification",
    model="CynthiaCR/emotions_classifier",
)
classifier(image)

Downloading tf_model.h5:   0%|          | 0.00/344M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at CynthiaCR/emotions_classifier.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


[{'score': 0.32123512029647827, 'label': 'fear'},
 {'score': 0.31210750341415405, 'label': 'sad'},
 {'score': 0.1644315868616104, 'label': 'anger'},
 {'score': 0.10217338800430298, 'label': 'disgust'},
 {'score': 0.04358164221048355, 'label': 'contempt'}]

In [64]:
from transformers import AutoImageProcessor

# Manual inference, step 1: load the image processor stored with the
# fine-tuned model and turn the sample image into TensorFlow model inputs.
# Note that this rebinds `image_processor`, replacing the one loaded earlier.
image_processor = AutoImageProcessor.from_pretrained("CynthiaCR/emotions_classifier")
inputs = image_processor(image, return_tensors="tf")

In [65]:
from transformers import TFAutoModelForImageClassification

# Manual inference, step 2: load the fine-tuned model from the Hub and run a
# forward pass to obtain the raw class logits.
# Note that this rebinds `model`, replacing the in-memory model trained above.
model = TFAutoModelForImageClassification.from_pretrained("CynthiaCR/emotions_classifier")
logits = model(**inputs).logits

All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at CynthiaCR/emotions_classifier.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


In [66]:
# Manual inference, step 3: take the highest-scoring class for the first (and
# only) image in the batch and map its id back to a label name.
top_class_ids = tf.math.argmax(logits, axis=-1)
predicted_class_id = int(top_class_ids[0])
model.config.id2label[predicted_class_id]

'fear'