submission-template

Sleeping

File size: 10,761 Bytes

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import hf_hub_download

from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
import random
import os

from .utils.evaluation import ImageEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment; the endpoint below reads HF_TOKEN via os.getenv.
load_dotenv()

# Router on which the image-evaluation endpoint below is registered.
router = APIRouter()

# Model description: used as the endpoint description and echoed back as
# "model_description" in the evaluation results.
DESCRIPTION = "Convolutionnal Neural Network"
# URL path of the endpoint; also echoed back as "api_route" in the results.
ROUTE = "/image"

def parse_boxes(annotation_string):
    """Extract every complete bounding box from a whitespace-separated annotation.

    The annotation is a flat run of numbers in groups of five:
    ``class_id x_center y_center width height``. The class id of each group is
    dropped; a trailing group with fewer than five numbers is ignored.

    Returns:
        A list of ``[x_center, y_center, width, height]`` float lists, one per
        complete group, in annotation order (empty list for an empty string).

    Raises:
        ValueError: if a token cannot be converted to ``float``.
    """
    numbers = list(map(float, annotation_string.split()))
    # Only whole groups of five count; integer division discards a partial tail.
    complete_groups = len(numbers) // 5
    return [
        numbers[5 * k + 1:5 * k + 5]  # skip the class id at offset 5 * k
        for k in range(complete_groups)
    ]

def compute_iou(box1, box2):
    """Return the Intersection over Union of two centre-format boxes.

    Both boxes are ``(x_center, y_center, width, height)`` sequences (YOLO
    layout). A small epsilon (1e-6) is added to the union so that two
    zero-area boxes do not cause a division by zero.
    """
    def as_corners(box):
        # (x_center, y_center, width, height) -> [x_min, y_min, x_max, y_max]
        cx, cy, w, h = box
        return np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])

    def area(corners):
        return (corners[2] - corners[0]) * (corners[3] - corners[1])

    first = as_corners(box1)
    second = as_corners(box2)

    # Overlap extent along each axis, clamped at 0 when the boxes are disjoint.
    overlap_w = max(0, min(first[2], second[2]) - max(first[0], second[0]))
    overlap_h = max(0, min(first[3], second[3]) - max(first[1], second[1]))
    intersection = overlap_w * overlap_h

    union = area(first) + area(second) - intersection

    return intersection / (union + 1e-6)

def compute_max_iou(true_boxes, pred_box):
    """Return the best IoU that `pred_box` achieves against any box in `true_boxes`.

    The result is never below 0, and is exactly 0 when `true_boxes` is empty.
    """
    # Seeding the candidates with 0 provides both the floor and the
    # empty-input result.
    candidates = [0]
    candidates.extend(compute_iou(gt_box, pred_box) for gt_box in true_boxes)
    return max(candidates)

class ImageClassifier(nn.Module):
    """Small two-block CNN producing two class logits (0 = no smoke, 1 = smoke).

    Each block is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, so the spatial
    size is divided by 4 overall. ``fc1`` expects 64 * 16 * 16 features, which
    means the network must be fed 3-channel 64x64 images.

    The attribute names (conv1, conv2, fc1, fc2) are the keys of the saved
    state dict and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 2)  # Output layer with 2 classes (0, 1)

    def forward(self, x):
        """Return raw logits of shape (batch, 2) for a (batch, 3, 64, 64) input."""
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.relu3(self.fc1(x))
        return self.fc2(x)


class CustomDataset(Dataset):
    """Pair each row's ``'image'`` with an externally supplied label.

    Args:
        dataset: indexable collection whose items expose an ``'image'`` entry.
        labels: sequence of labels, aligned index-by-index with `dataset`.
        transform: optional callable applied to each image before it is returned.
    """

    def __init__(self, dataset, labels, transform=None):
        self.dataset = dataset
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image = self.dataset[idx]['image']
        label = self.labels[idx]

        # The classifier's first convolution takes 3 input channels, so force
        # RGB when the image object supports it.
        # NOTE(review): assumes images are PIL images - confirm for the dataset.
        if hasattr(image, "convert"):
            image = image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label


@router.post(ROUTE, tags=["Image Task"],
             description=DESCRIPTION)
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model:
    - Classification: a small pre-trained CNN (weights downloaded from the
      Hugging Face Hub) predicts smoke / no smoke for every image of the
      "val" split.
    - Object detection: random baseline, one random box per image that
      contains smoke.

    Metrics:
    - Classification accuracy / precision / recall: whether an image contains smoke or not
    - Object Detection accuracy: mean over smoke images of the best IoU
      (Intersection over Union) between the predicted box and the true boxes

    Returns:
        A dict with the submission metadata, the metrics above, the tracked
        energy/emissions figures and the dataset configuration of the request.
    """
    # Get space info
    username, space_url = get_space_info()

    # Load the dataset; only the validation split is used for evaluation
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
    test_dataset = dataset["val"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    # Update the code below to replace the random baseline with your model inference
    #--------------------------------------------------------------------------------------------

    predictions = []
    true_labels = []
    pred_boxes = []
    true_boxes_list = []  # List of lists, each inner list contains boxes for one image

    torch.manual_seed(0)

    # Ground truth is collected first, in dataset order, so the labels can be
    # handed to the evaluation dataset and stay aligned with the predictions.
    for example in test_dataset:
        # Parse true annotation (YOLO format: class_id x_center y_center width height)
        annotation = example.get("annotations", "").strip()
        has_smoke = len(annotation) > 0
        true_labels.append(int(has_smoke))

        # If there's a true box, parse it and make random box prediction
        if has_smoke:
            # Parse all true boxes from the annotation
            true_boxes_list.append(parse_boxes(annotation))

            # For baseline, make one random box prediction per image
            # In a real model, you might want to predict multiple boxes
            random_box = [
                random.random(),  # x_center
                random.random(),  # y_center
                random.random() * 0.5,  # width (max 0.5)
                random.random() * 0.5   # height (max 0.5)
            ]
            pred_boxes.append(random_box)

    # Deterministic evaluation preprocessing: no random augmentation at
    # inference time. The crop/resize mirrors the geometry of the augmentation
    # pipeline this file previously defined (512 crop -> 64x64), and CenterCrop
    # pads images smaller than 512 instead of raising.
    # NOTE(review): confirm this matches the preprocessing used when the
    # downloaded weights were trained.
    eval_transform = transforms.Compose([
        transforms.CenterCrop(size=(512, 512)),
        transforms.Resize(size=(64, 64)),  # input size required by ImageClassifier
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
    ])

    # shuffle=False keeps predictions in the same order as true_labels.
    eval_set = CustomDataset(test_dataset, true_labels, transform=eval_transform)
    test_loader = DataLoader(eval_set, batch_size=64, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Download the pre-trained weights (.pth) from the Hugging Face Hub
    repo_id = "AlexandreL2024/CNN-Image-Classification"
    filename = "model_CNN_2Layers.pth"
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # load_state_dict() loads in place and returns a key report, not the
    # model, so its result must not be assigned back to `model`.
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted repository.
    model = ImageClassifier()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():
        for images, _ in test_loader:
            logits = model(images.to(device))
            # The arg-max of the raw logits is the predicted class; a sigmoid
            # is monotonic and would only add ties when it saturates.
            predictions.extend(logits.argmax(dim=1).cpu().tolist())

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()

    # Calculate classification metrics
    classification_accuracy = accuracy_score(true_labels, predictions)
    classification_precision = precision_score(true_labels, predictions)
    classification_recall = recall_score(true_labels, predictions)

    # Calculate mean IoU for object detection (only for images with smoke)
    # For each image, we compute the max IoU between the predicted box and all true boxes
    ious = [
        compute_max_iou(true_boxes, pred_box)
        for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
    ]

    mean_iou = float(np.mean(ious)) if ious else 0.0

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "classification_precision": float(classification_precision),
        "classification_recall": float(classification_recall),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    return results