TheoLvs's picture
Update tasks/image.py
c341b58 verified
import torch
import os
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import hf_hub_download
from fastapi import APIRouter
from datetime import datetime
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score
import random
import os
from .utils.evaluation import ImageEvaluationRequest
from .utils.emissions import tracker, clean_emissions_data, get_space_info
from dotenv import load_dotenv
# Load variables from a .env file into the process environment
# (evaluate_image later reads HF_TOKEN via os.getenv).
load_dotenv()
# Router on which this module's evaluation endpoint is registered.
router = APIRouter()
# Model description: passed to the route decorator and echoed back as
# "model_description" in the results payload.
DESCRIPTION = "Convolutionnal Neural Network"
# URL path of the endpoint; also echoed back as "api_route" in the results.
ROUTE = "/image"
def parse_boxes(annotation_string):
    """Extract every bounding box from a whitespace-separated annotation string.

    The string is read as consecutive groups of five numbers
    (class_id, x_center, y_center, width, height). The class id is dropped,
    so each returned box is ``[x_center, y_center, width, height]``.
    A trailing group with fewer than five numbers is ignored.
    """
    fields_per_box = 5
    numbers = list(map(float, annotation_string.split()))
    complete_boxes = len(numbers) // fields_per_box
    return [
        # Slice starts one past the group start to skip the class id.
        numbers[k * fields_per_box + 1:(k + 1) * fields_per_box]
        for k in range(complete_boxes)
    ]
def compute_iou(box1, box2):
    """Return the Intersection over Union of two YOLO-format boxes.

    Each box is ``(x_center, y_center, width, height)``. A small epsilon
    (1e-6) is added to the union so two zero-area boxes do not divide by zero.
    """
    def as_corners(yolo_box):
        # Center/size representation -> (left, top, right, bottom).
        cx, cy, w, h = yolo_box
        return np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])

    a_left, a_top, a_right, a_bottom = as_corners(box1)
    b_left, b_top, b_right, b_bottom = as_corners(box2)

    # Overlap extents are clamped at 0 so disjoint boxes give no intersection.
    overlap_w = max(0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0, min(a_bottom, b_bottom) - max(a_top, b_top))
    intersection = overlap_w * overlap_h

    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)
    union = area_a + area_b - intersection

    return intersection / (union + 1e-6)
def compute_max_iou(true_boxes, pred_box):
    """Return the best IoU that ``pred_box`` achieves against any box in ``true_boxes``.

    The result is 0 when ``true_boxes`` is empty.
    """
    # Seeding the candidates with 0 gives the floor for the empty case.
    return max([0, *(compute_iou(candidate, pred_box) for candidate in true_boxes)])
class ImageClassifier(nn.Module):
    """Two-convolution CNN mapping a 3x64x64 image tensor to 2 class logits.

    The attribute names (conv1, conv2, fc1, fc2, ...) are part of the
    checkpoint format: ``load_state_dict`` matches parameters by these names,
    so they must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Two 2x2 poolings shrink a 64x64 input to 16x16 with 64 channels.
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 2)  # Output layer with 2 classes (0, 1)

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.relu3(self.fc1(x))
        return self.fc2(x)


@router.post(ROUTE, tags=["Image Task"], description=DESCRIPTION)
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model:
    - Classification: pretrained two-layer CNN downloaded from the Hugging Face Hub
    - Bounding boxes: random baseline (one random box per image that has smoke)

    Metrics:
    - Classification accuracy/precision/recall: whether an image contains smoke or not
    - Object Detection accuracy: IoU (Intersection over Union) for smoke bounding boxes

    Args:
        request: Evaluation request; ``dataset_name``, ``test_size`` and
            ``test_seed`` are read from it.

    Returns:
        dict with the submission metadata, classification metrics, mean IoU,
        and the emissions figures reported by the tracker.
    """
    # Get space info
    username, space_url = get_space_info()

    # Load the dataset and select the evaluation split
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
    test_dataset = dataset["test"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # MODEL INFERENCE STARTS HERE (everything up to the matching marker is tracked)
    #--------------------------------------------------------------------------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Deterministic preprocessing only. Random crop/flip/rotation/colour jitter
    # are training-time augmentations; applying them here would make the
    # reported metrics change from run to run.
    # NOTE(review): confirm this matches the preprocessing the checkpoint was
    # trained with.
    preprocess = transforms.Compose([
        transforms.Resize(size=(64, 64)),  # input size the network's fc1 layer expects
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Download the pretrained weights from the Hugging Face Hub.
    model_path = hf_hub_download(
        repo_id="AlexandreL2024/CNN-Image-Classification",
        filename="model_CNN_2Layers.pth",
    )

    model = ImageClassifier()
    # SECURITY: torch.load unpickles the file, so only load checkpoints from a
    # trusted repository (consider weights_only=True where the installed torch
    # supports it). map_location lets a GPU-saved checkpoint load on CPU hosts.
    state_dict = torch.load(model_path, map_location=device)
    # load_state_dict updates the module in place and returns a key report,
    # not the model, so its return value must not be assigned to `model`.
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    predictions = []
    true_labels = []
    pred_boxes = []
    true_boxes_list = []  # List of lists, each inner list contains boxes for one image

    batch_size = 64
    pending = []  # preprocessed image tensors waiting for a forward pass

    def classify_pending():
        """Run the model on the buffered images and record their predicted classes."""
        if not pending:
            return
        logits = model(torch.stack(pending).to(device))
        # argmax over logits equals argmax over sigmoid(logits); the evaluation
        # compares this index against the 0/1 smoke label built below.
        predictions.extend(logits.argmax(dim=1).cpu().tolist())
        pending.clear()

    # A single ordered pass keeps predictions aligned with true_labels
    # (a shuffled loader would scramble that pairing).
    with torch.no_grad():
        for example in test_dataset:
            # Assumes the "image" column decodes to a PIL image -- TODO confirm
            # against the dataset schema.
            pending.append(preprocess(example["image"].convert("RGB")))
            if len(pending) == batch_size:
                classify_pending()

            # Parse true annotation (YOLO format: class_id x_center y_center width height)
            annotation = (example.get("annotations") or "").strip()
            has_smoke = len(annotation) > 0
            true_labels.append(int(has_smoke))

            if has_smoke:
                # Parse all true boxes from the annotation
                true_boxes_list.append(parse_boxes(annotation))

                # Baseline: one random box prediction per image with smoke
                pred_boxes.append([
                    random.random(),        # x_center
                    random.random(),        # y_center
                    random.random() * 0.5,  # width (max 0.5)
                    random.random() * 0.5,  # height (max 0.5)
                ])

        classify_pending()  # final, possibly partial, batch
    #--------------------------------------------------------------------------------------------
    # MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()

    # Calculate classification metrics
    classification_accuracy = accuracy_score(true_labels, predictions)
    classification_precision = precision_score(true_labels, predictions)
    classification_recall = recall_score(true_labels, predictions)

    # Mean IoU for object detection (only for images with smoke): for each
    # image, take the max IoU between the predicted box and all true boxes.
    ious = [
        compute_max_iou(true_boxes, pred_box)
        for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
    ]
    mean_iou = float(np.mean(ious)) if ious else 0.0

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "classification_precision": float(classification_precision),
        "classification_recall": float(classification_recall),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }
    return results