# (Extraction residue from the hosting page: Hugging Face Space status banner "Spaces: Sleeping".)
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
from torchvision import transforms | |
from torch.utils.data import DataLoader, Dataset | |
from huggingface_hub import hf_hub_download | |
from fastapi import APIRouter | |
from datetime import datetime | |
from datasets import load_dataset | |
import numpy as np | |
from sklearn.metrics import accuracy_score, precision_score, recall_score | |
import random | |
import os | |
from .utils.evaluation import ImageEvaluationRequest | |
from .utils.emissions import tracker, clean_emissions_data, get_space_info | |
from dotenv import load_dotenv | |
# Load environment variables from a local .env file, if one is present
# (evaluate_image reads HF_TOKEN from the environment).
load_dotenv()

# Router object for this module's image-task endpoint.
router = APIRouter()

# Free-text model description reported in the evaluation results.
# NOTE(review): "Convolutionnal" looks like a typo for "Convolutional"; the
# string is left unchanged here because it is emitted at runtime.
DESCRIPTION = "Convolutionnal Neural Network"
# Route string reported in the results under "api_route".
ROUTE = "/image"
def parse_boxes(annotation_string):
    """Parse every bounding box contained in a single annotation string.

    The string is a whitespace-separated list of numbers in which each box
    occupies 5 consecutive values: class_id, x_center, y_center, width,
    height.

    Args:
        annotation_string: Whitespace-separated numeric annotation text.

    Returns:
        A list of ``[x_center, y_center, width, height]`` float lists, one
        per complete box. The class_id is discarded, and a trailing group of
        fewer than 5 values is ignored. An empty or blank string yields ``[]``.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    values = [float(token) for token in annotation_string.split()]
    # Only walk over complete 5-value records; a trailing partial record is
    # dropped rather than producing a short box.
    complete = len(values) - len(values) % 5
    # Skip the class_id (first value of each record) and keep the next four.
    return [values[start + 1:start + 5] for start in range(0, complete, 5)]
def compute_iou(box1, box2):
    """Compute Intersection over Union (IoU) between two YOLO format boxes.

    Args:
        box1: Sequence ``(x_center, y_center, width, height)``.
        box2: Sequence ``(x_center, y_center, width, height)``.

    Returns:
        ``intersection / (union + 1e-6)``. The small constant keeps the
        division defined when both boxes have zero area, so identical boxes
        score slightly below 1.0 and non-overlapping boxes score 0.
    """
    def yolo_to_corners(box):
        # Convert (x_center, y_center, width, height) to (x1, y1, x2, y2).
        x_center, y_center, width, height = box
        return (
            x_center - width / 2,
            y_center - height / 2,
            x_center + width / 2,
            y_center + height / 2,
        )

    left1, top1, right1, bottom1 = yolo_to_corners(box1)
    left2, top2, right2, bottom2 = yolo_to_corners(box2)

    # Overlap extent along each axis, clamped to 0 when the boxes are disjoint.
    overlap_w = max(0, min(right1, right2) - max(left1, left2))
    overlap_h = max(0, min(bottom1, bottom2) - max(top1, top2))
    intersection = overlap_w * overlap_h

    area1 = (right1 - left1) * (bottom1 - top1)
    area2 = (right2 - left2) * (bottom2 - top2)
    union = area1 + area2 - intersection

    return intersection / (union + 1e-6)
def compute_max_iou(true_boxes, pred_box):
    """Compute the maximum IoU between a predicted box and all true boxes.

    Args:
        true_boxes: Iterable of ground-truth boxes, each in the format
            accepted by ``compute_iou``.
        pred_box: The predicted box, in the same format.

    Returns:
        The largest ``compute_iou(true_box, pred_box)`` value, never less
        than 0; returns 0 when ``true_boxes`` is empty.
    """
    # Seeding the candidates with 0 reproduces the "start at 0, keep the
    # larger value" accumulator and makes the empty case well defined.
    return max([0, *(compute_iou(true_box, pred_box) for true_box in true_boxes)])
async def evaluate_image(request: ImageEvaluationRequest):
    """
    Evaluate image classification and object detection for forest fire smoke.

    Current Model:
    - Classification: a two-convolution-layer CNN whose weights are downloaded
      from the Hugging Face Hub and run on the "val" split of the dataset
    - Object detection: random baseline, one random box per smoke image

    Metrics:
    - Classification accuracy / precision / recall: whether an image contains
      smoke or not (an image counts as "smoke" when its annotation is non-empty)
    - Object Detection accuracy: mean over smoke images of the best IoU
      (Intersection over Union) between the predicted box and the true boxes

    Args:
        request: Evaluation request; ``dataset_name``, ``test_size`` and
            ``test_seed`` are read from it.

    Returns:
        A dict with submission metadata, the classification metrics, the mean
        IoU, the emissions figures reported by the tracker, and the dataset
        configuration echoed from the request.
    """
    # NOTE(review): no @router.post(ROUTE, ...) decorator is visible on this
    # function in this file; confirm the route is registered elsewhere.

    # Get space info
    username, space_url = get_space_info()

    # Load the dataset; only the "val" split is used for evaluation.
    dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
    test_dataset = dataset["val"]

    # Start tracking emissions
    tracker.start()
    tracker.start_task("inference")

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE CODE HERE
    #--------------------------------------------------------------------------------------------

    class ImageClassifier(nn.Module):
        """Two conv/pool stages followed by two fully connected layers."""

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
            self.relu1 = nn.ReLU()
            self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
            self.relu2 = nn.ReLU()
            self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
            # 64 channels * 16 * 16: a 64x64 input halved by each pooling stage.
            self.fc1 = nn.Linear(64 * 16 * 16, 128)
            self.relu3 = nn.ReLU()
            self.fc2 = nn.Linear(128, 2)  # Output layer with 2 classes (0, 1)

        def forward(self, x):
            x = self.pool1(self.relu1(self.conv1(x)))
            x = self.pool2(self.relu2(self.conv2(x)))
            x = x.view(x.size(0), -1)  # flatten everything but the batch axis
            x = self.relu3(self.fc1(x))
            return self.fc2(x)

    class CustomDataset(Dataset):
        """Pairs each row's 'image' with its label and applies a transform."""

        def __init__(self, dataset, labels, transform=None):
            self.dataset = dataset
            self.labels = labels
            self.transform = transform

        def __len__(self):
            return len(self.dataset)

        def __getitem__(self, idx):
            image = self.dataset[idx]['image']
            label = self.labels[idx]
            if self.transform:
                image = self.transform(image)
            return image, label

    true_labels = []
    pred_boxes = []
    true_boxes_list = []  # List of lists, each inner list contains boxes for one image

    # Ground truth is collected first because the labels are needed to build
    # the dataset wrapper used for inference.
    for example in test_dataset:
        # Parse true annotation (YOLO format: class_id x_center y_center width height)
        annotation = example.get("annotations", "").strip()
        has_smoke = len(annotation) > 0
        true_labels.append(int(has_smoke))

        # If there's a true box, parse it and make a random box prediction
        if has_smoke:
            # Parse all true boxes from the annotation
            true_boxes_list.append(parse_boxes(annotation))

            # For baseline, make one random box prediction per image
            # In a real model, you might want to predict multiple boxes
            random_box = [
                random.random(),        # x_center
                random.random(),        # y_center
                random.random() * 0.5,  # width (max 0.5)
                random.random() * 0.5,  # height (max 0.5)
            ]
            pred_boxes.append(random_box)

    # Deterministic preprocessing for evaluation: random augmentations would
    # make the reported metrics vary from run to run.
    # NOTE(review): CenterCrop stands in for the RandomCrop(512, 512) of the
    # augmentation pipeline; confirm it matches how the weights were trained.
    eval_transform = transforms.Compose([
        transforms.CenterCrop(size=(512, 512)),
        transforms.Resize(size=(64, 64)),  # the classifier expects 64x64 inputs
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # shuffle=False keeps predictions in the same order as true_labels.
    test_loader = DataLoader(
        CustomDataset(test_dataset, true_labels, transform=eval_transform),
        batch_size=64,
        shuffle=False,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Download the pre-trained weights (.pth file) from the Hugging Face Hub.
    repo_id = "AlexandreL2024/CNN-Image-Classification"
    filename = "model_CNN_2Layers.pth"
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # NOTE(review): torch.load unpickles the file; consider weights_only=True
    # if the installed torch version supports it.
    state_dict = torch.load(model_path, map_location=device)
    model = ImageClassifier()
    # load_state_dict updates the model in place; its return value is a report
    # of missing/unexpected keys, not the model, so it must not be rebound.
    model.load_state_dict(state_dict)
    model.to(device)

    # Evaluation loop
    predictions = []
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        for images, _ in test_loader:
            outputs = model(images.to(device))
            # The arg-max of the raw outputs is the predicted class; applying
            # a sigmoid first would not change which index is largest.
            predictions.extend(outputs.argmax(dim=1).cpu().tolist())

    #--------------------------------------------------------------------------------------------
    # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------

    # Stop tracking emissions
    emissions_data = tracker.stop_task()

    # Calculate classification metrics
    classification_accuracy = accuracy_score(true_labels, predictions)
    classification_precision = precision_score(true_labels, predictions)
    classification_recall = recall_score(true_labels, predictions)

    # Calculate mean IoU for object detection (only for images with smoke)
    # For each image, we compute the max IoU between the predicted box and all true boxes
    ious = [
        compute_max_iou(true_boxes, pred_box)
        for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
    ]
    mean_iou = float(np.mean(ious)) if ious else 0.0

    # Prepare results dictionary
    results = {
        "username": username,
        "space_url": space_url,
        "submission_timestamp": datetime.now().isoformat(),
        "model_description": DESCRIPTION,
        "classification_accuracy": float(classification_accuracy),
        "classification_precision": float(classification_precision),
        "classification_recall": float(classification_recall),
        "mean_iou": mean_iou,
        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
        "emissions_gco2eq": emissions_data.emissions * 1000,
        "emissions_data": clean_emissions_data(emissions_data),
        "api_route": ROUTE,
        "dataset_config": {
            "dataset_name": request.dataset_name,
            "test_size": request.test_size,
            "test_seed": request.test_seed
        }
    }

    return results