Remember to change the test-data path and the model checkpoint path before running!
Because the headlines are encoded into vectors with an embedding model, encoding the test set takes 10+ minutes. I could not run this step on my end, since I do not have access to the hidden test set!

In [1]:
import copy
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler
from torch.utils.data import TensorDataset
from tqdm import tqdm

In [2]:
class LabelSmoothingBCELoss(nn.Module):
    """Binary cross-entropy computed against softened (label-smoothed) targets.

    Hard 0/1 targets are pulled towards 0.5 by ``smoothing`` before the loss
    is evaluated, i.e. ``t -> t * (1 - smoothing) + 0.5 * smoothing``.
    """

    def __init__(self, smoothing=0.1):
        """
        Args:
            smoothing (float): Amount of label smoothing to apply
                (0 leaves the targets untouched).
        """
        super().__init__()
        self.smoothing = smoothing

    def forward(self, predictions, targets):
        """
        Evaluate the smoothed loss.

        Args:
            predictions (torch.Tensor): Probabilities in [0, 1]; they are fed
                directly to ``binary_cross_entropy``, which expects
                probabilities rather than logits.
            targets (torch.Tensor): Binary labels with the same shape as
                ``predictions``.

        Returns:
            torch.Tensor: Scalar loss (mean over all elements).
        """
        keep_fraction = 1 - self.smoothing
        uniform_mass = 0.5 * self.smoothing
        softened = targets * keep_fraction + uniform_mass
        return nn.functional.binary_cross_entropy(predictions, softened)

class EarlyStoppingCallback:
    """Patience-based early stopping that also remembers the best weights.

    Call the instance once per epoch with the validation loss. It keeps a
    deep copy of the model's ``state_dict`` from the best epoch seen so far in
    ``best_model_state`` and raises the ``early_stop`` flag after ``patience``
    epochs without a sufficient improvement.
    """

    def __init__(self, patience=5, min_delta=0.001):
        """
        Args:
            patience (int): Number of non-improving epochs tolerated before stopping.
            min_delta (float): The loss must drop by more than this amount
                below the best value to count as an improvement.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0                  # non-improving epochs since the last best
        self.best_loss = float('inf')
        self.early_stop = False
        self.best_model_state = None      # deep copy of the best state_dict so far

    def __call__(self, val_loss, model):
        """
        Record one epoch's validation loss.

        Args:
            val_loss (float): Current validation loss.
            model (nn.Module): Model whose ``state_dict`` is snapshotted on improvement.

        Returns:
            bool: True once training should stop.
        """
        improved = val_loss < self.best_loss - self.min_delta

        if not improved:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return self.early_stop

        # New best epoch: reset the patience counter and snapshot the weights.
        # deepcopy is required because state_dict() returns references to the
        # live tensors, which later optimizer steps would overwrite.
        self.best_loss = val_loss
        self.counter = 0
        self.best_model_state = copy.deepcopy(model.state_dict())
        return self.early_stop

class EnsembleMLPClassifier(nn.Module):
    """Gated MLP producing one sigmoid probability per input row.

    The input is first multiplied element-wise by a bias-free linear
    projection of itself (``gate``), then passed through a stack of
    ``Linear -> BatchNorm1d -> activation -> Dropout`` blocks and a final
    ``Linear(·, 1) -> Sigmoid`` head.
    """

    def __init__(self,
                 input_dim=1024,  # BGE embedding dimension
                 hidden_layers=None,
                 dropout_rate=0.2,
                 activation=nn.ReLU(),  # Allow passing activation functions dynamically
                 device=None):
        """
        Args:
            input_dim (int): Size of each input vector.
            hidden_layers (list[int] | None): Width of each hidden block;
                ``None`` selects ``[512, 256, 128]``.
            dropout_rate (float): Dropout probability used after every hidden block.
            activation (nn.Module): Activation module; the same instance is
                reused in every hidden block.
            device (torch.device | None): Target device; when falsy, CUDA is
                used if available, otherwise CPU.
        """
        super().__init__()

        hidden_layers = [512, 256, 128] if hidden_layers is None else hidden_layers

        # Set device (GPU if available, else CPU)
        self.device = device or torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Keep the constructor arguments around for later inspection.
        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        self.dropout_rate = dropout_rate
        self.activation = activation

        # Linear gate: forward() computes gate(x) * x.
        self.gate = nn.Linear(input_dim, input_dim, bias=False)

        # Walk consecutive (in, out) width pairs to build the hidden blocks.
        widths = [input_dim, *hidden_layers]
        stack = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(in_features, out_features))
            stack.append(nn.BatchNorm1d(out_features))
            stack.append(activation)
            stack.append(nn.Dropout(dropout_rate))

        # Binary-classification head.
        stack.append(nn.Linear(widths[-1], 1))
        stack.append(nn.Sigmoid())

        self.model = nn.Sequential(*stack)
        self.to(self.device)

    def forward(self, x):
        """Gate the input element-wise, then run it through the MLP stack."""
        gated = self.gate(x) * x
        return self.model(gated)

class EnsembleClassifier:
    """Accuracy-weighted ensemble of ``EnsembleMLPClassifier`` networks.

    Typical workflow:

    1. ``train`` fits every member on its own random 80/20 split.
    2. ``compute_test_weights`` converts per-model accuracy into voting weights.
    3. ``predict`` / ``evaluate`` combine the members' sigmoid outputs using
       those weights.
    4. ``save_models`` / ``load_models`` persist the whole ensemble (member
       state dicts plus voting weights) in a single checkpoint file.
    """

    def __init__(self, num_models=5, label_smoothing=0.1):
        """
        Args:
            num_models (int): Number of ensemble members to build.
            label_smoothing (float): Smoothing factor handed to
                ``LabelSmoothingBCELoss`` during ``train``.
        """
        self.models = self._create_diverse_models(num_models)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.label_smoothing = label_smoothing
        # Voting weights; populated by compute_test_weights() or load_models().
        self.model_weights = None

    def _create_diverse_models(self, num_models):
        """
        Build ``num_models`` members, cycling deterministically through a
        fixed list of architectures and weight-initialisation schemes.

        Returns:
            list[EnsembleMLPClassifier]: The freshly initialised members.
        """
        # Predefined configurations for consistency across runs
        architectures = [
            {'hidden_layers': [512, 256, 128], 'dropout_rate': 0.2, 'activation': nn.ReLU()},
            {'hidden_layers': [1024, 512], 'dropout_rate': 0.3, 'activation': nn.LeakyReLU()},
            {'hidden_layers': [256, 128, 64], 'dropout_rate': 0.1, 'activation': nn.GELU()},
            {'hidden_layers': [512, 128], 'dropout_rate': 0.25, 'activation': nn.SELU()},
            {'hidden_layers': [256, 128], 'dropout_rate': 0.15, 'activation': nn.Tanh()}
        ]

        # Optimizer strategies
        optimizers = [optim.Adam, optim.AdamW, optim.SGD]

        # Weight-initialisation schemes, cycled per model index.
        init_methods = [
            nn.init.xavier_uniform_,
            nn.init.kaiming_normal_,
            nn.init.orthogonal_
        ]

        models = []
        for i in range(num_models):
            # Use predefined architectures in a consistent order
            config = architectures[i % len(architectures)]

            model = EnsembleMLPClassifier(
                input_dim=1024,
                hidden_layers=config['hidden_layers'],
                dropout_rate=config['dropout_rate'],
                activation=config['activation']
            )

            # Bind this model's init scheme explicitly rather than closing over `i`.
            def init_weights(m, init_method=init_methods[i % len(init_methods)]):
                if isinstance(m, nn.Linear):
                    init_method(m.weight)
                    if m.bias is not None:
                        nn.init.zeros_(m.bias)

            # Only the MLP stack is re-initialised; the gate keeps its default init.
            model.model.apply(init_weights)

            # NOTE: train() currently always builds an AdamW optimizer and does
            # not read these two attributes; they are kept as metadata for callers.
            model.optimizer_fn = optimizers[i % len(optimizers)]
            model.regularization = {
                'weight_decay': 1e-5  # Example regularization value
            }

            models.append(model)

        return models

    def train(self, train_dataset, batch_size=32, num_epochs=20):
        """
        Train every ensemble member on its own random 80/20 train/validation split.

        Each member is optimised with AdamW (lr=1e-3) under a cosine-annealing
        schedule, using label-smoothed BCE and early stopping (patience 4).
        When training ends, whether by early stopping or by exhausting
        ``num_epochs``, the member is restored to the weights of its best
        validation epoch and left in eval mode.

        Args:
            train_dataset (Dataset): Yields ``(inputs, labels)`` pairs. Labels
                may be shaped ``(N,)`` or ``(N, 1)``.
            batch_size (int): Mini-batch size for both loaders.
            num_epochs (int): Maximum number of epochs per member.

        Raises:
            ValueError: If the dataset is too small to produce a non-empty
                validation split and at least two training samples.
        """
        total_size = len(train_dataset)
        train_size = int(0.8 * total_size)
        val_size = total_size - train_size
        if train_size < 2 or val_size < 1:
            raise ValueError(
                f"train_dataset has {total_size} samples; at least 3 are required "
                "for an 80/20 train/validation split."
            )

        # BatchNorm1d cannot run in training mode on a batch of one sample, so
        # drop the trailing batch only when it would contain exactly one row.
        drop_last = train_size % batch_size == 1

        for model_idx, model in enumerate(tqdm(self.models, desc="Training Models", position=0)):
            print(f"Starting training for Model {model_idx + 1}/{len(self.models)}")

            # A fresh random split per member adds diversity across the ensemble.
            train_subset, val_subset = random_split(train_dataset, [train_size, val_size])

            train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True,
                                      drop_last=drop_last)
            val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

            # Optimizer with learning rate scheduler
            optimizer = optim.AdamW(model.parameters(), lr=1e-3)
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=num_epochs,
                eta_min=1e-5
            )

            criterion = LabelSmoothingBCELoss(smoothing=self.label_smoothing)
            early_stopping = EarlyStoppingCallback(patience=4, min_delta=0.001)

            epoch_progress = tqdm(range(num_epochs), desc=f"Model {model_idx} Training", position=1, leave=False)

            for _ in epoch_progress:
                # Training phase
                model.train()
                total_loss = 0.0
                for inputs, labels in train_loader:
                    inputs, labels = inputs.to(model.device), labels.to(model.device)

                    optimizer.zero_grad()
                    outputs = model(inputs)
                    # reshape() accepts both (B,) and (B, 1) label layouts.
                    loss = criterion(outputs, labels.float().reshape(outputs.shape))
                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()
                avg_train_loss = total_loss / len(train_loader)

                # Validation phase
                model.eval()
                val_loss = 0.0
                with torch.no_grad():
                    for val_inputs, val_labels in val_loader:
                        val_inputs, val_labels = val_inputs.to(model.device), val_labels.to(model.device)
                        val_outputs = model(val_inputs)
                        val_loss += criterion(
                            val_outputs, val_labels.float().reshape(val_outputs.shape)
                        ).item()

                avg_val_loss = val_loss / len(val_loader)
                epoch_progress.set_postfix({
                    'train_loss': avg_train_loss,
                    'val_loss': avg_val_loss
                })

                # Early stopping check
                if early_stopping(avg_val_loss, model):
                    print(f"Early stopping triggered for Model {model_idx}")
                    break

                # Learning rate adjustment
                scheduler.step()

            # Always finish on the best validation checkpoint, including when
            # the epoch budget ran out before the patience was exhausted.
            if early_stopping.best_model_state is not None:
                model.load_state_dict(early_stopping.best_model_state)
            model.eval()

    def compute_test_weights(self, test_loader, power_scaling_factor=2, smoothing=0.1):
        """
        Derive per-model voting weights from each member's accuracy on ``test_loader``.

        Accuracies are raised to ``power_scaling_factor`` to exaggerate
        differences, blended with their mean by ``smoothing`` to avoid
        over-reliance on a single member, and normalised to sum to 1. The
        result is stored in ``self.model_weights``.

        NOTE(review): fitting the weights on the same data that is later used
        for evaluation leaks label information; a separate held-out loader is
        preferable where one is available.

        Args:
            test_loader: Iterable of ``(inputs, labels)`` batches. Labels may
                be shaped ``(B,)`` or ``(B, 1)``.
            power_scaling_factor (float): Exponent applied to the accuracies.
            smoothing (float): Fraction of each weight replaced by the mean.

        Raises:
            ValueError: If ``test_loader`` yields no samples.
        """
        model_accuracies = []
        for model in self.models:
            correct = 0
            total = 0
            model.eval()
            with torch.no_grad():
                for inputs, labels in test_loader:
                    inputs, labels = inputs.to(model.device), labels.to(model.device)
                    preds = (model(inputs) > 0.5).float()
                    # Flatten both sides: comparing (B, 1) with (B,) would
                    # broadcast to (B, B) and inflate the hit count.
                    correct += (preds.reshape(-1) == labels.float().reshape(-1)).sum().item()
                    total += labels.size(0)
            if total == 0:
                raise ValueError("test_loader yielded no samples; cannot compute model weights.")
            model_accuracies.append(correct / total)

        accuracies = np.array(model_accuracies)
        print(f"Raw model accuracies: {accuracies}")

        # Use power scaling to exaggerate differences (e.g., square the accuracies)
        scaled_accuracies = accuracies ** power_scaling_factor

        # Smooth the accuracies slightly to avoid over-reliance on any single model
        smoothed_accuracies = scaled_accuracies * (1 - smoothing) + smoothing * np.mean(scaled_accuracies)

        # Normalize weights so they sum to 1; fall back to uniform weights if
        # every member scored zero (the sum would otherwise be a zero divisor).
        weight_sum = smoothed_accuracies.sum()
        if weight_sum > 0:
            weights = smoothed_accuracies / weight_sum
        else:
            weights = np.full(len(accuracies), 1.0 / len(accuracies))

        self.model_weights = torch.tensor(weights, dtype=torch.float32).to(self.device)
        print(f"Model weights after scaling: {self.model_weights}")

    def _weighted_scores(self, data_loader):
        """
        Run every member over ``data_loader`` in a single pass.

        Iterating the loader only once keeps all members (and any labels)
        aligned even if the loader shuffles.

        Returns:
            tuple: ``(scores, labels)`` where ``scores`` is a 1-D tensor of
            weighted ensemble probabilities and ``labels`` is the concatenated
            label tensor, or ``None`` if the batches carry no labels.

        Raises:
            ValueError: If the voting weights have not been computed or loaded.
        """
        if self.model_weights is None:
            raise ValueError("Model weights not computed. Call compute_test_weights first.")

        for model in self.models:
            model.eval()

        weights = self.model_weights.view(1, -1)
        score_chunks = []
        label_chunks = []
        with torch.no_grad():
            for batch in data_loader:
                if isinstance(batch, (list, tuple)):
                    inputs = batch[0]
                    if len(batch) > 1:
                        label_chunks.append(batch[1])
                else:
                    inputs = batch

                # (B, M, 1) -> (B, M): one column of probabilities per member.
                per_model = torch.stack(
                    [model(inputs.to(model.device)) for model in self.models], dim=1
                ).squeeze(-1)
                score_chunks.append((per_model * weights.to(per_model.device)).sum(dim=1))

        scores = torch.cat(score_chunks)
        labels = torch.cat(label_chunks) if label_chunks else None
        return scores, labels

    def predict(self, test_loader, confidence_threshold=0.5, return_raw_scores=True):
        """
        Prediction with confidence-weighted voting, optionally returning raw scores.

        Args:
            test_loader: Iterable of batches. Each batch is either
                ``(inputs, labels)``, where the labels are ignored, or just
                ``inputs``.
            confidence_threshold (float): Weighted probability above which the
                positive class is predicted.
            return_raw_scores (bool): Also return the weighted probabilities.

        Returns:
            torch.Tensor of 0/1 predictions, or ``(predictions, scores)`` where
            ``scores`` is a NumPy array when ``return_raw_scores`` is True.

        Raises:
            ValueError: If the voting weights have not been computed or loaded.
        """
        weighted_preds, _ = self._weighted_scores(test_loader)

        # Final prediction with thresholding
        final_preds = (weighted_preds > confidence_threshold).float()

        if return_raw_scores:
            return final_preds, weighted_preds.cpu().numpy()

        return final_preds

    def save_models(self, save_dir='ensemble_models/model_test_4'):
        """
        Write every member's ``state_dict`` and the voting weights to
        ``<save_dir>/ensemble_checkpoint.pth`` (the directory is created if needed).
        """
        os.makedirs(save_dir, exist_ok=True)

        # Stored as a CPU tensor so the checkpoint contains only tensors and
        # plain containers.
        stored_weights = None
        if self.model_weights is not None:
            stored_weights = self.model_weights.detach().cpu()

        save_data = {
            'models': {},
            'model_weights': stored_weights
        }

        for i, model in tqdm(enumerate(self.models), desc="Saving Models", total=len(self.models)):
            save_data['models'][i] = model.state_dict()

        torch.save(save_data, os.path.join(save_dir, 'ensemble_checkpoint.pth'))

    def load_models(self, save_dir='ensemble_models/model_test_4'):
        """
        Restore member weights and voting weights from
        ``<save_dir>/ensemble_checkpoint.pth`` and put every member in eval mode.

        The ensemble must have been constructed with the same ``num_models``
        (and therefore the same architectures) as the one that was saved.
        """
        checkpoint_path = os.path.join(save_dir, 'ensemble_checkpoint.pth')

        # map_location lets a checkpoint written on a GPU machine load on a
        # CPU-only host (and vice versa).
        # NOTE(review): older checkpoints stored the voting weights as a NumPy
        # array; recent PyTorch releases that default to weights_only=True may
        # refuse those. Confirm, and re-save from a trusted source if needed.
        save_data = torch.load(checkpoint_path, map_location=self.device)

        for i, model in tqdm(enumerate(self.models), desc="Loading Models", total=len(self.models)):
            model.load_state_dict(save_data['models'][i])
            model.eval()  # Set to evaluation mode

        stored_weights = save_data['model_weights']
        if stored_weights is not None:
            # as_tensor accepts both the tensor format and the legacy NumPy format.
            self.model_weights = torch.as_tensor(stored_weights, dtype=torch.float32).to(self.device)

    def evaluate(self, test_loader):
        """
        Evaluate ensemble performance with weighted voting, supporting both CPU and GPU.

        Predictions and labels are gathered in the same pass over
        ``test_loader`` so they stay aligned. Predictions use a 0.5 threshold
        on the weighted probability.

        Args:
            test_loader: Iterable of ``(inputs, labels)`` batches.

        Returns:
            dict: ``{"accuracy", "precision", "recall"}``.

        Raises:
            ValueError: If voting weights are missing or the loader has no labels.
        """
        weighted_preds, all_labels = self._weighted_scores(test_loader)
        if all_labels is None:
            raise ValueError("evaluate() requires a loader that yields (inputs, labels) batches.")

        # Flatten to 1-D NumPy arrays on the CPU for metric computation.
        test_preds = (weighted_preds > 0.5).float().cpu().numpy().ravel()
        all_labels = all_labels.cpu().numpy().ravel()

        # Calculate metrics
        accuracy = np.mean(test_preds == all_labels)
        precision = precision_score(all_labels, test_preds, zero_division=0)
        recall = recall_score(all_labels, test_preds, zero_division=0)

        return {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall
        }

In [3]:
!pip install FlagEmbedding
from FlagEmbedding import BGEM3FlagModel
model = BGEM3FlagModel('BAAI/bge-m3')

# Remember to change the test path
test_data_path = "/home/jovyan/work/test_data_random_subset.csv"

data = data = pd.read_csv(test_data_path)
titles = data['title'].tolist()
labels = data['labels'].tolist()

batch_size = 32
embeddings = []

print('Encoding titles...')
for i in range(0, len(titles), batch_size):
    batch = titles[i:i + batch_size]
    batch_embeddings = model.encode(batch, batch_size=batch_size, max_length=512)['dense_vecs']
    embeddings.extend(batch_embeddings)
    print(f"Processed {i + len(batch)}/{len(titles)} titles")

embeddings_df = pd.DataFrame(embeddings)
embeddings_df['label'] = labels

# Convert embeddings and labels to PyTorch tensors
X_test = torch.FloatTensor(embeddings_df.iloc[:, :-1].values)  # Features
y_test = torch.FloatTensor(embeddings_df['label'].values).view(-1, 1)  # Labels

# Create DataLoader for the test dataset
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

Encoding titles...


You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Processed 20/20 titles


In [5]:
from sklearn.metrics import precision_score, recall_score

# Build a five-member ensemble; load_models() below fills each member from
# the checkpoint, so the member count has to match the saved ensemble.
ensemble = EnsembleClassifier(num_models=5)

# Restore the trained members and their voting weights.
# Be sure to change save_dir to the directory that actually holds the checkpoint.
ensemble.load_models(save_dir='/home/jovyan/work/ensemble_models/model_test_4')

# Score the ensemble on the encoded test set (accuracy / precision / recall).
results = ensemble.evaluate(test_loader)
print(results)

Loading Models: 100%|██████████| 5/5 [00:00<00:00, 1799.05it/s]

{'accuracy': 0.9, 'precision': 0.9, 'recall': 0.9}



