# Spaces: adnaan05 / TruthCheck (Sleeping)
# File size: 6,192 Bytes
# 469c254

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from pathlib import Path
import json
import logging

# Configure the root logger at import time with an INFO threshold; the
# plotting functions below report saved files via logger.info.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)

def plot_training_history(history: dict, save_path: Path = None):
    """
    Plot training and validation metrics over epochs.

    The figure has two panels: training/validation loss on the left and the
    validation accuracy, precision, recall and F1 curves on the right.

    Args:
        history: Dictionary containing training history. It must provide
            'train_loss' and 'val_loss' (one value per epoch) and
            'val_metrics' (one dict per epoch holding the keys 'accuracy',
            'precision', 'recall' and 'f1').
        save_path: Path to save the plot. When it is falsy (the default
            ``None``) the figure is rendered and discarded without being
            written anywhere.

    Raises:
        KeyError: If a required key is missing from ``history`` or from any
            per-epoch entry of ``history['val_metrics']``. The check happens
            before any figure is created.
    """
    metric_names = ['accuracy', 'precision', 'recall', 'f1']

    # Pull every series out of `history` before touching matplotlib so that a
    # malformed history fails fast instead of leaving a half-drawn figure
    # registered in pyplot's global state.
    train_loss = history['train_loss']
    val_loss = history['val_loss']
    metric_series = {
        metric: [epoch_metrics[metric] for epoch_metrics in history['val_metrics']]
        for metric in metric_names
    }

    fig = plt.figure(figsize=(12, 5))
    try:
        # Left panel: loss curves
        plt.subplot(1, 2, 1)
        plt.plot(train_loss, label='Training Loss')
        plt.plot(val_loss, label='Validation Loss')
        plt.title('Training and Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Right panel: one curve per validation metric
        plt.subplot(1, 2, 2)
        for metric, values in metric_series.items():
            plt.plot(values, label=metric.capitalize())

        plt.title('Validation Metrics')
        plt.xlabel('Epoch')
        plt.ylabel('Score')
        plt.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path)
            logger.info(f"Training history plot saved to {save_path}")
    finally:
        # Close this specific figure even when drawing or saving fails, so
        # repeated calls cannot accumulate open figures.
        plt.close(fig)

def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, save_path: Path = None):
    """
    Plot confusion matrix for model predictions.

    Rows correspond to true labels and columns to predicted labels. Both
    axes are annotated with the actual class labels found in the inputs
    rather than with positional indices.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        save_path: Path to save the plot. When it is falsy (the default
            ``None``) the figure is rendered and discarded.
    """
    # Imported locally, as in the rest of this function's history, so that
    # scikit-learn is only required when a confusion matrix is requested.
    from sklearn.metrics import confusion_matrix
    from sklearn.utils.multiclass import unique_labels

    cm = confusion_matrix(y_true, y_pred)
    # With no explicit `labels`, confusion_matrix orders rows/columns by the
    # sorted labels that occur in either array; unique_labels yields that
    # same ordering, which lets the tick labels name the real classes.
    class_labels = list(unique_labels(y_true, y_pred))

    fig = plt.figure(figsize=(8, 6))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels,
        )
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

        if save_path:
            plt.savefig(save_path)
            logger.info(f"Confusion matrix plot saved to {save_path}")
    finally:
        # Close this specific figure even when drawing or saving fails.
        plt.close(fig)

def plot_attention_weights(text: str, attention_weights: np.ndarray, save_path: Path = None):
    """
    Plot attention weights for a given text.

    The text is split on whitespace and one bar is drawn per resulting
    token, labelled with that token.

    Args:
        text: Input text
        attention_weights: Attention weights for each token. Must be
            one-dimensional with exactly one entry per whitespace-separated
            token of ``text``.
        save_path: Path to save the plot. When it is falsy (the default
            ``None``) the figure is rendered and discarded.

    Raises:
        ValueError: If ``attention_weights`` is not one-dimensional or its
            length differs from the number of tokens in ``text``.
    """
    tokens = text.split()
    weights = np.asarray(attention_weights)

    # Validate before creating a figure. Without this check a length-1 array
    # would be broadcast silently across every bar, and any other mismatch
    # would only surface as an opaque shape error from inside matplotlib.
    if weights.ndim != 1 or weights.shape[0] != len(tokens):
        raise ValueError(
            f"attention_weights has shape {weights.shape} but text contains "
            f"{len(tokens)} whitespace-separated tokens; expected one weight per token"
        )

    positions = range(len(tokens))
    fig = plt.figure(figsize=(12, 4))
    try:
        # Plot attention weights
        plt.bar(positions, weights)
        plt.xticks(positions, tokens, rotation=45, ha='right')
        plt.title('Attention Weights')
        plt.xlabel('Tokens')
        plt.ylabel('Attention Weight')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path)
            logger.info(f"Attention weights plot saved to {save_path}")
    finally:
        # Close this specific figure even when drawing or saving fails.
        plt.close(fig)

def plot_model_comparison(metrics: dict, save_path: Path = None):
    """
    Plot comparison of different models' performance.

    Draws a grouped bar chart with one group per model and, inside each
    group, one bar each for accuracy, precision, recall and F1.

    Args:
        metrics: Dictionary containing model metrics, keyed by model name.
            Each value must be a mapping that provides 'accuracy',
            'precision', 'recall' and 'f1'.
        save_path: Path to save the plot. When it is falsy (the default
            ``None``) the figure is rendered and discarded.

    Raises:
        KeyError: If any model entry lacks one of the four metrics. The
            check happens before any figure is created.
    """
    models = list(metrics.keys())
    metric_names = ['accuracy', 'precision', 'recall', 'f1']

    # Collect every bar height before touching matplotlib so that an
    # incomplete metrics dict fails fast instead of leaving a partially
    # drawn figure registered in pyplot's global state.
    series = {
        metric: [metrics[model][metric] for model in models]
        for metric in metric_names
    }

    x = np.arange(len(models))
    width = 0.2

    fig = plt.figure(figsize=(10, 6))
    try:
        # Shift each metric's bars by one bar width so they sit side by side.
        for i, (metric, values) in enumerate(series.items()):
            plt.bar(x + i*width, values, width, label=metric.capitalize())

        plt.title('Model Performance Comparison')
        plt.xlabel('Models')
        plt.ylabel('Score')
        # 1.5 bar widths is the midpoint of a four-bar group.
        plt.xticks(x + width*1.5, models, rotation=45)
        plt.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path)
            logger.info(f"Model comparison plot saved to {save_path}")
    finally:
        # Close this specific figure even when drawing or saving fails.
        plt.close(fig)

def plot_feature_importance(feature_importance: dict, save_path: Path = None):
    """
    Draw a horizontal bar chart of feature importance scores.

    Args:
        feature_importance: Mapping of feature name to importance score
        save_path: Where to write the image; nothing is written when falsy
    """
    names = list(feature_importance.keys())
    scores = list(feature_importance.values())

    # Rank ascending by score, so the largest score takes the highest
    # y position in the chart.
    order = np.argsort(scores)
    ranked_names = [names[idx] for idx in order]
    ranked_scores = [scores[idx] for idx in order]
    slots = range(len(ranked_names))

    plt.figure(figsize=(10, 6))
    plt.barh(slots, ranked_scores)
    plt.yticks(slots, ranked_names)
    plt.title('Feature Importance')
    plt.xlabel('Importance Score')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)
        logger.info(f"Feature importance plot saved to {save_path}")

    plt.close()

def main():
    """Render sample plots into the "visualizations" directory three levels above this file."""
    # Output directory, created on first use
    output_dir = Path(__file__).parent.parent.parent / "visualizations"
    output_dir.mkdir(exist_ok=True)

    # --- Training history demo: three epochs of made-up numbers ---
    per_epoch_scores = [
        {'accuracy': 0.8, 'precision': 0.75, 'recall': 0.85, 'f1': 0.8},
        {'accuracy': 0.85, 'precision': 0.8, 'recall': 0.9, 'f1': 0.85},
        {'accuracy': 0.9, 'precision': 0.85, 'recall': 0.95, 'f1': 0.9},
    ]
    sample_history = {
        'train_loss': [0.5, 0.4, 0.3],
        'val_loss': [0.45, 0.35, 0.25],
        'val_metrics': per_epoch_scores,
    }
    plot_training_history(
        sample_history,
        save_path=output_dir / "training_history.png",
    )

    # --- Confusion matrix demo: six binary predictions, one of them wrong ---
    actual = np.array([0, 1, 0, 1, 1, 0])
    predicted = np.array([0, 1, 0, 0, 1, 0])
    plot_confusion_matrix(
        actual,
        predicted,
        save_path=output_dir / "confusion_matrix.png",
    )

    # --- Model comparison demo ---
    sample_scores = {
        'BERT': {'accuracy': 0.85, 'precision': 0.82, 'recall': 0.88, 'f1': 0.85},
        'BiLSTM': {'accuracy': 0.78, 'precision': 0.75, 'recall': 0.81, 'f1': 0.78},
        'Hybrid': {'accuracy': 0.92, 'precision': 0.9, 'recall': 0.94, 'f1': 0.92},
    }
    plot_model_comparison(
        sample_scores,
        save_path=output_dir / "model_comparison.png",
    )

    # --- Feature importance demo ---
    sample_importance = {
        'BERT': 0.4,
        'BiLSTM': 0.3,
        'Attention': 0.2,
        'TF-IDF': 0.1,
    }
    plot_feature_importance(
        sample_importance,
        save_path=output_dir / "feature_importance.png",
    )

# Generate the example plots only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()