from flask import Flask, request, jsonify
import numpy as np
import tensorflow as tf
from PIL import Image
import io
import base64
import re
import joblib
import os

app = Flask(__name__)

# Ensure the "images" directory exists
IMAGE_DIR = "images"
if not os.path.exists(IMAGE_DIR):
    os.makedirs(IMAGE_DIR)

# Load all models - use absolute paths for Hugging Face
MODEL_DIR = os.path.join(os.getcwd(), "models")
models = {
    "cnn": tf.keras.models.load_model(os.path.join(MODEL_DIR, "mnist_cnn_model.h5")),
    "svm": joblib.load(os.path.join(MODEL_DIR, "mnist_svm.pkl")),
    "logistic": joblib.load(os.path.join(MODEL_DIR, "mnist_logistic_regression.pkl")),
    "random_forest": joblib.load(os.path.join(MODEL_DIR, "mnist_random_forest.pkl"))
}

# Classification reports for each model
classification_reports = {
    "cnn": """
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       980
           1       1.00      1.00      1.00      1135
           2       0.99      0.99      0.99      1032
           3       0.99      1.00      0.99      1010
           4       1.00      0.99      0.99       982
           5       0.98      0.99      0.99       892
           6       1.00      0.98      0.99       958
           7       0.99      0.99      0.99      1028
           8       1.00      0.99      0.99       974
           9       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000
""",
    "svm": """
              precision    recall  f1-score   support

           0     0.9874    0.9896    0.9885      1343
           1     0.9882    0.9925    0.9903      1600
           2     0.9706    0.9819    0.9762      1380
           3     0.9783    0.9749    0.9766      1433
           4     0.9777    0.9822    0.9800      1295
           5     0.9827    0.9796    0.9811      1273
           6     0.9858    0.9921    0.9889      1396
           7     0.9768    0.9807    0.9788      1503
           8     0.9813    0.9683    0.9748      1357
           9     0.9807    0.9669    0.9738      1420

    accuracy                         0.9810     14000
   macro avg     0.9809    0.9809    0.9809     14000
weighted avg     0.9810    0.9810    0.9810     14000
""",
    "random_forest": """
              precision    recall  f1-score   support

           0     0.9844    0.9866    0.9855      1343
           1     0.9831    0.9831    0.9831      1600
           2     0.9522    0.9674    0.9597      1380
           3     0.9579    0.9532    0.9556      1433
           4     0.9617    0.9699    0.9658      1295
           5     0.9707    0.9631    0.9669      1273
           6     0.9800    0.9828    0.9814      1396
           7     0.9668    0.9681    0.9674      1503
           8     0.9599    0.9528    0.9564      1357
           9     0.9566    0.9465    0.9515      1420

    accuracy                         0.9675     14000
   macro avg     0.9673    0.9674    0.9673     14000
weighted avg     0.9675    0.9675    0.9675     14000
""",
    "logistic": """
              precision    recall  f1-score   support

           0     0.9636    0.9650    0.9643      1343
           1     0.9433    0.9675    0.9553      1600
           2     0.9113    0.8935    0.9023      1380
           3     0.9021    0.8939    0.8980      1433
           4     0.9225    0.9290    0.9257      1295
           5     0.8846    0.8790    0.8818      1273
           6     0.9420    0.9534    0.9477      1396
           7     0.9273    0.9421    0.9347      1503
           8     0.8973    0.8696    0.8832      1357
           9     0.9019    0.9000    0.9010      1420

    accuracy                         0.9204     14000
   macro avg     0.9196    0.9193    0.9194     14000
weighted avg     0.9201    0.9204    0.9202     14000
"""
}


# Preprocess image before prediction
def preprocess_image(image, model_type):
    image = image.resize((28, 28)).convert('L')  # Convert to grayscale
    img_array = np.array(image) / 255.0  # Normalize to [0, 1]
    if model_type == "cnn":
        # CNN expects a 4D tensor with batch and channel dimensions
        return np.expand_dims(np.expand_dims(img_array, axis=0), axis=-1)
    else:
        # Other models expect a flattened feature vector of shape (1, 784)
        return img_array.flatten().reshape(1, -1)


@app.route('/')
def home():
    return jsonify({
        "message": "MNIST Classifier API",
        "available_models": list(models.keys()),
        "endpoints": {
            "/predict": "POST - Send image and model_type",
            "/get_classification_report": "POST - Get model metrics"
        }
    })


@app.route('/get_classification_report', methods=['POST'])
def get_classification_report():
    model_type = request.json['model_type']
    if model_type in classification_reports:
        return jsonify({'report': classification_reports[model_type]})
    return jsonify({'error': 'Model not found'})


@app.route('/predict', methods=['POST'])
def predict():
    if request.method == 'POST':
        data = request.json['image']
        model_type = request.json['model_type']
        img_data = re.sub('^data:image/png;base64,', '', data)
        img = Image.open(io.BytesIO(base64.b64decode(img_data)))

        # Save the image to the "images" folder
        image_path = os.path.join(IMAGE_DIR, "digit.png")
        img.save(image_path)

        # Preprocess image and predict
        processed_image = preprocess_image(img, model_type)

        if model_type in models:
            model = models[model_type]

            # Model-specific prediction logic
            if model_type == "cnn":
                # For CNN, use softmax probabilities
                prediction = model.predict(processed_image)
                predicted_digit = np.argmax(prediction)
                confidence_scores = prediction[0].tolist()
                score_type = "probability"
            elif model_type == "svm":
                # For SVM, use decision function distances
                predicted_digit = model.predict(processed_image)[0]
                # Try to get decision function scores
                if hasattr(model, "decision_function") and callable(getattr(model, "decision_function")):
                    try:
                        # Get raw decision scores
                        decision_scores = model.decision_function(processed_image)
                        # One-vs-one SVMs return one column per class pair, so the
                        # number of columns (not the number of dimensions) tells the
                        # two cases apart
                        if decision_scores.shape[1] == 10:
                            # One-vs-rest output: one score per class, shape (1, n_classes)
                            confidence_scores = decision_scores[0].tolist()
                        else:
                            # One-vs-one output: shape (1, n_classes*(n_classes-1)/2).
                            # Count pairwise wins per class as an approximate score;
                            # a positive value favors the first class of the (i, j) pair.
                            confidence_scores = [0] * 10
                            pair_index = 0
                            for i in range(10):
                                for j in range(i + 1, 10):
                                    if decision_scores[0][pair_index] > 0:
                                        confidence_scores[i] += 1
                                    else:
                                        confidence_scores[j] += 1
                                    pair_index += 1
                        # Shift scores to non-negative values for visualization
                        min_score = min(confidence_scores)
                        if min_score < 0:
                            confidence_scores = [score - min_score for score in confidence_scores]
                        score_type = "decision_distance"
                    except (AttributeError, NotImplementedError) as e:
                        print(f"Error getting decision function: {e}")
                        confidence_scores = create_simulated_scores(int(predicted_digit))
                        score_type = "simulated"
                else:
                    # Fallback if decision_function is not available
                    confidence_scores = create_simulated_scores(int(predicted_digit))
                    score_type = "simulated"
            else:
                # For other models (Random Forest, Logistic Regression)
                predicted_digit = model.predict(processed_image)[0]
                # Try to get probability estimates
                if hasattr(model, "predict_proba") and callable(getattr(model, "predict_proba")):
                    try:
                        confidence_scores = model.predict_proba(processed_image)[0].tolist()
                        score_type = "probability"
                    except (AttributeError, NotImplementedError):
                        confidence_scores = create_simulated_scores(int(predicted_digit))
                        score_type = "simulated"
                else:
                    confidence_scores = create_simulated_scores(int(predicted_digit))
                    score_type = "simulated"

            return jsonify({
                'digit': int(predicted_digit),
                'confidence_scores': confidence_scores,
                'score_type': score_type
            })

        return jsonify({'error': 'Model not found'})


def create_simulated_scores(predicted_digit):
    """Create simulated confidence scores that sum to 1.0 with the highest
    probability assigned to the predicted digit."""
    # Give each digit a small base probability
    scores = [0.01] * 10
    # Remaining probability mass (0.9 here) goes to the predicted digit
    remaining = 1.0 - sum(scores)
    scores[predicted_digit] += remaining
    return scores


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)  # Hugging Face Spaces uses port 7860
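

# --- Example client (illustrative sketch, kept as comments) -------------------
# A minimal sketch of how a separate script might call the /predict endpoint.
# It assumes the server above is running locally on port 7860 and that a file
# named "digit.png" exists; the filename, URL, and use of the third-party
# `requests` package are assumptions for illustration, not part of the API.
#
# import base64
# import requests
#
# with open("digit.png", "rb") as f:
#     encoded = base64.b64encode(f.read()).decode("utf-8")
#
# payload = {
#     "image": "data:image/png;base64," + encoded,  # prefix is stripped server-side
#     "model_type": "cnn",  # one of: cnn, svm, logistic, random_forest
# }
# resp = requests.post("http://localhost:7860/predict", json=payload)
# print(resp.json())  # e.g. {'digit': 7, 'confidence_scores': [...], 'score_type': 'probability'}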