File size: 4,016 Bytes
c96b110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
from datetime import datetime

# Load and prepare data
# The CSV path is relative to the process working directory.
df = pd.read_csv("water_potability (1).csv")

# Replace missing values in every column with that column's mean.
# NOTE(review): the imputer is fit on the full dataset before the train/test
# split below, so test-set statistics influence the imputed training values.
imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

X = df_imputed.drop('Potability', axis=1)
# fit_transform() returns a float array, so the labels come back as 0.0/1.0.
# Cast them to int so the model's classes (and therefore its predictions) are
# integers that can safely be used as positional indices into predict_proba().
y = df_imputed['Potability'].astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Metrics (computed once on the held-out 20% split and reused by predict())
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Per-feature importances from the fitted forest, largest first.
feature_importance = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)

def predict(ph, hardness, solids, chloramines, sulfate,
            conductivity, organic_carbon, trihalomethanes, turbidity):
    """Classify one water sample and build the outputs shown in the UI.

    The nine arguments are the feature values of the sample, given in the
    same order as the training columns (``X.columns``).

    Returns:
        A 4-tuple ``(report, fig_importance, fig_classes, fig_corr)`` where
        ``report`` is the verdict followed by the test-set metrics and the
        prediction confidence, and the remaining items are matplotlib
        figures (feature importance, class distribution, correlation
        heatmap).
    """
    input_data = pd.DataFrame([[
        ph, hardness, solids, chloramines, sulfate,
        conductivity, organic_carbon, trihalomethanes, turbidity
    ]], columns=X.columns)

    prediction = model.predict(input_data)[0]
    label = "✅ Safe to Drink" if prediction == 1 else "❌ Not Safe to Drink"

    # predict_proba() columns follow model.classes_, so locate the predicted
    # label there instead of using the label itself as an index: the label
    # may be a float (e.g. 1.0), which NumPy rejects as an array index.
    class_index = list(model.classes_).index(prediction)
    proba = model.predict_proba(input_data)[0][class_index]

    # --- Visuals ---
    # Feature Importance
    fig1, ax1 = plt.subplots(figsize=(6, 5))
    feature_importance.plot(kind='barh', ax=ax1)
    ax1.set_title("Feature Importance")
    ax1.set_xlabel("Importance")
    ax1.invert_yaxis()  # most important feature at the top

    # Class Distribution
    fig2, ax2 = plt.subplots(figsize=(4, 4))
    sns.countplot(x='Potability', data=df_imputed, ax=ax2)
    ax2.set_title("Potability Class Distribution")

    # Heatmap
    fig3, ax3 = plt.subplots(figsize=(6, 5))
    sns.heatmap(df_imputed.corr(), cmap='coolwarm', annot=False, ax=ax3)
    ax3.set_title("Feature Correlation Heatmap")

    # Unregister the figures from pyplot's global state; otherwise every
    # call leaves three more figures alive for the lifetime of the process.
    # The Figure objects themselves remain usable by the caller.
    for fig in (fig1, fig2, fig3):
        plt.close(fig)

    # Metrics Text
    metrics_info = (
        f"📊 Model Performance on Test Set:\n\n"
        f"- Accuracy : {accuracy:.2f}\n"
        f"- Precision: {precision:.2f}\n"
        f"- Recall   : {recall:.2f}\n"
        f"- F1 Score : {f1:.2f}\n\n"
        f"Prediction Confidence: {proba:.2f}"
    )

    return f"{label}\n\n{metrics_info}", fig1, fig2, fig3

# Gradio Interface
# Two-column layout: feature sliders and the submit button on the left,
# the textual report and the three plots on the right.
with gr.Blocks() as demo:
    gr.Markdown("# 💧 Trustworthy Water Quality Predictor")
    gr.Markdown("Uses Random Forest with data imputation, performance metrics, and feature insights.")

    with gr.Row():
        with gr.Column():
            # One slider per model feature; no explicit initial value is set.
            ph = gr.Slider(0, 14, step=0.1, label="pH")
            hardness = gr.Slider(50, 300, step=1, label="Hardness")
            solids = gr.Slider(3000, 50000, step=100, label="Solids")
            chloramines = gr.Slider(0, 15, step=0.1, label="Chloramines")
            sulfate = gr.Slider(100, 500, step=1, label="Sulfate")
            conductivity = gr.Slider(100, 800, step=1, label="Conductivity")
            organic_carbon = gr.Slider(2, 30, step=0.1, label="Organic Carbon")
            trihalomethanes = gr.Slider(0, 120, step=1, label="Trihalomethanes")
            turbidity = gr.Slider(0, 7, step=0.1, label="Turbidity")
            submit = gr.Button("Predict")

        with gr.Column():
            result = gr.Textbox(label="Prediction + Metrics")
            fig1 = gr.Plot(label="Feature Importance")
            fig2 = gr.Plot(label="Potability Class Distribution")
            fig3 = gr.Plot(label="Correlation Heatmap")

    # The input order must match predict()'s positional parameters, and the
    # output order must match the tuple it returns.
    submit.click(
        fn=predict,
        inputs=[ph, hardness, solids, chloramines, sulfate,
                conductivity, organic_carbon, trihalomethanes, turbidity],
        outputs=[result, fig1, fig2, fig3]
    )

demo.launch()