Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- README.md +60 -8
- app.py +151 -0
- data.xlsx +0 -0
- model.pth +3 -0
- predict.py +35 -0
- requirements.txt +8 -0
README.md
CHANGED
@@ -1,14 +1,66 @@
|
|
1 |
---
|
2 |
-
title: Resistivity
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: mit
|
11 |
-
short_description: Soil resistivity
|
12 |
---
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Soil Resistivity Prediction
|
3 |
+
emoji: 🚗
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: "1.29.0"
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
10 |
---
|
11 |
|
12 |
+
# Resistivity Prediction App
|
13 |
+
|
14 |
+
This is a Streamlit web application for predicting soil resistivity from a set of input features. The app uses a trained deep learning model with an attention mechanism and provides SHAP value explanations for its predictions.
|
15 |
+
|
16 |
+
## Setup Instructions
|
17 |
+
|
18 |
+
1. Create a virtual environment (recommended):
|
19 |
+
```bash
|
20 |
+
python -m venv venv
|
21 |
+
source venv/bin/activate # On Windows use: venv\Scripts\activate
|
22 |
+
```
|
23 |
+
|
24 |
+
2. Install required packages:
|
25 |
+
```bash
|
26 |
+
pip install -r requirements.txt
|
27 |
+
```
|
28 |
+
|
29 |
+
3. Place the following files in the same directory:
|
30 |
+
- `model.pth` (trained model file)
|
31 |
+
- `data.xlsx` (dataset file with features and target)
|
32 |
+
|
33 |
+
## Running the App
|
34 |
+
|
35 |
+
To run the app, use the following command:
|
36 |
+
```bash
|
37 |
+
streamlit run app.py
|
38 |
+
```
|
39 |
+
|
40 |
+
The app will be available at http://localhost:8501 by default.
|
41 |
+
|
42 |
+
## Usage
|
43 |
+
|
44 |
+
1. Enter values for each feature using the input fields
|
45 |
+
2. Click the "Predict" button
|
46 |
+
3. View the prediction result and SHAP value explanation
|
47 |
+
|
48 |
+
## Files Description
|
49 |
+
|
50 |
+
- `app.py`: Main Streamlit application file
|
51 |
+
- `predict.py`: Contains model architecture and prediction functions
|
52 |
+
- `requirements.txt`: List of required Python packages
|
53 |
+
- `model.pth`: Trained model weights (stored with Git LFS; must sit next to `app.py`)
|
54 |
+
- `data.xlsx`: Dataset file with features and target, used to fit the scalers and as SHAP background data (must sit next to `app.py`)
|
55 |
+
|
56 |
+
## Model Architecture
|
57 |
+
|
58 |
+
The model uses a TabularTransformer architecture with:
|
59 |
+
- Feature embedding layer
|
60 |
+
- Multi-head attention mechanism
|
61 |
+
- Fully connected layers for prediction
|
62 |
+
|
63 |
+
## Requirements
|
64 |
+
|
65 |
+
- Python 3.9+ (required by the pinned pandas, NumPy and scikit-learn versions)
|
66 |
+
- Required packages listed in requirements.txt
|
app.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
from predict import TabularTransformer, model_predict
|
6 |
+
from sklearn.preprocessing import MinMaxScaler
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import shap
|
9 |
+
|
10 |
+
# Page-level settings: browser tab title, tab icon and a full-width layout.
st.set_page_config(page_title="Resistivity Prediction App", page_icon="🔮", layout="wide")

# Heading plus a short introduction rendered as markdown.
st.title("Resistivity Prediction App")
st.markdown(
    "\n"
    "This app predicts resistivity based on input features. Enter the values for each feature\n"
    "and click 'Predict' to get the prediction and explanation.\n"
)
|
23 |
+
|
24 |
+
@st.cache_resource
def load_model_and_scalers():
    """Load the dataset, fit the min-max scalers and restore the trained model.

    Reads ``data.xlsx`` from the working directory, takes its first eight
    columns as features and the ninth column as the target, and fits one
    ``MinMaxScaler`` on each. The weights stored in ``model.pth`` are then
    loaded into a ``TabularTransformer`` that is switched to evaluation mode.

    The scalers are re-fitted from the spreadsheet on every cold start rather
    than restored from disk.
    NOTE(review): this presumably reproduces the scaling used during training
    only if ``data.xlsx`` is the training set -- confirm.

    Returns:
        tuple: ``(model, scaler_X, scaler_y, feature_names, X)`` where
        ``feature_names`` is the list of feature column labels and ``X`` is
        the unscaled feature DataFrame.
    """
    # Number of leading feature columns; the target is the column right after.
    n_features = 8

    # Load data for scaling
    df = pd.read_excel('data.xlsx')
    X = df.iloc[:, 0:n_features]
    y = df.iloc[:, n_features]
    feature_names = X.columns.tolist()

    # Initialize and fit scalers
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_X.fit(X)
    scaler_y.fit(y.values.reshape(-1, 1))

    # Load model. map_location='cpu' lets a checkpoint that was saved from a
    # GPU be restored on a CPU-only host instead of failing to deserialize.
    model = TabularTransformer(input_dim=n_features, output_dim=1)
    model.load_state_dict(torch.load('model.pth', map_location='cpu'))
    model.eval()

    return model, scaler_X, scaler_y, feature_names, X
|
46 |
+
|
47 |
+
def explain_prediction(model, input_df, X_background, scaler_X, scaler_y, feature_names):
    """Compute SHAP values for one input row and save a waterfall plot.

    A ``shap.KernelExplainer`` is built around a wrapper that scales raw
    features, runs the model and maps the output back to the original target
    scale. Up to 100 rows sampled from ``X_background`` (fixed seed 42) serve
    as the background set. The plot is written to ``shap_explanation.png`` in
    the working directory.

    Args:
        model: Callable model taking a float tensor of scaled features.
        input_df: DataFrame with the row to explain; only its first row is
            used as the plotted feature data.
            NOTE(review): assumes a single-row frame -- with more rows the
            flattened SHAP values would not line up with ``feature_names``.
        X_background: DataFrame of unscaled feature rows to sample from.
        scaler_X: Fitted scaler exposing ``transform`` for the features.
        scaler_y: Fitted scaler exposing ``inverse_transform`` for the target.
        feature_names: Feature labels shown on the plot.

    Returns:
        tuple: ``(explainer.expected_value, shap_values)`` with
        ``shap_values`` flattened to one dimension.
    """
    # Prediction function for SHAP: raw features in, original-scale target out.
    def predict_fn(X):
        X_tensor = torch.FloatTensor(scaler_X.transform(X))
        with torch.no_grad():
            scaled_pred = model(X_tensor).numpy()
        return scaler_y.inverse_transform(scaled_pred)

    # Use a subset of the data as background to keep KernelExplainer tractable.
    background_sample = X_background.sample(n=min(100, len(X_background)), random_state=42)
    explainer = shap.KernelExplainer(predict_fn, background_sample)

    # Calculate SHAP values for the input
    shap_values = explainer.shap_values(input_df)

    # Handle different SHAP value formats: a list holds one array per output.
    if isinstance(shap_values, list):
        shap_values = np.array(shap_values[0])

    # The waterfall plot needs a 1-D vector with one value per feature.
    if len(shap_values.shape) > 1:
        if shap_values.shape[0] == len(feature_names):
            shap_values = shap_values.T
        shap_values = shap_values.flatten()

    expected = explainer.expected_value
    base_value = expected if np.isscalar(expected) else expected[0]

    # Create waterfall plot. The figure is closed in ``finally`` so a failure
    # while plotting or saving does not leak figures in a long-running server.
    fig = plt.figure(figsize=(12, 8))
    try:
        shap.plots.waterfall(
            shap.Explanation(
                values=shap_values,
                base_values=base_value,
                data=input_df.iloc[0].values,
                feature_names=feature_names,
            ),
            show=False,
        )
        plt.title('SHAP Value Contributions')
        plt.tight_layout()
        plt.savefig('shap_explanation.png', dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    return explainer.expected_value, shap_values
|
90 |
+
|
91 |
+
# Load model and scalers
|
92 |
+
# Load model and scalers. Nothing else on the page can work without them, so
# a failure is reported and the script run is stopped here. Only the loading
# step is guarded by this message, so it no longer mislabels later errors.
try:
    model, scaler_X, scaler_y, feature_names, X = load_model_and_scalers()
except Exception as e:
    st.error(f"""
Error loading the model and data. Please make sure:
1. The model file 'model.pth' exists
2. The data file 'data.xlsx' exists
3. All required packages are installed

Error details: {str(e)}
""")
    st.stop()

# Create input fields for features
st.subheader("Input Features")

# Two columns for the input fields
col1, col2 = st.columns(2)

# Dictionary to store input values, keyed by feature name
input_values = {}

# The first half of the features goes to the left column, the rest to the right.
half = len(feature_names) // 2
for i, feature in enumerate(feature_names):
    # Observed range of the feature in the dataset bounds the widget.
    min_val = float(X[feature].min())
    max_val = float(X[feature].max())

    # Default to the column mean, clamped into [min, max]: floating-point
    # rounding can push the mean of a (near-)constant column just outside the
    # range, which st.number_input rejects.
    default_val = min(max(float(X[feature].mean()), min_val), max_val)

    with col1 if i < half else col2:
        input_values[feature] = st.number_input(
            f"{feature}",
            min_value=min_val,
            max_value=max_val,
            value=default_val,
            help=f"Range: {min_val:.2f} to {max_val:.2f}",
        )

# Add predict button
if st.button("Predict"):
    # Single-row DataFrame whose columns follow the feature order
    input_df = pd.DataFrame([input_values])

    try:
        # Make prediction
        prediction = model_predict(model, input_df, scaler_X, scaler_y)

        # Display prediction
        st.subheader("Prediction Result")
        st.markdown(f"### Predicted Resistivity: {prediction[0]:.2f}")

        # Calculate and display SHAP values
        st.subheader("Feature Importance Explanation")

        # Get SHAP values using the dataset as background; this call also
        # writes 'shap_explanation.png', which is displayed below.
        expected_value, shap_values = explain_prediction(
            model, input_df, X, scaler_X, scaler_y, feature_names
        )

        # Display the waterfall plot
        st.image('shap_explanation.png')
    except Exception as e:
        # Report prediction/explanation failures as such instead of blaming
        # the model or data files.
        st.error(f"Error while computing the prediction or its explanation: {e}")
|
data.xlsx
ADDED
Binary file (26.3 kB). View file
|
|
model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d333cc2a9d3c4f94c55f32b766e58643dbc514a24bea307a8b2aa80dd8d609b0
|
3 |
+
size 105820
|
predict.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import torch.nn as nn
|
5 |
+
|
6 |
+
class TabularTransformer(nn.Module):
    """Regression network for tabular rows: embedding, self-attention, MLP head.

    Each input row is projected to ``embedding_dim`` by a linear layer, passed
    through ``nn.MultiheadAttention`` as a sequence of length one, and mapped
    to ``output_dim`` by a two-layer fully connected head with a ReLU.

    The attribute names ``embedding``, ``attention`` and ``fc`` define the
    ``state_dict`` keys of saved checkpoints, so they must not be renamed.

    Args:
        input_dim: Number of input features per row.
        output_dim: Number of values predicted per row.
        embedding_dim: Width of the embedding and of the attention layer.
        num_heads: Number of attention heads.
        hidden_dim: Width of the hidden layer in the prediction head.
    """

    def __init__(self, input_dim=7, output_dim=1, embedding_dim=64, num_heads=8, hidden_dim=128):
        super().__init__()
        self.embedding = nn.Linear(input_dim, embedding_dim)
        self.attention = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads)
        head_layers = [
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the prediction for every row of ``x``."""
        embedded = self.embedding(x)
        # Prepend a length-1 sequence axis, the layout the attention layer
        # expects; every row therefore attends only to itself.
        tokens = embedded[None]
        attended = self.attention(tokens, tokens, tokens)[0]
        # Drop the sequence axis again before the prediction head.
        return self.fc(attended.squeeze(0))
|
23 |
+
|
24 |
+
def model_predict(model, X_input, scaler_X, scaler_y):
    """Predict targets in the original scale for unscaled feature rows.

    Args:
        model: Callable taking a float tensor of scaled features.
        X_input: Unscaled feature rows accepted by ``scaler_X.transform``.
        scaler_X: Object exposing ``transform`` for the features.
        scaler_y: Object exposing ``inverse_transform`` for the target.

    Returns:
        One-dimensional array with the flattened, inverse-scaled model output.
    """
    # Scale the features and wrap them in a float tensor for the model.
    features = torch.FloatTensor(scaler_X.transform(X_input))

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        scaled_output = model(features).numpy()

    # Map the model output back to the original target scale.
    return scaler_y.inverse_transform(scaled_output).flatten()
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.32.0
|
2 |
+
pandas==2.2.0
|
3 |
+
numpy==1.26.4
|
4 |
+
torch==2.2.0
|
5 |
+
scikit-learn==1.4.0
|
6 |
+
matplotlib==3.8.3
|
7 |
+
shap==0.44.0
|
8 |
+
openpyxl==3.1.2
|