Sompote committed on
Commit
778f96f
·
verified ·
1 Parent(s): a74f3cd

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +60 -8
  2. app.py +151 -0
  3. data.xlsx +0 -0
  4. model.pth +3 -0
  5. predict.py +35 -0
  6. requirements.txt +8 -0
README.md CHANGED
@@ -1,14 +1,66 @@
1
  ---
2
- title: Resistivity
3
- emoji: 📉
4
- colorFrom: indigo
5
- colorTo: gray
6
  sdk: streamlit
7
- sdk_version: 1.42.1
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: Soil resistivity
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Soil Resistivity Prediction
3
+ emoji: 🚗
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: streamlit
7
+ sdk_version: "1.29.0"
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
+ # Resistivity Prediction App
13
+
14
+ This is a Streamlit web application for predicting resistivity based on input features. The app uses a trained deep learning model with an attention mechanism and provides SHAP value explanations for predictions.
15
+
16
+ ## Setup Instructions
17
+
18
+ 1. Create a virtual environment (recommended):
19
+ ```bash
20
+ python -m venv venv
21
+ source venv/bin/activate # On Windows use: venv\Scripts\activate
22
+ ```
23
+
24
+ 2. Install required packages:
25
+ ```bash
26
+ pip install -r requirements.txt
27
+ ```
28
+
29
+ 3. Place the following files in the same directory:
30
+ - `model.pth` (trained model file)
31
+ - `data.xlsx` (dataset file with features and target)
32
+
33
+ ## Running the App
34
+
35
+ To run the app, use the following command:
36
+ ```bash
37
+ streamlit run app.py
38
+ ```
39
+
40
+ The app will be available at http://localhost:8501 by default.
41
+
42
+ ## Usage
43
+
44
+ 1. Enter values for each feature using the input fields
45
+ 2. Click the "Predict" button
46
+ 3. View the prediction result and SHAP value explanation
47
+
48
+ ## Files Description
49
+
50
+ - `app.py`: Main Streamlit application file
51
+ - `predict.py`: Contains model architecture and prediction functions
52
+ - `requirements.txt`: List of required Python packages
53
+ - `model.pth`: Trained model weights (included in this repository, stored via Git LFS)
54
+ - `data.xlsx`: Dataset file with features and target (included in this repository)
55
+
56
+ ## Model Architecture
57
+
58
+ The model uses a TabularTransformer architecture with:
59
+ - Feature embedding layer
60
+ - Multi-head attention mechanism
61
+ - Fully connected layers for prediction
62
+
63
+ ## Requirements
64
+
65
+ - Python 3.8+
66
+ - Required packages listed in requirements.txt
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ from predict import TabularTransformer, model_predict
6
+ from sklearn.preprocessing import MinMaxScaler
7
+ import matplotlib.pyplot as plt
8
+ import shap
9
+
10
# Browser-tab title, icon and wide layout. This stays the first Streamlit
# call in the script, exactly as in the original ordering.
st.set_page_config(page_title="Resistivity Prediction App", page_icon="🔮", layout="wide")

# Page heading followed by a short usage note rendered as Markdown.
st.title("Resistivity Prediction App")
intro_text = """
This app predicts resistivity based on input features. Enter the values for each feature
and click 'Predict' to get the prediction and explanation.
"""
st.markdown(intro_text)
23
+
24
@st.cache_resource
def load_model_and_scalers():
    """Load the dataset, fit the scalers and restore the trained model.

    Reads ``data.xlsx`` from the working directory, uses its first eight
    columns as features and the ninth column as the target, fits one
    ``MinMaxScaler`` on each, and loads the weights stored in ``model.pth``
    into a ``TabularTransformer`` that is switched to evaluation mode.

    Returns:
        A tuple ``(model, scaler_X, scaler_y, feature_names, X)`` where
        ``feature_names`` is the list of feature column labels and ``X`` is
        the feature DataFrame read from the spreadsheet.

    Raises:
        Whatever ``pd.read_excel``, ``torch.load`` or ``load_state_dict``
        raise when a file is missing or does not match the model.
    """
    # Load data for scaling
    df = pd.read_excel('data.xlsx')
    X = df.iloc[:, 0:8]
    y = df.iloc[:, 8]
    feature_names = X.columns.tolist()

    # Fit one scaler on the features and one on the target; the target is
    # reshaped to a single column because the scaler expects 2-D input.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_X.fit(X)
    scaler_y.fit(y.values.reshape(-1, 1))

    # Load model
    model = TabularTransformer(input_dim=8, output_dim=1)
    # map_location forces every stored tensor onto the CPU, so a checkpoint
    # that was saved from a GPU session still loads on a host without CUDA.
    state_dict = torch.load('model.pth', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()

    return model, scaler_X, scaler_y, feature_names, X
46
+
47
def explain_prediction(model, input_df, X_background, scaler_X, scaler_y, feature_names):
    """Explain one prediction with SHAP and save a waterfall plot to disk.

    Args:
        model: Network called on scaled feature tensors.
        input_df: DataFrame holding the row to explain; only its first row
            is used as the plotted feature values.
        X_background: DataFrame from which up to 100 rows are sampled
            (fixed ``random_state=42``) as the SHAP background set.
        scaler_X: Fitted scaler applied to features before the model call.
        scaler_y: Fitted scaler whose inverse maps model output back to the
            target's original scale.
        feature_names: Labels shown on the plot, one per feature.

    Returns:
        ``(explainer.expected_value, shap_values)`` where ``shap_values`` is
        the flattened array of contributions for the explained row.

    Side effects:
        Writes the plot to ``shap_explanation.png`` in the working directory.
    """
    def predict_fn(X):
        # SHAP hands over rows in original units: scale, predict, unscale.
        scaled_inputs = torch.FloatTensor(scaler_X.transform(X))
        with torch.no_grad():
            scaled_outputs = model(scaled_inputs).numpy()
        return scaler_y.inverse_transform(scaled_outputs)

    # Background set: a reproducible sample of at most 100 rows.
    n_background = min(100, len(X_background))
    background_sample = X_background.sample(n=n_background, random_state=42)
    explainer = shap.KernelExplainer(predict_fn, background_sample)

    shap_values = explainer.shap_values(input_df)

    # Some SHAP versions return one array per model output; keep the first.
    if isinstance(shap_values, list):
        shap_values = np.array(shap_values[0])

    # Reduce to a flat vector with one entry per feature.
    if shap_values.ndim > 1:
        if shap_values.shape[0] == len(feature_names):
            shap_values = shap_values.T
        shap_values = shap_values.flatten()

    # The expected value may be a plain scalar or a one-element sequence.
    expected = explainer.expected_value
    base_value = expected if np.isscalar(expected) else expected[0]

    plt.figure(figsize=(12, 8))
    explanation = shap.Explanation(
        values=shap_values,
        base_values=base_value,
        data=input_df.iloc[0].values,
        feature_names=feature_names,
    )
    shap.plots.waterfall(explanation, show=False)
    plt.title('SHAP Value Contributions')
    plt.tight_layout()
    plt.savefig('shap_explanation.png', dpi=300, bbox_inches='tight')
    plt.close()

    return explainer.expected_value, shap_values
90
+
91
# Load model and scalers. Loading has its own try/except so that the setup
# checklist below is shown only for genuine loading failures, not for errors
# raised later while predicting or explaining.
try:
    model, scaler_X, scaler_y, feature_names, X = load_model_and_scalers()
except Exception as e:
    st.error(f"""
    Error loading the model and data. Please make sure:
    1. The model file 'model.pth' exists
    2. The data file 'data.xlsx' exists
    3. All required packages are installed

    Error details: {str(e)}
    """)
    # Nothing below can work without the model, so end this script run here.
    st.stop()

# Create input fields for features
st.subheader("Input Features")

# Two side-by-side columns hold the input widgets.
col1, col2 = st.columns(2)

# Maps each feature name to the value currently entered by the user.
input_values = {}

half = len(feature_names) // 2
for i, feature in enumerate(feature_names):
    # Observed range of the feature in the loaded data bounds the widget.
    min_val = float(X[feature].min())
    max_val = float(X[feature].max())

    # The first half of the features goes in the left column and the
    # remaining ones in the right column.
    with col1 if i < half else col2:
        input_values[feature] = st.number_input(
            f"{feature}",
            min_value=min_val,
            max_value=max_val,
            value=float(X[feature].mean()),
            help=f"Range: {min_val:.2f} to {max_val:.2f}",
        )

# Add predict button
if st.button("Predict"):
    try:
        # Single-row frame whose columns follow the feature order above.
        input_df = pd.DataFrame([input_values])

        # Make prediction
        prediction = model_predict(model, input_df, scaler_X, scaler_y)

        # Display prediction
        st.subheader("Prediction Result")
        st.markdown(f"### Predicted Resistivity: {prediction[0]:.2f}")

        # Calculate and display SHAP values
        st.subheader("Feature Importance Explanation")

        # explain_prediction writes 'shap_explanation.png', shown just below.
        expected_value, shap_values = explain_prediction(
            model, input_df, X, scaler_X, scaler_y, feature_names
        )

        # Display the waterfall plot
        st.image('shap_explanation.png')
    except Exception as e:
        # UI boundary: report the failure in the page with an accurate label.
        st.error(f"Error while computing the prediction or its explanation: {e}")
data.xlsx ADDED
Binary file (26.3 kB). View file
 
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d333cc2a9d3c4f94c55f32b766e58643dbc514a24bea307a8b2aa80dd8d609b0
3
+ size 105820
predict.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import pandas as pd
4
+ import torch.nn as nn
5
+
6
class TabularTransformer(nn.Module):
    """Linear embedding, multi-head self-attention, then a two-layer head.

    Args:
        input_dim: Number of input features per row.
        output_dim: Number of values predicted per row.
        embedding_dim: Width of the embedding fed to the attention layer.
        num_heads: Number of attention heads.
        hidden_dim: Width of the hidden layer in the prediction head.
    """

    def __init__(self, input_dim=7, output_dim=1, embedding_dim=64, num_heads=8, hidden_dim=128):
        super().__init__()
        # Attribute names and the positions inside ``fc`` define the
        # state-dict keys, so they are kept exactly as saved weights expect.
        self.embedding = nn.Linear(in_features=input_dim, out_features=embedding_dim)
        self.attention = nn.MultiheadAttention(embedding_dim, num_heads)
        head_layers = [
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the head's output for the feature tensor ``x``."""
        embedded = self.embedding(x)
        # A leading axis of size 1 is added for the attention call and
        # removed again right after it.
        tokens = embedded.unsqueeze(0)
        attended, _attn_weights = self.attention(tokens, tokens, tokens)
        return self.fc(attended.squeeze(0))
23
+
24
def model_predict(model, X_input, scaler_X, scaler_y):
    """Predict in original target units for the rows of ``X_input``.

    Args:
        model: Callable network applied to the scaled feature tensor.
        X_input: Feature rows accepted by ``scaler_X.transform``.
        scaler_X: Object whose ``transform`` scales the features.
        scaler_y: Object whose ``inverse_transform`` undoes target scaling.

    Returns:
        A flattened NumPy array of the unscaled predictions.
    """
    # Scale the features and hand them to the network as a float tensor.
    scaled_features = torch.FloatTensor(scaler_X.transform(X_input))

    # Inference only: no gradients are tracked for the forward pass.
    with torch.no_grad():
        scaled_output = model(scaled_features)

    # Map the network output back to the target's original scale.
    return scaler_y.inverse_transform(scaled_output.numpy()).flatten()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.32.0
2
+ pandas==2.2.0
3
+ numpy==1.26.4
4
+ torch==2.2.0
5
+ scikit-learn==1.4.0
6
+ matplotlib==3.8.3
7
+ shap==0.44.0
8
+ openpyxl==3.1.2