Update app.py
app.py
CHANGED
@@ -524,30 +524,30 @@ elif app_mode == "Data Cleaning":
     # --------------------------
     # Label Encoding
     # --------------------------
-    # --------------------------
-    # Label/One-Hot Encoding
-    # --------------------------
-    enhance_section_title("Encoding Options", "🔢")
-    with st.expander("🔢 Encoding Options"):
[old lines 532-550: not legible in this view]
+    # --------------------------
+    # Label/One-Hot Encoding
+    # --------------------------
+    enhance_section_title("Encoding Options", "🔢")
+    with st.expander("🔢 Encoding Options"):
+        encoding_method = st.radio("Select Encoding Method", ("Label Encoding", "One-Hot Encoding"))
+
+        data_to_encode = st.multiselect("Select categorical columns to encode", df.select_dtypes(include='object').columns)
+        if data_to_encode:
+            if st.button("Apply Encoding"):
+                new_df = df.copy()
+                if encoding_method == "Label Encoding":
+                    label_encoders = {}
+                    for col in data_to_encode:
+                        le = LabelEncoder()
+                        new_df[col] = le.fit_transform(new_df[col].astype(str))
+                        label_encoders[col] = le
+                elif encoding_method == "One-Hot Encoding":
+                    new_df = pd.get_dummies(new_df, columns=data_to_encode, drop_first=True)
+
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+        except Exception as e:
+            st.error(f"Error: {str(e)}")


     # --------------------------
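For reference, a minimal standalone sketch (not part of app.py; the toy DataFrame and "color" column are invented for illustration) of what the two encoding paths added in this hunk produce:

import pandas as pd
from sklearn.preprocessing import LabelEncoder

toy = pd.DataFrame({"color": ["red", "green", "red", "blue"]})

# Label Encoding: one integer code per category, kept in a single column
le = LabelEncoder()
labeled = toy.copy()
labeled["color"] = le.fit_transform(labeled["color"].astype(str))
# classes_ are sorted alphabetically, so blue=0, green=1, red=2

# One-Hot Encoding: one indicator column per remaining category
one_hot = pd.get_dummies(toy, columns=["color"], drop_first=True)
# columns become color_green and color_red; blue is the dropped baseline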
@@ -585,141 +585,137 @@ with st.expander("🔢 Encoding Options"):
         except Exception as e:
             st.error(f"Error: {str(e)}")

-    # --------------------------
     # Bulk Operations
     # --------------------------
[old lines 591-609: not legible in this view]
-        if bulk_action == "Auto-Clean Common Issues":
-            if st.button("Apply Auto-Clean"):
-                new_df = df.copy()
-                new_df = new_df.dropna(axis=1, how='all')  # Remove empty cols
-                new_df = new_df.convert_dtypes()  # Better type inference
-                text_cols = new_df.select_dtypes(include='object').columns
-                new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "Drop All Missing Values":
-            if st.button("Apply Drop All Missing"):
-                new_df = df.copy()
-                new_df = new_df.dropna()  # Drop rows with any missing values
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "Fill Missing Values":
-            fill_value = st.text_input("Fill Value (e.g., 0, mean, median)")
-            if st.button("Apply Fill Missing"):
-                new_df = df.copy()
-                if fill_value.lower() == "mean":
-                    new_df = new_df.fillna(new_df.mean())
-                elif fill_value.lower() == "median":
-                    new_df = new_df.fillna(new_df.median())
-                else:
-                    new_df = new_df.fillna(fill_value)
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "One-Hot Encode All Categorical Columns":
-            if st.button("Apply One-Hot Encoding"):
-                new_df = df.copy()
-                categorical_cols = new_df.select_dtypes(include='object').columns
-                new_df = pd.get_dummies(new_df, columns=categorical_cols, drop_first=True)
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "Min-Max Scaling":
-            if st.button("Apply Min-Max Scaling"):
-                new_df = df.copy()
-                scaler = MinMaxScaler()
-                numerical_cols = new_df.select_dtypes(include=np.number).columns
-                new_df[numerical_cols] = scaler.fit_transform(new_df[numerical_cols])
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "Remove Outliers":
-            if st.button("Apply Remove Outliers"):
-                new_df = df.copy()
-                z_scores = np.abs(stats.zscore(new_df.select_dtypes(include=np.number)))
-                new_df = new_df[(z_scores < 3).all(axis=1)]  # Remove rows with z-score > 3
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
-
-        if bulk_action == "Tokenize Text Columns":
-            text_cols = st.multiselect("Select text columns to tokenize", df.select_dtypes(include='object').columns)
-            if text_cols:
-                if st.button("Apply Tokenization"):
-                    tokenizer = Tokenizer()
-                    new_df = df.copy()
[old lines 671-673: not legible in this view]
-                    update_cleaned_data(new_df)
-                    st.rerun()  # Force re-run after apply
-
[old lines 677-678: not legible in this view]
-            if text_cols:
-                if st.button("Apply TF-IDF Vectorization"):
-                    tfidf = TfidfVectorizer()
-                    new_df = df.copy()
-                    new_col = tfidf.fit_transform(new_df[col]).toarray()
-                    new_df = new_df.drop(columns=[col])
-                    new_df = new_df.join(pd.DataFrame(new_col, columns=[f'{col}_{i}' for i in range(new_col.shape[1])]))
-                    update_cleaned_data(new_df)
-                    st.rerun()  # Force re-run after apply
[old lines 689-692: not legible in this view]
-            if st.button("Apply Date Feature Extraction"):
-                new_df = df.copy()
[old lines 695-700: only truncated fragments are legible in this view]
-                update_cleaned_data(new_df)
-                st.rerun()  # Force re-run after apply
[old lines 703-705: not legible in this view]
-            cat_cols = st.multiselect("Select categorical columns to encode", df.select_dtypes(include='object').columns)
-            if cat_cols:
-                if st.button("Apply Target Encoding"):
-                    new_df = df.copy()
[old lines 710-711: not legible in this view]
-                    new_df[col] = new_df[col].map(target_mean)
-                    update_cleaned_data(new_df)
-                    st.rerun()  # Force re-run after apply
[old lines 715-722: not legible in this view]
+    enhance_section_title("Bulk Actions", "๐")
+    with st.expander("๐ Bulk Actions"):
+        bulk_action = st.selectbox("Select Bulk Action", [
+            "Auto-Clean Common Issues",
+            "Drop All Missing Values",
+            "Fill Missing Values",
+            "One-Hot Encode All Categorical Columns",
+            "Min-Max Scaling",
+            "Remove Outliers",
+            "Tokenize Text Columns",
+            "Vectorize Text Columns (TF-IDF)",
+            "Extract Date Features",
+            "Target Encoding",
+            "Principal Component Analysis (PCA)"
+        ])
+
+        if bulk_action == "Auto-Clean Common Issues":
+            if st.button("Apply Auto-Clean"):
+                new_df = df.copy()
+                new_df = new_df.dropna(axis=1, how='all')  # Remove empty cols
+                new_df = new_df.convert_dtypes()  # Better type inference
+                text_cols = new_df.select_dtypes(include='object').columns
+                new_df[text_cols] = new_df[text_cols].apply(lambda x: x.str.strip())
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Drop All Missing Values":
+            if st.button("Apply Drop All Missing"):
+                new_df = df.copy()
+                new_df = new_df.dropna()  # Drop rows with any missing values
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Fill Missing Values":
+            fill_value = st.text_input("Fill Value (e.g., 0, mean, median)")
+            if st.button("Apply Fill Missing"):
+                new_df = df.copy()
+                if fill_value.lower() == "mean":
+                    new_df = new_df.fillna(new_df.mean())
+                elif fill_value.lower() == "median":
+                    new_df = new_df.fillna(new_df.median())
+                else:
+                    new_df = new_df.fillna(fill_value)
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "One-Hot Encode All Categorical Columns":
+            if st.button("Apply One-Hot Encoding"):
+                new_df = df.copy()
+                categorical_cols = new_df.select_dtypes(include='object').columns
+                new_df = pd.get_dummies(new_df, columns=categorical_cols, drop_first=True)
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Min-Max Scaling":
+            if st.button("Apply Min-Max Scaling"):
+                new_df = df.copy()
+                scaler = MinMaxScaler()
+                numerical_cols = new_df.select_dtypes(include=np.number).columns
+                new_df[numerical_cols] = scaler.fit_transform(new_df[numerical_cols])
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Remove Outliers":
+            if st.button("Apply Remove Outliers"):
+                new_df = df.copy()
+                z_scores = np.abs(stats.zscore(new_df.select_dtypes(include=np.number)))
+                new_df = new_df[(z_scores < 3).all(axis=1)]  # Remove rows with z-score > 3
+                update_cleaned_data(new_df)
+                st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Tokenize Text Columns":
+            text_cols = st.multiselect("Select text columns to tokenize", df.select_dtypes(include='object').columns)
+            if text_cols:
+                if st.button("Apply Tokenization"):
+                    tokenizer = Tokenizer()
+                    new_df = df.copy()
+                    for col in text_cols:
+                        tokenizer.fit_on_texts(new_df[col])
+                        new_df[col] = tokenizer.texts_to_sequences(new_df[col])
+                    update_cleaned_data(new_df)
+                    st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Vectorize Text Columns (TF-IDF)":
+            text_cols = st.multiselect("Select text columns to vectorize", df.select_dtypes(include='object').columns)
+            if text_cols:
+                if st.button("Apply TF-IDF Vectorization"):
+                    tfidf = TfidfVectorizer()
+                    new_df = df.copy()
+                    for col in text_cols:
+                        new_col = tfidf.fit_transform(new_df[col]).toarray()
+                        new_df = new_df.drop(columns=[col])
+                        new_df = new_df.join(pd.DataFrame(new_col, columns=[f'{col}_{i}' for i in range(new_col.shape[1])]))
+                    update_cleaned_data(new_df)
+                    st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Extract Date Features":
+            date_cols = st.multiselect("Select date columns to extract features from", df.select_dtypes(include='datetime').columns)
+            if date_cols:
+                if st.button("Apply Date Feature Extraction"):
+                    new_df = df.copy()
+                    for col in date_cols:
+                        new_df[f'{col}_year'] = new_df[col].dt.year
+                        new_df[f'{col}_month'] = new_df[col].dt.month
+                        new_df[f'{col}_day'] = new_df[col].dt.day
+                        new_df[f'{col}_weekday'] = new_df[col].dt.weekday
+                        new_df[f'{col}_hour'] = new_df[col].dt.hour
+                    update_cleaned_data(new_df)
+                    st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Target Encoding":
+            target_col = st.selectbox("Select target column", df.columns)
+            cat_cols = st.multiselect("Select categorical columns to encode", df.select_dtypes(include='object').columns)
+            if cat_cols:
+                if st.button("Apply Target Encoding"):
+                    new_df = df.copy()
+                    for col in cat_cols:
+                        target_mean = new_df.groupby(col)[target_col].mean()
+                        new_df[col] = new_df[col].map(target_mean)
+                    update_cleaned_data(new_df)
+                    st.rerun()  # Force re-run after apply
+
+        if bulk_action == "Principal Component Analysis (PCA)":
+            n_components = st.slider("Number of components", min_value=1, max_value=min(df.shape[1], 10), value=2)
+            if st.button("Apply PCA"):
+                new_df = df.copy()
+                pca = PCA(n_components=n_components)
+                pca_result = pca.fit_transform(new_df.select_dtypes(include=np.number))
+                new_df = pd.DataFrame(pca_result, columns=[f'PC{i+1}' for i in range(n_components)])


     # --------------------------
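As a point of reference for the "Target Encoding" branch added above, a minimal sketch (separate from app.py, with an invented toy frame and column names) of the groupby-mean-then-map replacement it performs:

import pandas as pd

toy = pd.DataFrame({
    "city": ["A", "A", "B", "B", "B"],   # categorical column to encode
    "price": [10, 20, 30, 40, 50],       # target column
})

# Mean of the target within each category: A -> 15.0, B -> 40.0
target_mean = toy.groupby("city")["price"].mean()

# Each category value is replaced by the mean target of its group
toy["city"] = toy["city"].map(target_mean)
# toy["city"] is now [15.0, 15.0, 40.0, 40.0, 40.0]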
@@ -1360,110 +1356,110 @@ elif app_mode == "Model Training":
     num_trials = st.number_input("Number of Trials", 1, 100, 10, help="Number of trials for hyperparameter search.")

     # ----- [5. Training & Monitoring] -----
-    st.subheader("🎯 Training Configuration")
-
-    import shap  # Ensure SHAP is installed: pip install shap
-
-    class LiveMetrics(Callback):
[old lines 1368-1373: not legible in this view]
-        def update_chart(self):
-            df = pd.DataFrame(st.session_state.metrics)
-            fig = px.line(df, y=['loss', 'val_loss'], title="Training Progress")
-            loss_chart.plotly_chart(fig)
-
-    if st.button("🚀 Start Training"):
-        try:
-            model = tf.keras.Sequential()
-
-            # Add layers with regularization
-            for layer in st.session_state.layers:
-                layer_class = {
-                    "Dense": Dense,
-                    "Conv2D": Conv2D,
-                    "LSTM": LSTM
-                }[layer['type']]
-
-                # Add regularization
-                if l2_reg > 0:
-                    layer['kernel_regularizer'] = tf.keras.regularizers.l2(l2_reg)
-
-                # Add
[old lines 1398-1438: not legible in this view]
-    export_format = st.radio("Format", [
-        "TensorFlow SavedModel",
-        "HDF5",
-        "ONNX"
-    ])
[old lines 1446-1463: not legible in this view]
+    st.subheader("🎯 Training Configuration")
+
+    import shap  # Ensure SHAP is installed: pip install shap
+
+    class LiveMetrics(Callback):
+        def on_epoch_end(self, epoch, logs=None):
+            if 'metrics' not in st.session_state:
+                st.session_state.metrics = []
+            st.session_state.metrics.append(logs)
+            self.update_chart()
+
+        def update_chart(self):
+            df = pd.DataFrame(st.session_state.metrics)
+            fig = px.line(df, y=['loss', 'val_loss'], title="Training Progress")
+            loss_chart.plotly_chart(fig)
+
+    if st.button("🚀 Start Training"):
+        try:
+            model = tf.keras.Sequential()
+
+            # Add layers with regularization
+            for layer in st.session_state.layers:
+                layer_class = {
+                    "Dense": Dense,
+                    "Conv2D": Conv2D,
+                    "LSTM": LSTM
+                }[layer['type']]
+
+                # Add regularization
+                if l2_reg > 0:
+                    layer['kernel_regularizer'] = tf.keras.regularizers.l2(l2_reg)
+
+                model.add(layer_class(**layer))
+
+                # Add batch norm after each layer
+                if batch_norm:
+                    model.add(BatchNormalization())
+
+                # Add global dropout
+                if dropout > 0:
+                    model.add(Dropout(dropout))
+
+            model.compile(
+                optimizer=optimizer,
+                loss=loss,
+                metrics=metrics
+            )
+
+            # Show model summary
+            st.subheader("Model Architecture")
+            with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
+                plot_model(model, to_file=tmp.name, show_shapes=True)
+                st.image(tmp.name)
+
+            # Start training
+            st.subheader("Live Training Metrics")
+            loss_chart = st.empty()
+            model.fit(X_train, y_train,
+                      epochs=10,
+                      validation_data=(X_val, y_val),
+                      callbacks=[LiveMetrics()])
+
+            # SHAP explanations
+            st.subheader("SHAP Explanations")
+            explainer = shap.KernelExplainer(model.predict, X_train[:100])
+            shap_values = explainer.shap_values(X_train[:100])
+            shap.summary_plot(shap_values, X_train[:100], plot_type="bar")
+            st.pyplot(bbox_inches='tight')
+
+        except Exception as e:
+            st.error(f"Training failed: {str(e)}")


+
+    # ----- [6. Export & Deployment] -----
+    st.subheader("💾 Export Model")
+
+    export_format = st.radio("Format", [
+        "TensorFlow SavedModel",
+        "HDF5",
+        "ONNX"
+    ])
+
+    if st.button("Export"):
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            if export_format == "HDF5":
+                model.save(tmp.name + '.h5')
+            elif export_format == "ONNX":
+                import tf2onnx
+                model_proto, _ = tf2onnx.convert.from_keras(model)
+                with open(tmp.name + '.onnx', 'wb') as f:
+                    f.write(model_proto.SerializeToString())
+            else:
+                tf.saved_model.save(model, tmp.name)
+
+            with open(tmp.name, 'rb') as f:
+                st.download_button(
+                    "Download Model",
+                    f.read(),
+                    file_name=f"model.{'h5' if export_format=='HDF5' else 'onnx'}"
+                )

 # Predictions Section (Fixed)
-
+elif app_mode == "Predictions":
     st.title("🔮 Predictive Analytics - Informed Business Decisions")
     st.warning("Note: SHAP explanations currently work best with tree-based models like Random Forest")

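For orientation, a minimal self-contained sketch of the Keras callback hook that the LiveMetrics class above overrides, with the Streamlit session state and chart placeholder swapped for a plain list; the model, data, and HistoryCollector name are invented for illustration:

import numpy as np
import tensorflow as tf

class HistoryCollector(tf.keras.callbacks.Callback):
    """Collect the metrics dict Keras passes at the end of every epoch."""
    def __init__(self):
        super().__init__()
        self.records = []

    def on_epoch_end(self, epoch, logs=None):
        # logs typically holds 'loss' and, when validation data is given, 'val_loss'
        self.records.append(dict(logs or {}))

# Tiny throwaway regression model and random data, just to exercise the callback
X = np.random.rand(64, 4).astype("float32")
y = np.random.rand(64, 1).astype("float32")
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(optimizer="adam", loss="mse")

collector = HistoryCollector()
model.fit(X, y, epochs=3, validation_split=0.25, callbacks=[collector], verbose=0)
# collector.records now holds one dict per epoch and can be plotted the same way
# the app plots st.session_state.metrics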