Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -531,7 +531,6 @@ elif app_mode == "Advanced EDA":
|
|
531 |
'plot_type': "Histogram",
|
532 |
'x_col': df.columns[0] if len(df.columns) > 0 else None,
|
533 |
'y_col': df.columns[1] if len(df.columns) > 1 else None,
|
534 |
-
'z_col': df.columns[2] if len(df.columns) > 2 else None,
|
535 |
'color_col': None,
|
536 |
'size_col': None,
|
537 |
'time_col': None,
|
@@ -568,9 +567,8 @@ elif app_mode == "Advanced EDA":
|
|
568 |
st.sidebar.header("π Visualization Configuration")
|
569 |
plot_types = [
|
570 |
"Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
|
571 |
-
"Correlation Heatmap", "Parallel Coordinates", "
|
572 |
-
"
|
573 |
-
"Sunburst Chart", "Funnel Chart", "Clustering Analysis"
|
574 |
]
|
575 |
st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
|
576 |
"Choose Visualization",
|
@@ -579,33 +577,49 @@ elif app_mode == "Advanced EDA":
|
|
579 |
)
|
580 |
|
581 |
# Dynamic Controls Based on Plot Type
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
|
|
|
|
|
|
589 |
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
597 |
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
|
|
|
|
609 |
|
610 |
# Advanced Plot Customization
|
611 |
with st.expander("π¨ Advanced Customization", expanded=False):
|
@@ -623,7 +637,14 @@ elif app_mode == "Advanced EDA":
|
|
623 |
fig = None
|
624 |
config = st.session_state.eda_config
|
625 |
|
626 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
627 |
color_palette = config['color_palette']
|
628 |
colors = getattr(pc.sequential, color_palette)
|
629 |
fig = px.histogram(
|
@@ -633,7 +654,9 @@ elif app_mode == "Advanced EDA":
|
|
633 |
color_discrete_sequence = [colors[0]]
|
634 |
)
|
635 |
|
636 |
-
elif
|
|
|
|
|
637 |
fig = px.scatter(
|
638 |
df, x=config['x_col'], y=config['y_col'],
|
639 |
color=config['color_col'],
|
@@ -641,14 +664,17 @@ elif app_mode == "Advanced EDA":
|
|
641 |
hover_data=config['hover_data_cols']
|
642 |
)
|
643 |
|
644 |
-
elif
|
|
|
|
|
|
|
645 |
fig = px.scatter_3d(
|
646 |
df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
|
647 |
color=config['color_col'],
|
648 |
color_discrete_sequence=[config['color_palette']]
|
649 |
)
|
650 |
|
651 |
-
elif
|
652 |
numeric_df = df.select_dtypes(include=np.number)
|
653 |
if not numeric_df.empty:
|
654 |
corr = numeric_df.corr()
|
@@ -659,37 +685,67 @@ elif app_mode == "Advanced EDA":
|
|
659 |
else:
|
660 |
st.warning("No numerical columns found for correlation heatmap.")
|
661 |
|
662 |
-
elif
|
663 |
fig = px.box(
|
664 |
df, x=config['x_col'], y=config['y_col'],
|
665 |
color=config['color_col']
|
666 |
)
|
667 |
|
668 |
-
elif
|
669 |
fig = px.violin(
|
670 |
df, x=config['x_col'], y=config['y_col'],
|
671 |
box=True, points="all",
|
672 |
color=config['color_col']
|
673 |
)
|
674 |
|
675 |
-
elif
|
676 |
-
|
677 |
fig = px.line(
|
678 |
df, x=config['time_col'], y=config['value_col'],
|
679 |
color=config['color_col']
|
680 |
)
|
681 |
|
682 |
-
elif
|
683 |
-
|
684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
685 |
color=config['color_col']
|
686 |
)
|
687 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
if fig:
|
689 |
st.plotly_chart(fig, use_container_width=True)
|
690 |
except Exception as e:
|
691 |
st.error(f"An error occurred while generating the plot: {e}")
|
692 |
-
|
693 |
# Model Training Section
|
694 |
elif app_mode == "Model Training":
|
695 |
st.title("π Model Training Studio")
|
|
|
531 |
'plot_type': "Histogram",
|
532 |
'x_col': df.columns[0] if len(df.columns) > 0 else None,
|
533 |
'y_col': df.columns[1] if len(df.columns) > 1 else None,
|
|
|
534 |
'color_col': None,
|
535 |
'size_col': None,
|
536 |
'time_col': None,
|
|
|
567 |
st.sidebar.header("π Visualization Configuration")
|
568 |
plot_types = [
|
569 |
"Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
|
570 |
+
"Correlation Heatmap", "Parallel Coordinates", "Pair Plot", "Density Contour",
|
571 |
+
"3D Scatter", "Time Series", "Bar Chart", "Pie Chart", "Line Chart" # Removed the computationally expensive ones
|
|
|
572 |
]
|
573 |
st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
|
574 |
"Choose Visualization",
|
|
|
577 |
)
|
578 |
|
579 |
# Dynamic Controls Based on Plot Type
|
580 |
+
plot_type = st.session_state.eda_config['plot_type']
|
581 |
+
|
582 |
+
def show_column_selectors(plot_type, df, config):
    """Render the sidebar column selectors that apply to ``plot_type``.

    Each selector writes its choice into ``config`` under the matching key
    (``x_col``, ``y_col``, ``time_col``, ``value_col``, ``z_col``,
    ``color_col``).

    Args:
        plot_type: Name of the visualization chosen in the sidebar.
        df: DataFrame whose columns are offered as choices.
        config: Plot configuration dict; updated in place.

    Returns:
        The same ``config`` object, after the selectors have written to it.
    """
    # NOTE(review): the indentation of this function was lost in the diff view
    # it was recovered from. The nesting used here (no selectors for the
    # heatmap, "Color by" offered for every other plot type) is the reading
    # consistent with the plotting code, which passes config['color_col'] to
    # most chart types -- confirm against app.py.
    if plot_type == "Correlation Heatmap":
        return config

    column_names = list(df.columns)

    def _select_column(label, key):
        """Show one column selectbox and store the choice in config[key]."""
        # config.get() instead of config[key]: keys such as 'z_col' may be
        # absent from the stored configuration, and a plain lookup would raise
        # KeyError before the widget is ever drawn.
        previous = config.get(key)
        # list.index() always yields a plain int (first match), whereas
        # Index.get_loc() can return a slice/mask for duplicate labels, which
        # is not a valid selectbox index.
        position = column_names.index(previous) if previous in column_names else 0
        config[key] = st.sidebar.selectbox(label, df.columns, index=position)

    _select_column("X Axis", 'x_col')

    plots_with_y_axis = (
        "Scatter Plot", "Box Plot", "Violin Plot", "Time Series",
        "3D Scatter", "Histogram", "Line Chart",
    )
    if plot_type in plots_with_y_axis:
        _select_column("Y Axis", 'y_col')

    if plot_type == "Time Series":
        _select_column("Time Column", 'time_col')
        _select_column("Value Column", 'value_col')

    if plot_type == "3D Scatter":
        _select_column("Z Axis", 'z_col')

    # None is offered first so that "no colour grouping" is the default choice.
    config['color_col'] = st.sidebar.selectbox(
        "Color by",
        [None] + column_names,
    )
    return config
|
621 |
+
|
622 |
+
st.session_state.eda_config = show_column_selectors(plot_type, df, st.session_state.eda_config)
|
623 |
|
624 |
# Advanced Plot Customization
|
625 |
with st.expander("π¨ Advanced Customization", expanded=False):
|
|
|
637 |
fig = None
|
638 |
config = st.session_state.eda_config
|
639 |
|
640 |
+
# Numeric Column Validation Helper
|
641 |
+
def check_numeric(col):
    """Require that column ``col`` of the enclosing ``df`` has a numeric dtype.

    When the dtype is not numeric, an error message is shown with
    ``st.error`` and ``st.stop()`` is called; otherwise nothing happens.
    """
    column_is_numeric = pd.api.types.is_numeric_dtype(df[col])
    if column_is_numeric:
        return
    st.error(f"Column '{col}' must be numeric for this plot type.")
    st.stop()
|
645 |
+
|
646 |
+
if plot_type == "Histogram":
|
647 |
+
check_numeric(config['x_col'])
|
648 |
color_palette = config['color_palette']
|
649 |
colors = getattr(pc.sequential, color_palette)
|
650 |
fig = px.histogram(
|
|
|
654 |
color_discrete_sequence = [colors[0]]
|
655 |
)
|
656 |
|
657 |
+
elif plot_type == "Scatter Plot":
|
658 |
+
check_numeric(config['x_col'])
|
659 |
+
check_numeric(config['y_col'])
|
660 |
fig = px.scatter(
|
661 |
df, x=config['x_col'], y=config['y_col'],
|
662 |
color=config['color_col'],
|
|
|
664 |
hover_data=config['hover_data_cols']
|
665 |
)
|
666 |
|
667 |
+
elif plot_type == "3D Scatter":
|
668 |
+
check_numeric(config['x_col'])
|
669 |
+
check_numeric(config['y_col'])
|
670 |
+
check_numeric(config['z_col'])
|
671 |
fig = px.scatter_3d(
|
672 |
df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
|
673 |
color=config['color_col'],
|
674 |
color_discrete_sequence=[config['color_palette']]
|
675 |
)
|
676 |
|
677 |
+
elif plot_type == "Correlation Heatmap":
|
678 |
numeric_df = df.select_dtypes(include=np.number)
|
679 |
if not numeric_df.empty:
|
680 |
corr = numeric_df.corr()
|
|
|
685 |
else:
|
686 |
st.warning("No numerical columns found for correlation heatmap.")
|
687 |
|
688 |
+
elif plot_type == "Box Plot":
|
689 |
fig = px.box(
|
690 |
df, x=config['x_col'], y=config['y_col'],
|
691 |
color=config['color_col']
|
692 |
)
|
693 |
|
694 |
+
elif plot_type == "Violin Plot":
|
695 |
fig = px.violin(
|
696 |
df, x=config['x_col'], y=config['y_col'],
|
697 |
box=True, points="all",
|
698 |
color=config['color_col']
|
699 |
)
|
700 |
|
701 |
+
elif plot_type == "Time Series":
|
702 |
+
# Time Series plots now require time_col and value_col
|
703 |
fig = px.line(
|
704 |
df, x=config['time_col'], y=config['value_col'],
|
705 |
color=config['color_col']
|
706 |
)
|
707 |
|
708 |
+
elif plot_type == "Parallel Coordinates":
|
709 |
+
numeric_df = df.select_dtypes(include=np.number)
|
710 |
+
if not numeric_df.empty:
|
711 |
+
fig = px.parallel_coordinates(numeric_df, color_continuous_scale=config['color_palette'])
|
712 |
+
else:
|
713 |
+
st.warning("No numerical columns found for parallel coordinates plot.")
|
714 |
+
|
715 |
+
elif plot_type == "Pair Plot":
|
716 |
+
numeric_cols = df.select_dtypes(include=np.number).columns
|
717 |
+
if len(numeric_cols) >= 2:
|
718 |
+
dimensions = st.multiselect("Select Columns for Pair Plot", numeric_cols, default=numeric_cols[:2])
|
719 |
+
fig = px.scatter_matrix(df[dimensions], color=config['color_col'])
|
720 |
+
else:
|
721 |
+
st.warning("Need at least 2 numeric columns for pair plot.")
|
722 |
+
|
723 |
+
elif plot_type == "Density Contour":
|
724 |
+
check_numeric(config['x_col'])
|
725 |
+
check_numeric(config['y_col'])
|
726 |
+
fig = px.density_contour(df, x=config['x_col'], y=config['y_col'], color=config['color_col'])
|
727 |
+
|
728 |
+
elif plot_type == "Bar Chart":
|
729 |
+
fig = px.bar(
|
730 |
+
df, x=config['x_col'], y=config['y_col'],
|
731 |
color=config['color_col']
|
732 |
)
|
733 |
|
734 |
+
elif plot_type == "Pie Chart":
|
735 |
+
fig = px.pie(
|
736 |
+
df, values=config['y_col'], names=config['x_col'],
|
737 |
+
color_discrete_sequence=px.colors.sequential.RdBu
|
738 |
+
)
|
739 |
+
elif plot_type == "Line Chart":
|
740 |
+
fig = px.line(
|
741 |
+
df, x=config['x_col'], y=config['y_col'],
|
742 |
+
color=config['color_col']
|
743 |
+
)
|
744 |
if fig:
|
745 |
st.plotly_chart(fig, use_container_width=True)
|
746 |
except Exception as e:
|
747 |
st.error(f"An error occurred while generating the plot: {e}")
|
748 |
+
|
749 |
# Model Training Section
|
750 |
elif app_mode == "Model Training":
|
751 |
st.title("π Model Training Studio")
|