CosmickVisions committed on
Commit
77d87df
·
verified ·
1 Parent(s): 79b3b0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -41
app.py CHANGED
@@ -531,7 +531,6 @@ elif app_mode == "Advanced EDA":
531
  'plot_type': "Histogram",
532
  'x_col': df.columns[0] if len(df.columns) > 0 else None,
533
  'y_col': df.columns[1] if len(df.columns) > 1 else None,
534
- 'z_col': df.columns[2] if len(df.columns) > 2 else None,
535
  'color_col': None,
536
  'size_col': None,
537
  'time_col': None,
@@ -568,9 +567,8 @@ elif app_mode == "Advanced EDA":
568
  st.sidebar.header("📊 Visualization Configuration")
569
  plot_types = [
570
  "Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
571
- "Correlation Heatmap", "Parallel Coordinates", "Andrews Curves",
572
- "Pair Plot", "Density Contour", "3D Scatter", "Time Series",
573
- "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
574
  ]
575
  st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
576
  "Choose Visualization",
@@ -579,33 +577,49 @@ elif app_mode == "Advanced EDA":
579
  )
580
 
581
  # Dynamic Controls Based on Plot Type
582
- if st.session_state.eda_config['plot_type'] != "Correlation Heatmap":
583
- st.session_state.eda_config['x_col'] = st.sidebar.selectbox(
584
- "X Axis",
585
- df.columns,
586
- index=df.columns.get_loc(st.session_state.eda_config['x_col'])
587
- if st.session_state.eda_config['x_col'] in df.columns else 0
588
- )
 
 
 
589
 
590
- if st.session_state.eda_config['plot_type'] in ["Scatter Plot", "Box Plot", "Violin Plot", "Time Series", "3D Scatter", "Histogram"]:
591
- st.session_state.eda_config['y_col'] = st.sidebar.selectbox(
592
- "Y Axis",
593
- df.columns,
594
- index=df.columns.get_loc(st.session_state.eda_config['y_col'])
595
- if st.session_state.eda_config['y_col'] in df.columns else 0
596
- )
 
 
 
 
 
 
 
 
 
 
 
597
 
598
- if st.session_state.eda_config['plot_type'] == "3D Scatter":
599
- st.session_state.eda_config['z_col'] = st.sidebar.selectbox(
600
- "Z Axis",
601
- df.columns,
602
- index=df.columns.get_loc(st.session_state.eda_config['z_col'])
603
- if st.session_state.eda_config['z_col'] in df.columns else 0
604
- )
605
- st.session_state.eda_config['color_col'] = st.sidebar.selectbox(
606
- "Color by",
607
- [None] + list(df.columns)
608
- )
 
 
609
 
610
  # Advanced Plot Customization
611
  with st.expander("🎨 Advanced Customization", expanded=False):
@@ -623,7 +637,14 @@ elif app_mode == "Advanced EDA":
623
  fig = None
624
  config = st.session_state.eda_config
625
 
626
- if config['plot_type'] == "Histogram":
 
 
 
 
 
 
 
627
  color_palette = config['color_palette']
628
  colors = getattr(pc.sequential, color_palette)
629
  fig = px.histogram(
@@ -633,7 +654,9 @@ elif app_mode == "Advanced EDA":
633
  color_discrete_sequence = [colors[0]]
634
  )
635
 
636
- elif config['plot_type'] == "Scatter Plot":
 
 
637
  fig = px.scatter(
638
  df, x=config['x_col'], y=config['y_col'],
639
  color=config['color_col'],
@@ -641,14 +664,17 @@ elif app_mode == "Advanced EDA":
641
  hover_data=config['hover_data_cols']
642
  )
643
 
644
- elif config['plot_type'] == "3D Scatter":
 
 
 
645
  fig = px.scatter_3d(
646
  df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
647
  color=config['color_col'],
648
  color_discrete_sequence=[config['color_palette']]
649
  )
650
 
651
- elif config['plot_type'] == "Correlation Heatmap":
652
  numeric_df = df.select_dtypes(include=np.number)
653
  if not numeric_df.empty:
654
  corr = numeric_df.corr()
@@ -659,37 +685,67 @@ elif app_mode == "Advanced EDA":
659
  else:
660
  st.warning("No numerical columns found for correlation heatmap.")
661
 
662
- elif config['plot_type'] == "Box Plot":
663
  fig = px.box(
664
  df, x=config['x_col'], y=config['y_col'],
665
  color=config['color_col']
666
  )
667
 
668
- elif config['plot_type'] == "Violin Plot":
669
  fig = px.violin(
670
  df, x=config['x_col'], y=config['y_col'],
671
  box=True, points="all",
672
  color=config['color_col']
673
  )
674
 
675
- elif config['plot_type'] == "Time Series":
676
- df = df.sort_values(by=config['time_col'])
677
  fig = px.line(
678
  df, x=config['time_col'], y=config['value_col'],
679
  color=config['color_col']
680
  )
681
 
682
- elif config['plot_type'] == "Scatter Matrix":
683
- fig = px.scatter_matrix(
684
- df, dimensions=config['scatter_matrix_cols'],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
  color=config['color_col']
686
  )
687
 
 
 
 
 
 
 
 
 
 
 
688
  if fig:
689
  st.plotly_chart(fig, use_container_width=True)
690
  except Exception as e:
691
  st.error(f"An error occurred while generating the plot: {e}")
692
-
693
  # Model Training Section
694
  elif app_mode == "Model Training":
695
  st.title("🚂 Model Training Studio")
 
531
  'plot_type': "Histogram",
532
  'x_col': df.columns[0] if len(df.columns) > 0 else None,
533
  'y_col': df.columns[1] if len(df.columns) > 1 else None,
 
534
  'color_col': None,
535
  'size_col': None,
536
  'time_col': None,
 
567
  st.sidebar.header("📊 Visualization Configuration")
568
  plot_types = [
569
  "Histogram", "Scatter Plot", "Box Plot", "Violin Plot",
570
+ "Correlation Heatmap", "Parallel Coordinates", "Pair Plot", "Density Contour",
571
+ "3D Scatter", "Time Series", "Bar Chart", "Pie Chart", "Line Chart" # Removed the computationally expensive ones
 
572
  ]
573
  st.session_state.eda_config['plot_type'] = st.sidebar.selectbox(
574
  "Choose Visualization",
 
577
  )
578
 
579
  # Dynamic Controls Based on Plot Type
580
+ plot_type = st.session_state.eda_config['plot_type']
581
+
582
+ def show_column_selectors(plot_type, df, config):
583
+ """Helper function to display column selectors based on plot type."""
584
+ if plot_type != "Correlation Heatmap":
585
+ config['x_col'] = st.sidebar.selectbox(
586
+ "X Axis",
587
+ df.columns,
588
+ index=df.columns.get_loc(config['x_col']) if config['x_col'] in df.columns else 0
589
+ )
590
 
591
+ if plot_type in ["Scatter Plot", "Box Plot", "Violin Plot", "Time Series", "3D Scatter", "Histogram", "Line Chart"]:
592
+ config['y_col'] = st.sidebar.selectbox(
593
+ "Y Axis",
594
+ df.columns,
595
+ index=df.columns.get_loc(config['y_col']) if config['y_col'] in df.columns else 0
596
+ )
597
+
598
+ if plot_type == "Time Series":
599
+ config['time_col'] = st.sidebar.selectbox(
600
+ "Time Column",
601
+ df.columns,
602
+ index=df.columns.get_loc(config['time_col']) if config['time_col'] in df.columns else 0
603
+ )
604
+ config['value_col'] = st.sidebar.selectbox(
605
+ "Value Column",
606
+ df.columns,
607
+ index=df.columns.get_loc(config['value_col']) if config['value_col'] in df.columns else 0
608
+ )
609
 
610
+ if plot_type == "3D Scatter":
611
+ config['z_col'] = st.sidebar.selectbox(
612
+ "Z Axis",
613
+ df.columns,
614
+ index=df.columns.get_loc(config['z_col']) if config['z_col'] in df.columns else 0
615
+ )
616
+ config['color_col'] = st.sidebar.selectbox(
617
+ "Color by",
618
+ [None] + list(df.columns)
619
+ )
620
+ return config
621
+
622
+ st.session_state.eda_config = show_column_selectors(plot_type, df, st.session_state.eda_config)
623
 
624
  # Advanced Plot Customization
625
  with st.expander("🎨 Advanced Customization", expanded=False):
 
637
  fig = None
638
  config = st.session_state.eda_config
639
 
640
+ # Numeric Column Validation Helper
641
+ def check_numeric(col):
642
+ if not pd.api.types.is_numeric_dtype(df[col]):
643
+ st.error(f"Column '{col}' must be numeric for this plot type.")
644
+ st.stop()
645
+
646
+ if plot_type == "Histogram":
647
+ check_numeric(config['x_col'])
648
  color_palette = config['color_palette']
649
  colors = getattr(pc.sequential, color_palette)
650
  fig = px.histogram(
 
654
  color_discrete_sequence = [colors[0]]
655
  )
656
 
657
+ elif plot_type == "Scatter Plot":
658
+ check_numeric(config['x_col'])
659
+ check_numeric(config['y_col'])
660
  fig = px.scatter(
661
  df, x=config['x_col'], y=config['y_col'],
662
  color=config['color_col'],
 
664
  hover_data=config['hover_data_cols']
665
  )
666
 
667
+ elif plot_type == "3D Scatter":
668
+ check_numeric(config['x_col'])
669
+ check_numeric(config['y_col'])
670
+ check_numeric(config['z_col'])
671
  fig = px.scatter_3d(
672
  df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
673
  color=config['color_col'],
674
  color_discrete_sequence=[config['color_palette']]
675
  )
676
 
677
+ elif plot_type == "Correlation Heatmap":
678
  numeric_df = df.select_dtypes(include=np.number)
679
  if not numeric_df.empty:
680
  corr = numeric_df.corr()
 
685
  else:
686
  st.warning("No numerical columns found for correlation heatmap.")
687
 
688
+ elif plot_type == "Box Plot":
689
  fig = px.box(
690
  df, x=config['x_col'], y=config['y_col'],
691
  color=config['color_col']
692
  )
693
 
694
+ elif plot_type == "Violin Plot":
695
  fig = px.violin(
696
  df, x=config['x_col'], y=config['y_col'],
697
  box=True, points="all",
698
  color=config['color_col']
699
  )
700
 
701
+ elif plot_type == "Time Series":
702
+ # Time Series plots now require time_col and value_col
703
  fig = px.line(
704
  df, x=config['time_col'], y=config['value_col'],
705
  color=config['color_col']
706
  )
707
 
708
+ elif plot_type == "Parallel Coordinates":
709
+ numeric_df = df.select_dtypes(include=np.number)
710
+ if not numeric_df.empty:
711
+ fig = px.parallel_coordinates(numeric_df, color_continuous_scale=config['color_palette'])
712
+ else:
713
+ st.warning("No numerical columns found for parallel coordinates plot.")
714
+
715
+ elif plot_type == "Pair Plot":
716
+ numeric_cols = df.select_dtypes(include=np.number).columns
717
+ if len(numeric_cols) >= 2:
718
+ dimensions = st.multiselect("Select Columns for Pair Plot", numeric_cols, default=numeric_cols[:2])
719
+ fig = px.scatter_matrix(df[dimensions], color=config['color_col'])
720
+ else:
721
+ st.warning("Need at least 2 numeric columns for pair plot.")
722
+
723
+ elif plot_type == "Density Contour":
724
+ check_numeric(config['x_col'])
725
+ check_numeric(config['y_col'])
726
+ fig = px.density_contour(df, x=config['x_col'], y=config['y_col'], color=config['color_col'])
727
+
728
+ elif plot_type == "Bar Chart":
729
+ fig = px.bar(
730
+ df, x=config['x_col'], y=config['y_col'],
731
  color=config['color_col']
732
  )
733
 
734
+ elif plot_type == "Pie Chart":
735
+ fig = px.pie(
736
+ df, values=config['y_col'], names=config['x_col'],
737
+ color_discrete_sequence=px.colors.sequential.RdBu
738
+ )
739
+ elif plot_type == "Line Chart":
740
+ fig = px.line(
741
+ df, x=config['x_col'], y=config['y_col'],
742
+ color=config['color_col']
743
+ )
744
  if fig:
745
  st.plotly_chart(fig, use_container_width=True)
746
  except Exception as e:
747
  st.error(f"An error occurred while generating the plot: {e}")
748
+
749
  # Model Training Section
750
  elif app_mode == "Model Training":
751
  st.title("🚂 Model Training Studio")