CosmickVisions committed on
Commit
42a818a
·
verified ·
1 Parent(s): bde5851

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -51
app.py CHANGED
@@ -376,16 +376,16 @@ elif app_mode == "Advanced EDA":
376
  with st.expander("🔎 Data Filtering", expanded=False):
377
  # Use direct session state assignment for reactivity
378
  st.session_state.plot_config['filter_col'] = st.selectbox(
379
- "Filter Column",
380
  [None] + list(df.columns),
381
  help="Choose a column to filter the data."
382
  )
383
-
384
  if st.session_state.plot_config['filter_col']:
385
  unique_values = df[st.session_state.plot_config['filter_col']].unique()
386
  st.session_state.plot_config['filter_options'] = st.multiselect(
387
- "Filter Values",
388
- unique_values,
389
  default=unique_values,
390
  help=f"Select values from '{st.session_state.plot_config['filter_col']}'"
391
  )
@@ -395,13 +395,13 @@ elif app_mode == "Advanced EDA":
395
 
396
  # Visualization Configuration
397
  st.sidebar.header("📊 Plot Configuration")
398
-
399
  # Plot type selector
400
  st.session_state.plot_config['plot_type'] = st.sidebar.selectbox(
401
- "Choose Visualization",
402
  [
403
- "Histogram", "Scatter Plot", "Box Plot",
404
- "Correlation Heatmap", "3D Scatter",
405
  "Violin Plot", "Time Series", "Scatter Matrix"
406
  ],
407
  index=0 # Reset to first option when plot type changes
@@ -410,17 +410,17 @@ elif app_mode == "Advanced EDA":
410
  # Dynamic controls based on plot type
411
  if st.session_state.plot_config['plot_type'] != "Correlation Heatmap":
412
  st.session_state.plot_config['x_col'] = st.sidebar.selectbox(
413
- "X Axis",
414
  df.columns,
415
- index=df.columns.get_loc(st.session_state.plot_config['x_col'])
416
  if st.session_state.plot_config['x_col'] in df.columns else 0
417
  )
418
 
419
- if st.session_state.plot_config['plot_type'] in ["Scatter Plot", "Box Plot",
420
- "Violin Plot", "Time Series",
421
- "3D Scatter", "Histogram"]:
422
  st.session_state.plot_config['y_col'] = st.sidebar.selectbox(
423
- "Y Axis",
424
  df.columns,
425
  index=df.columns.get_loc(st.session_state.plot_config['y_col'])
426
  if st.session_state.plot_config['y_col'] in df.columns else 0
@@ -428,52 +428,52 @@ elif app_mode == "Advanced EDA":
428
 
429
  if st.session_state.plot_config['plot_type'] == "3D Scatter":
430
  st.session_state.plot_config['z_col'] = st.sidebar.selectbox(
431
- "Z Axis",
432
  df.columns,
433
  index=df.columns.get_loc(st.session_state.plot_config['z_col'])
434
  if st.session_state.plot_config['z_col'] in df.columns else 0
435
  )
436
  st.session_state.plot_config['color_col'] = st.sidebar.selectbox(
437
- "Color by",
438
  [None] + list(df.columns)
439
  )
440
 
441
  # Color configuration
442
  if st.session_state.plot_config['plot_type'] == "Correlation Heatmap":
443
  st.session_state.plot_config['color_continuous_scale'] = st.sidebar.selectbox(
444
- "Color Scale",
445
  ['Viridis', 'Plasma', 'Magma', 'Cividis', 'RdBu']
446
  )
447
  else:
448
  st.session_state.plot_config['color_palette'] = st.sidebar.selectbox(
449
- "Color Palette",
450
  ['#00f7ff', '#ff00ff', '#f70000', '#0000f7']
451
  )
452
 
453
  # Additional configurations
454
  if st.session_state.plot_config['plot_type'] == "Scatter Plot":
455
  st.session_state.plot_config['size_col'] = st.sidebar.selectbox(
456
- "Size by",
457
  [None] + list(df.columns)
458
  )
459
  st.session_state.plot_config['hover_data_cols'] = st.sidebar.multiselect(
460
- "Hover Data",
461
  df.columns
462
  )
463
 
464
  if st.session_state.plot_config['plot_type'] == "Time Series":
465
  st.session_state.plot_config['time_col'] = st.sidebar.selectbox(
466
- "Time Column",
467
  df.columns
468
  )
469
  st.session_state.plot_config['value_col'] = st.sidebar.selectbox(
470
- "Value Column",
471
  df.columns
472
  )
473
 
474
  if st.session_state.plot_config['plot_type'] == "Scatter Matrix":
475
  st.session_state.plot_config['scatter_matrix_cols'] = st.multiselect(
476
- "Columns for Scatter Matrix",
477
  df.select_dtypes(include=np.number).columns,
478
  default=st.session_state.plot_config['scatter_matrix_cols']
479
  )
@@ -482,29 +482,29 @@ elif app_mode == "Advanced EDA":
482
  try:
483
  fig = None
484
  config = st.session_state.plot_config
485
-
486
  if config['plot_type'] == "Histogram":
487
  fig = px.histogram(
488
- df, x=config['x_col'], y=config['y_col'],
489
  nbins=30, template="plotly_dark",
490
  color_discrete_sequence=[config['color_palette']]
491
  )
492
-
493
  elif config['plot_type'] == "Scatter Plot":
494
  fig = px.scatter(
495
  df, x=config['x_col'], y=config['y_col'],
496
  color_discrete_sequence=[config['color_palette']],
497
- size=config['size_col'],
498
  hover_data=config['hover_data_cols']
499
  )
500
-
501
  elif config['plot_type'] == "3D Scatter":
502
  fig = px.scatter_3d(
503
  df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
504
  color=config['color_col'],
505
  color_discrete_sequence=[config['color_palette']]
506
  )
507
-
508
  elif config['plot_type'] == "Correlation Heatmap":
509
  numeric_df = df.select_dtypes(include=np.number)
510
  if not numeric_df.empty:
@@ -515,27 +515,27 @@ elif app_mode == "Advanced EDA":
515
  )
516
  else:
517
  st.warning("No numerical columns found for correlation heatmap.")
518
-
519
  elif config['plot_type'] == "Box Plot":
520
  fig = px.box(
521
  df, x=config['x_col'], y=config['y_col'],
522
  color_discrete_sequence=[config['color_palette']]
523
  )
524
-
525
  elif config['plot_type'] == "Violin Plot":
526
  fig = px.violin(
527
  df, x=config['x_col'], y=config['y_col'],
528
  box=True, points="all",
529
  color_discrete_sequence=[config['color_palette']]
530
  )
531
-
532
  elif config['plot_type'] == "Time Series":
533
  df = df.sort_values(by=config['time_col'])
534
  fig = px.line(
535
  df, x=config['time_col'], y=config['value_col'],
536
  color_discrete_sequence=[config['color_palette']]
537
  )
538
-
539
  elif config['plot_type'] == "Scatter Matrix":
540
  fig = px.scatter_matrix(
541
  df, dimensions=config['scatter_matrix_cols'],
@@ -546,25 +546,29 @@ elif app_mode == "Advanced EDA":
546
  st.plotly_chart(fig, use_container_width=True)
547
  except Exception as e:
548
  st.error(f"An error occurred while generating the plot: {e}")
 
549
  with st.expander("🧪 Hypothesis Testing"):
550
- test_type = st.selectbox("Select Test Type", ["T-test", "Chi-Squared Test"])
551
-
552
- if test_type == "T-test":
553
- col1 = st.selectbox("Column 1 (Numeric)", df.select_dtypes(include=np.number).columns)
554
- col2 = st.selectbox("Column 2 (Categorical)", df.select_dtypes(include='object').columns)
555
- if st.button("Run T-test"):
556
- # Example: Split data by category and perform t-test
557
- groups = df.groupby(col2)[col1].apply(list)
558
- if len(groups) == 2:
559
- t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
560
- st.write(f"T-statistic: {t_stat:.4f}")
561
- st.write(f"P-value: {p_value:.4f}")
562
- if p_value < 0.05:
563
- st.write("Reject the null hypothesis.")
564
- else:
565
- st.write("Fail to reject the null hypothesis.")
566
- else:
567
- st.write("Select a categorical column with exactly two categories.")
 
 
 
568
 
569
  elif app_mode == "Model Training":
570
  st.title("🚂 Model Training")
 
376
  with st.expander("🔎 Data Filtering", expanded=False):
377
  # Use direct session state assignment for reactivity
378
  st.session_state.plot_config['filter_col'] = st.selectbox(
379
+ "Filter Column",
380
  [None] + list(df.columns),
381
  help="Choose a column to filter the data."
382
  )
383
+
384
  if st.session_state.plot_config['filter_col']:
385
  unique_values = df[st.session_state.plot_config['filter_col']].unique()
386
  st.session_state.plot_config['filter_options'] = st.multiselect(
387
+ "Filter Values",
388
+ unique_values,
389
  default=unique_values,
390
  help=f"Select values from '{st.session_state.plot_config['filter_col']}'"
391
  )
 
395
 
396
  # Visualization Configuration
397
  st.sidebar.header("📊 Plot Configuration")
398
+
399
  # Plot type selector
400
  st.session_state.plot_config['plot_type'] = st.sidebar.selectbox(
401
+ "Choose Visualization",
402
  [
403
+ "Histogram", "Scatter Plot", "Box Plot",
404
+ "Correlation Heatmap", "3D Scatter",
405
  "Violin Plot", "Time Series", "Scatter Matrix"
406
  ],
407
  index=0 # Reset to first option when plot type changes
 
410
  # Dynamic controls based on plot type
411
  if st.session_state.plot_config['plot_type'] != "Correlation Heatmap":
412
  st.session_state.plot_config['x_col'] = st.sidebar.selectbox(
413
+ "X Axis",
414
  df.columns,
415
+ index=df.columns.get_loc(st.session_state.plot_config['x_col'])
416
  if st.session_state.plot_config['x_col'] in df.columns else 0
417
  )
418
 
419
+ if st.session_state.plot_config['plot_type'] in ["Scatter Plot", "Box Plot",
420
+ "Violin Plot", "Time Series",
421
+ "3D Scatter", "Histogram"]:
422
  st.session_state.plot_config['y_col'] = st.sidebar.selectbox(
423
+ "Y Axis",
424
  df.columns,
425
  index=df.columns.get_loc(st.session_state.plot_config['y_col'])
426
  if st.session_state.plot_config['y_col'] in df.columns else 0
 
428
 
429
  if st.session_state.plot_config['plot_type'] == "3D Scatter":
430
  st.session_state.plot_config['z_col'] = st.sidebar.selectbox(
431
+ "Z Axis",
432
  df.columns,
433
  index=df.columns.get_loc(st.session_state.plot_config['z_col'])
434
  if st.session_state.plot_config['z_col'] in df.columns else 0
435
  )
436
  st.session_state.plot_config['color_col'] = st.sidebar.selectbox(
437
+ "Color by",
438
  [None] + list(df.columns)
439
  )
440
 
441
  # Color configuration
442
  if st.session_state.plot_config['plot_type'] == "Correlation Heatmap":
443
  st.session_state.plot_config['color_continuous_scale'] = st.sidebar.selectbox(
444
+ "Color Scale",
445
  ['Viridis', 'Plasma', 'Magma', 'Cividis', 'RdBu']
446
  )
447
  else:
448
  st.session_state.plot_config['color_palette'] = st.sidebar.selectbox(
449
+ "Color Palette",
450
  ['#00f7ff', '#ff00ff', '#f70000', '#0000f7']
451
  )
452
 
453
  # Additional configurations
454
  if st.session_state.plot_config['plot_type'] == "Scatter Plot":
455
  st.session_state.plot_config['size_col'] = st.sidebar.selectbox(
456
+ "Size by",
457
  [None] + list(df.columns)
458
  )
459
  st.session_state.plot_config['hover_data_cols'] = st.sidebar.multiselect(
460
+ "Hover Data",
461
  df.columns
462
  )
463
 
464
  if st.session_state.plot_config['plot_type'] == "Time Series":
465
  st.session_state.plot_config['time_col'] = st.sidebar.selectbox(
466
+ "Time Column",
467
  df.columns
468
  )
469
  st.session_state.plot_config['value_col'] = st.sidebar.selectbox(
470
+ "Value Column",
471
  df.columns
472
  )
473
 
474
  if st.session_state.plot_config['plot_type'] == "Scatter Matrix":
475
  st.session_state.plot_config['scatter_matrix_cols'] = st.multiselect(
476
+ "Columns for Scatter Matrix",
477
  df.select_dtypes(include=np.number).columns,
478
  default=st.session_state.plot_config['scatter_matrix_cols']
479
  )
 
482
  try:
483
  fig = None
484
  config = st.session_state.plot_config
485
+
486
  if config['plot_type'] == "Histogram":
487
  fig = px.histogram(
488
+ df, x=config['x_col'], y=config['y_col'],
489
  nbins=30, template="plotly_dark",
490
  color_discrete_sequence=[config['color_palette']]
491
  )
492
+
493
  elif config['plot_type'] == "Scatter Plot":
494
  fig = px.scatter(
495
  df, x=config['x_col'], y=config['y_col'],
496
  color_discrete_sequence=[config['color_palette']],
497
+ size=config['size_col'],
498
  hover_data=config['hover_data_cols']
499
  )
500
+
501
  elif config['plot_type'] == "3D Scatter":
502
  fig = px.scatter_3d(
503
  df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
504
  color=config['color_col'],
505
  color_discrete_sequence=[config['color_palette']]
506
  )
507
+
508
  elif config['plot_type'] == "Correlation Heatmap":
509
  numeric_df = df.select_dtypes(include=np.number)
510
  if not numeric_df.empty:
 
515
  )
516
  else:
517
  st.warning("No numerical columns found for correlation heatmap.")
518
+
519
  elif config['plot_type'] == "Box Plot":
520
  fig = px.box(
521
  df, x=config['x_col'], y=config['y_col'],
522
  color_discrete_sequence=[config['color_palette']]
523
  )
524
+
525
  elif config['plot_type'] == "Violin Plot":
526
  fig = px.violin(
527
  df, x=config['x_col'], y=config['y_col'],
528
  box=True, points="all",
529
  color_discrete_sequence=[config['color_palette']]
530
  )
531
+
532
  elif config['plot_type'] == "Time Series":
533
  df = df.sort_values(by=config['time_col'])
534
  fig = px.line(
535
  df, x=config['time_col'], y=config['value_col'],
536
  color_discrete_sequence=[config['color_palette']]
537
  )
538
+
539
  elif config['plot_type'] == "Scatter Matrix":
540
  fig = px.scatter_matrix(
541
  df, dimensions=config['scatter_matrix_cols'],
 
546
  st.plotly_chart(fig, use_container_width=True)
547
  except Exception as e:
548
  st.error(f"An error occurred while generating the plot: {e}")
549
+
550
  with st.expander("🧪 Hypothesis Testing"):
551
+ test_type = st.selectbox("Select Test Type", ["T-test", "Chi-Squared Test"])
552
+
553
+ if test_type == "T-test":
554
+ col1 = st.selectbox("Column 1 (Numeric)", df.select_dtypes(include=np.number).columns)
555
+ col2 = st.selectbox("Column 2 (Categorical)", df.select_dtypes(include='object').columns)
556
+ if st.button("Run T-test"):
557
+ # Example: Split data by category and perform t-test
558
+ try:
559
+ groups = df.groupby(col2)[col1].apply(list)
560
+ if len(groups) == 2:
561
+ t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
562
+ st.write(f"T-statistic: {t_stat:.4f}")
563
+ st.write(f"P-value: {p_value:.4f}")
564
+ if p_value < 0.05:
565
+ st.write("Reject the null hypothesis.")
566
+ else:
567
+ st.write("Fail to reject the null hypothesis.")
568
+ else:
569
+ st.write("Select a categorical column with exactly two categories.")
570
+ except Exception as e:
571
+ st.error(f"An error occurred during the T-test: {e}")
572
 
573
  elif app_mode == "Model Training":
574
  st.title("🚂 Model Training")