Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -376,16 +376,16 @@ elif app_mode == "Advanced EDA":
|
|
376 |
with st.expander("π Data Filtering", expanded=False):
|
377 |
# Use direct session state assignment for reactivity
|
378 |
st.session_state.plot_config['filter_col'] = st.selectbox(
|
379 |
-
"Filter Column",
|
380 |
[None] + list(df.columns),
|
381 |
help="Choose a column to filter the data."
|
382 |
)
|
383 |
-
|
384 |
if st.session_state.plot_config['filter_col']:
|
385 |
unique_values = df[st.session_state.plot_config['filter_col']].unique()
|
386 |
st.session_state.plot_config['filter_options'] = st.multiselect(
|
387 |
-
"Filter Values",
|
388 |
-
unique_values,
|
389 |
default=unique_values,
|
390 |
help=f"Select values from '{st.session_state.plot_config['filter_col']}'"
|
391 |
)
|
@@ -395,13 +395,13 @@ elif app_mode == "Advanced EDA":
|
|
395 |
|
396 |
# Visualization Configuration
|
397 |
st.sidebar.header("π Plot Configuration")
|
398 |
-
|
399 |
# Plot type selector
|
400 |
st.session_state.plot_config['plot_type'] = st.sidebar.selectbox(
|
401 |
-
"Choose Visualization",
|
402 |
[
|
403 |
-
"Histogram", "Scatter Plot", "Box Plot",
|
404 |
-
"Correlation Heatmap", "3D Scatter",
|
405 |
"Violin Plot", "Time Series", "Scatter Matrix"
|
406 |
],
|
407 |
index=0 # Reset to first option when plot type changes
|
@@ -410,17 +410,17 @@ elif app_mode == "Advanced EDA":
|
|
410 |
# Dynamic controls based on plot type
|
411 |
if st.session_state.plot_config['plot_type'] != "Correlation Heatmap":
|
412 |
st.session_state.plot_config['x_col'] = st.sidebar.selectbox(
|
413 |
-
"X Axis",
|
414 |
df.columns,
|
415 |
-
index=df.columns.get_loc(st.session_state.plot_config['x_col'])
|
416 |
if st.session_state.plot_config['x_col'] in df.columns else 0
|
417 |
)
|
418 |
|
419 |
-
if st.session_state.plot_config['plot_type'] in ["Scatter Plot", "Box Plot",
|
420 |
-
|
421 |
-
|
422 |
st.session_state.plot_config['y_col'] = st.sidebar.selectbox(
|
423 |
-
"Y Axis",
|
424 |
df.columns,
|
425 |
index=df.columns.get_loc(st.session_state.plot_config['y_col'])
|
426 |
if st.session_state.plot_config['y_col'] in df.columns else 0
|
@@ -428,52 +428,52 @@ elif app_mode == "Advanced EDA":
|
|
428 |
|
429 |
if st.session_state.plot_config['plot_type'] == "3D Scatter":
|
430 |
st.session_state.plot_config['z_col'] = st.sidebar.selectbox(
|
431 |
-
"Z Axis",
|
432 |
df.columns,
|
433 |
index=df.columns.get_loc(st.session_state.plot_config['z_col'])
|
434 |
if st.session_state.plot_config['z_col'] in df.columns else 0
|
435 |
)
|
436 |
st.session_state.plot_config['color_col'] = st.sidebar.selectbox(
|
437 |
-
"Color by",
|
438 |
[None] + list(df.columns)
|
439 |
)
|
440 |
|
441 |
# Color configuration
|
442 |
if st.session_state.plot_config['plot_type'] == "Correlation Heatmap":
|
443 |
st.session_state.plot_config['color_continuous_scale'] = st.sidebar.selectbox(
|
444 |
-
"Color Scale",
|
445 |
['Viridis', 'Plasma', 'Magma', 'Cividis', 'RdBu']
|
446 |
)
|
447 |
else:
|
448 |
st.session_state.plot_config['color_palette'] = st.sidebar.selectbox(
|
449 |
-
"Color Palette",
|
450 |
['#00f7ff', '#ff00ff', '#f70000', '#0000f7']
|
451 |
)
|
452 |
|
453 |
# Additional configurations
|
454 |
if st.session_state.plot_config['plot_type'] == "Scatter Plot":
|
455 |
st.session_state.plot_config['size_col'] = st.sidebar.selectbox(
|
456 |
-
"Size by",
|
457 |
[None] + list(df.columns)
|
458 |
)
|
459 |
st.session_state.plot_config['hover_data_cols'] = st.sidebar.multiselect(
|
460 |
-
"Hover Data",
|
461 |
df.columns
|
462 |
)
|
463 |
|
464 |
if st.session_state.plot_config['plot_type'] == "Time Series":
|
465 |
st.session_state.plot_config['time_col'] = st.sidebar.selectbox(
|
466 |
-
"Time Column",
|
467 |
df.columns
|
468 |
)
|
469 |
st.session_state.plot_config['value_col'] = st.sidebar.selectbox(
|
470 |
-
"Value Column",
|
471 |
df.columns
|
472 |
)
|
473 |
|
474 |
if st.session_state.plot_config['plot_type'] == "Scatter Matrix":
|
475 |
st.session_state.plot_config['scatter_matrix_cols'] = st.multiselect(
|
476 |
-
"Columns for Scatter Matrix",
|
477 |
df.select_dtypes(include=np.number).columns,
|
478 |
default=st.session_state.plot_config['scatter_matrix_cols']
|
479 |
)
|
@@ -482,29 +482,29 @@ elif app_mode == "Advanced EDA":
|
|
482 |
try:
|
483 |
fig = None
|
484 |
config = st.session_state.plot_config
|
485 |
-
|
486 |
if config['plot_type'] == "Histogram":
|
487 |
fig = px.histogram(
|
488 |
-
df, x=config['x_col'], y=config['y_col'],
|
489 |
nbins=30, template="plotly_dark",
|
490 |
color_discrete_sequence=[config['color_palette']]
|
491 |
)
|
492 |
-
|
493 |
elif config['plot_type'] == "Scatter Plot":
|
494 |
fig = px.scatter(
|
495 |
df, x=config['x_col'], y=config['y_col'],
|
496 |
color_discrete_sequence=[config['color_palette']],
|
497 |
-
size=config['size_col'],
|
498 |
hover_data=config['hover_data_cols']
|
499 |
)
|
500 |
-
|
501 |
elif config['plot_type'] == "3D Scatter":
|
502 |
fig = px.scatter_3d(
|
503 |
df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
|
504 |
color=config['color_col'],
|
505 |
color_discrete_sequence=[config['color_palette']]
|
506 |
)
|
507 |
-
|
508 |
elif config['plot_type'] == "Correlation Heatmap":
|
509 |
numeric_df = df.select_dtypes(include=np.number)
|
510 |
if not numeric_df.empty:
|
@@ -515,27 +515,27 @@ elif app_mode == "Advanced EDA":
|
|
515 |
)
|
516 |
else:
|
517 |
st.warning("No numerical columns found for correlation heatmap.")
|
518 |
-
|
519 |
elif config['plot_type'] == "Box Plot":
|
520 |
fig = px.box(
|
521 |
df, x=config['x_col'], y=config['y_col'],
|
522 |
color_discrete_sequence=[config['color_palette']]
|
523 |
)
|
524 |
-
|
525 |
elif config['plot_type'] == "Violin Plot":
|
526 |
fig = px.violin(
|
527 |
df, x=config['x_col'], y=config['y_col'],
|
528 |
box=True, points="all",
|
529 |
color_discrete_sequence=[config['color_palette']]
|
530 |
)
|
531 |
-
|
532 |
elif config['plot_type'] == "Time Series":
|
533 |
df = df.sort_values(by=config['time_col'])
|
534 |
fig = px.line(
|
535 |
df, x=config['time_col'], y=config['value_col'],
|
536 |
color_discrete_sequence=[config['color_palette']]
|
537 |
)
|
538 |
-
|
539 |
elif config['plot_type'] == "Scatter Matrix":
|
540 |
fig = px.scatter_matrix(
|
541 |
df, dimensions=config['scatter_matrix_cols'],
|
@@ -546,25 +546,29 @@ elif app_mode == "Advanced EDA":
|
|
546 |
st.plotly_chart(fig, use_container_width=True)
|
547 |
except Exception as e:
|
548 |
st.error(f"An error occurred while generating the plot: {e}")
|
|
|
549 |
with st.expander("🧪 Hypothesis Testing"):
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
|
|
|
|
|
|
568 |
|
569 |
elif app_mode == "Model Training":
|
570 |
st.title("π Model Training")
|
|
|
376 |
with st.expander("π Data Filtering", expanded=False):
|
377 |
# Use direct session state assignment for reactivity
|
378 |
st.session_state.plot_config['filter_col'] = st.selectbox(
|
379 |
+
"Filter Column",
|
380 |
[None] + list(df.columns),
|
381 |
help="Choose a column to filter the data."
|
382 |
)
|
383 |
+
|
384 |
if st.session_state.plot_config['filter_col']:
|
385 |
unique_values = df[st.session_state.plot_config['filter_col']].unique()
|
386 |
st.session_state.plot_config['filter_options'] = st.multiselect(
|
387 |
+
"Filter Values",
|
388 |
+
unique_values,
|
389 |
default=unique_values,
|
390 |
help=f"Select values from '{st.session_state.plot_config['filter_col']}'"
|
391 |
)
|
|
|
395 |
|
396 |
# Visualization Configuration
|
397 |
st.sidebar.header("π Plot Configuration")
|
398 |
+
|
399 |
# Plot type selector
|
400 |
st.session_state.plot_config['plot_type'] = st.sidebar.selectbox(
|
401 |
+
"Choose Visualization",
|
402 |
[
|
403 |
+
"Histogram", "Scatter Plot", "Box Plot",
|
404 |
+
"Correlation Heatmap", "3D Scatter",
|
405 |
"Violin Plot", "Time Series", "Scatter Matrix"
|
406 |
],
|
407 |
index=0 # Reset to first option when plot type changes
|
|
|
410 |
# Dynamic controls based on plot type
|
411 |
if st.session_state.plot_config['plot_type'] != "Correlation Heatmap":
|
412 |
st.session_state.plot_config['x_col'] = st.sidebar.selectbox(
|
413 |
+
"X Axis",
|
414 |
df.columns,
|
415 |
+
index=df.columns.get_loc(st.session_state.plot_config['x_col'])
|
416 |
if st.session_state.plot_config['x_col'] in df.columns else 0
|
417 |
)
|
418 |
|
419 |
+
if st.session_state.plot_config['plot_type'] in ["Scatter Plot", "Box Plot",
|
420 |
+
"Violin Plot", "Time Series",
|
421 |
+
"3D Scatter", "Histogram"]:
|
422 |
st.session_state.plot_config['y_col'] = st.sidebar.selectbox(
|
423 |
+
"Y Axis",
|
424 |
df.columns,
|
425 |
index=df.columns.get_loc(st.session_state.plot_config['y_col'])
|
426 |
if st.session_state.plot_config['y_col'] in df.columns else 0
|
|
|
428 |
|
429 |
if st.session_state.plot_config['plot_type'] == "3D Scatter":
|
430 |
st.session_state.plot_config['z_col'] = st.sidebar.selectbox(
|
431 |
+
"Z Axis",
|
432 |
df.columns,
|
433 |
index=df.columns.get_loc(st.session_state.plot_config['z_col'])
|
434 |
if st.session_state.plot_config['z_col'] in df.columns else 0
|
435 |
)
|
436 |
st.session_state.plot_config['color_col'] = st.sidebar.selectbox(
|
437 |
+
"Color by",
|
438 |
[None] + list(df.columns)
|
439 |
)
|
440 |
|
441 |
# Color configuration
|
442 |
if st.session_state.plot_config['plot_type'] == "Correlation Heatmap":
|
443 |
st.session_state.plot_config['color_continuous_scale'] = st.sidebar.selectbox(
|
444 |
+
"Color Scale",
|
445 |
['Viridis', 'Plasma', 'Magma', 'Cividis', 'RdBu']
|
446 |
)
|
447 |
else:
|
448 |
st.session_state.plot_config['color_palette'] = st.sidebar.selectbox(
|
449 |
+
"Color Palette",
|
450 |
['#00f7ff', '#ff00ff', '#f70000', '#0000f7']
|
451 |
)
|
452 |
|
453 |
# Additional configurations
|
454 |
if st.session_state.plot_config['plot_type'] == "Scatter Plot":
|
455 |
st.session_state.plot_config['size_col'] = st.sidebar.selectbox(
|
456 |
+
"Size by",
|
457 |
[None] + list(df.columns)
|
458 |
)
|
459 |
st.session_state.plot_config['hover_data_cols'] = st.sidebar.multiselect(
|
460 |
+
"Hover Data",
|
461 |
df.columns
|
462 |
)
|
463 |
|
464 |
if st.session_state.plot_config['plot_type'] == "Time Series":
|
465 |
st.session_state.plot_config['time_col'] = st.sidebar.selectbox(
|
466 |
+
"Time Column",
|
467 |
df.columns
|
468 |
)
|
469 |
st.session_state.plot_config['value_col'] = st.sidebar.selectbox(
|
470 |
+
"Value Column",
|
471 |
df.columns
|
472 |
)
|
473 |
|
474 |
if st.session_state.plot_config['plot_type'] == "Scatter Matrix":
|
475 |
st.session_state.plot_config['scatter_matrix_cols'] = st.multiselect(
|
476 |
+
"Columns for Scatter Matrix",
|
477 |
df.select_dtypes(include=np.number).columns,
|
478 |
default=st.session_state.plot_config['scatter_matrix_cols']
|
479 |
)
|
|
|
482 |
try:
|
483 |
fig = None
|
484 |
config = st.session_state.plot_config
|
485 |
+
|
486 |
if config['plot_type'] == "Histogram":
|
487 |
fig = px.histogram(
|
488 |
+
df, x=config['x_col'], y=config['y_col'],
|
489 |
nbins=30, template="plotly_dark",
|
490 |
color_discrete_sequence=[config['color_palette']]
|
491 |
)
|
492 |
+
|
493 |
elif config['plot_type'] == "Scatter Plot":
|
494 |
fig = px.scatter(
|
495 |
df, x=config['x_col'], y=config['y_col'],
|
496 |
color_discrete_sequence=[config['color_palette']],
|
497 |
+
size=config['size_col'],
|
498 |
hover_data=config['hover_data_cols']
|
499 |
)
|
500 |
+
|
501 |
elif config['plot_type'] == "3D Scatter":
|
502 |
fig = px.scatter_3d(
|
503 |
df, x=config['x_col'], y=config['y_col'], z=config['z_col'],
|
504 |
color=config['color_col'],
|
505 |
color_discrete_sequence=[config['color_palette']]
|
506 |
)
|
507 |
+
|
508 |
elif config['plot_type'] == "Correlation Heatmap":
|
509 |
numeric_df = df.select_dtypes(include=np.number)
|
510 |
if not numeric_df.empty:
|
|
|
515 |
)
|
516 |
else:
|
517 |
st.warning("No numerical columns found for correlation heatmap.")
|
518 |
+
|
519 |
elif config['plot_type'] == "Box Plot":
|
520 |
fig = px.box(
|
521 |
df, x=config['x_col'], y=config['y_col'],
|
522 |
color_discrete_sequence=[config['color_palette']]
|
523 |
)
|
524 |
+
|
525 |
elif config['plot_type'] == "Violin Plot":
|
526 |
fig = px.violin(
|
527 |
df, x=config['x_col'], y=config['y_col'],
|
528 |
box=True, points="all",
|
529 |
color_discrete_sequence=[config['color_palette']]
|
530 |
)
|
531 |
+
|
532 |
elif config['plot_type'] == "Time Series":
|
533 |
df = df.sort_values(by=config['time_col'])
|
534 |
fig = px.line(
|
535 |
df, x=config['time_col'], y=config['value_col'],
|
536 |
color_discrete_sequence=[config['color_palette']]
|
537 |
)
|
538 |
+
|
539 |
elif config['plot_type'] == "Scatter Matrix":
|
540 |
fig = px.scatter_matrix(
|
541 |
df, dimensions=config['scatter_matrix_cols'],
|
|
|
546 |
st.plotly_chart(fig, use_container_width=True)
|
547 |
except Exception as e:
|
548 |
st.error(f"An error occurred while generating the plot: {e}")
|
549 |
+
|
550 |
with st.expander("🧪 Hypothesis Testing"):
|
551 |
+
test_type = st.selectbox("Select Test Type", ["T-test", "Chi-Squared Test"])
|
552 |
+
|
553 |
+
if test_type == "T-test":
|
554 |
+
col1 = st.selectbox("Column 1 (Numeric)", df.select_dtypes(include=np.number).columns)
|
555 |
+
col2 = st.selectbox("Column 2 (Categorical)", df.select_dtypes(include='object').columns)
|
556 |
+
if st.button("Run T-test"):
|
557 |
+
# Example: Split data by category and perform t-test
|
558 |
+
try:
|
559 |
+
groups = df.groupby(col2)[col1].apply(list)
|
560 |
+
if len(groups) == 2:
|
561 |
+
t_stat, p_value = stats.ttest_ind(groups.iloc[0], groups.iloc[1])
|
562 |
+
st.write(f"T-statistic: {t_stat:.4f}")
|
563 |
+
st.write(f"P-value: {p_value:.4f}")
|
564 |
+
if p_value < 0.05:
|
565 |
+
st.write("Reject the null hypothesis.")
|
566 |
+
else:
|
567 |
+
st.write("Fail to reject the null hypothesis.")
|
568 |
+
else:
|
569 |
+
st.write("Select a categorical column with exactly two categories.")
|
570 |
+
except Exception as e:
|
571 |
+
st.error(f"An error occurred during the T-test: {e}")
|
572 |
|
573 |
elif app_mode == "Model Training":
|
574 |
st.title("π Model Training")
|