JUNGU committed on
Commit
900c0ad
·
verified ·
1 Parent(s): f076a08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -33
app.py CHANGED
@@ -5,6 +5,8 @@ import seaborn as sns
5
  import numpy as np
6
  from io import StringIO
7
  import openpyxl
 
 
8
 
9
  def load_data(file):
10
  file_extension = file.name.split('.')[-1].lower()
@@ -24,22 +26,51 @@ def manual_data_entry():
24
 
25
  if col_names:
26
  num_rows = st.number_input("Enter number of rows:", min_value=1, value=5)
27
- data = []
28
- for i in range(num_rows):
29
- row = []
30
- for col in col_names:
31
- value = st.text_input(f"Enter value for {col} (Row {i+1}):")
32
- row.append(value)
33
- data.append(row)
34
 
35
- return pd.DataFrame(data, columns=col_names)
 
 
 
 
 
 
 
 
36
  return None
37
 
38
- def perform_analysis(data):
39
- st.header("4. Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # EDA
42
- st.subheader("Exploratory Data Analysis")
 
 
43
 
44
  # Summary statistics
45
  st.write("Summary Statistics:")
@@ -70,19 +101,28 @@ def perform_analysis(data):
70
  sns.histplot(data[column], kde=True, ax=ax)
71
  st.pyplot(fig)
72
 
73
- def main():
74
- st.title("PPDAC Data Analysis Toolkit")
 
 
 
 
75
 
76
- # Problem
77
- st.header("1. Problem")
78
- problem = st.text_area("Define your problem:")
 
 
 
 
 
 
 
 
79
 
80
- # Plan
81
- st.header("2. Plan")
82
- plan = st.text_area("Describe your plan:")
83
 
84
- # Data
85
- st.header("3. Data")
86
  data_input_method = st.radio("Choose data input method:", ("Upload File", "Manual Entry"))
87
 
88
  if data_input_method == "Upload File":
@@ -98,18 +138,8 @@ def main():
98
  st.write("Data Preview:")
99
  st.write(data.head())
100
 
101
- # Convert columns to numeric where possible
102
- for col in data.columns:
103
- try:
104
- data[col] = pd.to_numeric(data[col])
105
- except ValueError:
106
- pass # Keep as non-numeric if conversion fails
107
-
108
  perform_analysis(data)
109
 
110
- # Conclusion
111
- st.header("5. Conclusion")
112
- conclusion = st.text_area("Write your conclusion based on the analysis:")
113
-
114
  if __name__ == "__main__":
115
  main()
 
5
  import numpy as np
6
  from io import StringIO
7
  import openpyxl
8
+ from st_aggrid import AgGrid, GridUpdateMode
9
+ from st_aggrid.grid_options_builder import GridOptionsBuilder
10
 
11
  def load_data(file):
12
  file_extension = file.name.split('.')[-1].lower()
 
26
 
27
  if col_names:
28
  num_rows = st.number_input("Enter number of rows:", min_value=1, value=5)
29
+ data = pd.DataFrame(columns=col_names, index=range(num_rows))
 
 
 
 
 
 
30
 
31
+ gd = GridOptionsBuilder.from_dataframe(data)
32
+ gd.configure_default_column(editable=True)
33
+ gridoptions = gd.build()
34
+
35
+ grid_table = AgGrid(data, gridOptions=gridoptions,
36
+ update_mode=GridUpdateMode.VALUE_CHANGED,
37
+ height=400)
38
+
39
+ return grid_table['data']
40
  return None
41
 
42
def preprocess_data(data):
    """Interactively clean a DataFrame and return the cleaned copy.

    Renders a "Data Preprocessing" section. For every column that still
    contains missing values, the user picks a strategy from a select box:
    drop the affected rows, or fill the gaps with the column's mean, median
    or mode. Afterwards each object-dtype column is converted to a numeric
    dtype when all of its values parse as numbers; otherwise it is kept
    unchanged.

    Args:
        data: pandas DataFrame to clean. The caller's object is not modified.

    Returns:
        A new DataFrame with the selected strategies and conversions applied.
    """
    # Work on a copy so the caller's frame is never mutated as a side effect.
    data = data.copy()

    st.subheader("Data Preprocessing")

    # Handle missing values
    if data.isnull().values.any():
        st.write("Handling missing values:")
        for column in data.columns:
            # Re-checked against the current frame: an earlier "Drop" may
            # already have removed every row where this column was missing.
            if not data[column].isnull().any():
                continue

            method = st.selectbox(
                f"Choose method for {column}:",
                ["Drop", "Fill with mean", "Fill with median", "Fill with mode"],
            )
            if method == "Drop":
                data = data.dropna(subset=[column])
                continue

            fill_value = _missing_value_fill(data[column], method)
            if fill_value is None:
                # E.g. mean of a text column, or mode of an all-empty column.
                st.warning(
                    f"Cannot apply '{method}' to {column}; "
                    "missing values were left unchanged."
                )
                continue

            # Assign the result back instead of chained `inplace=True`, which
            # operates on a temporary and is a no-op under Copy-on-Write.
            data[column] = data[column].fillna(fill_value)

    # Convert data types
    for column in data.columns:
        if data[column].dtype != 'object':
            continue
        try:
            data[column] = pd.to_numeric(data[column])
        except (ValueError, TypeError):
            # TypeError covers cells that are not scalars/strings at all.
            st.write(f"Kept {column} as categorical.")
        else:
            st.write(f"Converted {column} to numeric.")

    return data


def _missing_value_fill(series, method):
    """Return the value used to fill gaps in *series*, or None if unavailable.

    Args:
        series: pandas Series containing missing entries.
        method: one of "Fill with mean", "Fill with median", "Fill with mode".

    Returns:
        The fill value, or None when the statistic cannot be computed
        (non-numeric data for mean/median, or no non-null values at all).
    """
    if method == "Fill with mode":
        modes = series.mode()
        # mode() is empty when every entry is missing; use positional access
        # because the result's index is not guaranteed to contain label 0.
        return None if modes.empty else modes.iloc[0]

    # Mean and median are only defined for numbers. Columns typed by hand
    # arrive as strings, so parse them strictly before computing.
    try:
        numeric = pd.to_numeric(series)
    except (ValueError, TypeError):
        return None

    if method == "Fill with mean":
        value = numeric.mean()
    elif method == "Fill with median":
        value = numeric.median()
    else:
        return None

    # An all-missing column yields NaN, which would fill nothing.
    return None if pd.isna(value) else value
71
+
72
+ def perform_analysis(data):
73
+ st.header("Exploratory Data Analysis")
74
 
75
  # Summary statistics
76
  st.write("Summary Statistics:")
 
101
  sns.histplot(data[column], kde=True, ax=ax)
102
  st.pyplot(fig)
103
 
104
+ # Box plots for numerical columns
105
+ st.write("Box Plots:")
106
+ for column in numeric_data.columns:
107
+ fig, ax = plt.subplots()
108
+ sns.boxplot(data=data, y=column, ax=ax)
109
+ st.pyplot(fig)
110
 
111
+ # Bar plots for categorical columns
112
+ categorical_columns = data.select_dtypes(include=['object']).columns
113
+ if not categorical_columns.empty:
114
+ st.write("Bar Plots for Categorical Variables:")
115
+ for column in categorical_columns:
116
+ fig, ax = plt.subplots()
117
+ data[column].value_counts().plot(kind='bar', ax=ax)
118
+ plt.title(f"Distribution of {column}")
119
+ plt.xlabel(column)
120
+ plt.ylabel("Count")
121
+ st.pyplot(fig)
122
 
123
+ def main():
124
+ st.title("Interactive EDA Toolkit")
 
125
 
 
 
126
  data_input_method = st.radio("Choose data input method:", ("Upload File", "Manual Entry"))
127
 
128
  if data_input_method == "Upload File":
 
138
  st.write("Data Preview:")
139
  st.write(data.head())
140
 
141
+ data = preprocess_data(data)
 
 
 
 
 
 
142
  perform_analysis(data)
143
 
 
 
 
 
144
  if __name__ == "__main__":
145
  main()