JUNGU committed on
Commit
f076a08
·
verified ·
1 Parent(s): dc3cf99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -29
app.py CHANGED
@@ -2,7 +2,73 @@ import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
 
5
  from io import StringIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def main():
8
  st.title("PPDAC Data Analysis Toolkit")
@@ -17,39 +83,29 @@ def main():
17
 
18
  # Data
19
  st.header("3. Data")
20
- uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
21
 
22
- if uploaded_file is not None:
23
- data = pd.read_csv(uploaded_file)
 
 
 
 
 
 
 
 
 
24
  st.write(data.head())
25
 
26
- # Analysis
27
- st.header("4. Analysis")
28
-
29
- # EDA
30
- st.subheader("Exploratory Data Analysis")
31
-
32
- # Summary statistics
33
- st.write("Summary Statistics:")
34
- st.write(data.describe())
35
-
36
- # Correlation heatmap
37
- st.write("Correlation Heatmap:")
38
- fig, ax = plt.subplots(figsize=(10, 8))
39
- sns.heatmap(data.corr(), annot=True, cmap='coolwarm', ax=ax)
40
- st.pyplot(fig)
41
-
42
- # Pairplot
43
- st.write("Pairplot:")
44
- fig = sns.pairplot(data)
45
- st.pyplot(fig)
46
 
47
- # Histogram
48
- st.write("Histograms:")
49
- for column in data.select_dtypes(include=['float64', 'int64']).columns:
50
- fig, ax = plt.subplots()
51
- sns.histplot(data[column], kde=True, ax=ax)
52
- st.pyplot(fig)
53
 
54
  # Conclusion
55
  st.header("5. Conclusion")
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
+ import numpy as np
6
  from io import StringIO
7
+ import openpyxl
8
+
9
def load_data(file):
    """Read an uploaded CSV or Excel file into a pandas DataFrame.

    Args:
        file: File-like object exposing a ``name`` attribute (for example the
            object returned by ``st.file_uploader``). The extension of
            ``name`` selects the parser.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the extension is not
        supported or the file cannot be parsed. In both failure cases a
        message is shown with ``st.error``.
    """
    # rpartition returns an empty separator when the name contains no dot,
    # so a file literally named "csv" is not mistaken for a CSV file.
    _, dot, suffix = file.name.rpartition('.')
    file_extension = suffix.lower() if dot else ''

    if file_extension == 'csv':
        reader = pd.read_csv
    elif file_extension in ('xls', 'xlsx'):
        reader = pd.read_excel
    else:
        st.error("Unsupported file format. Please upload a CSV, XLS, or XLSX file.")
        return None

    try:
        return reader(file)
    except (ValueError, ImportError, OSError) as exc:
        # pandas reports empty or malformed input through ValueError
        # subclasses and a missing Excel engine through ImportError; show the
        # problem in the app instead of aborting the script with a traceback.
        st.error(f"Could not read {file.name}: {exc}")
        return None
19
+
20
def manual_data_entry():
    """Collect a small table from the user through Streamlit text inputs.

    The user first types comma-separated column names and then the number of
    rows; one text box is rendered for every cell.

    Returns:
        A ``DataFrame`` built from the entered cell values, with one column
        per distinct name, or ``None`` while no column name has been entered.
    """
    st.subheader("Manual Data Entry")
    raw_names = st.text_input("Enter column names separated by commas:")
    entered = [name.strip() for name in raw_names.split(',') if name.strip()]

    # Repeated names would create text inputs with identical labels and a
    # frame with ambiguous columns, so only the first occurrence is kept.
    col_names = list(dict.fromkeys(entered))
    if len(col_names) < len(entered):
        st.warning("Duplicate column names were ignored.")

    if not col_names:
        return None

    # range() needs an int; coerce in case the widget hands back a float.
    num_rows = int(st.number_input("Enter number of rows:", min_value=1, value=5))
    data = [
        [st.text_input(f"Enter value for {col} (Row {i+1}):") for col in col_names]
        for i in range(num_rows)
    ]
    return pd.DataFrame(data, columns=col_names)
37
+
38
def perform_analysis(data):
    """Render the "4. Analysis" section of the app for *data*.

    Shows summary statistics for the whole frame, then a correlation heatmap,
    a pairplot and one histogram per column, all restricted to the numeric
    columns. Each figure is closed after it has been handed to Streamlit.

    Args:
        data: ``pandas.DataFrame`` to analyse.
    """
    st.header("4. Analysis")

    # EDA
    st.subheader("Exploratory Data Analysis")

    # Summary statistics
    st.write("Summary Statistics:")
    st.write(data.describe())

    # 'number' also picks up numeric dtypes such as int32 or float32, which
    # an explicit ['float64', 'int64'] list would silently drop.
    numeric_data = data.select_dtypes(include='number')
    has_numeric = not numeric_data.empty

    # Correlation heatmap
    st.write("Correlation Heatmap:")
    if has_numeric:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; without this
        # they pile up each time the script is re-executed.
        plt.close(fig)
    else:
        st.write("No numeric columns available for correlation heatmap.")

    # Pairplot
    st.write("Pairplot:")
    if has_numeric:
        grid = sns.pairplot(numeric_data)
        st.pyplot(grid)
        plt.close(grid.figure)
    else:
        st.write("No numeric columns available for pairplot.")

    # Histogram
    st.write("Histograms:")
    if has_numeric:
        for column in numeric_data.columns:
            fig, ax = plt.subplots()
            sns.histplot(numeric_data[column], kde=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)
    else:
        st.write("No numeric columns available for histograms.")
72
 
73
  def main():
74
  st.title("PPDAC Data Analysis Toolkit")
 
83
 
84
  # Data
85
  st.header("3. Data")
86
+ data_input_method = st.radio("Choose data input method:", ("Upload File", "Manual Entry"))
87
 
88
+ if data_input_method == "Upload File":
89
+ uploaded_file = st.file_uploader("Choose a CSV, XLS, or XLSX file", type=["csv", "xls", "xlsx"])
90
+ if uploaded_file is not None:
91
+ data = load_data(uploaded_file)
92
+ else:
93
+ data = None
94
+ else:
95
+ data = manual_data_entry()
96
+
97
+ if data is not None:
98
+ st.write("Data Preview:")
99
  st.write(data.head())
100
 
101
+ # Convert columns to numeric where possible
102
+ for col in data.columns:
103
+ try:
104
+ data[col] = pd.to_numeric(data[col])
105
+ except ValueError:
106
+ pass # Keep as non-numeric if conversion fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ perform_analysis(data)
 
 
 
 
 
109
 
110
  # Conclusion
111
  st.header("5. Conclusion")