File size: 1,560 Bytes
92a085a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO

def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it.

    pyplot keeps every figure it creates registered until it is explicitly
    closed, so closing here prevents figures from accumulating in memory.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _render_eda(data):
    """Render summary statistics and exploratory plots for *data*.

    The correlation heatmap and histograms are computed from the numeric
    columns only; when there are none, every plot is skipped with a notice
    instead of raising.
    """
    st.subheader("Exploratory Data Analysis")

    # Summary statistics
    st.write("Summary Statistics:")
    st.write(data.describe())

    # Restrict to numeric columns up front: DataFrame.corr() raises on
    # string columns in recent pandas, and an empty selection would make
    # the heatmap and pairplot fail.
    numeric = data.select_dtypes(include="number")
    if numeric.empty:
        st.info("No numeric data found; skipping the plots.")
        return

    # Correlation heatmap
    st.write("Correlation Heatmap:")
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(numeric.corr(), annot=True, cmap='coolwarm', ax=ax)
    _show_figure(fig)

    # Pairplot (seaborn selects the plottable variables from the full frame)
    st.write("Pairplot:")
    grid = sns.pairplot(data)
    # pairplot returns a grid object; hand its underlying figure to Streamlit.
    _show_figure(grid.figure)

    # Histogram, one per numeric column
    st.write("Histograms:")
    for column in numeric.columns:
        fig, ax = plt.subplots()
        sns.histplot(numeric[column], kde=True, ax=ax)
        _show_figure(fig)


def main():
    """Run the PPDAC (Problem, Plan, Data, Analysis, Conclusion) app.

    Shows free-text inputs for the problem and plan, then a CSV uploader.
    Once a file is uploaded and parsed, a preview, the exploratory analysis
    and a conclusion input are rendered. A file that cannot be parsed is
    reported in the page and the remaining sections are not shown.
    """
    st.title("PPDAC Data Analysis Toolkit")

    # Problem
    st.header("1. Problem")
    st.text_area("Define your problem:")

    # Plan
    st.header("2. Plan")
    st.text_area("Describe your plan:")

    # Data
    st.header("3. Data")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is None:
        return

    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as err:
        # Report a bad upload in the page rather than crashing with a traceback.
        st.error(f"Could not read the uploaded CSV file: {err}")
        return

    st.write(data.head())

    # Analysis
    st.header("4. Analysis")
    _render_eda(data)

    # Conclusion
    st.header("5. Conclusion")
    st.text_area("Write your conclusion based on the analysis:")

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()