from io import StringIO

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st


def _show_and_close(fig):
    """Render a Matplotlib figure in the Streamlit page, then close it.

    Streamlit re-executes the script on every widget interaction, so figures
    left open would accumulate in pyplot's figure registry across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _load_csv(uploaded_file):
    """Parse the uploaded CSV into a DataFrame.

    Returns the DataFrame, or ``None`` after showing an error message when
    the file is empty, malformed, or not decodable as text.
    """
    try:
        return pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None


def _run_eda(data):
    """Render summary statistics, correlation heatmap, pairplot and histograms.

    The plots are skipped with an informational message when ``data`` has no
    numeric columns, because seaborn cannot draw them in that case.
    """
    st.subheader("Exploratory Data Analysis")

    st.write("Summary Statistics:")
    st.write(data.describe())

    # Restrict to numeric columns: DataFrame.corr() raises on text columns in
    # pandas >= 2.0, and any numeric dtype (not just 64-bit) should be plotted.
    numeric = data.select_dtypes(include="number")
    if numeric.shape[1] == 0:
        st.info(
            "No numeric columns found; skipping correlation heatmap, "
            "pairplot and histograms."
        )
        return

    st.write("Correlation Heatmap:")
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(numeric.corr(), annot=True, cmap='coolwarm', ax=ax)
    _show_and_close(fig)

    st.write("Pairplot:")
    grid = sns.pairplot(data)
    # pairplot returns a PairGrid, not a Figure; hand Streamlit the real
    # figure. ``figure`` is the current attribute name, ``fig`` the older one.
    _show_and_close(getattr(grid, "figure", None) or grid.fig)

    st.write("Histograms:")
    for column in numeric.columns:
        fig, ax = plt.subplots()
        sns.histplot(numeric[column], kde=True, ax=ax)
        _show_and_close(fig)


def main():
    """Render the five PPDAC sections: Problem, Plan, Data, Analysis, Conclusion.

    The Analysis section runs only after a CSV file has been uploaded and
    parsed successfully, so it never touches an undefined dataset.
    """
    st.title("PPDAC Data Analysis Toolkit")

    # Problem
    st.header("1. Problem")
    st.text_area("Define your problem:")

    # Plan
    st.header("2. Plan")
    st.text_area("Describe your plan:")

    # Data
    st.header("3. Data")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    data = None
    if uploaded_file is not None:
        data = _load_csv(uploaded_file)
        if data is not None:
            st.write(data.head())

    # Analysis
    st.header("4. Analysis")
    if data is None:
        st.info("Upload a CSV file to run the analysis.")
    else:
        _run_eda(data)

    # Conclusion
    st.header("5. Conclusion")
    st.text_area("Write your conclusion based on the analysis:")


if __name__ == "__main__":
    main()