PPPDC_example / app.py
JUNGU's picture
Update app.py
f076a08 verified
raw
history blame
3.41 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from io import StringIO
import openpyxl
def load_data(file):
    """Read an uploaded CSV or Excel file into a pandas DataFrame.

    The parser is chosen from the extension of ``file.name``
    (case-insensitive): ``csv`` uses ``pd.read_csv``; ``xls`` and ``xlsx``
    use ``pd.read_excel``.

    Args:
        file: File-like object that exposes a ``name`` attribute and can be
            passed directly to the pandas readers.

    Returns:
        The parsed DataFrame, or ``None`` after reporting the problem with
        ``st.error`` when the extension is unsupported or the content cannot
        be parsed.
    """
    file_extension = file.name.rsplit('.', 1)[-1].lower()
    readers = {
        'csv': pd.read_csv,
        'xls': pd.read_excel,
        'xlsx': pd.read_excel,
    }
    reader = readers.get(file_extension)
    if reader is None:
        st.error("Unsupported file format. Please upload a CSV, XLS, or XLSX file.")
        return None

    try:
        return reader(file)
    except (ValueError, ImportError) as exc:
        # ValueError covers pandas' EmptyDataError/ParserError as well as
        # UnicodeDecodeError (all ValueError subclasses); ImportError is what
        # read_excel raises when the engine for the format is not installed.
        # Report it the same way as an unsupported format instead of letting
        # the exception escape to the page.
        st.error(f"Could not read the uploaded file: {exc}")
        return None
def manual_data_entry():
    """Build a DataFrame from values typed into Streamlit text inputs.

    Asks for comma-separated column names, then a row count, then renders one
    text input per cell (row-major order).

    Returns:
        A DataFrame holding the entered strings, one column per name, or
        ``None`` when no column name has been entered yet or the names are
        not unique.
    """
    st.subheader("Manual Data Entry")
    raw_names = st.text_input("Enter column names separated by commas:")
    col_names = [name.strip() for name in raw_names.split(',') if name.strip()]
    if not col_names:
        return None

    # Repeated names would create several text inputs with identical labels
    # (Streamlit needs each such widget to be distinguishable) and a frame
    # whose column labels are ambiguous, so reject them up front.
    if len(set(col_names)) != len(col_names):
        st.error("Column names must be unique.")
        return None

    num_rows = st.number_input("Enter number of rows:", min_value=1, value=5)
    rows = []
    for i in range(num_rows):
        rows.append(
            [st.text_input(f"Enter value for {col} (Row {i+1}):") for col in col_names]
        )
    return pd.DataFrame(rows, columns=col_names)
def perform_analysis(data):
    """Render the "4. Analysis" section for *data* on the Streamlit page.

    Writes, in order: ``data.describe()``, a correlation heatmap, a seaborn
    pairplot, and one histogram (with KDE) per numeric column. When there is
    no numeric data, the heatmap and pairplot are replaced by a short notice.

    Args:
        data: pandas DataFrame to analyse.
    """
    st.header("4. Analysis")

    # EDA
    st.subheader("Exploratory Data Analysis")

    # Summary statistics
    st.write("Summary Statistics:")
    st.write(data.describe())

    # "number" selects every numeric width (int32, float32, uint8, ...), not
    # just int64/float64. Timedelta columns are excluded explicitly because
    # NumPy's timedelta64 is itself a subclass of np.number.
    numeric_data = data.select_dtypes(include="number", exclude="timedelta")
    has_numeric = not numeric_data.empty

    # Correlation heatmap
    st.write("Correlation Heatmap:")
    if has_numeric:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure registered until it is closed; without
        # this each script rerun would leave another figure in memory.
        plt.close(fig)
    else:
        st.write("No numeric columns available for correlation heatmap.")

    # Pairplot
    st.write("Pairplot:")
    if has_numeric:
        grid = sns.pairplot(numeric_data)
        # Hand Streamlit the underlying matplotlib Figure of the PairGrid.
        st.pyplot(grid.figure)
        plt.close(grid.figure)
    else:
        st.write("No numeric columns available for pairplot.")

    # Histogram
    st.write("Histograms:")
    for column in numeric_data.columns:
        fig, ax = plt.subplots()
        sns.histplot(numeric_data[column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)
def _convert_text_columns_to_numeric(data):
    """Convert text columns of *data* to numeric dtypes in place where possible."""
    for name, dtype in data.dtypes.items():
        is_text = (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
        )
        # Only text-like columns are candidates. pd.to_numeric would otherwise
        # reinterpret datetime columns (e.g. parsed from Excel) as raw int64
        # nanosecond values, which would then be analysed as numbers.
        if not is_text:
            continue
        try:
            data[name] = pd.to_numeric(data[name])
        except (ValueError, TypeError):
            # ValueError: unparsable strings; TypeError: objects to_numeric
            # cannot handle at all. Either way keep the column as it is.
            continue


def main():
    """Run the PPDAC app: Problem, Plan, Data, Analysis, Conclusion.

    Collects free-text problem and plan descriptions, obtains a DataFrame
    from an uploaded file or manual entry, previews it, converts text columns
    to numeric where possible, runs the analysis section, and finally offers
    a free-text conclusion field.
    """
    st.title("PPDAC Data Analysis Toolkit")

    # Problem
    st.header("1. Problem")
    st.text_area("Define your problem:")

    # Plan
    st.header("2. Plan")
    st.text_area("Describe your plan:")

    # Data
    st.header("3. Data")
    data_input_method = st.radio("Choose data input method:", ("Upload File", "Manual Entry"))
    data = None
    if data_input_method == "Upload File":
        uploaded_file = st.file_uploader("Choose a CSV, XLS, or XLSX file", type=["csv", "xls", "xlsx"])
        if uploaded_file is not None:
            data = load_data(uploaded_file)
    else:
        data = manual_data_entry()

    if data is not None:
        st.write("Data Preview:")
        st.write(data.head())
        # Convert columns to numeric where possible
        _convert_text_columns_to_numeric(data)
        perform_analysis(data)

    # Conclusion
    st.header("5. Conclusion")
    st.text_area("Write your conclusion based on the analysis:")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()