# Spaces: Runtime error
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
from io import StringIO | |
import openpyxl | |
def load_data(file):
    """Read an uploaded CSV or Excel file into a pandas DataFrame.

    Args:
        file: File-like object exposing a ``name`` attribute (for example
            the object returned by ``st.file_uploader``). The extension of
            ``name`` selects the parser, case-insensitively.

    Returns:
        The parsed ``DataFrame``, or ``None`` when the extension is not
        supported or the file cannot be parsed. In both failure cases an
        error message is shown with ``st.error``.
    """
    # rpartition instead of split: a name without any dot (e.g. "csv") must
    # not be mistaken for a supported extension.
    _, dot, suffix = file.name.rpartition(".")
    extension = suffix.lower() if dot else ""

    readers = {
        "csv": pd.read_csv,
        "xls": pd.read_excel,
        "xlsx": pd.read_excel,
    }
    reader = readers.get(extension)
    if reader is None:
        st.error("Unsupported file format. Please upload a CSV, XLS, or XLSX file.")
        return None

    try:
        return reader(file)
    except (ValueError, ImportError, OSError) as exc:
        # ValueError covers pandas' EmptyDataError/ParserError and text
        # decoding errors; ImportError is raised when the optional Excel
        # engine is not installed. Report the problem instead of crashing.
        st.error(f"Could not read '{file.name}': {exc}")
        return None
def manual_data_entry():
    """Render widgets for typing a small table by hand and return it.

    The user enters comma-separated column names, a row count, and then one
    text value per cell.

    Returns:
        A ``DataFrame`` holding the entered strings, one column per distinct
        column name, or ``None`` while no column name has been entered.
    """
    st.subheader("Manual Data Entry")
    raw_names = st.text_input("Enter column names separated by commas:")
    entered = [name.strip() for name in raw_names.split(",") if name.strip()]
    if not entered:
        return None

    # Repeated names would create text inputs with identical labels (which
    # Streamlit rejects as duplicate widgets) and duplicate DataFrame
    # columns, so keep only the first occurrence of each name.
    col_names = list(dict.fromkeys(entered))
    if len(col_names) < len(entered):
        st.warning("Duplicate column names were ignored.")

    # int() keeps range() safe should number_input ever hand back a float.
    num_rows = int(st.number_input("Enter number of rows:", min_value=1, value=5))

    rows = []
    for row_idx in range(num_rows):
        # An explicit key gives every cell widget its own identity.
        rows.append(
            [
                st.text_input(
                    f"Enter value for {col} (Row {row_idx + 1}):",
                    key=f"manual_entry_{row_idx}_{col_idx}_{col}",
                )
                for col_idx, col in enumerate(col_names)
            ]
        )
    return pd.DataFrame(rows, columns=col_names)
def perform_analysis(data):
    """Render the exploratory-analysis section for ``data``.

    Shows summary statistics for the frame and then, for its numeric
    columns, a correlation heatmap, a pairplot and one histogram per column.
    Each matplotlib figure is closed once it has been rendered.

    Args:
        data: pandas ``DataFrame`` to analyse.
    """
    st.header("4. Analysis")

    # EDA
    st.subheader("Exploratory Data Analysis")

    # Summary statistics
    st.write("Summary Statistics:")
    st.write(data.describe())

    # "number" selects every numeric dtype rather than only float64/int64.
    numeric_data = data.select_dtypes(include="number")
    has_numeric = not numeric_data.empty

    # Correlation heatmap
    st.write("Correlation Heatmap:")
    if has_numeric:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(numeric_data.corr(), annot=True, cmap="coolwarm", ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    else:
        st.write("No numeric columns available for correlation heatmap.")

    # Pairplot
    st.write("Pairplot:")
    if has_numeric:
        grid = sns.pairplot(numeric_data)
        # Pass the underlying Figure to Streamlit; fall back to ``.fig`` in
        # case this seaborn version does not expose ``.figure``.
        pair_fig = grid.figure if hasattr(grid, "figure") else grid.fig
        st.pyplot(pair_fig)
        plt.close(pair_fig)
    else:
        st.write("No numeric columns available for pairplot.")

    # Histogram
    st.write("Histograms:")
    for column in numeric_data.columns:
        fig, ax = plt.subplots()
        sns.histplot(numeric_data[column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)
def _coerce_numeric_columns(data):
    """Convert, in place, the non-numeric columns of ``data`` that parse fully as numbers."""
    for col, dtype in data.dtypes.items():
        # Leave numeric columns alone, and skip datetime/timedelta columns:
        # pd.to_numeric would reinterpret those as raw integers.
        if pd.api.types.is_numeric_dtype(dtype) or dtype.kind in "mM":
            continue
        try:
            data[col] = pd.to_numeric(data[col])
        except (ValueError, TypeError):
            # Best effort: a column with unparseable text (ValueError) or
            # unsupported objects (TypeError) stays as it is.
            continue


def main():
    """Render the PPDAC page: Problem, Plan, Data, Analysis and Conclusion.

    The data comes either from an uploaded file or from manual entry. When a
    table is available it is previewed, its text columns are converted to
    numbers where possible, and the analysis section is rendered.
    """
    st.title("PPDAC Data Analysis Toolkit")

    # Problem
    st.header("1. Problem")
    st.text_area("Define your problem:")

    # Plan
    st.header("2. Plan")
    st.text_area("Describe your plan:")

    # Data
    st.header("3. Data")
    data_input_method = st.radio(
        "Choose data input method:", ("Upload File", "Manual Entry")
    )
    data = None
    if data_input_method == "Upload File":
        uploaded_file = st.file_uploader(
            "Choose a CSV, XLS, or XLSX file", type=["csv", "xls", "xlsx"]
        )
        if uploaded_file is not None:
            data = load_data(uploaded_file)
    else:
        data = manual_data_entry()

    if data is not None:
        st.write("Data Preview:")
        st.write(data.head())

        # Convert columns to numeric where possible
        _coerce_numeric_columns(data)
        perform_analysis(data)

    # Conclusion
    st.header("5. Conclusion")
    st.text_area("Write your conclusion based on the analysis:")
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()