import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from io import StringIO
import openpyxl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Korean font setup
def set_font(font_path="Pretendard-Bold.ttf"):
    """Register a font file with matplotlib and make it the default family.

    Args:
        font_path: Path to the font file to register. Defaults to
            ``Pretendard-Bold.ttf``, resolved against the current working
            directory.

    If the file does not exist, a warning is emitted and matplotlib's
    current font settings are left untouched instead of raising, so a
    missing font does not prevent the application from starting.
    """
    import os
    import warnings

    if not os.path.isfile(font_path):
        warnings.warn(
            f"Font file not found: {font_path!r}; keeping matplotlib's default font."
        )
        return

    fm.fontManager.addfont(font_path)
    # Ask the font file for its real family name: the file stem (e.g.
    # "Pretendard-Bold") is not necessarily the name matplotlib registers.
    plt.rcParams['font.family'] = fm.FontProperties(fname=font_path).get_name()
    # Keep the minus sign renderable with fonts lacking the Unicode minus glyph.
    plt.rcParams['axes.unicode_minus'] = False

# Module-level setup, executed once per script run before any UI is drawn.
# Turn off the 'deprecation.showPyplotGlobalUse' Streamlit option.
# NOTE(review): this option key may not exist in every Streamlit release; if
# it is unknown, set_option may raise — confirm against the pinned version.
st.set_option('deprecation.showPyplotGlobalUse', False)
# Register the font and apply the matplotlib rcParams defined in set_font().
set_font()

def load_data(file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: Seekable binary file-like object exposing a ``name`` attribute
            whose extension selects the parser (``csv``, ``xls`` or ``xlsx``,
            case-insensitive).

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` (after showing an error
        message in the UI) when the extension is not supported.
    """
    file_extension = file.name.rsplit('.', 1)[-1].lower()

    if file_extension == 'csv':
        try:
            return pd.read_csv(file)
        except UnicodeDecodeError:
            # Not valid UTF-8: CSV files saved by Korean-locale spreadsheet
            # software are commonly CP949. The failed attempt consumed part of
            # the stream, so rewind before retrying.
            file.seek(0)
            return pd.read_csv(file, encoding='cp949')

    if file_extension in ('xls', 'xlsx'):
        return pd.read_excel(file)

    st.error("지원되지 않는 파일 형식입니다. CSV, XLS, 또는 XLSX 파일을 업로드해주세요.")
    return None

def manual_data_entry():
    """Let the user define columns and type table values by hand.

    Renders a text input for comma-separated column names, a number input
    for the initial row count, and an editable table with that shape.

    Returns:
        The DataFrame returned by the data editor, or ``None`` while no
        column name has been entered.
    """
    st.subheader("수동 데이터 입력")
    raw_names = st.text_input("열 이름을 쉼표로 구분하여 입력하세요:").split(',')
    # Strip blanks and drop repeated names (first occurrence wins, order kept):
    # a frame with duplicate column labels is not a usable editor input.
    col_names = list(dict.fromkeys(name.strip() for name in raw_names if name.strip()))

    if not col_names:
        return None

    num_rows = st.number_input("초기 행의 수를 입력하세요:", min_value=1, value=5)
    data = pd.DataFrame(columns=col_names, index=range(num_rows))

    # An explicit key keeps this editor distinct from any other data editor
    # that is rendered with identical contents during the same run. The key
    # follows the table shape so a new shape starts with a fresh editor.
    editor_key = "manual_entry_" + "|".join(col_names) + f"_{num_rows}"
    return st.data_editor(data, num_rows="dynamic", key=editor_key)

def _missing_fill_value(series, method):
    """Return the value used to fill gaps in *series*, or None if it cannot be computed."""
    if method == "최빈값으로 대체":
        modes = series.mode()
        # mode() is empty when every entry is missing.
        return None if modes.empty else modes.iloc[0]

    target = series
    if series.dtype == 'object':
        # Object columns may hold numbers stored as text; anything that does
        # not parse as numeric has no meaningful mean/median.
        try:
            target = pd.to_numeric(series)
        except (ValueError, TypeError):
            return None

    try:
        value = target.mean() if method == "평균으로 대체" else target.median()
    except TypeError:
        return None
    # An all-missing column yields NaN, which would fill nothing.
    return None if pd.isna(value) else value


def preprocess_data(data):
    """Handle missing values interactively and convert text columns to numbers.

    For every column containing missing values a select box offers four
    strategies: drop the affected rows, or fill with the mean, median or
    mode. Afterwards each object-dtype column is converted to a numeric
    dtype when all of its values parse as numbers.

    Args:
        data: Input DataFrame. It is not modified; work is done on a copy.

    Returns:
        The preprocessed DataFrame.
    """
    st.subheader("데이터 전처리")

    # Work on a copy so the caller's frame is never mutated.
    data = data.copy()

    # Missing-value handling
    if data.isnull().values.any():
        st.write("결측치 처리:")
        for column in data.columns:
            if not data[column].isnull().any():
                continue

            method = st.selectbox(f"{column} 열의 처리 방법 선택:",
                                  ["제거", "평균으로 대체", "중앙값으로 대체", "최빈값으로 대체"])
            if method == "제거":
                data = data.dropna(subset=[column])
                continue

            fill_value = _missing_fill_value(data[column], method)
            if fill_value is None:
                st.warning(f"{column} 열에는 '{method}' 방법을 적용할 수 없어 결측치를 그대로 둡니다.")
                continue

            # Assign the result back instead of fillna(inplace=True) on a
            # column selection, which may operate on a temporary copy.
            data[column] = data[column].fillna(fill_value)

    # Data type conversion
    for column in data.columns:
        if data[column].dtype == 'object':
            try:
                data[column] = pd.to_numeric(data[column])
            except (ValueError, TypeError):
                st.write(f"{column} 열은 범주형으로 유지됩니다.")
            else:
                st.write(f"{column} 열을 숫자형으로 변환했습니다.")

    return data

def perform_analysis(data):
    """Render the exploratory-analysis section for *data*.

    In order, the page shows: summary statistics, a correlation heatmap and
    a scatter matrix of the float64/int64 columns (or a notice when there are
    none), one histogram and one box plot per such column, and one bar chart
    of value counts per object-dtype column.

    Args:
        data: DataFrame to analyse.
    """

    def render(figure, title):
        # Apply the chart title, then hand the figure to Streamlit.
        figure.update_layout(title=title)
        st.plotly_chart(figure)

    st.header("탐색적 데이터 분석")

    # Summary statistics
    st.write("요약 통계:")
    summary = data.describe()
    st.write(summary)

    # Correlation heatmap
    st.write("상관관계 히트맵:")
    numeric_data = data.select_dtypes(include=['float64', 'int64'])
    no_numeric = numeric_data.empty
    if no_numeric:
        st.write("상관관계 히트맵을 그릴 수 있는 숫자형 열이 없습니다.")
    else:
        correlation = numeric_data.corr()
        heatmap = px.imshow(correlation, color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
        render(heatmap, '상관관계 히트맵')

    # Scatter matrix
    st.write("산점도 행렬:")
    if no_numeric:
        st.write("산점도 행렬을 그릴 수 있는 숫자형 열이 없습니다.")
    else:
        render(px.scatter_matrix(numeric_data), '산점도 행렬')

    numeric_columns = list(numeric_data.columns)

    # Histograms (with a marginal box plot)
    st.write("히스토그램:")
    for name in numeric_columns:
        render(px.histogram(data, x=name, marginal='box'), f'{name} 히스토그램')

    # Box plots
    st.write("박스플롯:")
    for name in numeric_columns:
        render(px.box(data, y=name), f'{name} 박스플롯')

    # Bar charts for categorical variables
    categorical_columns = data.select_dtypes(include=['object']).columns
    if len(categorical_columns) > 0:
        st.write("범주형 변수 막대 그래프:")
        for name in categorical_columns:
            counts = data[name].value_counts().reset_index()
            counts = counts.set_axis(['category', 'count'], axis=1)
            bar = px.bar(counts, x='category', y='count', title=f'{name} 분포')
            bar.update_layout(xaxis_title=name, yaxis_title='개수')
            st.plotly_chart(bar)

def main():
    """Run the Streamlit page: acquire data, preview/edit it, then analyse it.

    The user picks between uploading a file and entering data manually. Once
    data is available it is shown in an editable table, and pressing the
    start button runs preprocessing followed by the analysis charts.

    The "analysis started" flag is stored in ``st.session_state`` because a
    button only reports True for the single rerun triggered by its click;
    interacting with any widget drawn afterwards reruns the script with the
    button back to False, which would otherwise hide the analysis again.
    """
    st.title("인터랙티브 EDA 툴킷")

    data_input_method = st.radio("데이터 입력 방법 선택:", ("파일 업로드", "수동 입력"))

    if data_input_method == "파일 업로드":
        uploaded_file = st.file_uploader("CSV, XLS, 또는 XLSX 파일을 선택하세요", type=["csv", "xls", "xlsx"])
        data = load_data(uploaded_file) if uploaded_file is not None else None
    else:
        data = manual_data_entry()

    if data is None:
        # Nothing to analyse: forget an earlier click so the next data set
        # waits for an explicit start again.
        st.session_state["analysis_started"] = False
        return

    st.subheader("데이터 미리보기 및 수정")
    st.write("데이터를 확인하고 필요한 경우 수정하세요:")
    edited_data = st.data_editor(data, num_rows="dynamic")

    if st.button("데이터 분석 시작"):
        st.session_state["analysis_started"] = True

    if st.session_state.get("analysis_started", False):
        processed_data = preprocess_data(edited_data)
        perform_analysis(processed_data)

# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()