File size: 3,287 Bytes
1bd8d17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from operator import index
import streamlit as st
import plotly.express as px
import numpy as np
from pycaret.regression import setup, compare_models, pull, save_model, load_model, plot_model
# from lazypredict.Supervised import LazyRegressor
# from sklearn.model_selection import train_test_split
from pandas_profiling import ProfileReport
import pandas as pd
from streamlit_pandas_profiling import st_profile_report
import os 

@st.cache
def _read_dataset_cached(path, mtime_ns, size):
    """Parse the CSV at *path* into a DataFrame.

    *mtime_ns* and *size* are not used by the body; they are part of the
    cache key so that a rewritten file produces a fresh read instead of the
    previously cached DataFrame.
    """
    return pd.read_csv(path, index_col=None)


def load_data():
    """Return the contents of ``dataset.csv`` as a DataFrame.

    The parsed result is cached, but the cache is keyed on the file's
    modification time and size. A cached function without arguments would
    keep returning the first dataset it ever read, even after a new upload
    overwrote the file.

    Raises:
        FileNotFoundError: if ``dataset.csv`` does not exist.
    """
    path = 'dataset.csv'
    info = os.stat(path)
    return _read_dataset_cached(path, info.st_mtime_ns, info.st_size)

# Reuse the dataset saved by a previous upload, if there is one.

# When a dataset file is already on disk, load it into `df` up front.
_dataset_saved = os.path.exists('./dataset.csv')
if _dataset_saved:
    df = load_data()

# Sidebar: logo, app title, page selector and a short description.
# `choice` holds the selected page name and drives the page sections below.
st.sidebar.image("https://michael-fuchs-python.netlify.app/post/2022-01-01-automl-using-pycaret-classification_files/p133s1.png")
st.sidebar.title("AutoML")
choice = st.sidebar.radio(
    "Navigation",
    ["Upload", "Profiling", "Modelling", "Download"],
)
st.sidebar.info("This project application helps you build and explore your data.")

# Upload page: read the uploaded CSV, save it as dataset.csv for the other
# pages, and preview it.
if choice == "Upload":
    st.title("Upload Your Dataset")
    file = st.file_uploader("Upload Your Dataset")
    if file:
        try:
            df = pd.read_csv(file, index_col=None)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # An empty, malformed or non-text upload should produce a
            # readable message rather than a raw traceback.
            st.error(f"Could not read the uploaded file as CSV: {exc}")
        else:
            # index=False keeps the row index out of the saved file.
            df.to_csv('dataset.csv', index=False)
            st.dataframe(df)

# Profiling page: build a profile report for the dataset and render it.
if choice == "Profiling":
    st.title("Exploratory Data Analysis")
    # `df` is only bound when dataset.csv exists; without this guard the page
    # fails with a NameError before any dataset has been uploaded.
    if os.path.exists('./dataset.csv'):
        profile_df = df.profile_report()
        st_profile_report(profile_df)
    else:
        st.warning("No dataset found. Upload one on the Upload page first.")

# Modelling page: choose a target column, run the PyCaret regression
# comparison, show the result tables and plots, and save the best model.
if choice == "Modelling":
    # `df` is only bound when dataset.csv exists; without this guard the page
    # fails with a NameError before any dataset has been uploaded.
    if not os.path.exists('./dataset.csv'):
        st.warning("No dataset found. Upload one on the Upload page first.")
    else:
        chosen_target = st.selectbox('Choose the Target Column', df.columns)
        if st.button('Run Modelling'):
            # Drop rows with a missing target. The label is wrapped in a list
            # because older pandas versions do not accept a bare string here.
            training_df = df.dropna(subset=[chosen_target])
            setup(
                training_df,
                target=chosen_target,
                session_id=2774764,
                imputation_type='simple',
                numeric_imputation='mean',
                categorical_imputation='mode',
            )
            setup_df = pull()
            st.dataframe(setup_df)

            # With n_select > 1 compare_models returns a list of models, not
            # a single estimator.
            top_models = compare_models(n_select=5)
            compare_df = pull()
            st.dataframe(compare_df)

            ranked = top_models if isinstance(top_models, list) else [top_models]
            if not ranked:
                st.error("No model could be trained on this dataset.")
            else:
                # plot_model and save_model take one estimator, so use the
                # first entry of the returned list.
                best_model = ranked[0]
                plot_model(best_model, plot='residuals', display_format='streamlit')
                plot_model(best_model, plot='feature', display_format='streamlit')
                plot_model(best_model, plot='error', display_format='streamlit')
                save_model(best_model, 'best_model')

# Download page: offer the saved model file for download.
if choice == "Download":
    # best_model.pkl only exists after a modelling run; opening it
    # unconditionally raises FileNotFoundError on a fresh install.
    if os.path.exists('best_model.pkl'):
        with open('best_model.pkl', 'rb') as f:
            st.download_button('Download Model', f, file_name="best_model.pkl")
    else:
        st.warning("No trained model found. Run the Modelling page first.")