File size: 3,276 Bytes
a6dd78d
 
 
 
 
 
 
197c7b3
a6dd78d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
## Imports
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

## Load Dataset
# The CSV location lives in a named constant so it is easy to find and change.
# NOTE(review): the path looks like a mounted Google Drive folder - confirm it
# exists in the environment where this script runs.
DATASET_PATH = '/content/drive/MyDrive/LoanApprovalPrediction.csv'
data = pd.read_csv(DATASET_PATH)

## Data Preprocessing
# Missing values are imputed first because NaN cannot be cast to int below.
# Results are assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: pandas 2.x warns about that
# pattern and it does not update the frame under copy-on-write.

# Dependents, LoanAmount and Loan_Amount_Term are treated as numeric: use the median.
for column in ('Dependents', 'LoanAmount', 'Loan_Amount_Term'):
    data[column] = data[column].fillna(data[column].median())

# Credit_History is treated as categorical: use the most frequent value.
data['Credit_History'] = data['Credit_History'].fillna(data['Credit_History'].mode()[0])

# Loan_ID is dropped so it is not used as a feature.
data.drop(columns=['Loan_ID'], inplace=True)

# Cast the numeric columns to int (any fractional part is truncated).
int_columns = [
    'Dependents',
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
]
for column in int_columns:
    data[column] = data[column].astype(int)

# Categorical to numerical conversion.
# Each flag is computed from ITS OWN column. Married and Self_Employed were
# previously derived from the already-encoded Education column, which made
# both of them the constant 1 and discarded the real values.
# Missing values compare unequal to every label, so they fall into the
# "else" bucket of each rule, exactly as with an element-wise comparison.
data['Gender'] = data['Gender'].eq('Male').astype(int)                # Male -> 1, otherwise 0
data['Education'] = data['Education'].ne('Graduate').astype(int)      # Graduate -> 0, otherwise 1
data['Married'] = data['Married'].ne('Yes').astype(int)               # Yes -> 0, otherwise 1
data['Self_Employed'] = data['Self_Employed'].ne('Yes').astype(int)   # Yes -> 0, otherwise 1

# Labels missing from this mapping become NaN after .map().
Prop_area = {'Urban': 0, 'Semiurban': 1, 'Rural': 2}
data['Property_Area'] = data['Property_Area'].map(Prop_area)

## Train Test Split
# Features are every column except the target. The column is named through the
# `columns=` keyword because pandas >= 2.0 no longer accepts `axis` as a
# positional argument to DataFrame.drop (drop('Loan_Status', 1) raises TypeError).
X = data.drop(columns='Loan_Status')

# Target encoding: 'Y' -> 0, anything else -> 1.
y = data['Loan_Status'].ne('Y').astype(int)

# 70/30 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, test_size=0.3, random_state=42
)

## Parameter Efficient Logistic Regression Model Training By GridSearchCV
# Every penalty is paired with a solver that supports it. The default 'lbfgs'
# solver only handles 'l2', so searching 'l1' and 'elasticnet' with it makes
# those candidates fail to fit and get a NaN score. 'elasticnet' additionally
# requires an explicit l1_ratio.
# NOTE(review): 'saga' converges slowly on unscaled features; expect
# ConvergenceWarning with these max_iter values unless the inputs are scaled.
LG = LogisticRegression()
C_values = [1, 2, 5, 10, 20, 25, 30, 40, 50]
iteration_limits = [100, 150, 200, 250]
parameter = [
    # l2 keeps the default solver, so these candidates behave as before.
    {'solver': ['lbfgs'], 'penalty': ['l2'],
     'C': C_values, 'max_iter': iteration_limits},
    {'solver': ['liblinear'], 'penalty': ['l1'],
     'C': C_values, 'max_iter': iteration_limits},
    {'solver': ['saga'], 'penalty': ['elasticnet'], 'l1_ratio': [0.5],
     'C': C_values, 'max_iter': iteration_limits},
]

# 5-fold cross-validated search, selecting the candidate with the best accuracy.
Eff_log_reg = GridSearchCV(estimator=LG, param_grid=parameter, scoring='accuracy', cv=5)
Log_Model = Eff_log_reg.fit(X_train, y_train)

## Gradio App
def input(gender,married,dependents,education,self_employed,app_income,co_app_income,loan_amount,Loan_term,credit,area):
    """Predict the loan-status class for a single applicant.

    The arguments are passed to the fitted grid-search model as one feature
    row, in the order they appear in the signature.
    NOTE(review): this order must match the column order of the training
    matrix X, and the values must already be numerically encoded the same
    way as the training data - verify against the dataset.

    Returns:
        The predicted class as a plain int. The target is encoded as
        0 for Loan_Status 'Y' and 1 otherwise.
    """
    # NOTE: the function name shadows the built-in input(); it is kept because
    # the Gradio interface below refers to it by this name.
    features = [gender, married, dependents, education, self_employed,
                app_income, co_app_income, loan_amount, Loan_term, credit, area]
    # Log_Model is the fitted GridSearchCV object; the previous name `lm` was
    # never defined, so every call raised NameError.
    prediction = Log_Model.predict([features])
    # predict() returns a one-element array; index it explicitly because
    # int() on a non-0-d array is deprecated in NumPy >= 1.25.
    return int(prediction[0])

demo = gr.Interface(
    input,
    [
        gradio.Checkbox(['Male','Female'],label=Gender,max_choice=1),
        gr.Slider(minimum=600, maximum=7000, randomize=True, step = 1,label="Living Area"),
        gr.Slider(minimum=1, maximum=8, randomize=True,step = 1, label="Number of Bedrooms"),
        gr.Slider(minimum=1, maximum=5, randomize=True,step = 1, label="Number of Bathrooms"),
        gr.Slider(minimum=1,maximum=3.5,randomize=True,step=0.5,label="Number of stories/Floors")
    ],
    "number",
       examples=[
        [1000, 600, 1, 1, 1],
        [2000,1200,2,3,1],
        [4000,1900,2,3,2],
        [28000,3000,5,3,3],
    ],