File size: 4,482 Bytes
351b233
 
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
351b233
 
 
 
6e14a9e
 
351b233
 
 
 
 
 
 
 
 
6e14a9e
351b233
 
 
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
351b233
 
 
6e14a9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351b233
 
 
 
 
804bce3
351b233
6e14a9e
351b233
 
6e14a9e
 
351b233
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
 
351b233
804bce3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4775126
 
 
 
 
e5eed51
 
 
4775126
 
6e14a9e
8364b94
8245559
804bce3
e5eed51
 
351b233
4775126
 
 
a04aaa9
75028e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import gradio as gr

# Load the survey export; every later step works on this single DataFrame.
df = pd.read_csv("mexican_medical_students_mental_health_data.csv")

# Row-wise total of the eight columns at positions 19..26, stored as the
# regression target "gad_total" at column position 43.
# NOTE(review): the slice is purely positional and spans eight columns --
# confirm it covers exactly the intended questionnaire items.
target = df.iloc[:, 19:27].sum(axis=1)
df.insert(43, "gad_total", target)

# Continuous measurements are imputed with the column mean; categorical,
# ordinal and clock-string columns are imputed with the most frequent value.
mean_filled_columns = ["height", "weight", "age"]
mode_filled_columns = [
    "gender",
    *[f"phq{item}" for item in range(1, 10)],
    "reported_sleep_hours",
    "nap_duration",
]

# Compute every replacement value from the untouched data before filling.
fill_values = {column: df[column].mean() for column in mean_filled_columns}
fill_values.update(
    {column: df[column].mode()[0] for column in mode_filled_columns}
)

# Assign the filled column back instead of calling
# `df[column].fillna(..., inplace=True)`: the in-place call operates on a
# temporary selection and is not guaranteed to write through to `df`.
for column, value in fill_values.items():
    df[column] = df[column].fillna(value)

import datetime


def _clock_to_hours(value):
    """Convert an ``H:M`` clock value into a fractional number of hours.

    Args:
        value: Anything whose ``str()`` form matches ``%H:%M`` (e.g. "07:30").

    Returns:
        float: Hours plus minutes / 60 (e.g. 7.5 for "07:30").

    Raises:
        ValueError: If ``str(value)`` does not match ``%H:%M``.
    """
    parsed = datetime.datetime.strptime(str(value), "%H:%M")
    return float(parsed.hour) + float(parsed.minute / 60)


# Derive numeric duration columns from the clock-string columns. Iterating the
# Series directly is positional, so it does not depend on the index labels
# being 0..n-1. The insert positions (44, then 45) are kept as-is because later
# inserts rely on the resulting column order.
df.insert(
    44,
    "reported_sleep_in_hours",
    [_clock_to_hours(value) for value in df["reported_sleep_hours"]],
)
df.insert(
    45,
    "nap_duration_hours",
    [_clock_to_hours(value) for value in df["nap_duration"]],
)

# Replace the categorical "gender" column with the integer labels produced by
# a LabelEncoder; the fitted encoder stays available as `le`.
from sklearn import preprocessing  
le= preprocessing.LabelEncoder()
df["gender"]=le.fit_transform(df["gender"])
# Bare expression left over from a notebook cell; its result is discarded here.
df.head()
# Insert "epw_total" at position 44, which moves the columns previously at
# position 44 and later one place to the right.
# NOTE(review): `target` is the same Series that was stored as "gad_total",
# so "epw_total" is an exact copy of it -- confirm whether a different set of
# item columns was meant to be summed for this target.
df.insert(44,"epw_total",target)

# In[22]:


# Predictors: demographics, the nine PHQ items and the two derived
# duration-in-hours columns; the target is the "gad_total" column.
X = df[
    [
        "age", "gender", "height", "weight",
        "phq1", "phq2", "phq3", "phq4", "phq5",
        "phq6", "phq7", "phq8", "phq9",
        "reported_sleep_in_hours", "nap_duration_hours",
    ]
]
y = df["gad_total"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=21
)

model = LinearRegression()
model.fit(X_train, y_train)
print("Training complete.")

# R^2 on the held-out rows, printed as a percentage.
r2_score = model.score(X_test, y_test)
print(r2_score * 100, "%")

# Report the fitted coefficients and the held-out mean squared error.
y_pred = model.predict(X_test)
print('Coefficients: \n', model.coef_)
print("Mean squared error: %.2f" % np.mean((y_pred - y_test) ** 2))

# Second regression: same predictors, fitted against the "epw_total" column.
X = df[
    [
        "age", "gender", "height", "weight",
        "phq1", "phq2", "phq3", "phq4", "phq5",
        "phq6", "phq7", "phq8", "phq9",
        "reported_sleep_in_hours", "nap_duration_hours",
    ]
]
y = df["epw_total"]

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=21
)

modelepw = LinearRegression()
modelepw.fit(X_train, y_train)
print("Training complete.")

# R^2 on the held-out rows, printed as a percentage.
r2_score = modelepw.score(X_test, y_test)
print(r2_score * 100, "%")

# Report the fitted coefficients and the held-out mean squared error.
y_pred = modelepw.predict(X_test)
print('Coefficients: \n', modelepw.coef_)
print("Mean squared error: %.2f" % np.mean((y_pred - y_test) ** 2))

#local Testing
# input = "25,1,10,70,3,3,3,2,3,3,3,3,2,24,2";
# temp =np.array(input.split(","), dtype=float) 
# y = model.predict([[temp[0],temp[1],temp[2],temp[3],temp[4],temp[5],temp[6],temp[7],temp[8],temp[9],temp[10],temp[11],temp[12],temp[13],temp[14]]])
# z = modelepw.predict([[temp[0],temp[1],temp[2],temp[3],temp[4],temp[5],temp[6],temp[7],temp[8],temp[9],temp[10],temp[11],temp[12],temp[13],temp[14]]])
# output = str(y)+ "," +str(z)

# print(output)


def greet(input):
    """Predict both regression targets for one comma-separated feature row.

    Args:
        input: Comma-separated numeric string with the 15 predictor values in
            training column order: age, gender (as its numeric code), height,
            weight, phq1..phq9, reported_sleep_in_hours, nap_duration_hours.
            Values after the fifteenth are ignored. The parameter name
            shadows the builtin and is kept only for interface compatibility.

    Returns:
        str: ``str()`` of the ``model`` prediction array, a comma, then
        ``str()`` of the ``modelepw`` prediction array.

    Raises:
        ValueError: If a field is not numeric, or fewer than 15 values are
            supplied.
    """
    n_features = 15
    values = np.array(input.split(","), dtype=float)
    if values.size < n_features:
        raise ValueError(
            "Expected %d comma-separated values, got %d."
            % (n_features, values.size)
        )

    # Both models were fitted on the same predictor columns, so one
    # single-row 2-D array serves both predictions.
    row = values[:n_features].reshape(1, -1)
    y = model.predict(row)
    z = modelepw.predict(row)
    output = str(y) + "," + str(z)
    return output

# Single-line input for the comma-separated feature row passed to `greet`.
textbox_x = gr.Textbox(
    label="Enter Input:",
    placeholder="25,1,10,70,3,3,3,2,3,3,3,3,2,24,2",
    lines=1,
)
# Single-line output showing the string returned by `greet`.
textbox_y = gr.Textbox(label="Variable Y:", placeholder="5.0000000", lines=1)
grIn = gr.Interface(
    title="Your Name Regression ",
    fn=greet,
    inputs=textbox_x,
    outputs=textbox_y,
)

# Start the web UI as soon as the script runs.
grIn.launch()