geographicalcollins committed
Commit ffbf114 · 1 Parent(s): d994772

first commit

Files changed (2)
  1. main.py +455 -0
  2. requirements.txt +107 -0
main.py ADDED
@@ -0,0 +1,455 @@
+ import streamlit as st
+ st.set_page_config(layout="wide")
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ sns.set(style='white', color_codes=True)
+
+ from sklearn.metrics import mean_absolute_error
+
+ import statsmodels.tsa.api as smt
+ import statsmodels.api as sm
+
+ from tqdm.notebook import tqdm_notebook
+
+ from itertools import product
+
+
+ header = st.container()
+ dataset = st.container()
+ data_exploration_with_cleaning = st.container()
+ features = st.container()
+ modelTraining = st.container()
+ covid_relationship = st.container()
+
+ mystyle = '''
+     <style>
+     .main {
+         background-color: #FFCCFF;
+     }
+     </style>
+ '''
+ # Inject the custom CSS so the background colour takes effect
+ st.markdown(mystyle, unsafe_allow_html=True)
+
+ # @st.cache(allow_output_mutation=True)
+ def load_data(filename):
+     covid_data = pd.read_csv(filename)
+     return covid_data
+
+ with header:
+     st.title('COVID-19 Analysis for Predictive Analytics')
+     st.text('Provides analytics and showcases the relationship between COVID-19 and other diseases')
+
+ with dataset:
+     st.subheader('Dataset 1: ISDH - VR or NBS COVID dataset as of July 4, 2022, 9:37 PM (UTC+03:00)')
+     st.subheader('Dataset 2: cdc.gov dataset')
+     covid_data = load_data('data/covid.csv')
+     covid_data.rename(columns={'_id': 'id', 'agegrp': 'age_group'}, inplace=True)
+     # Drop the time component before parsing the date column
+     covid_data['date'] = covid_data['date'].str[:-9]
+     covid_data['date'] = pd.to_datetime(covid_data['date'])
+
+     st.write(covid_data.head(5))
+
+ with data_exploration_with_cleaning:
+     st.subheader('Data exploration and cleaning')
+     nRow, nCol = covid_data.shape
+     st.write('* **Shape of the data:** ', nRow, nCol)
+     summary = covid_data.describe()
+     st.write('* **Statistical summary:** ', summary)
+     null_counts = covid_data.isnull().sum()
+     st.write('* **Null values per column:** ', null_counts)
+     age_categories = covid_data['age_group'].unique()
+     st.write('* **Age group categories:** ', age_categories)
+ with features:
+     st.subheader('Features of the dataset')
+
+     covid_data.drop("id", axis=1, inplace=True)
+     covid_data.to_csv('data/cleaned_data.csv', index=False)
+     data = pd.read_csv('data/cleaned_data.csv', index_col=['date'], parse_dates=['date'])
+
+     # Split the data into the age-group categories present in the dataset
+     group1 = data.loc[data['age_group'] == '0-19']
+     group2 = data.loc[data['age_group'] == '20-29']
+     group3 = data.loc[data['age_group'] == '30-39']
+     group4 = data.loc[data['age_group'] == '40-49']
+     group5 = data.loc[data['age_group'] == '50-59']
+     group6 = data.loc[data['age_group'] == '60-69']
+     group7 = data.loc[data['age_group'] == '70-79']
+     group8 = data.loc[data['age_group'] == '80+']
+
+     # Plot the death counts over time for each age group
+     age_group_data = {
+         '0-19': group1, '20-29': group2, '30-39': group3, '40-49': group4,
+         '50-59': group5, '60-69': group6, '70-79': group7, '80+': group8,
+     }
+     for label, group in age_group_data.items():
+         fig = plt.figure(figsize=(17, 8))
+         plt.plot(group.covid_deaths)
+         plt.title('Infection Rate in {} Years'.format(label))
+         plt.ylabel('Number of Infections')
+         plt.xlabel('Period')
+         plt.grid(False)
+         st.pyplot(fig)
+
+
+ with modelTraining:
+     st.subheader('Model training')
+
+     st.write('Modelling with the 60-69 years age group')
+
+     def plot_moving_average(series, window, plot_intervals=False, scale=1.96):
+         rolling_mean = series.rolling(window=window).mean()
+
+         fig = plt.figure(figsize=(12, 8))
+         plt.title('Moving average\n window size = {}'.format(window))
+         plt.plot(rolling_mean, 'g', label='Rolling mean trend')
+
+         # Plot confidence intervals around the smoothed values
+         if plot_intervals:
+             mae = mean_absolute_error(series[window:], rolling_mean[window:])
+             deviation = np.std(series[window:] - rolling_mean[window:])
+             lower_bound = rolling_mean - (mae + scale * deviation)
+             upper_bound = rolling_mean + (mae + scale * deviation)
+             plt.plot(upper_bound, 'r--', label='Upper bound / Lower bound')
+             plt.plot(lower_bound, 'r--')
+
+         plt.plot(series[window:], label='Actual values')
+         plt.legend(loc='best')
+         plt.grid(True)
+         st.pyplot(fig)
+
+     # Smooth over the previous 5 days
+     plot_moving_average(group6.covid_deaths, 5)
+
+     # Smooth over the previous 30 days
+     plot_moving_average(group6.covid_deaths, 30)
+
+     # Smooth over the previous 60 days, with confidence intervals
+     plot_moving_average(group6.covid_deaths, 60, plot_intervals=True)
+
+     st.write("Using Exponential Smoothing")
+     st.markdown('* Alpha determines how quickly the weight of older observations decays')
+
+     def exponential_smoothing(series, alpha):
+         result = [series.iloc[0]]  # first value is the same as the series
+         for n in range(1, len(series)):
+             result.append(alpha * series.iloc[n] + (1 - alpha) * result[n-1])
+         return result
+
+     def plot_exponential_smoothing(series, alphas):
+         fig = plt.figure(figsize=(12, 8))
+         for alpha in alphas:
+             plt.plot(exponential_smoothing(series, alpha), label="Alpha {}".format(alpha))
+         plt.plot(series.values, "c", label="Actual")
+         plt.legend(loc="best")
+         plt.axis('tight')
+         plt.title("Exponential Smoothing")
+         plt.grid(True)
+         st.pyplot(fig)
+
+     plot_exponential_smoothing(group6.covid_deaths, [0.05, 0.2])
+
+     def double_exponential_smoothing(series, alpha, beta):
+         result = [series.iloc[0]]
+         for n in range(1, len(series) + 1):
+             if n == 1:
+                 level, trend = series.iloc[0], series.iloc[1] - series.iloc[0]
+             if n >= len(series):  # forecasting one step beyond the data
+                 value = result[-1]
+             else:
+                 value = series.iloc[n]
+             last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
+             trend = beta * (level - last_level) + (1 - beta) * trend
+             result.append(level + trend)
+         return result
+
+     def plot_double_exponential_smoothing(series, alphas, betas):
+         fig = plt.figure(figsize=(17, 8))
+         for alpha in alphas:
+             for beta in betas:
+                 plt.plot(double_exponential_smoothing(series, alpha, beta), label="Alpha {}, beta {}".format(alpha, beta))
+         plt.plot(series.values, label="Actual")
+         plt.legend(loc="best")
+         plt.axis('tight')
+         plt.title("Double Exponential Smoothing")
+         plt.grid(True)
+         st.pyplot(fig)
+
+     plot_double_exponential_smoothing(group6.covid_deaths, alphas=[0.9, 0.02], betas=[0.9, 0.02])
+
+     st.subheader("USING SARIMA MODEL")
+
+     def tsplot(y, lags=None, figsize=(12, 7), style='bmh'):
+         if not isinstance(y, pd.Series):
+             y = pd.Series(y)
+
+         with plt.style.context(style):
+             fig = plt.figure(figsize=figsize)
+             layout = (2, 2)
+             ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
+             acf_ax = plt.subplot2grid(layout, (1, 0))
+             pacf_ax = plt.subplot2grid(layout, (1, 1))
+
+             y.plot(ax=ts_ax)
+             p_value = sm.tsa.stattools.adfuller(y)[1]
+             ts_ax.set_title('Time Series Analysis Plots\n Dickey-Fuller: p={0:.5f}'.format(p_value))
+             smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
+             smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
+             plt.tight_layout()
+             st.pyplot(fig)
+
+     tsplot(group6.covid_deaths, lags=30)
+
+     # Take the first difference to make the process stationary
+     data_diff = group6.covid_deaths - group6.covid_deaths.shift(1)
+
+     tsplot(data_diff[1:], lags=30)
+
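+     # A minimal sketch of reading the augmented Dickey-Fuller test directly (illustrative
+     # only; the 0.05 threshold is a conventional choice, not part of the original analysis):
+     # a small p-value on the differenced series supports the d=1 setting used below.
+     adf_p = sm.tsa.stattools.adfuller(data_diff[1:])[1]
+     st.write('Dickey-Fuller p-value on the differenced series:', adf_p)
+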
+     import warnings
+     warnings.filterwarnings("ignore", category=FutureWarning)
+
+     # Set initial values and some bounds for the SARIMA orders
+     ps = range(0, 5)
+     d = 1
+     qs = range(0, 5)
+     Ps = range(0, 5)
+     D = 1
+     Qs = range(0, 5)
+     s = 5
+
+     # Create a list with all possible combinations of parameters
+     parameters = product(ps, qs, Ps, Qs)
+     parameters_list = list(parameters)
+     st.write('Number of parameter combinations:', len(parameters_list))
+
+     # Train many SARIMA models to find the best set of parameters
+     def optimize_SARIMA(parameters_list, d, D, s):
+         """
+         Return a dataframe with parameters and the corresponding AIC
+
+         parameters_list - list with (p, q, P, Q) tuples
+         d - integration order
+         D - seasonal integration order
+         s - length of season
+         """
+         results = []
+         best_aic = float('inf')
+
+         for param in tqdm_notebook(parameters_list):
+             try:
+                 model = sm.tsa.statespace.SARIMAX(group6.covid_deaths, order=(param[0], d, param[1]),
+                                                   seasonal_order=(param[2], D, param[3], s)).fit(disp=-1)
+             except Exception:
+                 continue
+
+             aic = model.aic
+
+             # Save the best model, AIC and parameters
+             if aic < best_aic:
+                 best_model = model
+                 best_aic = aic
+                 best_param = param
+             results.append([param, model.aic])
+
+         result_table = pd.DataFrame(results)
+         result_table.columns = ['parameters', 'aic']
+         # Sort in ascending order; a lower AIC is better
+         result_table = result_table.sort_values(by='aic', ascending=True).reset_index(drop=True)
+
+         return result_table
+
+     # result_table = optimize_SARIMA(parameters_list, d, D, s)
+
+     # Set the parameters that give the lowest AIC (Akaike Information Criterion)
+     # p, q, P, Q = result_table.parameters[0]
+
+     best_model = sm.tsa.statespace.SARIMAX(group6.covid_deaths, order=(1, 1, 1),
+                                            seasonal_order=(1, 1, 1, 7)).fit(disp=-1)
+
+     st.write(best_model.summary())
+
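+     # A minimal sketch of producing forecasts from the fitted model (illustrative only;
+     # the 30-step horizon and 95% interval are assumed choices, not values from the
+     # original analysis). get_forecast returns point forecasts with confidence bounds.
+     sarima_forecast = best_model.get_forecast(steps=30).summary_frame(alpha=0.05)
+     st.write('Illustrative 30-step SARIMA forecast (60-69 age group):', sarima_forecast)
+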
+     # with covid_relationship:
+     st.subheader('COVID-19 Relationship With Other Diseases')
+
+     df = pd.read_csv("data/Provisional_COVID-19_Deaths_by_Sex_and_Age.csv")
+     df['End Date'] = pd.to_datetime(df['End Date'])
+     df['Start Date'] = pd.to_datetime(df['Start Date'])
+     df['Data As Of'] = pd.to_datetime(df['Data As Of'])
+     # Display-only formatting of the datetime columns as YYYY-MM-DD
+     date_cols = df.select_dtypes(include=['datetime64']).columns.tolist()
+     df.style.format({col: lambda t: t.strftime("%Y-%m-%d") for col in date_cols})
+     df['Year'] = df['Year'].fillna(2020)
+     df.drop(["Month", "Footnote"], axis=1, inplace=True)
+     df = df.dropna()
+     Roww, Coll = df.shape
+     st.write('Dataset 2 shape: ', Roww, Coll)
+     df.index = df['End Date']
+
+     df = df[df['Age Group'] != 'All Ages']
+     df = df.reset_index(drop=True)
+     df = df[['Year', 'Sex', 'Age Group', 'COVID-19 Deaths', 'Pneumonia Deaths', 'Influenza Deaths']]
+
+     jj = sns.lmplot(x='Pneumonia Deaths', y='COVID-19 Deaths', data=df, fit_reg=True, scatter_kws={'color': 'red', 'marker': "D", "s": 20})
+     plt.title("Relationship between COVID-19 and Pneumonia")
+     st.pyplot(jj)
+
+     mm = sns.lmplot(x='Influenza Deaths', y='COVID-19 Deaths', data=df, fit_reg=True, scatter_kws={'color': 'red', 'marker': "D", "s": 20})
+     plt.title("Relationship between COVID-19 and Influenza")
+     st.pyplot(mm)
+
+     nn = sns.lmplot(x='Influenza Deaths', y='Pneumonia Deaths', data=df, fit_reg=True, scatter_kws={'color': 'red', 'marker': "D", "s": 20})
+     plt.title("Relationship between Pneumonia and Influenza")
+     st.pyplot(nn)
+
+     # Drop the overlapping age groupings so the remaining categories do not double-count
+     df = df[~df['Age Group'].isin(['Under 1 year', '0-17 years', '18-29 years',
+                                    '30-39 years', '40-49 years'])]
+
+     # Finding the most affected age group for COVID-19
+     df = df.reset_index(drop=True)
+     age_labels = {'1-4': '1-4 years', '5-14': '5-14 years', '15-24': '15-24 years',
+                   '25-34': '25-34 years', '35-44': '35-44 years', '45-54': '45-54 years',
+                   '55-64': '55-64 years', '65-74': '65-74 years', '75-84': '75-84 years',
+                   'Over 85': '85 years and over'}
+     Infection_rate = {label: df['COVID-19 Deaths'][df['Age Group'] == group].sum()
+                       for label, group in age_labels.items()}
+     names = list(Infection_rate.keys())
+     values = list(Infection_rate.values())
+
+     vv = plt.figure(figsize=(12, 8))
+     plt.bar(range(len(Infection_rate)), values, tick_label=names)
+     plt.xlabel('Age group (Years)')
+     plt.ylabel('Number of Infections')
+     plt.title("COVID-19 Infection Rate in various Age group categories")
+     st.pyplot(vv)
+
+     df.to_csv('data/provisional_data.csv', index=False)
+     provisional_data = pd.read_csv('data/provisional_data.csv', index_col=['Year'], parse_dates=['Year'])
+     provisional_data.rename(columns={'COVID-19 Deaths': 'COVID_Deaths', 'Pneumonia Deaths': 'Pneumonia_Deaths', 'Influenza Deaths': 'Influenza_Deaths'}, inplace=True)
+
+     # Analysis of infection rate per gender
+     Male_Covid = provisional_data['COVID_Deaths'][provisional_data['Sex'] == 'Male'].to_list()
+     Female_Covid = provisional_data['COVID_Deaths'][provisional_data['Sex'] == 'Female'].to_list()
+     Female_Pneumonia = provisional_data['Pneumonia_Deaths'][provisional_data['Sex'] == 'Female'].to_list()
+     Male_Pneumonia = provisional_data['Pneumonia_Deaths'][provisional_data['Sex'] == 'Male'].to_list()
+     Female_Influenza = provisional_data['Influenza_Deaths'][provisional_data['Sex'] == 'Female'].to_list()
+     Male_Influenza = provisional_data['Influenza_Deaths'][provisional_data['Sex'] == 'Male'].to_list()
+
+     Gender_Infection_rate = {'F_Covid': sum(Female_Covid), 'M_Covid': sum(Male_Covid),
+                              'F_Pneum..': sum(Female_Pneumonia), 'M_Pneum..': sum(Male_Pneumonia),
+                              'F_Influenza': sum(Female_Influenza), 'M_Influenza': sum(Male_Influenza)}
+     names = list(Gender_Infection_rate.keys())
+     values = list(Gender_Infection_rate.values())
+
+     zz = plt.figure()
+     plt.bar(range(len(Gender_Infection_rate)), values, tick_label=names,
+             color=['black', 'red', 'green', 'blue', 'cyan', 'pink'], width=0.3)
+     plt.xlabel('Gender')
+     plt.ylabel('Number of Infections')
+     plt.title("Analysis of infection rate per Gender")
+     st.pyplot(zz)
+
+     # Show the highest recorded number of COVID-19 deaths
+     st.write('Highest recorded COVID-19 deaths:', provisional_data["COVID_Deaths"].max())
+
+     # Show the highest recorded number of Pneumonia deaths
+     st.write('Highest recorded Pneumonia deaths:', provisional_data["Pneumonia_Deaths"].max())
+
+     # Show the highest recorded number of Influenza deaths
+     st.write('Highest recorded Influenza deaths:', provisional_data["Influenza_Deaths"].max())
+
+     st.subheader('Finding the correlation between different diseases')
+
+     # The correlation between COVID-19 and Pneumonia
+     correlation1 = provisional_data['COVID_Deaths'].corr(provisional_data['Pneumonia_Deaths'])
+     st.write('The correlation between COVID-19 and Pneumonia', correlation1)
+
+     # The correlation between COVID-19 and Influenza
+     correlation2 = provisional_data['COVID_Deaths'].corr(provisional_data['Influenza_Deaths'])
+     st.write('The correlation between COVID-19 and Influenza', correlation2)
+
+     # The correlation between Pneumonia and Influenza
+     correlation3 = provisional_data['Pneumonia_Deaths'].corr(provisional_data['Influenza_Deaths'])
+     st.write('The correlation between Pneumonia and Influenza', correlation3)
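+
+     # A minimal sketch of an alternative view of the same relationships: a correlation
+     # heatmap over the three death counts (illustrative only; the column names follow the
+     # renaming above, and the annot/cmap choices are arbitrary).
+     corr_fig = plt.figure(figsize=(6, 4))
+     sns.heatmap(provisional_data[['COVID_Deaths', 'Pneumonia_Deaths', 'Influenza_Deaths']].corr(),
+                 annot=True, cmap='Blues')
+     plt.title("Correlation matrix of COVID-19, Pneumonia and Influenza deaths")
+     st.pyplot(corr_fig)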
requirements.txt ADDED
@@ -0,0 +1,107 @@
+ altair==4.2.0
+ argon2-cffi==21.3.0
+ argon2-cffi-bindings==21.2.0
+ asttokens==2.0.5
+ attrs==21.4.0
+ backcall==0.2.0
+ beautifulsoup4==4.11.1
+ bleach==5.0.1
+ blinker==1.4
+ cachetools==5.2.0
+ certifi==2022.6.15
+ cffi==1.15.1
+ charset-normalizer==2.1.0
+ click==8.1.3
+ commonmark==0.9.1
+ cycler==0.11.0
+ debugpy==1.6.0
+ decorator==5.1.1
+ defusedxml==0.7.1
+ entrypoints==0.4
+ executing==0.8.3
+ fastjsonschema==2.15.3
+ fonttools==4.34.2
+ gitdb==4.0.9
+ GitPython==3.1.27
+ idna==3.3
+ importlib-metadata==4.12.0
+ ipykernel==6.15.0
+ ipython==8.4.0
+ ipython-genutils==0.2.0
+ ipywidgets==7.7.1
+ jedi==0.18.1
+ Jinja2==3.1.2
+ joblib==1.1.0
+ jsonschema==4.6.1
+ jupyter-client==7.3.4
+ jupyter-core==4.10.0
+ jupyterlab-pygments==0.2.2
+ jupyterlab-widgets==1.1.1
+ kiwisolver==1.4.3
+ MarkupSafe==2.1.1
+ matplotlib==3.5.2
+ matplotlib-inline==0.1.3
+ mistune==0.8.4
+ nbclient==0.6.6
+ nbconvert==6.5.0
+ nbformat==5.4.0
+ nest-asyncio==1.5.5
+ notebook==6.4.12
+ numpy==1.23.0
+ packaging==21.3
+ pandas==1.4.3
+ pandocfilters==1.5.0
+ parso==0.8.3
+ patsy==0.5.2
+ pexpect==4.8.0
+ pickleshare==0.7.5
+ Pillow==9.2.0
+ prometheus-client==0.14.1
+ prompt-toolkit==3.0.30
+ protobuf==3.20.1
+ psutil==5.9.1
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pyarrow==8.0.0
+ pycparser==2.21
+ pydeck==0.7.1
+ Pygments==2.12.0
+ Pympler==1.0.1
+ pyparsing==3.0.9
+ pyrsistent==0.18.1
+ python-dateutil==2.8.2
+ pytz==2022.1
+ pytz-deprecation-shim==0.1.0.post0
+ pyzmq==23.2.0
+ requests==2.28.1
+ rich==12.4.4
+ scikit-learn==1.1.1
+ scipy==1.8.1
+ seaborn==0.11.2
+ semver==2.13.0
+ Send2Trash==1.8.0
+ six==1.16.0
+ sklearn==0.0
+ smmap==5.0.0
+ soupsieve==2.3.2.post1
+ stack-data==0.3.0
+ statsmodels==0.13.2
+ streamlit==1.10.0
+ terminado==0.15.0
+ threadpoolctl==3.1.0
+ tinycss2==1.1.1
+ toml==0.10.2
+ toolz==0.11.2
+ tornado==6.2
+ tqdm==4.64.0
+ traitlets==5.3.0
+ typing-extensions==4.3.0
+ tzdata==2022.1
+ tzlocal==4.2
+ urllib3==1.26.9
+ validators==0.20.0
+ watchdog==2.1.9
+ wcwidth==0.2.5
+ webencodings==0.5.1
+ widgetsnbextension==3.6.1
+ zipp==3.8.0