coycs committed on
Commit
d178e6e
·
1 Parent(s): 47c4205

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +461 -166
app.py CHANGED
@@ -1,194 +1,489 @@
1
- # 返回图片测试
2
- from sklearn.linear_model import LinearRegression
3
- from sklearn.neural_network import MLPRegressor
 
 
 
 
 
 
 
4
  import lightgbm as lgb
5
  from xgboost import XGBRegressor
 
6
  from sklearn.ensemble import RandomForestRegressor
7
- from sklearn.preprocessing import StandardScaler
8
- from sklearn.model_selection import train_test_split
9
- import pandas as pd
10
- from fastapi.middleware.cors import CORSMiddleware # 跨域
11
- from fastapi import FastAPI, Response, BackgroundTasks
12
- import json
13
  import matplotlib.pyplot as plt
14
  import io
 
 
 
15
  import matplotlib
16
  matplotlib.use('AGG')
17
 
 
18
  app = FastAPI()
19
 
20
- # 配置跨域白名单
21
  origins = [
22
- "http://127.0.0.1:5500"
 
 
 
23
  ]
24
-
25
- # 图片测试
26
  app.add_middleware(
27
  CORSMiddleware,
28
  allow_origins=origins,
29
  allow_credentials=True,
30
- allow_methods=["POST"],
31
  allow_headers=["*"],
32
  )
33
 
34
- def create_img():
35
- plt.rcParams['figure.figsize'] = [7.50, 3.50]
36
- plt.rcParams['figure.autolayout'] = True
37
- plt.plot([1, 2])
38
- img_buf = io.BytesIO()
39
- plt.savefig(img_buf, format='png')
40
- plt.close()
41
- return img_buf
42
-
43
- @app.get('/png')
44
- async def get_img(background_tasks: BackgroundTasks):
45
- img_buf = create_img()
46
- # get the entire buffer content
47
- # because of the async, this will await the loading of all content
48
- bufContents: bytes = img_buf.getvalue()
49
- background_tasks.add_task(img_buf.close)
50
- headers = {'Content-Disposition': 'inline; filename="out.png"'}
51
- return Response(bufContents, headers=headers, media_type='image/png')
52
-
53
-
54
- # 机器学习测试
55
- # 多元线性回归
56
- @app.post("/")
57
- async def mlr():
58
- # 引入excel数据
59
- # 可能存在空行问题,dropna(axis=0)删除空行
60
- df = pd.read_csv("./1.csv").dropna(axis=0)
61
- # 取xy
62
- x = df.iloc[:, :12]
63
- # y = df.loc[:, "BSR"]
64
- y = df.loc[:, "SBR"]
65
- # 划分数据集
66
- x_train, x_test, y_train, y_test = train_test_split(
67
- x, y, test_size=0.3, random_state=0)
68
-
69
- # 标准化
70
- standardscaler = StandardScaler()
71
- standardscaler.fit(x_train)
72
- x_train = standardscaler.transform(x_train)
73
- x_test = standardscaler.transform(x_test)
74
-
75
- # 多元线性回归
76
- model = LinearRegression()
77
- model.fit(x_train, y_train)
78
-
79
- # 测试样本预测
80
- y_pred = model.predict(x_test)
81
- return json.dumps(y_pred.tolist())
82
-
83
- # 随机森林
84
- @app.post("/rf")
85
- async def rf():
86
- # 引入excel数据
87
- # 可能存在空行问题,dropna(axis=0)删除空行
88
- df = pd.read_csv("./1.csv").dropna(axis=0)
89
- # 取xy
90
- x = df.iloc[:, :12]
91
- # y = df.loc[:, "BSR"]
92
- y = df.loc[:, "SBR"]
93
- # 划分数据集
94
- x_train, x_test, y_train, y_test = train_test_split(
95
- x, y, test_size=0.3, random_state=0)
96
-
97
- # 标准化
98
- standardscaler = StandardScaler()
99
- standardscaler.fit(x_train)
100
- x_train = standardscaler.transform(x_train)
101
- x_test = standardscaler.transform(x_test)
102
-
103
- # 随机森林
104
- model = RandomForestRegressor()
105
- model.fit(x_train, y_train)
106
-
107
- # 测试样本预测
108
- y_pred = model.predict(x_test)
109
- return json.dumps(y_pred.tolist())
110
-
111
- # BP神经网络
112
- @app.post("/bpn")
113
- async def rf():
114
- # 引入excel数据
115
- # 可能存在空行问题,dropna(axis=0)删除空行
116
- df = pd.read_csv("./1.csv").dropna(axis=0)
117
- # 取xy
118
- x = df.iloc[:, :12]
119
- # y = df.loc[:, "BSR"]
120
- y = df.loc[:, "SBR"]
121
- # 划分数据集
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  x_train, x_test, y_train, y_test = train_test_split(
123
- x, y, test_size=0.3, random_state=0)
124
-
125
- # 标准化
126
- standardscaler = StandardScaler()
127
- standardscaler.fit(x_train)
128
- x_train = standardscaler.transform(x_train)
129
- x_test = standardscaler.transform(x_test)
130
-
131
- # BP神经网络
132
- model = MLPRegressor(hidden_layer_sizes=(10,), random_state=10,learning_rate_init=0.1) # BP神经网络回归模型
133
- model.fit(x_train,y_train) # 训练模型
134
-
135
- # 测试样本预测
136
- y_pred = model.predict(x_test)
137
- return json.dumps(y_pred.tolist())
138
-
139
- # XGBoost
140
- @app.post("/xgboost")
141
- async def rf():
142
- # 引入excel数据
143
- # 可能存在空行问题,dropna(axis=0)删除空行
144
- df = pd.read_csv("./1.csv").dropna(axis=0)
145
- # 取xy
146
- x = df.iloc[:, :12]
147
- # y = df.loc[:, "BSR"]
148
- y = df.loc[:, "SBR"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  # 划分数据集
150
  x_train, x_test, y_train, y_test = train_test_split(
151
- x, y, test_size=0.3, random_state=0)
152
-
153
- # 标准化
154
- standardscaler = StandardScaler()
155
- standardscaler.fit(x_train)
156
- x_train = standardscaler.transform(x_train)
157
- x_test = standardscaler.transform(x_test)
158
-
159
- # XGBoost
160
- model = XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=160, objective='reg:gamma')
161
- model.fit(x_train, y_train)
162
-
163
- # 测试样本预测
164
- y_pred = model.predict(x_test)
165
- return json.dumps(y_pred.tolist())
166
-
167
- # LightGBM
168
- @app.post("/lightgbm")
169
- async def rf():
170
- # 引入excel数据
171
- # 可能存在空行问题,dropna(axis=0)删除空行
172
- df = pd.read_csv("./1.csv").dropna(axis=0)
173
- # 取xy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  x = df.iloc[:, :12]
175
- # y = df.loc[:, "BSR"]
176
- y = df.loc[:, "SBR"]
177
- # 划分数据集
 
 
178
  x_train, x_test, y_train, y_test = train_test_split(
179
- x, y, test_size=0.3, random_state=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- # 标准化
182
- standardscaler = StandardScaler()
183
- standardscaler.fit(x_train)
184
- x_train = standardscaler.transform(x_train)
185
- x_test = standardscaler.transform(x_test)
186
 
187
- # LightGBM
188
- model = lgb.LGBMRegressor(objective='regression',num_leaves=31,learning_rate=0.05,n_estimators=20)
189
- model.fit(x_train, y_train)
 
 
 
 
 
190
 
191
- # 测试样本预测
192
- y_pred = model.predict(x_test)
193
- return json.dumps(y_pred.tolist())
194
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+ from fastapi import FastAPI, Response, BackgroundTasks
4
+ from fastapi.middleware.cors import CORSMiddleware # 跨域
5
+ from sklearn.metrics import mean_absolute_error
6
+ from sklearn.metrics import mean_squared_error
7
+ from sklearn.metrics import r2_score
8
+ from sklearn.model_selection import train_test_split
9
+ import sklearn.preprocessing as preproc
10
+ from sklearn.preprocessing import StandardScaler
11
  import lightgbm as lgb
12
  from xgboost import XGBRegressor
13
+ from sklearn.neural_network import MLPRegressor
14
  from sklearn.ensemble import RandomForestRegressor
15
+ from sklearn.linear_model import LinearRegression
 
 
 
 
 
16
  import matplotlib.pyplot as plt
17
  import io
18
+ import json
19
+ import numpy as np
20
+ import pandas as pd
21
  import matplotlib
22
  matplotlib.use('AGG')
23
 
24
+
25
  app = FastAPI()
26
 
27
# Browser origins that are allowed to call this API (CORS whitelist).
origins = [
    "http://127.0.0.1:5500",
    "http://localhost:8081",
    "http://mlca.coycs.com",
    "https://mlca.coycs.com",
]

# Register the CORS middleware: only the whitelisted origins, POST and GET
# requests, any request header, credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
    allow_credentials=True,
)
41
 
42
+ # 工具函数
43
+
44
+
45
def json2df(json):
    """Convert JSON-style records into a DataFrame of numeric values.

    Args:
        json: Records accepted by ``pd.DataFrame`` (the endpoints in this
            file pass the list taken from the request body). The parameter
            name shadows the ``json`` module inside this function only; it
            is kept so existing callers keep working.

    Returns:
        A DataFrame in which every string cell has been parsed as a number,
        so values sent as text (including scientific notation such as
        ``"1e-3"``) become ints/floats. Blank strings and the literal
        ``"None"`` become missing values. Non-string cells are left as is.

    Raises:
        ValueError: If a string cell is neither blank nor a number.
    """

    def str2num(x):
        # Non-string cells were already typed by the JSON decoder.
        if not isinstance(x, str):
            return x
        text = x.strip()
        # Whitespace-only cells (and an explicit "None") mean "missing".
        if text in ("", "None"):
            return None
        # SECURITY: this used to be eval(x). The cells come straight from
        # the HTTP request body, so they are now parsed as plain numbers
        # instead of being executed as Python code.
        try:
            return int(text)
        except ValueError:
            pass
        try:
            return float(text)  # also covers scientific notation
        except ValueError:
            raise ValueError(
                "non-numeric cell value: {!r}".format(x)) from None

    df = pd.DataFrame(json)
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; check
    # the class (not the instance) so a column called "map" cannot confuse
    # the lookup on older pandas versions.
    if hasattr(pd.DataFrame, "map"):
        return df.map(str2num)
    return df.applymap(str2num)
58
+
59
+
60
+ # def process_abnormal(df, detect, method): # 异常值处理
61
+ # if detect == 1: # IQR检测方式
62
+ # for coloum in df.columns:
63
+ # q1 = df[coloum].quantile(0.75)
64
+ # q3 = df[coloum].quantile(0.25)
65
+ # iqr = q1-q3
66
+ # if method == 1: # 删除异常值
67
+ # df.drop(
68
+ # df.loc[lambda x:x[coloum] > q1 + 1.5 * iqr].index, inplace=True)
69
+ # df.drop(
70
+ # df.loc[lambda x:x[coloum] < q3 - 1.5 * iqr].index, inplace=True)
71
+ # elif method == 2: # 均值替换
72
+ # df.loc[lambda x:x[coloum] > q1 + 1.5 *
73
+ # iqr, coloum]=df[coloum].mean()
74
+ # df.loc[lambda x:x[coloum] < q3 - 1.5 *
75
+ # iqr, coloum]=df[coloum].mean()
76
+ # elif method == 3: # 中位数替换
77
+ # df.loc[lambda x:x[coloum] > q1 + 1.5 *
78
+ # iqr, coloum]=df[coloum].median()
79
+ # df.loc[lambda x:x[coloum] < q3 - 1.5 *
80
+ # iqr, coloum]=df[coloum].median()
81
+ # elif method == 4: # 众数替换
82
+ # df.loc[lambda x:x[coloum] > q1 + 1.5 *
83
+ # iqr, coloum]=df[coloum].mode().iloc[0]
84
+ # df.loc[lambda x:x[coloum] < q3 - 1.5 *
85
+ # iqr, coloum]=df[coloum].mode().iloc[0]
86
+ # elif method == 5: # 边界替换
87
+ # df.loc[lambda x:x[coloum] > q1 +
88
+ # 1.5 * iqr, coloum]=q1 + 1.5 * iqr
89
+ # df.loc[lambda x:x[coloum] < q3 -
90
+ # 1.5 * iqr, coloum]=q3 - 1.5 * iqr
91
+ # elif detect == 2: # Z-score检测方式
92
+ # for coloum in df.columns:
93
+ # mean = df[coloum].mean()
94
+ # std = df[coloum].std()
95
+ # df.drop(
96
+ # df.loc[lambda x:x[coloum] > mean + 3 * std].index, inplace=True)
97
+ # df.drop(
98
+ # df.loc[lambda x:x[coloum] < mean - 3 * std].index, inplace=True)
99
+ # if method == 1: # 删除异常值
100
+ # df.drop(
101
+ # df.loc[lambda x:x[coloum] > mean + 3 * std].index, inplace=True)
102
+ # df.drop(
103
+ # df.loc[lambda x:x[coloum] < mean - 3 * std].index, inplace=True)
104
+ # elif method == 2: # 均值替换
105
+ # df.loc[lambda x:x[coloum] > mean +
106
+ # 3 * std, coloum]=df[coloum].mean()
107
+ # df.loc[lambda x:x[coloum] < mean -
108
+ # 3 * std, coloum]=df[coloum].mean()
109
+ # elif method == 3: # 中位数替换
110
+ # df.loc[lambda x:x[coloum] > mean + 3 *
111
+ # std, coloum]=df[coloum].median()
112
+ # df.loc[lambda x:x[coloum] < mean - 3 *
113
+ # std, coloum]=df[coloum].median()
114
+ # elif method == 4: # 众数替换
115
+ # df.loc[lambda x:x[coloum] > mean + 3 *
116
+ # std, coloum]=df[coloum].mode().iloc[0]
117
+ # df.loc[lambda x:x[coloum] < mean - 3 *
118
+ # std, coloum]=df[coloum].mode().iloc[0]
119
+ # elif method == 5: # 边界替换
120
+ # df.loc[lambda x:x[coloum] > mean +
121
+ # 3 * std, coloum]=mean + 3 * std
122
+ # df.loc[lambda x:x[coloum] < mean -
123
+ # 3 * std, coloum]=mean - 3 * std
124
+ # return df
125
+
126
+
127
def process_miss(df, method):
    """Fill missing values in ``df`` with the selected strategy.

    Rows that are entirely empty are dropped first, then columns that are
    entirely empty; the remaining gaps are filled according to ``method``.

    Args:
        df: DataFrame to clean. It is not modified in place.
        method: 1 = column mean, 2 = column median, 3 = column mode (the
            first one when several values tie), 4 = linear interpolation
            down each column (forward direction only, so leading gaps
            stay empty), 5 = previous value, 6 = next value. Any other
            value skips the filling step.

    Returns:
        A new DataFrame with the empty rows/columns removed and the gaps
        filled.
    """
    df = df.dropna(how='all')
    df = df.dropna(axis=1, how='all')

    if method == 1:
        return df.fillna(df.mean())
    if method == 2:
        return df.fillna(df.median())
    if method == 3:
        return df.fillna(df.mode().iloc[0])
    if method == 4:
        return df.fillna(df.interpolate(
            method='linear', limit_direction='forward', axis=0))
    if method == 5:
        # fillna(method="ffill") is deprecated in recent pandas releases;
        # ffill()/bfill() are the supported spelling and behave the same.
        return df.ffill()
    if method == 6:
        return df.bfill()
    return df
146
+
147
+
148
def process_abnormal(df_inside, df_user, detect, method):
    """Detect and treat outliers in the feature columns.

    The built-in rows and the user rows are stacked so that the outlier
    thresholds are computed over both, each of the first 12 columns is
    processed in turn, and the two parts are split apart again.

    Fixes over the previous version:
      * the Z-score detector used to drop outliers unconditionally before
        looking at ``method``, so the replacement methods never applied;
      * deleting outliers only removed rows from a slice of the frame, so
        the rows were not removed consistently and the inside/user split
        by row count was wrong. Rows are now removed from the whole frame
        and the split relies on the stacked row labels;
      * the quartile variables were named the wrong way round.

    Args:
        df_inside: Built-in data set (features first, labels after).
        df_user: User supplied rows.
        detect: 1 = IQR rule (1.5 * IQR beyond the quartiles), 2 = Z-score
            rule (3 standard deviations from the mean). Any other value
            leaves the data untouched.
        method: 1 = delete the row, 2 = replace with the column mean,
            3 = column median, 4 = column mode, 5 = clamp to the violated
            bound. Any other value leaves the data untouched.

    Returns:
        ``{"df_inside": ..., "df_user": ...}`` where ``df_inside`` keeps
        every column and ``df_user`` keeps the first 12 columns only. The
        inputs are not modified.
    """
    n_inside = df_inside.shape[0]
    # ignore_index gives labels 0..n-1, so label < n_inside <=> built-in row,
    # even after rows have been deleted.
    df = pd.concat([df_inside, df_user], axis=0, ignore_index=True)
    feature_cols = df.columns[:12]

    for column in feature_cols:
        values = df[column]

        if detect == 1:  # IQR rule
            lower_quartile = values.quantile(0.25)
            upper_quartile = values.quantile(0.75)
            iqr = upper_quartile - lower_quartile
            lower = lower_quartile - 1.5 * iqr
            upper = upper_quartile + 1.5 * iqr
        elif detect == 2:  # Z-score rule
            mean = values.mean()
            std = values.std()
            lower = mean - 3 * std
            upper = mean + 3 * std
        else:
            break

        too_high = values > upper
        too_low = values < lower
        outliers = too_high | too_low
        if not outliers.any():
            continue

        if method == 1:
            # Later columns are evaluated on the remaining rows only.
            df = df.loc[~outliers]
        elif method == 2:
            df[column] = values.mask(outliers, values.mean())
        elif method == 3:
            df[column] = values.mask(outliers, values.median())
        elif method == 4:
            df[column] = values.mask(outliers, values.mode().iloc[0])
        elif method == 5:
            df[column] = values.mask(too_high, upper).mask(too_low, lower)

    from_inside = df.index < n_inside
    return {
        "df_inside": df.loc[from_inside],
        "df_user": df.loc[~from_inside, feature_cols],
    }
222
+
223
+
224
def process_standard(df_inside, df_user, method):
    """Scale the feature columns of the built-in and user data together.

    Both frames are stacked so the scaler sees all rows, the first 12
    columns are scaled, and the frames are split apart again.

    Fixes over the previous version:
      * an unknown ``method`` used to crash while re-wrapping the
        (unscaled) features; it now returns the data unchanged;
      * the scaled values are assigned as whole columns instead of being
        written in place through ``iloc``, so integer feature columns can
        take the float results without an in-place upcast.

    Args:
        df_inside: Built-in data set (features first, labels after).
        df_user: User supplied rows.
        method: 1 = min-max scaling, 2 = Z-score (StandardScaler),
            3 = max-abs scaling, 4 = RobustScaler, 5 = per-column
            normalisation (``preproc.normalize`` with ``axis=0``).

    Returns:
        ``{"df_inside": ..., "df_user": ...}`` where ``df_inside`` keeps
        every column and ``df_user`` keeps the first 12 columns only.
    """
    n_inside = df_inside.shape[0]
    df = pd.concat([df_inside, df_user], axis=0, ignore_index=True)
    feature_cols = list(df.columns[:12])
    features = df[feature_cols]

    if method == 1:
        scaled = preproc.minmax_scale(features)
    elif method == 2:
        scaled = preproc.StandardScaler().fit_transform(features)
    elif method == 3:
        scaled = preproc.maxabs_scale(features, axis=0)
    elif method == 4:
        scaled = preproc.RobustScaler().fit_transform(features)
    elif method == 5:
        scaled = preproc.normalize(features, axis=0)
    else:
        scaled = None

    if scaled is not None:
        # The scalers hand back bare arrays; restore labels before assigning.
        df[feature_cols] = pd.DataFrame(
            data=scaled, index=df.index, columns=feature_cols)

    return {
        "df_inside": df.iloc[:n_inside, :],
        "df_user": df.iloc[n_inside:, :12],
    }
247
+
248
+
249
def _build_model(algorithm, paras):
    """Create the (unfitted) regressor selected by ``algorithm``.

    Args:
        algorithm: 1 = ordinary least squares, 2 = random forest,
            3 = multi-layer perceptron, 4 = XGBoost, 5 = LightGBM.
        paras: Mapping holding the hyper-parameters of the chosen
            algorithm; only the keys that algorithm needs are read.

    Returns:
        The estimator, or ``None`` for an unknown ``algorithm``.

    Raises:
        KeyError: If a hyper-parameter of the chosen algorithm is missing.
    """
    if algorithm == 1:
        return LinearRegression(fit_intercept=paras["fit_intercept"])
    if algorithm == 2:
        return RandomForestRegressor(
            n_estimators=paras["n_estimators"],
            criterion=paras["criterion"],
            max_depth=paras["max_depth"],
            random_state=0)
    if algorithm == 3:
        return MLPRegressor(
            hidden_layer_sizes=(paras["hidden_layer_sizes_1"],
                                paras["hidden_layer_sizes_2"]),
            activation=paras["activation"],
            solver='lbfgs',
            random_state=paras["random_state"])
    if algorithm == 4:
        return XGBRegressor(
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            n_estimators=paras["n_estimators"])
    if algorithm == 5:
        return lgb.LGBMRegressor(
            objective='regression',
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            random_state=paras["random_state"],
            n_estimators=paras["n_estimators"])
    return None


def train_model(x, y, test_size, algorithm, paras):
    """Train one regressor and evaluate it on a held-out split.

    Args:
        x: Feature table.
        y: Target values aligned with ``x``.
        test_size: Share of the rows held out for testing.
        algorithm: Algorithm id, see ``_build_model``.
        paras: Hyper-parameters of the chosen algorithm.

    Returns:
        On success a dict with ``y_test``, ``y_pred`` (lists), ``error``
        (``MAE``, ``RMSE`` and ``R2`` rounded to 3 decimals) and
        ``results`` (``coef`` rounded to 4 decimals and ``intercept``
        rounded to 3 decimals for linear regression, otherwise empty).
        For an unknown ``algorithm`` the error message string is returned
        instead, as before.
    """
    model = _build_model(algorithm, paras)
    if model is None:
        # Checked before splitting so no work is done for a bad request.
        return "模型训练出错"

    # random_state=0 keeps the split reproducible between calls.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=0)
    model.fit(x_train, y_train)

    results = {}
    if algorithm == 1:
        results["coef"] = [float('{:.4f}'.format(i))
                           for i in model.coef_.tolist()]  # slopes
        results["intercept"] = round(model.intercept_, 3)

    y_pred = model.predict(x_test)
    mae = round(mean_absolute_error(y_test, y_pred), 3)
    rmse = round(np.sqrt(mean_squared_error(y_test, y_pred)), 3)
    r2 = round(r2_score(y_test, y_pred), 3)
    return {
        "y_test": np.array(y_test).tolist(),
        "y_pred": y_pred.tolist(),
        "error": {"MAE": mae, "RMSE": rmse, "R2": r2},
        "results": results,
    }


def predict_connectivity(x, x1, y, test_size, algorithm, paras):
    """Fit a regressor on the training split and predict for new rows.

    Args:
        x: Feature table used for training.
        x1: Feature rows to predict for.
        y: Target values aligned with ``x``.
        test_size: Share of ``x`` that is held out (and therefore not used
            for fitting), mirroring ``train_model``.
        algorithm: Algorithm id, see ``_build_model``.
        paras: Hyper-parameters of the chosen algorithm.

    Returns:
        The predictions for ``x1`` as a list, or the error message string
        for an unknown ``algorithm``, as before.
    """
    model = _build_model(algorithm, paras)
    if model is None:
        return "预测连通性出错"

    x_train, _x_test, y_train, _y_test = train_test_split(
        x, y, test_size=test_size, random_state=0)
    model.fit(x_train, y_train)
    return model.predict(x1).tolist()
323
+
324
+ # 登录验证
325
+
326
+
327
class Login(BaseModel):
    # Request body of POST /login.
    username: str
    password: str


@app.post("/login")
async def login(login: Login):
    """Check the submitted credentials.

    The expected credentials are read from the ``MLCA_USERNAME`` and
    ``MLCA_PASSWORD`` environment variables so a deployment can override
    them without a code change. The previous hard-coded values remain the
    defaults, so behaviour is unchanged when the variables are not set.
    NOTE(review): the defaults are weak and should be overridden in any
    real deployment.

    Args:
        login: Parsed request body.

    Returns:
        ``True`` when both username and password match, else ``False``.
    """
    import os
    import secrets

    expected_username = os.environ.get("MLCA_USERNAME", "admin")
    expected_password = os.environ.get("MLCA_PASSWORD", "123456")

    # compare_digest runs in constant time; bytes are used because the
    # str form only accepts ASCII. Both checks always run so the response
    # time does not reveal which field was wrong.
    username_ok = secrets.compare_digest(
        login.username.encode("utf-8"), expected_username.encode("utf-8"))
    password_ok = secrets.compare_digest(
        login.password.encode("utf-8"), expected_password.encode("utf-8"))
    return username_ok and password_ok
339
+
340
+
341
+ # 处理用户数据
342
+
343
class Process_user(BaseModel):
    # Request body of POST /process/user.
    mode: int  # selected connectivity mode; picks ./data/mode_<mode>.csv
    data: List  # user rows, converted with json2df
    miss: List  # first item: {"state", "method"}
    abnormal: List  # first item: {"state", "detect", "method"}
    standard: List  # first item: {"state", "method"}


@app.post("/process/user")
async def process_user(user: Process_user):
    """Pre-process user data together with the built-in data of a mode.

    Steps, each one only when its ``state`` flag is truthy: outlier
    handling (built-in and user rows together), missing-value filling
    (user rows only), then standardisation (together again).

    Returns:
        A dict with keys ``inside`` and ``user``; each value is a JSON
        string of records in which every cell was converted to ``str``.
    """
    builtin_path = "./data/mode_{}.csv".format(user.mode)
    # Rows with any empty cell are discarded from the built-in data.
    df_inside = pd.read_csv(builtin_path).dropna(axis=0)
    df_user = json2df(user.data)

    # Each option arrives as a one-element list wrapping the settings.
    abnormal_cfg, miss_cfg, standard_cfg = (
        user.abnormal[0], user.miss[0], user.standard[0])

    if abnormal_cfg["state"]:
        cleaned = process_abnormal(
            df_inside, df_user, abnormal_cfg["detect"], abnormal_cfg["method"])
        df_inside, df_user = cleaned["df_inside"], cleaned["df_user"]

    if miss_cfg["state"]:
        df_user = process_miss(df_user, miss_cfg["method"])

    if standard_cfg["state"]:
        scaled = process_standard(df_inside, df_user, standard_cfg["method"])
        df_inside, df_user = scaled["df_inside"], scaled["df_user"]

    # Cells are stringified before serialisation.
    inside_json = df_inside.astype('str').to_json(orient='records')
    user_json = df_user.astype('str').to_json(orient='records')
    return {"inside": inside_json, "user": user_json}
376
+
377
+ # # 用astype将数值转科学计数法
378
+ # return df.astype('str').to_json(orient='records')
379
+
380
+ # 处理内置数据
381
+
382
+
383
class Process_inside(BaseModel):
    # Request body of POST /process/inside.
    data: List  # built-in rows, converted with json2df
    abnormal: List  # first item: {"state", "detect", "method"}
    standard: List  # first item: {"state", "method"}


@app.post("/process/inside")
async def process_inside(inside: Process_inside):
    """Pre-process the built-in data set on its own.

    The helpers ``process_abnormal`` and ``process_standard`` take a
    built-in frame *and* a user frame. This endpoint used to call them
    with their old single-frame signatures, which raised ``TypeError``
    whenever a step was enabled. It now passes a zero-row user frame and
    keeps only the ``df_inside`` part of the result.

    Returns:
        A JSON string of records in which every cell was converted to
        ``str``.
    """
    df = json2df(inside.data)
    abnormal = inside.abnormal[0]
    standard = inside.standard[0]

    # Outlier handling.
    if abnormal["state"]:
        no_user_rows = df.iloc[:0, :12]  # same feature columns, no rows
        df = process_abnormal(
            df, no_user_rows, abnormal["detect"], abnormal["method"])["df_inside"]

    # Standardisation: the helper scales the first 12 (feature) columns
    # only and leaves the label columns after them untouched.
    if standard["state"]:
        no_user_rows = df.iloc[:0, :12]
        df = process_standard(
            df, no_user_rows, standard["method"])["df_inside"]

    # Cells are stringified before serialisation.
    return df.astype('str').to_json(orient='records')
403
+
404
+
405
+ # 训练模型
406
class Train(BaseModel):
    # Request body of POST /train.
    data: List  # training rows, converted with json2df
    test_size: float  # share of rows held out for testing
    algorithm: int  # algorithm id understood by train_model
    paras: List  # first item: hyper-parameter mapping


@app.post("/train")
async def train(train: Train):
    """Train one model for ``BSR`` and one for ``SBR`` and report both.

    The ``D`` result is not a model of its own: its prediction is the
    element-wise sum of the BSR and SBR predictions, compared against the
    held-out ``D`` column.

    Returns:
        ``{"bsr": ..., "sbr": ..., "d": ...}`` where ``bsr``/``sbr`` are
        the ``train_model`` results and ``d`` holds ``y_test``/``y_pred``.

    Raises:
        HTTPException: 400 when ``train_model`` reports an error (for
            example an unknown algorithm id). Previously the error string
            was indexed like a dict, which crashed with a ``TypeError``.
    """
    from fastapi import HTTPException

    # Parse the request.
    df = json2df(train.data)
    test_size = train.test_size
    algorithm = train.algorithm
    paras = train.paras[0]

    x = df.iloc[:, :12]
    y1 = df.loc[:, "BSR"]
    y2 = df.loc[:, "SBR"]
    y3 = df.loc[:, "D"]

    bsr = train_model(x, y1, test_size, algorithm, paras)
    sbr = train_model(x, y2, test_size, algorithm, paras)
    for outcome in (bsr, sbr):
        # train_model signals failure by returning a message string.
        if isinstance(outcome, str):
            raise HTTPException(status_code=400, detail=outcome)

    # Same arguments and random_state=0 as inside train_model, so the
    # held-out D rows line up with the BSR/SBR predictions.
    _, _, _, d_test = train_test_split(
        x, y3, test_size=test_size, random_state=0)
    d = {
        "y_test": np.array(d_test).tolist(),
        "y_pred": np.sum([bsr["y_pred"], sbr["y_pred"]], axis=0).tolist(),
    }
    return {"bsr": bsr, "sbr": sbr, "d": d}
431
+
432
+ # 预测连通性
433
+
434
+
435
class Predict(BaseModel):
    # Request body of POST /predict.
    data_train: List  # training rows, converted with json2df
    data_predict: List  # rows to predict for, converted with json2df
    test_size: float  # share of training rows held out
    algorithm: int  # algorithm id understood by predict_connectivity
    paras: List  # first item: hyper-parameter mapping


@app.post("/predict")
async def predict(predict: Predict):
    """Predict ``BSR``, ``SBR`` and ``D`` for the submitted rows.

    ``D`` is the element-wise sum of the BSR and SBR predictions.

    Returns:
        A JSON string of records: the submitted ``data_predict`` rows (as
        received) with ``BSR``, ``SBR`` and ``D`` columns appended.

    Raises:
        HTTPException: 400 when ``predict_connectivity`` reports an error
            (for example an unknown algorithm id). Previously the error
            string was fed into ``np.sum``, which crashed.
    """
    from fastapi import HTTPException

    # Parse the request.
    df_train = json2df(predict.data_train)
    df_predict = json2df(predict.data_predict)
    test_size = predict.test_size
    algorithm = predict.algorithm
    paras = predict.paras[0]

    x = df_train.iloc[:, :12]
    y1 = df_train.loc[:, "BSR"]
    y2 = df_train.loc[:, "SBR"]

    bsr = predict_connectivity(x, df_predict, y1, test_size, algorithm, paras)
    sbr = predict_connectivity(x, df_predict, y2, test_size, algorithm, paras)
    for outcome in (bsr, sbr):
        # predict_connectivity signals failure by returning a message string.
        if isinstance(outcome, str):
            raise HTTPException(status_code=400, detail=outcome)
    d = np.sum([bsr, sbr], axis=0).tolist()

    # One row per prediction, in the record layout the front-end table uses.
    predictions = pd.DataFrame({"BSR": bsr, "SBR": sbr, "D": d})
    df_result = pd.concat(
        [pd.DataFrame(predict.data_predict), predictions], axis=1)
    return df_result.to_json(orient='records')
466
+ # return pd.DataFrame(data=np.array(data).T.tolist(), columns=["BSR", "SBR", "D"]).to_json(orient='records')
467
+
468
 
469
+ # # 图片测试
 
 
 
 
470
 
471
+ # def create_img():
472
+ # plt.rcParams['figure.figsize'] = [7.50, 3.50]
473
+ # plt.rcParams['figure.autolayout'] = True
474
+ # plt.plot([1, 2])
475
+ # img_buf = io.BytesIO()
476
+ # plt.savefig(img_buf, format='png')
477
+ # plt.close()
478
+ # return img_buf
479
 
 
 
 
480
 
481
+ # @app.get('/png')
482
+ # async def get_img(background_tasks: BackgroundTasks):
483
+ # img_buf = create_img()
484
+ # # get the entire buffer content
485
+ # # because of the async, this will await the loading of all content
486
+ # bufContents: bytes = img_buf.getvalue()
487
+ # background_tasks.add_task(img_buf.close)
488
+ # headers = {'Content-Disposition': 'inline; filename="out.png"'}
489
+ # return Response(bufContents, headers=headers, media_type='image/png')