|
from typing import List |
|
from pydantic import BaseModel |
|
from fastapi import FastAPI, Response, BackgroundTasks |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from sklearn.metrics import mean_absolute_error |
|
from sklearn.metrics import mean_squared_error |
|
from sklearn.metrics import r2_score |
|
from sklearn.model_selection import train_test_split |
|
import sklearn.preprocessing as preproc |
|
from sklearn.preprocessing import StandardScaler |
|
import lightgbm as lgb |
|
from xgboost import XGBRegressor |
|
from sklearn.neural_network import MLPRegressor |
|
from sklearn.ensemble import RandomForestRegressor |
|
from sklearn.linear_model import LinearRegression |
|
import matplotlib.pyplot as plt |
|
import io |
|
import json |
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib |
|
matplotlib.use('AGG') |
|
|
|
|
|
# ASGI application object; the route decorators in this file register on it.
app = FastAPI()

# Browser origins that are allowed to issue cross-origin requests to this API.
origins = [
    "http://127.0.0.1:5500",
    "http://localhost:8081",
    "http://mlca.coycs.com",
    "https://mlca.coycs.com"
]

# Enable CORS for the origins above: credentials are allowed, only POST and
# GET methods are accepted, and any request header is permitted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["POST", "GET"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
|
def json2df(json):
    """Convert a list of JSON records into a DataFrame with parsed values.

    Every string cell is parsed as a Python literal (``"1.5"`` -> ``1.5``,
    ``"3"`` -> ``3``). Empty or whitespace-only strings are first rewritten
    to ``"None"`` so that they parse to ``None`` (a missing value).
    Non-string cells are left untouched.

    Args:
        json: Data accepted by ``pandas.DataFrame`` (the callers pass the
            list taken from the request body). The parameter name shadows
            the ``json`` module inside this function; it is kept for
            backward compatibility with existing callers.

    Returns:
        A new ``pandas.DataFrame`` holding the parsed values.

    Raises:
        ValueError, SyntaxError: If a string cell is not a valid Python
            literal.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    def str2num(x):
        # SECURITY: the cells come straight from the HTTP request body. The
        # previous implementation used eval(), which executes arbitrary
        # expressions; literal_eval only accepts literals (numbers, None,
        # strings, tuples, ...) and rejects everything else.
        if isinstance(x, str):
            return ast.literal_eval(x)
        return x

    df = pd.DataFrame(json)

    # Blank cells become the literal text "None" so they parse to None below.
    df.replace(to_replace=r"^\s*$", value="None", regex=True, inplace=True)

    # Element-wise conversion; a per-column Series.map replaces the
    # deprecated DataFrame.applymap.
    return df.apply(lambda column: column.map(str2num))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_miss(df, method):
    """Remove empty rows/columns and fill the remaining missing values.

    Rows that are entirely missing are dropped first, then columns that are
    entirely missing. The remaining gaps are filled according to ``method``:

        1 - column mean
        2 - column median
        3 - column mode (first mode when several exist)
        4 - linear interpolation along the rows (forward direction)
        5 - forward fill (previous row's value)
        6 - backward fill (next row's value)

    Any other ``method`` only performs the row/column removal.

    Args:
        df: Input ``pandas.DataFrame``; it is not modified.
        method: Integer code selecting the fill strategy.

    Returns:
        A new ``pandas.DataFrame`` with the treatment applied.
    """
    df = df.dropna(how='all')
    df = df.dropna(axis=1, how='all')

    # Nothing left to fill; also avoids an IndexError from
    # ``df.mode().iloc[0]`` on an empty frame.
    if df.empty:
        return df

    if method == 1:
        df = df.fillna(df.mean())
    elif method == 2:
        df = df.fillna(df.median())
    elif method == 3:
        df = df.fillna(df.mode().iloc[0])
    elif method == 4:
        df = df.fillna(df.interpolate(
            method='linear', limit_direction='forward', axis=0))
    elif method == 5:
        # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
        df = df.ffill()
    elif method == 6:
        df = df.bfill()
    return df
|
|
|
|
|
def _abnormal_fill(values, method, bound):
    """Return the value that replaces outliers of ``values`` for ``method``."""
    if method == 2:
        return values.mean()
    if method == 3:
        return values.median()
    if method == 4:
        return values.mode().iloc[0]
    # method == 5: clip to the violated bound itself.
    return bound


def process_abnormal(df_inside, df_user, detect, method):
    """Detect and treat outliers in the first 12 columns of the combined data.

    ``df_inside`` and ``df_user`` are stacked (inside rows first) so the
    outlier bounds are computed over both data sets, column by column.

    Detection (``detect``):
        1 - box-plot rule: below ``Q1 - 1.5*IQR`` or above ``Q3 + 1.5*IQR``
        2 - three-sigma rule: further than 3 standard deviations from the mean
        Any other value leaves the data unchanged.

    Treatment (``method``):
        1 - drop the rows that contain an outlier
        2 - replace with the column mean
        3 - replace with the column median
        4 - replace with the column mode
        5 - clip to the violated bound
        Any other value leaves the data unchanged.

    Args:
        df_inside: Built-in data set; may carry extra columns after the
            first 12.
        df_user: User data set.
        detect: Integer code of the detection rule.
        method: Integer code of the treatment.

    Returns:
        ``{"df_inside": ..., "df_user": ...}`` where ``df_inside`` has all
        columns and ``df_user`` only the first 12. Rows keep the index of
        the stacked frame (inside rows are ``0 .. len(df_inside) - 1``).
    """
    inside_rows = df_inside.shape[0]
    df = pd.concat([df_inside, df_user], axis=0,
                   ignore_index=True)

    for column in list(df.columns[:12]):
        values = df[column]

        if detect == 1:
            upper_quartile = values.quantile(0.75)
            lower_quartile = values.quantile(0.25)
            iqr = upper_quartile - lower_quartile
            low = lower_quartile - 1.5 * iqr
            high = upper_quartile + 1.5 * iqr
        elif detect == 2:
            mean = values.mean()
            std = values.std()
            low = mean - 3 * std
            high = mean + 3 * std
        else:
            break

        if method == 1:
            # Drop whole rows from the combined frame so every column,
            # including the non-feature ones, stays aligned.
            is_outlier = (values > high) | (values < low)
            df = df.loc[~is_outlier]
        elif method in (2, 3, 4, 5):
            # Upper side first, then lower side; the replacement statistic
            # for the lower side is taken after the upper side was treated.
            # mask() upcasts when needed (e.g. a float mean into an int
            # column) instead of relying on in-place assignment.
            values = values.mask(values > high,
                                 _abnormal_fill(values, method, high))
            values = values.mask(values < low,
                                 _abnormal_fill(values, method, low))
            df[column] = values

    # Split by the original stacked index rather than by position, so the
    # split stays correct after rows were dropped.
    from_inside = df.index < inside_rows
    df_inside = df.loc[from_inside]
    df_user = df.loc[~from_inside].iloc[:, :12]
    return {"df_inside": df_inside, "df_user": df_user}
|
|
|
|
|
def process_standard(df_inside, df_user, method):
    """Scale the first 12 columns of the combined inside + user data.

    ``df_inside`` and ``df_user`` are stacked (inside rows first) so both
    data sets are scaled with the same statistics.

    Scaling (``method``):
        1 - ``preproc.minmax_scale``
        2 - ``preproc.StandardScaler``
        3 - ``preproc.maxabs_scale``
        4 - ``preproc.RobustScaler``
        5 - ``preproc.normalize`` along axis 0
        Any other value leaves the data unchanged.

    Args:
        df_inside: Built-in data set; may carry extra columns after the
            first 12, which are passed through untouched.
        df_user: User data set.
        method: Integer code of the scaling method.

    Returns:
        ``{"df_inside": ..., "df_user": ...}`` where ``df_inside`` has all
        columns and ``df_user`` only the first 12.
    """
    inside_rows = df_inside.shape[0]
    df = pd.concat([df_inside, df_user], axis=0,
                   ignore_index=True)
    features = df.iloc[:, :12]

    if method == 1:
        scaled = preproc.minmax_scale(features)
    elif method == 2:
        scaled = preproc.StandardScaler().fit_transform(features)
    elif method == 3:
        scaled = preproc.maxabs_scale(features, axis=0)
    elif method == 4:
        scaled = preproc.RobustScaler().fit_transform(features)
    elif method == 5:
        scaled = preproc.normalize(features, axis=0)
    else:
        # Unknown method: keep the data as is instead of failing while
        # slicing a DataFrame like an ndarray.
        scaled = None

    if scaled is not None:
        scaled = pd.DataFrame(
            data=scaled, columns=features.columns, index=df.index)
        # Rebuild the frame rather than assigning through iloc, so float
        # results never have to be written into integer-typed columns.
        df = pd.concat([scaled, df.iloc[:, 12:]], axis=1)

    df_inside = df.iloc[:inside_rows, :]
    df_user = df.iloc[inside_rows:, :12]
    return {"df_inside": df_inside, "df_user": df_user}
|
|
|
|
|
def train_model(x, y, test_size, algorithm, paras):
    """Fit one regressor on a train split and score it on the test split.

    The data is split with ``train_test_split`` (``random_state=0``). The
    regressor is chosen by ``algorithm``:

        1 - LinearRegression
        2 - RandomForestRegressor
        3 - MLPRegressor (two hidden layers, ``lbfgs`` solver)
        4 - XGBRegressor
        5 - LGBMRegressor

    Args:
        x: Feature data.
        y: Target values.
        test_size: Passed to ``train_test_split``.
        algorithm: Integer code of the regressor.
        paras: Mapping with the hyper-parameters read for the chosen
            regressor.

    Returns:
        A dict with ``"y_test"``, ``"y_pred"``, ``"error"`` (MAE, RMSE and R2
        rounded to 3 decimals) and ``"results"`` (coefficients and intercept
        for algorithm 1, otherwise empty). For an unknown ``algorithm`` the
        error message string is returned instead.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=0)

    if algorithm == 1:
        regressor = LinearRegression(fit_intercept=paras["fit_intercept"])
    elif algorithm == 2:
        regressor = RandomForestRegressor(
            n_estimators=paras["n_estimators"],
            criterion=paras["criterion"],
            max_depth=paras["max_depth"],
            random_state=0)
    elif algorithm == 3:
        layers = (paras["hidden_layer_sizes_1"], paras["hidden_layer_sizes_2"])
        regressor = MLPRegressor(
            hidden_layer_sizes=layers,
            activation=paras["activation"],
            solver='lbfgs',
            random_state=paras["random_state"])
    elif algorithm == 4:
        regressor = XGBRegressor(
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            n_estimators=paras["n_estimators"])
    elif algorithm == 5:
        regressor = lgb.LGBMRegressor(
            objective='regression',
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            random_state=paras["random_state"],
            n_estimators=paras["n_estimators"])
    else:
        # No regressor matches the requested code.
        return "模型训练出错"

    regressor.fit(x_train, y_train)

    extras = {}
    if algorithm == 1:
        # Only the linear model exposes coefficients and an intercept.
        extras["coef"] = [float(f"{weight:.4f}")
                          for weight in regressor.coef_.tolist()]
        extras["intercept"] = round(regressor.intercept_, 3)

    predicted = regressor.predict(x_test)

    scores = {
        "MAE": round(mean_absolute_error(y_test, predicted), 3),
        "RMSE": round(np.sqrt(mean_squared_error(y_test, predicted)), 3),
        "R2": round(r2_score(y_test, predicted), 3),
    }

    # Plain lists so the result can be serialised in the response.
    return {
        "y_test": np.array(y_test).tolist(),
        "y_pred": predicted.tolist(),
        "error": scores,
        "results": extras,
    }
|
|
|
|
|
def predict_connectivity(x, x1, y, test_size, algorithm, paras):
    """Fit a regressor on the train split of ``x``/``y`` and predict ``x1``.

    The data is split with ``train_test_split`` (``random_state=0``) and only
    the training part is used for fitting. The regressor is chosen by
    ``algorithm``:

        1 - LinearRegression
        2 - RandomForestRegressor
        3 - MLPRegressor (two hidden layers, ``lbfgs`` solver)
        4 - XGBRegressor
        5 - LGBMRegressor

    Args:
        x: Training feature data.
        x1: Feature data to predict.
        y: Training target values.
        test_size: Passed to ``train_test_split``.
        algorithm: Integer code of the regressor.
        paras: Mapping with the hyper-parameters read for the chosen
            regressor.

    Returns:
        The predictions for ``x1`` as a list, or the error message string
        when ``algorithm`` is unknown.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=0)

    if algorithm == 1:
        regressor = LinearRegression(fit_intercept=paras["fit_intercept"])
    elif algorithm == 2:
        regressor = RandomForestRegressor(
            n_estimators=paras["n_estimators"],
            criterion=paras["criterion"],
            max_depth=paras["max_depth"],
            random_state=0)
    elif algorithm == 3:
        layers = (paras["hidden_layer_sizes_1"], paras["hidden_layer_sizes_2"])
        regressor = MLPRegressor(
            hidden_layer_sizes=layers,
            activation=paras["activation"],
            solver='lbfgs',
            random_state=paras["random_state"])
    elif algorithm == 4:
        regressor = XGBRegressor(
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            n_estimators=paras["n_estimators"])
    elif algorithm == 5:
        regressor = lgb.LGBMRegressor(
            objective='regression',
            max_depth=paras["max_depth"],
            learning_rate=paras["learning_rate"],
            random_state=paras["random_state"],
            n_estimators=paras["n_estimators"])
    else:
        # No regressor matches the requested code.
        return "预测连通性出错"

    regressor.fit(x_train, y_train)
    return regressor.predict(x1).tolist()
|
|
|
|
|
|
|
|
|
class Login(BaseModel):
    # Request body of the POST /login endpoint.
    # username: account name submitted by the client.
    username: str
    # password: password submitted by the client, compared as plain text.
    password: str
|
|
|
|
|
@app.post("/login")
async def login(login: Login):
    """Check the submitted credentials.

    Returns:
        ``True`` when the username/password pair matches the single built-in
        account, ``False`` otherwise.
    """
    # NOTE(review): the account is hard-coded in source and compared as plain
    # text; consider moving it to configuration and hashing the password.
    return login.username == "admin" and login.password == "123456"
|
|
|
|
|
|
|
|
|
class Process_user(BaseModel):
    # Request body of the POST /process/user endpoint.
    # mode: selects the built-in data file "./mode_<mode>.csv".
    mode: int
    # data: user records; converted to a DataFrame with json2df.
    data: List
    # miss: list whose first item is a dict with "state" and "method".
    miss: List
    # abnormal: list whose first item is a dict with "state", "detect"
    # and "method".
    abnormal: List
    # standard: list whose first item is a dict with "state" and "method".
    standard: List
|
|
|
|
|
@app.post("/process/user")
async def process_user(user: Process_user):
    """Pre-process the user data together with the built-in data set.

    The built-in data is read from ``./mode_<mode>.csv`` (rows with missing
    values removed). The enabled steps run in this order: outlier handling
    (inside + user), missing-value handling (user only), scaling
    (inside + user).

    Returns:
        ``{"inside": <json>, "user": <json>}`` where both values are
        record-oriented JSON strings of the frames cast to ``str``.
    """
    inside_path = "./mode_{}.csv".format(user.mode)
    df_inside = pd.read_csv(inside_path).dropna(axis=0)
    df_user = json2df(user.data)

    # Each option arrives as a one-element list wrapping the settings dict.
    abnormal_cfg = user.abnormal[0]
    miss_cfg = user.miss[0]
    standard_cfg = user.standard[0]

    if abnormal_cfg["state"]:
        cleaned = process_abnormal(
            df_inside, df_user, abnormal_cfg["detect"], abnormal_cfg["method"])
        df_inside, df_user = cleaned["df_inside"], cleaned["df_user"]

    if miss_cfg["state"]:
        df_user = process_miss(df_user, miss_cfg["method"])

    if standard_cfg["state"]:
        scaled = process_standard(df_inside, df_user, standard_cfg["method"])
        df_inside, df_user = scaled["df_inside"], scaled["df_user"]

    inside_json = df_inside.astype('str').to_json(orient='records')
    user_json = df_user.astype('str').to_json(orient='records')
    return {"inside": inside_json, "user": user_json}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Process_inside(BaseModel):
    # Request body of the POST /process/inside endpoint.
    # data: records to process; converted to a DataFrame with json2df.
    data: List
    # abnormal: list whose first item is a dict with "state", "detect"
    # and "method".
    abnormal: List
    # standard: list whose first item is a dict with "state" and "method".
    standard: List
|
|
|
|
|
@app.post("/process/inside")
async def process_inside(inside: Process_inside):
    """Pre-process a single data set (no separate user data).

    The enabled steps run in this order: outlier handling, then scaling of
    the first 12 columns. Columns after the first 12 are passed through.

    Returns:
        A record-oriented JSON string of the processed frame cast to ``str``.
    """
    df = json2df(inside.data)
    abnormal = inside.abnormal[0]
    standard = inside.standard[0]

    # process_abnormal / process_standard take an (inside, user) pair and
    # return a dict with both parts. There is no user data here, so an empty
    # frame with the feature columns is passed and only the "df_inside" part
    # of the result is kept.
    if abnormal["state"]:
        df = process_abnormal(
            df, df.iloc[:0, :12], abnormal["detect"], abnormal["method"]
        )["df_inside"]

    if standard["state"]:
        df = process_standard(
            df, df.iloc[:0, :12], standard["method"]
        )["df_inside"]

    return df.astype('str').to_json(orient='records')
|
|
|
|
|
|
|
class Train(BaseModel):
    # Request body of the POST /train endpoint.
    # data: training records; converted to a DataFrame with json2df. The
    # endpoint reads the first 12 columns as features and the columns
    # "BSR", "SBR" and "D" as targets.
    data: List
    # test_size: forwarded to train_test_split.
    test_size: float
    # algorithm: integer code of the regressor (1-5, see train_model).
    algorithm: int
    # paras: list whose first item is the hyper-parameter dict.
    paras: List
|
|
|
|
|
@app.post("/train")
async def train(train: Train):
    """Train and evaluate one model per target ("BSR" and "SBR").

    The first 12 columns of the submitted data are the features. The "D"
    prediction is the element-wise sum of the "BSR" and "SBR" predictions
    and is reported next to the true "D" values of the same test split.

    Returns:
        ``{"bsr": ..., "sbr": ..., "d": ...}`` where ``bsr``/``sbr`` are the
        ``train_model`` results and ``d`` holds ``"y_test"`` and ``"y_pred"``.

    Raises:
        HTTPException: 400 when ``train_model`` reports that no model could
            be built (unknown ``algorithm``).
    """
    # Local import: the file's import block does not bring in HTTPException.
    from fastapi import HTTPException

    df = json2df(train.data)
    test_size = train.test_size
    algorithm = train.algorithm
    paras = train.paras[0]

    x = df.iloc[:, :12]
    y1 = df.loc[:, "BSR"]
    y2 = df.loc[:, "SBR"]
    y3 = df.loc[:, "D"]

    bsr = train_model(x, y1, test_size, algorithm, paras)
    sbr = train_model(x, y2, test_size, algorithm, paras)

    # train_model signals failure by returning a message string; surface it
    # as a client error instead of failing on bsr["y_pred"] below.
    for outcome in (bsr, sbr):
        if isinstance(outcome, str):
            raise HTTPException(status_code=400, detail=outcome)

    # Same split parameters as inside train_model, so y_test lines up with
    # the predictions returned above.
    _, _, _, y_test = train_test_split(
        x, y3, test_size=test_size, random_state=0)

    d = {"y_test": np.array(y_test).tolist(), "y_pred": np.sum(
        [bsr["y_pred"], sbr["y_pred"]], axis=0).tolist()}
    return {"bsr": bsr, "sbr": sbr, "d": d}
|
|
|
|
|
|
|
|
|
class Predict(BaseModel):
    # Request body of the POST /predict endpoint.
    # data_train: training records; the endpoint reads the first 12 columns
    # as features and the columns "BSR" and "SBR" as targets.
    data_train: List
    # data_predict: records to predict; passed to the model as features.
    data_predict: List
    # test_size: forwarded to train_test_split.
    test_size: float
    # algorithm: integer code of the regressor (1-5, see predict_connectivity).
    algorithm: int
    # paras: list whose first item is the hyper-parameter dict.
    paras: List
|
|
|
|
|
@app.post("/predict")
async def predict(predict: Predict):
    """Predict "BSR", "SBR" and "D" for the submitted records.

    One model per target ("BSR", "SBR") is fitted on the first 12 columns of
    the training data; "D" is the element-wise sum of both predictions.

    Returns:
        A record-oriented JSON string: the submitted prediction records with
        the columns "BSR", "SBR" and "D" appended.

    Raises:
        HTTPException: 400 when ``predict_connectivity`` reports that no
            model could be built (unknown ``algorithm``).
    """
    # Local import: the file's import block does not bring in HTTPException.
    from fastapi import HTTPException

    df_train = json2df(predict.data_train)
    df_predict = json2df(predict.data_predict)
    test_size = predict.test_size
    algorithm = predict.algorithm
    paras = predict.paras[0]

    x = df_train.iloc[:, :12]
    y1 = df_train.loc[:, "BSR"]
    y2 = df_train.loc[:, "SBR"]

    bsr = predict_connectivity(x, df_predict, y1, test_size, algorithm, paras)
    sbr = predict_connectivity(x, df_predict, y2, test_size, algorithm, paras)

    # predict_connectivity signals failure by returning a message string;
    # surface it as a client error instead of feeding it to np.sum.
    for outcome in (bsr, sbr):
        if isinstance(outcome, str):
            raise HTTPException(status_code=400, detail=outcome)

    d = np.sum([bsr, sbr], axis=0).tolist()

    predicted = pd.DataFrame({"BSR": bsr, "SBR": sbr, "D": d})
    # The echoed input uses the raw request records, not the parsed frame.
    df_result = pd.concat(
        [pd.DataFrame(predict.data_predict), predicted], axis=1)
    return df_result.to_json(orient='records')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|