Spaces:
Sleeping
Sleeping
File size: 1,527 Bytes
f60ce93 5d525eb 0059ef7 bcf8eca bf91270 7944a63 0059ef7 9b79169 0059ef7 4d92e12 4b60c06 0059ef7 4b60c06 5c158f1 4b60c06 0059ef7 9b7ebe5 f60ce93 a044018 f60ce93 659d788 f60ce93 a044018 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import streamlit as st
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode
import numpy as np
import pandas as pd
import sklearn
import xgboost
# Streamlit app: fit an XGBoost regressor on labelled training genes, score
# every gene in the annotation table, and display the scores.

# Fixed seed passed to XGBoost as random_state so the fit is repeatable.
seed = 42

# Annotation table to be scored; rows are indexed by the "Gene" column.
data = pd.read_csv("annotations_dataset.csv")
data = data.set_index("Gene")

# Labelled training examples (first CSV row is the header).
training_data = pd.read_csv("./selected_features_training_data.csv", header=0)

# XGBoost rejects feature names containing "[", "]" or "<", so each of those
# characters is replaced with "_". A translation table is used instead of a
# regex object: the script never defined or imported `regex`, so the previous
# `regex.sub(...)` call raised NameError whenever a column needed renaming.
_column_char_fixes = str.maketrans({"[": "_", "]": "_", "<": "_"})
training_data.columns = [
    col.translate(_column_char_fixes) if isinstance(col, str) else col
    for col in training_data.columns.values
]

# Encode the categorical label as a numeric regression target.
training_data["BPlabel_encoded"] = training_data["BPlabel"].map(
    {"most likely": 1, "probable": 0.75, "least likely": 0.1}
)
Y = training_data["BPlabel_encoded"]
# Every remaining column is used as a feature.
X = training_data.drop(columns=["BPlabel_encoded", "BPlabel"])

xgb = xgboost.XGBRegressor(
    n_estimators=40,
    learning_rate=0.2,
    max_depth=4,
    reg_alpha=1,
    reg_lambda=1,
    random_state=seed,
    objective="reg:squarederror",
)
# NOTE(review): Streamlit re-executes this script on each widget interaction,
# so the model is re-fitted every time; consider caching the fit if the
# installed Streamlit version provides a caching decorator -- TODO confirm.
xgb.fit(X, Y)

# NOTE(review): assumes the columns of `data` match the (renamed) training
# features in `X` -- verify against the two CSV files.
predictions = list(xgb.predict(data))
output = pd.Series(data=predictions, index=data.index, name="XGB_Score")

# Append the score as an extra column next to the annotations, rounded to
# two decimals for display.
df_total = pd.concat([data, output], axis=1)
df_total["XGB_Score"] = df_total["XGB_Score"].round(2)

st.title('Blood Pressure Gene Prioritisation Post-GWAS')
st.markdown("""
A machine learning pipeline for predicting disease-causing genes post-genome-wide association study in blood pressure.
""")

# Exact-match lookup on the gene index; an empty or unknown input yields an
# empty table.
gene_input = st.text_input('Input HGNC Gene')
df = df_total[df_total.index == gene_input]
st.dataframe(df)

st.markdown("""
Total Gene Prioritisation Results:
""")
st.dataframe(df_total)
|