Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,7 @@ from email_validator import validate_email, EmailNotValidError
|
|
23 |
import gradio as gr
|
24 |
import hydra
|
25 |
import pandas as pd
|
26 |
-
from pandarallel import pandarallel
|
27 |
import requests
|
28 |
from requests.adapters import HTTPAdapter, Retry
|
29 |
from markdown import markdown
|
@@ -42,7 +42,7 @@ import panel as pn
|
|
42 |
from apscheduler.schedulers.background import BackgroundScheduler
|
43 |
from tinydb import TinyDB, Query
|
44 |
|
45 |
-
|
46 |
from tqdm.auto import tqdm
|
47 |
|
48 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
@@ -741,7 +741,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
741 |
orig_df['Target Family'].isna(), 'Target Family'
|
742 |
] = orig_df.loc[
|
743 |
orig_df['Target Family'].isna(), 'X2'
|
744 |
-
].
|
745 |
|
746 |
detect_family.cache_clear()
|
747 |
|
@@ -794,15 +794,15 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
794 |
subset.to_csv(predict_subset_filepath, index=False, na_rep='')
|
795 |
seen_compounds = get_seen_smiles(family, task_value)
|
796 |
|
797 |
-
if subset['X1'].iloc[0] in seen_compounds
|
798 |
scenario = "Seen Compound"
|
799 |
else:
|
800 |
scenario = "Unseen Compound"
|
801 |
|
802 |
-
filtered_df = benchmark_df[(benchmark_df['Family'] ==
|
803 |
& (benchmark_df['Scenario'] == scenario)]
|
804 |
|
805 |
-
preset = filtered_df.loc[filtered_df[score].idxmax(), '
|
806 |
preset_value = PRESET_MAP[preset]
|
807 |
|
808 |
target_family = TARGET_FAMILY_MAP[family.title()]
|
@@ -810,7 +810,8 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
810 |
config_name="webserver_inference",
|
811 |
overrides=[f"task={task_value}",
|
812 |
f"preset={preset_value}",
|
813 |
-
f"ckpt_path=
|
|
|
814 |
f"data.data_file='{str(predict_subset_filepath)}'"])
|
815 |
|
816 |
predictions, _ = predict(cfg)
|
@@ -822,14 +823,14 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
822 |
prediction_df = pd.concat([prediction_df, annotated_df], ignore_index=True)
|
823 |
|
824 |
# prediction_df['Max. Tanimoto Similarity'] = prediction_df.groupby('Target Family')['X1'].apply(
|
825 |
-
# lambda group: group.
|
826 |
# max_tanimoto_similarity,
|
827 |
# seen_smiles=tuple(get_seen_smiles(family=group.name, task=task_value))
|
828 |
# )
|
829 |
# ).values
|
830 |
#
|
831 |
# prediction_df['Max. Sequence Identity'] = prediction_df.groupby('Target Family')['X2'].apply(
|
832 |
-
# lambda group: group.
|
833 |
# max_sequence_identity,
|
834 |
# seen_fastas=tuple(get_seen_fastas(family=group.name, task=task_value))
|
835 |
# )
|
@@ -838,7 +839,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
838 |
for family in prediction_df['Target Family'].unique():
|
839 |
prediction_df.loc[
|
840 |
prediction_df['Target Family'] == family, 'Max. Tanimoto Similarity'] = prediction_df.loc[
|
841 |
-
prediction_df['Target Family'] == family, 'X1'].
|
842 |
max_tanimoto_similarity,
|
843 |
seen_smiles=tuple(get_seen_smiles(family=family, task=task_value))
|
844 |
)
|
@@ -847,7 +848,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
|
|
847 |
for family in prediction_df['Target Family'].unique():
|
848 |
prediction_df.loc[
|
849 |
prediction_df['Target Family'] == family, 'Max. Sequence Identity'] = prediction_df.loc[
|
850 |
-
prediction_df['Target Family'] == family, 'X2'].
|
851 |
max_sequence_identity,
|
852 |
seen_fastas=tuple(get_seen_fastas(family=family, task=task_value))
|
853 |
)
|
@@ -902,10 +903,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
|
|
902 |
|
903 |
if 'X1' in df.columns:
|
904 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
905 |
-
df['Compound'] = df['X1'].
|
906 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
907 |
-
df['Scaffold'] = df['Compound'].
|
908 |
-
df['Scaffold SMILES'] = df['Scaffold'].
|
909 |
|
910 |
if task == 'Compound-Protein Binding Affinity':
|
911 |
# Convert Y^ from pIC50 to IC50
|
@@ -986,13 +987,13 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
986 |
elif 'Y^' in df_html.columns:
|
987 |
job = 'Interaction Pair Inference'
|
988 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
989 |
-
df_html['Compound'] = df_html['Compound'].
|
990 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
991 |
else:
|
992 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
993 |
|
994 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
995 |
-
df_html['Scaffold'] = df_html['Scaffold'].
|
996 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
997 |
else:
|
998 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
@@ -1000,7 +1001,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1000 |
df_html.rename(columns=column_aliases, inplace=True)
|
1001 |
df_html.index.name = 'Index'
|
1002 |
if 'Target FASTA' in df_html.columns:
|
1003 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
1004 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1005 |
|
1006 |
num_cols = df_html.select_dtypes('number').columns
|
@@ -1018,7 +1019,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
|
|
1018 |
if 'Target ID' in df_html.columns:
|
1019 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
1020 |
if 'Target FASTA' in df_html.columns:
|
1021 |
-
df_html['Target FASTA'] = df_html['Target FASTA'].
|
1022 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1023 |
if 'Scaffold SMILES' in df_html.columns:
|
1024 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
@@ -1272,11 +1273,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
|
|
1272 |
df_report = df.copy()
|
1273 |
try:
|
1274 |
for filter_name in filter_list:
|
1275 |
-
df_report[filter_name] = df_report['Compound'].
|
1276 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1277 |
|
1278 |
for score_name in score_list:
|
1279 |
-
df_report[score_name] = df_report['Compound'].
|
1280 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1281 |
|
1282 |
# pie_chart = None
|
@@ -1918,7 +1919,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1918 |
alignment = aligner.align(processed_fasta, query)
|
1919 |
return alignment.score / max(len(processed_fasta), len(query))
|
1920 |
|
1921 |
-
alignment_df['score'] = alignment_df['X2'].
|
1922 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1923 |
family = str(row['Target Family']).title()
|
1924 |
return gr.Dropdown(value=family,
|
@@ -2239,13 +2240,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2239 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2240 |
validate_columns(infer_df, ['X1', 'X2'])
|
2241 |
|
2242 |
-
infer_df['X1_ERR'] = infer_df['X1'].
|
2243 |
validate_seq_str, regex=SMILES_PAT)
|
2244 |
if not infer_df['X1_ERR'].isna().all():
|
2245 |
raise ValueError(
|
2246 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2247 |
|
2248 |
-
infer_df['X2_ERR'] = infer_df['X2'].
|
2249 |
validate_seq_str, regex=FASTA_PAT)
|
2250 |
if not infer_df['X2_ERR'].isna().all():
|
2251 |
raise ValueError(
|
@@ -2564,7 +2565,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2564 |
|
2565 |
|
2566 |
if __name__ == "__main__":
|
2567 |
-
pandarallel.initialize()
|
2568 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
2569 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
2570 |
scheduler.add_job(check_expiry, 'interval', hours=1)
|
|
|
23 |
import gradio as gr
|
24 |
import hydra
|
25 |
import pandas as pd
|
26 |
+
# from pandarallel import pandarallel
|
27 |
import requests
|
28 |
from requests.adapters import HTTPAdapter, Retry
|
29 |
from markdown import markdown
|
|
|
42 |
from apscheduler.schedulers.background import BackgroundScheduler
|
43 |
from tinydb import TinyDB, Query
|
44 |
|
45 |
+
import swifter
|
46 |
from tqdm.auto import tqdm
|
47 |
|
48 |
from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
|
|
|
741 |
orig_df['Target Family'].isna(), 'Target Family'
|
742 |
] = orig_df.loc[
|
743 |
orig_df['Target Family'].isna(), 'X2'
|
744 |
+
].swifter.apply(detect_family)
|
745 |
|
746 |
detect_family.cache_clear()
|
747 |
|
|
|
794 |
subset.to_csv(predict_subset_filepath, index=False, na_rep='')
|
795 |
seen_compounds = get_seen_smiles(family, task_value)
|
796 |
|
797 |
+
if subset['X1'].iloc[0] in seen_compounds:
|
798 |
scenario = "Seen Compound"
|
799 |
else:
|
800 |
scenario = "Unseen Compound"
|
801 |
|
802 |
+
filtered_df = benchmark_df[(benchmark_df['Family'] == family.title())
|
803 |
& (benchmark_df['Scenario'] == scenario)]
|
804 |
|
805 |
+
preset = filtered_df.loc[filtered_df[score].idxmax(), 'Model']
|
806 |
preset_value = PRESET_MAP[preset]
|
807 |
|
808 |
target_family = TARGET_FAMILY_MAP[family.title()]
|
|
|
810 |
config_name="webserver_inference",
|
811 |
overrides=[f"task={task_value}",
|
812 |
f"preset={preset_value}",
|
813 |
+
f"ckpt_path=D:/checkpoints/{preset_value}-{task_value}-{target_family}.ckpt",
|
814 |
+
# f"ckpt_path=resources/checkpoints/{preset_value}-{task_value}-{target_family}.ckpt",
|
815 |
f"data.data_file='{str(predict_subset_filepath)}'"])
|
816 |
|
817 |
predictions, _ = predict(cfg)
|
|
|
823 |
prediction_df = pd.concat([prediction_df, annotated_df], ignore_index=True)
|
824 |
|
825 |
# prediction_df['Max. Tanimoto Similarity'] = prediction_df.groupby('Target Family')['X1'].apply(
|
826 |
+
# lambda group: group.swifter.apply(
|
827 |
# max_tanimoto_similarity,
|
828 |
# seen_smiles=tuple(get_seen_smiles(family=group.name, task=task_value))
|
829 |
# )
|
830 |
# ).values
|
831 |
#
|
832 |
# prediction_df['Max. Sequence Identity'] = prediction_df.groupby('Target Family')['X2'].apply(
|
833 |
+
# lambda group: group.swifter.apply(
|
834 |
# max_sequence_identity,
|
835 |
# seen_fastas=tuple(get_seen_fastas(family=group.name, task=task_value))
|
836 |
# )
|
|
|
839 |
for family in prediction_df['Target Family'].unique():
|
840 |
prediction_df.loc[
|
841 |
prediction_df['Target Family'] == family, 'Max. Tanimoto Similarity'] = prediction_df.loc[
|
842 |
+
prediction_df['Target Family'] == family, 'X1'].swifter.apply(
|
843 |
max_tanimoto_similarity,
|
844 |
seen_smiles=tuple(get_seen_smiles(family=family, task=task_value))
|
845 |
)
|
|
|
848 |
for family in prediction_df['Target Family'].unique():
|
849 |
prediction_df.loc[
|
850 |
prediction_df['Target Family'] == family, 'Max. Sequence Identity'] = prediction_df.loc[
|
851 |
+
prediction_df['Target Family'] == family, 'X2'].swifter.apply(
|
852 |
max_sequence_identity,
|
853 |
seen_fastas=tuple(get_seen_fastas(family=family, task=task_value))
|
854 |
)
|
|
|
903 |
|
904 |
if 'X1' in df.columns:
|
905 |
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
|
906 |
+
df['Compound'] = df['X1'].swifter.apply(
|
907 |
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
|
908 |
+
df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
|
909 |
+
df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
|
910 |
|
911 |
if task == 'Compound-Protein Binding Affinity':
|
912 |
# Convert Y^ from pIC50 to IC50
|
|
|
987 |
elif 'Y^' in df_html.columns:
|
988 |
job = 'Interaction Pair Inference'
|
989 |
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
|
990 |
+
df_html['Compound'] = df_html['Compound'].swifter.apply(
|
991 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
992 |
else:
|
993 |
df_html.drop(['Compound'], axis=1, inplace=True)
|
994 |
|
995 |
if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
|
996 |
+
df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
|
997 |
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
|
998 |
else:
|
999 |
df_html.drop(['Scaffold'], axis=1, inplace=True)
|
|
|
1001 |
df_html.rename(columns=column_aliases, inplace=True)
|
1002 |
df_html.index.name = 'Index'
|
1003 |
if 'Target FASTA' in df_html.columns:
|
1004 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
1005 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1006 |
|
1007 |
num_cols = df_html.select_dtypes('number').columns
|
|
|
1019 |
if 'Target ID' in df_html.columns:
|
1020 |
df_html.drop(['Target FASTA'], axis=1, inplace=True)
|
1021 |
if 'Target FASTA' in df_html.columns:
|
1022 |
+
df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
|
1023 |
lambda x: wrap_text(x) if not pd.isna(x) else x)
|
1024 |
if 'Scaffold SMILES' in df_html.columns:
|
1025 |
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
|
|
|
1273 |
df_report = df.copy()
|
1274 |
try:
|
1275 |
for filter_name in filter_list:
|
1276 |
+
df_report[filter_name] = df_report['Compound'].swifter.apply(
|
1277 |
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
|
1278 |
|
1279 |
for score_name in score_list:
|
1280 |
+
df_report[score_name] = df_report['Compound'].swifter.apply(
|
1281 |
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
|
1282 |
|
1283 |
# pie_chart = None
|
|
|
1919 |
alignment = aligner.align(processed_fasta, query)
|
1920 |
return alignment.score / max(len(processed_fasta), len(query))
|
1921 |
|
1922 |
+
alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
|
1923 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1924 |
family = str(row['Target Family']).title()
|
1925 |
return gr.Dropdown(value=family,
|
|
|
2240 |
infer_df = pd.read_csv(drug_target_pair_upload)
|
2241 |
validate_columns(infer_df, ['X1', 'X2'])
|
2242 |
|
2243 |
+
infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
|
2244 |
validate_seq_str, regex=SMILES_PAT)
|
2245 |
if not infer_df['X1_ERR'].isna().all():
|
2246 |
raise ValueError(
|
2247 |
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")
|
2248 |
|
2249 |
+
infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
|
2250 |
validate_seq_str, regex=FASTA_PAT)
|
2251 |
if not infer_df['X2_ERR'].isna().all():
|
2252 |
raise ValueError(
|
|
|
2565 |
|
2566 |
|
2567 |
if __name__ == "__main__":
|
2568 |
+
# pandarallel.initialize()
|
2569 |
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
|
2570 |
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
|
2571 |
scheduler.add_job(check_expiry, 'interval', hours=1)
|