libokj committed
Commit efa49ac · verified · 1 Parent(s): cfe2359

Update app.py

Files changed (1)
  1. app.py +21 -20
app.py CHANGED
@@ -23,7 +23,7 @@ from email_validator import validate_email, EmailNotValidError
import gradio as gr
import hydra
import pandas as pd
- # from pandarallel import pandarallel
+ from pandarallel import pandarallel
import requests
from requests.adapters import HTTPAdapter, Retry
from markdown import markdown
@@ -42,7 +42,7 @@ import panel as pn
from apscheduler.schedulers.background import BackgroundScheduler
from tinydb import TinyDB, Query

- import swifter
+ # import swifter
from tqdm.auto import tqdm

from deepscreen.data.dti import validate_seq_str, rdkit_canonicalize, FASTA_PAT, SMILES_PAT
@@ -741,7 +741,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
orig_df['Target Family'].isna(), 'Target Family'
] = orig_df.loc[
orig_df['Target Family'].isna(), 'X2'
- ].swifter.apply(detect_family)
+ ].parallel_apply(detect_family)

detect_family.cache_clear()

@@ -810,6 +810,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
config_name="webserver_inference",
overrides=[f"task={task_value}",
f"preset={preset_value}",
+ # f"ckpt_path=D:/checkpoints/{preset_value}-{task_value}-{target_family}.ckpt",
f"ckpt_path=resources/checkpoints/{preset_value}-{task_value}-{target_family}.ckpt",
f"data.data_file='{str(predict_subset_filepath)}'"])

@@ -822,14 +823,14 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
prediction_df = pd.concat([prediction_df, annotated_df], ignore_index=True)

# prediction_df['Max. Tanimoto Similarity'] = prediction_df.groupby('Target Family')['X1'].apply(
- # lambda group: group.swifter.apply(
+ # lambda group: group.parallel_apply(
# max_tanimoto_similarity,
# seen_smiles=tuple(get_seen_smiles(family=group.name, task=task_value))
# )
# ).values
#
# prediction_df['Max. Sequence Identity'] = prediction_df.groupby('Target Family')['X2'].apply(
- # lambda group: group.swifter.apply(
+ # lambda group: group.parallel_apply(
# max_sequence_identity,
# seen_fastas=tuple(get_seen_fastas(family=group.name, task=task_value))
# )
@@ -838,7 +839,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
for family in prediction_df['Target Family'].unique():
prediction_df.loc[
prediction_df['Target Family'] == family, 'Max. Tanimoto Similarity'] = prediction_df.loc[
- prediction_df['Target Family'] == family, 'X1'].swifter.apply(
+ prediction_df['Target Family'] == family, 'X1'].parallel_apply(
max_tanimoto_similarity,
seen_smiles=tuple(get_seen_smiles(family=family, task=task_value))
)
@@ -847,7 +848,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, state):
for family in prediction_df['Target Family'].unique():
prediction_df.loc[
prediction_df['Target Family'] == family, 'Max. Sequence Identity'] = prediction_df.loc[
- prediction_df['Target Family'] == family, 'X2'].swifter.apply(
+ prediction_df['Target Family'] == family, 'X2'].parallel_apply(
max_sequence_identity,
seen_fastas=tuple(get_seen_fastas(family=family, task=task_value))
)
@@ -902,10 +903,10 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):

if 'X1' in df.columns:
if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
- df['Compound'] = df['X1'].swifter.apply(
+ df['Compound'] = df['X1'].parallel_apply(
lambda smiles: PandasTools._MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
- df['Scaffold'] = df['Compound'].swifter.apply(MurckoScaffold.GetScaffoldForMol)
- df['Scaffold SMILES'] = df['Scaffold'].swifter.apply(lambda x: Chem.MolToSmiles(x))
+ df['Scaffold'] = df['Compound'].parallel_apply(MurckoScaffold.GetScaffoldForMol)
+ df['Scaffold SMILES'] = df['Scaffold'].parallel_apply(lambda x: Chem.MolToSmiles(x))

if task == 'Compound-Protein Binding Affinity':
# Convert Y^ from pIC50 to IC50
@@ -986,13 +987,13 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
elif 'Y^' in df_html.columns:
job = 'Interaction Pair Inference'
if 'Compound' in df_html.columns and 'Exclude Molecular Graph' not in opts:
- df_html['Compound'] = df_html['Compound'].swifter.apply(
+ df_html['Compound'] = df_html['Compound'].parallel_apply(
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
else:
df_html.drop(['Compound'], axis=1, inplace=True)

if 'Scaffold' in df_html.columns and 'Exclude Scaffold Graph' not in opts:
- df_html['Scaffold'] = df_html['Scaffold'].swifter.apply(
+ df_html['Scaffold'] = df_html['Scaffold'].parallel_apply(
lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
else:
df_html.drop(['Scaffold'], axis=1, inplace=True)
@@ -1000,7 +1001,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
df_html.rename(columns=column_aliases, inplace=True)
df_html.index.name = 'Index'
if 'Target FASTA' in df_html.columns:
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
lambda x: wrap_text(x) if not pd.isna(x) else x)

num_cols = df_html.select_dtypes('number').columns
@@ -1018,7 +1019,7 @@ def create_html_report(df, file=None, task=None, opts=(), progress=gr.Progress(t
if 'Target ID' in df_html.columns:
df_html.drop(['Target FASTA'], axis=1, inplace=True)
if 'Target FASTA' in df_html.columns:
- df_html['Target FASTA'] = df_html['Target FASTA'].swifter.apply(
+ df_html['Target FASTA'] = df_html['Target FASTA'].parallel_apply(
lambda x: wrap_text(x) if not pd.isna(x) else x)
if 'Scaffold SMILES' in df_html.columns:
df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
@@ -1272,11 +1273,11 @@ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_
df_report = df.copy()
try:
for filter_name in filter_list:
- df_report[filter_name] = df_report['Compound'].swifter.apply(
+ df_report[filter_name] = df_report['Compound'].parallel_apply(
lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)

for score_name in score_list:
- df_report[score_name] = df_report['Compound'].swifter.apply(
+ df_report[score_name] = df_report['Compound'].parallel_apply(
lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)

# pie_chart = None
@@ -1918,7 +1919,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
alignment = aligner.align(processed_fasta, query)
return alignment.score / max(len(processed_fasta), len(query))

- alignment_df['score'] = alignment_df['X2'].swifter.apply(align_score)
+ alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
row = alignment_df.loc[alignment_df['score'].idxmax()]
family = str(row['Target Family']).title()
return gr.Dropdown(value=family,
@@ -2239,13 +2240,13 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
infer_df = pd.read_csv(drug_target_pair_upload)
validate_columns(infer_df, ['X1', 'X2'])

- infer_df['X1_ERR'] = infer_df['X1'].swifter.apply(
+ infer_df['X1_ERR'] = infer_df['X1'].parallel_apply(
validate_seq_str, regex=SMILES_PAT)
if not infer_df['X1_ERR'].isna().all():
raise ValueError(
f"Encountered invalid SMILES:\n{infer_df[~infer_df['X1_ERR'].isna()][['X1', 'X1_ERR']]}")

- infer_df['X2_ERR'] = infer_df['X2'].swifter.apply(
+ infer_df['X2_ERR'] = infer_df['X2'].parallel_apply(
validate_seq_str, regex=FASTA_PAT)
if not infer_df['X2_ERR'].isna().all():
raise ValueError(
@@ -2564,7 +2565,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES


if __name__ == "__main__":
- # pandarallel.initialize()
+ pandarallel.initialize()
hydra.initialize(version_base="1.3", config_path="configs", job_name="webserver_inference")
demo.queue(default_concurrency_limit=None, max_size=10).launch(show_api=False)
scheduler.add_job(check_expiry, 'interval', hours=1)
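For readers skimming the diff: the commit swaps swifter for pandarallel as the parallel pandas backend, calling `pandarallel.initialize()` once under the `__main__` guard and replacing each `.swifter.apply(...)` with `.parallel_apply(...)`. The sketch below illustrates that pattern only; the toy DataFrame and the `score` function are invented for illustration and are not part of app.py, which applies the same sequence to functions such as `detect_family`, `validate_seq_str`, and the RDKit helpers.

```python
# Minimal sketch of the swifter -> pandarallel migration pattern (assumed toy data).
import pandas as pd
from pandarallel import pandarallel


def score(seq: str, min_len: int = 3) -> int:
    """Stand-in for a per-row function such as detect_family or validate_seq_str."""
    return len(seq) if len(seq) >= min_len else 0


if __name__ == "__main__":
    # pandarallel must be initialized once before any .parallel_apply() call;
    # the commit does this under app.py's __main__ guard.
    pandarallel.initialize(progress_bar=True)

    df = pd.DataFrame({"X2": ["MKTAYIAK", "GG", "MKVLAAGIVP"]})

    # Before (swifter):    df["score"] = df["X2"].swifter.apply(score, min_len=3)
    # After (pandarallel): keyword arguments are still forwarded to the function,
    # as in the real code's seen_smiles= / seen_fastas= / regex= arguments.
    df["score"] = df["X2"].parallel_apply(score, min_len=3)
    print(df)
```

Because pandarallel runs the applied function in separate worker processes, the function and any arguments it receives must be picklable.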