libokj committed on
Commit
0ab9582
·
verified ·
1 Parent(s): 34a8526

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -758,6 +758,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
758
  task_file_abbr = {'Compound-Protein Interaction': 'CPI', 'Compound-Protein Binding Affinity': 'CPA'}
759
  predictions_file = None
760
  df_training = pd.read_csv(f'data/complete_{TASK_MAP[task].lower()}_dataset.csv')
 
761
  orig_df = pd.read_csv(predict_filepath)
762
  alignment_df = get_fasta_family_map()
763
  prediction_df = pd.DataFrame()
@@ -791,10 +792,16 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
791
  orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
792
  detect_family.cache_clear()
793
 
794
- orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
 
 
795
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
796
  annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
797
  annotated_df['Source'] = 'Database'
 
 
 
 
798
  # Save the unannotated data
799
  unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y'], axis=1)
800
  if not unannotated_df.empty:
@@ -804,7 +811,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
804
  status = "COMPLETED"
805
  return {run_state: False}
806
 
807
- columns_to_drop = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']
808
  columns_to_drop = [col for col in columns_to_drop if col in orig_df.columns]
809
  orig_df.drop(columns_to_drop, axis=1, inplace=True)
810
 
@@ -842,7 +849,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
842
  subset.to_csv(predict_subset_filepath, index=False, na_rep='')
843
 
844
  seen_compounds = get_seen_smiles(family, task_value)['X1'].values
845
- if subset['X1'].iloc[0] in seen_compounds:
846
  scenario = "Seen Compound"
847
  else:
848
  scenario = "Unseen Compound"
@@ -852,7 +859,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
852
  & (benchmark_df['Type'] == 'Family')]
853
 
854
  seen_compounds = get_seen_smiles('General', task_value)['X1'].values
855
- if subset['X1'].iloc[0] in seen_compounds:
856
  scenario = "Seen Compound"
857
  else:
858
  scenario = "Unseen Compound"
@@ -1638,7 +1645,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1638
  )
1639
  target_identify_target_family = gr.Dropdown(
1640
  choices=['Family-Specific Auto-Recommendation'] + list(TARGET_FAMILY_MAP.keys()),
1641
- value='General',
1642
  label='Step 2. Select Target Family')
1643
  with gr.Column():
1644
  HelpTip(
@@ -1660,7 +1667,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1660
  "Please refer to the documentation for detailed benchmark results."
1661
  )
1662
  target_identify_preset = gr.Dropdown(
1663
- ['Family-Specific Auto-Recommendation'] + list(PRESET_MAP.keys()),
 
1664
  label='Step 4. Select a Preset Model')
1665
  identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1666
  variant='primary')
 
758
  task_file_abbr = {'Compound-Protein Interaction': 'CPI', 'Compound-Protein Binding Affinity': 'CPA'}
759
  predictions_file = None
760
  df_training = pd.read_csv(f'data/complete_{TASK_MAP[task].lower()}_dataset.csv')
761
+ df_training['X1^'] = df_training['X1']
762
  orig_df = pd.read_csv(predict_filepath)
763
  alignment_df = get_fasta_family_map()
764
  prediction_df = pd.DataFrame()
 
792
  orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
793
  detect_family.cache_clear()
794
 
795
+ orig_df['X1^'] = orig_df['X1'].parallel_apply(rdkit_canonicalize)
796
+
797
+ orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
798
  annotated_df = orig_df[~orig_df['Y'].isna()].copy()
799
  annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
800
  annotated_df['Source'] = 'Database'
801
+ columns_to_drop = ['X1^', 'Compound', 'Scaffold', 'Scaffold SMILES']
802
+ columns_to_drop = [col for col in columns_to_drop if col in annotated_df.columns]
803
+ annotated_df.drop(columns_to_drop, axis=1, inplace=True)
804
+
805
  # Save the unannotated data
806
  unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y'], axis=1)
807
  if not unannotated_df.empty:
 
811
  status = "COMPLETED"
812
  return {run_state: False}
813
 
814
+ columns_to_drop = ['ID1', 'X1^', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']
815
  columns_to_drop = [col for col in columns_to_drop if col in orig_df.columns]
816
  orig_df.drop(columns_to_drop, axis=1, inplace=True)
817
 
 
849
  subset.to_csv(predict_subset_filepath, index=False, na_rep='')
850
 
851
  seen_compounds = get_seen_smiles(family, task_value)['X1'].values
852
+ if subset['X1^'].iloc[0] in seen_compounds:
853
  scenario = "Seen Compound"
854
  else:
855
  scenario = "Unseen Compound"
 
859
  & (benchmark_df['Type'] == 'Family')]
860
 
861
  seen_compounds = get_seen_smiles('General', task_value)['X1'].values
862
+ if subset['X1^'].iloc[0] in seen_compounds:
863
  scenario = "Seen Compound"
864
  else:
865
  scenario = "Unseen Compound"
 
1645
  )
1646
  target_identify_target_family = gr.Dropdown(
1647
  choices=['Family-Specific Auto-Recommendation'] + list(TARGET_FAMILY_MAP.keys()),
1648
+ value='Family-Specific Auto-Recommendation',
1649
  label='Step 2. Select Target Family')
1650
  with gr.Column():
1651
  HelpTip(
 
1667
  "Please refer to the documentation for detailed benchmark results."
1668
  )
1669
  target_identify_preset = gr.Dropdown(
1670
+ choices=['Family-Specific Auto-Recommendation'] + list(PRESET_MAP.keys()),
1671
+ value='Family-Specific Auto-Recommendation',
1672
  label='Step 4. Select a Preset Model')
1673
  identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1674
  variant='primary')