DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on Apr 25, 2024

Commit

0ab9582

verified ·

1 Parent(s): 34a8526

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -6

app.py CHANGED Viewed

@@ -758,6 +758,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
     task_file_abbr = {'Compound-Protein Interaction': 'CPI', 'Compound-Protein Binding Affinity': 'CPA'}
     predictions_file = None
     df_training = pd.read_csv(f'data/complete_{TASK_MAP[task].lower()}_dataset.csv')
     orig_df = pd.read_csv(predict_filepath)
     alignment_df = get_fasta_family_map()
     prediction_df = pd.DataFrame()
@@ -791,10 +792,16 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
     orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
     detect_family.cache_clear()
-    orig_df = orig_df.merge(df_training[['X1', 'X2', 'Y']], on=['X1', 'X2'], how='left', indicator=False)
     annotated_df = orig_df[~orig_df['Y'].isna()].copy()
     annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
     annotated_df['Source'] = 'Database'
     # Save the unannotated data
     unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y'], axis=1)
     if not unannotated_df.empty:
@@ -804,7 +811,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
         status = "COMPLETED"
         return {run_state: False}
-    columns_to_drop = ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']
     columns_to_drop = [col for col in columns_to_drop if col in orig_df.columns]
     orig_df.drop(columns_to_drop, axis=1, inplace=True)
@@ -842,7 +849,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
                 subset.to_csv(predict_subset_filepath, index=False, na_rep='')
                 seen_compounds = get_seen_smiles(family, task_value)['X1'].values
-                if subset['X1'].iloc[0] in seen_compounds:
                     scenario = "Seen Compound"
                 else:
                     scenario = "Unseen Compound"
@@ -852,7 +859,7 @@ def submit_predict(predict_filepath, task, preset, target_family, opts, job_info
                                            & (benchmark_df['Type'] == 'Family')]
                 seen_compounds = get_seen_smiles('General', task_value)['X1'].values
-                if subset['X1'].iloc[0] in seen_compounds:
                     scenario = "Seen Compound"
                 else:
                     scenario = "Unseen Compound"
@@ -1638,7 +1645,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                         )
                         target_identify_target_family = gr.Dropdown(
                             choices=['Family-Specific Auto-Recommendation'] + list(TARGET_FAMILY_MAP.keys()),
-                            value='General',
                             label='Step 2. Select Target Family')
                     with gr.Column():
                         HelpTip(
@@ -1660,7 +1667,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                             "Please refer to the documentation for detailed benchmark results."
                         )
                         target_identify_preset = gr.Dropdown(
-                            ['Family-Specific Auto-Recommendation'] + list(PRESET_MAP.keys()),
                             label='Step 4. Select a Preset Model')
                         identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
                                                                   variant='primary')

     task_file_abbr = {'Compound-Protein Interaction': 'CPI', 'Compound-Protein Binding Affinity': 'CPA'}
     predictions_file = None
     df_training = pd.read_csv(f'data/complete_{TASK_MAP[task].lower()}_dataset.csv')
+    df_training['X1^'] = df_training['X1']
     orig_df = pd.read_csv(predict_filepath)
     alignment_df = get_fasta_family_map()
     prediction_df = pd.DataFrame()
     orig_df['Target Family'] = orig_df['Target Family'].str.capitalize()
     detect_family.cache_clear()
+    orig_df['X1^'] = orig_df['X1'].parallel_apply(rdkit_canonicalize)
+    orig_df = orig_df.merge(df_training[['X1^', 'X2', 'Y']], on=['X1^', 'X2'], how='left', indicator=False)
     annotated_df = orig_df[~orig_df['Y'].isna()].copy()
     annotated_df.rename(columns={'Y': 'Y^'}, inplace=True)
     annotated_df['Source'] = 'Database'
+    columns_to_drop = ['X1^', 'Compound', 'Scaffold', 'Scaffold SMILES']
+    columns_to_drop = [col for col in columns_to_drop if col in annotated_df.columns]
+    annotated_df.drop(columns_to_drop, axis=1, inplace=True)
     # Save the unannotated data
     unannotated_df = orig_df[orig_df['Y'].isna()].drop(['Y'], axis=1)
     if not unannotated_df.empty:
         status = "COMPLETED"
         return {run_state: False}
+    columns_to_drop = ['ID1', 'X1^', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']
     columns_to_drop = [col for col in columns_to_drop if col in orig_df.columns]
     orig_df.drop(columns_to_drop, axis=1, inplace=True)
                 subset.to_csv(predict_subset_filepath, index=False, na_rep='')
                 seen_compounds = get_seen_smiles(family, task_value)['X1'].values
+                if subset['X1^'].iloc[0] in seen_compounds:
                     scenario = "Seen Compound"
                 else:
                     scenario = "Unseen Compound"
                                            & (benchmark_df['Type'] == 'Family')]
                 seen_compounds = get_seen_smiles('General', task_value)['X1'].values
+                if subset['X1^'].iloc[0] in seen_compounds:
                     scenario = "Seen Compound"
                 else:
                     scenario = "Unseen Compound"
                         )
                         target_identify_target_family = gr.Dropdown(
                             choices=['Family-Specific Auto-Recommendation'] + list(TARGET_FAMILY_MAP.keys()),
+                            value='Family-Specific Auto-Recommendation',
                             label='Step 2. Select Target Family')
                     with gr.Column():
                         HelpTip(
                             "Please refer to the documentation for detailed benchmark results."
                         )
                         target_identify_preset = gr.Dropdown(
+                            choices=['Family-Specific Auto-Recommendation'] + list(PRESET_MAP.keys()),
+                            value='Family-Specific Auto-Recommendation',
                             label='Step 4. Select a Preset Model')
                         identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
                                                                   variant='primary')