DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

libokj committed on Apr 17, 2024

Commit

0872a03

verified ·

1 Parent(s): 59722b5

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -44

app.py CHANGED Viewed

@@ -1491,13 +1491,13 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                     drug_library_upload_btn = gr.UploadButton(
                         label='OR Upload Your Own Library', variant='primary')
                     drug_library_upload = gr.File(label='Custom compound library file', visible=False)
-                with gr.Column():
-                    drug_screen_opts = gr.CheckboxGroup(
-                        ['Include Max. Tanimoto Similarity'],
-                        label='Step 6. Select Additional Options',
-                        info="Calculating the maximum Tanimoto similarity of the library compounds to the "
-                             "training dataset is an experimental feature and may take a considerable amount of time."
-                    )
             with gr.Row():
                 with gr.Column():
                     drug_screen_email = gr.Textbox(
@@ -1507,10 +1507,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                     )
             with gr.Row(visible=True):
-                with gr.Column():
                     drug_screen_clr_btn = gr.ClearButton(size='lg')
                     drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
-            # TODO Modify the pd df directly with df['X2'] = target
         screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
@@ -1598,13 +1598,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                             label='OR Upload Your Own Library', variant='primary')
                         target_library_upload = gr.File(label='Custom target library file', visible=False)
-                    with gr.Column():
-                        target_identify_opts = gr.CheckboxGroup(
-                            ['Include Max. Sequence Identity'],
-                            label='Step 6. Select Additional Options',
-                            info="Calculating the maximum sequence identity of the library protein to the "
-                                 "training dataset is an experimental feature and may take a considerable amount of time."
-                        )
                 with gr.Row():
                     with gr.Column():
                         target_identify_email = gr.Textbox(
@@ -1708,6 +1707,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                         label='Step 4. Select a Preset Model')
                     # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
                     #                                        variant='primary')
             with gr.Row():
                 pair_infer_email = gr.Textbox(
@@ -1742,7 +1742,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
             with gr.Row():
                 with gr.Column(scale=1):
                     file_for_report = gr.File(interactive=True, type='filepath')
-                    report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
                                               label='Specify the Task Labels in the Uploaded Dataset')
                 with gr.Column(scale=2):
                     with gr.Row():
@@ -1908,9 +1909,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
             alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
             row = alignment_df.loc[alignment_df['score'].idxmax()]
-            return gr.Dropdown(value=row['Target Family'],
                                info=f"Reason: Best sequence identity ({row['score']}) "
-                                    f"with {row['ID2']} from family {row['Target Family']}")
         except Exception as e:
             gr.Warning("Failed to detect the protein family due to error: " + str(e))
@@ -2044,7 +2046,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
-    def identify_recommend_model(smiles, task):
         task = TASK_MAP[task]
         score = TASK_METRIC_MAP[task]
         benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
@@ -2052,15 +2054,24 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         if not smiles:
             gr.Warning('Please enter a valid SMILES for model recommendation.')
             return None
-        seen_compounds = pd.read_csv(
-            f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
         if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
             scenario = "Seen Compound"
         else:
             scenario = "Unseen Compound"
-        filtered_df = benchmark_df[(benchmark_df['Family'] == 'All Families')
                                    & (benchmark_df['Scenario'] == scenario)
                                    & (benchmark_df['Type'] == 'General')]
@@ -2072,7 +2083,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     identify_preset_recommend_btn.click(fn=identify_recommend_model,
-                                        inputs=[compound_smiles, target_identify_task],
                                         outputs=target_identify_preset, show_progress='hidden')
@@ -2304,22 +2315,36 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     drug_screen_clr_btn.click(
         lambda: ['General'] + [[]] + [None] * 5,
         outputs=[drug_screen_target_family, drug_screen_opts,
-                 target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email])
     target_identify_clr_btn.click(
         lambda: ['General'] + [[]] + [None] * 5,
         outputs=[target_identify_target_family, target_identify_opts,
-                 compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email])
     pair_infer_clr_btn.click(
         lambda: ['General'] + [None] * 5,
         outputs=[pair_infer_target_family,
-                 infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email])
     report_clr_btn.click(
-        lambda: [[]] * 3 + [None] * 5,
-        outputs=[scores, filters, html_opts,
-                 target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email])
     def update_preset(family, preset):
@@ -2405,7 +2430,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     pair_infer_click.success(
         fn=submit_predict,
         inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
-                pair_infer_target_family, run_state, ],  # , pair_infer_email],
         outputs=[run_state, ]
     )
@@ -2448,7 +2473,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     report_df_change = file_for_report.change(
         fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
         concurrency_limit=100,
-    ).then(
         fn=lambda: [gr.Button(interactive=True)] * 2,
         outputs=[csv_generate, html_generate],
     )
@@ -2457,7 +2482,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
         cancels=[report_df_change],
         concurrency_limit=100,
-    ).then(
         fn=inquire_task, inputs=[raw_df],
         outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
     )
@@ -2465,7 +2490,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     file_for_report.clear(
         fn=lambda: [gr.Button(interactive=False)] * 3 +
                    [gr.File(visible=False, value=None)] * 2 +
-                   [gr.Dropdown(visible=False, value=None), gr.HTML(visible=False)],
         cancels=[report_df_change],
         outputs=[
             csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
@@ -2481,29 +2506,26 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         concurrency_limit=100,
     )
-    report_task.select(fn=lambda: gr.Button(interactive=True),
-                       outputs=analyze_btn)
     def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
         csv_sep_map = {
             'Comma': ',',
             'Tab': '\t',
         }
-        Y_colname = 'Y^'
         if isinstance(task, str):
             if task == 'Compound-Protein Interaction':
-                Y_colname = 'Y^_pIC50',
             elif task == 'Compound-Protein Binding Affinity':
-                Y_colname = 'Y^_prob'
         try:
             now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
-            df.rename(columns={'Y^': Y_colname}).drop(
                 labels=['Compound', 'Scaffold'], axis=1
             ).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
-            return gr.File(filename)
         except Exception as e:
             gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
             return None
@@ -2523,11 +2545,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
     # html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
     csv_generate.click(
-        lambda: [gr.File(visible=True)], outputs=[csv_download_file],
     ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
            outputs=csv_download_file, show_progress='full')
     html_generate.click(
-        lambda: [gr.File(visible=True)], outputs=[html_download_file],
     ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
            outputs=html_download_file, show_progress='full')

                     drug_library_upload_btn = gr.UploadButton(
                         label='OR Upload Your Own Library', variant='primary')
                     drug_library_upload = gr.File(label='Custom compound library file', visible=False)
+                drug_screen_opts = gr.CheckboxGroup(
+                    ['Include Max. Tanimoto Similarity'],
+                    label='Step 6. Select Additional Options',
+                    info="Calculating the maximum Tanimoto similarity of the library compounds to the "
+                         "training dataset is an experimental feature and may take a considerable amount of time."
+                )
             with gr.Row():
                 with gr.Column():
                     drug_screen_email = gr.Textbox(
                     )
             with gr.Row(visible=True):
+                with gr.Row():
                     drug_screen_clr_btn = gr.ClearButton(size='lg')
                     drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
+        # TODO Modify the pd df directly with df['X2'] = target
         screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
                             label='OR Upload Your Own Library', variant='primary')
                         target_library_upload = gr.File(label='Custom target library file', visible=False)
+                    target_identify_opts = gr.CheckboxGroup(
+                        ['Include Max. Sequence Identity'],
+                        label='Step 6. Select Additional Options',
+                        info="Calculating the maximum sequence identity of the library protein to the "
+                             "training dataset is an experimental feature and may take a considerable amount of time."
+                    )
                 with gr.Row():
                     with gr.Column():
                         target_identify_email = gr.Textbox(
                         label='Step 4. Select a Preset Model')
                     # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
                     #                                        variant='primary')
+            pair_infer_opts = gr.CheckboxGroup(visible=False)
             with gr.Row():
                 pair_infer_email = gr.Textbox(
             with gr.Row():
                 with gr.Column(scale=1):
                     file_for_report = gr.File(interactive=True, type='filepath')
+                    report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False,
+                                              value='Compound-Protein Interaction',
                                               label='Specify the Task Labels in the Uploaded Dataset')
                 with gr.Column(scale=2):
                     with gr.Row():
             alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
             row = alignment_df.loc[alignment_df['score'].idxmax()]
+            family = row['Target Family'].title()
+            return gr.Dropdown(value=family,
                                info=f"Reason: Best sequence identity ({row['score']}) "
+                                    f"with {row['ID2']} from family {family}")
         except Exception as e:
             gr.Warning("Failed to detect the protein family due to error: " + str(e))
     ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
+    def identify_recommend_model(smiles, family, task):
         task = TASK_MAP[task]
         score = TASK_METRIC_MAP[task]
         benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
         if not smiles:
             gr.Warning('Please enter a valid SMILES for model recommendation.')
             return None
+        if family == 'Family-Specific Auto-Recommendation':
+            return None
+        if family == 'General':
+            seen_compounds = pd.read_csv(
+                f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
+            family = 'All Families'
+        else:
+            seen_compounds = pd.read_csv(
+                f'data/benchmarks/seen_compounds/{TARGET_FAMILY_MAP[family.title()]}_{task.lower()}_random_split.csv')
         if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
             scenario = "Seen Compound"
         else:
             scenario = "Unseen Compound"
+        filtered_df = benchmark_df[(benchmark_df['Family'] == family)
                                    & (benchmark_df['Scenario'] == scenario)
                                    & (benchmark_df['Type'] == 'General')]
     identify_preset_recommend_btn.click(fn=identify_recommend_model,
+                                        inputs=[compound_smiles, target_identify_target_family, target_identify_task],
                                         outputs=target_identify_preset, show_progress='hidden')
     drug_screen_clr_btn.click(
         lambda: ['General'] + [[]] + [None] * 5,
         outputs=[drug_screen_target_family, drug_screen_opts,
+                 target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email],
+        show_progress='hidden'
+    )
     target_identify_clr_btn.click(
         lambda: ['General'] + [[]] + [None] * 5,
         outputs=[target_identify_target_family, target_identify_opts,
+                 compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email],
+        show_progress='hidden'
+    )
     pair_infer_clr_btn.click(
         lambda: ['General'] + [None] * 5,
         outputs=[pair_infer_target_family,
+                 infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email],
+        show_progress='hidden'
+    )
     report_clr_btn.click(
+        lambda: [[]] * 3 + [None] * 5 +
+                [gr.Button(interactive=False)] * 3 +
+                [gr.File(visible=False, value=None)] * 2 +
+                [gr.Dropdown(visible=False, value=None), ''],
+        outputs=[
+            scores, filters, html_opts,
+            file_for_report, raw_df, report_df,
+            csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
+        ],
+        show_progress='hidden'
+    )
     def update_preset(family, preset):
     pair_infer_click.success(
         fn=submit_predict,
         inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
+                pair_infer_target_family, pair_infer_opts, run_state, ],  # , pair_infer_email],
         outputs=[run_state, ]
     )
     report_df_change = file_for_report.change(
         fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
         concurrency_limit=100,
+    ).success(
         fn=lambda: [gr.Button(interactive=True)] * 2,
         outputs=[csv_generate, html_generate],
     )
         fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
         cancels=[report_df_change],
         concurrency_limit=100,
+    ).success(
         fn=inquire_task, inputs=[raw_df],
         outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
     )
     file_for_report.clear(
         fn=lambda: [gr.Button(interactive=False)] * 3 +
                    [gr.File(visible=False, value=None)] * 2 +
+                   [gr.Dropdown(visible=False, value=None), ''],
         cancels=[report_df_change],
         outputs=[
             csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
         concurrency_limit=100,
     )
     def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
         csv_sep_map = {
             'Comma': ',',
             'Tab': '\t',
         }
+        y_colname = 'Y^'
         if isinstance(task, str):
             if task == 'Compound-Protein Interaction':
+                y_colname = 'Y^_prob'
             elif task == 'Compound-Protein Binding Affinity':
+                y_colname = 'Y^_pIC50'
         try:
             now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
+            df.rename(columns={'Y^': y_colname}).drop(
                 labels=['Compound', 'Scaffold'], axis=1
             ).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
+            return gr.File(filename, visible=True)
         except Exception as e:
             gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
             return None
     # html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
     csv_generate.click(
+        lambda: gr.File(visible=True), outputs=csv_download_file,
     ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
            outputs=csv_download_file, show_progress='full')
     html_generate.click(
+        lambda: gr.File(visible=True), outputs=html_download_file,
     ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
            outputs=html_download_file, show_progress='full')