libokj committed on
Commit
0872a03
·
verified ·
1 Parent(s): 59722b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -44
app.py CHANGED
@@ -1491,13 +1491,13 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1491
  drug_library_upload_btn = gr.UploadButton(
1492
  label='OR Upload Your Own Library', variant='primary')
1493
  drug_library_upload = gr.File(label='Custom compound library file', visible=False)
1494
- with gr.Column():
1495
- drug_screen_opts = gr.CheckboxGroup(
1496
- ['Include Max. Tanimoto Similarity'],
1497
- label='Step 6. Select Additional Options',
1498
- info="Calculating the maximum Tanimoto similarity of the library compounds to the "
1499
- "training dataset is an experimental feature and may take a considerable amount of time."
1500
- )
1501
  with gr.Row():
1502
  with gr.Column():
1503
  drug_screen_email = gr.Textbox(
@@ -1507,10 +1507,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1507
  )
1508
 
1509
  with gr.Row(visible=True):
1510
- with gr.Column():
1511
  drug_screen_clr_btn = gr.ClearButton(size='lg')
1512
  drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
1513
- # TODO Modify the pd df directly with df['X2'] = target
1514
 
1515
  screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1516
 
@@ -1598,13 +1598,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1598
  label='OR Upload Your Own Library', variant='primary')
1599
  target_library_upload = gr.File(label='Custom target library file', visible=False)
1600
 
1601
- with gr.Column():
1602
- target_identify_opts = gr.CheckboxGroup(
1603
- ['Include Max. Sequence Identity'],
1604
- label='Step 6. Select Additional Options',
1605
- info="Calculating the maximum sequence identity of the library protein to the "
1606
- "training dataset is an experimental feature and may take a considerable amount of time."
1607
- )
1608
  with gr.Row():
1609
  with gr.Column():
1610
  target_identify_email = gr.Textbox(
@@ -1708,6 +1707,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1708
  label='Step 4. Select a Preset Model')
1709
  # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1710
  # variant='primary')
 
1711
 
1712
  with gr.Row():
1713
  pair_infer_email = gr.Textbox(
@@ -1742,7 +1742,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1742
  with gr.Row():
1743
  with gr.Column(scale=1):
1744
  file_for_report = gr.File(interactive=True, type='filepath')
1745
- report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
 
1746
  label='Specify the Task Labels in the Uploaded Dataset')
1747
  with gr.Column(scale=2):
1748
  with gr.Row():
@@ -1908,9 +1909,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1908
 
1909
  alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
1910
  row = alignment_df.loc[alignment_df['score'].idxmax()]
1911
- return gr.Dropdown(value=row['Target Family'],
 
1912
  info=f"Reason: Best sequence identity ({row['score']}) "
1913
- f"with {row['ID2']} from family {row['Target Family']}")
1914
  except Exception as e:
1915
  gr.Warning("Failed to detect the protein family due to error: " + str(e))
1916
 
@@ -2044,7 +2046,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2044
  ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
2045
 
2046
 
2047
- def identify_recommend_model(smiles, task):
2048
  task = TASK_MAP[task]
2049
  score = TASK_METRIC_MAP[task]
2050
  benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
@@ -2052,15 +2054,24 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2052
  if not smiles:
2053
  gr.Warning('Please enter a valid SMILES for model recommendation.')
2054
  return None
 
 
 
 
 
 
 
 
 
 
 
2055
 
2056
- seen_compounds = pd.read_csv(
2057
- f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
2058
  if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
2059
  scenario = "Seen Compound"
2060
  else:
2061
  scenario = "Unseen Compound"
2062
 
2063
- filtered_df = benchmark_df[(benchmark_df['Family'] == 'All Families')
2064
  & (benchmark_df['Scenario'] == scenario)
2065
  & (benchmark_df['Type'] == 'General')]
2066
 
@@ -2072,7 +2083,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2072
 
2073
 
2074
  identify_preset_recommend_btn.click(fn=identify_recommend_model,
2075
- inputs=[compound_smiles, target_identify_task],
2076
  outputs=target_identify_preset, show_progress='hidden')
2077
 
2078
 
@@ -2304,22 +2315,36 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2304
  drug_screen_clr_btn.click(
2305
  lambda: ['General'] + [[]] + [None] * 5,
2306
  outputs=[drug_screen_target_family, drug_screen_opts,
2307
- target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email])
 
 
2308
 
2309
  target_identify_clr_btn.click(
2310
  lambda: ['General'] + [[]] + [None] * 5,
2311
  outputs=[target_identify_target_family, target_identify_opts,
2312
- compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email])
 
 
2313
 
2314
  pair_infer_clr_btn.click(
2315
  lambda: ['General'] + [None] * 5,
2316
  outputs=[pair_infer_target_family,
2317
- infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email])
 
 
2318
 
2319
  report_clr_btn.click(
2320
- lambda: [[]] * 3 + [None] * 5,
2321
- outputs=[scores, filters, html_opts,
2322
- target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email])
 
 
 
 
 
 
 
 
2323
 
2324
 
2325
  def update_preset(family, preset):
@@ -2405,7 +2430,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2405
  pair_infer_click.success(
2406
  fn=submit_predict,
2407
  inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
2408
- pair_infer_target_family, run_state, ], # , pair_infer_email],
2409
  outputs=[run_state, ]
2410
  )
2411
 
@@ -2448,7 +2473,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2448
  report_df_change = file_for_report.change(
2449
  fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
2450
  concurrency_limit=100,
2451
- ).then(
2452
  fn=lambda: [gr.Button(interactive=True)] * 2,
2453
  outputs=[csv_generate, html_generate],
2454
  )
@@ -2457,7 +2482,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2457
  fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
2458
  cancels=[report_df_change],
2459
  concurrency_limit=100,
2460
- ).then(
2461
  fn=inquire_task, inputs=[raw_df],
2462
  outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
2463
  )
@@ -2465,7 +2490,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2465
  file_for_report.clear(
2466
  fn=lambda: [gr.Button(interactive=False)] * 3 +
2467
  [gr.File(visible=False, value=None)] * 2 +
2468
- [gr.Dropdown(visible=False, value=None), gr.HTML(visible=False)],
2469
  cancels=[report_df_change],
2470
  outputs=[
2471
  csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
@@ -2481,29 +2506,26 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2481
  concurrency_limit=100,
2482
  )
2483
 
2484
- report_task.select(fn=lambda: gr.Button(interactive=True),
2485
- outputs=analyze_btn)
2486
-
2487
 
2488
  def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
2489
  csv_sep_map = {
2490
  'Comma': ',',
2491
  'Tab': '\t',
2492
  }
2493
- Y_colname = 'Y^'
2494
  if isinstance(task, str):
2495
  if task == 'Compound-Protein Interaction':
2496
- Y_colname = 'Y^_pIC50',
2497
  elif task == 'Compound-Protein Binding Affinity':
2498
- Y_colname = 'Y^_prob'
2499
  try:
2500
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2501
  filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
2502
- df.rename(columns={'Y^': Y_colname}).drop(
2503
  labels=['Compound', 'Scaffold'], axis=1
2504
  ).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
2505
 
2506
- return gr.File(filename)
2507
  except Exception as e:
2508
  gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
2509
  return None
@@ -2523,11 +2545,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2523
  # html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
2524
 
2525
  csv_generate.click(
2526
- lambda: [gr.File(visible=True)], outputs=[csv_download_file],
2527
  ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
2528
  outputs=csv_download_file, show_progress='full')
2529
  html_generate.click(
2530
- lambda: [gr.File(visible=True)], outputs=[html_download_file],
2531
  ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
2532
  outputs=html_download_file, show_progress='full')
2533
 
 
1491
  drug_library_upload_btn = gr.UploadButton(
1492
  label='OR Upload Your Own Library', variant='primary')
1493
  drug_library_upload = gr.File(label='Custom compound library file', visible=False)
1494
+
1495
+ drug_screen_opts = gr.CheckboxGroup(
1496
+ ['Include Max. Tanimoto Similarity'],
1497
+ label='Step 6. Select Additional Options',
1498
+ info="Calculating the maximum Tanimoto similarity of the library compounds to the "
1499
+ "training dataset is an experimental feature and may take a considerable amount of time."
1500
+ )
1501
  with gr.Row():
1502
  with gr.Column():
1503
  drug_screen_email = gr.Textbox(
 
1507
  )
1508
 
1509
  with gr.Row(visible=True):
1510
+ with gr.Row():
1511
  drug_screen_clr_btn = gr.ClearButton(size='lg')
1512
  drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
1513
+ # TODO Modify the pd df directly with df['X2'] = target
1514
 
1515
  screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1516
 
 
1598
  label='OR Upload Your Own Library', variant='primary')
1599
  target_library_upload = gr.File(label='Custom target library file', visible=False)
1600
 
1601
+ target_identify_opts = gr.CheckboxGroup(
1602
+ ['Include Max. Sequence Identity'],
1603
+ label='Step 6. Select Additional Options',
1604
+ info="Calculating the maximum sequence identity of the library protein to the "
1605
+ "training dataset is an experimental feature and may take a considerable amount of time."
1606
+ )
 
1607
  with gr.Row():
1608
  with gr.Column():
1609
  target_identify_email = gr.Textbox(
 
1707
  label='Step 4. Select a Preset Model')
1708
  # infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
1709
  # variant='primary')
1710
+ pair_infer_opts = gr.CheckboxGroup(visible=False)
1711
 
1712
  with gr.Row():
1713
  pair_infer_email = gr.Textbox(
 
1742
  with gr.Row():
1743
  with gr.Column(scale=1):
1744
  file_for_report = gr.File(interactive=True, type='filepath')
1745
+ report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False,
1746
+ value='Compound-Protein Interaction',
1747
  label='Specify the Task Labels in the Uploaded Dataset')
1748
  with gr.Column(scale=2):
1749
  with gr.Row():
 
1909
 
1910
  alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
1911
  row = alignment_df.loc[alignment_df['score'].idxmax()]
1912
+ family = row['Target Family'].title()
1913
+ return gr.Dropdown(value=family,
1914
  info=f"Reason: Best sequence identity ({row['score']}) "
1915
+ f"with {row['ID2']} from family {family}")
1916
  except Exception as e:
1917
  gr.Warning("Failed to detect the protein family due to error: " + str(e))
1918
 
 
2046
  ], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
2047
 
2048
 
2049
+ def identify_recommend_model(smiles, family, task):
2050
  task = TASK_MAP[task]
2051
  score = TASK_METRIC_MAP[task]
2052
  benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
 
2054
  if not smiles:
2055
  gr.Warning('Please enter a valid SMILES for model recommendation.')
2056
  return None
2057
+ if family == 'Family-Specific Auto-Recommendation':
2058
+ return None
2059
+
2060
+ if family == 'General':
2061
+ seen_compounds = pd.read_csv(
2062
+ f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
2063
+ family = 'All Families'
2064
+
2065
+ else:
2066
+ seen_compounds = pd.read_csv(
2067
+ f'data/benchmarks/seen_compounds/{TARGET_FAMILY_MAP[family.title()]}_{task.lower()}_random_split.csv')
2068
 
 
 
2069
  if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
2070
  scenario = "Seen Compound"
2071
  else:
2072
  scenario = "Unseen Compound"
2073
 
2074
+ filtered_df = benchmark_df[(benchmark_df['Family'] == family)
2075
  & (benchmark_df['Scenario'] == scenario)
2076
  & (benchmark_df['Type'] == 'General')]
2077
 
 
2083
 
2084
 
2085
  identify_preset_recommend_btn.click(fn=identify_recommend_model,
2086
+ inputs=[compound_smiles, target_identify_target_family, target_identify_task],
2087
  outputs=target_identify_preset, show_progress='hidden')
2088
 
2089
 
 
2315
  drug_screen_clr_btn.click(
2316
  lambda: ['General'] + [[]] + [None] * 5,
2317
  outputs=[drug_screen_target_family, drug_screen_opts,
2318
+ target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email],
2319
+ show_progress='hidden'
2320
+ )
2321
 
2322
  target_identify_clr_btn.click(
2323
  lambda: ['General'] + [[]] + [None] * 5,
2324
  outputs=[target_identify_target_family, target_identify_opts,
2325
+ compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email],
2326
+ show_progress='hidden'
2327
+ )
2328
 
2329
  pair_infer_clr_btn.click(
2330
  lambda: ['General'] + [None] * 5,
2331
  outputs=[pair_infer_target_family,
2332
+ infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email],
2333
+ show_progress='hidden'
2334
+ )
2335
 
2336
  report_clr_btn.click(
2337
+ lambda: [[]] * 3 + [None] * 5 +
2338
+ [gr.Button(interactive=False)] * 3 +
2339
+ [gr.File(visible=False, value=None)] * 2 +
2340
+ [gr.Dropdown(visible=False, value=None), ''],
2341
+ outputs=[
2342
+ scores, filters, html_opts,
2343
+ file_for_report, raw_df, report_df,
2344
+ csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
2345
+ ],
2346
+ show_progress='hidden'
2347
+ )
2348
 
2349
 
2350
  def update_preset(family, preset):
 
2430
  pair_infer_click.success(
2431
  fn=submit_predict,
2432
  inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
2433
+ pair_infer_target_family, pair_infer_opts, run_state, ], # , pair_infer_email],
2434
  outputs=[run_state, ]
2435
  )
2436
 
 
2473
  report_df_change = file_for_report.change(
2474
  fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
2475
  concurrency_limit=100,
2476
+ ).success(
2477
  fn=lambda: [gr.Button(interactive=True)] * 2,
2478
  outputs=[csv_generate, html_generate],
2479
  )
 
2482
  fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
2483
  cancels=[report_df_change],
2484
  concurrency_limit=100,
2485
+ ).success(
2486
  fn=inquire_task, inputs=[raw_df],
2487
  outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
2488
  )
 
2490
  file_for_report.clear(
2491
  fn=lambda: [gr.Button(interactive=False)] * 3 +
2492
  [gr.File(visible=False, value=None)] * 2 +
2493
+ [gr.Dropdown(visible=False, value=None), ''],
2494
  cancels=[report_df_change],
2495
  outputs=[
2496
  csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
 
2506
  concurrency_limit=100,
2507
  )
2508
 
 
 
 
2509
 
2510
  def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
2511
  csv_sep_map = {
2512
  'Comma': ',',
2513
  'Tab': '\t',
2514
  }
2515
+ y_colname = 'Y^'
2516
  if isinstance(task, str):
2517
  if task == 'Compound-Protein Interaction':
2518
+ y_colname = 'Y^_prob'
2519
  elif task == 'Compound-Protein Binding Affinity':
2520
+ y_colname = 'Y^_pIC50'
2521
  try:
2522
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2523
  filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
2524
+ df.rename(columns={'Y^': y_colname}).drop(
2525
  labels=['Compound', 'Scaffold'], axis=1
2526
  ).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
2527
 
2528
+ return gr.File(filename, visible=True)
2529
  except Exception as e:
2530
  gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
2531
  return None
 
2545
  # html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
2546
 
2547
  csv_generate.click(
2548
+ lambda: gr.File(visible=True), outputs=csv_download_file,
2549
  ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
2550
  outputs=csv_download_file, show_progress='full')
2551
  html_generate.click(
2552
+ lambda: gr.File(visible=True), outputs=html_download_file,
2553
  ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
2554
  outputs=html_download_file, show_progress='full')
2555