libokj committed on
Commit
b4bf6c5
·
verified ·
1 Parent(s): 8a83419

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +418 -164
app.py CHANGED
@@ -20,16 +20,22 @@ from Bio.Align import PairwiseAligner
20
  import gradio as gr
21
  import hydra
22
  import pandas as pd
23
- import plotly.express as px
24
  import requests
25
- from bokeh.models import HTMLTemplateFormatter, StringFormatter
26
  from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
27
  from requests.adapters import HTTPAdapter, Retry
28
  from rdkit import Chem
29
- from rdkit.Chem import RDConfig, Descriptors, Draw, Lipinski, Crippen, PandasTools, AllChem
30
  from rdkit.Chem.Scaffolds import MurckoScaffold
31
  import seaborn as sns
32
 
 
 
 
 
 
 
 
33
  import swifter
34
  from tqdm.auto import tqdm
35
 
@@ -47,11 +53,11 @@ pd.set_option('display.float_format', '{:.3f}'.format)
47
  PandasTools.molRepresentation = 'svg'
48
  PandasTools.drawOptions = Draw.rdMolDraw2D.MolDrawOptions()
49
  PandasTools.drawOptions.clearBackground = False
50
- PandasTools.drawOptions.bondLineWidth = 1.5
51
  PandasTools.drawOptions.explicitMethyl = True
52
  PandasTools.drawOptions.singleColourWedgeBonds = True
53
  PandasTools.drawOptions.useCDKAtomPalette()
54
- PandasTools.molSize = (128, 128)
55
 
56
  SESSION = requests.Session()
57
  ADAPTER = HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]))
@@ -329,13 +335,13 @@ def rule_of_three(mol):
329
  SCORE_MAP = {
330
  'SAscore': sa_score,
331
  'LogP': logp,
332
- 'Molecular weight': mw,
333
- 'Number of heavy atoms': heavy_atom,
334
- 'Molar refractivity': mr,
335
- 'H-bond donor count': hbd,
336
- 'H-Bond acceptor count': hba,
337
- 'Rotatable bond count': rotatable_bond,
338
- 'Topological polar surface area': tpsa,
339
  }
340
 
341
  FILTER_MAP = {
@@ -393,7 +399,6 @@ COLUMN_ALIASES = {
393
  'ID2': 'Target ID',
394
  'Y': 'Actual CPI/CPA',
395
  'Y^': 'Predicted CPI/CPA',
396
- 'N': 'Original Index'
397
  }
398
 
399
 
@@ -401,7 +406,7 @@ def validate_columns(df, mandatory_cols):
401
  missing_cols = [col for col in mandatory_cols if col not in df.columns]
402
  if missing_cols:
403
  error_message = (f"The following mandatory columns are missing "
404
- f"in the uploaded dataset: {str(['X1', 'X2']).strip('[]')}.")
405
  raise ValueError(error_message)
406
  else:
407
  return
@@ -540,17 +545,26 @@ def submit_predict(predict_filepath, task, preset, target_family, flag, state, p
540
 
541
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
542
  # global DF_FOR_REPORT
543
- if Path(file).is_file():
544
  df = pd.read_csv(file)
 
 
 
 
 
545
  # if df['X1'].nunique() > 1:
546
- df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
547
- desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
548
- # Add a new column with RDKit molecule objects
549
- if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
550
- PandasTools.AddMoleculeColumnToFrame(df, smilesCol='X1', molCol='Compound',
551
- includeFingerprints=True)
552
- PandasTools.AddMoleculeColumnToFrame(df, smilesCol='Scaffold SMILES', molCol='Scaffold',
553
- includeFingerprints=True)
 
 
 
 
554
  # DF_FOR_REPORT = df.copy()
555
 
556
  # pie_chart = None
@@ -574,44 +588,81 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
574
  return {analyze_btn: gr.Button(interactive=False)}
575
 
576
 
577
- def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
578
  df_html = df.copy(deep=True)
 
579
 
580
- cols_left = ['N', 'ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^',]
581
- cols_right = ['X1', 'X2']
582
- cols_left = [col for col in cols_left if col in df_html.columns]
583
- cols_right = [col for col in cols_right if col in df_html.columns]
584
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
585
 
586
- ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted binding affinity' else False
 
 
 
 
 
 
 
587
  df_html = df_html.sort_values(
588
  [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
589
  )
590
 
591
- # # Remove repeated info for one-against-N tasks to save visual and physical space
592
- # if df_html['X1'].nunique() <= 1:
593
- # columns_to_clean = ['X1', 'ID1', 'Scaffold', 'Compound'] + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys())
594
- # for column in columns_to_clean:
595
- # if column in df_html.columns:
596
- # df_html.loc[1:, column] = pd.NA
597
- #
598
- # if df_html['X2'].nunique() <= 1:
599
- # columns_to_clean = ['X2', 'ID2']
600
- # for column in columns_to_clean:
601
- # if column in df_html.columns:
602
- # df_html.loc[1:, column] = pd.NA
603
-
604
  if not file:
605
  df_html = df_html.iloc[:31]
606
 
607
- # PandasTools.ChangeMoleculeRendering(df_html, renderer='image')
608
- # PandasTools.RenderImagesInAllDataFrames(images=True)
609
- df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
610
- 'Generating compound graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
611
- df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
612
- 'Generating scaffold graph...').apply(lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
613
- df_html = df_html.rename(columns=COLUMN_ALIASES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
614
  df_html.index.name = 'Index'
 
 
 
 
 
 
 
 
 
 
 
 
 
615
 
616
  if not file:
617
  if 'Compound ID' in df_html.columns:
@@ -620,100 +671,253 @@ def create_html_report(df, file=None, progress=gr.Progress(track_tqdm=True)):
620
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
621
  if 'Target FASTA' in df_html.columns:
622
  df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
623
- 'Processing FASTA...').apply(lambda x: wrap_text(x) if not pd.isna(x) else x)
624
- df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
625
- # num_formatters = {col: "{:.3f}" for col in df.select_dtypes('number').columns}
 
626
  styled_df = df_html.style.format(precision=3)
627
- colors = sns.color_palette('husl', len(df_html.columns))
628
- for i, col in enumerate(df_html.columns):
629
- if pd.api.types.is_numeric_dtype(df_html[col]):
630
- styled_df = styled_df.background_gradient(subset=col, cmap=sns.light_palette(colors[i], as_cmap=True))
631
- html = styled_df.to_html()
632
- return f'Report preview<div style="overflow:auto; height: 300px; font-family: Courier !important;">{html}</div>'
633
- else:
634
- import panel as pn
635
- from bokeh.resources import INLINE
636
- from bokeh.models import NumberFormatter, BooleanFormatter
637
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
  bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
639
- num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('number').columns}
640
  other_formatters = {
641
- 'Predicted interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
642
- 'Actual interaction probability': {'type': 'progress', 'max': 1.0, 'legend': True},
643
- 'Compound': HTMLTemplateFormatter(),
644
- 'Scaffold': HTMLTemplateFormatter(),
645
  'Target FASTA': {'type': 'textarea', 'width': 60},
 
 
 
 
 
 
 
 
 
646
  }
647
  formatters = {**bool_formatters, **num_formatters, **other_formatters}
648
 
649
  # html = df.to_html(file)
650
  # return html
651
- pn.widgets.Tabulator(df_html, formatters=formatters).save(file, resources=INLINE)
652
-
653
-
654
- # def create_pie_chart(df, category, value, top_k):
655
- # df.rename(COLUMN_ALIASES, inplace=True)
656
- # # Select the top_k records based on the value_col
657
- # top_k_df = df.nlargest(top_k, value)
658
- #
659
- # # Count the frequency of each unique value in the category_col column
660
- # category_counts = top_k_df[category].value_counts()
661
- #
662
- # # Convert the counts to a DataFrame
663
- # data = pd.DataFrame({category: category_counts.index, 'value': category_counts.values})
664
- #
665
- # # Calculate the angle for each category
666
- # data['angle'] = data['value']/data['value'].sum() * 2*pi
667
- #
668
- # # Assign colors
669
- # data['color'] = Spectral11[0:len(category_counts)]
670
- #
671
- # # Create the plot
672
- # p = figure(height=350, title="Pie Chart", toolbar_location=None,
673
- # tools="hover", tooltips="@{}: @value".format(category), x_range=(-0.5, 1.0))
674
- #
675
- # p.wedge(x=0, y=1, radius=0.4,
676
- # start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
677
- # line_color="white", fill_color='color', legend_field=category, source=data)
678
- #
679
- # p.axis.axis_label = None
680
- # p.axis.visible = False
681
- # p.grid.grid_line_color = None
682
- #
683
- # return p
684
 
685
- def create_pie_chart(df, category, value, top_k):
686
- df = df.copy()
687
- df.rename(COLUMN_ALIASES, inplace=True)
688
- value = COLUMN_ALIASES.get(value, value)
689
- # Select the top_k records based on the value_col
690
- top_k_df = df.nlargest(top_k, value)
691
 
692
- # Count the frequency of each unique value in the category_col column
693
- category_counts = top_k_df[category].value_counts()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- # Convert the counts to a DataFrame
696
- data = pd.DataFrame({category: category_counts.index, 'value': category_counts.values})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
 
698
- # Create the plot
699
- fig = px.pie(data, values='value', names=category, title=f'Top-{top_k} {category} in {value}')
700
- fig.update_traces(textposition='inside', textinfo='percent+label')
701
 
702
- return fig
703
 
 
 
 
 
 
 
704
 
705
- def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
  df_report = df.copy()
707
  try:
708
  for filter_name in filter_list:
709
  df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
710
  desc=f"Calculating {filter_name}").apply(
711
- lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x, axis=1)
712
 
713
  for score_name in score_list:
714
  df_report[score_name] = df_report['Compound'].swifter.progress_bar(
715
  desc=f"Calculating {score_name}").apply(
716
- lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x, axis=1)
717
 
718
  # pie_chart = None
719
  # value = None
@@ -728,11 +932,13 @@ def submit_report(df, score_list, filter_list, progress=gr.Progress(track_tqdm=T
728
  # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
729
  # pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
730
 
731
- return create_html_report(df_report), df_report # pie_chart
 
732
 
733
  except Exception as e:
734
  gr.Warning(f'Failed to report results due to error: {str(e)}')
735
- return None, None
 
736
 
737
  # def check_job_status(job_id):
738
  # job_lock = DATA_PATH / f"{job_id}.lock"
@@ -844,6 +1050,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
844
  screen_flag = gr.State(value=False)
845
  identify_flag = gr.State(value=False)
846
  infer_flag = gr.State(value=False)
 
847
 
848
  with gr.Tabs() as tabs:
849
  with gr.TabItem(label='Drug Hit Screening', id=0):
@@ -916,7 +1123,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
916
 
917
  with gr.Row():
918
  with gr.Column():
919
- target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You', variant='primary')
 
920
 
921
  with gr.Row():
922
  with gr.Column():
@@ -944,9 +1152,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
944
  "while affinity prediction directly estimates their binding strength measured using "
945
  "IC50."
946
  )
947
- drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()),
948
- label='Step 4. Select the Prediction Task You Want to Conduct',
949
- value='Compound-protein interaction')
 
950
 
951
  with gr.Row():
952
  with gr.Column():
@@ -954,10 +1163,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
954
  "Select your preferred model, or click Recommend for the best-performing model based "
955
  "on the selected task, family, and whether the target was trained. "
956
  "Please refer to documentation for detailed benchamrk results."
957
- )
958
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
959
  label='Step 5. Select a Preset Model')
960
- screen_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You', variant='primary')
 
961
  with gr.Row():
962
  with gr.Column():
963
  drug_screen_email = gr.Textbox(
@@ -1048,9 +1258,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1048
  "while affinity prediction directly estimates their binding strength measured using "
1049
  "IC50."
1050
  )
1051
- target_identify_task = gr.Dropdown(list(TASK_MAP.keys()),
1052
- label='Step 4. Select the Prediction Task You Want to Conduct',
1053
- value='Compound-protein interaction')
 
1054
 
1055
  with gr.Row():
1056
  with gr.Column():
@@ -1058,7 +1269,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1058
  "Select your preferred model, or click Recommend for the best-performing model based "
1059
  "on the selected task, family, and whether the compound was trained. "
1060
  "Please refer to documentation for detailed benchamrk results."
1061
- )
1062
  target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1063
  label='Step 5. Select a Preset Model')
1064
  identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
@@ -1073,7 +1284,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1073
 
1074
  with gr.Row(visible=True):
1075
  # target_identify_clr_btn = gr.ClearButton(size='lg')
1076
- target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary', size='lg')
 
1077
 
1078
  identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1079
  identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
@@ -1152,9 +1364,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1152
  "while affinity prediction directly estimates their binding strength "
1153
  "measured using IC50."
1154
  )
1155
- pair_infer_task = gr.Dropdown(list(TASK_MAP.keys()),
1156
- label='Step 3. Select the Prediction Task You Want to Conduct',
1157
- value='Compound-protein interaction')
 
1158
 
1159
  with gr.Row():
1160
  with gr.Column():
@@ -1189,17 +1402,20 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1189
  To compute chemical properties for the predictions of drug hit screening,
1190
  target protein identification, and interaction pair inference.
1191
 
1192
- You may also upload your own dataset using a CSV file containing one required column X1 for compound SMILES.
 
1193
 
1194
  The page shows only a preview report displaying at most 30 records
1195
  (with top predicted CPI/CPA if reporting results from a prediction job).
1196
 
1197
- For a full report, please
1198
- generate and download a CSV or interactive HTML report below.
1199
-
1200
  ''')
1201
  with gr.Row():
1202
- file_for_report = gr.File(interactive=True, type='filepath')
 
 
 
1203
  raw_df = gr.State(value=pd.DataFrame())
1204
  report_df = gr.State(value=pd.DataFrame())
1205
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
@@ -1207,7 +1423,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1207
 
1208
  with gr.Row():
1209
  # clear_btn = gr.ClearButton(size='lg')
1210
- analyze_btn = gr.Button('Preview Top 30 Records', variant='primary', size='lg', interactive=False)
 
1211
 
1212
  with gr.Row():
1213
  with gr.Column(scale=3):
@@ -1217,11 +1434,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
1217
  with gr.Row():
1218
  with gr.Column():
1219
  csv_generate = gr.Button(value='Generate CSV Report',
1220
- interactive=True, variant='primary', visible=False)
1221
  csv_download_file = gr.File(label='Download CSV Report', visible=False)
1222
  with gr.Column():
1223
  html_generate = gr.Button(value='Generate HTML Report',
1224
- interactive=True, variant='primary', visible=False)
1225
  html_download_file = gr.File(label='Download HTML Report', visible=False)
1226
 
1227
 
@@ -1336,6 +1553,8 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1336
 
1337
  example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
1338
  target_id, target_gene, target_organism, target_fasta], show_progress=False)
 
 
1339
  # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1340
  # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1341
 
@@ -1663,47 +1882,82 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
1663
  )
1664
 
1665
  # TODO background job from these 3 pipelines to update file_for_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1666
 
1667
  file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
1668
- html_report,
1669
- raw_df,
1670
- report_df,
1671
- analyze_btn
1672
- # ranking_pie_chart
1673
- ])
1674
- analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters], outputs=[
1675
- html_report,
1676
- report_df,
1677
- # ranking_pie_chart
1678
- ])
1679
-
1680
-
1681
- def create_csv_report_file(df, file_report):
 
 
 
 
 
 
 
 
1682
  try:
1683
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1684
  filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
1685
  df.drop(labels=['Compound', 'Scaffold'], axis=1).to_csv(filename, index=False)
1686
 
1687
- return gr.File(filename, visible=True), gr.Button(visible=False)
1688
  except Exception as e:
1689
  gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
1690
- return None, None
 
1691
 
1692
- def create_html_report_file(df, file_report):
1693
  try:
1694
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
1695
  filename = f"reports/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
1696
  create_html_report(df, filename)
1697
- return gr.File(filename, visible=True), gr.Button(visible=False)
1698
  except Exception as e:
1699
  gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
1700
- return None, None
 
1701
 
1702
  html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
1703
- csv_generate.click(fn=create_csv_report_file, inputs=[report_df, file_for_report],
1704
- outputs=[csv_download_file, csv_generate])
1705
- html_generate.click(fn=create_html_report_file, inputs=[report_df, file_for_report],
1706
- outputs=[html_download_file, html_generate])
 
 
 
 
1707
 
1708
  # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
1709
  # every=5)
 
20
  import gradio as gr
21
  import hydra
22
  import pandas as pd
 
23
  import requests
24
+ from rdkit.Chem.PandasTools import _MolPlusFingerprint
25
  from rdkit.Chem.rdMolDescriptors import CalcNumRotatableBonds, CalcNumHeavyAtoms, CalcNumAtoms, CalcTPSA
26
  from requests.adapters import HTTPAdapter, Retry
27
  from rdkit import Chem
28
+ from rdkit.Chem import RDConfig, Descriptors, Draw, Lipinski, Crippen, PandasTools
29
  from rdkit.Chem.Scaffolds import MurckoScaffold
30
  import seaborn as sns
31
 
32
+ from bokeh.models import Legend, NumberFormatter, BooleanFormatter, HTMLTemplateFormatter, LegendItem
33
+ from bokeh.palettes import Category20c_20
34
+ from bokeh.plotting import figure
35
+ from bokeh.transform import cumsum
36
+ from bokeh.resources import INLINE
37
+ import panel as pn
38
+
39
  import swifter
40
  from tqdm.auto import tqdm
41
 
 
53
  PandasTools.molRepresentation = 'svg'
54
  PandasTools.drawOptions = Draw.rdMolDraw2D.MolDrawOptions()
55
  PandasTools.drawOptions.clearBackground = False
56
+ PandasTools.drawOptions.bondLineWidth = 1
57
  PandasTools.drawOptions.explicitMethyl = True
58
  PandasTools.drawOptions.singleColourWedgeBonds = True
59
  PandasTools.drawOptions.useCDKAtomPalette()
60
+ PandasTools.molSize = (128, 80)
61
 
62
  SESSION = requests.Session()
63
  ADAPTER = HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]))
 
335
  SCORE_MAP = {
336
  'SAscore': sa_score,
337
  'LogP': logp,
338
+ 'Molecular Weight': mw,
339
+ 'Number of Heavy Atoms': heavy_atom,
340
+ 'Molar Refractivity': mr,
341
+ 'H-Bond Donor Count': hbd,
342
+ 'H-Bond Acceptor Count': hba,
343
+ 'Rotatable Bond Count': rotatable_bond,
344
+ 'Topological Polar Surface Area': tpsa,
345
  }
346
 
347
  FILTER_MAP = {
 
399
  'ID2': 'Target ID',
400
  'Y': 'Actual CPI/CPA',
401
  'Y^': 'Predicted CPI/CPA',
 
402
  }
403
 
404
 
 
406
  missing_cols = [col for col in mandatory_cols if col not in df.columns]
407
  if missing_cols:
408
  error_message = (f"The following mandatory columns are missing "
409
+ f"in the uploaded dataset: {str(mandatory_cols).strip('[]')}.")
410
  raise ValueError(error_message)
411
  else:
412
  return
 
545
 
546
  def update_df(file, progress=gr.Progress(track_tqdm=True)):
547
  # global DF_FOR_REPORT
548
+ if file and Path(file).is_file():
549
  df = pd.read_csv(file)
550
+ if 'N' in df.columns:
551
+ df.set_index('N', inplace=True)
552
+ if not any(col in ['X1', 'X2'] for col in df.columns):
553
+ gr.Warning("At least one of columns `X1` and `X2` must be in the uploaded dataset.")
554
+ return {analyze_btn: gr.Button(interactive=False)}
555
  # if df['X1'].nunique() > 1:
556
+ if 'X1' in df.columns:
557
+ df['Scaffold SMILES'] = df['X1'].swifter.progress_bar(
558
+ desc=f"Calculating scaffold...").apply(MurckoScaffold.MurckoScaffoldSmilesFromSmiles)
559
+ df['Scaffold'] = df['Scaffold SMILES'].swifter.progress_bar(
560
+ desc='Generating scaffold graphs...').apply(
561
+ lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
562
+ # Add a new column with RDKit molecule objects
563
+ if 'Compound' not in df.columns or df['Compound'].dtype != 'object':
564
+ df['Compound'] = df['X1'].swifter.progress_bar(
565
+ desc='Generating molecular graphs...').apply(
566
+ lambda smiles: _MolPlusFingerprint(Chem.MolFromSmiles(smiles)))
567
+
568
  # DF_FOR_REPORT = df.copy()
569
 
570
  # pie_chart = None
 
588
  return {analyze_btn: gr.Button(interactive=False)}
589
 
590
 
591
+ def create_html_report(df, file=None, task=None, progress=gr.Progress(track_tqdm=True)):
592
  df_html = df.copy(deep=True)
593
+ # email_hash = hashlib.sha256(email.encode()).hexdigest()
594
 
595
+ cols_left = list(pd.Index(
596
+ ['ID1', 'Compound', 'Scaffold', 'Scaffold SMILES', 'ID2', 'Y', 'Y^']).intersection(df_html.columns))
597
+ cols_right = list(pd.Index(['X1', 'X2']).intersection(df_html.columns))
 
598
  df_html = df_html[cols_left + (df_html.columns.drop(cols_left + cols_right).tolist()) + cols_right]
599
 
600
+ if isinstance(task, str):
601
+ task = TASK_MAP[task]
602
+ COLUMN_ALIASES.update({
603
+ 'Y': 'Actual Interaction Probability' if task == 'DTI' else 'Actual Binding Affinity',
604
+ 'Y^': 'Predicted Interaction Probability' if task == 'DTI' else 'Predicted Binding Affinity'
605
+ })
606
+
607
+ ascending = True if COLUMN_ALIASES['Y^'] == 'Predicted Binding Affinity' else False
608
  df_html = df_html.sort_values(
609
  [col for col in ['Y', 'Y^'] if col in df_html.columns], ascending=ascending
610
  )
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  if not file:
613
  df_html = df_html.iloc[:31]
614
 
615
+ # Remove repeated info for one-against-N tasks to save visual and physical space
616
+ job = 'Chemical Property'
617
+ unique_entity = 'Unique Entity'
618
+ unique_df = None
619
+ category = None
620
+ columns_unique = None
621
+ if 'X1' in df_html.columns and 'X2' in df_html.columns:
622
+ n_compound = df_html['X1'].nunique()
623
+ n_protein = df_html['X2'].nunique()
624
+
625
+ if n_compound == 1 and n_protein >= 2:
626
+ unique_entity = 'Compound of Interest'
627
+ if any(col in df_html.columns for col in ['Y^', 'Y']):
628
+ job = 'Target Protein Identification'
629
+ category = 'Target Family'
630
+ columns_unique = df_html.columns.isin(['X1', 'ID1', 'Scaffold', 'Compound', 'Scaffold SMILES']
631
+ + list(FILTER_MAP.keys()) + list(SCORE_MAP.keys()))
632
+
633
+ elif n_compound >= 2 and n_protein == 1:
634
+ unique_entity = 'Target of Interest'
635
+ if any(col in df_html.columns for col in ['Y^', 'Y']):
636
+ job = 'Drug Hit Screening'
637
+ category = 'Scaffold SMILES'
638
+ columns_unique = df_html.columns.isin(['X2', 'ID2'])
639
+
640
+ elif 'Y^' in df_html.columns:
641
+ job = 'Interaction Pair Inference'
642
+ if 'Compound' in df_html.columns:
643
+ df_html['Compound'] = df_html['Compound'].swifter.progress_bar(
644
+ desc='Generating compound graph...').apply(
645
+ lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
646
+ if 'Scaffold' in df_html.columns:
647
+ df_html['Scaffold'] = df_html['Scaffold'].swifter.progress_bar(
648
+ desc='Generating scaffold graph...').apply(
649
+ lambda x: PandasTools.PrintAsImageString(x) if not pd.isna(x) else x)
650
+
651
+ df_html.rename(columns=COLUMN_ALIASES, inplace=True)
652
  df_html.index.name = 'Index'
653
+ if 'Target FASTA' in df_html.columns:
654
+ df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
655
+ desc='Processing FASTA...').apply(
656
+ lambda x: wrap_text(x) if not pd.isna(x) else x)
657
+
658
+ if columns_unique is not None:
659
+ unique_df = df_html.loc[:, columns_unique].iloc[[0]]
660
+ df_html = df_html.loc[:, ~columns_unique]
661
+
662
+ num_cols = df_html.select_dtypes('number').columns
663
+ num_col_colors = sns.color_palette('husl', len(num_cols))
664
+ bool_cols = df_html.select_dtypes(bool).columns
665
+ bool_col_colors = {True: 'lightgreen', False: 'lightpink'}
666
 
667
  if not file:
668
  if 'Compound ID' in df_html.columns:
 
671
  df_html.drop(['Target FASTA'], axis=1, inplace=True)
672
  if 'Target FASTA' in df_html.columns:
673
  df_html['Target FASTA'] = df_html['Target FASTA'].swifter.progress_bar(
674
+ desc='Processing FASTA...').apply(
675
+ lambda x: wrap_text(x) if not pd.isna(x) else x)
676
+ if 'Scaffold SMILES' in df_html.columns:
677
+ df_html.drop(['Scaffold SMILES'], axis=1, inplace=True)
678
  styled_df = df_html.style.format(precision=3)
 
 
 
 
 
 
 
 
 
 
679
 
680
+ for i, col in enumerate(num_cols):
681
+ if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
682
+ styled_df = styled_df.background_gradient(
683
+ subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
684
+ else:
685
+ styled_df = styled_df.background_gradient(
686
+ subset=[col], cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())
687
+
688
+ styled_df.applymap(lambda val: f'background-color: {bool_col_colors[val]}', subset=bool_cols)
689
+
690
+ table_html = styled_df.to_html()
691
+ unique_html = ''
692
+ if unique_df is not None:
693
+ unique_html = unique_df.replace('\n', '<br>', regex=True).to_html(escape=False, index=False)
694
+ unique_html = f'<div style="font-family: Courier !important;">{unique_html}</div>'
695
+
696
+ return (f'<div style="font-size: 16px; font-weight: bold;">{job} Report Preview (Top 30 Records)</div>'
697
+ f'{unique_html}'
698
+ f'<div style="overflow:auto; height: 300px; font-family: Courier !important;">{table_html}</div>')
699
+
700
+ else:
701
  bool_formatters = {col: BooleanFormatter() for col in df_html.select_dtypes(bool).columns}
702
+ num_formatters = {col: NumberFormatter(format='0.000') for col in df_html.select_dtypes('floating').columns}
703
  other_formatters = {
704
+ 'Predicted Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
705
+ 'Actual Interaction Probability': {'type': 'progress', 'max': 1.0, 'legend': True},
706
+ 'Compound': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
707
+ 'Scaffold': HTMLTemplateFormatter(template='<div class="image-zoom-viewer"><%= value %></div>'),
708
  'Target FASTA': {'type': 'textarea', 'width': 60},
709
+ 'Target ID': HTMLTemplateFormatter(
710
+ template='<a href="<% '
711
+ 'if (/^[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$/.test(value)) '
712
+ '{ %>https://www.uniprot.org/uniprotkb/<%= value %><% } '
713
+ 'else { %>https://www.uniprot.org/uniprotkb?query=<%= value %><% } '
714
+ '%>" target="_blank"><%= value %></a>'),
715
+ 'Compound ID': HTMLTemplateFormatter(
716
+ template='<a href="https://pubchem.ncbi.nlm.nih.gov/compound/<%= value %>" '
717
+ 'target="_blank"><%= value %></a>')
718
  }
719
  formatters = {**bool_formatters, **num_formatters, **other_formatters}
720
 
721
  # html = df.to_html(file)
722
  # return html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
 
724
+ report_table = pn.widgets.Tabulator(
725
+ df_html, formatters=formatters,
726
+ frozen_columns=['Index', 'Target ID', 'Compound ID', 'Compound', 'Scaffold'],
727
+ disabled=True, sizing_mode='stretch_both')
 
 
728
 
729
+ for i, col in enumerate(num_cols):
730
+ if col not in ['Predicted Binding Affinity', 'Actual Binding Affinity']:
731
+ if col not in ['Predicted Interaction Probability', 'Actual Interaction Probability']:
732
+ report_table.style.background_gradient(
733
+ subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True))
734
+ else:
735
+ continue
736
+ else:
737
+ report_table.style.background_gradient(
738
+ subset=df_html.columns == col, cmap=sns.light_palette(num_col_colors[i], as_cmap=True).reversed())
739
+
740
+ pie_charts = {}
741
+ for y in df_html.columns.intersection(['Predicted Interaction Probability', 'Actual Interaction Probability',
742
+ 'Predicted Binding Affinity', 'Actual Binding Affinity']):
743
+ pie_charts[y] = []
744
+ for k in [10, 30, 100]:
745
+ if k < len(df_html):
746
+ pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=k))
747
+ pie_charts[y].append(create_pie_chart(df_html, category=category, value=y, top_k=len(df_html)))
748
+
749
+ # Remove keys with empty values
750
+ pie_charts = {k: v for k, v in pie_charts.items() if any(v)}
751
+
752
+ pn_css = """
753
+ .tabulator {
754
+ font-family: Courier New !important;
755
+ font-weight: normal !important;
756
+ font-size: 12px !important;
757
+ overflow: visible !important;
758
+ }
759
+
760
+ .tabulator-cell {
761
+ overflow: visible !important;
762
+ }
763
+
764
+ .bk-panel-models-tabulator-DataTabulator {
765
+ overflow: visible !important;
766
+ }
767
+
768
+ .tabulator-cell.tabulator-frozen:hover {
769
+ z-index: 1000 !important;
770
+ }
771
+
772
+ .bk-panel-models-tabulator-DataTabulator:hover {
773
+ z-index: 999 !important;
774
+ }
775
+
776
+ .image-zoom-viewer {
777
+ display: inline-block;
778
+ position: relative;
779
+ overflow: visible; /* Ensures that the scaled SVG isn't clipped */
780
+ }
781
+
782
+ .image-zoom-viewer::after {
783
+ content: "";
784
+ position: absolute;
785
+ top: 0;
786
+ left: 0;
787
+ width: 100%;
788
+ height: 100%;
789
+ pointer-events: none;
790
+ }
791
+
792
+ .image-zoom-viewer:hover::after {
793
+ pointer-events: all;
794
+ }
795
+
796
+ /* When hovering over the container, scale its child (the SVG) */
797
+ .tabulator-cell:hover .image-zoom-viewer svg {
798
+ padding: 3px;
799
+ position: relative; /* Position the SVG relative to the viewport */
800
+ background-color: rgba(250, 250, 250, 0.854);
801
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.618);
802
+ border-radius: 3px;
803
+ transform: scale(4) translate(+38.2%, +38.2%); /* Scale up the SVG */
804
+ transition: transform 0.3s ease;
805
+ pointer-events: none; /* Prevents the SVG from blocking mouse interactions */
806
+ }
807
+
808
+ .image-zoom-viewer svg {
809
+ display: block; /* SVG is a block-level element for proper scaling */
810
+ z-index: 1000;
811
+ }
812
+
813
+ .image-zoom-viewer:hover {
814
+ z-index: 1000;
815
+ }
816
+
817
+ """
818
+
819
+ pn.extension(raw_css=[pn_css])
820
+
821
+ template = pn.template.VanillaTemplate(
822
+ title=f'DeepSEQreen {job} Report',
823
+ sidebar=[],
824
+ favicon='deepseqreen.svg',
825
+ logo='deepseqreen.svg',
826
+ header_background='#F3F5F7',
827
+ header_color='#4372c4',
828
+ busy_indicator=None,
829
+ )
830
 
831
+ info_row = pn.Row()
832
+ if unique_df is not None:
833
+ unique_table = pn.widgets.Tabulator(unique_df, formatters=formatters, show_index=False, disabled=True)
834
+ info_row.append(pn.Column(f'### {unique_entity}', unique_table))
835
+ if pie_charts:
836
+ for score_name, figure_list in pie_charts.items():
837
+ info_row.append(
838
+ pn.Column(f'### {category} by Top {score_name}',
839
+ pn.Tabs(*figure_list, tabs_location='above'))
840
+ # pn.Card(pn.Row(v), title=f'{category} by Top {k}')
841
+ )
842
+ if info_row:
843
+ template.main.append(pn.Card(info_row,
844
+ sizing_mode='stretch_width', title='Summary Statistics', margin=10))
845
+
846
+ template.main.append(
847
+ pn.Card(report_table, title=f'{job} Results', # width=1200,
848
+ margin=10)
849
+ )
850
 
851
+ template.save(file, resources=INLINE)
852
+ return file
 
853
 
 
854
 
855
def create_pie_chart(df, category, value, top_k):
    """Build a Bokeh pie chart of `category` frequencies among the top-ranked rows.

    Rows are ranked by `value` (largest first); the `top_k` best rows are kept and
    the occurrences of each distinct `category` value among them become one wedge.

    Args:
        df: DataFrame expected to contain the `category` and `value` columns.
        category: Name of the column whose distinct values form the wedges.
        value: Name of the column used to rank the rows.
        top_k: Number of top-ranked rows to summarise.

    Returns:
        A Bokeh figure named ``"Top {top_k}"`` (or ``"All"`` when `top_k` is not
        smaller than ``len(df)``), or ``None`` when either column is missing.
    """
    if category not in df or value not in df:
        return None

    top_k_df = df.nlargest(top_k, value)
    category_counts = top_k_df[category].value_counts()
    data = pd.DataFrame({category: category_counts.index, 'value': category_counts.values})
    data['proportion'] = data['value'] / data['value'].sum()

    # Fold categories that each account for at most 0.1% into a single 'Other'
    # wedge. This is done only when such categories exist: appending the row
    # unconditionally would add an empty, zero-count 'Other' entry to the chart
    # and its legend. Building the row as a typed frame (instead of transposing
    # a summed Series) keeps 'value' and 'proportion' numeric.
    mask = data['proportion'] <= 0.001
    if mask.any():
        other_row = pd.DataFrame({
            category: ['Other'],
            'value': [data.loc[mask, 'value'].sum()],
            'proportion': [data.loc[mask, 'proportion'].sum()],
        })
        data = pd.concat([data[~mask], other_row], ignore_index=True)

    data['angle'] = data['proportion'] * 2 * pi
    # Repeat the 20-colour palette as often as needed to colour every wedge.
    data['color'] = list(Category20c_20 * (len(data) // 20 + 1))[:len(data)]

    tooltips = [
        (f"{category}", f"@{{{category}}}"),
        ("Count", "@value"),
        ("Percentage", "@proportion{0.0%}")
    ]

    if category == 'Scaffold SMILES' and 'Scaffold' in top_k_df:
        # Attach the scaffold rendering for the hover tooltip. De-duplicating on
        # the SMILES key alone guarantees exactly one row per wedge even if the
        # same scaffold carries more than one distinct 'Scaffold' value.
        scaffold_lookup = top_k_df[['Scaffold SMILES', 'Scaffold']].drop_duplicates(
            subset='Scaffold SMILES')
        data = data.merge(scaffold_lookup, how='left', on='Scaffold SMILES')
        tooltips.append(("Scaffold", "<div>@{Scaffold}{safe}</div>"))

    p = figure(height=256, name=f"Top {top_k}" if top_k < len(df) else 'All',
               toolbar_location=None, tools="hover", tooltips=tooltips, x_range=(-0.5, 0.5),
               sizing_mode="scale_height")
    # Strip every decoration so that only the pie and its legend remain.
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    p.outline_line_width = 0
    p.min_border = 0
    p.min_border_right = 0
    p.margin = 0

    # The legend is added before the glyph so that it sits to the right of the plot.
    p.add_layout(Legend(padding=0, margin=0), 'right')
    p.wedge(x=0, y=1, radius=0.3,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field=category, source=data)

    p.legend.label_text_font_size = "8pt"
    p.legend.margin = 0
    p.legend.padding = 0

    # Cap the legend at 20 entries followed by a "..." placeholder.
    # NOTE(review): Bokeh documents that `legend_field` groups entries in the
    # browser, so this Python-side list may hold a single item and the cap may
    # never trigger -- confirm, and consider `legend_group` if it does not.
    legend_items = list(p.legend.items)
    if len(legend_items) > 20:
        p.legend.items = legend_items[:20] + [LegendItem(label="...")]

    return p
907
+
908
+
909
+ def submit_report(df, score_list, filter_list, task, progress=gr.Progress(track_tqdm=True)):
910
  df_report = df.copy()
911
  try:
912
  for filter_name in filter_list:
913
  df_report[filter_name] = df_report['Compound'].swifter.progress_bar(
914
  desc=f"Calculating {filter_name}").apply(
915
+ lambda x: FILTER_MAP[filter_name](x) if not pd.isna(x) else x)
916
 
917
  for score_name in score_list:
918
  df_report[score_name] = df_report['Compound'].swifter.progress_bar(
919
  desc=f"Calculating {score_name}").apply(
920
+ lambda x: SCORE_MAP[score_name](x) if not pd.isna(x) else x)
921
 
922
  # pie_chart = None
923
  # value = None
 
932
  # elif df['X2'].nunique() > 1 >= df['X1'].nunique():
933
  # pie_chart = create_pie_chart(df, category='Target family', value=value, top_k=100)
934
 
935
+ return (create_html_report(df_report, file=None, task=task), df_report,
936
+ gr.File(visible=False), gr.File(visible=False))
937
 
938
  except Exception as e:
939
  gr.Warning(f'Failed to report results due to error: {str(e)}')
940
+ return None, None, None, None
941
+
942
 
943
  # def check_job_status(job_id):
944
  # job_lock = DATA_PATH / f"{job_id}.lock"
 
1050
  screen_flag = gr.State(value=False)
1051
  identify_flag = gr.State(value=False)
1052
  infer_flag = gr.State(value=False)
1053
+ report_upload_flag = gr.State(value=False)
1054
 
1055
  with gr.Tabs() as tabs:
1056
  with gr.TabItem(label='Drug Hit Screening', id=0):
 
1123
 
1124
  with gr.Row():
1125
  with gr.Column():
1126
+ target_family_detect_btn = gr.Button(value='OR Let Us Auto-Detect for You',
1127
+ variant='primary')
1128
 
1129
  with gr.Row():
1130
  with gr.Column():
 
1152
  "while affinity prediction directly estimates their binding strength measured using "
1153
  "IC50."
1154
  )
1155
+ drug_screen_task = gr.Dropdown(
1156
+ list(TASK_MAP.keys()),
1157
+ label='Step 4. Select the Prediction Task You Want to Conduct',
1158
+ value='Compound-protein interaction')
1159
 
1160
  with gr.Row():
1161
  with gr.Column():
 
1163
  "Select your preferred model, or click Recommend for the best-performing model based "
1164
  "on the selected task, family, and whether the target was trained. "
1165
  "Please refer to documentation for detailed benchmark results."
1166
+ )
1167
  drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1168
  label='Step 5. Select a Preset Model')
1169
+ screen_preset_recommend_btn = gr.Button(
1170
+ value='OR Let Us Recommend for You', variant='primary')
1171
  with gr.Row():
1172
  with gr.Column():
1173
  drug_screen_email = gr.Textbox(
 
1258
  "while affinity prediction directly estimates their binding strength measured using "
1259
  "IC50."
1260
  )
1261
+ target_identify_task = gr.Dropdown(
1262
+ list(TASK_MAP.keys()),
1263
+ label='Step 4. Select the Prediction Task You Want to Conduct',
1264
+ value='Compound-protein interaction')
1265
 
1266
  with gr.Row():
1267
  with gr.Column():
 
1269
  "Select your preferred model, or click Recommend for the best-performing model based "
1270
  "on the selected task, family, and whether the compound was trained. "
1271
  "Please refer to documentation for detailed benchmark results."
1272
+ )
1273
  target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()),
1274
  label='Step 5. Select a Preset Model')
1275
  identify_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
 
1284
 
1285
  with gr.Row(visible=True):
1286
  # target_identify_clr_btn = gr.ClearButton(size='lg')
1287
+ target_identify_btn = gr.Button(value='SUBMIT THE IDENTIFICATION JOB', variant='primary',
1288
+ size='lg')
1289
 
1290
  identify_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
1291
  identify_waiting = gr.Markdown(f"Your job is running... It might take a few minutes."
 
1364
  "while affinity prediction directly estimates their binding strength "
1365
  "measured using IC50."
1366
  )
1367
+ pair_infer_task = gr.Dropdown(
1368
+ list(TASK_MAP.keys()),
1369
+ label='Step 3. Select the Prediction Task You Want to Conduct',
1370
+ value='Compound-protein interaction')
1371
 
1372
  with gr.Row():
1373
  with gr.Column():
 
1402
  To compute chemical properties for the predictions of drug hit screening,
1403
  target protein identification, and interaction pair inference.
1404
 
1405
+ You may also upload your own dataset using a CSV file containing
1406
+ one required column `X1` for compound SMILES.
1407
 
1408
  The page shows only a preview report displaying at most 30 records
1409
  (with top predicted CPI/CPA if reporting results from a prediction job).
1410
 
1411
+ Please first `**Preview**` the report, then `**Generate**` and download a CSV report
1412
+ or an interactive HTML report below if you wish to access the full report.
 
1413
  ''')
1414
  with gr.Row():
1415
+ with gr.Column():
1416
+ file_for_report = gr.File(interactive=True, type='filepath')
1417
+ report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
1418
+ label='Specify the Task for the Labels in the Upload Dataset')
1419
  raw_df = gr.State(value=pd.DataFrame())
1420
  report_df = gr.State(value=pd.DataFrame())
1421
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
 
1423
 
1424
  with gr.Row():
1425
  # clear_btn = gr.ClearButton(size='lg')
1426
+ analyze_btn = gr.Button('Preview Top 30 Records', variant='primary', size='lg',
1427
+ interactive=False)
1428
 
1429
  with gr.Row():
1430
  with gr.Column(scale=3):
 
1434
  with gr.Row():
1435
  with gr.Column():
1436
  csv_generate = gr.Button(value='Generate CSV Report',
1437
+ interactive=False, variant='primary')
1438
  csv_download_file = gr.File(label='Download CSV Report', visible=False)
1439
  with gr.Column():
1440
  html_generate = gr.Button(value='Generate HTML Report',
1441
+ interactive=False, variant='primary')
1442
  html_download_file = gr.File(label='Download HTML Report', visible=False)
1443
 
1444
 
 
1553
 
1554
  example_fasta.click(fn=example_fill, inputs=target_input_type, outputs=[
1555
  target_id, target_gene, target_organism, target_fasta], show_progress=False)
1556
+
1557
+
1558
  # example_uniprot.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1559
  # example_gene.click(fn=example_fill, inputs=target_input_type, outputs=target_fasta, show_progress=False)
1560
 
 
1882
  )
1883
 
1884
  # TODO background job from these 3 pipelines to update file_for_report
1885
def inquire_task(df, upload_flag):
    """Decide whether the user must name the task behind an uploaded dataset's labels.

    Args:
        df: The dataset currently loaded for reporting.
        upload_flag: Truthy when the dataset came from a user upload.

    Returns:
        A dict mapping Gradio components to their updates:
        - not an upload: only hides the task dropdown;
        - upload without a `Y` or `Y^` column: enables the preview and both
          generate buttons;
        - upload with a `Y` or `Y^` column: shows the task dropdown with an
          explanatory hint, clears the HTML report and disables the three buttons.
    """
    if not upload_flag:
        return {report_task: gr.Dropdown(visible=False)}

    # Label columns in priority order: `Y` wins over `Y^` when both are present.
    known_labels = (
        ('Y', 'actual CPI/CPA labels (`Y`)'),
        ('Y^', 'predicted CPI/CPA labels (`Y^`)'),
    )
    label = next((text for column, text in known_labels if column in df.columns), None)

    if label is None:
        # No label column found, so there is no task to ask about.
        return {analyze_btn: gr.Button(interactive=True),
                csv_generate: gr.Button(interactive=True),
                html_generate: gr.Button(interactive=True)}

    task_prompt = gr.Dropdown(visible=True,
                              info=f'Found {label} in your uploaded dataset. '
                                   'Is it compound-target interaction or binding affinity?')
    return {report_task: task_prompt,
            html_report: '',
            analyze_btn: gr.Button(interactive=False),
            csv_generate: gr.Button(interactive=False),
            html_generate: gr.Button(interactive=False)}
1905
+
1906
 
1907
  file_for_report.change(fn=update_df, inputs=file_for_report, outputs=[
1908
+ html_report, raw_df, report_df, analyze_btn]).success(
1909
+ fn=lambda: [gr.Button(interactive=False)]*2 + [gr.File(visible=False)]*2 + [gr.Dropdown(visible=False)],
1910
+ outputs=[csv_generate, html_generate, csv_download_file, html_download_file, report_task]
1911
+ ).then(
1912
+ fn=inquire_task, inputs=[raw_df, report_upload_flag],
1913
+ outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate]
1914
+ )
1915
+ file_for_report.clear(fn=lambda: gr.Dropdown(visible=False), outputs=report_task)
1916
+ file_for_report.upload(
1917
+ fn=lambda: True, outputs=report_upload_flag
1918
+ )
1919
+
1920
+ analyze_btn.click(fn=submit_report, inputs=[raw_df, scores, filters, report_task], outputs=[
1921
+ html_report, report_df, csv_download_file, html_download_file
1922
+ ]).success(fn=lambda: [gr.Button(interactive=True)] * 2,
1923
+ outputs=[csv_generate, html_generate])
1924
+
1925
+ report_task.select(fn=lambda: gr.Button(interactive=True),
1926
+ outputs=analyze_btn)
1927
+
1928
+
1929
def create_csv_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
    """Write the report table to a timestamped CSV file and return it for download.

    Args:
        df: Report DataFrame; its 'Compound' and 'Scaffold' columns are dropped
            before writing.
        file_report: Source file object whose `.name` stem prefixes the output name.
        progress: Gradio progress tracker; not referenced in the body
            (presumably present so Gradio can display tqdm progress).

    Returns:
        A `gr.File` pointing at the written CSV, or None if anything failed
        (a Gradio warning carrying the error message is raised in that case).
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.csv"

        exportable = df.drop(labels=['Compound', 'Scaffold'], axis=1)
        exportable.to_csv(filename, index=False)

        return gr.File(filename)
    except Exception as e:
        gr.Warning(f"Failed to generate CSV due to error: {e!s}")
        return None
1939
+
1940
 
1941
def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
    """Render the report table to a timestamped HTML file and return it for download.

    Args:
        df: Report DataFrame handed to `create_html_report`.
        file_report: Source file object whose `.name` stem prefixes the output name.
        progress: Gradio progress tracker; not referenced in the body
            (presumably present so Gradio can display tqdm progress).

    Returns:
        A visible `gr.File` pointing at the written HTML report, or None if
        anything failed (a Gradio warning carrying the error message is raised
        in that case).
    """
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        source_stem = Path(file_report.name).stem
        filename = f"reports/{source_stem}_DeepSEQreen_report_{timestamp}.html"

        create_html_report(df, filename)
        return gr.File(filename, visible=True)
    except Exception as e:
        gr.Warning(f"Failed to generate HTML due to error: {e!s}")
        return None
1950
+
1951
 
1952
  html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
1953
+ csv_generate.click(
1954
+ lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[csv_generate, csv_download_file],
1955
+ ).then(fn=create_csv_report_file, inputs=[report_df, file_for_report],
1956
+ outputs=csv_download_file, show_progress='full')
1957
+ html_generate.click(
1958
+ lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
1959
+ ).then(fn=create_html_report_file, inputs=[report_df, file_for_report],
1960
+ outputs=html_download_file, show_progress='full')
1961
 
1962
  # screen_waiting.change(fn=check_job_status, inputs=run_state, outputs=[pair_waiting, tabs, file_for_report],
1963
  # every=5)