Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -1491,13 +1491,13 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1491 |
drug_library_upload_btn = gr.UploadButton(
|
1492 |
label='OR Upload Your Own Library', variant='primary')
|
1493 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
1494 |
-
|
1495 |
-
|
1496 |
-
|
1497 |
-
|
1498 |
-
|
1499 |
-
|
1500 |
-
|
1501 |
with gr.Row():
|
1502 |
with gr.Column():
|
1503 |
drug_screen_email = gr.Textbox(
|
@@ -1507,10 +1507,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1507 |
)
|
1508 |
|
1509 |
with gr.Row(visible=True):
|
1510 |
-
with gr.
|
1511 |
drug_screen_clr_btn = gr.ClearButton(size='lg')
|
1512 |
drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
|
1513 |
-
|
1514 |
|
1515 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
1516 |
|
@@ -1598,13 +1598,12 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1598 |
label='OR Upload Your Own Library', variant='primary')
|
1599 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
1600 |
|
1601 |
-
|
1602 |
-
|
1603 |
-
|
1604 |
-
|
1605 |
-
|
1606 |
-
|
1607 |
-
)
|
1608 |
with gr.Row():
|
1609 |
with gr.Column():
|
1610 |
target_identify_email = gr.Textbox(
|
@@ -1708,6 +1707,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1708 |
label='Step 4. Select a Preset Model')
|
1709 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
1710 |
# variant='primary')
|
|
|
1711 |
|
1712 |
with gr.Row():
|
1713 |
pair_infer_email = gr.Textbox(
|
@@ -1742,7 +1742,8 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1742 |
with gr.Row():
|
1743 |
with gr.Column(scale=1):
|
1744 |
file_for_report = gr.File(interactive=True, type='filepath')
|
1745 |
-
report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False,
|
|
|
1746 |
label='Specify the Task Labels in the Uploaded Dataset')
|
1747 |
with gr.Column(scale=2):
|
1748 |
with gr.Row():
|
@@ -1908,9 +1909,10 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
|
|
1908 |
|
1909 |
alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
|
1910 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1911 |
-
|
|
|
1912 |
info=f"Reason: Best sequence identity ({row['score']}) "
|
1913 |
-
f"with {row['ID2']} from family {
|
1914 |
except Exception as e:
|
1915 |
gr.Warning("Failed to detect the protein family due to error: " + str(e))
|
1916 |
|
@@ -2044,7 +2046,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2044 |
], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
|
2045 |
|
2046 |
|
2047 |
-
def identify_recommend_model(smiles, task):
|
2048 |
task = TASK_MAP[task]
|
2049 |
score = TASK_METRIC_MAP[task]
|
2050 |
benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
|
@@ -2052,15 +2054,24 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2052 |
if not smiles:
|
2053 |
gr.Warning('Please enter a valid SMILES for model recommendation.')
|
2054 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2055 |
|
2056 |
-
seen_compounds = pd.read_csv(
|
2057 |
-
f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
|
2058 |
if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
|
2059 |
scenario = "Seen Compound"
|
2060 |
else:
|
2061 |
scenario = "Unseen Compound"
|
2062 |
|
2063 |
-
filtered_df = benchmark_df[(benchmark_df['Family'] ==
|
2064 |
& (benchmark_df['Scenario'] == scenario)
|
2065 |
& (benchmark_df['Type'] == 'General')]
|
2066 |
|
@@ -2072,7 +2083,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2072 |
|
2073 |
|
2074 |
identify_preset_recommend_btn.click(fn=identify_recommend_model,
|
2075 |
-
inputs=[compound_smiles, target_identify_task],
|
2076 |
outputs=target_identify_preset, show_progress='hidden')
|
2077 |
|
2078 |
|
@@ -2304,22 +2315,36 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2304 |
drug_screen_clr_btn.click(
|
2305 |
lambda: ['General'] + [[]] + [None] * 5,
|
2306 |
outputs=[drug_screen_target_family, drug_screen_opts,
|
2307 |
-
target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email]
|
|
|
|
|
2308 |
|
2309 |
target_identify_clr_btn.click(
|
2310 |
lambda: ['General'] + [[]] + [None] * 5,
|
2311 |
outputs=[target_identify_target_family, target_identify_opts,
|
2312 |
-
compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email]
|
|
|
|
|
2313 |
|
2314 |
pair_infer_clr_btn.click(
|
2315 |
lambda: ['General'] + [None] * 5,
|
2316 |
outputs=[pair_infer_target_family,
|
2317 |
-
infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email]
|
|
|
|
|
2318 |
|
2319 |
report_clr_btn.click(
|
2320 |
-
lambda: [[]] * 3 + [None] * 5
|
2321 |
-
|
2322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2323 |
|
2324 |
|
2325 |
def update_preset(family, preset):
|
@@ -2405,7 +2430,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2405 |
pair_infer_click.success(
|
2406 |
fn=submit_predict,
|
2407 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
2408 |
-
pair_infer_target_family, run_state, ], # , pair_infer_email],
|
2409 |
outputs=[run_state, ]
|
2410 |
)
|
2411 |
|
@@ -2448,7 +2473,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2448 |
report_df_change = file_for_report.change(
|
2449 |
fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
|
2450 |
concurrency_limit=100,
|
2451 |
-
).
|
2452 |
fn=lambda: [gr.Button(interactive=True)] * 2,
|
2453 |
outputs=[csv_generate, html_generate],
|
2454 |
)
|
@@ -2457,7 +2482,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2457 |
fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
|
2458 |
cancels=[report_df_change],
|
2459 |
concurrency_limit=100,
|
2460 |
-
).
|
2461 |
fn=inquire_task, inputs=[raw_df],
|
2462 |
outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
|
2463 |
)
|
@@ -2465,7 +2490,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2465 |
file_for_report.clear(
|
2466 |
fn=lambda: [gr.Button(interactive=False)] * 3 +
|
2467 |
[gr.File(visible=False, value=None)] * 2 +
|
2468 |
-
[gr.Dropdown(visible=False, value=None),
|
2469 |
cancels=[report_df_change],
|
2470 |
outputs=[
|
2471 |
csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
|
@@ -2481,29 +2506,26 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2481 |
concurrency_limit=100,
|
2482 |
)
|
2483 |
|
2484 |
-
report_task.select(fn=lambda: gr.Button(interactive=True),
|
2485 |
-
outputs=analyze_btn)
|
2486 |
-
|
2487 |
|
2488 |
def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
|
2489 |
csv_sep_map = {
|
2490 |
'Comma': ',',
|
2491 |
'Tab': '\t',
|
2492 |
}
|
2493 |
-
|
2494 |
if isinstance(task, str):
|
2495 |
if task == 'Compound-Protein Interaction':
|
2496 |
-
|
2497 |
elif task == 'Compound-Protein Binding Affinity':
|
2498 |
-
|
2499 |
try:
|
2500 |
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
2501 |
filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
|
2502 |
-
df.rename(columns={'Y^':
|
2503 |
labels=['Compound', 'Scaffold'], axis=1
|
2504 |
).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
|
2505 |
|
2506 |
-
return gr.File(filename)
|
2507 |
except Exception as e:
|
2508 |
gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
|
2509 |
return None
|
@@ -2523,11 +2545,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
2523 |
# html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
|
2524 |
|
2525 |
csv_generate.click(
|
2526 |
-
lambda:
|
2527 |
).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
|
2528 |
outputs=csv_download_file, show_progress='full')
|
2529 |
html_generate.click(
|
2530 |
-
lambda:
|
2531 |
).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
|
2532 |
outputs=html_download_file, show_progress='full')
|
2533 |
|
|
|
1491 |
drug_library_upload_btn = gr.UploadButton(
|
1492 |
label='OR Upload Your Own Library', variant='primary')
|
1493 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
1494 |
+
|
1495 |
+
drug_screen_opts = gr.CheckboxGroup(
|
1496 |
+
['Include Max. Tanimoto Similarity'],
|
1497 |
+
label='Step 6. Select Additional Options',
|
1498 |
+
info="Calculating the maximum Tanimoto similarity of the library compounds to the "
|
1499 |
+
"training dataset is an experimental feature and may take a considerable amount of time."
|
1500 |
+
)
|
1501 |
with gr.Row():
|
1502 |
with gr.Column():
|
1503 |
drug_screen_email = gr.Textbox(
|
|
|
1507 |
)
|
1508 |
|
1509 |
with gr.Row(visible=True):
|
1510 |
+
with gr.Row():
|
1511 |
drug_screen_clr_btn = gr.ClearButton(size='lg')
|
1512 |
drug_screen_btn = gr.Button(value='SUBMIT THE SCREENING JOB', variant='primary', size='lg')
|
1513 |
+
# TODO Modify the pd df directly with df['X2'] = target
|
1514 |
|
1515 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
1516 |
|
|
|
1598 |
label='OR Upload Your Own Library', variant='primary')
|
1599 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
1600 |
|
1601 |
+
target_identify_opts = gr.CheckboxGroup(
|
1602 |
+
['Include Max. Sequence Identity'],
|
1603 |
+
label='Step 6. Select Additional Options',
|
1604 |
+
info="Calculating the maximum sequence identity of the library protein to the "
|
1605 |
+
"training dataset is an experimental feature and may take a considerable amount of time."
|
1606 |
+
)
|
|
|
1607 |
with gr.Row():
|
1608 |
with gr.Column():
|
1609 |
target_identify_email = gr.Textbox(
|
|
|
1707 |
label='Step 4. Select a Preset Model')
|
1708 |
# infer_preset_recommend_btn = gr.Button(value='OR Let Us Recommend for You',
|
1709 |
# variant='primary')
|
1710 |
+
pair_infer_opts = gr.CheckboxGroup(visible=False)
|
1711 |
|
1712 |
with gr.Row():
|
1713 |
pair_infer_email = gr.Textbox(
|
|
|
1742 |
with gr.Row():
|
1743 |
with gr.Column(scale=1):
|
1744 |
file_for_report = gr.File(interactive=True, type='filepath')
|
1745 |
+
report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False,
|
1746 |
+
value='Compound-Protein Interaction',
|
1747 |
label='Specify the Task Labels in the Uploaded Dataset')
|
1748 |
with gr.Column(scale=2):
|
1749 |
with gr.Row():
|
|
|
1909 |
|
1910 |
alignment_df['score'] = alignment_df['X2'].parallel_apply(align_score)
|
1911 |
row = alignment_df.loc[alignment_df['score'].idxmax()]
|
1912 |
+
family = row['Target Family'].title()
|
1913 |
+
return gr.Dropdown(value=family,
|
1914 |
info=f"Reason: Best sequence identity ({row['score']}) "
|
1915 |
+
f"with {row['ID2']} from family {family}")
|
1916 |
except Exception as e:
|
1917 |
gr.Warning("Failed to detect the protein family due to error: " + str(e))
|
1918 |
|
|
|
2046 |
], inputs=target_library_upload_btn, outputs=[target_library_upload, target_library])
|
2047 |
|
2048 |
|
2049 |
+
def identify_recommend_model(smiles, family, task):
|
2050 |
task = TASK_MAP[task]
|
2051 |
score = TASK_METRIC_MAP[task]
|
2052 |
benchmark_df = pd.read_csv(f'data/benchmarks/{task}_test_metrics.csv')
|
|
|
2054 |
if not smiles:
|
2055 |
gr.Warning('Please enter a valid SMILES for model recommendation.')
|
2056 |
return None
|
2057 |
+
if family == 'Family-Specific Auto-Recommendation':
|
2058 |
+
return None
|
2059 |
+
|
2060 |
+
if family == 'General':
|
2061 |
+
seen_compounds = pd.read_csv(
|
2062 |
+
f'data/benchmarks/seen_compounds/all_families_full_{task.lower()}_random_split.csv')
|
2063 |
+
family = 'All Families'
|
2064 |
+
|
2065 |
+
else:
|
2066 |
+
seen_compounds = pd.read_csv(
|
2067 |
+
f'data/benchmarks/seen_compounds/{TARGET_FAMILY_MAP[family.title()]}_{task.lower()}_random_split.csv')
|
2068 |
|
|
|
|
|
2069 |
if rdkit_canonicalize(smiles) in seen_compounds['X1'].values:
|
2070 |
scenario = "Seen Compound"
|
2071 |
else:
|
2072 |
scenario = "Unseen Compound"
|
2073 |
|
2074 |
+
filtered_df = benchmark_df[(benchmark_df['Family'] == family)
|
2075 |
& (benchmark_df['Scenario'] == scenario)
|
2076 |
& (benchmark_df['Type'] == 'General')]
|
2077 |
|
|
|
2083 |
|
2084 |
|
2085 |
identify_preset_recommend_btn.click(fn=identify_recommend_model,
|
2086 |
+
inputs=[compound_smiles, target_identify_target_family, target_identify_task],
|
2087 |
outputs=target_identify_preset, show_progress='hidden')
|
2088 |
|
2089 |
|
|
|
2315 |
drug_screen_clr_btn.click(
|
2316 |
lambda: ['General'] + [[]] + [None] * 5,
|
2317 |
outputs=[drug_screen_target_family, drug_screen_opts,
|
2318 |
+
target_fasta, drug_screen_preset, drug_library, drug_library_upload, drug_screen_email],
|
2319 |
+
show_progress='hidden'
|
2320 |
+
)
|
2321 |
|
2322 |
target_identify_clr_btn.click(
|
2323 |
lambda: ['General'] + [[]] + [None] * 5,
|
2324 |
outputs=[target_identify_target_family, target_identify_opts,
|
2325 |
+
compound_smiles, target_identify_preset, target_library, target_library_upload, target_identify_email],
|
2326 |
+
show_progress='hidden'
|
2327 |
+
)
|
2328 |
|
2329 |
pair_infer_clr_btn.click(
|
2330 |
lambda: ['General'] + [None] * 5,
|
2331 |
outputs=[pair_infer_target_family,
|
2332 |
+
infer_pair, infer_drug, infer_target, pair_infer_preset, pair_infer_email],
|
2333 |
+
show_progress='hidden'
|
2334 |
+
)
|
2335 |
|
2336 |
report_clr_btn.click(
|
2337 |
+
lambda: [[]] * 3 + [None] * 5 +
|
2338 |
+
[gr.Button(interactive=False)] * 3 +
|
2339 |
+
[gr.File(visible=False, value=None)] * 2 +
|
2340 |
+
[gr.Dropdown(visible=False, value=None), ''],
|
2341 |
+
outputs=[
|
2342 |
+
scores, filters, html_opts,
|
2343 |
+
file_for_report, raw_df, report_df,
|
2344 |
+
csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
|
2345 |
+
],
|
2346 |
+
show_progress='hidden'
|
2347 |
+
)
|
2348 |
|
2349 |
|
2350 |
def update_preset(family, preset):
|
|
|
2430 |
pair_infer_click.success(
|
2431 |
fn=submit_predict,
|
2432 |
inputs=[infer_data_for_predict, pair_infer_task, pair_infer_preset,
|
2433 |
+
pair_infer_target_family, pair_infer_opts, run_state, ], # , pair_infer_email],
|
2434 |
outputs=[run_state, ]
|
2435 |
)
|
2436 |
|
|
|
2473 |
report_df_change = file_for_report.change(
|
2474 |
fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
|
2475 |
concurrency_limit=100,
|
2476 |
+
).success(
|
2477 |
fn=lambda: [gr.Button(interactive=True)] * 2,
|
2478 |
outputs=[csv_generate, html_generate],
|
2479 |
)
|
|
|
2482 |
fn=update_df, inputs=file_for_report, outputs=[html_report, raw_df, report_df, analyze_btn, report_task],
|
2483 |
cancels=[report_df_change],
|
2484 |
concurrency_limit=100,
|
2485 |
+
).success(
|
2486 |
fn=inquire_task, inputs=[raw_df],
|
2487 |
outputs=[report_task, html_report, analyze_btn, csv_generate, html_generate],
|
2488 |
)
|
|
|
2490 |
file_for_report.clear(
|
2491 |
fn=lambda: [gr.Button(interactive=False)] * 3 +
|
2492 |
[gr.File(visible=False, value=None)] * 2 +
|
2493 |
+
[gr.Dropdown(visible=False, value=None), ''],
|
2494 |
cancels=[report_df_change],
|
2495 |
outputs=[
|
2496 |
csv_generate, html_generate, analyze_btn, csv_download_file, html_download_file, report_task, html_report
|
|
|
2506 |
concurrency_limit=100,
|
2507 |
)
|
2508 |
|
|
|
|
|
|
|
2509 |
|
2510 |
def create_csv_report_file(df, file_report, task, sep, progress=gr.Progress(track_tqdm=True)):
|
2511 |
csv_sep_map = {
|
2512 |
'Comma': ',',
|
2513 |
'Tab': '\t',
|
2514 |
}
|
2515 |
+
y_colname = 'Y^'
|
2516 |
if isinstance(task, str):
|
2517 |
if task == 'Compound-Protein Interaction':
|
2518 |
+
y_colname = 'Y^_prob'
|
2519 |
elif task == 'Compound-Protein Binding Affinity':
|
2520 |
+
y_colname = 'Y^_pIC50'
|
2521 |
try:
|
2522 |
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
2523 |
filename = f"{SERVER_DATA_DIR}/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.csv"
|
2524 |
+
df.rename(columns={'Y^': y_colname}).drop(
|
2525 |
labels=['Compound', 'Scaffold'], axis=1
|
2526 |
).to_csv(filename, index=False, na_rep='', sep=csv_sep_map[sep])
|
2527 |
|
2528 |
+
return gr.File(filename, visible=True)
|
2529 |
except Exception as e:
|
2530 |
gr.Warning(f"Failed to generate CSV due to error: {str(e)}")
|
2531 |
return None
|
|
|
2545 |
# html_report.change(lambda: [gr.Button(visible=True)] * 2, outputs=[csv_generate, html_generate])
|
2546 |
|
2547 |
csv_generate.click(
|
2548 |
+
lambda: gr.File(visible=True), outputs=csv_download_file,
|
2549 |
).then(fn=create_csv_report_file, inputs=[report_df, file_for_report, report_task, csv_sep],
|
2550 |
outputs=csv_download_file, show_progress='full')
|
2551 |
html_generate.click(
|
2552 |
+
lambda: gr.File(visible=True), outputs=html_download_file,
|
2553 |
).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task, html_opts],
|
2554 |
outputs=html_download_file, show_progress='full')
|
2555 |
|