Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload app.py
Browse files
app.py
CHANGED
@@ -60,7 +60,7 @@ SESSION.mount('https://', ADAPTER)
|
|
60 |
|
61 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
62 |
|
63 |
-
CUSTOM_DATASET_MAX_LEN =
|
64 |
|
65 |
CSS = """
|
66 |
.help-tip {
|
@@ -353,7 +353,7 @@ TASK_MAP = {
|
|
353 |
|
354 |
PRESET_MAP = {
|
355 |
'DeepDTA': 'deep_dta',
|
356 |
-
'DeepConvDTI
|
357 |
'GraphDTA': 'graph_dta',
|
358 |
'MGraphDTA': 'm_graph_dta',
|
359 |
'HyperAttentionDTI': 'hyper_attention_dti',
|
@@ -403,12 +403,12 @@ def validate_columns(df, mandatory_cols):
|
|
403 |
|
404 |
|
405 |
def process_target_fasta(sequence):
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
record = SeqIO.parse(io.StringIO(sequence), "fasta")[0]
|
411 |
-
return str(record.seq)
|
412 |
|
413 |
|
414 |
def send_email(receiver, msg):
|
@@ -749,7 +749,7 @@ def process_drug_library_upload(library_upload):
|
|
749 |
else:
|
750 |
raise gr.Error('Currently only CSV and SDF files are supported as compound libraries.')
|
751 |
validate_columns(screen_df, ['X1'])
|
752 |
-
return
|
753 |
|
754 |
|
755 |
def target_library_from_fasta(fasta_path):
|
@@ -783,7 +783,7 @@ theme = gr.themes.Base(spacing_size="sm", text_size='md').set(
|
|
783 |
code_background_fill='white',
|
784 |
)
|
785 |
|
786 |
-
with
|
787 |
run_state = gr.State(value=False)
|
788 |
screen_flag = gr.State(value=False)
|
789 |
identify_flag = gr.State(value=False)
|
@@ -802,18 +802,17 @@ To predict interactions/binding affinities of a single target against a library
|
|
802 |
with gr.Row():
|
803 |
with gr.Column():
|
804 |
HelpTip(
|
805 |
-
"
|
806 |
-
"
|
807 |
-
"
|
808 |
-
"only the first one will be used."
|
809 |
)
|
810 |
with gr.Row():
|
811 |
target_input_type = gr.Dropdown(
|
812 |
-
label='Target Input Type',
|
813 |
choices=['Sequence', 'UniProt ID', 'Gene symbol'],
|
814 |
info='Enter (paste) a FASTA string below manually or upload a FASTA file.',
|
815 |
value='Sequence',
|
816 |
-
scale=
|
817 |
)
|
818 |
target_id = gr.Textbox(show_label=False, visible=False,
|
819 |
interactive=True, scale=4,
|
@@ -823,19 +822,9 @@ To predict interactions/binding affinities of a single target against a library
|
|
823 |
interactive=True, scale=4,
|
824 |
info='Query a sequence on UniProt with a gene symbol.')
|
825 |
target_organism = gr.Textbox(
|
826 |
-
info='Organism
|
827 |
-
placeholder='
|
828 |
visible=False, interactive=True, scale=4, )
|
829 |
-
with gr.Column():
|
830 |
-
HelpTip(
|
831 |
-
"Identify the protein family by conducting sequence alignment. "
|
832 |
-
"You may select General if you find the alignment score unsatisfactory."
|
833 |
-
)
|
834 |
-
drug_screen_target_family = gr.Dropdown(
|
835 |
-
choices=list(TARGET_FAMILY_MAP.keys()),
|
836 |
-
value='General',
|
837 |
-
label='Select Input Protein Family (Optional)', interactive=True)
|
838 |
-
# with gr.Column(scale=1, min_width=24):
|
839 |
|
840 |
with gr.Row():
|
841 |
with gr.Column():
|
@@ -844,14 +833,39 @@ To predict interactions/binding affinities of a single target against a library
|
|
844 |
size='lg')
|
845 |
target_query_btn = gr.Button(value='Query the sequence', variant='primary',
|
846 |
visible=False)
|
847 |
-
target_family_detect_btn = gr.Button(value='Auto-detect', variant='primary')
|
848 |
|
849 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
|
|
|
|
850 |
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
|
|
|
|
|
|
851 |
|
852 |
with gr.Row():
|
853 |
with gr.Column():
|
854 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
855 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
856 |
with gr.Row():
|
857 |
gr.File(label='Example SDF compound library',
|
@@ -861,24 +875,33 @@ To predict interactions/binding affinities of a single target against a library
|
|
861 |
drug_library_upload_btn = gr.UploadButton(
|
862 |
label='Upload a custom library', variant='primary')
|
863 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
864 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
865 |
value='Compound-protein interaction')
|
|
|
|
|
866 |
with gr.Column():
|
867 |
-
HelpTip("
|
868 |
-
"
|
869 |
-
|
870 |
-
|
871 |
-
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Select a Preset Model')
|
872 |
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
873 |
-
|
874 |
-
|
875 |
-
|
876 |
-
|
877 |
-
|
|
|
878 |
|
879 |
with gr.Row(visible=True):
|
|
|
880 |
# drug_screen_clr_btn = gr.ClearButton(size='lg')
|
881 |
-
|
882 |
# TODO Modify the pd df directly with df['X2'] = target
|
883 |
|
884 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
@@ -914,29 +937,38 @@ Example CSV target library:
|
|
914 |
with gr.Row():
|
915 |
with gr.Column():
|
916 |
HelpTip(
|
917 |
-
"
|
918 |
-
|
919 |
-
|
920 |
-
representing your drug of interest.
|
921 |
-
"""
|
922 |
)
|
923 |
compound_type = gr.Dropdown(
|
924 |
-
label='Compound Input Type',
|
925 |
choices=['SMILES', 'SDF'],
|
926 |
-
info='Enter (paste) an SMILES string or upload an
|
927 |
value='SMILES',
|
928 |
interactive=True)
|
929 |
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
|
930 |
|
931 |
-
target_identify_target_family = gr.Dropdown(choices=['General'], value='General',
|
932 |
-
label='Target Protein Family')
|
933 |
-
|
934 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
935 |
example_drug = gr.Button(value='Example: Aspirin', elem_id='example')
|
936 |
|
937 |
with gr.Row():
|
938 |
with gr.Column():
|
939 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
940 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
941 |
with gr.Row():
|
942 |
gr.File(label='Example FASTA target library',
|
@@ -946,22 +978,30 @@ Example CSV target library:
|
|
946 |
target_library_upload_btn = gr.UploadButton(
|
947 |
label='Upload a custom library', variant='primary')
|
948 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
949 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
950 |
value='Compound-protein interaction')
|
951 |
|
|
|
952 |
with gr.Column():
|
953 |
-
HelpTip("
|
954 |
-
"
|
955 |
-
|
956 |
-
|
957 |
-
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Preset')
|
958 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
959 |
|
960 |
-
|
961 |
-
|
962 |
-
|
963 |
-
|
964 |
-
|
|
|
965 |
|
966 |
with gr.Row(visible=True):
|
967 |
# target_identify_clr_btn = gr.ClearButton(size='lg')
|
@@ -1327,6 +1367,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
|
|
1327 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1328 |
else:
|
1329 |
screen_df = process_drug_library_upload(library_upload)
|
|
|
1330 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1331 |
raise gr.Error(f'The uploaded compound library has more records '
|
1332 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
@@ -1564,3 +1605,5 @@ if __name__ == "__main__":
|
|
1564 |
demo.launch(
|
1565 |
show_api=False,
|
1566 |
)
|
|
|
|
|
|
60 |
|
61 |
UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
|
62 |
|
63 |
+
CUSTOM_DATASET_MAX_LEN = 10000
|
64 |
|
65 |
CSS = """
|
66 |
.help-tip {
|
|
|
353 |
|
354 |
PRESET_MAP = {
|
355 |
'DeepDTA': 'deep_dta',
|
356 |
+
'DeepConvDTI': 'deep_conv_dti',
|
357 |
'GraphDTA': 'graph_dta',
|
358 |
'MGraphDTA': 'm_graph_dta',
|
359 |
'HyperAttentionDTI': 'hyper_attention_dti',
|
|
|
403 |
|
404 |
|
405 |
def process_target_fasta(sequence):
    """Extract the first record's sequence from FASTA-formatted text.

    An optional leading ``>`` header line is dropped, the remaining lines are
    concatenated, and everything from the next ``>`` onwards (i.e. any
    further records) is discarded, so only the first sequence is returned.

    Args:
        sequence: FASTA text, or a bare sequence without a header line.

    Returns:
        The first sequence as one string, free of line breaks and of
        whitespace surrounding each line; ``''`` when the input is empty.
    """
    # splitlines() handles \n, \r\n and \r alike; stripping each line keeps
    # stray carriage returns and indentation out of the returned sequence.
    lines = [line.strip() for line in sequence.strip().splitlines()]
    if lines and lines[0].startswith(">"):
        lines = lines[1:]
    # Cutting at the next '>' drops every record after the first one.
    return ''.join(lines).split(">")[0]
|
412 |
|
413 |
|
414 |
def send_email(receiver, msg):
|
|
|
749 |
else:
|
750 |
raise gr.Error('Currently only CSV and SDF files are supported as compound libraries.')
|
751 |
validate_columns(screen_df, ['X1'])
|
752 |
+
return screen_df
|
753 |
|
754 |
|
755 |
def target_library_from_fasta(fasta_path):
|
|
|
783 |
code_background_fill='white',
|
784 |
)
|
785 |
|
786 |
+
with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS) as demo:
|
787 |
run_state = gr.State(value=False)
|
788 |
screen_flag = gr.State(value=False)
|
789 |
identify_flag = gr.State(value=False)
|
|
|
802 |
with gr.Row():
|
803 |
with gr.Column():
|
804 |
HelpTip(
|
805 |
+
"Enter (paste) a amino acid sequence below manually or upload a FASTA file."
|
806 |
+
"If multiple entities are in the FASTA, only the first will be used."
|
807 |
+
"Alternatively, enter a Uniprot ID or gene symbol with organism and click Query for the sequence."
|
|
|
808 |
)
|
809 |
with gr.Row():
|
810 |
target_input_type = gr.Dropdown(
|
811 |
+
label='Step 1. Select Target Input Type and Input',
|
812 |
choices=['Sequence', 'UniProt ID', 'Gene symbol'],
|
813 |
info='Enter (paste) a FASTA string below manually or upload a FASTA file.',
|
814 |
value='Sequence',
|
815 |
+
scale=4, interactive=True
|
816 |
)
|
817 |
target_id = gr.Textbox(show_label=False, visible=False,
|
818 |
interactive=True, scale=4,
|
|
|
822 |
interactive=True, scale=4,
|
823 |
info='Query a sequence on UniProt with a gene symbol.')
|
824 |
target_organism = gr.Textbox(
|
825 |
+
info='Organism scientific name (default: Homo sapiens).',
|
826 |
+
placeholder='Homo sapiens', show_label=False,
|
827 |
visible=False, interactive=True, scale=4, )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
|
829 |
with gr.Row():
|
830 |
with gr.Column():
|
|
|
833 |
size='lg')
|
834 |
target_query_btn = gr.Button(value='Query the sequence', variant='primary',
|
835 |
visible=False)
|
|
|
836 |
|
837 |
target_fasta = gr.Code(label='Input or Display FASTA', interactive=True, lines=5)
|
838 |
+
# with gr.Row():
|
839 |
+
# with gr.Column():
|
840 |
example_fasta = gr.Button(value='Example: Human MAPK14', elem_id='example')
|
841 |
+
# with gr.Column():
|
842 |
+
# gr.File(label='Example FASTA file',
|
843 |
+
# value='data/examples/MAPK14.fasta', interactive=False)
|
844 |
|
845 |
with gr.Row():
|
846 |
with gr.Column():
|
847 |
+
HelpTip(
|
848 |
+
"Click Auto-detect to identify the protein family using sequence alignment. "
|
849 |
+
"This optional step allows applying a family-specific model instead of a all-family model (general)."
|
850 |
+
"Manually select general if the alignment results are unsatisfactory."
|
851 |
+
)
|
852 |
+
drug_screen_target_family = gr.Dropdown(
|
853 |
+
choices=list(TARGET_FAMILY_MAP.keys()),
|
854 |
+
value='General',
|
855 |
+
label='Step 2. Select Input Protein Family (Optional)', interactive=True)
|
856 |
+
# with gr.Column(scale=1, min_width=24):
|
857 |
+
|
858 |
+
with gr.Row():
|
859 |
+
with gr.Column():
|
860 |
+
target_family_detect_btn = gr.Button(value='Auto-detect', variant='primary')
|
861 |
+
|
862 |
+
with gr.Row():
|
863 |
+
with gr.Column():
|
864 |
+
HelpTip(
|
865 |
+
"Select a preset compound library (e.g., DrugBank)."
|
866 |
+
"Alternatively, upload a CSV file with a column named X1 containing compound SMILES, or use an SDF file."
|
867 |
+
)
|
868 |
+
drug_library = gr.Dropdown(label='Step 3. Select or Upload a Compound Library',
|
869 |
choices=list(DRUG_LIBRARY_MAP.keys()))
|
870 |
with gr.Row():
|
871 |
gr.File(label='Example SDF compound library',
|
|
|
875 |
drug_library_upload_btn = gr.UploadButton(
|
876 |
label='Upload a custom library', variant='primary')
|
877 |
drug_library_upload = gr.File(label='Custom compound library file', visible=False)
|
878 |
+
with gr.Row():
|
879 |
+
with gr.Column():
|
880 |
+
HelpTip(
|
881 |
+
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
882 |
+
"while affinity prediction directly estimates their binding strength measured using IC50."
|
883 |
+
)
|
884 |
+
drug_screen_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
|
885 |
value='Compound-protein interaction')
|
886 |
+
|
887 |
+
with gr.Row():
|
888 |
with gr.Column():
|
889 |
+
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the target was trained."
|
890 |
+
"Please refer to documentation for detailed benchamrk results."
|
891 |
+
)
|
892 |
+
drug_screen_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
|
|
|
893 |
screen_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
894 |
+
with gr.Row():
|
895 |
+
with gr.Column():
|
896 |
+
drug_screen_email = gr.Textbox(
|
897 |
+
label='Step 6. Email (Optional)',
|
898 |
+
info="If an email is provided, a notification email will be sent to you when your job is completed."
|
899 |
+
)
|
900 |
|
901 |
with gr.Row(visible=True):
|
902 |
+
with gr.Column():
|
903 |
# drug_screen_clr_btn = gr.ClearButton(size='lg')
|
904 |
+
drug_screen_btn = gr.Button(value='SCREEN', variant='primary', size='lg')
|
905 |
# TODO Modify the pd df directly with df['X2'] = target
|
906 |
|
907 |
screen_data_for_predict = gr.File(visible=False, file_count="single", type='filepath')
|
|
|
937 |
with gr.Row():
|
938 |
with gr.Column():
|
939 |
HelpTip(
|
940 |
+
"Enter (paste) a compound SMILES below manually or upload a SDF file."
|
941 |
+
"If multiple entities are in the SDF, only the first will be used."
|
942 |
+
"SMILES can be obtained by searching for the compound of interest in databases such as NCBI, PubChem and and ChEMBL."
|
|
|
|
|
943 |
)
|
944 |
compound_type = gr.Dropdown(
|
945 |
+
label='Step 1. Select Compound Input Type and Input',
|
946 |
choices=['SMILES', 'SDF'],
|
947 |
+
info='Enter (paste) an SMILES string or upload an SDF file.',
|
948 |
value='SMILES',
|
949 |
interactive=True)
|
950 |
compound_upload_btn = gr.UploadButton(label='Upload', variant='primary', type='binary')
|
951 |
|
|
|
|
|
|
|
952 |
compound_smiles = gr.Code(label='Input or Display Compound SMILES', interactive=True, lines=5)
|
953 |
example_drug = gr.Button(value='Example: Aspirin', elem_id='example')
|
954 |
|
955 |
with gr.Row():
|
956 |
with gr.Column():
|
957 |
+
HelpTip(
|
958 |
+
"By default, models trained on all protein families (general) will be applied."
|
959 |
+
"If the proteins in the target library of interest all belong to the same protein family, manually selecting the family is supported."
|
960 |
+
)
|
961 |
+
target_identify_target_family = gr.Dropdown(choices=list(TARGET_FAMILY_MAP.keys()),
|
962 |
+
value='General',
|
963 |
+
label='Step 2. Select Target Protein Family (Optional)')
|
964 |
+
|
965 |
+
with gr.Row():
|
966 |
+
with gr.Column():
|
967 |
+
HelpTip(
|
968 |
+
"Select a preset target library (e.g., ChEMBL33_human_proteins)."
|
969 |
+
"Alternatively, upload a CSV file with a column named X2 containing tareget protein sequences, or use an FASTA file."
|
970 |
+
)
|
971 |
+
target_library = gr.Dropdown(label='Step 3. Select or Upload a Target Library',
|
972 |
choices=list(TARGET_LIBRARY_MAP.keys()))
|
973 |
with gr.Row():
|
974 |
gr.File(label='Example FASTA target library',
|
|
|
978 |
target_library_upload_btn = gr.UploadButton(
|
979 |
label='Upload a custom library', variant='primary')
|
980 |
target_library_upload = gr.File(label='Custom target library file', visible=False)
|
981 |
+
|
982 |
+
with gr.Row():
|
983 |
+
with gr.Column():
|
984 |
+
HelpTip(
|
985 |
+
"Interaction prediction provides you binding probability score between the target of interest and each compound in the library,"
|
986 |
+
"while affinity prediction directly estimates their binding strength measured using IC50."
|
987 |
+
)
|
988 |
+
target_identify_task = gr.Dropdown(list(TASK_MAP.keys()), label='Step 4. Select a Prediction Task',
|
989 |
value='Compound-protein interaction')
|
990 |
|
991 |
+
with gr.Row():
|
992 |
with gr.Column():
|
993 |
+
HelpTip("Select your preferred model, or click Recommend for the best-performing model based on the selected task, family, and whether the compound was trained."
|
994 |
+
"Please refer to documentation for detailed benchamrk results."
|
995 |
+
)
|
996 |
+
target_identify_preset = gr.Dropdown(list(PRESET_MAP.keys()), label='Step 5. Select a Preset Model')
|
|
|
997 |
identify_preset_recommend_btn = gr.Button(value='Recommend a model', variant='primary')
|
998 |
|
999 |
+
with gr.Row():
|
1000 |
+
with gr.Column():
|
1001 |
+
target_identify_email = gr.Textbox(
|
1002 |
+
label='Step 6. Email (Optional)',
|
1003 |
+
info="If an email is provided, a notification email will be sent to you when your job is completed."
|
1004 |
+
)
|
1005 |
|
1006 |
with gr.Row(visible=True):
|
1007 |
# target_identify_clr_btn = gr.ClearButton(size='lg')
|
|
|
1367 |
screen_df = pd.read_csv(Path('data/drug_libraries', DRUG_LIBRARY_MAP[library]))
|
1368 |
else:
|
1369 |
screen_df = process_drug_library_upload(library_upload)
|
1370 |
+
print(screen_df.shape)
|
1371 |
if len(screen_df) >= CUSTOM_DATASET_MAX_LEN:
|
1372 |
raise gr.Error(f'The uploaded compound library has more records '
|
1373 |
f'than the allowed maximum (CUSTOM_DATASET_MAX_LEN).')
|
|
|
1605 |
demo.launch(
|
1606 |
show_api=False,
|
1607 |
)
|
1608 |
+
|
1609 |
+
#%%
|