File size: 3,972 Bytes
84bfd88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import plotly.graph_objects as go
import numpy as np
import pandas as pd
from model.model import DTIModel


dt_str = "14062024_0910"


def make_spider_plot(predictions, model_names, smiles_list):
    fig = go.Figure()

    for i, (prediction, smiles) in enumerate(zip(predictions, smiles_list)):
        fig.add_trace(go.Scatterpolar(
            r=prediction,
            theta=model_names,
            fill='toself',
            name=smiles
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )),
        showlegend=True
    )

    return fig


def predict_and_plot(amino_acid_sequence, smiles_input, datasets):
    model_ensemble = {}

    gbm_model_paths = {
        "BindingDB": f"model/xgb_models/xgb_model_BindingDB_{dt_str}_bt_optimized_0.json",
        "BioSNAP": f"model/xgb_models/xgb_model_BIOSNAP_full_data_{dt_str}_bt_optimized_0.json",
        "DAVIS": f"model/xgb_models/xgb_model_DAVIS_{dt_str}_bt_optimized_0.json",
        "BarlowDTI XXL": f"model/xgb_models/{dt_str}_barlowdti_xxl_model.json",
    }

    for model in datasets:
        print(f"Loading model {model}")

        model_ensemble[model] = DTIModel(
            bt_model_path=f"model/stash/{dt_str}",
            gbm_model_path=gbm_model_paths[model],
        )

    smiles_list = smiles_input.strip().split('\n')
    predictions = []
    for model in model_ensemble.values():
        model_predictions = model.predict(smiles_list, amino_acid_sequence)
        predictions.append(model_predictions)
    
    predictions = np.array(predictions).transpose().tolist()
    
    df = pd.DataFrame(predictions, index=smiles_list, columns=datasets).reset_index()
    df.columns = ["SMILES"] + datasets
    
    fig = make_spider_plot(predictions, datasets, smiles_list)
    
    return fig, df


dataset_names = [
    "BarlowDTI XXL",
    "BindingDB",
    "BioSNAP",
    "DAVIS",
]

title = "Predict Drug-Target Interactions with <span style='font-variant:small-caps;'>BarlowDTI</span>"

description = """
Input Amino Acid Sequence and SMILES to get interaction predictions visualized as a spider graph and in a table. 
The values ca be interpreted as the probability of interaction between the drug and target (0 = no interaction, 1 = interaction).

__Note: Inference may take a loger time, you can upgrade to a paid GPU-enabled plan for faster inference.__
"""

article = """
This interface enables the use of <span style='font-variant:small-caps;'>BarlowDTI</span> to predict drug-target interactions. 
The model ensemble consists of three models trained on different datasets: BindingDB, BIOSNAP, and DAVIS.

If you use this interface in your research, please cite our paper:
```
@misc{schuh2024barlowtwinsdeepneural,
      title={Barlow Twins Deep Neural Network for Advanced 1D Drug-Target Interaction Prediction}, 
      author={Maximilian G. Schuh and Davide Boldini and Stephan A. Sieber},
      year={2024},
      eprint={2408.00040},
      archivePrefix={arXiv},
      primaryClass={q-bio.BM},
      url={https://arxiv.org/abs/2408.00040}, 
}
```
"""

theme = gr.themes.Base(
    primary_hue="violet",
    font=[gr.themes.GoogleFont('IBM Plex Sans'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
)

iface = gr.Interface(
    fn=predict_and_plot,
    inputs=[
        gr.Textbox(label="Protein Sequence", info="Just one sequence is allowed. Remove FASTA syntax (e.g. >ABC)."), 
        gr.Textbox(label="Molecule SMILES", info="One per line, multiple allowed."),
        gr.CheckboxGroup(choices=dataset_names, label="Select Models for Prediction", value="BarlowDTI XXL")
    ],
    outputs=[
        gr.Plot(label="Predictions Visualization"), 
        gr.DataFrame(label="Predictions DataFrame"),
        # gr.DownloadButton(label="Download Predictions")
    ],
    title=title,
    description=description,
    article=article,
    theme=theme
)

iface.launch()