File size: 8,408 Bytes
f3dc1ee
 
 
 
 
 
 
 
 
 
608897b
f3dc1ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608897b
 
 
f3dc1ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2fcde1
 
 
f3dc1ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import streamlit as st
import re
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

# Page header: banner image, then a centred HTML title
st.image("banner.png", use_column_width=True)
page_title_html = "<h1 style='text-align: center;'>CMR and Heart Failure Colocalisation Drug Interaction Viewer</h1>"
st.markdown(page_title_html, unsafe_allow_html=True)

# Introductory paragraph shown directly under the title
page_intro = """
    This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
    You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
    Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
    """
st.markdown(page_intro, unsafe_allow_html=True)

# Read the colocalisation table, replace missing values with 0 and
# index the rows by the "Gene" column
annotations = (
    pd.read_csv("colocalisation_results.csv")
    .fillna(0)
    .set_index("Gene")
)

# Heading for the gene / disease filter section
filter_section_heading = "### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset."
st.markdown(filter_section_heading)

# Define a function to collect genes from input
def collect_genes(x):
    """Split free-text input into a list of gene tokens.

    Tokens may be separated by commas and/or any run of whitespace. Empty
    tokens (from leading, trailing or repeated separators) are dropped.

    Args:
        x: Raw text, for example ``"TTN, BAG3 MYH7"``.

    Returns:
        The non-empty tokens as strings, in input order.
    """
    # Raw string so that \s reaches the regex engine instead of being parsed
    # as a (invalid) string escape sequence.
    return [token for token in re.split(r"[,\s]+", x) if token]
# Multi-gene query box; collect_genes splits the raw text into individual gene names
input_gene_list = st.text_input(
    "Input a list of multiple HGNC genes (enter comma separated):"
)
gene_list = collect_genes(input_gene_list)

# Serialise a DataFrame to CSV bytes for download (wrapped in st.cache_data)
@st.cache_data
def convert_df(df):
    """Return *df* as UTF-8 encoded CSV bytes, written without the index."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode("utf-8")

if len(gene_list) > 1:
    # Keep only the rows whose gene (the index of `annotations`) was requested.
    # rename_axis + reset_index returns a new frame with "Gene" as the first
    # column, so nothing is assigned onto a filtered slice (which would
    # trigger pandas' SettingWithCopyWarning).
    df = (
        annotations[annotations.index.isin(gene_list)]
        .rename_axis("Gene")
        .reset_index()
    )

    if df.empty:
        # Mirror the "not found" feedback given by the other search boxes
        st.write("None of the input genes were found in the dataset.")
    else:
        # Display the filtered results
        st.dataframe(df)
        output = df[['Gene']]
        csv = convert_df(output)
        # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')

# Add a new search box for filtering by disease name
input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):").strip()

if input_disease:
    # Case-insensitive *literal* substring search in the "terms_drug" column.
    # regex=False matters: the text comes straight from the user, and
    # characters such as "(" or "+" would otherwise be interpreted as a
    # regular expression (raising re.error or matching unintended rows).
    disease_mask = annotations['terms_drug'].str.contains(
        input_disease, case=False, na=False, regex=False
    )

    # rename_axis + reset_index builds a new frame with "Gene" as the first
    # column instead of assigning a column onto a filtered slice.
    df_disease_filtered = annotations[disease_mask].rename_axis("Gene").reset_index()

    if not df_disease_filtered.empty:
        st.markdown(f"### Colocalisation results for disease: {input_disease}")

        # Display filtered dataframe
        st.dataframe(df_disease_filtered)

        # Convert filtered dataframe to CSV for download
        csv_disease_filtered = convert_df(df_disease_filtered)
        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
    else:
        st.write(f"No results found for disease: {input_disease}")

# Display individual gene details if a single gene is input
# Surrounding whitespace is stripped so that e.g. "TTN " still matches exactly.
input_gene = st.text_input("Input an individual HGNC gene:").strip()
if input_gene:
    # Exact match on the gene index; rename_axis + reset_index yields a new
    # frame with "Gene" as the first column rather than assigning a column
    # onto a filtered slice (SettingWithCopyWarning).
    df2 = (
        annotations[annotations.index == input_gene]
        .rename_axis("Gene")
        .reset_index()
    )
    if not df2.empty:
        st.dataframe(df2)

        # Provide a link to the gene's DrugnomeAI page
        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={input_gene}"
        markdown_link = f"[{input_gene} druggability in DrugnomeAI]({url})"
        st.markdown(markdown_link, unsafe_allow_html=True)
    else:
        st.write("Gene not found in the dataset.")

# Show every colocalisation result, with the gene name as a regular leading column
st.markdown("### All Colocalisation Results Interacting with Drugs")

# Turn the gene index into a first column called "Gene" on a fresh frame
df_total_output = annotations.rename_axis("Gene").reset_index()

st.dataframe(df_total_output)
csv = convert_df(df_total_output)
# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')

# Section title for the protein interaction network built from STRINGdb_data.tsv
network_title_html = "<h1 style='text-align: center;'>Protein Interaction Networks of Colocalising Drug Targets</h1>"
st.markdown(network_title_html, unsafe_allow_html=True)

# Legend explaining how node colour, node size, edge colour and outlines are encoded
network_legend = """
    - The colour of each node represents its degree (number of direct connections it has with other nodes).
    - The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
    - Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
    - Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
    """
st.markdown(network_legend, unsafe_allow_html=True)


# Read the tab-separated STRINGdb protein-protein interaction table
ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')

# Build an undirected graph with one edge per table row; the row's
# combined_score is stored as the edge's "weight" attribute
G = nx.Graph()
G.add_weighted_edges_from(
    zip(ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score'])
)

# Function to rescale values to a given range
def rescale(l, newmin, newmax):
    """Linearly map the values of *l* onto the range [newmin, newmax].

    Args:
        l: Iterable of numbers.
        newmin: Output value assigned to the smallest input.
        newmax: Output value assigned to the largest input.

    Returns:
        A list with one rescaled value per input, in input order. An empty
        input gives an empty list. If all inputs are equal there is no spread
        to map, so every element is placed at the midpoint of the target
        range instead of dividing by zero.
    """
    arr = list(l)
    if not arr:
        return []

    # Compute the extremes once rather than once per element
    lo, hi = min(arr), max(arr)
    span = hi - lo
    if span == 0:
        midpoint = (newmin + newmax) / 2
        return [midpoint for _ in arr]

    return [(x - lo) / span * (newmax - newmin) + newmin for x in arr]

# Use the plasma colormap, quantised to 12 colours
graph_colormap = plt.get_cmap('plasma', 12)

# Node colour varies with degree: degrees are rescaled to [0.0, 0.9] and
# looked up in the colormap
c = [graph_colormap(i) for i in rescale([G.degree(v) for v in G], 0.0, 0.9)]

# Node size varies with betweenness centrality - mapped to range [1500, 7000]
bc = nx.betweenness_centrality(G)
s = rescale(bc.values(), 1500, 7000)

# Edge width and edge colour both follow the raw edge weight (a higher
# weight gives a wider edge and a colour further along the colormap).
# The weights are extracted once and reused for both encodings.
edge_weights = [float(w) for _, _, w in G.edges(data='weight')]
ew = rescale(edge_weights, 0.1, 4)
ec = [graph_colormap(i) for i in rescale(edge_weights, 0.1, 1)]

# Adjust spring_layout parameters to bring the networks closer together.
# A fixed seed keeps node positions stable between Streamlit reruns; without
# it the layout is re-randomised every time the script re-executes.
pos = nx.spring_layout(G, k=0.5, seed=42)

# Prepare to highlight genes with "Cardiovascular_Drug" as "Yes"
highlighted_nodes = annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index

# Draw the network plot on an explicit figure/axes so that every artist
# (nodes, edges, labels, colour bar) targets the same, known Axes
fig, ax = plt.subplots(figsize=(19, 9), facecolor='white')

# Draw the nodes with black outline for highlighted ones
highlighted = set(highlighted_nodes)
outline_colors = ['black' if node in highlighted else 'none' for node in G]
nx.draw_networkx_nodes(
    G, pos, node_color=c, node_size=s,
    edgecolors=outline_colors, linewidths=2, ax=ax,
)

# Draw the edges
nx.draw_networkx_edges(G, pos, edge_color=ec, width=ew, ax=ax)

# Draw node labels with customized font color based on degree.
# The median degree and the node -> size lookup are loop-invariant, so they
# are computed once here instead of once per node.
median_degree = np.median([G.degree(n) for n in G])
size_by_node = dict(zip(G.nodes, s))
for node, (x, y) in pos.items():
    if G.degree(node) < median_degree:
        # White text, with a font size scaled to the node size and capped at 10
        font_color = 'white'
        font_size = min(size_by_node[node] * 0.01, 10)
    else:
        # Default size for black font
        font_color = 'black'
        font_size = 12

    ax.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)

# Add a colorbar to represent the node degree color scale.
# The mappable is not attached to any Axes, so `ax` must be given explicitly;
# recent Matplotlib versions refuse to guess which Axes to take space from.
# NOTE(review): the bar spans the colormap on a 0-1 scale, so its ticks are
# relative positions in the colormap rather than raw degree values.
sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)

ax.axis('off')

# Display the network plot in the Streamlit app directly, then release the
# figure: the script re-executes on every interaction and unclosed figures
# would otherwise accumulate in memory.
st.pyplot(fig)
plt.close(fig)