Spaces:

hlnicholls
/

CMR-HF-Coloc

Sleeping

App Files Files Community

hlnicholls committed on Oct 24, 2024

Commit

f3dc1ee

verified ·

1 Parent(s): a72dad9

Upload 5 files

Browse files

Files changed (5) hide show

STRINGdb_data.tsv +14 -0
app.py +206 -0
banner.png +0 -0
colocalisation_results.csv +0 -0
requirements.txt +6 -0

STRINGdb_data.tsv ADDED Viewed

	@@ -0,0 +1,14 @@

+node1	node2	node1_string_id	node2_string_id	neighborhood_on_chromosome	gene_fusion	phylogenetic_cooccurrence	homology	coexpression	experimentally_determined_interaction	database_annotated	automated_textmining	combined_score
+CRHR1	MAPT	9606.ENSP00000381333	9606.ENSP00000340820	0	0	0	0	0.172	0	0	0.717	0.755
+EPHA2	ITGB3	9606.ENSP00000351209	9606.ENSP00000452786	0	0	0	0	0.056	0.300	0	0.366	0.544
+EPHA2	PTPN11	9606.ENSP00000351209	9606.ENSP00000489597	0	0	0	0	0.083	0.455	0	0.773	0.877
+ITGB3	RAF1	9606.ENSP00000452786	9606.ENSP00000401888	0	0	0	0	0	0.052	0.500	0.086	0.529
+ITGB3	PTPN11	9606.ENSP00000452786	9606.ENSP00000489597	0	0	0	0	0.106	0.328	0	0.492	0.668
+MAPT	NPEPPS	9606.ENSP00000340820	9606.ENSP00000320324	0	0	0	0	0.056	0.319	0	0.480	0.636
+MAPT	PTPN11	9606.ENSP00000340820	9606.ENSP00000489597	0	0	0	0	0	0.303	0	0.513	0.646
+MYL2	RPL6	9606.ENSP00000228841	9606.ENSP00000403172	0	0	0	0	0.067	0	0	0.568	0.580
+MYL2	TNNT3	9606.ENSP00000228841	9606.ENSP00000370975	0	0	0	0	0.510	0.127	0.500	0.500	0.879
+MYL2	MYL4	9606.ENSP00000228841	9606.ENSP00000347055	0	0	0	0.673	0.168	0.311	0.900	0.584	0.973
+MYL4	TNNT3	9606.ENSP00000347055	9606.ENSP00000370975	0	0	0	0	0.157	0.127	0.500	0.395	0.747
+PTPN11	RAF1	9606.ENSP00000489597	9606.ENSP00000401888	0	0	0	0	0.095	0.098	0	0.693	0.728
+RPL6	RPL7A	9606.ENSP00000403172	9606.ENSP00000361076	0	0	0	0	0.990	0.995	0.720	0.712	0.999

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import streamlit as st
+import re
+import pandas as pd
+import networkx as nx
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib import cm
+# Page header: banner image, then a centred title rendered from raw HTML
+# (which is why unsafe_allow_html is passed).
+st.image("banner.png", use_column_width=True)
+st.markdown(
+    "<h1 style='text-align: center;'>CMR and Heart Failure Colocalisation Viewer</h1>",
+    unsafe_allow_html=True
+)
+# Introductory description shown under the title. The text itself contains no
+# HTML tags, although unsafe_allow_html is still passed.
+st.markdown(
+    """
+    This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
+    You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
+    Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
+    """,
+    unsafe_allow_html=True
+)
+# Load and prepare colocalisation results
+annotations = pd.read_csv("colocalisation_results.csv")
+# NOTE: fillna(0) is applied to every column, so missing values in any text
+# column also become the integer 0 (not an empty string).
+annotations.fillna(0, inplace=True)
+# Index by gene symbol; the filters below match against annotations.index.
+annotations = annotations.set_index("Gene")
+# Define a function to collect genes from input
+collect_genes = lambda x: [str(i) for i in re.split(",|,\s+|\s+", x) if i != ""]
+# Free-text box for several gene symbols; the raw string is tokenised by collect_genes.
+input_gene_list = st.text_input("Input a list of multiple HGNC genes (enter comma separated):")
+gene_list = collect_genes(input_gene_list)
+# Function to convert DataFrame to CSV for download
+@st.cache_data
+def convert_df(df):
+    """Serialise ``df`` to CSV text (row index omitted) and return it as UTF-8 bytes."""
+    return df.to_csv(index=False).encode('utf-8')
+# Filter based on gene list
+st.markdown("### View colocalisation results for selected genes or the entire dataset.")
+if len(gene_list) > 1:
+    # Filter for input gene list
+    df = annotations[annotations.index.isin(gene_list)]
+    df['Gene'] = df.index
+    df.reset_index(drop=True, inplace=True)
+    # Reorder columns to have "Gene" as the first column
+    df = df[['Gene'] + [col for col in df.columns if col != 'Gene']]
+    # Display the filtered results
+    st.dataframe(df)
+    output = df[['Gene']]
+    csv = convert_df(output)
+    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')
+# Add a new search box for filtering by disease name
+input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")
+if input_disease:
+    # Search for partial matches in the "terms_drug" column
+    df_disease_filtered = annotations[annotations['terms_drug'].str.contains(input_disease, case=False, na=False)]
+    if not df_disease_filtered.empty:
+        st.markdown(f"### Colocalisation results for disease: {input_disease}")
+        df_disease_filtered['Gene'] = df_disease_filtered.index
+        df_disease_filtered.reset_index(drop=True, inplace=True)
+        # Reorder columns to have "Gene" as the first column
+        df_disease_filtered = df_disease_filtered[['Gene'] + [col for col in df_disease_filtered.columns if col != 'Gene']]
+        # Display filtered dataframe
+        st.dataframe(df_disease_filtered)
+        # Convert filtered dataframe to CSV for download
+        csv_disease_filtered = convert_df(df_disease_filtered)
+        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
+    else:
+        st.write(f"No results found for disease: {input_disease}")
+# Display individual gene details if a single gene is input
+input_gene = st.text_input("Input an individual HGNC gene:")
+if input_gene:
+    df2 = annotations[annotations.index == input_gene]
+    if not df2.empty:
+        df2['Gene'] = df2.index
+        df2.reset_index(drop=True, inplace=True)
+        # Reorder columns to have "Gene" as the first column
+        df2 = df2[['Gene'] + [col for col in df2.columns if col != 'Gene']]
+        st.dataframe(df2)
+        # Provide a link to the gene's DrugnomeAI page
+        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={input_gene}"
+        markdown_link = f"[{input_gene} druggability in DrugnomeAI]({url})"
+        st.markdown(markdown_link, unsafe_allow_html=True)
+    else:
+        st.write("Gene not found in the dataset.")
+# Display the entire dataset with download option
+st.markdown("### All Colocalisation Results Interacting with Drugs")
+df_total_output = annotations.copy()
+df_total_output['Gene'] = df_total_output.index
+df_total_output.reset_index(drop=True, inplace=True)
+# Reorder columns to have "Gene" as the first column
+df_total_output = df_total_output[['Gene'] + [col for col in df_total_output.columns if col != 'Gene']]
+st.dataframe(df_total_output)
+csv = convert_df(df_total_output)
+# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')
+# Protein interaction network visualization using STRINGDB_data.tsv
+st.markdown(
+    "<h1 style='text-align: center;'>Protein Interaction Networks of Colocalising Drug Targets</h1>",
+    unsafe_allow_html=True
+)
+# Legend explaining how node colour, node size, edge colour and node outline
+# are used in the plot drawn below.
+st.markdown(
+    """
+    - The color of each node represents its degree (number of direct connections it has with other nodes).
+    - The size of each node represents its betweenness centrality.
+    - Larger nodes play a more central role in the network, facilitating communication between other proteins.
+    - Node edges/connections are colour-coded by confidence of PPI (lighter colors (brighter) represent stronger interactions).
+    - Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
+    """,
+    unsafe_allow_html=True
+)
+# Load STRINGDB dataset
+ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')
+# Create a graph from the STRINGDB PPI data
+G = nx.Graph()
+# Add edges to the graph based on PPI data
+for index, row in ppi_data.iterrows():
+    G.add_edge(row['node1'], row['node2'], weight=row['combined_score'])
+# Function to rescale values to a given range
+def rescale(l, newmin, newmax):
+    arr = list(l)
+    return [(x - min(arr)) / (max(arr) - min(arr)) * (newmax - newmin) + newmin for x in arr]
+# Use the plasma colormap
+graph_colormap = plt.get_cmap('plasma', 12)
+# Node color varies with Degree
+c = rescale([G.degree(v) for v in G], 0.0, 0.9)
+c = [graph_colormap(i) for i in c]
+# Node size varies with betweeness centrality - map to range [1500, 7000]
+bc = nx.betweenness_centrality(G)
+s = rescale([v for v in bc.values()], 1500, 7000)
+# Edge width shows 1 - weight (to convert cost back to strength of interaction)
+ew = rescale([float(G[u][v]['weight']) for u, v in G.edges], 0.1, 4)
+ec = rescale([float(G[u][v]['weight']) for u, v in G.edges], 0.1, 1)
+ec = [graph_colormap(i) for i in ec]
+# Adjust spring_layout parameters to bring the networks closer together
+pos = nx.spring_layout(G, k=0.5)
+# Prepare to highlight genes with "Cardiovascular_Drug" as "Yes"
+highlighted_nodes = annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index
+# Draw the network plot
+plt.figure(figsize=(19, 9), facecolor='white')
+# Draw the nodes with black outline for highlighted ones
+nx.draw_networkx_nodes(G, pos, node_color=c, node_size=s, edgecolors=['black' if node in highlighted_nodes else 'none' for node in G], linewidths=2)
+# Draw the edges
+nx.draw_networkx_edges(G, pos, edge_color=ec, width=ew)
+# Draw node labels with customized font color based on degree
+# Draw node labels with customized font color based on degree
+for node, (x, y) in pos.items():
+    # Determine font color
+    font_color = 'white' if G.degree(node) < np.median([G.degree(n) for n in G]) else 'black'
+    # Dynamically adjust font size for nodes with white text (smaller font size to fit inside node)
+    if font_color == 'white':
+        font_size = min(s[list(G.nodes).index(node)] * 0.01, 10)  # Adjust the multiplier and limit font size
+    else:
+        font_size = 12  # Default size for black font
+    plt.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)
+# Add a colorbar to represent the node degree color scale
+sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
+sm.set_array([])
+cbar = plt.colorbar(sm)
+cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)
+plt.axis('off')
+# Display the network plot in the Streamlit app directly
+st.pyplot(plt)

banner.png ADDED Viewed

colocalisation_results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+numpy==1.23.4
+altair==5.1.2
+pandas==2.0.3
+plotly==5.20.0
+matplotlib==3.4.3
+networkx