Spaces:
Running
Running
Erva Ulusoy
commited on
Commit
·
51641fb
1
Parent(s):
8f4b741
added coloring to predicted/ground truth go term edges
Browse files- visualize_kg.py +37 -29
visualize_kg.py
CHANGED
@@ -63,7 +63,6 @@ def _gather_protein_edges(data, protein_id):
|
|
63 |
return protein_edges
|
64 |
|
65 |
def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
|
66 |
-
|
67 |
filtered_edges = {}
|
68 |
|
69 |
prediction_categories = prediction_df['GO_category'].unique()
|
@@ -75,32 +74,35 @@ def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
|
|
75 |
if edges is None or len(edges) == 0:
|
76 |
continue
|
77 |
|
78 |
-
if edge_type[2]
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
98 |
else:
|
99 |
-
#
|
100 |
-
filtered_edges[edge_type] = [(edge,
|
101 |
else:
|
102 |
-
# For non-GO edges, include all edges up to limit
|
103 |
-
filtered_edges[edge_type] = [(edge, None) for edge in list(edges)[:limit]]
|
104 |
|
105 |
return filtered_edges
|
106 |
|
@@ -186,7 +188,7 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
186 |
source_type, relation_type, target_type = edge_type
|
187 |
|
188 |
for edge_info in edges:
|
189 |
-
edge, probability = edge_info
|
190 |
source, target = edge[0], edge[1]
|
191 |
source_str = str(source)
|
192 |
target_str = str(target)
|
@@ -218,10 +220,16 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
218 |
# Add edge with relationship type and probability as label
|
219 |
edge_label = f"{relation_type}"
|
220 |
if probability is not None:
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
222 |
net.add_edge(source_str, target_str,
|
223 |
label=edge_label,
|
224 |
-
|
|
|
225 |
title=edge_label,
|
226 |
length=200,
|
227 |
smooth={'type': 'curvedCW', 'roundness': 0.1})
|
@@ -229,7 +237,7 @@ def visualize_protein_subgraph(data, protein_id, prediction_df, limit=10):
|
|
229 |
net.add_edge(source_str, target_str,
|
230 |
label=edge_label,
|
231 |
font={'size': 0},
|
232 |
-
color='#666666',
|
233 |
title=edge_label,
|
234 |
length=200,
|
235 |
smooth={'type': 'curvedCW', 'roundness': 0.1})
|
|
|
63 |
return protein_edges
|
64 |
|
65 |
def _filter_edges(protein_id, protein_edges, prediction_df, limit=10):
|
|
|
66 |
filtered_edges = {}
|
67 |
|
68 |
prediction_categories = prediction_df['GO_category'].unique()
|
|
|
74 |
if edges is None or len(edges) == 0:
|
75 |
continue
|
76 |
|
77 |
+
if edge_type[2].startswith('GO_term'): # Check if it's any GO term edge
|
78 |
+
if edge_type[2] in prediction_categories:
|
79 |
+
# Handle edges for GO terms that are in prediction_df
|
80 |
+
category_mask = (prediction_df['GO_category'] == go_category_reverse_mapping[edge_type[2]]) & (prediction_df['UniProt_ID'] == protein_id)
|
81 |
+
category_predictions = prediction_df[category_mask]
|
82 |
+
|
83 |
+
if len(category_predictions) > 0:
|
84 |
+
category_predictions = category_predictions.sort_values(by='Probability', ascending=False)
|
85 |
+
edges_set = set(edges) # Convert to set for O(1) lookup
|
86 |
+
|
87 |
+
valid_edges = []
|
88 |
+
for _, row in category_predictions.iterrows():
|
89 |
+
term = row['GO_ID']
|
90 |
+
prob = row['Probability']
|
91 |
+
edge = (protein_id, term)
|
92 |
+
is_ground_truth = edge in edges_set
|
93 |
+
valid_edges.append((edge, prob, is_ground_truth))
|
94 |
+
if len(valid_edges) >= limit:
|
95 |
+
break
|
96 |
+
filtered_edges[edge_type] = valid_edges
|
97 |
+
else:
|
98 |
+
# If no predictions but it's a GO category in prediction_df
|
99 |
+
filtered_edges[edge_type] = [(edge, 'no_pred', True) for edge in list(edges)[:limit]]
|
100 |
else:
|
101 |
+
# For GO terms not in prediction_df, mark them as ground truth with blue color
|
102 |
+
filtered_edges[edge_type] = [(edge, 'no_pred', True) for edge in list(edges)[:limit]]
|
103 |
else:
|
104 |
+
# For non-GO edges, include all edges up to limit
|
105 |
+
filtered_edges[edge_type] = [(edge, None, True) for edge in list(edges)[:limit]]
|
106 |
|
107 |
return filtered_edges
|
108 |
|
|
|
188 |
source_type, relation_type, target_type = edge_type
|
189 |
|
190 |
for edge_info in edges:
|
191 |
+
edge, probability, is_ground_truth = edge_info
|
192 |
source, target = edge[0], edge[1]
|
193 |
source_str = str(source)
|
194 |
target_str = str(target)
|
|
|
220 |
# Add edge with relationship type and probability as label
|
221 |
edge_label = f"{relation_type}"
|
222 |
if probability is not None:
|
223 |
+
if probability == 'no_pred':
|
224 |
+
edge_color = '#219ebc'
|
225 |
+
edge_label += '(P=Not generated)'
|
226 |
+
else:
|
227 |
+
edge_label += f"(P={probability:.2f})"
|
228 |
+
edge_color = '#c1121f' if is_ground_truth else '#219ebc'
|
229 |
net.add_edge(source_str, target_str,
|
230 |
label=edge_label,
|
231 |
+
font={'size': 0},
|
232 |
+
color=edge_color,
|
233 |
title=edge_label,
|
234 |
length=200,
|
235 |
smooth={'type': 'curvedCW', 'roundness': 0.1})
|
|
|
237 |
net.add_edge(source_str, target_str,
|
238 |
label=edge_label,
|
239 |
font={'size': 0},
|
240 |
+
color='#666666', # Keep default gray for non-GO edges
|
241 |
title=edge_label,
|
242 |
length=200,
|
243 |
smooth={'type': 'curvedCW', 'roundness': 0.1})
|