Spaces:

HUBioDataLab
/

ProtHGT

Running

App Files Community

Erva Ulusoy committed on Feb 3

Commit

e6abfd3

1 Parent(s): 7f3941f

fixed bugs on new edge index assignments

Browse files

Files changed (1) hide show

run_prothgt_app.py +14 -14

run_prothgt_app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import yaml
 import os
 from datasets import load_dataset
 import gdown
 class ProtHGT(torch.nn.Module):
     def __init__(self, data,hidden_channels, num_heads, num_layers, mlp_hidden_layers, mlp_dropout):
@@ -45,23 +46,22 @@ class ProtHGT(torch.nn.Module):
         return self.mlp(z).view(-1), x_dict
-def _load_data(heterodata, protein_ids, go_category=None):
     """Process the loaded heterodata for specific proteins and GO categories."""
     # Get protein indices for all input proteins
     protein_indices = [heterodata['Protein']['id_mapping'][pid] for pid in protein_ids]
-    # Create edge indices for prediction
-    categories = [go_category] if go_category else ['GO_term_F', 'GO_term_P', 'GO_term_C']
-    for category in categories:
-        # Create pairs for all proteins with all GO terms
-        n_terms = len(heterodata[category]['id_mapping'])
-        protein_indices_repeated = torch.tensor(protein_indices).repeat_interleave(n_terms)
-        term_indices = torch.arange(n_terms).repeat(len(protein_indices))
-        edge_index = torch.stack([protein_indices_repeated, term_indices])
-        heterodata.edge_index_dict[('Protein', 'protein_function', category)] = edge_index
     return heterodata
 def get_available_proteins(protein_list_file='data/available_proteins.txt'):
@@ -169,7 +169,7 @@ def generate_prediction_df(protein_ids, model_paths, model_config_paths, go_cate
         print(f'Generating predictions for {go_cat}...')
         # Process data for current GO category
-        processed_data = _load_data(heterodata, protein_ids, go_cat)
         # Load model config
         with open(model_config_path, 'r') as file:

 import os
 from datasets import load_dataset
 import gdown
+import copy
 class ProtHGT(torch.nn.Module):
     def __init__(self, data,hidden_channels, num_heads, num_layers, mlp_hidden_layers, mlp_dropout):
         return self.mlp(z).view(-1), x_dict
+def _load_data(heterodata, protein_ids, go_category):
     """Process the loaded heterodata for specific proteins and GO categories."""
     # Get protein indices for all input proteins
     protein_indices = [heterodata['Protein']['id_mapping'][pid] for pid in protein_ids]
+    n_terms = len(heterodata[go_category]['id_mapping'])
+    all_edges = []
+    for protein_idx in protein_indices:
+        for term_idx in range(n_terms):
+            all_edges.append([protein_idx, term_idx])
+    edge_index = torch.tensor(all_edges).t()
+    heterodata[('Protein', 'protein_function', go_category)].edge_index = edge_index
+    heterodata[(go_category, 'rev_protein_function', 'Protein')].edge_index = torch.stack([edge_index[1], edge_index[0]])
     return heterodata
 def get_available_proteins(protein_list_file='data/available_proteins.txt'):
         print(f'Generating predictions for {go_cat}...')
         # Process data for current GO category
+        processed_data = _load_data(copy.deepcopy(heterodata), protein_ids, go_cat)
         # Load model config
         with open(model_config_path, 'r') as file: