PROBE

Sleeping

App Files Community

mgyigit committed on Sep 30, 2024

Commit

3054c11

verified ·

1 Parent(s): 7dcad68

Update src/bin/function_predictor.py

Browse files

Files changed (1) hide show

src/bin/function_predictor.py +11 -7

src/bin/function_predictor.py CHANGED Viewed

@@ -1,4 +1,7 @@
 # -*- coding: utf-8 -*-
 import pandas as pd
 import numpy as np
 from datetime import datetime
@@ -63,7 +66,8 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
     y_pred = cross_val_predict(clf, Xn, y, cv=kf)
     if detailed_output:
-        with open(r"../results/Ontology_based_function_prediction_{1}_{0}_model.pkl".format(representation_name,dataset.split(".")[0]),"wb") as file:
             pickle.dump(clf,file)
     acc_cv = []
@@ -114,7 +118,7 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
 def ProtDescModel():
     #desc_file = pd.read_csv(r"protein_representations\final\{0}_dim{1}.tsv".format(representation_name,desc_dim),sep="\t")
-    datasets = os.listdir(r"../data/auxilary_input/GO_datasets")
     if  dataset_type == "All_Data_Sets" and aspect_type == "All_Aspects":
         filtered_datasets = datasets
     elif dataset_type == "All_Data_Sets":
@@ -129,7 +133,7 @@ def ProtDescModel():
     for dt in tqdm(filtered_datasets,total=len(filtered_datasets)):
         print(r"Protein function prediction is started for the dataset: {0}".format(dt.split(".")[0]))
-        dt_file = pd.read_csv(r"../data/auxilary_input/GO_datasets/{0}".format(dt),sep="\t")
         dt_merge = dt_file.merge(representation_dataframe,left_on="Protein_Id",right_on="Entry")
         dt_X = dt_merge['Vector']
@@ -149,7 +153,7 @@ def ProtDescModel():
         predictions = dt_merge.iloc[:,:6]
         predictions["predicted_values"] = list(model[3].toarray())
         if detailed_output:
-            predictions.to_csv(r"../results/Ontology_based_function_prediction_{1}_{0}_predictions.tsv".format(representation_name,dt.split(".")[0]),sep="\t",index=None)
     return (cv_results, cv_mean_results,cv_std_results)
@@ -164,7 +168,7 @@ def pred_output():
     for i in cv_result:
         df_cv_result.loc[len(df_cv_result)] = i
     if detailed_output:
-        df_cv_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_{0}.tsv".format(representation_name),sep="\t",index=None)
     cv_mean_result = model[1]
     df_cv_mean_result =  pd.DataFrame({"Model": pd.Series([], dtype='str') ,"Accuracy": pd.Series([], dtype='float'),"F1_Micro": pd.Series([], dtype='float'),\
@@ -178,7 +182,7 @@ def pred_output():
     for j in cv_mean_result:
         df_cv_mean_result.loc[len(df_cv_mean_result)] = j
-    df_cv_mean_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_mean_{0}.tsv".format(representation_name),sep="\t",index=None)
 #save std deviation of scores to file
     cv_std_result = model[2]
@@ -193,7 +197,7 @@ def pred_output():
     for k in cv_std_result:
         df_cv_std_result.loc[len(df_cv_std_result)] = k
-    df_cv_std_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_std_{0}.tsv".format(representation_name),sep="\t",index=None)
 print(datetime.now())

 # -*- coding: utf-8 -*-
+import os
+script_dir = os.path.dirname(os.path.abspath(__file__))
 import pandas as pd
 import numpy as np
 from datetime import datetime
     y_pred = cross_val_predict(clf, Xn, y, cv=kf)
     if detailed_output:
+        ont_path = r"../results/Ontology_based_function_prediction_{1}_{0}_model.pkl".format(representation_name,dataset.split(".")[0])
+        with open(os.path.join(script_dir, ont_path),"wb") as file:
             pickle.dump(clf,file)
     acc_cv = []
 def ProtDescModel():
     #desc_file = pd.read_csv(r"protein_representations\final\{0}_dim{1}.tsv".format(representation_name,desc_dim),sep="\t")
+    datasets = os.listdir(os.path.join(script_dir, r"../data/auxilary_input/GO_datasets"))
     if  dataset_type == "All_Data_Sets" and aspect_type == "All_Aspects":
         filtered_datasets = datasets
     elif dataset_type == "All_Data_Sets":
     for dt in tqdm(filtered_datasets,total=len(filtered_datasets)):
         print(r"Protein function prediction is started for the dataset: {0}".format(dt.split(".")[0]))
+        dt_file = pd.read_csv(os.path.join(script_dir, r"../data/auxilary_input/GO_datasets/{0}".format(dt)),sep="\t")
         dt_merge = dt_file.merge(representation_dataframe,left_on="Protein_Id",right_on="Entry")
         dt_X = dt_merge['Vector']
         predictions = dt_merge.iloc[:,:6]
         predictions["predicted_values"] = list(model[3].toarray())
         if detailed_output:
+            predictions.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_{1}_{0}_predictions.tsv".format(representation_name,dt.split(".")[0])),sep="\t",index=None)
     return (cv_results, cv_mean_results,cv_std_results)
     for i in cv_result:
         df_cv_result.loc[len(df_cv_result)] = i
     if detailed_output:
+        df_cv_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_{0}.tsv".format(representation_name)),sep="\t",index=None)
     cv_mean_result = model[1]
     df_cv_mean_result =  pd.DataFrame({"Model": pd.Series([], dtype='str') ,"Accuracy": pd.Series([], dtype='float'),"F1_Micro": pd.Series([], dtype='float'),\
     for j in cv_mean_result:
         df_cv_mean_result.loc[len(df_cv_mean_result)] = j
+    df_cv_mean_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_mean_{0}.tsv".format(representation_name)),sep="\t",index=None)
 #save std deviation of scores to file
     cv_std_result = model[2]
     for k in cv_std_result:
         df_cv_std_result.loc[len(df_cv_std_result)] = k
+    df_cv_std_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_std_{0}.tsv".format(representation_name)),sep="\t",index=None)
 print(datetime.now())