mgyigit committed on
Commit
3054c11
·
verified ·
1 Parent(s): 7dcad68

Update src/bin/function_predictor.py

Browse files
Files changed (1) hide show
  1. src/bin/function_predictor.py +11 -7
src/bin/function_predictor.py CHANGED
@@ -1,4 +1,7 @@
1
  # -*- coding: utf-8 -*-
 
 
 
2
  import pandas as pd
3
  import numpy as np
4
  from datetime import datetime
@@ -63,7 +66,8 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
63
  y_pred = cross_val_predict(clf, Xn, y, cv=kf)
64
 
65
  if detailed_output:
66
- with open(r"../results/Ontology_based_function_prediction_{1}_{0}_model.pkl".format(representation_name,dataset.split(".")[0]),"wb") as file:
 
67
  pickle.dump(clf,file)
68
 
69
  acc_cv = []
@@ -114,7 +118,7 @@ def MultiLabelSVC_cross_val_predict(representation_name, dataset, X, y, classifi
114
 
115
  def ProtDescModel():
116
  #desc_file = pd.read_csv(r"protein_representations\final\{0}_dim{1}.tsv".format(representation_name,desc_dim),sep="\t")
117
- datasets = os.listdir(r"../data/auxilary_input/GO_datasets")
118
  if dataset_type == "All_Data_Sets" and aspect_type == "All_Aspects":
119
  filtered_datasets = datasets
120
  elif dataset_type == "All_Data_Sets":
@@ -129,7 +133,7 @@ def ProtDescModel():
129
 
130
  for dt in tqdm(filtered_datasets,total=len(filtered_datasets)):
131
  print(r"Protein function prediction is started for the dataset: {0}".format(dt.split(".")[0]))
132
- dt_file = pd.read_csv(r"../data/auxilary_input/GO_datasets/{0}".format(dt),sep="\t")
133
  dt_merge = dt_file.merge(representation_dataframe,left_on="Protein_Id",right_on="Entry")
134
 
135
  dt_X = dt_merge['Vector']
@@ -149,7 +153,7 @@ def ProtDescModel():
149
  predictions = dt_merge.iloc[:,:6]
150
  predictions["predicted_values"] = list(model[3].toarray())
151
  if detailed_output:
152
- predictions.to_csv(r"../results/Ontology_based_function_prediction_{1}_{0}_predictions.tsv".format(representation_name,dt.split(".")[0]),sep="\t",index=None)
153
 
154
  return (cv_results, cv_mean_results,cv_std_results)
155
 
@@ -164,7 +168,7 @@ def pred_output():
164
  for i in cv_result:
165
  df_cv_result.loc[len(df_cv_result)] = i
166
  if detailed_output:
167
- df_cv_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_{0}.tsv".format(representation_name),sep="\t",index=None)
168
 
169
  cv_mean_result = model[1]
170
  df_cv_mean_result = pd.DataFrame({"Model": pd.Series([], dtype='str') ,"Accuracy": pd.Series([], dtype='float'),"F1_Micro": pd.Series([], dtype='float'),\
@@ -178,7 +182,7 @@ def pred_output():
178
 
179
  for j in cv_mean_result:
180
  df_cv_mean_result.loc[len(df_cv_mean_result)] = j
181
- df_cv_mean_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_mean_{0}.tsv".format(representation_name),sep="\t",index=None)
182
 
183
  #save std deviation of scores to file
184
  cv_std_result = model[2]
@@ -193,7 +197,7 @@ def pred_output():
193
 
194
  for k in cv_std_result:
195
  df_cv_std_result.loc[len(df_cv_std_result)] = k
196
- df_cv_std_result.to_csv(r"../results/Ontology_based_function_prediction_5cv_std_{0}.tsv".format(representation_name),sep="\t",index=None)
197
 
198
  print(datetime.now())
199
 
 
1
  # -*- coding: utf-8 -*-
2
+ import os
3
+ script_dir = os.path.dirname(os.path.abspath(__file__))
4
+
5
  import pandas as pd
6
  import numpy as np
7
  from datetime import datetime
 
66
  y_pred = cross_val_predict(clf, Xn, y, cv=kf)
67
 
68
  if detailed_output:
69
+ ont_path = r"../results/Ontology_based_function_prediction_{1}_{0}_model.pkl".format(representation_name,dataset.split(".")[0])
70
+ with open(os.path.join(script_dir, ont_path),"wb") as file:
71
  pickle.dump(clf,file)
72
 
73
  acc_cv = []
 
118
 
119
  def ProtDescModel():
120
  #desc_file = pd.read_csv(r"protein_representations\final\{0}_dim{1}.tsv".format(representation_name,desc_dim),sep="\t")
121
+ datasets = os.listdir(os.path.join(script_dir, r"../data/auxilary_input/GO_datasets"))
122
  if dataset_type == "All_Data_Sets" and aspect_type == "All_Aspects":
123
  filtered_datasets = datasets
124
  elif dataset_type == "All_Data_Sets":
 
133
 
134
  for dt in tqdm(filtered_datasets,total=len(filtered_datasets)):
135
  print(r"Protein function prediction is started for the dataset: {0}".format(dt.split(".")[0]))
136
+ dt_file = pd.read_csv(os.path.join(script_dir, r"../data/auxilary_input/GO_datasets/{0}".format(dt)),sep="\t")
137
  dt_merge = dt_file.merge(representation_dataframe,left_on="Protein_Id",right_on="Entry")
138
 
139
  dt_X = dt_merge['Vector']
 
153
  predictions = dt_merge.iloc[:,:6]
154
  predictions["predicted_values"] = list(model[3].toarray())
155
  if detailed_output:
156
+ predictions.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_{1}_{0}_predictions.tsv".format(representation_name,dt.split(".")[0])),sep="\t",index=None)
157
 
158
  return (cv_results, cv_mean_results,cv_std_results)
159
 
 
168
  for i in cv_result:
169
  df_cv_result.loc[len(df_cv_result)] = i
170
  if detailed_output:
171
+ df_cv_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_{0}.tsv".format(representation_name)),sep="\t",index=None)
172
 
173
  cv_mean_result = model[1]
174
  df_cv_mean_result = pd.DataFrame({"Model": pd.Series([], dtype='str') ,"Accuracy": pd.Series([], dtype='float'),"F1_Micro": pd.Series([], dtype='float'),\
 
182
 
183
  for j in cv_mean_result:
184
  df_cv_mean_result.loc[len(df_cv_mean_result)] = j
185
+ df_cv_mean_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_mean_{0}.tsv".format(representation_name)),sep="\t",index=None)
186
 
187
  #save std deviation of scores to file
188
  cv_std_result = model[2]
 
197
 
198
  for k in cv_std_result:
199
  df_cv_std_result.loc[len(df_cv_std_result)] = k
200
+ df_cv_std_result.to_csv(os.path.join(script_dir, r"../results/Ontology_based_function_prediction_5cv_std_{0}.tsv".format(representation_name)),sep="\t",index=None)
201
 
202
  print(datetime.now())
203