jinysun committed on
Commit
fc06566
·
1 Parent(s): cecc846

Upload 2 files

Browse files
Files changed (2) hide show
  1. dataset.py +4 -2
  2. utils.py +29 -27
dataset.py CHANGED
@@ -149,7 +149,7 @@ class Graph_Regression_Dataset_test(object):
149
  adjoin_matrix.set_shape([None,None])
150
  y.set_shape([None])
151
  return x, adjoin_matrix , y
152
-
153
  class predict_smiles(object):
154
  def __init__(self,smiles ,normalize=False,max_len=1000,addH=True):
155
 
@@ -165,13 +165,15 @@ class predict_smiles(object):
165
  self.min = self.df[self.label_field].min()
166
  self.df[self.label_field] = (self.df[self.label_field]-self.min)/(self.max-self.min)-0.5
167
  self.value_range = self.max-self.min
 
168
  def numerical_smiles(self, atoms_list,adj,label):
169
 
170
  atom = np.array(atoms_list)
171
  atoms_list = []
172
  for i in atom:
173
  if i not in [' ']:
174
- atoms_list.append(i)
 
175
  label = np.array(label)
176
 
177
  adj = np.array(adj)
 
149
  adjoin_matrix.set_shape([None,None])
150
  y.set_shape([None])
151
  return x, adjoin_matrix , y
152
+
153
  class predict_smiles(object):
154
  def __init__(self,smiles ,normalize=False,max_len=1000,addH=True):
155
 
 
165
  self.min = self.df[self.label_field].min()
166
  self.df[self.label_field] = (self.df[self.label_field]-self.min)/(self.max-self.min)-0.5
167
  self.value_range = self.max-self.min
168
+
169
  def numerical_smiles(self, atoms_list,adj,label):
170
 
171
  atom = np.array(atoms_list)
172
  atoms_list = []
173
  for i in atom:
174
  if i not in [' ']:
175
+
176
+ atoms_list.append(str(i,encoding='utf-8'))
177
  label = np.array(label)
178
 
179
  adj = np.array(adj)
utils.py CHANGED
@@ -646,12 +646,12 @@ def gen_adj(shape,edges,length):
646
 
647
  adj=edges
648
  e = shape
649
- ones = np.zeros([e,e])
650
 
651
  #for i in range(e):
652
  for i in range (len(length)):
653
  if adj[i,0] != adj[i,1]:
654
- ones[adj[i,0],adj[i,1]]=format(float(length[i] ), '.3f')
655
 
656
  return ones
657
 
@@ -659,38 +659,40 @@ def gen_adj(shape,edges,length):
659
  if __name__ == "__main__":
660
  import pandas as pd
661
  from tqdm import tqdm
662
- f = pd.read_csv (r"data/reg/train3.csv")
663
- re = []
664
- pce = f['PCE']
665
- for ind,smile in enumerate ( f.iloc[:,1]):
666
- print(ind)
667
- atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
668
- np.save('data/reg/train/adj'+str(ind)+'.npy',np.array(adj))
669
- re.append([atom,'data/reg/train/adj'+str(ind)+'.npy',pce[ind] ])
670
- r = pd.DataFrame(re)
671
- r.to_csv('data/reg/train/train.csv')
672
- re = []
673
-
674
- f = pd.read_csv(r'data/reg/test3.csv')
675
- re = []
676
- pce = f['PCE']
677
 
678
- for ind,smile in enumerate ( f.iloc[:,1]):
679
- print(ind)
680
- atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
681
- np.save('data/reg/test/adj'+str(ind)+'.npy',np.array(adj))
682
- re.append([atom,'data/reg/test/adj'+str(ind)+'.npy',pce[ind] ])
683
- r = pd.DataFrame(re)
684
- r.to_csv('data/reg/test/test.csv')
685
 
686
- f = pd.read_csv(r'val.csv')
687
  re = []
688
  pce = f['PCE']
689
 
690
- for ind,smile in enumerate ( f.iloc[:,1]):
 
 
691
  print(ind)
692
  atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
693
  np.save('data/reg/val/adj'+str(ind)+'.npy',np.array(adj))
694
  re.append([atom,'data/reg/val/adj'+str(ind)+'.npy',pce[ind] ])
695
  r = pd.DataFrame(re)
696
- r.to_csv('data/reg/val/val.csv')
 
646
 
647
  adj=edges
648
  e = shape
649
+ ones = np.eye(e)
650
 
651
  #for i in range(e):
652
  for i in range (len(length)):
653
  if adj[i,0] != adj[i,1]:
654
+ ones[adj[i,0],adj[i,1]]=(float(length[i] ))
655
 
656
  return ones
657
 
 
659
  if __name__ == "__main__":
660
  import pandas as pd
661
  from tqdm import tqdm
662
+ f = pd.read_csv (r"J:\screenacc\new4.csv")
663
+ # re = []
664
+ # pce = f['PCE']
665
+ # for ind,smile in enumerate ( f.iloc[:,1]):
666
+ # print(ind)
667
+ # atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
668
+ # np.save('data/reg/train/adj'+str(ind)+'.npy',np.array(adj))
669
+ # re.append([atom,'data/reg/train/adj'+str(ind)+'.npy',pce[ind] ])
670
+ # r = pd.DataFrame(re)
671
+ # r.to_csv('data/reg/train/train.csv')
672
+ # re = []
673
+
674
+ # f = pd.read_csv(r'data/reg/test3.csv')
675
+ # re = []
676
+ # pce = f['PCE']
677
 
678
+ # for ind,smile in enumerate ( f.iloc[:,1]):
679
+ # print(ind)
680
+ # atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
681
+ # np.save('data/reg/test/adj'+str(ind)+'.npy',np.array(adj))
682
+ # re.append([atom,'data/reg/test/adj'+str(ind)+'.npy',pce[ind] ])
683
+ # r = pd.DataFrame(re)
684
+ # r.to_csv('data/reg/test/test.csv')
685
 
686
+ # f = pd.read_csv(r'val.csv')
687
  re = []
688
  pce = f['PCE']
689
 
690
+ for ind,smile in enumerate ( f.iloc[ 22000: ,0]):
691
+
692
+ ind = ind + 22000
693
  print(ind)
694
  atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
695
  np.save('data/reg/val/adj'+str(ind)+'.npy',np.array(adj))
696
  re.append([atom,'data/reg/val/adj'+str(ind)+'.npy',pce[ind] ])
697
  r = pd.DataFrame(re)
698
+ r.to_csv('data/reg/val/val22000.csv')