Spaces:
Running
Running
Upload 2 files
Browse files
- dataset.py +4 -2
- utils.py +29 -27
dataset.py
CHANGED
@@ -149,7 +149,7 @@ class Graph_Regression_Dataset_test(object):
|
|
149 |
adjoin_matrix.set_shape([None,None])
|
150 |
y.set_shape([None])
|
151 |
return x, adjoin_matrix , y
|
152 |
-
|
153 |
class predict_smiles(object):
|
154 |
def __init__(self,smiles ,normalize=False,max_len=1000,addH=True):
|
155 |
|
@@ -165,13 +165,15 @@ class predict_smiles(object):
|
|
165 |
self.min = self.df[self.label_field].min()
|
166 |
self.df[self.label_field] = (self.df[self.label_field]-self.min)/(self.max-self.min)-0.5
|
167 |
self.value_range = self.max-self.min
|
|
|
168 |
def numerical_smiles(self, atoms_list,adj,label):
|
169 |
|
170 |
atom = np.array(atoms_list)
|
171 |
atoms_list = []
|
172 |
for i in atom:
|
173 |
if i not in [' ']:
|
174 |
-
|
|
|
175 |
label = np.array(label)
|
176 |
|
177 |
adj = np.array(adj)
|
|
|
149 |
adjoin_matrix.set_shape([None,None])
|
150 |
y.set_shape([None])
|
151 |
return x, adjoin_matrix , y
|
152 |
+
|
153 |
class predict_smiles(object):
|
154 |
def __init__(self,smiles ,normalize=False,max_len=1000,addH=True):
|
155 |
|
|
|
165 |
self.min = self.df[self.label_field].min()
|
166 |
self.df[self.label_field] = (self.df[self.label_field]-self.min)/(self.max-self.min)-0.5
|
167 |
self.value_range = self.max-self.min
|
168 |
+
|
169 |
def numerical_smiles(self, atoms_list,adj,label):
|
170 |
|
171 |
atom = np.array(atoms_list)
|
172 |
atoms_list = []
|
173 |
for i in atom:
|
174 |
if i not in [' ']:
|
175 |
+
|
176 |
+
atoms_list.append(str(i,encoding='utf-8'))
|
177 |
label = np.array(label)
|
178 |
|
179 |
adj = np.array(adj)
|
utils.py
CHANGED
@@ -646,12 +646,12 @@ def gen_adj(shape,edges,length):
|
|
646 |
|
647 |
adj=edges
|
648 |
e = shape
|
649 |
-
ones = np.
|
650 |
|
651 |
#for i in range(e):
|
652 |
for i in range (len(length)):
|
653 |
if adj[i,0] != adj[i,1]:
|
654 |
-
ones[adj[i,0],adj[i,1]]=
|
655 |
|
656 |
return ones
|
657 |
|
@@ -659,38 +659,40 @@ def gen_adj(shape,edges,length):
|
|
659 |
if __name__ == "__main__":
|
660 |
import pandas as pd
|
661 |
from tqdm import tqdm
|
662 |
-
f = pd.read_csv (r"
|
663 |
-
re = []
|
664 |
-
pce = f['PCE']
|
665 |
-
for ind,smile in enumerate ( f.iloc[:,1]):
|
666 |
-
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
r = pd.DataFrame(re)
|
671 |
-
r.to_csv('data/reg/train/train.csv')
|
672 |
-
re = []
|
673 |
-
|
674 |
-
f = pd.read_csv(r'data/reg/test3.csv')
|
675 |
-
re = []
|
676 |
-
pce = f['PCE']
|
677 |
|
678 |
-
for ind,smile in enumerate ( f.iloc[:,1]):
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
r = pd.DataFrame(re)
|
684 |
-
r.to_csv('data/reg/test/test.csv')
|
685 |
|
686 |
-
f = pd.read_csv(r'val.csv')
|
687 |
re = []
|
688 |
pce = f['PCE']
|
689 |
|
690 |
-
for ind,smile in enumerate ( f.iloc[
|
|
|
|
|
691 |
print(ind)
|
692 |
atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
|
693 |
np.save('data/reg/val/adj'+str(ind)+'.npy',np.array(adj))
|
694 |
re.append([atom,'data/reg/val/adj'+str(ind)+'.npy',pce[ind] ])
|
695 |
r = pd.DataFrame(re)
|
696 |
-
r.to_csv('data/reg/val/
|
|
|
646 |
|
647 |
adj=edges
|
648 |
e = shape
|
649 |
+
ones = np.eye(e)
|
650 |
|
651 |
#for i in range(e):
|
652 |
for i in range (len(length)):
|
653 |
if adj[i,0] != adj[i,1]:
|
654 |
+
ones[adj[i,0],adj[i,1]]=(float(length[i] ))
|
655 |
|
656 |
return ones
|
657 |
|
|
|
659 |
if __name__ == "__main__":
|
660 |
import pandas as pd
|
661 |
from tqdm import tqdm
|
662 |
+
f = pd.read_csv (r"J:\screenacc\new4.csv")
|
663 |
+
# re = []
|
664 |
+
# pce = f['PCE']
|
665 |
+
# for ind,smile in enumerate ( f.iloc[:,1]):
|
666 |
+
# print(ind)
|
667 |
+
# atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
|
668 |
+
# np.save('data/reg/train/adj'+str(ind)+'.npy',np.array(adj))
|
669 |
+
# re.append([atom,'data/reg/train/adj'+str(ind)+'.npy',pce[ind] ])
|
670 |
+
# r = pd.DataFrame(re)
|
671 |
+
# r.to_csv('data/reg/train/train.csv')
|
672 |
+
# re = []
|
673 |
+
|
674 |
+
# f = pd.read_csv(r'data/reg/test3.csv')
|
675 |
+
# re = []
|
676 |
+
# pce = f['PCE']
|
677 |
|
678 |
+
# for ind,smile in enumerate ( f.iloc[:,1]):
|
679 |
+
# print(ind)
|
680 |
+
# atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
|
681 |
+
# np.save('data/reg/test/adj'+str(ind)+'.npy',np.array(adj))
|
682 |
+
# re.append([atom,'data/reg/test/adj'+str(ind)+'.npy',pce[ind] ])
|
683 |
+
# r = pd.DataFrame(re)
|
684 |
+
# r.to_csv('data/reg/test/test.csv')
|
685 |
|
686 |
+
# f = pd.read_csv(r'val.csv')
|
687 |
re = []
|
688 |
pce = f['PCE']
|
689 |
|
690 |
+
for ind,smile in enumerate ( f.iloc[ 22000: ,0]):
|
691 |
+
|
692 |
+
ind = ind + 22000
|
693 |
print(ind)
|
694 |
atom,adj = mol_to_geognn_graph_data_MMFF3d(smile)
|
695 |
np.save('data/reg/val/adj'+str(ind)+'.npy',np.array(adj))
|
696 |
re.append([atom,'data/reg/val/adj'+str(ind)+'.npy',pce[ind] ])
|
697 |
r = pd.DataFrame(re)
|
698 |
+
r.to_csv('data/reg/val/val22000.csv')
|