feiyang-cai committed on
Commit
c6866a7
·
1 Parent(s): ec780ac

add time evaluation

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. utils.py +101 -78
app.py CHANGED
@@ -91,7 +91,7 @@ def predict_file(file, property_name):
91
  except Exception as e:
92
  # no matter what the error is, we should return
93
  print(e)
94
- return None, None, gr.update(visible=False), file, "Prediction failed"
95
 
96
  return gr.update(visible=False), gr.DownloadButton(label="Download", value=prediction_file, visible=True), gr.update(visible=False), prediction_file, "Prediction is done"
97
 
 
91
  except Exception as e:
92
  # no matter what the error is, we should return
93
  print(e)
94
+ return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), file, "Prediction failed"
95
 
96
  return gr.update(visible=False), gr.DownloadButton(label="Download", value=prediction_file, visible=True), gr.update(visible=False), prediction_file, "Prediction is done"
97
 
utils.py CHANGED
@@ -15,6 +15,24 @@ import pickle
15
  from sklearn import preprocessing
16
  import json
17
  import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  from rdkit import RDLogger, Chem
20
  # Suppress RDKit INFO messages
@@ -209,99 +227,104 @@ class MolecularPropertyPredictionModel():
209
  # switched: adapter is switched successfully
210
  # error: adapter is not found
211
 
212
- if adapter_name == self.adapter_name:
213
- return "keep"
214
- # switch adapter
215
- try:
216
- #self.adapter_name = adapter_name
217
- #print(self.adapter_name, adapter_id)
218
- #self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
219
- #self.lora_model.to("cuda")
220
- #print(self.lora_model)
221
-
222
- self.base_model.set_adapter(adapter_name)
223
- self.base_model.eval()
224
-
225
- #if adapter_name not in self.apapter_scaler_path:
226
- # self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
227
- if os.path.exists(self.apapter_scaler_path[adapter_name]):
228
- self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
229
- else:
230
- self.scaler = None
231
-
232
- self.adapter_name = adapter_name
233
-
234
- return "switched"
235
- except Exception as e:
236
- # handle error
237
- return "error"
 
238
 
239
  @spaces.GPU(duration=20)
240
  def predict(self, valid_df, task_type):
241
- test_dataset = Dataset.from_pandas(valid_df)
242
- # construct the dataloader
243
- test_loader = torch.utils.data.DataLoader(
244
- test_dataset,
245
- batch_size=16,
246
- collate_fn=self.data_collator,
247
- )
248
 
249
- # predict
250
- y_pred = []
251
- for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
252
- with torch.no_grad():
253
- batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
254
- outputs = self.base_model(**batch)
255
- if task_type == "regression": # TODO: check if the model is regression or classification
256
- y_pred.append(outputs.logits.cpu().detach().numpy())
257
- else:
258
- y_pred.append((torch.sigmoid(outputs.logits) > 0.5).cpu().detach().numpy())
 
 
 
 
 
 
 
 
 
259
 
260
- y_pred = np.concatenate(y_pred, axis=0)
261
- if task_type=="regression" and self.scaler is not None:
262
- y_pred = self.scaler.inverse_transform(y_pred)
263
 
264
 
265
  return y_pred
266
 
267
  def predict_single_smiles(self, smiles, task_type):
268
- assert task_type in ["regression", "classification"]
 
269
 
270
- # check the SMILES string is valid
271
- if not Chem.MolFromSmiles(smiles):
272
- return None
273
 
274
- valid_df = pd.DataFrame([smiles], columns=['smiles'])
275
- results = self.predict(valid_df, task_type)
276
- # predict
277
  return results.item()
278
 
279
  def predict_file(self, df, task_type):
280
- # we should add the index first
281
- df = df.reset_index()
282
- # we need to check the SMILES strings are valid, the invalid ones will be moved to the last
283
- valid_idx = []
284
- invalid_idx = []
285
- for idx, smiles in enumerate(df['smiles']):
286
- if Chem.MolFromSmiles(smiles):
287
- valid_idx.append(idx)
288
- else:
289
- invalid_idx.append(idx)
290
- valid_df = df.loc[valid_idx]
291
- # get the smiles list
292
- valid_df_smiles = valid_df['smiles'].tolist()
293
-
294
- input_df = pd.DataFrame(valid_df_smiles, columns=['smiles'])
295
- results = self.predict(input_df, task_type)
296
-
297
- # add the results to the dataframe
298
- df.loc[valid_idx, 'prediction'] = results
299
- df.loc[invalid_idx, 'prediction'] = np.nan
300
-
301
- # drop the index column
302
- df = df.drop(columns=['index'])
303
-
304
- # phrase file
 
305
  return df
306
 
307
 
 
15
  from sklearn import preprocessing
16
  import json
17
  import spaces
18
+ import time
19
+
20
class calculateDuration:
    """Context manager that prints how long the wrapped block took to run.

    Usage::

        with calculateDuration("loading model"):
            ...

    Attributes set while the manager is used:
        activity_name: label included in the printed message (may be empty).
        start_time / end_time: wall-clock timestamps from ``time.time()``.
        elapsed_time: duration in seconds, measured with a monotonic clock.

    Exceptions raised inside the block are not suppressed; the elapsed time
    is still printed before they propagate.
    """

    def __init__(self, activity_name=""):
        self.activity_name = activity_name

    def __enter__(self):
        # Wall-clock stamp is kept for anyone inspecting start_time/end_time.
        self.start_time = time.time()
        # The duration itself uses perf_counter(): time.time() can jump
        # backwards or forwards when the system clock is adjusted.
        self._start_tick = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.elapsed_time = time.perf_counter() - self._start_tick
        self.end_time = time.time()
        label = f" for {self.activity_name}" if self.activity_name else ""
        print(f"Elapsed time{label}: {self.elapsed_time:.6f} seconds")
        # Implicitly returns None so exceptions from the block propagate.
+
36
 
37
  from rdkit import RDLogger, Chem
38
  # Suppress RDKit INFO messages
 
227
  # switched: adapter is switched successfully
228
  # error: adapter is not found
229
 
230
+ with calculateDuration("switching adapter"):
231
+ if adapter_name == self.adapter_name:
232
+ return "keep"
233
+ # switch adapter
234
+ try:
235
+ #self.adapter_name = adapter_name
236
+ #print(self.adapter_name, adapter_id)
237
+ #self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
238
+ #self.lora_model.to("cuda")
239
+ #print(self.lora_model)
240
+
241
+ self.base_model.set_adapter(adapter_name)
242
+ self.base_model.eval()
243
+
244
+ #if adapter_name not in self.apapter_scaler_path:
245
+ # self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
246
+ if os.path.exists(self.apapter_scaler_path[adapter_name]):
247
+ self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
248
+ else:
249
+ self.scaler = None
250
+
251
+ self.adapter_name = adapter_name
252
+
253
+ return "switched"
254
+ except Exception as e:
255
+ # handle error
256
+ return "error"
257
 
258
  @spaces.GPU(duration=20)
259
  def predict(self, valid_df, task_type):
 
 
 
 
 
 
 
260
 
261
+ with calculateDuration("predicting"):
262
+ test_dataset = Dataset.from_pandas(valid_df)
263
+ # construct the dataloader
264
+ test_loader = torch.utils.data.DataLoader(
265
+ test_dataset,
266
+ batch_size=16,
267
+ collate_fn=self.data_collator,
268
+ )
269
+
270
+ # predict
271
+ y_pred = []
272
+ for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
273
+ with torch.no_grad():
274
+ batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
275
+ outputs = self.base_model(**batch)
276
+ if task_type == "regression": # TODO: check if the model is regression or classification
277
+ y_pred.append(outputs.logits.cpu().detach().numpy())
278
+ else:
279
+ y_pred.append((torch.sigmoid(outputs.logits) > 0.5).cpu().detach().numpy())
280
 
281
+ y_pred = np.concatenate(y_pred, axis=0)
282
+ if task_type=="regression" and self.scaler is not None:
283
+ y_pred = self.scaler.inverse_transform(y_pred)
284
 
285
 
286
  return y_pred
287
 
288
  def predict_single_smiles(self, smiles, task_type):
289
+ with calculateDuration("predicting a single SMILES"):
290
+ assert task_type in ["regression", "classification"]
291
 
292
+ # check the SMILES string is valid
293
+ if not Chem.MolFromSmiles(smiles):
294
+ return None
295
 
296
+ valid_df = pd.DataFrame([smiles], columns=['smiles'])
297
+ results = self.predict(valid_df, task_type)
298
+ # predict
299
  return results.item()
300
 
301
  def predict_file(self, df, task_type):
302
+ with calculateDuration("predicting a file"):
303
+ # we should add the index first
304
+ df = df.reset_index()
305
+ # we need to check the SMILES strings are valid, the invalid ones will be moved to the last
306
+ valid_idx = []
307
+ invalid_idx = []
308
+ for idx, smiles in enumerate(df['smiles']):
309
+ if Chem.MolFromSmiles(smiles):
310
+ valid_idx.append(idx)
311
+ else:
312
+ invalid_idx.append(idx)
313
+ valid_df = df.loc[valid_idx]
314
+ # get the smiles list
315
+ valid_df_smiles = valid_df['smiles'].tolist()
316
+
317
+ input_df = pd.DataFrame(valid_df_smiles, columns=['smiles'])
318
+ results = self.predict(input_df, task_type)
319
+
320
+ # add the results to the dataframe
321
+ df.loc[valid_idx, 'prediction'] = results
322
+ df.loc[invalid_idx, 'prediction'] = np.nan
323
+
324
+ # drop the index column
325
+ df = df.drop(columns=['index'])
326
+
327
+ # phrase file
328
  return df
329
 
330