Spaces:

ChemFM
/

molecular_property_prediction

Sleeping

App Files Files Community

feiyang-cai committed on Oct 18, 2024

Commit

c6866a7

1 Parent(s): ec780ac

add time evaluation

Browse files

Files changed (2) hide show

app.py +1 -1
utils.py +101 -78

app.py CHANGED Viewed

@@ -91,7 +91,7 @@ def predict_file(file, property_name):
     except Exception as e:
         # no matter what the error is, we should return
         print(e)
-        return None, None, gr.update(visible=False), file, "Prediction failed"
     return gr.update(visible=False), gr.DownloadButton(label="Download", value=prediction_file, visible=True), gr.update(visible=False), prediction_file, "Prediction is done"

     except Exception as e:
         # no matter what the error is, we should return
         print(e)
+        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), file, "Prediction failed"
     return gr.update(visible=False), gr.DownloadButton(label="Download", value=prediction_file, visible=True), gr.update(visible=False), prediction_file, "Prediction is done"

utils.py CHANGED Viewed

@@ -15,6 +15,24 @@ import pickle
 from sklearn import preprocessing
 import json
 import spaces
 from rdkit import RDLogger, Chem
 # Suppress RDKit INFO messages
@@ -209,99 +227,104 @@ class MolecularPropertyPredictionModel():
         # switched: adapter is switched successfully
         # error: adapter is not found
-        if adapter_name == self.adapter_name:
-            return "keep"
-        # switch adapter
-        try:
-            #self.adapter_name = adapter_name
-            #print(self.adapter_name, adapter_id)
-            #self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
-            #self.lora_model.to("cuda")
-            #print(self.lora_model)
-            self.base_model.set_adapter(adapter_name)
-            self.base_model.eval()
-            #if adapter_name not in self.apapter_scaler_path:
-            #    self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
-            if os.path.exists(self.apapter_scaler_path[adapter_name]):
-                self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
-            else:
-                self.scaler = None
-            self.adapter_name = adapter_name
-            return "switched"
-        except Exception as e:
-            # handle error
-            return "error"
     @spaces.GPU(duration=20)
     def predict(self, valid_df, task_type):
-        test_dataset = Dataset.from_pandas(valid_df)
-        # construct the dataloader
-        test_loader = torch.utils.data.DataLoader(
-            test_dataset,
-            batch_size=16,
-            collate_fn=self.data_collator,
-        )
-        # predict
-        y_pred = []
-        for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
-            with torch.no_grad():
-                batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
-                outputs = self.base_model(**batch)
-            if task_type == "regression": # TODO: check if the model is regression or classification
-                y_pred.append(outputs.logits.cpu().detach().numpy())
-            else:
-                y_pred.append((torch.sigmoid(outputs.logits) > 0.5).cpu().detach().numpy())
-        y_pred = np.concatenate(y_pred, axis=0)
-        if task_type=="regression" and self.scaler is not None:
-            y_pred = self.scaler.inverse_transform(y_pred)
         return y_pred
     def predict_single_smiles(self, smiles, task_type):
-        assert task_type in ["regression", "classification"]
-        # check the SMILES string is valid
-        if not Chem.MolFromSmiles(smiles):
-            return None
-        valid_df = pd.DataFrame([smiles], columns=['smiles'])
-        results = self.predict(valid_df, task_type)
-        # predict
         return results.item()
     def predict_file(self, df, task_type):
-        # we should add the index first
-        df = df.reset_index()
-        # we need to check the SMILES strings are valid, the invalid ones will be moved to the last
-        valid_idx = []
-        invalid_idx = []
-        for idx, smiles in enumerate(df['smiles']):
-            if Chem.MolFromSmiles(smiles):
-                valid_idx.append(idx)
-            else:
-                invalid_idx.append(idx)
-        valid_df = df.loc[valid_idx]
-        # get the smiles list
-        valid_df_smiles = valid_df['smiles'].tolist()
-        input_df = pd.DataFrame(valid_df_smiles, columns=['smiles'])
-        results = self.predict(input_df, task_type)
-        # add the results to the dataframe
-        df.loc[valid_idx, 'prediction'] = results
-        df.loc[invalid_idx, 'prediction'] = np.nan
-        # drop the index column
-        df = df.drop(columns=['index'])
-        # phrase file
         return df

 from sklearn import preprocessing
 import json
 import spaces
+import time
+class calculateDuration:
+    def __init__(self, activity_name=""):
+        self.activity_name = activity_name
+    def __enter__(self):
+        self.start_time = time.time()
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.end_time = time.time()
+        self.elapsed_time = self.end_time - self.start_time
+        if self.activity_name:
+            print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
+        else:
+            print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
 from rdkit import RDLogger, Chem
 # Suppress RDKit INFO messages
         # switched: adapter is switched successfully
         # error: adapter is not found
+        with calculateDuration("switching adapter"):
+            if adapter_name == self.adapter_name:
+                return "keep"
+            # switch adapter
+            try:
+                #self.adapter_name = adapter_name
+                #print(self.adapter_name, adapter_id)
+                #self.lora_model = PeftModel.from_pretrained(self.base_model, adapter_id, token = os.environ.get("TOKEN"))
+                #self.lora_model.to("cuda")
+                #print(self.lora_model)
+                self.base_model.set_adapter(adapter_name)
+                self.base_model.eval()
+                #if adapter_name not in self.apapter_scaler_path:
+                #    self.apapter_scaler_path[adapter_name] = hf_hub_download(adapter_id, filename="scaler.pkl", token = os.environ.get("TOKEN"))
+                if os.path.exists(self.apapter_scaler_path[adapter_name]):
+                    self.scaler = pickle.load(open(self.apapter_scaler_path[adapter_name], "rb"))
+                else:
+                    self.scaler = None
+                self.adapter_name = adapter_name
+                return "switched"
+            except Exception as e:
+                # handle error
+                return "error"
     @spaces.GPU(duration=20)
     def predict(self, valid_df, task_type):
+        with calculateDuration("predicting"):
+            test_dataset = Dataset.from_pandas(valid_df)
+            # construct the dataloader
+            test_loader = torch.utils.data.DataLoader(
+                test_dataset,
+                batch_size=16,
+                collate_fn=self.data_collator,
+            )
+            # predict
+            y_pred = []
+            for i, batch in tqdm(enumerate(test_loader), total=len(test_loader), desc="Evaluating"):
+                with torch.no_grad():
+                    batch = {k: v.to(self.base_model.device) for k, v in batch.items()}
+                    outputs = self.base_model(**batch)
+                if task_type == "regression": # TODO: check if the model is regression or classification
+                    y_pred.append(outputs.logits.cpu().detach().numpy())
+                else:
+                    y_pred.append((torch.sigmoid(outputs.logits) > 0.5).cpu().detach().numpy())
+            y_pred = np.concatenate(y_pred, axis=0)
+            if task_type=="regression" and self.scaler is not None:
+                y_pred = self.scaler.inverse_transform(y_pred)
         return y_pred
     def predict_single_smiles(self, smiles, task_type):
+        with calculateDuration("predicting a single SMILES"):
+            assert task_type in ["regression", "classification"]
+            # check the SMILES string is valid
+            if not Chem.MolFromSmiles(smiles):
+                return None
+            valid_df = pd.DataFrame([smiles], columns=['smiles'])
+            results = self.predict(valid_df, task_type)
+            # predict
         return results.item()
     def predict_file(self, df, task_type):
+        with calculateDuration("predicting a file"):
+            # we should add the index first
+            df = df.reset_index()
+            # we need to check the SMILES strings are valid, the invalid ones will be moved to the last
+            valid_idx = []
+            invalid_idx = []
+            for idx, smiles in enumerate(df['smiles']):
+                if Chem.MolFromSmiles(smiles):
+                    valid_idx.append(idx)
+                else:
+                    invalid_idx.append(idx)
+            valid_df = df.loc[valid_idx]
+            # get the smiles list
+            valid_df_smiles = valid_df['smiles'].tolist()
+            input_df = pd.DataFrame(valid_df_smiles, columns=['smiles'])
+            results = self.predict(input_df, task_type)
+            # add the results to the dataframe
+            df.loc[valid_idx, 'prediction'] = results
+            df.loc[invalid_idx, 'prediction'] = np.nan
+            # drop the index column
+            df = df.drop(columns=['index'])
+            # phrase file
         return df