IlayMalinyak committed
Commit a79c5f2 · 1 Parent(s): 82a319f

tested locally

.gitignore CHANGED
@@ -14,6 +14,6 @@ eval-queue-bk/
 eval-results-bk/
 logs/
 tasks/model
-req.txt
+req.txtal
 
 emissions.csv
req.txt ADDED
Binary file (20.5 kB).
 
tasks/audio.py CHANGED
@@ -128,7 +128,6 @@ async def evaluate_audio(request: AudioEvaluationRequest):
                       accumulation_step=1, max_iter=np.inf,
                       exp_name=f"frugal_cnnencoder_inference")
     predictions, true_labels, acc = trainer.predict(test_dl, device=device)
-    # true_labels = test_dataset["label"]
 
     # Make random predictions (placeholder for actual model inference)
     print("accuracy: ", acc)
@@ -144,7 +143,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
 
     # Calculate accuracy
     accuracy = accuracy_score(true_labels, predictions)
-
+
     # Prepare results dictionary
     results = {
         "username": username,
@@ -162,14 +161,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
             "test_seed": request.test_seed
         }
     }
+
+    print('results: ', results)
 
     return results
-
-# if __name__ == "__main__":
-#     sample_request = AudioEvaluationRequest(
-#         dataset_name="rfcx/frugalai",  # Replace with actual dataset name
-#         test_size=0.2,  # Example values
-#         test_seed=42
-#     )
-#     #
-#     asyncio.run(evaluate_audio(sample_request))
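For readers following the hunk above: trainer.predict returns predictions, ground-truth labels, and an accuracy estimate, and the endpoint re-scores them with sklearn's accuracy_score before packing and printing the results dictionary. Below is a minimal, self-contained sketch of that scoring-and-packaging pattern; the field names beyond username/test_size/test_seed are illustrative assumptions, not the exact response schema.

# Hedged sketch of the scoring/packaging step in evaluate_audio; field names are assumptions.
from sklearn.metrics import accuracy_score

def package_results(username, predictions, true_labels, test_size, test_seed):
    accuracy = accuracy_score(true_labels, predictions)  # fraction of exact matches
    results = {
        "username": username,
        "accuracy": float(accuracy),
        "dataset_config": {
            "test_size": test_size,
            "test_seed": test_seed,
        },
    }
    print('results: ', results)
    return results

# Example usage with toy labels:
# package_results("demo-user", [0, 1, 1, 0], [0, 1, 0, 0], test_size=0.2, test_seed=42)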
 
 
 
 
 
 
 
 
 
tasks/run.py CHANGED
@@ -38,9 +38,7 @@ def create_dataframe(ds, save_name='train'):
         # Flatten the nested dictionary structure
         feature_dict = {'label': label}
         for k, v in features.items():
-            if isinstance(v, dict):
-                for sub_k, sub_v in v.items():
-                    feature_dict[f"{k}_{sub_k}"] = sub_v[0].item()  # Aggregate (e.g., mean)
+            feature_dict[f"{k}"] = v[0].item()  # Aggregate (e.g., mean)
         data.append(feature_dict)
     # Convert to DataFrame
     df = pd.DataFrame(data)
@@ -88,13 +86,14 @@ test_dl = DataLoader(test_ds,batch_size=data_args.batch_size, collate_fn=collate
 
 
 x,y = create_dataframe(full_ds, save_name='train_val')
-print(x.shape)
 x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
+print(x_train.shape)
 
 evals_result = {}
 num_boost_round = 1000  # Set a large number of boosting rounds
 
 # Watchlist to monitor performance on train and validation data
+print(x_train.head())
 
 dtrain = xgb.DMatrix(x_train, label=y_train)
 dval = xgb.DMatrix(x_val, label=y_val)
@@ -178,13 +177,13 @@ model = CNNKanFeaturesEncoder(xgb_model, model_args, kan_args.get_dict())
 # model = KanEncoder(kan_args.get_dict())
 model = model.to(local_rank)
 
-# state_dict = torch.load(data_args.checkpoint_path, map_location=torch.device('cpu'))
-# new_state_dict = OrderedDict()
-# for key, value in state_dict.items():
-#     if key.startswith('module.'):
-#         key = key[7:]
-#     new_state_dict[key] = value
-# missing, unexpected = model.load_state_dict(new_state_dict)
+state_dict = torch.load(data_args.checkpoint_path, map_location=torch.device('cpu'))
+new_state_dict = OrderedDict()
+for key, value in state_dict.items():
+    if key.startswith('module.'):
+        key = key[7:]
+    new_state_dict[key] = value
+missing, unexpected = model.load_state_dict(new_state_dict)
 
 # model = DDP(model, device_ids=[local_rank], output_device=local_rank)
 num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
@@ -209,11 +208,11 @@ trainer = Trainer(model=model, optimizer=optimizer,
                   range_update=None,
                   accumulation_step=1, max_iter=np.inf,
                   exp_name=f"frugal_kan_features_{exp_num}")
-fit_res = trainer.fit(num_epochs=100, device=local_rank,
-                      early_stopping=10, only_p=False, best='loss', conf=True)
-output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
-with open(output_filename, "w") as f:
-    json.dump(fit_res, f, indent=2)
+# fit_res = trainer.fit(num_epochs=100, device=local_rank,
+#                       early_stopping=10, only_p=False, best='loss', conf=True)
+# output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
+# with open(output_filename, "w") as f:
+#     json.dump(fit_res, f, indent=2)
 preds, tru, acc = trainer.predict(test_dl, local_rank)
 print(f"Accuracy: {acc}")
 
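The now-uncommented checkpoint-loading block in the third hunk follows the usual recipe for loading a state_dict saved from a DistributedDataParallel/DataParallel-wrapped model into an unwrapped one: keys saved under the 'module.' prefix are renamed before load_state_dict. Below is a self-contained sketch of that recipe; the checkpoint path and model are placeholders, and strict=False is used here only to report mismatched keys (the hunk above uses the default, strict loading).

# Hedged sketch of DDP-checkpoint loading; checkpoint_path/model are placeholders.
from collections import OrderedDict
import torch

def load_unwrapped(model: torch.nn.Module, checkpoint_path: str) -> torch.nn.Module:
    state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        # DDP/DataParallel prefix every parameter name with 'module.'
        if key.startswith('module.'):
            key = key[len('module.'):]
        new_state_dict[key] = value
    # strict=False reports mismatches instead of raising
    missing, unexpected = model.load_state_dict(new_state_dict, strict=False)
    print("missing keys:", missing, "unexpected keys:", unexpected)
    return model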
tasks/utils/data.py CHANGED
@@ -57,6 +57,7 @@ class FFTDataset(IterableDataset):
                  orig_sample_rate=12000,
                  target_sample_rate=3000,
                  features=False):
+        super().__init__()
         self.dataset = original_dataset
         self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
         self.target_sample_rate = target_sample_rate
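The added super().__init__() is the point of this hunk: a torch.utils.data.IterableDataset subclass should initialize its base class before setting its own attributes. Below is a toy, self-contained version of the wrapper pattern; the __iter__ body is illustrative and not FFTDataset's actual FFT logic.

# Hedged sketch of an IterableDataset wrapper with torchaudio resampling; not the repo's FFTDataset.
import torch
from torch.utils.data import IterableDataset
import torchaudio.transforms as T

class ResampledDataset(IterableDataset):
    def __init__(self, original_dataset, orig_sample_rate=12000, target_sample_rate=3000):
        super().__init__()  # initialize the IterableDataset base class first
        self.dataset = original_dataset
        self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
        self.target_sample_rate = target_sample_rate

    def __iter__(self):
        for item in self.dataset:
            audio = torch.as_tensor(item["audio"]["array"], dtype=torch.float32)
            yield {"audio": {"array": self.resampler(audio)}, "label": item["label"]}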
tasks/utils/dfs/test.csv DELETED
The diff for this file is too large to render.
 
tasks/utils/dfs/train.csv DELETED
The diff for this file is too large to render.
 
tasks/utils/dfs/train_val.csv CHANGED
The diff for this file is too large to render.
 
tasks/utils/dfs/val.csv DELETED
The diff for this file is too large to render.
 
tasks/utils/models.py CHANGED
@@ -234,7 +234,7 @@ class CNNKanFeaturesEncoder(nn.Module):
         for batch_idx in range(batch_size):
             feature_dict = {}
             for k, v in features[batch_idx].items():
-                feature_dict[f"frequency_domain_{k}"] = v[0].item()
+                feature_dict[f"{k}"] = v[0].item()
             data.append(feature_dict)
 
         return pd.DataFrame(data)
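This hunk drops the "frequency_domain_" prefix so the feature names built at inference time match what create_dataframe in tasks/run.py now produces; XGBoost matches columns by name when predicting from a DataFrame-backed DMatrix, so a mismatch raises a feature_names error. Below is a small sketch of that constraint with toy feature names, not the repo's.

# Hedged sketch: training and inference must flatten feature dicts to identical column names.
import pandas as pd
import xgboost as xgb

def flatten(features):
    # Mirrors feature_dict[f"{k}"] = v[0].item(): keep the raw key, take the first value.
    return {k: float(v[0]) for k, v in features.items()}

train_df = pd.DataFrame([flatten({"zcr": [0.1], "centroid": [1500.0]}),
                         flatten({"zcr": [0.3], "centroid": [900.0]})])
booster = xgb.train({"objective": "binary:logistic"},
                    xgb.DMatrix(train_df, label=[0, 1]), num_boost_round=5)

test_df = pd.DataFrame([flatten({"zcr": [0.2], "centroid": [1200.0]})])
preds = booster.predict(xgb.DMatrix(test_df))  # works only because column names match training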
tasks/utils/train.py CHANGED
@@ -226,7 +226,7 @@ class Trainer(object):
 
     def train_batch(self, batch, batch_idx, device):
         x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
-        # features = batch['audio']['features']
+        # features = torch.stack(batch['audio']['features']).to(device).float()
         # cwt = batch['audio']['cwt_mag']
         x = x.to(device).float()
         fft = fft.to(device).float()
@@ -267,7 +267,7 @@ class Trainer(object):
 
     def eval_batch(self, batch, batch_idx, device):
         x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
-        # features = batch['audio']['features']
+        # features = torch.stack(batch['audio']['features']).to(device).float()
 
         # features = batch['audio']['features_arr'].to(device).float()
         x = x.to(device).float()
@@ -294,6 +294,7 @@ class Trainer(object):
         pbar = tqdm(test_dataloader)
         for i,batch in enumerate(pbar):
            x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
+            # features = batch['audio']['features']
            x = x.to(device).float()
            fft = fft.to(device).float()
            x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
@@ -305,7 +306,7 @@ class Trainer(object):
            cls_pred = (probs > 0.5).float()
            acc = (cls_pred == y).sum()
            predictions.extend(cls_pred.cpu().numpy())
-           true_labels.extend(y.cpu().numpy())
+           true_labels.extend(y.cpu().numpy().astype(np.int64))
            all_accs += acc
            total += len(y)
            pbar.set_description("acc: {:.4f}".format(acc))
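The last hunk casts the collected labels to int64 so the downstream accuracy computation compares integer class labels rather than floats. Below is a condensed, standalone sketch of the sigmoid-threshold predict loop used above; the model and dataloader are placeholders, and the repo's Trainer additionally tracks a running accuracy.

# Hedged sketch of the sigmoid-threshold predict loop; model/dataloader are placeholders.
import numpy as np
import torch

def predict(model, dataloader, device):
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for x, y in dataloader:  # assumed (input, label) batches
            logits = model(x.to(device).float()).squeeze(-1)
            probs = torch.sigmoid(logits)
            cls_pred = (probs > 0.5).float()  # binary decision at 0.5
            predictions.extend(cls_pred.cpu().numpy())
            true_labels.extend(y.cpu().numpy().astype(np.int64))  # int labels for accuracy_score
    return np.array(predictions), np.array(true_labels)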