Spaces:
Sleeping
Sleeping
IlayMalinyak
committed on
Commit
·
a79c5f2
1
Parent(s):
82a319f
tested locally
Browse files
- .gitignore +1 -1
- req.txt +0 -0
- tasks/audio.py +3 -11
- tasks/run.py +15 -16
- tasks/utils/data.py +1 -0
- tasks/utils/dfs/test.csv +0 -0
- tasks/utils/dfs/train.csv +0 -0
- tasks/utils/dfs/train_val.csv +0 -0
- tasks/utils/dfs/val.csv +0 -0
- tasks/utils/models.py +1 -1
- tasks/utils/train.py +4 -3
.gitignore
CHANGED
@@ -14,6 +14,6 @@ eval-queue-bk/
|
|
14 |
eval-results-bk/
|
15 |
logs/
|
16 |
tasks/model
|
17 |
-
req.
|
18 |
|
19 |
emissions.csv
|
|
|
14 |
eval-results-bk/
|
15 |
logs/
|
16 |
tasks/model
|
17 |
+
req.txtal
|
18 |
|
19 |
emissions.csv
|
req.txt
ADDED
Binary file (20.5 kB). View file
|
|
tasks/audio.py
CHANGED
@@ -128,7 +128,6 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
128 |
accumulation_step=1, max_iter=np.inf,
|
129 |
exp_name=f"frugal_cnnencoder_inference")
|
130 |
predictions, true_labels, acc = trainer.predict(test_dl, device=device)
|
131 |
-
# true_labels = test_dataset["label"]
|
132 |
|
133 |
# Make random predictions (placeholder for actual model inference)
|
134 |
print("accuracy: ", acc)
|
@@ -144,7 +143,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
144 |
|
145 |
# Calculate accuracy
|
146 |
accuracy = accuracy_score(true_labels, predictions)
|
147 |
-
|
148 |
# Prepare results dictionary
|
149 |
results = {
|
150 |
"username": username,
|
@@ -162,14 +161,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
162 |
"test_seed": request.test_seed
|
163 |
}
|
164 |
}
|
|
|
|
|
165 |
|
166 |
return results
|
167 |
-
|
168 |
-
# if __name__ == "__main__":
|
169 |
-
# sample_request = AudioEvaluationRequest(
|
170 |
-
# dataset_name="rfcx/frugalai", # Replace with actual dataset name
|
171 |
-
# test_size=0.2, # Example values
|
172 |
-
# test_seed=42
|
173 |
-
# )
|
174 |
-
# #
|
175 |
-
# asyncio.run(evaluate_audio(sample_request))
|
|
|
128 |
accumulation_step=1, max_iter=np.inf,
|
129 |
exp_name=f"frugal_cnnencoder_inference")
|
130 |
predictions, true_labels, acc = trainer.predict(test_dl, device=device)
|
|
|
131 |
|
132 |
# Make random predictions (placeholder for actual model inference)
|
133 |
print("accuracy: ", acc)
|
|
|
143 |
|
144 |
# Calculate accuracy
|
145 |
accuracy = accuracy_score(true_labels, predictions)
|
146 |
+
|
147 |
# Prepare results dictionary
|
148 |
results = {
|
149 |
"username": username,
|
|
|
161 |
"test_seed": request.test_seed
|
162 |
}
|
163 |
}
|
164 |
+
|
165 |
+
print('results: ', results)
|
166 |
|
167 |
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tasks/run.py
CHANGED
@@ -38,9 +38,7 @@ def create_dataframe(ds, save_name='train'):
|
|
38 |
# Flatten the nested dictionary structure
|
39 |
feature_dict = {'label': label}
|
40 |
for k, v in features.items():
|
41 |
-
|
42 |
-
for sub_k, sub_v in v.items():
|
43 |
-
feature_dict[f"{k}_{sub_k}"] = sub_v[0].item() # Aggregate (e.g., mean)
|
44 |
data.append(feature_dict)
|
45 |
# Convert to DataFrame
|
46 |
df = pd.DataFrame(data)
|
@@ -88,13 +86,14 @@ test_dl = DataLoader(test_ds,batch_size=data_args.batch_size, collate_fn=collate
|
|
88 |
|
89 |
|
90 |
x,y = create_dataframe(full_ds, save_name='train_val')
|
91 |
-
print(x.shape)
|
92 |
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
|
|
|
93 |
|
94 |
evals_result = {}
|
95 |
num_boost_round = 1000 # Set a large number of boosting rounds
|
96 |
|
97 |
# Watchlist to monitor performance on train and validation data
|
|
|
98 |
|
99 |
dtrain = xgb.DMatrix(x_train, label=y_train)
|
100 |
dval = xgb.DMatrix(x_val, label=y_val)
|
@@ -178,13 +177,13 @@ model = CNNKanFeaturesEncoder(xgb_model, model_args, kan_args.get_dict())
|
|
178 |
# model = KanEncoder(kan_args.get_dict())
|
179 |
model = model.to(local_rank)
|
180 |
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
|
189 |
# model = DDP(model, device_ids=[local_rank], output_device=local_rank)
|
190 |
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
@@ -209,11 +208,11 @@ trainer = Trainer(model=model, optimizer=optimizer,
|
|
209 |
range_update=None,
|
210 |
accumulation_step=1, max_iter=np.inf,
|
211 |
exp_name=f"frugal_kan_features_{exp_num}")
|
212 |
-
fit_res = trainer.fit(num_epochs=100, device=local_rank,
|
213 |
-
early_stopping=10, only_p=False, best='loss', conf=True)
|
214 |
-
output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
|
215 |
-
with open(output_filename, "w") as f:
|
216 |
-
json.dump(fit_res, f, indent=2)
|
217 |
preds, tru, acc = trainer.predict(test_dl, local_rank)
|
218 |
print(f"Accuracy: {acc}")
|
219 |
|
|
|
38 |
# Flatten the nested dictionary structure
|
39 |
feature_dict = {'label': label}
|
40 |
for k, v in features.items():
|
41 |
+
feature_dict[f"{k}"] = v[0].item() # Aggregate (e.g., mean)
|
|
|
|
|
42 |
data.append(feature_dict)
|
43 |
# Convert to DataFrame
|
44 |
df = pd.DataFrame(data)
|
|
|
86 |
|
87 |
|
88 |
x,y = create_dataframe(full_ds, save_name='train_val')
|
|
|
89 |
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
|
90 |
+
print(x_train.shape)
|
91 |
|
92 |
evals_result = {}
|
93 |
num_boost_round = 1000 # Set a large number of boosting rounds
|
94 |
|
95 |
# Watchlist to monitor performance on train and validation data
|
96 |
+
print(x_train.head())
|
97 |
|
98 |
dtrain = xgb.DMatrix(x_train, label=y_train)
|
99 |
dval = xgb.DMatrix(x_val, label=y_val)
|
|
|
177 |
# model = KanEncoder(kan_args.get_dict())
|
178 |
model = model.to(local_rank)
|
179 |
|
180 |
+
state_dict = torch.load(data_args.checkpoint_path, map_location=torch.device('cpu'))
|
181 |
+
new_state_dict = OrderedDict()
|
182 |
+
for key, value in state_dict.items():
|
183 |
+
if key.startswith('module.'):
|
184 |
+
key = key[7:]
|
185 |
+
new_state_dict[key] = value
|
186 |
+
missing, unexpected = model.load_state_dict(new_state_dict)
|
187 |
|
188 |
# model = DDP(model, device_ids=[local_rank], output_device=local_rank)
|
189 |
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
|
|
208 |
range_update=None,
|
209 |
accumulation_step=1, max_iter=np.inf,
|
210 |
exp_name=f"frugal_kan_features_{exp_num}")
|
211 |
+
# fit_res = trainer.fit(num_epochs=100, device=local_rank,
|
212 |
+
# early_stopping=10, only_p=False, best='loss', conf=True)
|
213 |
+
# output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
|
214 |
+
# with open(output_filename, "w") as f:
|
215 |
+
# json.dump(fit_res, f, indent=2)
|
216 |
preds, tru, acc = trainer.predict(test_dl, local_rank)
|
217 |
print(f"Accuracy: {acc}")
|
218 |
|
tasks/utils/data.py
CHANGED
@@ -57,6 +57,7 @@ class FFTDataset(IterableDataset):
|
|
57 |
orig_sample_rate=12000,
|
58 |
target_sample_rate=3000,
|
59 |
features=False):
|
|
|
60 |
self.dataset = original_dataset
|
61 |
self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
|
62 |
self.target_sample_rate = target_sample_rate
|
|
|
57 |
orig_sample_rate=12000,
|
58 |
target_sample_rate=3000,
|
59 |
features=False):
|
60 |
+
super().__init__()
|
61 |
self.dataset = original_dataset
|
62 |
self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
|
63 |
self.target_sample_rate = target_sample_rate
|
tasks/utils/dfs/test.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
tasks/utils/dfs/train.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
tasks/utils/dfs/train_val.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tasks/utils/dfs/val.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
tasks/utils/models.py
CHANGED
@@ -234,7 +234,7 @@ class CNNKanFeaturesEncoder(nn.Module):
|
|
234 |
for batch_idx in range(batch_size):
|
235 |
feature_dict = {}
|
236 |
for k, v in features[batch_idx].items():
|
237 |
-
feature_dict[f"
|
238 |
data.append(feature_dict)
|
239 |
|
240 |
return pd.DataFrame(data)
|
|
|
234 |
for batch_idx in range(batch_size):
|
235 |
feature_dict = {}
|
236 |
for k, v in features[batch_idx].items():
|
237 |
+
feature_dict[f"{k}"] = v[0].item()
|
238 |
data.append(feature_dict)
|
239 |
|
240 |
return pd.DataFrame(data)
|
tasks/utils/train.py
CHANGED
@@ -226,7 +226,7 @@ class Trainer(object):
|
|
226 |
|
227 |
def train_batch(self, batch, batch_idx, device):
|
228 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
229 |
-
# features = batch['audio']['features']
|
230 |
# cwt = batch['audio']['cwt_mag']
|
231 |
x = x.to(device).float()
|
232 |
fft = fft.to(device).float()
|
@@ -267,7 +267,7 @@ class Trainer(object):
|
|
267 |
|
268 |
def eval_batch(self, batch, batch_idx, device):
|
269 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
270 |
-
# features = batch['audio']['features']
|
271 |
|
272 |
# features = batch['audio']['features_arr'].to(device).float()
|
273 |
x = x.to(device).float()
|
@@ -294,6 +294,7 @@ class Trainer(object):
|
|
294 |
pbar = tqdm(test_dataloader)
|
295 |
for i,batch in enumerate(pbar):
|
296 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
|
|
297 |
x = x.to(device).float()
|
298 |
fft = fft.to(device).float()
|
299 |
x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
|
@@ -305,7 +306,7 @@ class Trainer(object):
|
|
305 |
cls_pred = (probs > 0.5).float()
|
306 |
acc = (cls_pred == y).sum()
|
307 |
predictions.extend(cls_pred.cpu().numpy())
|
308 |
-
true_labels.extend(y.cpu().numpy())
|
309 |
all_accs += acc
|
310 |
total += len(y)
|
311 |
pbar.set_description("acc: {:.4f}".format(acc))
|
|
|
226 |
|
227 |
def train_batch(self, batch, batch_idx, device):
|
228 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
229 |
+
# features = torch.stack(batch['audio']['features']).to(device).float()
|
230 |
# cwt = batch['audio']['cwt_mag']
|
231 |
x = x.to(device).float()
|
232 |
fft = fft.to(device).float()
|
|
|
267 |
|
268 |
def eval_batch(self, batch, batch_idx, device):
|
269 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
270 |
+
# features = torch.stack(batch['audio']['features']).to(device).float()
|
271 |
|
272 |
# features = batch['audio']['features_arr'].to(device).float()
|
273 |
x = x.to(device).float()
|
|
|
294 |
pbar = tqdm(test_dataloader)
|
295 |
for i,batch in enumerate(pbar):
|
296 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
297 |
+
# features = batch['audio']['features']
|
298 |
x = x.to(device).float()
|
299 |
fft = fft.to(device).float()
|
300 |
x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
|
|
|
306 |
cls_pred = (probs > 0.5).float()
|
307 |
acc = (cls_pred == y).sum()
|
308 |
predictions.extend(cls_pred.cpu().numpy())
|
309 |
+
true_labels.extend(y.cpu().numpy().astype(np.int64))
|
310 |
all_accs += acc
|
311 |
total += len(y)
|
312 |
pbar.set_description("acc: {:.4f}".format(acc))
|