Spaces:
Runtime error
Runtime error
Dean
committed on
Commit
·
7e3c514
1
Parent(s):
c9eec48
Starting to apply fixes for the project to latest version
Browse files
- .gitignore +0 -1
- data.dvc +0 -14
- dvc.lock +29 -12
- dvc.yaml +11 -2
- requirements.txt +0 -1
- src/models/evaluate_model.py +3 -3
- src/models/model.py +5 -7
- src/models/train_model.py +0 -8
.gitignore
CHANGED
@@ -93,6 +93,5 @@ coverage.xml
|
|
93 |
.vscode
|
94 |
/data
|
95 |
|
96 |
-
wandb/
|
97 |
summarization-dagshub/
|
98 |
/models
|
|
|
93 |
.vscode
|
94 |
/data
|
95 |
|
|
|
96 |
summarization-dagshub/
|
97 |
/models
|
data.dvc
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
deps:
|
2 |
-
- path: params.yml
|
3 |
-
md5: d0f3e81bc9191e752a69761045a449d9
|
4 |
-
size: 196
|
5 |
-
- path: src/data/make_dataset.py
|
6 |
-
md5: 9de71de0f8df5d0a7beb235ef7c7777d
|
7 |
-
size: 772
|
8 |
-
cmd: python src/data/make_dataset.py
|
9 |
-
outs:
|
10 |
-
- md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
11 |
-
nfiles: 3
|
12 |
-
path: data/raw
|
13 |
-
size: 1358833013
|
14 |
-
md5: ff502232006c7fbef1015b5aa5cc4bbb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dvc.lock
CHANGED
@@ -10,19 +10,22 @@ stages:
|
|
10 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
11 |
size: 1138285
|
12 |
- path: params.yml
|
13 |
-
md5:
|
14 |
-
size:
|
15 |
- path: src/models/train_model.py
|
16 |
-
md5:
|
17 |
-
size:
|
18 |
outs:
|
19 |
- path: models
|
20 |
-
md5:
|
21 |
-
size:
|
22 |
-
nfiles:
|
23 |
-
- path: reports/training_metrics.txt
|
24 |
-
md5:
|
25 |
-
size:
|
|
|
|
|
|
|
26 |
eval:
|
27 |
cmd: python src/models/evaluate_model.py
|
28 |
deps:
|
@@ -51,8 +54,8 @@ stages:
|
|
51 |
size: 0
|
52 |
nfiles: 0
|
53 |
- path: params.yml
|
54 |
-
md5:
|
55 |
-
size:
|
56 |
- path: src/data/process_data.py
|
57 |
md5: ba3ba7b7c8a905b736b6b0a28d2334c4
|
58 |
size: 623
|
@@ -66,3 +69,17 @@ stages:
|
|
66 |
- path: data/processed/validation.csv
|
67 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
68 |
size: 1138285
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
11 |
size: 1138285
|
12 |
- path: params.yml
|
13 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
14 |
+
size: 189
|
15 |
- path: src/models/train_model.py
|
16 |
+
md5: d57b5ff84bc29a8ea75e191027d70148
|
17 |
+
size: 988
|
18 |
outs:
|
19 |
- path: models
|
20 |
+
md5: b8dd7baa6b7b85a7b4c2fcfbe3d831bf.dir
|
21 |
+
size: 243476333
|
22 |
+
nfiles: 5
|
23 |
+
- path: reports/training_metrics.csv
|
24 |
+
md5: f0c89a07561ca8aea8ab3f4764b648e7
|
25 |
+
size: 26
|
26 |
+
- path: reports/training_params.yml
|
27 |
+
md5: 8a80554c91d9fca8acb82f023de02f11
|
28 |
+
size: 3
|
29 |
eval:
|
30 |
cmd: python src/models/evaluate_model.py
|
31 |
deps:
|
|
|
54 |
size: 0
|
55 |
nfiles: 0
|
56 |
- path: params.yml
|
57 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
58 |
+
size: 189
|
59 |
- path: src/data/process_data.py
|
60 |
md5: ba3ba7b7c8a905b736b6b0a28d2334c4
|
61 |
size: 623
|
|
|
69 |
- path: data/processed/validation.csv
|
70 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
71 |
size: 1138285
|
72 |
+
download_data:
|
73 |
+
cmd: python src/data/make_dataset.py
|
74 |
+
deps:
|
75 |
+
- path: params.yml
|
76 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
77 |
+
size: 189
|
78 |
+
- path: src/data/make_dataset.py
|
79 |
+
md5: 9de71de0f8df5d0a7beb235ef7c7777d
|
80 |
+
size: 772
|
81 |
+
outs:
|
82 |
+
- path: data/raw
|
83 |
+
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
84 |
+
size: 1358833013
|
85 |
+
nfiles: 3
|
dvc.yaml
CHANGED
@@ -1,4 +1,11 @@
|
|
1 |
stages:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
process_data:
|
3 |
cmd: python src/data/process_data.py
|
4 |
deps:
|
@@ -25,8 +32,10 @@ stages:
|
|
25 |
outs:
|
26 |
- models:
|
27 |
persist: true
|
|
|
|
|
28 |
metrics:
|
29 |
-
- reports/training_metrics.txt:
|
30 |
cache: false
|
31 |
eval:
|
32 |
cmd: python src/models/evaluate_model.py
|
@@ -36,6 +45,6 @@ stages:
|
|
36 |
- models
|
37 |
- src/models/evaluate_model.py
|
38 |
metrics:
|
39 |
-
- reports/metrics.
|
40 |
cache: false
|
41 |
|
|
|
1 |
stages:
|
2 |
+
download_data:
|
3 |
+
cmd: python src/data/make_dataset.py
|
4 |
+
deps:
|
5 |
+
- params.yml
|
6 |
+
- src/data/make_dataset.py
|
7 |
+
outs:
|
8 |
+
- data/raw
|
9 |
process_data:
|
10 |
cmd: python src/data/process_data.py
|
11 |
deps:
|
|
|
32 |
outs:
|
33 |
- models:
|
34 |
persist: true
|
35 |
+
- reports/training_params.yml:
|
36 |
+
cache: false
|
37 |
metrics:
|
38 |
+
- reports/training_metrics.csv:
|
39 |
cache: false
|
40 |
eval:
|
41 |
cmd: python src/models/evaluate_model.py
|
|
|
45 |
- models
|
46 |
- src/models/evaluate_model.py
|
47 |
metrics:
|
48 |
+
- reports/metrics.csv:
|
49 |
cache: false
|
50 |
|
requirements.txt
CHANGED
@@ -9,7 +9,6 @@ rouge_score
|
|
9 |
pyyaml
|
10 |
dvc
|
11 |
mlflow
|
12 |
-
wandb
|
13 |
|
14 |
# external requirements
|
15 |
click
|
|
|
9 |
pyyaml
|
10 |
dvc
|
11 |
mlflow
|
|
|
12 |
|
13 |
# external requirements
|
14 |
click
|
src/models/evaluate_model.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import
|
2 |
import yaml
|
3 |
|
4 |
from model import Summarization
|
@@ -18,8 +18,8 @@ def evaluate_model():
|
|
18 |
model.load_model(model_type=params['model_type'], model_dir=params['model_dir'])
|
19 |
results = model.evaluate(test_df=test_df, metrics=params['metric'])
|
20 |
|
21 |
-
with
|
22 |
-
|
23 |
|
24 |
|
25 |
if __name__ == '__main__':
|
|
|
1 |
+
from dagshub import dagshub_logger
|
2 |
import yaml
|
3 |
|
4 |
from model import Summarization
|
|
|
18 |
model.load_model(model_type=params['model_type'], model_dir=params['model_dir'])
|
19 |
results = model.evaluate(test_df=test_df, metrics=params['metric'])
|
20 |
|
21 |
+
with dagshub_logger(should_log_hparams=False) as logger:
|
22 |
+
logger.log_metrics(results)
|
23 |
|
24 |
|
25 |
if __name__ == '__main__':
|
src/models/model.py
CHANGED
@@ -7,7 +7,8 @@ from transformers import (
|
|
7 |
)
|
8 |
from torch.utils.data import Dataset, DataLoader
|
9 |
import pytorch_lightning as pl
|
10 |
-
from pytorch_lightning.loggers import MLFlowLogger
|
|
|
11 |
from pytorch_lightning import Trainer
|
12 |
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
13 |
from pytorch_lightning import LightningDataModule
|
@@ -15,8 +16,6 @@ from pytorch_lightning import LightningModule
|
|
15 |
from datasets import load_metric
|
16 |
from tqdm.auto import tqdm
|
17 |
|
18 |
-
# from dagshub.pytorch_lightning import DAGsHubLogger
|
19 |
-
|
20 |
|
21 |
torch.cuda.empty_cache()
|
22 |
pl.seed_everything(42)
|
@@ -330,9 +329,8 @@ class Summarization:
|
|
330 |
MLlogger = MLFlowLogger(experiment_name="Summarization",
|
331 |
tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
|
332 |
|
333 |
-
|
334 |
-
|
335 |
-
# logger = DAGsHubLogger(metrics_path='reports/training_metrics.txt')
|
336 |
|
337 |
early_stop_callback = (
|
338 |
[
|
@@ -351,7 +349,7 @@ class Summarization:
|
|
351 |
gpus = -1 if use_gpu and torch.cuda.is_available() else 0
|
352 |
|
353 |
trainer = Trainer(
|
354 |
-
logger=[
|
355 |
callbacks=early_stop_callback,
|
356 |
max_epochs=max_epochs,
|
357 |
gpus=gpus,
|
|
|
7 |
)
|
8 |
from torch.utils.data import Dataset, DataLoader
|
9 |
import pytorch_lightning as pl
|
10 |
+
from pytorch_lightning.loggers import MLFlowLogger
|
11 |
+
from dagshub.pytorch_lightning import DAGsHubLogger
|
12 |
from pytorch_lightning import Trainer
|
13 |
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
14 |
from pytorch_lightning import LightningDataModule
|
|
|
16 |
from datasets import load_metric
|
17 |
from tqdm.auto import tqdm
|
18 |
|
|
|
|
|
19 |
|
20 |
torch.cuda.empty_cache()
|
21 |
pl.seed_everything(42)
|
|
|
329 |
MLlogger = MLFlowLogger(experiment_name="Summarization",
|
330 |
tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
|
331 |
|
332 |
+
logger = DAGsHubLogger(metrics_path='reports/training_metrics.csv',
|
333 |
+
hparams_path='reports/training_params.yml')
|
|
|
334 |
|
335 |
early_stop_callback = (
|
336 |
[
|
|
|
349 |
gpus = -1 if use_gpu and torch.cuda.is_available() else 0
|
350 |
|
351 |
trainer = Trainer(
|
352 |
+
logger=[MLlogger, logger],
|
353 |
callbacks=early_stop_callback,
|
354 |
max_epochs=max_epochs,
|
355 |
gpus=gpus,
|
src/models/train_model.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
import json
|
2 |
-
|
3 |
import yaml
|
4 |
|
5 |
from model import Summarization
|
@@ -30,12 +28,6 @@ def train_model():
|
|
30 |
|
31 |
model.save_model(model_dir=params['model_dir'])
|
32 |
|
33 |
-
with open('wandb/latest-run/files/wandb-summary.json') as json_file:
|
34 |
-
data = json.load(json_file)
|
35 |
-
|
36 |
-
with open('reports/training_metrics.txt', 'w') as fp:
|
37 |
-
json.dump(data, fp)
|
38 |
-
|
39 |
|
40 |
if __name__ == '__main__':
|
41 |
train_model()
|
|
|
|
|
|
|
1 |
import yaml
|
2 |
|
3 |
from model import Summarization
|
|
|
28 |
|
29 |
model.save_model(model_dir=params['model_dir'])
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
if __name__ == '__main__':
|
33 |
train_model()
|