Spaces:
Runtime error
Runtime error
Dean
committed on
Commit
·
ec2a2c2
1
Parent(s):
d5a6d18
fix visualization stage.
Browse files
HF upload not tested in current version
- Makefile +3 -3
- data_params.yml +1 -1
- dvc.lock +57 -46
- dvc.yaml +0 -3
- reports/evaluation_metrics.csv +36 -4
- reports/training_metrics.csv +8 -10
- src/__init__.py +0 -12
- src/data/__init__.py +0 -0
- src/models/__init__.py +0 -4
- src/models/predict_model.py +1 -1
- src/visualization/__init__.py +0 -0
- src/visualization/visualize.py +1 -4
Makefile
CHANGED
@@ -48,15 +48,15 @@ pull:
|
|
48 |
|
49 |
## run the DVC pipeline - recompute any modified outputs such as processed data or trained models
|
50 |
run:
|
51 |
-
dvc repro
|
52 |
|
53 |
## run the visualization using Streamlit
|
54 |
visualize:
|
55 |
-
dvc repro
|
56 |
|
57 |
## push the trained model to HF model hub
|
58 |
push_to_hf_hub:
|
59 |
-
dvc repro
|
60 |
|
61 |
#################################################################################
|
62 |
# PROJECT RULES #
|
|
|
48 |
|
49 |
## run the DVC pipeline - recompute any modified outputs such as processed data or trained models
|
50 |
run:
|
51 |
+
dvc repro eval
|
52 |
|
53 |
## run the visualization using Streamlit
|
54 |
visualize:
|
55 |
+
dvc repro visualize
|
56 |
|
57 |
## push the trained model to HF model hub
|
58 |
push_to_hf_hub:
|
59 |
+
dvc repro push_to_hf_hub
|
60 |
|
61 |
#################################################################################
|
62 |
# PROJECT RULES #
|
data_params.yml
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
data: cnn_dailymail
|
2 |
-
split: 0.
|
|
|
1 |
data: cnn_dailymail
|
2 |
+
split: 0.001
|
dvc.lock
CHANGED
@@ -4,48 +4,45 @@ stages:
|
|
4 |
cmd: python src/models/train_model.py
|
5 |
deps:
|
6 |
- path: data/processed/train.csv
|
7 |
-
md5:
|
8 |
-
size:
|
9 |
- path: data/processed/validation.csv
|
10 |
-
md5:
|
11 |
-
size:
|
12 |
-
- path:
|
13 |
-
md5:
|
14 |
-
size:
|
15 |
- path: src/models/train_model.py
|
16 |
-
md5:
|
17 |
-
size:
|
18 |
outs:
|
19 |
- path: models
|
20 |
-
md5:
|
21 |
-
size:
|
22 |
-
nfiles:
|
23 |
- path: reports/training_metrics.csv
|
24 |
-
md5:
|
25 |
-
size:
|
26 |
-
- path: reports/training_params.yml
|
27 |
-
md5: 075736962fab2a5e5b3ff189c13e101b
|
28 |
-
size: 16
|
29 |
eval:
|
30 |
cmd: python src/models/evaluate_model.py
|
31 |
deps:
|
32 |
- path: data/processed/test.csv
|
33 |
-
md5:
|
34 |
-
size:
|
|
|
|
|
|
|
35 |
- path: models
|
36 |
-
md5:
|
37 |
-
size:
|
38 |
-
nfiles:
|
39 |
-
- path: params.yml
|
40 |
-
md5: 200ce3c4d9f2e8b9eb040ef93eb22757
|
41 |
-
size: 189
|
42 |
- path: src/models/evaluate_model.py
|
43 |
-
md5:
|
44 |
-
size:
|
45 |
outs:
|
46 |
-
- path: reports/
|
47 |
-
md5:
|
48 |
-
size:
|
49 |
process_data:
|
50 |
cmd: python src/data/process_data.py
|
51 |
deps:
|
@@ -53,33 +50,47 @@ stages:
|
|
53 |
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
54 |
size: 1358833013
|
55 |
nfiles: 3
|
56 |
-
- path:
|
57 |
-
md5:
|
58 |
-
size:
|
59 |
- path: src/data/process_data.py
|
60 |
-
md5:
|
61 |
-
size:
|
62 |
outs:
|
63 |
- path: data/processed/test.csv
|
64 |
-
md5:
|
65 |
-
size:
|
66 |
- path: data/processed/train.csv
|
67 |
-
md5:
|
68 |
-
size:
|
69 |
- path: data/processed/validation.csv
|
70 |
-
md5:
|
71 |
-
size:
|
72 |
download_data:
|
73 |
cmd: python src/data/make_dataset.py
|
74 |
deps:
|
75 |
-
- path:
|
76 |
-
md5:
|
77 |
-
size:
|
78 |
- path: src/data/make_dataset.py
|
79 |
-
md5:
|
80 |
-
size:
|
81 |
outs:
|
82 |
- path: data/raw
|
83 |
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
84 |
size: 1358833013
|
85 |
nfiles: 3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
cmd: python src/models/train_model.py
|
5 |
deps:
|
6 |
- path: data/processed/train.csv
|
7 |
+
md5: 5331b9c32b2d097d8d7aca01de5524bc
|
8 |
+
size: 1198262
|
9 |
- path: data/processed/validation.csv
|
10 |
+
md5: 6069153a075b00dfb6d9e0843dd2da89
|
11 |
+
size: 52739
|
12 |
+
- path: model_params.yml
|
13 |
+
md5: 9fcf006ee30f2b751078598a3fba9bb5
|
14 |
+
size: 235
|
15 |
- path: src/models/train_model.py
|
16 |
+
md5: f7d1121426c3d5530c2b9697cb7ac74a
|
17 |
+
size: 951
|
18 |
outs:
|
19 |
- path: models
|
20 |
+
md5: fc37870a93db61b94af9f0847577f09b.dir
|
21 |
+
size: 243476333
|
22 |
+
nfiles: 5
|
23 |
- path: reports/training_metrics.csv
|
24 |
+
md5: 0b6c1518aed802bea976e883caac2a90
|
25 |
+
size: 320
|
|
|
|
|
|
|
26 |
eval:
|
27 |
cmd: python src/models/evaluate_model.py
|
28 |
deps:
|
29 |
- path: data/processed/test.csv
|
30 |
+
md5: 3eec94ac211c76363a3d968663b82d02
|
31 |
+
size: 39574
|
32 |
+
- path: model_params.yml
|
33 |
+
md5: 9fcf006ee30f2b751078598a3fba9bb5
|
34 |
+
size: 235
|
35 |
- path: models
|
36 |
+
md5: fc37870a93db61b94af9f0847577f09b.dir
|
37 |
+
size: 243476333
|
38 |
+
nfiles: 5
|
|
|
|
|
|
|
39 |
- path: src/models/evaluate_model.py
|
40 |
+
md5: 89edb77aaab3055605ae6db2e21eab82
|
41 |
+
size: 705
|
42 |
outs:
|
43 |
+
- path: reports/evaluation_metrics.csv
|
44 |
+
md5: a5fa12e6df10884217614c007d146a26
|
45 |
+
size: 2122
|
46 |
process_data:
|
47 |
cmd: python src/data/process_data.py
|
48 |
deps:
|
|
|
50 |
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
51 |
size: 1358833013
|
52 |
nfiles: 3
|
53 |
+
- path: data_params.yml
|
54 |
+
md5: a68eabf79c3b3e28afb05baa1944bbc7
|
55 |
+
size: 32
|
56 |
- path: src/data/process_data.py
|
57 |
+
md5: 68db554a69a0c8ce807907afa2be5e9c
|
58 |
+
size: 521
|
59 |
outs:
|
60 |
- path: data/processed/test.csv
|
61 |
+
md5: 3eec94ac211c76363a3d968663b82d02
|
62 |
+
size: 39574
|
63 |
- path: data/processed/train.csv
|
64 |
+
md5: 5331b9c32b2d097d8d7aca01de5524bc
|
65 |
+
size: 1198262
|
66 |
- path: data/processed/validation.csv
|
67 |
+
md5: 6069153a075b00dfb6d9e0843dd2da89
|
68 |
+
size: 52739
|
69 |
download_data:
|
70 |
cmd: python src/data/make_dataset.py
|
71 |
deps:
|
72 |
+
- path: data_params.yml
|
73 |
+
md5: a68eabf79c3b3e28afb05baa1944bbc7
|
74 |
+
size: 32
|
75 |
- path: src/data/make_dataset.py
|
76 |
+
md5: a0667f4ad8c06551609bd0bf950167b7
|
77 |
+
size: 776
|
78 |
outs:
|
79 |
- path: data/raw
|
80 |
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
81 |
size: 1358833013
|
82 |
nfiles: 3
|
83 |
+
visualize:
|
84 |
+
cmd: streamlit run src/visualization/visualize.py
|
85 |
+
deps:
|
86 |
+
- path: models
|
87 |
+
md5: fc37870a93db61b94af9f0847577f09b.dir
|
88 |
+
size: 243476333
|
89 |
+
nfiles: 5
|
90 |
+
- path: src/visualization/visualize.py
|
91 |
+
md5: a71303fef593a9fd275fc4964623baf8
|
92 |
+
size: 814
|
93 |
+
outs:
|
94 |
+
- path: reports/visualization_metrics.txt
|
95 |
+
md5: fd7b6bb170dbaa9ef1076bc8be7e7593
|
96 |
+
size: 2144
|
dvc.yaml
CHANGED
@@ -50,9 +50,6 @@ stages:
|
|
50 |
deps:
|
51 |
- models
|
52 |
- src/visualization/visualize.py
|
53 |
-
metrics:
|
54 |
-
- reports/visualization_metrics.txt:
|
55 |
-
cache: false
|
56 |
push_to_hf_hub:
|
57 |
cmd: python src/models/hf_upload.py
|
58 |
deps:
|
|
|
50 |
deps:
|
51 |
- models
|
52 |
- src/visualization/visualize.py
|
|
|
|
|
|
|
53 |
push_to_hf_hub:
|
54 |
cmd: python src/models/hf_upload.py
|
55 |
deps:
|
reports/evaluation_metrics.csv
CHANGED
@@ -1,5 +1,37 @@
|
|
1 |
Name,Value,Timestamp,Step
|
2 |
-
"
|
3 |
-
"
|
4 |
-
"
|
5 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Name,Value,Timestamp,Step
|
2 |
+
"Rouge_1 Low Precision",0.23786550570641482,1628194352980,1
|
3 |
+
"Rouge_1 Low recall",0.23355396379384713,1628194352980,1
|
4 |
+
"Rouge_1 Low F1",0.23602599457077003,1628194352980,1
|
5 |
+
"Rouge_1 Mid Precision",0.3569471852499436,1628194352980,1
|
6 |
+
"Rouge_1 Mid recall",0.31915939075819916,1628194352980,1
|
7 |
+
"Rouge_1 Mid F1",0.3317618573023773,1628194352980,1
|
8 |
+
"Rouge_1 High Precision",0.4726861301480842,1628194352980,1
|
9 |
+
"Rouge_1 High recall",0.4019654200001146,1628194352980,1
|
10 |
+
"Rouge_1 High F1",0.4298956952594035,1628194352980,1
|
11 |
+
"Rouge_2 Low Precision",0.06184772400193972,1628194352980,1
|
12 |
+
"Rouge_2 Low recall",0.05626972412346313,1628194352980,1
|
13 |
+
"Rouge_2 Low F1",0.058680298802341754,1628194352980,1
|
14 |
+
"Rouge_2 Mid Precision",0.1367034298993256,1628194352980,1
|
15 |
+
"Rouge_2 Mid recall",0.11953160646342464,1628194352980,1
|
16 |
+
"Rouge_2 Mid F1",0.12485064123505887,1628194352980,1
|
17 |
+
"Rouge_2 High Precision",0.22739029631016827,1628194352980,1
|
18 |
+
"Rouge_2 High recall",0.18851628169809986,1628194352980,1
|
19 |
+
"Rouge_2 High F1",0.20306657551189072,1628194352980,1
|
20 |
+
"Rouge_L Low Precision",0.18248956154159507,1628194352980,1
|
21 |
+
"Rouge_L Low recall",0.18048774357814204,1628194352980,1
|
22 |
+
"Rouge_L Low F1",0.18151380309623336,1628194352980,1
|
23 |
+
"Rouge_L Mid Precision",0.2614974838710314,1628194352980,1
|
24 |
+
"Rouge_L Mid recall",0.24286688705755238,1628194352980,1
|
25 |
+
"Rouge_L Mid F1",0.24674586991996245,1628194352980,1
|
26 |
+
"Rouge_L High Precision",0.3574471638807763,1628194352980,1
|
27 |
+
"Rouge_L High recall",0.30836083808542225,1628194352980,1
|
28 |
+
"Rouge_L High F1",0.32385446385474176,1628194352980,1
|
29 |
+
"rougeLsum Low Precision",0.21468633089019287,1628194352980,1
|
30 |
+
"rougeLsum Low recall",0.2057771050364415,1628194352980,1
|
31 |
+
"rougeLsum Low F1",0.21170611912426093,1628194352980,1
|
32 |
+
"rougeLsum Mid Precision",0.3060593850789648,1628194352980,1
|
33 |
+
"rougeLsum Mid recall",0.27733553744690076,1628194352980,1
|
34 |
+
"rougeLsum Mid F1",0.28530501988436374,1628194352980,1
|
35 |
+
"rougeLsum High Precision",0.4094614601758424,1628194352980,1
|
36 |
+
"rougeLsum High recall",0.34640369291505535,1628194352980,1
|
37 |
+
"rougeLsum High F1",0.36454440079714096,1628194352980,1
|
reports/training_metrics.csv
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
Name,Value,Timestamp,Step
|
2 |
-
"val_loss",
|
3 |
-
"epoch",0,
|
4 |
-
"val_loss",
|
5 |
-
"epoch",1,
|
6 |
-
"val_loss",
|
7 |
-
"epoch",2,
|
8 |
-
"
|
9 |
-
"epoch",3,
|
10 |
-
"val_loss",4.228608131408691,1628178200552,59
|
11 |
-
"epoch",3,1628178200552,59
|
|
|
1 |
Name,Value,Timestamp,Step
|
2 |
+
"val_loss",2.615034580230713,1628194199660,0
|
3 |
+
"epoch",0,1628194199660,0
|
4 |
+
"val_loss",2.6141018867492676,1628194229556,1
|
5 |
+
"epoch",1,1628194229556,1
|
6 |
+
"val_loss",2.6132164001464844,1628194259447,2
|
7 |
+
"epoch",2,1628194259447,2
|
8 |
+
"val_loss",2.612450361251831,1628194289914,3
|
9 |
+
"epoch",3,1628194289914,3
|
|
|
|
src/__init__.py
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
import os # noqa: F401
|
2 |
-
import sys # noqa: F401
|
3 |
-
|
4 |
-
from src.data.make_dataset import make_dataset # noqa: F401
|
5 |
-
from src.data.process_data import process_data # noqa: F401
|
6 |
-
from src.models.evaluate_model import evaluate_model # noqa: F401
|
7 |
-
from src.models.model import Summarization # noqa: F401
|
8 |
-
from src.models.predict_model import predict_model # noqa: F401
|
9 |
-
from src.models.train_model import train_model # noqa: F401
|
10 |
-
from src.visualization.visualize import visualize # noqa: F401
|
11 |
-
|
12 |
-
sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa: F401
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/data/__init__.py
DELETED
File without changes
|
src/models/__init__.py
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
from .model import Summarization # noqa: F401
|
2 |
-
from .train_model import train_model # noqa: F401
|
3 |
-
from .predict_model import predict_model # noqa: F401
|
4 |
-
from .evaluate_model import evaluate_model # noqa: F401
|
|
|
|
|
|
|
|
|
|
src/models/predict_model.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import yaml
|
2 |
|
3 |
-
from model import Summarization
|
4 |
import pandas as pd
|
5 |
|
6 |
|
|
|
1 |
import yaml
|
2 |
|
3 |
+
from src.models.model import Summarization
|
4 |
import pandas as pd
|
5 |
|
6 |
|
src/visualization/__init__.py
DELETED
File without changes
|
src/visualization/visualize.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
from
|
4 |
|
5 |
|
6 |
def visualize():
|
@@ -18,9 +18,6 @@ def visualize():
|
|
18 |
sumtext = predict_model(text=text)
|
19 |
st.write("# Generated Summary:")
|
20 |
st.write("{}".format(sumtext))
|
21 |
-
with open("reports/visualization_metrics.txt", "w") as file1:
|
22 |
-
file1.writelines(text)
|
23 |
-
file1.writelines(sumtext)
|
24 |
|
25 |
|
26 |
if __name__ == "__main__":
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
from src.models.predict_model import predict_model
|
4 |
|
5 |
|
6 |
def visualize():
|
|
|
18 |
sumtext = predict_model(text=text)
|
19 |
st.write("# Generated Summary:")
|
20 |
st.write("{}".format(sumtext))
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
if __name__ == "__main__":
|