Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
f89abd9
1
Parent(s):
fcfe5f6
Commits branch 9 from gedi
Browse files- config_files/algorithm/feature_extraction.json +2 -4
- data/BaselineED_bench.csv +0 -21
- data/GenBaselineED_bench.csv +0 -25
- data/GenBaseline_ED_bench.csv +25 -0
- data/{GenBaselineED_feat.csv → GenBaseline_ED_feat.csv} +1 -1
- data/GenED_bench.csv +0 -0
- data/GenED_feat.csv +1 -1
- data/baseline_ED_bench.csv +18 -0
- data/{BaselineED_feat.csv → baseline_ED_feat.csv} +1 -1
- gedi/generator.py +71 -8
- gedi/plotter.py +14 -22
- merge_csvs.py +0 -21
- notebooks/benchmarking_process_discovery.ipynb +0 -0
- notebooks/feature_selection.ipynb +0 -0
- notebooks/gedi_benchmark_distributions.ipynb +0 -0
- notebooks/gedi_fig6_benchmark_boxplots.ipynb +0 -0
- notebooks/gedi_figs4and5_representativeness.ipynb +0 -0
- notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb +0 -0
- notebooks/gedi_figs9and10_consistency.ipynb +0 -0
- notebooks/gedi_representativeness.ipynb +0 -0
- utils/param_keys/plotter.py +0 -2
config_files/algorithm/feature_extraction.json
CHANGED
@@ -4,9 +4,7 @@
|
|
4 |
"input_path": "data/test",
|
5 |
"feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
|
6 |
"output_path": "output/plots",
|
7 |
-
"real_eventlog_path": "data/
|
8 |
-
"plot_type": "boxplot"
|
9 |
-
"font_size": 24,
|
10 |
-
"boxplot_width":10
|
11 |
}
|
12 |
]
|
|
|
4 |
"input_path": "data/test",
|
5 |
"feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
|
6 |
"output_path": "output/plots",
|
7 |
+
"real_eventlog_path": "data/bpic_features.csv",
|
8 |
+
"plot_type": "boxplot"
|
|
|
|
|
9 |
}
|
10 |
]
|
data/BaselineED_bench.csv
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
|
2 |
-
BPIC12,,,,,,,0.999782450408571,0.106249999999999,0.192086381040032,69,41,37,,,,,,
|
3 |
-
BPIC13cp,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0
|
4 |
-
BPIC13inc,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0
|
5 |
-
BPIC13op,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0
|
6 |
-
BPIC14dc_p,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0
|
7 |
-
BPIC14di_p,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2,1.0,1.0,1.0,10.0,2.0,2.0
|
8 |
-
BPIC15f2,,,,,,,0.9677497565467512,0.010598531351998,0.0209674330962,381,134,115,,,,,,
|
9 |
-
BPIC16c_p,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0
|
10 |
-
BPIC16wm_p,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0
|
11 |
-
BPIC17,,,,,,,0.930672500139456,0.244851509976953,0.387702105600728,73,48,40,,,,,,
|
12 |
-
BPIC17ol,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0
|
13 |
-
BPIC20a,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0
|
14 |
-
BPIC20b,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0
|
15 |
-
BPIC20c,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67,,,,,,
|
16 |
-
BPIC20d,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0
|
17 |
-
BPIC20e,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0
|
18 |
-
HD,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0
|
19 |
-
RTFMP,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0
|
20 |
-
RWABOCSL,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0
|
21 |
-
SEPSIS,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/GenBaselineED_bench.csv
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
|
2 |
-
genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
|
3 |
-
genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0
|
4 |
-
genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0
|
5 |
-
genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0
|
6 |
-
genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0
|
7 |
-
genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0
|
8 |
-
genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0
|
9 |
-
genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24,,,,,,
|
10 |
-
genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0
|
11 |
-
genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
|
12 |
-
genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
|
13 |
-
genELBPIC16wm_p_00_00_00_00_02958_07141_00029,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21,0.7559297979920231,0.842836745090442,0.797021195137192,31.0,18.0,16.0
|
14 |
-
genELBPIC17_04616_02905_02319_07417_00335_05313_05056,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
|
15 |
-
genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0
|
16 |
-
genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
|
17 |
-
genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24,,,,,,
|
18 |
-
genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0
|
19 |
-
genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
|
20 |
-
genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0
|
21 |
-
genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0
|
22 |
-
genELHD_02541_01546_01185_07991_05166_09063_00493,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
|
23 |
-
genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
|
24 |
-
genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0
|
25 |
-
genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/GenBaseline_ED_bench.csv
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
|
2 |
+
genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11
|
3 |
+
genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4
|
4 |
+
genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
|
5 |
+
genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
|
6 |
+
genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
|
7 |
+
genELHD_02541_01546_01185_07991_05166_09063_00493,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
|
8 |
+
genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10
|
9 |
+
genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4
|
10 |
+
genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16
|
11 |
+
genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4
|
12 |
+
genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11
|
13 |
+
genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11
|
14 |
+
genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
|
15 |
+
genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
|
16 |
+
genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24
|
17 |
+
genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
|
18 |
+
genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
|
19 |
+
genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20
|
20 |
+
genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
|
21 |
+
genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,,,,,,,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24
|
22 |
+
genELBPIC16wm_p_00_00_00_00_02958_07141_00029,,,,,,,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21
|
23 |
+
genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
|
24 |
+
genELBPIC17_04616_02905_02319_07417_00335_05313_05056,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
|
25 |
+
genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24
|
data/{GenBaselineED_feat.csv → GenBaseline_ED_feat.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
|
3 |
0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
|
4 |
0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
|
|
|
1 |
+
ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,log
|
2 |
0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
|
3 |
0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
|
4 |
0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
|
data/GenED_bench.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/GenED_feat.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
log,
|
2 |
2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
|
3 |
2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
|
4 |
2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
|
|
|
1 |
+
log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
|
2 |
2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
|
3 |
2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
|
4 |
2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
|
data/baseline_ED_bench.csv
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
|
2 |
+
BPIC16wm_p,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2
|
3 |
+
BPIC13op,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8
|
4 |
+
BPIC13cp,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9
|
5 |
+
RTFMP,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20
|
6 |
+
SEPSIS,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23
|
7 |
+
HD,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27
|
8 |
+
BPIC20d,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41
|
9 |
+
BPIC13inc,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8
|
10 |
+
BPIC14di_p,1.0,1.0,1.0,10.0,2.0,2.0,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2
|
11 |
+
BPIC20e,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25
|
12 |
+
BPIC14dc_p,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364
|
13 |
+
BPIC16c_p,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34
|
14 |
+
BPIC20a,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18
|
15 |
+
BPIC20b,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43
|
16 |
+
RWABOCSL,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43
|
17 |
+
BPIC17ol,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4
|
18 |
+
BPIC20c,,,,,,,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67
|
data/{BaselineED_feat.csv → baseline_ED_feat.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
log,
|
2 |
BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
|
3 |
BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
|
4 |
BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
|
|
|
1 |
+
log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
|
2 |
BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
|
3 |
BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
|
4 |
BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
|
gedi/generator.py
CHANGED
@@ -2,7 +2,6 @@ import multiprocessing
|
|
2 |
import os
|
3 |
import pandas as pd
|
4 |
import random
|
5 |
-
|
6 |
from ConfigSpace import Configuration, ConfigurationSpace
|
7 |
from datetime import datetime as dt
|
8 |
from feeed.activities import Activities as activities
|
@@ -21,8 +20,9 @@ from smac import HyperparameterOptimizationFacade, Scenario
|
|
21 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
22 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
23 |
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
"""
|
28 |
Parameters
|
@@ -72,13 +72,72 @@ def get_tasks(experiment, output_path="", reference_feature=None):
|
|
72 |
raise FileNotFoundError(f"{experiment} not found. Please check path in filesystem.")
|
73 |
return tasks, output_path
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
class GenerateEventLogs():
|
76 |
# TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
|
77 |
def __init__(self, params):
|
78 |
print("=========================== Generator ==========================")
|
79 |
print(f"INFO: Running with {params}")
|
80 |
start = dt.now()
|
81 |
-
if params.get(OUTPUT_PATH)
|
82 |
self.output_path = 'data/generated'
|
83 |
else:
|
84 |
self.output_path = params.get(OUTPUT_PATH)
|
@@ -91,7 +150,7 @@ class GenerateEventLogs():
|
|
91 |
|
92 |
self.params = params.get(GENERATOR_PARAMS)
|
93 |
experiment = self.params.get(EXPERIMENT)
|
94 |
-
if experiment
|
95 |
tasks, output_path = get_tasks(experiment, self.output_path)
|
96 |
self.output_path = output_path
|
97 |
|
@@ -114,6 +173,7 @@ class GenerateEventLogs():
|
|
114 |
save_path = get_output_key_value_location(self.params[EXPERIMENT],
|
115 |
self.output_path, "genEL")+".xes"
|
116 |
write_xes(temp['log'], save_path)
|
|
|
117 |
print("SUCCESS: Saved generated event log in", save_path)
|
118 |
print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_config)} event logs.")
|
119 |
print(f" Saved generated logs in {self.output_path}")
|
@@ -140,6 +200,7 @@ class GenerateEventLogs():
|
|
140 |
self.output_path, identifier)+".xes"
|
141 |
|
142 |
write_xes(log_config['log'], save_path)
|
|
|
143 |
print("SUCCESS: Saved generated event log in", save_path)
|
144 |
features_to_dump = log_config['metafeatures']
|
145 |
features_to_dump['log'] = identifier.replace('genEL', '')
|
@@ -165,9 +226,10 @@ class GenerateEventLogs():
|
|
165 |
log = play_out(tree, parameters={"num_traces": config["num_traces"]})
|
166 |
|
167 |
for i, trace in enumerate(log):
|
168 |
-
trace.attributes['concept:name']=str(i)
|
169 |
for j, event in enumerate(trace):
|
170 |
-
event['time:timestamp']=dt.now()
|
|
|
171 |
random.seed(RANDOM_SEED)
|
172 |
metafeatures = self.compute_metafeatures(log)
|
173 |
return {
|
@@ -203,6 +265,7 @@ class GenerateEventLogs():
|
|
203 |
trace.attributes['concept:name'] = str(i)
|
204 |
for j, event in enumerate(trace):
|
205 |
event['time:timestamp'] = dt.fromtimestamp(j * 1000)
|
|
|
206 |
|
207 |
metafeatures_computation = {}
|
208 |
for ft_name in self.objectives.keys():
|
@@ -219,7 +282,7 @@ class GenerateEventLogs():
|
|
219 |
return log_evaluation
|
220 |
|
221 |
def optimize(self):
|
222 |
-
if self.params.get(CONFIG_SPACE)
|
223 |
configspace = ConfigurationSpace({
|
224 |
"mode": (5, 40),
|
225 |
"sequence": (0.01, 1),
|
|
|
2 |
import os
|
3 |
import pandas as pd
|
4 |
import random
|
|
|
5 |
from ConfigSpace import Configuration, ConfigurationSpace
|
6 |
from datetime import datetime as dt
|
7 |
from feeed.activities import Activities as activities
|
|
|
20 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
21 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
|
23 |
+
import xml.etree.ElementTree as ET
|
24 |
+
import re
|
25 |
+
from xml.dom import minidom
|
26 |
|
27 |
"""
|
28 |
Parameters
|
|
|
72 |
raise FileNotFoundError(f"{experiment} not found. Please check path in filesystem.")
|
73 |
return tasks, output_path
|
74 |
|
75 |
+
|
76 |
+
def removeextralines(elem):
    """Blank out whitespace-only ``text`` and ``tail`` strings below *elem*.

    Every element reachable through ``elem.iter()`` (including *elem*
    itself) is visited. A ``tail`` or ``text`` value whose string form holds
    no word character is replaced by the empty string, so that a later
    pretty-printing pass does not stack blank lines on top of the existing
    indentation. Values that are ``None`` are left alone, because their
    string form ("None") does contain word characters.

    Parameters
    ----------
    elem : xml.etree.ElementTree.Element
        Root of the subtree to clean; it is modified in place.
    """
    word_pattern = re.compile("\\w")
    for node in elem.iter():
        # Same check for both slots; tail first, then text.
        for slot in ("tail", "text"):
            if word_pattern.search(str(getattr(node, slot))) is None:
                setattr(node, slot, "")
|
83 |
+
|
84 |
+
def add_extension_before_traces(xes_file):
    """Rewrite *xes_file* in place with XES extension and global declarations.

    The file is parsed, three ``<extension>`` elements (Concept, Time,
    Lifecycle) and two ``<global>`` elements (trace scope, event scope) are
    inserted at the front of the root element, whitespace-only text is
    blanked via ``removeextralines`` and the tree is written back
    pretty-printed.

    Parameters
    ----------
    xes_file : str or path-like
        Path of the XES file to read and then overwrite.

    Raises
    ------
    OSError
        If the file cannot be opened for reading or writing.
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML.
    """
    # Register the XES namespace as the default one so that serialised tags
    # carry no generated "ns0:" prefix.
    ET.register_namespace('', "http://www.xes-standard.org/")

    # Parse the original XML
    root = ET.parse(xes_file).getroot()

    # Add extensions. Each one is inserted at index 0, so they end up in the
    # file in reverse list order: Concept, Time, Lifecycle.
    extensions = [
        {'name': 'Lifecycle', 'prefix': 'lifecycle', 'uri': 'http://www.xes-standard.org/lifecycle.xesext'},
        {'name': 'Time', 'prefix': 'time', 'uri': 'http://www.xes-standard.org/time.xesext'},
        {'name': 'Concept', 'prefix': 'concept', 'uri': 'http://www.xes-standard.org/concept.xesext'}
    ]
    for ext in extensions:
        root.insert(0, ET.Element('extension', ext))

    # Add global variables (named so as not to shadow the ``globals`` builtin).
    global_declarations = [
        {
            'scope': 'event',
            'attributes': [
                {'key': 'lifecycle:transition', 'value': 'complete'},
                {'key': 'concept:name', 'value': '__INVALID__'},
                {'key': 'time:timestamp', 'value': '1970-01-01T01:00:00.000+01:00'}
            ]
        },
        {
            'scope': 'trace',
            'attributes': [
                {'key': 'concept:name', 'value': '__INVALID__'}
            ]
        }
    ]
    # Both are inserted directly after the extensions; inserting repeatedly
    # at the same index places the trace-scope global before the event-scope
    # one in the output.
    for declaration in global_declarations:
        global_elem = ET.Element('global', {'scope': declaration['scope']})
        for attr in declaration['attributes']:
            ET.SubElement(global_elem, 'string', {'key': attr['key'], 'value': attr['value']})
        root.insert(len(extensions), global_elem)

    # Pretty print the XES. Whitespace-only text is cleared first so that
    # toprettyxml() does not add blank lines around the old indentation.
    removeextralines(root)
    xml_str = minidom.parseString(ET.tostring(root)).toprettyxml()
    # The declaration emitted by toprettyxml() names no encoding, which XML
    # readers treat as UTF-8, so write UTF-8 explicitly instead of relying on
    # the platform's locale encoding.
    with open(xes_file, "w", encoding="utf-8") as f:
        f.write(xml_str)
|
133 |
+
|
134 |
class GenerateEventLogs():
|
135 |
# TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
|
136 |
def __init__(self, params):
|
137 |
print("=========================== Generator ==========================")
|
138 |
print(f"INFO: Running with {params}")
|
139 |
start = dt.now()
|
140 |
+
if params.get(OUTPUT_PATH) is None:
|
141 |
self.output_path = 'data/generated'
|
142 |
else:
|
143 |
self.output_path = params.get(OUTPUT_PATH)
|
|
|
150 |
|
151 |
self.params = params.get(GENERATOR_PARAMS)
|
152 |
experiment = self.params.get(EXPERIMENT)
|
153 |
+
if experiment is not None:
|
154 |
tasks, output_path = get_tasks(experiment, self.output_path)
|
155 |
self.output_path = output_path
|
156 |
|
|
|
173 |
save_path = get_output_key_value_location(self.params[EXPERIMENT],
|
174 |
self.output_path, "genEL")+".xes"
|
175 |
write_xes(temp['log'], save_path)
|
176 |
+
add_extension_before_traces(save_path)
|
177 |
print("SUCCESS: Saved generated event log in", save_path)
|
178 |
print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_config)} event logs.")
|
179 |
print(f" Saved generated logs in {self.output_path}")
|
|
|
200 |
self.output_path, identifier)+".xes"
|
201 |
|
202 |
write_xes(log_config['log'], save_path)
|
203 |
+
add_extension_before_traces(save_path)
|
204 |
print("SUCCESS: Saved generated event log in", save_path)
|
205 |
features_to_dump = log_config['metafeatures']
|
206 |
features_to_dump['log'] = identifier.replace('genEL', '')
|
|
|
226 |
log = play_out(tree, parameters={"num_traces": config["num_traces"]})
|
227 |
|
228 |
for i, trace in enumerate(log):
|
229 |
+
trace.attributes['concept:name'] = str(i)
|
230 |
for j, event in enumerate(trace):
|
231 |
+
event['time:timestamp'] = dt.now()
|
232 |
+
event['lifecycle:transition'] = "complete"
|
233 |
random.seed(RANDOM_SEED)
|
234 |
metafeatures = self.compute_metafeatures(log)
|
235 |
return {
|
|
|
265 |
trace.attributes['concept:name'] = str(i)
|
266 |
for j, event in enumerate(trace):
|
267 |
event['time:timestamp'] = dt.fromtimestamp(j * 1000)
|
268 |
+
event['lifecycle:transition'] = "complete"
|
269 |
|
270 |
metafeatures_computation = {}
|
271 |
for ft_name in self.objectives.keys():
|
|
|
282 |
return log_evaluation
|
283 |
|
284 |
def optimize(self):
|
285 |
+
if self.params.get(CONFIG_SPACE) is None:
|
286 |
configspace = ConfigurationSpace({
|
287 |
"mode": (5, 40),
|
288 |
"sequence": (0.01, 1),
|
gedi/plotter.py
CHANGED
@@ -12,9 +12,9 @@ from matplotlib.axes import Axes
|
|
12 |
from matplotlib.figure import Figure
|
13 |
from matplotlib.lines import Line2D
|
14 |
from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
|
15 |
-
from utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
16 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
|
17 |
-
from utils.param_keys.plotter import REAL_EVENTLOG_PATH
|
18 |
from collections import defaultdict
|
19 |
|
20 |
from sklearn.preprocessing import Normalizer, StandardScaler
|
@@ -318,19 +318,17 @@ class BenchmarkPlotter:
|
|
318 |
class FeaturesPlotter:
|
319 |
def __init__(self, features, params=None):
|
320 |
output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
|
321 |
-
plot_type = f", plot_type='{params[PLOT_TYPE]}'" if
|
322 |
-
font_size = f", font_size='{params[FONT_SIZE]}'" if params.get(FONT_SIZE) else ""
|
323 |
-
boxplot_w = f", boxplot_w='{params[BOXPLOT_WIDTH]}'" if params.get(BOXPLOT_WIDTH) else ""
|
324 |
-
LEGEND = ", legend=True" if params.get(PIPELINE_STEP) else ""
|
325 |
|
326 |
source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
|
327 |
#output_path = os.path.join(output_path, source_name)
|
328 |
if REAL_EVENTLOG_PATH in params:
|
|
|
329 |
real_eventlogs_path=params[REAL_EVENTLOG_PATH]
|
330 |
real_eventlogs = pd.read_csv(real_eventlogs_path)
|
331 |
-
fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type}
|
332 |
else:
|
333 |
-
fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type}
|
334 |
|
335 |
if output_path != None:
|
336 |
os.makedirs(os.path.split(output_path)[0], exist_ok=True)
|
@@ -338,14 +336,14 @@ class FeaturesPlotter:
|
|
338 |
print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
|
339 |
|
340 |
|
341 |
-
def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot"
|
342 |
columns = features.columns[1:]
|
343 |
df1=features.select_dtypes(exclude=['object'])
|
344 |
|
345 |
-
fig, axes = plt.subplots(len(df1.columns),1, figsize=(
|
346 |
for i, ax in enumerate(axes):
|
347 |
eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
|
348 |
-
fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=
|
349 |
fig.tight_layout()
|
350 |
|
351 |
|
@@ -353,12 +351,11 @@ class FeaturesPlotter:
|
|
353 |
|
354 |
return fig, output_path
|
355 |
|
356 |
-
def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot"
|
357 |
-
font_size=24, legend=False, boxplot_w=16):
|
358 |
LOG_NATURE = "Log Nature"
|
359 |
GENERATED = "Generated"
|
360 |
REAL = "Real"
|
361 |
-
FONT_SIZE=
|
362 |
alpha = 0.7
|
363 |
color = sns.color_palette("bright")
|
364 |
markers = ['o','X']
|
@@ -377,7 +374,7 @@ class FeaturesPlotter:
|
|
377 |
if plot_type == 'violinplot':
|
378 |
inner_param = 'inner = None,'
|
379 |
|
380 |
-
fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(
|
381 |
if isinstance(axes, Axes): # not isinstance(axes, list):
|
382 |
axes = [axes]
|
383 |
#nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
|
@@ -403,14 +400,8 @@ class FeaturesPlotter:
|
|
403 |
ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
|
404 |
ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
|
405 |
|
406 |
-
|
407 |
-
if legend:
|
408 |
-
fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
|
409 |
-
plt.legend(fontsize=FONT_SIZE)
|
410 |
#fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
|
411 |
-
plt.yticks(fontsize=FONT_SIZE)
|
412 |
-
plt.xticks(fontsize=FONT_SIZE)
|
413 |
-
|
414 |
fig.tight_layout()
|
415 |
|
416 |
output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
|
@@ -631,6 +622,7 @@ class AugmentationPlotter(object):
|
|
631 |
|
632 |
|
633 |
class GenerationPlotter(object):
|
|
|
634 |
def __init__(self, gen_cfg, model_params, output_path, input_path=None):
|
635 |
print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
|
636 |
self.output_path = output_path
|
|
|
12 |
from matplotlib.figure import Figure
|
13 |
from matplotlib.lines import Line2D
|
14 |
from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
|
15 |
+
from utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
16 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
|
17 |
+
from utils.param_keys.plotter import REAL_EVENTLOG_PATH
|
18 |
from collections import defaultdict
|
19 |
|
20 |
from sklearn.preprocessing import Normalizer, StandardScaler
|
|
|
318 |
class FeaturesPlotter:
|
319 |
def __init__(self, features, params=None):
|
320 |
output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
|
321 |
+
plot_type = f", plot_type='{params[PLOT_TYPE]}'" if PLOT_TYPE else ""
|
|
|
|
|
|
|
322 |
|
323 |
source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
|
324 |
#output_path = os.path.join(output_path, source_name)
|
325 |
if REAL_EVENTLOG_PATH in params:
|
326 |
+
#real_eventlogs_path != None:
|
327 |
real_eventlogs_path=params[REAL_EVENTLOG_PATH]
|
328 |
real_eventlogs = pd.read_csv(real_eventlogs_path)
|
329 |
+
fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type})")
|
330 |
else:
|
331 |
+
fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
|
332 |
|
333 |
if output_path != None:
|
334 |
os.makedirs(os.path.split(output_path)[0], exist_ok=True)
|
|
|
336 |
print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
|
337 |
|
338 |
|
339 |
+
def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot"):
|
340 |
columns = features.columns[1:]
|
341 |
df1=features.select_dtypes(exclude=['object'])
|
342 |
|
343 |
+
fig, axes = plt.subplots(len(df1.columns),1, figsize=(17,len(df1.columns)))
|
344 |
for i, ax in enumerate(axes):
|
345 |
eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
|
346 |
+
fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=16, y=1)
|
347 |
fig.tight_layout()
|
348 |
|
349 |
|
|
|
351 |
|
352 |
return fig, output_path
|
353 |
|
354 |
+
def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot"):
|
|
|
355 |
LOG_NATURE = "Log Nature"
|
356 |
GENERATED = "Generated"
|
357 |
REAL = "Real"
|
358 |
+
FONT_SIZE=20
|
359 |
alpha = 0.7
|
360 |
color = sns.color_palette("bright")
|
361 |
markers = ['o','X']
|
|
|
374 |
if plot_type == 'violinplot':
|
375 |
inner_param = 'inner = None,'
|
376 |
|
377 |
+
fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(12,len(dmf1.columns)*1.25), dpi=100)
|
378 |
if isinstance(axes, Axes): # not isinstance(axes, list):
|
379 |
axes = [axes]
|
380 |
#nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
|
|
|
400 |
ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
|
401 |
ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
|
402 |
|
403 |
+
fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
|
|
|
|
|
|
|
404 |
#fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
|
|
|
|
|
|
|
405 |
fig.tight_layout()
|
406 |
|
407 |
output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
|
|
|
622 |
|
623 |
|
624 |
class GenerationPlotter(object):
|
625 |
+
|
626 |
def __init__(self, gen_cfg, model_params, output_path, input_path=None):
|
627 |
print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
|
628 |
self.output_path = output_path
|
merge_csvs.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import pandas as pd
|
3 |
-
import sys
|
4 |
-
import tqdm
|
5 |
-
|
6 |
-
from gedi.utils.io_helpers import sort_files
|
7 |
-
|
8 |
-
def merge_csvs(file_start):
    """Outer-merge all CSV files sharing a path prefix on their ``log`` column.

    Every file in the prefix's directory whose name starts with the prefix's
    base name is read with pandas and merged on ``log``. The result is written
    to ``<file_start>.csv``.

    Args:
        file_start: Path prefix, e.g. ``data/GenED_bench`` matches
            ``data/GenED_bench_*.csv``.

    Returns:
        The merged ``pandas.DataFrame`` that was written to disk.
    """
    root_path, prefix = os.path.split(file_start)
    output_name = prefix + ".csv"
    output_path = os.path.join(root_path, output_name)

    # os.listdir('') raises FileNotFoundError, so a bare prefix without a
    # directory part must be resolved against the current directory.
    search_dir = root_path or "."

    # Skip the output file itself: it also starts with the prefix, and merging
    # a previous result back in would duplicate every column (_x/_y suffixes).
    # Sorting makes the column order independent of the directory listing order.
    filename_list = sorted(
        filename for filename in os.listdir(search_dir)
        if filename.startswith(prefix) and filename != output_name
    )

    # Seed with an empty frame so that "no matching files" still yields a CSV
    # containing just the ``log`` header.
    result = pd.DataFrame(columns=['log'])
    for filename in filename_list:
        df = pd.read_csv(os.path.join(search_dir, filename))
        result = result.merge(df, on='log', how='outer')
        print(df.shape)
    result.to_csv(output_path, index=False)
    print(f"Saved dataframe with {result.shape} in {output_path}")
    return result


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("Usage: python merge_csvs.py <path/prefix_of_csv_files>")
    merge_csvs(sys.argv[1])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notebooks/benchmarking_process_discovery.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/feature_selection.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_benchmark_distributions.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_fig6_benchmark_boxplots.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_figs4and5_representativeness.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_figs9and10_consistency.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_representativeness.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
utils/param_keys/plotter.py
CHANGED
@@ -2,5 +2,3 @@
|
|
2 |
|
3 |
# Analysis Files
|
4 |
REAL_EVENTLOG_PATH = 'real_eventlog_path'
|
5 |
-
FONT_SIZE = 'font_size'
|
6 |
-
BOXPLOT_WIDTH = 'boxplot_width'
|
|
|
2 |
|
3 |
# Analysis Files
|
4 |
REAL_EVENTLOG_PATH = 'real_eventlog_path'
|
|
|
|