Andrea Maldonado committed on
Commit
f89abd9
·
1 Parent(s): fcfe5f6

Commits branch 9 from gedi

Browse files
config_files/algorithm/feature_extraction.json CHANGED
@@ -4,9 +4,7 @@
4
  "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
- "real_eventlog_path": "data/BaselineED_feat.csv",
8
- "plot_type": "boxplot",
9
- "font_size": 24,
10
- "boxplot_width":10
11
  }
12
  ]
 
4
  "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
+ "real_eventlog_path": "data/bpic_features.csv",
8
+ "plot_type": "boxplot"
 
 
9
  }
10
  ]
data/BaselineED_bench.csv DELETED
@@ -1,21 +0,0 @@
1
- log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
2
- BPIC12,,,,,,,0.999782450408571,0.106249999999999,0.192086381040032,69,41,37,,,,,,
3
- BPIC13cp,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0
4
- BPIC13inc,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0
5
- BPIC13op,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0
6
- BPIC14dc_p,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0
7
- BPIC14di_p,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2,1.0,1.0,1.0,10.0,2.0,2.0
8
- BPIC15f2,,,,,,,0.9677497565467512,0.010598531351998,0.0209674330962,381,134,115,,,,,,
9
- BPIC16c_p,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0
10
- BPIC16wm_p,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0
11
- BPIC17,,,,,,,0.930672500139456,0.244851509976953,0.387702105600728,73,48,40,,,,,,
12
- BPIC17ol,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0
13
- BPIC20a,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0
14
- BPIC20b,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0
15
- BPIC20c,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67,,,,,,
16
- BPIC20d,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0
17
- BPIC20e,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0
18
- HD,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0
19
- RTFMP,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0
20
- RWABOCSL,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0
21
- SEPSIS,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/GenBaselineED_bench.csv DELETED
@@ -1,25 +0,0 @@
1
- log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
2
- genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
3
- genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0
4
- genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0
5
- genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0
6
- genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0
7
- genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0
8
- genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0
9
- genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24,,,,,,
10
- genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0
11
- genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
12
- genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
13
- genELBPIC16wm_p_00_00_00_00_02958_07141_00029,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21,0.7559297979920231,0.842836745090442,0.797021195137192,31.0,18.0,16.0
14
- genELBPIC17_04616_02905_02319_07417_00335_05313_05056,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
15
- genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0
16
- genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
17
- genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24,,,,,,
18
- genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0
19
- genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
20
- genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0
21
- genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0
22
- genELHD_02541_01546_01185_07991_05166_09063_00493,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
23
- genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
24
- genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0
25
- genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/GenBaseline_ED_bench.csv ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
2
+ genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11
3
+ genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4
4
+ genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
5
+ genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
6
+ genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
7
+ genELHD_02541_01546_01185_07991_05166_09063_00493,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
8
+ genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10
9
+ genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4
10
+ genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16
11
+ genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4
12
+ genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11
13
+ genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11
14
+ genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
15
+ genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
16
+ genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24
17
+ genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
18
+ genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
19
+ genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20
20
+ genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
21
+ genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,,,,,,,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24
22
+ genELBPIC16wm_p_00_00_00_00_02958_07141_00029,,,,,,,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21
23
+ genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
24
+ genELBPIC17_04616_02905_02319_07417_00335_05313_05056,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
25
+ genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24
data/{GenBaselineED_feat.csv → GenBaseline_ED_feat.csv} RENAMED
@@ -1,4 +1,4 @@
1
- ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,log
2
  0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
3
  0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
4
  0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
 
1
+ ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,log
2
  0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
3
  0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
4
  0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
data/GenED_bench.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/GenED_feat.csv CHANGED
@@ -1,4 +1,4 @@
1
- log,ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
3
  2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
4
  2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
 
1
+ log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
3
  2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
4
  2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
data/baseline_ED_bench.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
2
+ BPIC16wm_p,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2
3
+ BPIC13op,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8
4
+ BPIC13cp,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9
5
+ RTFMP,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20
6
+ SEPSIS,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23
7
+ HD,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27
8
+ BPIC20d,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41
9
+ BPIC13inc,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8
10
+ BPIC14di_p,1.0,1.0,1.0,10.0,2.0,2.0,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2
11
+ BPIC20e,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25
12
+ BPIC14dc_p,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364
13
+ BPIC16c_p,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34
14
+ BPIC20a,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18
15
+ BPIC20b,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43
16
+ RWABOCSL,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43
17
+ BPIC17ol,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4
18
+ BPIC20c,,,,,,,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67
data/{BaselineED_feat.csv → baseline_ED_feat.csv} RENAMED
@@ -1,4 +1,4 @@
1
- log,ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
3
  BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
4
  BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
 
1
+ log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
3
  BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
4
  BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
gedi/generator.py CHANGED
@@ -2,7 +2,6 @@ import multiprocessing
2
  import os
3
  import pandas as pd
4
  import random
5
-
6
  from ConfigSpace import Configuration, ConfigurationSpace
7
  from datetime import datetime as dt
8
  from feeed.activities import Activities as activities
@@ -21,8 +20,9 @@ from smac import HyperparameterOptimizationFacade, Scenario
21
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
22
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
23
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
24
-
25
-
 
26
 
27
  """
28
  Parameters
@@ -72,13 +72,72 @@ def get_tasks(experiment, output_path="", reference_feature=None):
72
  raise FileNotFoundError(f"{experiment} not found. Please check path in filesystem.")
73
  return tasks, output_path
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  class GenerateEventLogs():
76
  # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
77
  def __init__(self, params):
78
  print("=========================== Generator ==========================")
79
  print(f"INFO: Running with {params}")
80
  start = dt.now()
81
- if params.get(OUTPUT_PATH) == None:
82
  self.output_path = 'data/generated'
83
  else:
84
  self.output_path = params.get(OUTPUT_PATH)
@@ -91,7 +150,7 @@ class GenerateEventLogs():
91
 
92
  self.params = params.get(GENERATOR_PARAMS)
93
  experiment = self.params.get(EXPERIMENT)
94
- if experiment!= None:
95
  tasks, output_path = get_tasks(experiment, self.output_path)
96
  self.output_path = output_path
97
 
@@ -114,6 +173,7 @@ class GenerateEventLogs():
114
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
115
  self.output_path, "genEL")+".xes"
116
  write_xes(temp['log'], save_path)
 
117
  print("SUCCESS: Saved generated event log in", save_path)
118
  print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_config)} event logs.")
119
  print(f" Saved generated logs in {self.output_path}")
@@ -140,6 +200,7 @@ class GenerateEventLogs():
140
  self.output_path, identifier)+".xes"
141
 
142
  write_xes(log_config['log'], save_path)
 
143
  print("SUCCESS: Saved generated event log in", save_path)
144
  features_to_dump = log_config['metafeatures']
145
  features_to_dump['log'] = identifier.replace('genEL', '')
@@ -165,9 +226,10 @@ class GenerateEventLogs():
165
  log = play_out(tree, parameters={"num_traces": config["num_traces"]})
166
 
167
  for i, trace in enumerate(log):
168
- trace.attributes['concept:name']=str(i)
169
  for j, event in enumerate(trace):
170
- event['time:timestamp']=dt.now()
 
171
  random.seed(RANDOM_SEED)
172
  metafeatures = self.compute_metafeatures(log)
173
  return {
@@ -203,6 +265,7 @@ class GenerateEventLogs():
203
  trace.attributes['concept:name'] = str(i)
204
  for j, event in enumerate(trace):
205
  event['time:timestamp'] = dt.fromtimestamp(j * 1000)
 
206
 
207
  metafeatures_computation = {}
208
  for ft_name in self.objectives.keys():
@@ -219,7 +282,7 @@ class GenerateEventLogs():
219
  return log_evaluation
220
 
221
  def optimize(self):
222
- if self.params.get(CONFIG_SPACE) == None:
223
  configspace = ConfigurationSpace({
224
  "mode": (5, 40),
225
  "sequence": (0.01, 1),
 
2
  import os
3
  import pandas as pd
4
  import random
 
5
  from ConfigSpace import Configuration, ConfigurationSpace
6
  from datetime import datetime as dt
7
  from feeed.activities import Activities as activities
 
20
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
23
+ import xml.etree.ElementTree as ET
24
+ import re
25
+ from xml.dom import minidom
26
 
27
  """
28
  Parameters
 
72
  raise FileNotFoundError(f"{experiment} not found. Please check path in filesystem.")
73
  return tasks, output_path
74
 
75
+
76
def removeextralines(elem):
    """Blank out whitespace-only ``text`` and ``tail`` strings in an element tree.

    Walks ``elem`` and all of its descendants and replaces every ``text`` or
    ``tail`` value that consists solely of whitespace with the empty string.
    The tree is modified in place; nothing is returned.

    Values that are ``None`` are left untouched, and any value containing at
    least one non-whitespace character is preserved unchanged. (Testing for
    a word character instead would also wipe punctuation-only content such
    as ``"--"``, which is data rather than layout padding.)

    Parameters
    ----------
    elem : xml.etree.ElementTree.Element
        Root of the (sub)tree to clean.
    """
    for element in elem.iter():
        # str.strip() returns "" exactly when the string holds nothing but
        # whitespace, i.e. indentation/newline padding between tags.
        if element.tail is not None and not element.tail.strip():
            element.tail = ""
        if element.text is not None and not element.text.strip():
            element.text = ""
83
+
84
def add_extension_before_traces(xes_file):
    """Rewrite ``xes_file`` in place with extension and global declarations.

    The file is parsed, three ``<extension>`` elements (Lifecycle, Time,
    Concept) and two ``<global>`` elements (scopes ``event`` and ``trace``)
    are inserted at the front of the root element's children, whitespace-only
    text nodes are cleared via ``removeextralines``, and the pretty-printed
    result is written back to the same path.

    The output is written as UTF-8 bytes with a matching XML declaration, so
    the result does not depend on the platform's default text encoding.

    Parameters
    ----------
    xes_file : str or path-like
        Path of the XES file to read and overwrite.
    """
    # Map the XES namespace URI to the empty prefix for ElementTree output.
    ET.register_namespace('', "http://www.xes-standard.org/")

    # Parse the original XML
    tree = ET.parse(xes_file)
    root = tree.getroot()

    # Add extensions
    extensions = [
        {'name': 'Lifecycle', 'prefix': 'lifecycle', 'uri': 'http://www.xes-standard.org/lifecycle.xesext'},
        {'name': 'Time', 'prefix': 'time', 'uri': 'http://www.xes-standard.org/time.xesext'},
        {'name': 'Concept', 'prefix': 'concept', 'uri': 'http://www.xes-standard.org/concept.xesext'},
    ]

    # Every element goes in at index 0, so the children end up in reverse
    # list order (Concept, Time, Lifecycle).
    for ext in extensions:
        root.insert(0, ET.Element('extension', ext))

    # Add global variables (default attribute values per scope)
    global_scopes = [
        {
            'scope': 'event',
            'attributes': [
                {'key': 'lifecycle:transition', 'value': 'complete'},
                {'key': 'concept:name', 'value': '__INVALID__'},
                {'key': 'time:timestamp', 'value': '1970-01-01T01:00:00.000+01:00'},
            ],
        },
        {
            'scope': 'trace',
            'attributes': [
                {'key': 'concept:name', 'value': '__INVALID__'},
            ],
        },
    ]

    # Inserting at the fixed index len(extensions) places each <global>
    # directly after the extension elements; a later entry lands in front of
    # an earlier one.
    for global_var in global_scopes:
        global_elem = ET.Element('global', {'scope': global_var['scope']})
        for attr in global_var['attributes']:
            ET.SubElement(global_elem, 'string', {'key': attr['key'], 'value': attr['value']})
        root.insert(len(extensions), global_elem)

    # Pretty print the XES. Whitespace-only text nodes are cleared first so
    # that toprettyxml does not emit them as extra blank lines.
    removeextralines(root)
    xml_bytes = minidom.parseString(ET.tostring(root)).toprettyxml(encoding="utf-8")
    # Binary mode: the bytes are already UTF-8 encoded and the declaration
    # produced by toprettyxml(encoding=...) states that encoding.
    with open(xes_file, "wb") as f:
        f.write(xml_bytes)
133
+
134
  class GenerateEventLogs():
135
  # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
136
  def __init__(self, params):
137
  print("=========================== Generator ==========================")
138
  print(f"INFO: Running with {params}")
139
  start = dt.now()
140
+ if params.get(OUTPUT_PATH) is None:
141
  self.output_path = 'data/generated'
142
  else:
143
  self.output_path = params.get(OUTPUT_PATH)
 
150
 
151
  self.params = params.get(GENERATOR_PARAMS)
152
  experiment = self.params.get(EXPERIMENT)
153
+ if experiment is not None:
154
  tasks, output_path = get_tasks(experiment, self.output_path)
155
  self.output_path = output_path
156
 
 
173
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
174
  self.output_path, "genEL")+".xes"
175
  write_xes(temp['log'], save_path)
176
+ add_extension_before_traces(save_path)
177
  print("SUCCESS: Saved generated event log in", save_path)
178
  print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_config)} event logs.")
179
  print(f" Saved generated logs in {self.output_path}")
 
200
  self.output_path, identifier)+".xes"
201
 
202
  write_xes(log_config['log'], save_path)
203
+ add_extension_before_traces(save_path)
204
  print("SUCCESS: Saved generated event log in", save_path)
205
  features_to_dump = log_config['metafeatures']
206
  features_to_dump['log'] = identifier.replace('genEL', '')
 
226
  log = play_out(tree, parameters={"num_traces": config["num_traces"]})
227
 
228
  for i, trace in enumerate(log):
229
+ trace.attributes['concept:name'] = str(i)
230
  for j, event in enumerate(trace):
231
+ event['time:timestamp'] = dt.now()
232
+ event['lifecycle:transition'] = "complete"
233
  random.seed(RANDOM_SEED)
234
  metafeatures = self.compute_metafeatures(log)
235
  return {
 
265
  trace.attributes['concept:name'] = str(i)
266
  for j, event in enumerate(trace):
267
  event['time:timestamp'] = dt.fromtimestamp(j * 1000)
268
+ event['lifecycle:transition'] = "complete"
269
 
270
  metafeatures_computation = {}
271
  for ft_name in self.objectives.keys():
 
282
  return log_evaluation
283
 
284
  def optimize(self):
285
+ if self.params.get(CONFIG_SPACE) is None:
286
  configspace = ConfigurationSpace({
287
  "mode": (5, 40),
288
  "sequence": (0.01, 1),
gedi/plotter.py CHANGED
@@ -12,9 +12,9 @@ from matplotlib.axes import Axes
12
  from matplotlib.figure import Figure
13
  from matplotlib.lines import Line2D
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
- from utils.param_keys import INPUT_PATH, OUTPUT_PATH, PIPELINE_STEP
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
- from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
@@ -318,19 +318,17 @@ class BenchmarkPlotter:
318
  class FeaturesPlotter:
319
  def __init__(self, features, params=None):
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
- plot_type = f", plot_type='{params[PLOT_TYPE]}'" if params.get(PLOT_TYPE) else ""
322
- font_size = f", font_size='{params[FONT_SIZE]}'" if params.get(FONT_SIZE) else ""
323
- boxplot_w = f", boxplot_w='{params[BOXPLOT_WIDTH]}'" if params.get(BOXPLOT_WIDTH) else ""
324
- LEGEND = ", legend=True" if params.get(PIPELINE_STEP) else ""
325
 
326
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
327
  #output_path = os.path.join(output_path, source_name)
328
  if REAL_EVENTLOG_PATH in params:
 
329
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
330
  real_eventlogs = pd.read_csv(real_eventlogs_path)
331
- fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type}{font_size}{boxplot_w}{LEGEND})")
332
  else:
333
- fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type}{font_size}{boxplot_w})")
334
 
335
  if output_path != None:
336
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
@@ -338,14 +336,14 @@ class FeaturesPlotter:
338
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
339
 
340
 
341
- def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot", font_size=16, boxplot_w=16):
342
  columns = features.columns[1:]
343
  df1=features.select_dtypes(exclude=['object'])
344
 
345
- fig, axes = plt.subplots(len(df1.columns),1, figsize=(int(boxplot_w),len(df1.columns)))
346
  for i, ax in enumerate(axes):
347
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
348
- fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=font_size, y=1)
349
  fig.tight_layout()
350
 
351
 
@@ -353,12 +351,11 @@ class FeaturesPlotter:
353
 
354
  return fig, output_path
355
 
356
- def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot",
357
- font_size=24, legend=False, boxplot_w=16):
358
  LOG_NATURE = "Log Nature"
359
  GENERATED = "Generated"
360
  REAL = "Real"
361
- FONT_SIZE=font_size
362
  alpha = 0.7
363
  color = sns.color_palette("bright")
364
  markers = ['o','X']
@@ -377,7 +374,7 @@ class FeaturesPlotter:
377
  if plot_type == 'violinplot':
378
  inner_param = 'inner = None,'
379
 
380
- fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(int(boxplot_w),len(dmf1.columns)*1.25), dpi=300)
381
  if isinstance(axes, Axes): # not isinstance(axes, list):
382
  axes = [axes]
383
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
@@ -403,14 +400,8 @@ class FeaturesPlotter:
403
  ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
404
  ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
405
 
406
-
407
- if legend:
408
- fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
409
- plt.legend(fontsize=FONT_SIZE)
410
  #fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
411
- plt.yticks(fontsize=FONT_SIZE)
412
- plt.xticks(fontsize=FONT_SIZE)
413
-
414
  fig.tight_layout()
415
 
416
  output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
@@ -631,6 +622,7 @@ class AugmentationPlotter(object):
631
 
632
 
633
  class GenerationPlotter(object):
 
634
  def __init__(self, gen_cfg, model_params, output_path, input_path=None):
635
  print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
636
  self.output_path = output_path
 
12
  from matplotlib.figure import Figure
13
  from matplotlib.lines import Line2D
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
+ from utils.param_keys import INPUT_PATH, OUTPUT_PATH
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
+ from utils.param_keys.plotter import REAL_EVENTLOG_PATH
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
 
318
  class FeaturesPlotter:
319
  def __init__(self, features, params=None):
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
+ plot_type = f", plot_type='{params[PLOT_TYPE]}'" if PLOT_TYPE else ""
 
 
 
322
 
323
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
324
  #output_path = os.path.join(output_path, source_name)
325
  if REAL_EVENTLOG_PATH in params:
326
+ #real_eventlogs_path != None:
327
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
328
  real_eventlogs = pd.read_csv(real_eventlogs_path)
329
+ fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type})")
330
  else:
331
+ fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
334
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
 
336
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
337
 
338
 
339
+ def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot"):
340
  columns = features.columns[1:]
341
  df1=features.select_dtypes(exclude=['object'])
342
 
343
+ fig, axes = plt.subplots(len(df1.columns),1, figsize=(17,len(df1.columns)))
344
  for i, ax in enumerate(axes):
345
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
346
+ fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=16, y=1)
347
  fig.tight_layout()
348
 
349
 
 
351
 
352
  return fig, output_path
353
 
354
+ def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot"):
 
355
  LOG_NATURE = "Log Nature"
356
  GENERATED = "Generated"
357
  REAL = "Real"
358
+ FONT_SIZE=20
359
  alpha = 0.7
360
  color = sns.color_palette("bright")
361
  markers = ['o','X']
 
374
  if plot_type == 'violinplot':
375
  inner_param = 'inner = None,'
376
 
377
+ fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(12,len(dmf1.columns)*1.25), dpi=100)
378
  if isinstance(axes, Axes): # not isinstance(axes, list):
379
  axes = [axes]
380
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
 
400
  ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
401
  ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
402
 
403
+ fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
 
 
 
404
  #fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
 
 
 
405
  fig.tight_layout()
406
 
407
  output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
 
622
 
623
 
624
  class GenerationPlotter(object):
625
+
626
  def __init__(self, gen_cfg, model_params, output_path, input_path=None):
627
  print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
628
  self.output_path = output_path
merge_csvs.py DELETED
@@ -1,21 +0,0 @@
1
- import os
2
- import pandas as pd
3
- import sys
4
- import tqdm
5
-
6
- from gedi.utils.io_helpers import sort_files
7
-
8
- FILE_START = sys.argv[1]
9
- ROOT_PATH, FILE_START = os.path.split(FILE_START)
10
- filename_list = os.listdir(str(ROOT_PATH))
11
- filename_list = [filename for filename in filename_list if filename.startswith(FILE_START)]
12
-
13
- OUTPUT_PATH = os.path.join(ROOT_PATH, FILE_START+".csv")
14
-
15
- result = pd.DataFrame(columns=['log'])
16
- for filename in filename_list:
17
- df = pd.read_csv(os.path.join(ROOT_PATH, filename))
18
- result = result.merge(df, on='log', how='outer')
19
- print(df.shape)
20
- result.to_csv(OUTPUT_PATH, index=False)
21
- print(f"Saved dataframe with {result.shape} in {OUTPUT_PATH}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/benchmarking_process_discovery.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_selection.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_benchmark_distributions.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_fig6_benchmark_boxplots.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs4and5_representativeness.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs9and10_consistency.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_representativeness.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
utils/param_keys/plotter.py CHANGED
@@ -2,5 +2,3 @@
2
 
3
  # Analysis Files
4
  REAL_EVENTLOG_PATH = 'real_eventlog_path'
5
- FONT_SIZE = 'font_size'
6
- BOXPLOT_WIDTH = 'boxplot_width'
 
2
 
3
  # Analysis Files
4
  REAL_EVENTLOG_PATH = 'real_eventlog_path'