Andrea MH committed on
Commit
403e9e0
·
unverified ·
2 Parent(s): 761e409 4ef3cc6

Merge pull request #11 from lmu-dbs/produce_experiments

Browse files
.gitignore CHANGED
@@ -4,4 +4,5 @@ output/
4
  .ipynb_checkpoints/
5
  notebooks/.ipynb_checkpoints/*
6
  gedi.egg-info/
7
- build/
 
 
4
  .ipynb_checkpoints/
5
  notebooks/.ipynb_checkpoints/*
6
  gedi.egg-info/
7
+ build/
8
+ *.pyc
config_files/algorithm/feature_extraction.json CHANGED
@@ -4,7 +4,9 @@
4
  "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
- "real_eventlog_path": "data/bpic_features.csv",
8
- "plot_type": "boxplot"
 
 
9
  }
10
  ]
 
4
  "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
+ "real_eventlog_path": "data/BaselineED_feat.csv",
8
+ "plot_type": "boxplot",
9
+ "font_size": 24,
10
+ "boxplot_width":10
11
  }
12
  ]
data/BaselineED_bench.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
2
+ BPIC12,,,,,,,0.999782450408571,0.106249999999999,0.192086381040032,69,41,37,,,,,,
3
+ BPIC13cp,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0
4
+ BPIC13inc,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0
5
+ BPIC13op,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0
6
+ BPIC14dc_p,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0
7
+ BPIC14di_p,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2,1.0,1.0,1.0,10.0,2.0,2.0
8
+ BPIC15f2,,,,,,,0.9677497565467512,0.010598531351998,0.0209674330962,381,134,115,,,,,,
9
+ BPIC16c_p,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0
10
+ BPIC16wm_p,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0
11
+ BPIC17,,,,,,,0.930672500139456,0.244851509976953,0.387702105600728,73,48,40,,,,,,
12
+ BPIC17ol,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0
13
+ BPIC20a,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0
14
+ BPIC20b,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0
15
+ BPIC20c,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67,,,,,,
16
+ BPIC20d,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0
17
+ BPIC20e,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0
18
+ HD,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0
19
+ RTFMP,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0
20
+ RWABOCSL,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0
21
+ SEPSIS,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0
data/{baseline_ED_feat.csv → BaselineED_feat.csv} RENAMED
@@ -1,4 +1,4 @@
1
- log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
3
  BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
4
  BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
 
1
+ log,ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  BPIC16wm_p,0.002882363538101243,0.29580255809764006,0.7141055665645829,0.0,0.0,0.0,0.0
3
  BPIC15f5,0.9974048442906575,0.0017301038062283738,0.10207612456747404,0.648702019618582,0.6032598312788823,0.34240966430145864,0.4045799140620184
4
  BPIC15f1,0.97581317764804,0.006672226855713094,0.12176814011676397,0.6528546738228733,0.610294028540377,0.270241403634718,0.3639276823477533
data/GenBaselineED_bench.csv ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu
2
+ genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
3
+ genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0
4
+ genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0
5
+ genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0
6
+ genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0
7
+ genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0
8
+ genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0
9
+ genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24,,,,,,
10
+ genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0
11
+ genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0
12
+ genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
13
+ genELBPIC16wm_p_00_00_00_00_02958_07141_00029,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21,0.7559297979920231,0.842836745090442,0.797021195137192,31.0,18.0,16.0
14
+ genELBPIC17_04616_02905_02319_07417_00335_05313_05056,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
15
+ genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0
16
+ genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
17
+ genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24,,,,,,
18
+ genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0
19
+ genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
20
+ genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0
21
+ genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0
22
+ genELHD_02541_01546_01185_07991_05166_09063_00493,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0
23
+ genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0
24
+ genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0
25
+ genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10,,,,,,
data/{GenBaseline_ED_feat.csv → GenBaselineED_feat.csv} RENAMED
@@ -1,4 +1,4 @@
1
- ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,log
2
  0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
3
  0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
4
  0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
 
1
+ ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,log
2
  0.21031587365053903,0.23750499800079902,0.7944822071171531,0.8436095804469511,0.454318645274405,0.207520432496227,0.288223924276644,BPIC20c
3
  0.22916666666666602,0.208333333333333,0.39583333333333304,0.401685982808314,0.245964987620705,0.029935020945679004,0.10766848262252701,BPIC20b
4
  0.493082835183603,0.12929120409906,0.556105892399658,0.80784773712104,0.49684445215246903,0.276433398156238,0.33730492928925604,BPIC15f1
data/GenBaseline_ED_bench.csv DELETED
@@ -1,25 +0,0 @@
1
- log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
2
- genELBPIC20b_03394_01938_01456_07583_02123_08113_01168,0.6965863019071621,0.8709677419354831,0.7740775519905101,13.0,7.0,5.0,0.999969621176065,0.427355623100303,0.598802049151352,21.0,7.0,6.0,0.99991994157317,0.902439024390243,0.9486819182778732,21,14,11
3
- genELBPIC15f1_06103_03639_02702_06529_00067_01218_09758,0.244571491396844,0.970825492684492,0.390713884832271,48.0,28.0,13.0,0.9999851056034972,0.7639844601581931,0.866197576079873,50.0,34.0,12.0,0.9999702116506732,0.7639844601581931,0.8661919884129461,32,33,4
4
- genELBPIC12_04231_02756_02261_07083_0262_06863_03336,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
5
- genELRTFMP_01119_00684_00526_07694_03756_09931_00015,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
6
- genELBPIC15f5_06033_04046_03424_06487_00017_01021_09974,0.938048056994855,0.492925487219797,0.6462562461747551,49.0,30.0,30.0,0.999983354100017,0.128493715326455,0.227725631143263,54.0,10.0,28.0,0.9099497610012792,0.397165646466794,0.552974562177985,48,30,26
7
- genELHD_02541_01546_01185_07991_05166_09063_00493,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
8
- genELBPIC13op_02768_0263_02621_0703_02173_07692_01319,0.63766810311605,0.82383808095952,0.7188957146869801,20.0,12.0,10.0,0.9999849524483452,0.468847352024922,0.6383850567451991,18.0,7.0,7.0,0.9999267492461212,0.6979087706782,0.8220556586844351,21,14,10
9
- genELRWABOCSL_02355_01381_01006_06894_04972_0887_00809,0.530065017562215,1.0,0.692866004356787,6.0,5.0,0.0,0.999955496609388,0.8994933189848441,0.947067676789098,10.0,7.0,2.0,0.999933093365992,1.0,0.999966545563834,12,9,4
10
- genELBPIC13inc_04047_03916_03911_07178_02322_07944_02,0.985072402672326,0.712919969188461,0.8271860328700411,29.0,18.0,18.0,0.999958215484349,0.408035700462898,0.5795744516613001,25.0,8.0,12.0,0.9686666193751672,0.691338675620024,0.8068368212742281,31,18,16
11
- genELBPIC15f2_06024_03905_03172_0628_00024_01034_09952,0.598731029752207,0.771688142034321,0.6742953476423611,12.0,2.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14.0,6.0,4.0,0.9999670209931352,0.99625386996904,0.998106992083072,14,6,4
12
- genELBPIC20e_0189_01187_00976_07037_04373_09335_00129,0.344766967838924,0.996734180708667,0.5123231114582241,20.0,11.0,6.0,0.999970639140175,0.880258899676375,0.9363038249811212,23.0,12.0,5.0,0.9999185975932092,0.8029049230541211,0.890646871938623,26,20,11
13
- genELBPIC20d_0317_02144_01849_07238_02711_08228_00962,0.225420538815396,0.637019197304859,0.333002306545369,55.0,36.0,23.0,0.999982296525308,0.484721663109443,0.652942397986183,44.0,20.0,10.0,0.9999179179170272,0.7032408784863411,0.8257399797307741,35,27,11
14
- genELBPIC14dc_p_04193_03267_03126_04708_00749_07651_00484,0.347586684851292,0.9355932203389832,0.5068653966783401,46.0,29.0,16.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
15
- genELBPIC16c_p_06838_04701_04047_08995_01018_04248_04381,0.818032265179306,0.74400127547631,0.779262458626504,17.0,10.0,10.0,0.999977812170716,0.969934322549258,0.9847269679872972,13.0,8.0,3.0,0.951125602902324,0.969934322549258,0.960437886489628,13,8,4
16
- genELBPIC17ol_01051_0066_00527_08135_03806_03806_00004,0.8887581366581631,0.8234395340870161,0.854852916190842,41.0,24.0,17.0,0.9999783279433192,0.340552711229226,0.5080752639070121,42.0,14.0,12.0,0.935457409263567,0.79857953477885,0.8616161786652851,44,28,24
17
- genELBPIC19_0328_03203_03202_06455_01998_09464_00476,0.5636771288053071,0.9640768588137012,0.711407831532712,10.0,6.0,4.0,0.999959264300715,0.499102378696454,0.6658592723568011,17.0,6.0,5.0,0.999890512287676,0.976915568570034,0.988269530105323,17,12,9
18
- genELBPIC13cp_03109_02884_02865_07054_03315_08406_01231,0.347586684851292,0.9355932203389832,0.5068653966783401,47.0,30.0,17.0,0.9999801548798732,0.395704287667927,0.567028509892201,47.0,22.0,12.0,0.980044672023712,0.631207493904786,0.7678643410683581,37,28,11
19
- genELBPIC15f4_06039_04128_03559_0653_00028_01026_09962,0.675397479517932,0.902480467048128,0.7725984561543561,31.0,18.0,18.0,0.999977383275612,0.302621609334747,0.464632272467502,38.0,11.0,17.0,0.946902744936401,0.63868632378007,0.7628380455854801,42,27,20
20
- genELBPIC20c_04202_02155_01373_07337_01353_07575_02092,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
21
- genELBPIC15f3_06057_04049_03415_06618_00106_01377_09574,,,,,,,0.999994016598948,0.187744606298656,0.316136024084765,37.0,11.0,14.0,0.8327048201120321,0.49525012025012,0.6211011379360081,43,28,24
22
- genELBPIC16wm_p_00_00_00_00_02958_07141_00029,,,,,,,0.9999835444611692,0.31221384063791,0.475856310241554,38.0,14.0,15.0,0.853166759305167,0.6460181552942631,0.735281165968741,39,25,21
23
- genELSEPSIS_05223_02995_02194_06958_00333_02743_08057,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
24
- genELBPIC17_04616_02905_02319_07417_00335_05313_05056,,,,,,,0.9999529422841532,0.943943512563813,0.9711413284222972,53.0,22.0,16.0,0.9999170610110152,0.943943512563813,0.971124406425324,32,22,10
25
- genELBPIC20a_01648_01044_00854_06965_04398_09501_00094,,,,,,,,,,,,,0.9168707487964872,0.314787191876771,0.468667734435799,46,28,24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/GenED_bench.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/GenED_feat.csv CHANGED
@@ -1,4 +1,4 @@
1
- log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
3
  2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
4
  2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
 
1
+ log,ratio_variants_per_number_of_traces,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
  2_rmcv_rt10v_genELtask_40_03_06,0.475,0.3107142857142857,0.5714285714285714,0.711996755762796,0.40848153131541576,0.10988860893433217,0.1999007815532011
3
  2_enself_rutpt_genELtask_25_02_02,0.19246861924686193,0.25784518828451886,0.7975941422594143,0.8336522045635787,0.45176947602735823,0.2018481552079625,0.2842730838492838
4
  2_rt10v_rutpt_genELtask_39_03_05,0.5,0.3,0.3,0.3935954518140152,0.25153078703466797,0.06196334316806251,0.1255248346244991
data/baseline_ED_bench.csv DELETED
@@ -1,18 +0,0 @@
1
- log,fitness_heuristics,precision_heuristics,fscore_heuristics,size_heuristics,pnsize_heuristics,cfc_heuristics,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf
2
- BPIC16wm_p,0.999900004026629,1.0,0.999949999513391,5.0,4.0,2.0,0.9999495832135112,1.0,0.999974790971276,4.0,3.0,1.0,0.999900004026629,1.0,0.999949999513391,5,4,2
3
- BPIC13op,0.990133346397138,0.9620563035495712,0.975892918274616,12.0,7.0,7.0,0.99993033237412,0.9065645824471852,0.950961282086593,10.0,5.0,3.0,0.8513195049834781,0.9065645824471852,0.8780739493381781,17,10,8
4
- BPIC13cp,0.989977119234364,0.8684298767708941,0.925228660364203,14.0,9.0,8.0,0.999955347339294,0.792379879879879,0.8841476594077591,20.0,8.0,6.0,0.990412853232678,0.9470205909661912,0.9682307987170752,15,10,9
5
- RTFMP,0.847745391902833,0.991356698750484,0.9139439048749932,47.0,25.0,22.0,0.9999788763172012,0.589212029307434,0.7415088783783841,43.0,17.0,14.0,0.878359786969879,0.7802754349784181,0.8264174735665141,41,25,20
6
- SEPSIS,0.650269438232782,0.7023809523809521,0.675321384593596,64.0,33.0,29.0,0.9999870882139372,0.19811033775102,0.330703956956029,96.0,47.0,44.0,0.9605344308961652,0.443996632051641,0.6072831901523931,43,27,23
7
- HD,0.7266871858430181,0.8474784912426241,0.782448466293276,61.0,33.0,26.0,0.999957093840268,0.412049000421671,0.583611250200463,67.0,29.0,26.0,0.9784476270770972,0.759636896649265,0.8552690146197981,45,29,27
8
- BPIC20d,0.778405152397002,0.8877260430015661,0.8294791282917191,110.0,57.0,55.0,0.999976992746818,0.213233968166344,0.351511928441461,170.0,82.0,79.0,0.867127706306101,0.40344856566562,0.5506815089742241,78,45,41
9
- BPIC13inc,0.99128117000846,0.8850810072924521,0.935175678848088,14.0,8.0,8.0,0.99997694649763,0.625730547968199,0.7697770045565601,19.0,7.0,5.0,0.957240933170762,0.716391417907929,0.819486058514255,16,10,8
10
- BPIC14di_p,1.0,1.0,1.0,10.0,2.0,2.0,,,,,,,0.999900009999,1.0,0.9999500024998752,10,4,2
11
- BPIC20e,0.8957327113789421,0.808290592116352,0.8497681013791021,48.0,23.0,22.0,0.9999625734194992,0.177946979285382,0.302129002909987,101.0,43.0,43.0,0.9184257431784232,0.38688423100734,0.544429207489319,46,29,25
12
- BPIC14dc_p,0.92732126656531,1.0,0.962290286162716,547.0,364.0,364.0,,,,,,,0.9998326981312632,1.0,0.9999163420675672,606,366,364
13
- BPIC16c_p,0.7688674244586541,0.9952442715088632,0.8675311223109071,92.0,50.0,49.0,0.999843623073484,0.75266316984805,0.8588217446396421,270.0,123.0,120.0,0.8853691071783161,0.9174262372560932,0.901112653845042,110,38,34
14
- BPIC20a,0.8903598625893641,0.867035609327888,0.878542955546676,40.0,19.0,18.0,0.999962791752526,0.188093126224035,0.316628409088329,89.0,38.0,38.0,0.9368177153041932,0.375765199161425,0.5363828699729011,36,21,18
15
- BPIC20b,0.6970214666884511,0.9141924615708572,0.7909710302567481,124.0,62.0,55.0,0.99998483485473,0.11309976930835,0.203215557399531,193.0,94.0,90.0,0.8859445593469291,0.348704855833889,0.500438693033593,79,46,43
16
- RWABOCSL,0.7998506743994891,0.680938416422287,0.7356200217515501,83.0,43.0,38.0,0.999985675961848,0.18194590014049,0.307874495646305,133.0,62.0,58.0,0.8277414379848941,0.252082243592322,0.386468499184599,77,45,43
17
- BPIC17ol,0.9107234276582472,1.0,0.9532760361602052,24.0,12.0,9.0,0.999984636044501,0.6172893728926371,0.7633584481974761,39.0,18.0,15.0,0.9960693326660932,0.898064579352246,0.944531514451642,14,6,4
18
- BPIC20c,,,,,,,,,,,,,0.7723547059308711,0.190996223166598,0.306257724619519,122,71,67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gedi/plotter.py CHANGED
@@ -12,9 +12,9 @@ from matplotlib.axes import Axes
12
  from matplotlib.figure import Figure
13
  from matplotlib.lines import Line2D
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
- from utils.param_keys import INPUT_PATH, OUTPUT_PATH
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
- from utils.param_keys.plotter import REAL_EVENTLOG_PATH
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
@@ -318,17 +318,19 @@ class BenchmarkPlotter:
318
  class FeaturesPlotter:
319
  def __init__(self, features, params=None):
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
- plot_type = f", plot_type='{params[PLOT_TYPE]}'" if PLOT_TYPE else ""
 
 
 
322
 
323
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
324
  #output_path = os.path.join(output_path, source_name)
325
  if REAL_EVENTLOG_PATH in params:
326
- #real_eventlogs_path != None:
327
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
328
  real_eventlogs = pd.read_csv(real_eventlogs_path)
329
- fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type})")
330
  else:
331
- fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
334
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
@@ -336,14 +338,14 @@ class FeaturesPlotter:
336
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
337
 
338
 
339
- def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot"):
340
  columns = features.columns[1:]
341
  df1=features.select_dtypes(exclude=['object'])
342
 
343
- fig, axes = plt.subplots(len(df1.columns),1, figsize=(17,len(df1.columns)))
344
  for i, ax in enumerate(axes):
345
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
346
- fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=16, y=1)
347
  fig.tight_layout()
348
 
349
 
@@ -351,11 +353,12 @@ class FeaturesPlotter:
351
 
352
  return fig, output_path
353
 
354
- def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot"):
 
355
  LOG_NATURE = "Log Nature"
356
  GENERATED = "Generated"
357
  REAL = "Real"
358
- FONT_SIZE=20
359
  alpha = 0.7
360
  color = sns.color_palette("bright")
361
  markers = ['o','X']
@@ -374,7 +377,7 @@ class FeaturesPlotter:
374
  if plot_type == 'violinplot':
375
  inner_param = 'inner = None,'
376
 
377
- fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(12,len(dmf1.columns)*1.25), dpi=100)
378
  if isinstance(axes, Axes): # not isinstance(axes, list):
379
  axes = [axes]
380
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
@@ -400,8 +403,14 @@ class FeaturesPlotter:
400
  ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
401
  ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
402
 
403
- fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
 
 
 
404
  #fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
 
 
 
405
  fig.tight_layout()
406
 
407
  output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
@@ -622,7 +631,6 @@ class AugmentationPlotter(object):
622
 
623
 
624
  class GenerationPlotter(object):
625
-
626
  def __init__(self, gen_cfg, model_params, output_path, input_path=None):
627
  print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
628
  self.output_path = output_path
 
12
  from matplotlib.figure import Figure
13
  from matplotlib.lines import Line2D
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
+ from utils.param_keys import INPUT_PATH, OUTPUT_PATH, PIPELINE_STEP
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
+ from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
 
318
  class FeaturesPlotter:
319
  def __init__(self, features, params=None):
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
+ plot_type = f", plot_type='{params[PLOT_TYPE]}'" if params.get(PLOT_TYPE) else ""
322
+ font_size = f", font_size='{params[FONT_SIZE]}'" if params.get(FONT_SIZE) else ""
323
+ boxplot_w = f", boxplot_w='{params[BOXPLOT_WIDTH]}'" if params.get(BOXPLOT_WIDTH) else ""
324
+ LEGEND = ", legend=True" if params.get(PIPELINE_STEP) else ""
325
 
326
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
327
  #output_path = os.path.join(output_path, source_name)
328
  if REAL_EVENTLOG_PATH in params:
 
329
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
330
  real_eventlogs = pd.read_csv(real_eventlogs_path)
331
+ fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type}{font_size}{boxplot_w}{LEGEND})")
332
  else:
333
+ fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type}{font_size}{boxplot_w})")
334
 
335
  if output_path != None:
336
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
 
338
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
339
 
340
 
341
+ def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot", font_size=16, boxplot_w=16):
342
  columns = features.columns[1:]
343
  df1=features.select_dtypes(exclude=['object'])
344
 
345
+ fig, axes = plt.subplots(len(df1.columns),1, figsize=(int(boxplot_w),len(df1.columns)))
346
  for i, ax in enumerate(axes):
347
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
348
+ fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=font_size, y=1)
349
  fig.tight_layout()
350
 
351
 
 
353
 
354
  return fig, output_path
355
 
356
+ def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot",
357
+ font_size=24, legend=False, boxplot_w=16):
358
  LOG_NATURE = "Log Nature"
359
  GENERATED = "Generated"
360
  REAL = "Real"
361
+ FONT_SIZE=font_size
362
  alpha = 0.7
363
  color = sns.color_palette("bright")
364
  markers = ['o','X']
 
377
  if plot_type == 'violinplot':
378
  inner_param = 'inner = None,'
379
 
380
+ fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(int(boxplot_w),len(dmf1.columns)*1.25), dpi=300)
381
  if isinstance(axes, Axes): # not isinstance(axes, list):
382
  axes = [axes]
383
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
 
403
  ax.tick_params(axis='both', which='minor', labelsize=FONT_SIZE)
404
  ax.set_xlabel(dmf1.columns[i], fontsize=FONT_SIZE)
405
 
406
+
407
+ if legend:
408
+ fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types), prop={'size': FONT_SIZE})
409
+ plt.legend(fontsize=FONT_SIZE)
410
  #fig.suptitle(f"{len(features.columns)-2} features distribution for {len(real_eventlogs[real_eventlogs['Log Nature'].isin(nature_types)])} real and {len(features)} generated event-logs", fontsize=16, y=1)
411
+ plt.yticks(fontsize=FONT_SIZE)
412
+ plt.xticks(fontsize=FONT_SIZE)
413
+
414
  fig.tight_layout()
415
 
416
  output_path = output_path+f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{len(bdf[bdf['Log Nature'].isin(nature_types)])}.jpg"
 
631
 
632
 
633
  class GenerationPlotter(object):
 
634
  def __init__(self, gen_cfg, model_params, output_path, input_path=None):
635
  print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}")
636
  self.output_path = output_path
merge_csvs.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Merge all CSV files sharing a filename prefix into one CSV, joined on 'log'.

Usage:
    python merge_csvs.py <directory>/<file_prefix>

Every file in <directory> whose name starts with <file_prefix> is read with
pandas and outer-merged on the 'log' column. The result is written to
<directory>/<file_prefix>.csv.
"""
import os
import sys

import pandas as pd

# Column every input CSV is joined on.
KEY_COLUMN = "log"


def merge_csvs(file_start):
    """Outer-merge every CSV matching a path prefix and save the result.

    Args:
        file_start: Path prefix of the form ``<directory>/<file_prefix>``.
            The directory part may be empty, in which case the current
            working directory is searched.

    Returns:
        The path of the written CSV (``<directory>/<file_prefix>.csv``).
    """
    root_path, prefix = os.path.split(file_start)
    # os.listdir("") raises FileNotFoundError, so fall back to the current
    # directory when the argument carries no directory component.
    search_dir = root_path or os.curdir
    output_name = prefix + ".csv"
    output_path = os.path.join(root_path, output_name)

    # The output file name itself starts with the prefix: skip it, otherwise a
    # second run would merge the previous result back in and create duplicated
    # '_x'/'_y' columns. Sort because os.listdir order is arbitrary and the
    # merge order decides the column order of the result.
    filename_list = sorted(
        filename
        for filename in os.listdir(search_dir)
        if filename.startswith(prefix) and filename != output_name
    )

    # Seed with an empty frame holding only the key so that every input,
    # including the first one, goes through the same outer merge.
    result = pd.DataFrame(columns=[KEY_COLUMN])
    for filename in filename_list:
        df = pd.read_csv(os.path.join(root_path, filename))
        result = result.merge(df, on=KEY_COLUMN, how="outer")
        print(df.shape)

    result.to_csv(output_path, index=False)
    print(f"Saved dataframe with {result.shape} in {output_path}")
    return output_path


def main(argv=None):
    """Command-line entry point; expects the path prefix as first argument."""
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("Usage: python merge_csvs.py <directory>/<file_prefix>")
    merge_csvs(args[0])


if __name__ == "__main__":
    main()
notebooks/benchmarking_process_discovery.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_selection.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_benchmark_distributions.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_fig6_benchmark_boxplots.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs4and5_representativeness.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs9and10_consistency.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_representativeness.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
utils/param_keys/plotter.py CHANGED
@@ -2,3 +2,5 @@
2
 
3
  # Analysis Files
4
  REAL_EVENTLOG_PATH = 'real_eventlog_path'
 
 
 
2
 
3
  # Analysis Files
4
  REAL_EVENTLOG_PATH = 'real_eventlog_path'
5
+ FONT_SIZE = 'font_size'
6
+ BOXPLOT_WIDTH = 'boxplot_width'