vivek9 committed on
Commit
7a9a856
·
verified ·
1 Parent(s): 8c68262

Upload 5 files

Browse files
Files changed (5) hide show
  1. config.yaml +33 -0
  2. fixed.csv +16 -0
  3. growth.csv +24 -0
  4. main.py +79 -0
  5. utils.py +276 -0
config.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ SPM:
3
+ path_to_csv: "fixed.csv"
4
+ identifier_column: "Identifier"
5
+ sequence_column: "Sequence"
6
+ sortby: "S-Support"
7
+ sliding_window_min: 1
8
+ sliding_window_max: 4
9
+ min_gap: 1
10
+ max_gap: 12
11
+ S_support_thresh: 0.4
12
+ I_support_thresh: 0
13
+ dataset_format: 0
14
+
15
+ DSM:
16
+ path_to_csv_left: "fixed.csv"
17
+ path_to_csv_right: "growth.csv"
18
+ identifier_column: "Identifier"
19
+ sequence_column: "Sequence"
20
+ sortby: "S-Support"
21
+ sliding_window_min: 1
22
+ sliding_window_max: 1
23
+ min_gap: 1
24
+ max_gap: 12
25
+ S_support_thresh: 0.4
26
+ I_support_thresh: 0
27
+ threshold_pvalue: 0.1
28
+ dataset_format: 0
29
+ test_type: "ttest_ind"
30
+
31
+ dsm_result_path: "/home/vivek.trivedi/ET623_project/dsm_result.csv"
32
+ spm_result_path: "/home/vivek.trivedi/ET623_project/spm_result.csv"
33
+ occurrence_matrix_path: "/home/vivek.trivedi/ET623_project/occurrence_matrix.csv"
fixed.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Identifier,Sequence,timestamp
2
+ 453,Execute_success;Execute_success;Reading;Highlighted;Highlighted;Videos;Reading;Videos;Reading;Videos;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Reading;Execute_success;Reading;Execute_success;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Reading;Reading;Reading;Assessment;Assessment;Assessment;Reading;Reading;Reading;Assessment;Assessment;Assessment,27-08-2022 09:30
3
+ 456,Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_success;Execute_success;Execute_error;execute;Videos;Videos;Reading;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_error;Execute_success;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Quiz;Execute_error;Execute_success;Quiz;Execute_error;Execute_success;Quiz;Assessment;Assessment;Assessment,27-08-2022 09:30
4
+ 458,Execute_success;Execute_success;Execute_success;execute;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Videos;Videos;Videos;Videos;Reading;Reading;Reading,27-08-2022 09:30
5
+ 461,Execute_success;Execute_success;Videos;Reading;Quiz;Quiz;Highlighted;Highlighted;Videos;Videos;Videos;Reading;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Reading;Reading;Highlighted;Highlighted;Highlighted;Reading;Reading;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Reading;Reading;Highlighted;Highlighted;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Videos;Videos;Videos,27-08-2022 09:30
6
+ 464,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Highlighted;Highlighted;Videos;Videos;Reading;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Quiz;Reading;Reading;Execute_success;Quiz;Assessment;Reading;Reading;Execute_success;Quiz;Quiz;Assessment;Assessment;Assessment;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Highlighted;Highlighted;Highlighted;Videos;Videos;Videos;Assessment;Assessment;Assessment;Reading;Reading;Reading;Reading;Highlighted;Reading;Highlighted;Reading;Highlighted,27-08-2022 09:30
7
+ 473,Execute_success;Execute_error;Execute_error;Execute_success;Highlighted;Reading;Reading;Execute_success;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Reading;Execute_error;Reading;Execute_error;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;execute;execute;execute,27-08-2022 09:30
8
+ 478,Execute_error;Execute_success;Execute_success;Execute_success;Highlighted;Videos;Videos;Reading;Highlighted;Quiz;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;execute;Videos;Videos;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Reading;Execute_success;Reading;Execute_success;Reading;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Videos;Execute_success;Execute_success;Videos;Execute_success;Execute_success;Videos;Assessment;Verified;Assessment;Verified;Assessment;Verified;Reading;Reading;Reading;Assessment;Assessment;Assessment,27-08-2022 09:30
9
+ 480,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Videos;Videos;Reading;Quiz;Assessment;Quiz;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Execute_success;Execute_success;Videos;Execute_success;Execute_success;Videos;Execute_success;Execute_success;Videos;Reading;Videos;Reading;Reading;Videos;Reading;Reading;Videos;Reading;Execute_error;Execute_error;Execute_error;Videos;Reading;Videos;Reading;Videos;Reading,27-08-2022 09:30
10
+ 481,Execute_success;Execute_error;Execute_error;Execute_error;Videos;Videos;Reading;Reading;Reading;Reading;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Highlighted;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Videos;Videos;Reading;Videos;Reading;Quiz;Assessment;Verified;Reading;Reading;Reading;Highlighted;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Reading;Videos;Videos;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;execute;execute;execute;Reading;Reading;Reading;Videos;Reading;Videos;Reading;Videos;Reading;Quiz;Assessment;Quiz;Assessment;Quiz;Assessment;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Videos;Videos;Reading;Execute_success;Videos;Videos;Reading;Execute_success;Videos;Videos;Reading;Execute_error;Execute_error;Execute_error,27-08-2022 09:30
11
+ 482,Execute_success;Execute_success;Execute_success;Execute_success;Reading;Highlighted;Highlighted;execute;execute;execute;execute;execute;execute;Quiz;Quiz;Reading;Quiz;Quiz;Reading;Quiz;Quiz;Reading;Videos;Videos;Videos;Assessment;Assessment;Assessment;Reading;Reading;Reading;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified,27-08-2022 09:30
12
+ 496,Highlighted;Highlighted;Execute_success;Videos;Videos;Reading;Videos;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Videos;Videos;Reading;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_success;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Quiz;Assessment;Assessment;Quiz;Assessment;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Execute_success;Reading;Execute_success;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Execute_success;Reading;Execute_success;Reading;Execute_success;Reading,27-08-2022 09:30
13
+ 497,Highlighted;Execute_success;Videos;Videos;Reading;Reading;Highlighted;Reading;Highlighted;Highlighted;Videos;Reading;Highlighted;execute;execute;execute;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Highlighted;Highlighted;Execute_success;Highlighted;Highlighted;Execute_success;Highlighted;Highlighted;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Assessment;Assessment;Assessment;Verified;Verified;Verified,27-08-2022 09:30
14
+ 500,Videos;Videos;Reading;Execute_success;Reading;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Reading;Execute_error;Reading;Execute_error;Reading,27-08-2022 09:30
15
+ 501,Highlighted;Highlighted;Highlighted;Highlighted;Reading;Videos;Quiz;Assessment;Verified;Verified;Verified,27-08-2022 09:30
16
+ 502,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_success;Execute_error;Reading;Reading;Videos;Videos;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Videos;Videos;Videos,27-08-2022 09:30
growth.csv ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Identifier,Sequence,timestamp
2
+ 447,Execute_error;Execute_error;Execute_success;Reading;Reading;Reading;Reading;Execute_success;Execute_error;Execute_error;Execute_success;Execute_success;Execute_error;Videos;Reading;Execute_success;Videos;Reading;execute;Reading;Reading;Reading;Reading;Reading;Reading;Videos;Assessment;Videos;Assessment;Videos;Assessment;Reading;Reading;Reading;Videos;Reading;Videos;Reading;Videos;Reading;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified,27-08-2022 09:25
3
+ 450,Highlighted;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;execute;execute;execute,27-08-2022 09:25
4
+ 451,Execute_error;Execute_success;Execute_error;Videos;Reading;Reading;Reading;Highlighted;Execute_success;Reading;Videos;Reading;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Quiz;Assessment;Quiz;Assessment;Assessment;Reading;Reading;Highlighted;Videos;Assessment;Assessment;Assessment;Videos;Assessment;Reading;Videos;Reading;Videos;Assessment;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos,27-08-2022 09:25
5
+ 452,Execute_success;Execute_success;Execute_error;Execute_error;Execute_success;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_success;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Quiz;Reading;Reading;Reading;Reading;Reading;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;execute;execute;execute,27-08-2022 09:25
6
+ 457,Highlighted;Highlighted;Execute_error;Execute_success;Execute_success;Execute_success;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Videos;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_error;Execute_success;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Assessment;Assessment;Assessment,27-08-2022 09:25
7
+ 462,execute;Videos;Videos;Reading;Reading;execute;execute;execute;Videos;Videos;Videos;Reading;Reading;Reading;Videos;Reading;Videos;Reading;Reading;Videos;Reading;Videos;Reading;Reading;Videos;Reading;Videos;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;execute;Reading;Execute_error;execute;Reading;Execute_error;execute;Reading;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Videos;Execute_success;Videos;Execute_success;Videos;Quiz;Assessment;Quiz;Assessment;Quiz;Assessment,27-08-2022 09:25
8
+ 468,Execute_success;Execute_error;Execute_success;Execute_error;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Videos;Videos;Videos;Videos;Reading;Reading;execute;execute;execute;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Assessment;Assessment;Assessment,27-08-2022 09:25
9
+ 469,Videos;Videos;Reading;Reading;Reading;Highlighted;Reading;Videos;Videos;Reading;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted,27-08-2022 09:25
10
+ 470,Quiz;Assessment;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted,27-08-2022 09:25
11
+ 471,Execute_success;Execute_success;Execute_error;Execute_success;Execute_success;Videos;Videos;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Reading;Reading;Execute_success;Reading;Reading;Execute_success;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Reading,27-08-2022 09:25
12
+ 472,Highlighted;Highlighted;Highlighted;Highlighted;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Quiz;Assessment;Assessment;Reading;Videos;Videos;Reading;Videos;Reading;Videos;Videos;Reading;Reading;Videos;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;execute;execute;execute;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Reading;Execute_error;Reading;Execute_error;Reading;Execute_error;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Reading;Reading;Reading,27-08-2022 09:25
13
+ 474,Reading;execute;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted;Assessment;Assessment;Assessment;Verified;Verified;Verified;Verified;Verified;Verified;Reading;Reading;Reading;Verified;Verified;Verified;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted,27-08-2022 09:25
14
+ 477,Execute_success;Execute_success;Execute_error;Videos;Reading;Reading;Reading;Videos;Reading;Videos;Videos;Reading;Videos;Reading;Videos;Reading;Videos;Videos;Reading;Reading;Videos;Reading;Reading;Videos;Videos;Reading;Videos;Reading;Videos;Videos;Reading;Reading;Videos;Reading;Videos;Videos;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Reading;Videos;Videos;Reading;execute;execute;execute;Reading;Reading;Reading;Videos;Videos;Videos;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Execute_success;Reading;Execute_success;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Assessment;Assessment;Assessment,27-08-2022 09:25
15
+ 484,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_success;Execute_success;Videos;Videos;Videos;Reading;Execute_success;Execute_success;Videos;Videos;Videos;Reading;Execute_success;Execute_success;Videos;Videos;Videos;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Assessment;Execute_success;Reading;Assessment;Execute_success;Reading;Assessment,27-08-2022 09:25
16
+ 485,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Reading;Reading;Execute_success;Execute_success;Execute_success;Videos;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_success;Videos;Videos;Execute_success;Videos;Videos;Execute_success;Videos;Videos;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error,27-08-2022 09:25
17
+ 486,Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_error;Execute_error;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Videos;Reading;Highlighted;Highlighted;Reading;execute;execute;execute;execute;execute;Execute_error;execute;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;execute;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Videos;Reading;Reading;Videos;Videos;Videos;Reading;Quiz;Assessment;Quiz;Quiz;Videos;Reading;Assessment;Assessment;Assessment;Highlighted;Highlighted;Highlighted;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;execute;execute;execute;Reading;Reading;Reading,27-08-2022 09:25
18
+ 488,Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Videos;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error,27-08-2022 09:25
19
+ 491,Videos;Videos;Reading;Highlighted;Videos;Videos;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;execute;execute;execute;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Reading;Execute_error;Execute_error;Execute_error;Execute_success;Videos;Execute_success;Videos;Execute_success;Videos;Reading;Reading;Reading;Verified;Verified;Verified;Assessment;Assessment;Assessment;Verified;Assessment;Verified;Assessment;Verified;Assessment,27-08-2022 09:25
20
+ 492,Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Reading;Reading;Reading;Reading;Assessment;Assessment;Assessment;Verified;Verified;Verified;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error,27-08-2022 09:25
21
+ 493,Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Videos;Videos;Reading;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Videos;Reading;Reading;Execute_success;Reading;Videos;Videos;Reading;Execute_error;Execute_error;Execute_error;Reading;Execute_success;Reading;Reading;Reading;execute;execute;execute;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Videos;Videos;Videos;Videos;Videos;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Videos;Reading;Highlighted;Highlighted;Highlighted;Videos;Reading;Highlighted;Highlighted;Highlighted;Reading;Reading;Reading;Assessment;Assessment;Assessment;Assessment;Assessment;Assessment;Assessment;Assessment;Assessment;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Verified;Reading;Highlighted;Reading;Highlighted;Reading;Highlighted;Highlighted;Assessment;Verified;Highlighted;Assessment;Verified;Highlighted;Assessment;Verified,27-08-2022 09:25
22
+ 494,Execute_success;Videos;Videos;Reading;Reading;Reading;Reading;Videos;Reading;Videos;Videos;Reading;Videos;Reading;Quiz;Reading;Videos;Reading;Videos;Videos;Videos;Reading;Reading;Highlighted;Highlighted;Videos;Reading;Videos;Reading;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Execute_error;Execute_error;Execute_error;Reading;Reading;Reading;Highlighted;Highlighted;Highlighted,27-08-2022 09:25
23
+ 498,Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Videos;Videos;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Reading;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Highlighted;Videos;Videos;Reading;Highlighted;execute;execute;execute;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_success;Execute_success;Execute_success;Reading;Reading;Reading;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;Execute_error;execute;execute;execute;execute;execute;execute,27-08-2022 09:25
24
+ 499,Reading;Reading;Highlighted;Highlighted;Highlighted;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_success;Execute_error;Execute_error;Videos;Videos;Reading;Execute_error;Execute_error;Videos;Videos;Reading;Execute_error;Execute_error;Videos;Videos;Reading,27-08-2022 09:25
main.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import yaml
3
+ import pandas as pd
4
+ from utils import *
5
+
6
def load_config(filename):
    """
    Load a YAML configuration file.

    Parameters:
    filename (str): Path to the YAML file to read.

    Returns:
    The object produced by ``yaml.safe_load`` for the file content.

    Raises:
    yaml.YAMLError: If the file content is not valid YAML.
    """
    with open(filename, 'r', encoding='utf-8') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Report the parse error and propagate it. Swallowing it would
            # make this function return None and surface later as an
            # unrelated AttributeError on the first ``config.get`` call.
            print(exc)
            raise
12
+
13
def save_results(dsm_result, spm_result, occurrence_matrix, dsm_result_path, spm_result_path, occurrence_matrix_path):
    """
    Write the DSM result, the SPM result and the occurrence matrix to CSV files.

    Parameters:
    dsm_result (DataFrame): DSM result table; written without its index.
    spm_result (DataFrame): SPM result table; its index is written as the
        first column, labelled 'ptrn'.
    occurrence_matrix (DataFrame): Occurrence matrix; its index is written as
        the first column, labelled 'ptrn'.
    dsm_result_path (str): Destination of the DSM CSV.
    spm_result_path (str): Destination of the SPM CSV.
    occurrence_matrix_path (str): Destination of the occurrence matrix CSV.
    """
    # The DSM table is written positionally, as before.
    dsm_result.to_csv(dsm_result_path, index=False)
    # The index of these two frames is written too: dropping it would leave
    # rows of numbers with no row labels in the saved files.
    spm_result.to_csv(spm_result_path, index=True, index_label='ptrn')
    occurrence_matrix.to_csv(occurrence_matrix_path, index=True, index_label='ptrn')
17
+
18
def _parse_overrides(command_args):
    """Turn ``key=value`` command-line arguments into a dict of typed overrides."""
    overrides = {}
    for arg in command_args:
        if '=' not in arg:
            print(f"Ignoring invalid argument: {arg}")
            continue
        # Split on the first '=' only so values may themselves contain '='.
        key, raw_value = arg.split('=', 1)
        try:
            parsed = yaml.safe_load(raw_value)
        except yaml.YAMLError:
            parsed = raw_value
        # Keep plain scalars (so "12" becomes 12 and "0.4" becomes 0.4, the
        # same types the YAML file yields); anything else stays a raw string.
        overrides[key] = parsed if isinstance(parsed, (bool, int, float, str)) else raw_value
    return overrides


def _apply_overrides(params, overrides):
    """Replace, in place, the entries of ``params`` whose key appears in ``overrides``."""
    for key, value in overrides.items():
        if key in params:
            params[key] = value
    return params


def main():
    """
    Run SPM and DSM with the settings of 'config.yaml' and save the results.

    Command-line arguments of the form ``key=value`` override the matching
    keys of the 'SPM' and 'DSM' sections; keys absent from a section are
    ignored for that section, and arguments without '=' are reported and
    skipped. Output paths are read from the top-level keys
    'dsm_result_path', 'spm_result_path' and 'occurrence_matrix_path'.
    """
    config_file = 'config.yaml'
    # With no arguments this is an empty dict, so both sections are used as-is.
    overridden_params = _parse_overrides(sys.argv[1:])

    config = load_config(config_file)

    # SPM parameters
    spm_params = _apply_overrides(config.get('SPM', {}), overridden_params)
    spm_result, occurrence_matrix = SPM(spm_params)

    # DSM parameters
    dsm_params = _apply_overrides(config.get('DSM', {}), overridden_params)
    # Only the result table (last element) is saved; the pattern lists are unused here.
    *_, dsm_result = DSM(dsm_params)

    # Fetching paths from config
    dsm_result_path = config.get('dsm_result_path')
    spm_result_path = config.get('spm_result_path')
    occurrence_matrix_path = config.get('occurrence_matrix_path')

    # Saving results
    save_results(dsm_result, spm_result, occurrence_matrix, dsm_result_path, spm_result_path, occurrence_matrix_path)


if __name__ == "__main__":
    main()
utils.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from scipy import stats
4
+ import csv
5
+
6
def process_csv(input_file, output_file="output.csv",user_id_column="user_id",timestamp_column="timestamp",action_column="actions"):
    """
    Collapse a one-action-per-row CSV into one row per user.

    Each output row holds a user id and that user's actions joined with ';'.
    Actions keep the order in which their rows appear in the input file; no
    sorting by timestamp is performed, so the input is expected to be in the
    desired order already.

    Parameters:
    input_file (str): Path of the CSV to read; its first row is the header.
    output_file (str): Path of the CSV to write.
    user_id_column (str): Header name of the user id column.
    timestamp_column (str): Header name of the timestamp column (must exist).
    action_column (str): Header name of the action column; also used as the
        header of the joined-actions column in the output.

    Raises:
    ValueError: If one of the three column names is missing from the header.
    """
    with open(input_file, 'r', newline='') as csvfile, open(output_file, 'w', newline='') as new_csvfile:
        reader = csv.reader(csvfile)
        writer = csv.writer(new_csvfile)

        # Actions grouped per user, in order of first appearance of each user.
        user_actions = {}

        header = next(reader, None)
        if header is not None:
            user_id_index = header.index(user_id_column)
            # Looked up only to fail early when the column is absent.
            header.index(timestamp_column)
            action_index = header.index(action_column)

            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; skip them.
                    continue
                user_actions.setdefault(row[user_id_index], []).append(row[action_index])

        # The joined column keeps the name of the source action column so the
        # file can be read back with the same column names that were passed in.
        writer.writerow([user_id_column, action_column])
        for user_id, actions in user_actions.items():
            writer.writerow([user_id, ';'.join(actions)])
40
+
41
+
42
def generate_sequence_list(sentence, min_gap,max_gap, sliding_window_min=1,sliding_window_max=1):
    """
    Build the list of contiguous action patterns (n-grams) of a sequence.

    The sequence is split on ';'. For every stride from sliding_window_min to
    sliding_window_max and every pattern length from min_gap to max_gap
    (both ranges inclusive), the actions are scanned from the start with that
    stride, and each window is joined with '--->'. A pattern reachable with
    several strides is therefore listed once per stride.

    Parameters:
    sentence (str): Actions separated by ';'.
    min_gap (int): Smallest pattern length, at least 1.
    max_gap (int): Largest pattern length.
    sliding_window_min (int): Smallest stride, at least 1.
    sliding_window_max (int): Largest stride.

    Returns:
    list of str: The generated patterns, in generation order.

    Raises:
    ValueError: If min_gap or sliding_window_min is smaller than 1.
    """
    # A length below 1 would emit empty-string patterns, and a stride below 1
    # would either scan nothing or make range() fail with an opaque message.
    if min_gap < 1:
        raise ValueError("min_gap must be at least 1.")
    if sliding_window_min < 1:
        raise ValueError("sliding_window_min must be at least 1.")

    words = sentence.split(";")
    word_count = len(words)

    ngrams = []
    for sliding_window in range(sliding_window_min, sliding_window_max + 1):
        for gram_length in range(min_gap, max_gap + 1):
            # Patterns longer than the sequence give an empty range here.
            ngrams.extend(
                '--->'.join(words[start:start + gram_length])
                for start in range(0, word_count - gram_length + 1, sliding_window)
            )

    return ngrams
55
+
56
+
57
+
58
def create_dict_from_df(df, identifier_column, sequence,min_gap,max_gap,sliding_window_min=1,sliding_window_max=1):
    """
    Generate the pattern list of every row and collect all distinct patterns.

    Parameters:
    df (DataFrame): One row per identifier.
    identifier_column (str): Column whose value becomes the dictionary key.
    sequence (str): Column holding the ';'-separated action sequence.
    min_gap, max_gap, sliding_window_min, sliding_window_max: Forwarded
        unchanged to generate_sequence_list.

    Returns:
    tuple: (dict mapping identifier to its list of patterns, set of every
    pattern seen). A repeated identifier keeps the patterns of its last row.
    """
    patterns_by_id = {}
    vocabulary = set()
    for _, record in df.iterrows():
        patterns = generate_sequence_list(
            record[sequence], min_gap, max_gap, sliding_window_min, sliding_window_max
        )
        patterns_by_id[record[identifier_column]] = patterns
        vocabulary |= set(patterns)
    return patterns_by_id, vocabulary
67
+
68
def create_dataframe_from_dict_and_set(result_dict, unique_values_set):
    """
    Build the occurrence matrix of patterns per identifier.

    Parameters:
    result_dict (dict): Maps an identifier to its list of patterns.
    unique_values_set (set): The patterns to count.

    Returns:
    DataFrame: One row per pattern of unique_values_set and one column per
    key of result_dict; each cell is the number of times the pattern occurs
    in that key's list (0 when absent).
    """
    from collections import Counter

    # Count each list once instead of rescanning it for every pattern.
    counters = {key: Counter(values) for key, values in result_dict.items()}

    # Looking up a missing pattern in a Counter gives 0 without inserting it.
    counts_dict = {
        value: {key: counter[value] for key, counter in counters.items()}
        for value in unique_values_set
    }

    # Outer keys become columns here, so transpose to get patterns as rows.
    df = pd.DataFrame(counts_dict).fillna(0)
    df = df.transpose()

    return df
84
+
85
+
86
def process_dataframe(df):
    """
    Summarise an occurrence matrix row by row.

    Parameters:
    df (DataFrame): Occurrence counts, one row per pattern and one column
        per student.

    Returns:
    DataFrame: Indexed like df, with the columns 'I-Frequency' (row sum),
    'S-Frequency' (number of columns with a count above 0),
    'I-Support (mean)' and 'S-Support' (the two frequencies divided by the
    number of columns) and 'I-Support (sd)' (row standard deviation as
    computed by DataFrame.std).
    """
    student_count = df.shape[1]

    instance_frequency = df.sum(axis=1)
    student_frequency = df.gt(0).sum(axis=1)

    return pd.DataFrame({
        'I-Frequency': instance_frequency,
        'S-Frequency': student_frequency,
        'I-Support (mean)': instance_frequency / student_count,
        'S-Support': student_frequency / student_count,
        'I-Support (sd)': df.std(axis=1),
    })
111
+
112
+
113
def calculate_p_value(test_type, vector_a, vector_b=None, **kwargs):
    """
    Run one of the supported scipy.stats tests and return its p-value.

    Parameters:
    test_type (str): Name of the scipy.stats function to call. Supported:
        'poisson_means_test', 'ttest_ind', 'mannwhitneyu', 'bws_test',
        'ranksums', 'brunnermunzel', 'mood', 'ansari',
        'cramervonmises_2samp' (Cramér-von Mises), 'epps_singleton_2samp',
        'ks_2samp' and 'kstest'.
    vector_a (array-like): First positional argument passed to the test.
    vector_b (array-like, optional): Second positional argument passed to
        the test.
    **kwargs: Extra keyword arguments forwarded to the test unchanged.

    Returns:
    p_value (float): The ``pvalue`` attribute of the test result.

    Raises:
    ValueError: If test_type is not one of the supported names.
    """
    supported_tests = (
        'poisson_means_test',
        'ttest_ind',
        'mannwhitneyu',
        'bws_test',
        'ranksums',
        'brunnermunzel',
        'mood',
        'ansari',
        'cramervonmises_2samp',
        'epps_singleton_2samp',
        'ks_2samp',
        'kstest',
    )
    if test_type not in supported_tests:
        raise ValueError("Invalid test type.")

    # Every supported name is the name of the scipy.stats function itself.
    run_test = getattr(stats, test_type)
    outcome = run_test(vector_a, vector_b, **kwargs)
    return outcome.pvalue
168
+
169
def SPM_(path_to_csv,dataset_format, identifier_column, sequence_column,sortby="S-Support",min_gap=1,max_gap=1,sliding_window_min=1,sliding_window_max=1,S_support_thresh=0,I_support_thresh=0,timestamp_column="timestamp"):
    """
    Mine sequential patterns from a CSV file.

    Parameters:
    path_to_csv (str): CSV to analyse.
    dataset_format (int): When 1, the file is first converted with
        process_csv into "output.csv", which is then analysed instead.
    identifier_column (str): Column identifying each sequence.
    sequence_column (str): Column holding the action sequence.
    sortby (str): Result column used to sort, in descending order.
    min_gap, max_gap, sliding_window_min, sliding_window_max: Pattern
        generation settings forwarded to create_dict_from_df.
    S_support_thresh: Rows are kept only when 'S-Support' is strictly above it.
    I_support_thresh: Rows are kept only when 'I-Support (mean)' is strictly
        above it.
    timestamp_column (str): Timestamp column name, used only by the conversion.

    Returns:
    tuple: (filtered and sorted statistics table, full occurrence matrix).
    """
    if dataset_format == 1:
        process_csv(
            path_to_csv,
            output_file="output.csv",
            user_id_column=identifier_column,
            timestamp_column=timestamp_column,
            action_column=sequence_column,
        )
        path_to_csv = "output.csv"

    frame = pd.read_csv(path_to_csv)

    patterns_by_id, vocabulary = create_dict_from_df(
        frame, identifier_column, sequence_column,
        min_gap, max_gap, sliding_window_min, sliding_window_max,
    )
    matrix = create_dataframe_from_dict_and_set(patterns_by_id, vocabulary)

    ranked = process_dataframe(matrix).sort_values(by=sortby, ascending=False)

    # Both thresholds are exclusive lower bounds.
    passes_s_support = ranked['S-Support'] > S_support_thresh
    passes_i_support = ranked['I-Support (mean)'] > I_support_thresh
    return ranked[passes_s_support & passes_i_support], matrix
188
+
189
+
190
+
191
def SPM(config):
    """
    Run SPM_ with the settings of a configuration mapping.

    Parameters:
    config (dict): Settings read with ``config.get``. 'path_to_csv',
        'dataset_format', 'identifier_column' and 'sequence_column' have no
        fallback (None when missing); 'sortby' falls back to "S-Support",
        'min_gap', 'max_gap', 'sliding_window_min' and 'sliding_window_max'
        to 1, 'S_support_thresh' and 'I_support_thresh' to 0, and
        'timestamp_column' to "timestamp".

    Returns:
    tuple: Whatever SPM_ returns for these settings.
    """
    return SPM_(
        config.get('path_to_csv'),
        config.get('dataset_format'),
        config.get('identifier_column'),
        config.get('sequence_column'),
        sortby=config.get('sortby', "S-Support"),
        min_gap=config.get('min_gap', 1),
        max_gap=config.get('max_gap', 1),
        sliding_window_min=config.get('sliding_window_min', 1),
        sliding_window_max=config.get('sliding_window_max', 1),
        S_support_thresh=config.get('S_support_thresh', 0),
        I_support_thresh=config.get('I_support_thresh', 0),
        timestamp_column=config.get('timestamp_column', "timestamp"),
    )
206
+
207
+
208
+
209
def DSM(config):
    """
    Compare the patterns mined from two CSV files (left and right group).

    SPM_ is run on each file with the same settings. For every pattern kept
    in at least one of the two results, the per-identifier occurrence counts
    of both groups are compared with the configured statistical test, and
    the pattern is reported when the p-value is below 'threshold_pvalue'.

    Parameters:
    config (dict): Must contain 'path_to_csv_left', 'dataset_format',
        'path_to_csv_right', 'identifier_column', 'sequence_column',
        'sortby', 'min_gap', 'max_gap', 'sliding_window_min',
        'sliding_window_max', 'S_support_thresh', 'I_support_thresh',
        'threshold_pvalue' and 'test_type'. 'timestamp_column' is optional
        and defaults to 'timestamp'.

    Returns:
    tuple: (ptrn_left, ptrn_right, ptrn_both_left, ptrn_both_right,
    result_df). The four lists hold the reported patterns kept only on the
    left, only on the right, on both with a higher left mean, and on both
    otherwise. result_df has one row per reported pattern with the columns
    'ptrn', 'ttest_value' (the p-value of the configured test),
    'isupportleft_mean', 'isupportright_mean' and 'Group'.

    Raises:
    KeyError: If a required configuration key is missing.
    """
    path_to_csv_left = config['path_to_csv_left']
    dataset_format = config['dataset_format']
    path_to_csv_right = config['path_to_csv_right']
    identifier_column = config['identifier_column']
    sequence_column = config['sequence_column']
    sortby = config['sortby']
    min_gap = config['min_gap']
    max_gap = config['max_gap']
    sliding_window_min = config['sliding_window_min']
    sliding_window_max = config['sliding_window_max']
    S_support_thresh = config['S_support_thresh']
    I_support_thresh = config['I_support_thresh']
    threshold_pvalue = config['threshold_pvalue']
    test_type = config['test_type']
    timestamp_column = config.get('timestamp_column', 'timestamp')

    if dataset_format == 1:
        process_csv(path_to_csv_left, output_file="output_left.csv", user_id_column=identifier_column,
                    timestamp_column=timestamp_column, action_column=sequence_column)
        path_to_csv_left = "output_left.csv"
        process_csv(path_to_csv_right, output_file="output_right.csv", user_id_column=identifier_column,
                    timestamp_column=timestamp_column, action_column=sequence_column)
        # Each side must point at its own converted file.
        path_to_csv_right = "output_right.csv"

    ptrn_left = []
    ptrn_right = []
    ptrn_both_left = []
    ptrn_both_right = []

    # Any conversion already happened above, hence dataset_format 0 here.
    spm_result_left, occurence_matrix_left = SPM_(path_to_csv_left, 0, identifier_column, sequence_column, sortby,
                                                  min_gap, max_gap, sliding_window_min, sliding_window_max,
                                                  S_support_thresh, I_support_thresh)
    spm_result_right, occurence_matrix_right = SPM_(path_to_csv_right, 0, identifier_column, sequence_column, sortby,
                                                    min_gap, max_gap, sliding_window_min, sliding_window_max,
                                                    S_support_thresh, I_support_thresh)

    result_data = []
    left_ptrn_data = set(spm_result_left.index)
    right_ptrn_data = set(spm_result_right.index)
    all_ptrn = left_ptrn_data | right_ptrn_data

    for ptrn in all_ptrn:
        in_left = ptrn in left_ptrn_data
        in_right = ptrn in right_ptrn_data

        # A pattern that did not pass the thresholds on one side is compared
        # as all-zero counts for that side.
        if in_left:
            isupport_left = occurence_matrix_left.loc[ptrn, :].values
        else:
            isupport_left = np.zeros(occurence_matrix_left.shape[1])
        if in_right:
            isupport_right = occurence_matrix_right.loc[ptrn, :].values
        else:
            isupport_right = np.zeros(occurence_matrix_right.shape[1])

        p_value = calculate_p_value(test_type, isupport_left, isupport_right)
        # Written as "not <" so a NaN p-value is skipped, as a plain "<" test would.
        if not p_value < threshold_pvalue:
            continue

        if in_left and in_right:
            if isupport_left.mean() > isupport_right.mean():
                ptrn_both_left.append(ptrn)
                result_data.append((ptrn, p_value, isupport_left.mean(), isupport_right.mean(), "both_left"))
            else:
                ptrn_both_right.append(ptrn)
                result_data.append((ptrn, p_value, isupport_left.mean(), isupport_right.mean(), "both_right"))
        elif in_left:
            ptrn_left.append(ptrn)
            result_data.append((ptrn, p_value, isupport_left.mean(), np.nan, "left"))
        else:
            ptrn_right.append(ptrn)
            result_data.append((ptrn, p_value, np.nan, isupport_right.mean(), "right"))

    result_df = pd.DataFrame(result_data,
                             columns=['ptrn', 'ttest_value', 'isupportleft_mean', 'isupportright_mean', "Group"])
    return ptrn_left, ptrn_right, ptrn_both_left, ptrn_both_right, result_df