File size: 7,872 Bytes
8097001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from pm4py.objects.conversion.log import converter as log_converter
from typing import Union, Optional, Dict, Any
from pm4py.objects.log.obj import EventLog, EventStream
from enum import Enum
from pm4py.util import exec_utils, constants, xes_constants
import pandas as pd
import pm4py
from copy import copy


class Parameters(Enum):
    """
    Keys of the ``parameters`` dictionary that are read by ``apply`` in this module.
    """
    # Upper bound on the length (number of characters) of the returned abstraction
    MAX_LEN = "max_len"
    # Whether the explanatory header sentences are placed before the activity list
    RESPONSE_HEADER = "response_header"
    # How many of the top activities are inserted before any resource is added
    DEFAULT_MIN_ACTIVITIES = "default_min_activities"
    # Attribute/column holding the activity of each event
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Attribute/column holding the resource of each event
    RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY


def get_abstr_from_dict(ret, activities_dict, response_header):
    """
    Internal method to get the textual abstraction starting from the computations already performed.

    Parameters
    ----------------
    ret
        Dictionary associating each activity to be reported to a dictionary
        {resource: number of occurrences} (possibly empty)
    activities_dict
        Dictionary associating each activity to the tuple
        (number of occurrences, number of unique resources)
    response_header
        If truthy, the explanatory header sentences are placed before the list

    Returns
    ----------------
    abstr
        Textual abstraction: one line per activity, ordered by decreasing number of
        occurrences (ties broken by number of resources and then by name), each followed
        by its resources ordered by decreasing count (ties broken by name)
    """
    abstr = ["\n\n"]

    if response_header:
        # the trailing space keeps the two header sentences from being glued together
        abstr.append(
            "In the following text, you find the top activities along with their number of occurrences in the event log and the number of unique resources performing them. ")
        abstr.append("The top resources for such activities are included.\n\n")

    sort_act = sorted(
        ((act, activities_dict[act][0], activities_dict[act][1], resources) for act, resources in ret.items()),
        key=lambda x: (x[1], x[2], x[0]), reverse=True)

    for act, num_occ, num_res, resources in sort_act:
        abstr.append("%s (num.occ=%d ; num.resources=%d)" % (act, num_occ, num_res))

        # activities without any selected resource are reported without the bracketed list
        if resources:
            this_res = sorted(resources.items(), key=lambda z: (z[1], z[0]), reverse=True)

            abstr.append(" top resources=[")
            abstr.append("; ".join("%s=%d" % (res, count) for res, count in this_res))
            abstr.append("]")

        abstr.append("\n")

    abstr.append("\n\n")

    return "".join(abstr)


def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> str:
    """
    Textually abstracts the top activities/resources combinations in the event log.

    The top activities are inserted first (without resources); then the
    (activity, resource) combinations are added by decreasing frequency until the
    abstraction would exceed the maximum allowed length.


    Minimum Viable Example:

        import pm4py
        from pm4py.algo.querying.llm.abstractions import log_to_resources


        log = pm4py.read_xes("C:/receipt.xes")
        res = log_to_resources.apply(log)
        print(res)


    Example output (resource lists shortened here; the explanatory header sentences come
    first unless Parameters.RESPONSE_HEADER is set to False):

        Confirmation of receipt (num.occ=1434 ; num.resources=41) top resources=[Resource01=195; admin2=114; Resource02=102; Resource03=87; ...]
        T06 Determine necessity of stop advice (num.occ=1416 ; num.resources=34) top resources=[Resource01=203; Resource02=114; Resource04=85; Resource03=85; ...]
        T02 Check confirmation of receipt (num.occ=1368 ; num.resources=40) top resources=[Resource01=209; Resource02=95; Resource04=91; Resource03=86; ...]
        T04 Determine confirmation of receipt (num.occ=1307 ; num.resources=37) top resources=[Resource10=240; Resource01=184; Resource03=81; Resource04=68; ...]


    Parameters
    ----------------
    log
        Log object
    parameters
        Parameters of the algorithm, including:
        - Parameters.MAX_LEN => maximum length (number of characters) of the textual abstraction
          (default: constants.OPENAI_MAX_LEN)
        - Parameters.RESPONSE_HEADER => includes the explanatory header in the textual abstraction (default: True)
        - Parameters.DEFAULT_MIN_ACTIVITIES => number of top activities that are inserted in the textual
          abstraction before any resource is added (default: 15)
        - Parameters.ACTIVITY_KEY => the attribute to be used as activity
        - Parameters.RESOURCE_KEY => the attribute to be used as resource

    Returns
    ----------------
    textual_abstraction
        Textual abstraction
    """
    if parameters is None:
        parameters = {}

    max_len = exec_utils.get_param_value(Parameters.MAX_LEN, parameters, constants.OPENAI_MAX_LEN)
    response_header = exec_utils.get_param_value(Parameters.RESPONSE_HEADER, parameters, True)
    default_min_activities = exec_utils.get_param_value(Parameters.DEFAULT_MIN_ACTIVITIES, parameters, 15)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)

    log = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME, parameters=parameters)

    # per activity: number of events and number of distinct resources
    activities = log[activity_key].value_counts().to_dict()
    activities_unq_resources = log.groupby(activity_key)[resource_key].nunique().to_dict()
    activities = [(x, y, activities_unq_resources[x]) for x, y in activities.items()]
    activities_dict = {x[0]: (x[1], x[2]) for x in activities}

    activities = sorted(activities, key=lambda z: (z[1], z[2], z[0]), reverse=True)

    # first step: insert the top activities (with no resources yet), as long as the text fits
    ret = {}
    for i in range(min(len(activities), default_min_activities)):
        act = activities[i][0]
        ret[act] = {}

        if len(get_abstr_from_dict(ret, activities_dict, response_header)) > max_len:
            # the tentative insertion does not fit: undo it and stop
            del ret[act]
            break

    activities_resources = log.groupby([activity_key, resource_key]).size().to_dict()
    activities_resources = sorted([(x, y) for x, y in activities_resources.items()], key=lambda z: (z[1], z[0]),
                                  reverse=True)

    # second step: add the (activity, resource) combinations by decreasing frequency.
    # Each entry is added tentatively and explicitly rolled back when the text gets too long.
    # A shallow copy of "ret" is not enough for that: the inner per-activity dictionaries
    # would be shared, so the rejected entry would leak into the final abstraction.
    for (act, res), count in activities_resources:
        is_new_activity = act not in ret
        act_resources = ret.setdefault(act, {})
        act_resources[res] = count

        if len(get_abstr_from_dict(ret, activities_dict, response_header)) > max_len:
            del act_resources[res]
            if is_new_activity:
                del ret[act]
            break

    return get_abstr_from_dict(ret, activities_dict, response_header)


if __name__ == "__main__":
    # Manual smoke run: abstract the receipt log shipped with the tests and print the text
    example_log = pm4py.read_xes("../../../tests/input_data/receipt.xes")
    print(apply(example_log))