Spaces:
Sleeping
Sleeping
from pm4py.objects.conversion.log import converter as log_converter | |
from typing import Union, Optional, Dict, Any | |
from pm4py.objects.log.obj import EventLog, EventStream | |
from enum import Enum | |
from pm4py.util import exec_utils, constants, xes_constants | |
import pandas as pd | |
import pm4py | |
from copy import copy | |
class Parameters(Enum):
    """
    Keys recognized in the "parameters" dictionary passed to apply.
    """
    # upper bound on the number of characters of the produced abstraction
    MAX_LEN = "max_len"
    # whether the explanatory header is prepended to the abstraction
    RESPONSE_HEADER = "response_header"
    # number of top activities that are considered first, before adding resources
    DEFAULT_MIN_ACTIVITIES = "default_min_activities"
    # attribute of the log to be used as activity
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # attribute of the log to be used as resource
    RESOURCE_KEY = constants.PARAMETER_CONSTANT_RESOURCE_KEY
def get_abstr_from_dict(ret, activities_dict, response_header):
    """
    Internal method to get the textual abstraction starting from the computations already performed.

    Parameters
    ----------------
    ret
        Dictionary associating each activity to be reported to a dictionary
        {resource: number of occurrences}. When the inner dictionary is empty,
        no "top resources" list is printed for that activity.
    activities_dict
        Dictionary associating each activity to a tuple
        (number of occurrences, number of unique resources)
    response_header
        If true, an explanatory header is placed before the list of activities

    Returns
    ----------------
    abstr
        Textual abstraction (string)
    """
    abstr = ["\n\n"]

    if response_header:
        # NOTE: the two header sentences are emitted without a separating space
        # ("...performing them.The top resources..."). This is kept unchanged on purpose,
        # since callers measure the length of the returned text.
        abstr.append(
            "In the following text, you find the top activities along with their number of occurrences "
            "in the event log and the number of unique resources performing them.")
        abstr.append("The top resources for such activities are included.\n\n")

    # activities ordered by decreasing (occurrences, unique resources, name)
    sort_act = sorted(((act, activities_dict[act][0], activities_dict[act][1], ret[act]) for act in ret),
                      key=lambda x: (x[1], x[2], x[0]), reverse=True)

    for act, num_occ, num_res, resources in sort_act:
        abstr.append("%s (num.occ=%d ; num.resources=%d)" % (act, num_occ, num_res))

        if resources:
            # resources ordered by decreasing (occurrences, name)
            this_res = sorted(resources.items(), key=lambda z: (z[1], z[0]), reverse=True)

            abstr.append(" top resources=[")
            abstr.append("; ".join("%s=%d" % (res, occ) for res, occ in this_res))
            abstr.append("]")

        abstr.append("\n")

    abstr.append("\n\n")

    return "".join(abstr)
def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> str:
    """
    Textually abstracts the top activities/resources combinations in the event log.

    Minimum Viable Example:

        import pm4py
        from pm4py.algo.querying.llm.abstractions import log_to_resources

        log = pm4py.read_xes("C:/receipt.xes")
        res = log_to_resources.apply(log)
        print(res)

    Example output (resource lists abbreviated with "..."):

        In the following text, you find the top activities along with their number of occurrences in the event log and the number of unique resources performing them.The top resources for such activities are included.

        Confirmation of receipt (num.occ=1434 ; num.resources=41) top resources=[Resource01=195; admin2=114; Resource02=102; ...]
        T06 Determine necessity of stop advice (num.occ=1416 ; num.resources=34) top resources=[Resource01=203; Resource02=114; Resource04=85; ...]
        T02 Check confirmation of receipt (num.occ=1368 ; num.resources=40) top resources=[Resource01=209; Resource02=95; Resource04=91; ...]
        T04 Determine confirmation of receipt (num.occ=1307 ; num.resources=37) top resources=[Resource10=240; Resource01=184; Resource03=81; ...]

    Parameters
    ----------------
    log
        Log object
    parameters
        Parameters of the algorithm, including:
        - Parameters.MAX_LEN => maximum length (number of characters) of the textual abstraction
          (default: constants.OPENAI_MAX_LEN)
        - Parameters.RESPONSE_HEADER => includes an explanatory header in the textual abstraction (default: True)
        - Parameters.DEFAULT_MIN_ACTIVITIES => minimum number of different activities to include in the textual
          abstraction, as long as the maximum length is respected (default: 15)
        - Parameters.ACTIVITY_KEY => the attribute of the log to be used as activity
        - Parameters.RESOURCE_KEY => the attribute of the log to be used as resource

    Returns
    ----------------
    textual_abstraction
        Textual abstraction
    """
    if parameters is None:
        parameters = {}

    max_len = exec_utils.get_param_value(Parameters.MAX_LEN, parameters, constants.OPENAI_MAX_LEN)
    response_header = exec_utils.get_param_value(Parameters.RESPONSE_HEADER, parameters, True)
    default_min_activities = exec_utils.get_param_value(Parameters.DEFAULT_MIN_ACTIVITIES, parameters, 15)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)

    log = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME, parameters=parameters)

    # per activity: number of occurrences and number of distinct resources
    activities = log[activity_key].value_counts().to_dict()
    activities_unq_resources = log.groupby(activity_key)[resource_key].nunique().to_dict()
    activities = [(x, y, activities_unq_resources[x]) for x, y in activities.items()]
    activities_dict = {x[0]: (x[1], x[2]) for x in activities}
    activities = sorted(activities, key=lambda z: (z[1], z[2], z[0]), reverse=True)

    # first step: reserve room for the top activities (without resources),
    # stopping as soon as the abstraction would exceed the maximum length
    ret = {}
    for act, _, _ in activities[:max(0, default_min_activities)]:
        new_ret = dict(ret)
        new_ret[act] = {}

        if len(get_abstr_from_dict(new_ret, activities_dict, response_header)) > max_len:
            break

        ret = new_ret

    # second step: add the (activity, resource) combinations by decreasing frequency
    activities_resources = log.groupby([activity_key, resource_key]).size().to_dict()
    activities_resources = sorted(activities_resources.items(), key=lambda z: (z[1], z[0]), reverse=True)

    for (act, res), count in activities_resources:
        new_ret = dict(ret)
        # the per-activity dictionary must be copied as well: a shallow copy of "ret" would share it,
        # so a candidate rejected by the length check below would still end up in the result
        new_ret[act] = dict(ret.get(act, {}))
        new_ret[act][res] = count

        if len(get_abstr_from_dict(new_ret, activities_dict, response_header)) > max_len:
            break

        ret = new_ret

    return get_abstr_from_dict(ret, activities_dict, response_header)
if __name__ == "__main__":
    # manual run on the "receipt" example log (path relative to this file's folder)
    event_log = pm4py.read_xes("../../../tests/input_data/receipt.xes")
    print(apply(event_log))