'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
import pandas as pd
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from typing import Union, Optional, Dict, Tuple, List, Any
from pm4py.utils import get_properties, constants, check_is_pandas_dataframe
from pm4py.utils import __event_log_deprecation_warning
from pm4py.objects.ocel.obj import OCEL
from sqlite3 import Connection as SQ3_Connection
from tempfile import NamedTemporaryFile
from copy import copy
from pm4py.objects.petri_net.obj import PetriNet, Marking
def openai_query(prompt: str, api_key: Optional[str] = None, openai_model: Optional[str] = None, api_url: Optional[str] = None, **kwargs) -> str:
"""
Executes the provided prompt, obtaining the answer from the OpenAI APIs.
:param prompt: prompt that should be executed
:param api_key: OpenAI API key
:param openai_model: OpenAI model to be used (default: gpt-3.5-turbo)
:param api_url: OpenAI API URL
:rtype: ``str``
.. code-block:: python3
import pm4py
resp = pm4py.llm.openai_query('what is the result of 3+3?', api_key="sk-382393", openai_model="gpt-3.5-turbo")
print(resp)
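A typical workflow (an illustrative sketch; the API key is a placeholder) pairs the query with one of the textual abstractions provided by this module:
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
abstraction = pm4py.llm.abstract_variants(log)
prompt = "Given the following process variants, which are the main bottlenecks? " + abstraction
print(pm4py.llm.openai_query(prompt, api_key="sk-382393", openai_model="gpt-3.5-turbo"))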
"""
parameters = copy(kwargs) if kwargs is not None else {}
if api_url is not None:
parameters["api_url"] = api_url
if api_key is not None:
parameters["api_key"] = api_key
if openai_model is not None:
parameters["openai_model"] = openai_model
from pm4py.algo.querying.llm.connectors import openai as perform_query
return perform_query.apply(prompt, parameters=parameters)
def abstract_dfg(log_obj: Union[pd.DataFrame, EventLog, EventStream], max_len: int = constants.OPENAI_MAX_LEN, include_performance: bool = True, relative_frequency: bool = False, response_header: bool = True, primary_performance_aggregation: str = "mean", secondary_performance_aggregation: Optional[str] = None, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> str:
"""
Obtains the DFG abstraction of a traditional event log
:param log_obj: log object
:param max_len: maximum length of the (string) abstraction
:param include_performance: (boolean) includes the performance of the paths in the abstraction
:param relative_frequency: (boolean) uses the relative instead of the absolute frequency of the paths
:param response_header: includes a short header before the paths, pointing to the description of the abstraction
:param primary_performance_aggregation: primary aggregation to be used for the arc's performance (default: mean, other options: median, min, max, sum, stdev)
:param secondary_performance_aggregation: (optional) secondary aggregation to be used for the arc's performance (default None, other options: mean, median, min, max, sum, stdev)
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:param case_id_key: the column to be used as case identifier
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
print(pm4py.llm.abstract_dfg(log))
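The abstraction can be tuned through the optional parameters (an illustrative sketch; the parameter values are arbitrary):
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
abstr = pm4py.llm.abstract_dfg(log, max_len=5000, relative_frequency=True, primary_performance_aggregation="median", secondary_performance_aggregation="max")
print(abstr)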
"""
__event_log_deprecation_warning(log_obj)
parameters = get_properties(
log_obj, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["max_len"] = max_len
parameters["include_performance"] = include_performance
parameters["relative_frequency"] = relative_frequency
parameters["response_header"] = response_header
parameters["primary_performance_aggregation"] = primary_performance_aggregation
parameters["secondary_performance_aggregation"] = secondary_performance_aggregation
from pm4py.algo.querying.llm.abstractions import log_to_dfg_descr
return log_to_dfg_descr.apply(log_obj, parameters=parameters)
def abstract_variants(log_obj: Union[pd.DataFrame, EventLog, EventStream], max_len: int = constants.OPENAI_MAX_LEN, include_performance: bool = True, relative_frequency: bool = False, response_header: bool = True, primary_performance_aggregation: str = "mean", secondary_performance_aggregation: Optional[str] = None, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> str:
"""
Obtains the variants abstraction of a traditional event log
:param log_obj: log object
:param max_len: maximum length of the (string) abstraction
:param include_performance: (boolean) includes the performance of the variants in the abstraction
:param relative_frequency: (boolean) uses the relative instead of the absolute frequency of the variants
:param response_header: includes a short header before the variants, pointing to the description of the abstraction
:param primary_performance_aggregation: primary aggregation to be used for the variants' performance (default: mean, other options: median, min, max, sum, stdev)
:param secondary_performance_aggregation: (optional) secondary aggregation to be used for the variants' performance (default None, other options: mean, median, min, max, sum, stdev)
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:param case_id_key: the column to be used as case identifier
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
print(pm4py.llm.abstract_variants(log))
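The abstraction also accepts a pandas DataFrame; a sketch assuming a hypothetical CSV file with columns "case", "activity" and "timestamp":
.. code-block:: python3
import pandas as pd
import pm4py
df = pd.read_csv("event_log.csv")
df["timestamp"] = pd.to_datetime(df["timestamp"])
abstr = pm4py.llm.abstract_variants(df, activity_key="activity", timestamp_key="timestamp", case_id_key="case")
print(abstr)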
"""
__event_log_deprecation_warning(log_obj)
parameters = get_properties(
log_obj, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["max_len"] = max_len
parameters["include_performance"] = include_performance
parameters["relative_frequency"] = relative_frequency
parameters["response_header"] = response_header
parameters["primary_performance_aggregation"] = primary_performance_aggregation
parameters["secondary_performance_aggregation"] = secondary_performance_aggregation
from pm4py.algo.querying.llm.abstractions import log_to_variants_descr
return log_to_variants_descr.apply(log_obj, parameters=parameters)
def abstract_ocel(ocel: OCEL, include_timestamps: bool = True) -> str:
"""
Obtains the abstraction of an object-centric event log, including the list of events and the objects of the OCEL
:param ocel: object-centric event log
:param include_timestamps: (boolean) includes the timestamp information in the abstraction
:rtype: ``str``
.. code-block:: python3
import pm4py
ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
print(pm4py.llm.abstract_ocel(ocel))
"""
parameters = {}
parameters["include_timestamps"] = include_timestamps
from pm4py.algo.transformation.ocel.description import algorithm as ocel_description
return ocel_description.apply(ocel, parameters=parameters)
def abstract_ocel_ocdfg(ocel: OCEL, include_header: bool = True, include_timestamps: bool = True, max_len: int = constants.OPENAI_MAX_LEN) -> str:
"""
Obtains the abstraction of an object-centric event log, representing in text the object-centric directly-follows
graph
:param ocel: object-centric event log
:param include_header: (boolean) includes the header in the abstraction
:param include_timestamps: (boolean) includes the timestamp information in the abstraction
:param max_len: maximum length of the abstraction
:rtype: ``str``
.. code-block:: python3
import pm4py
ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
print(pm4py.llm.abstract_ocel_ocdfg(ocel))
"""
parameters = {}
parameters["include_header"] = include_header
parameters["include_timestamps"] = include_timestamps
parameters["max_len"] = max_len
from pm4py.algo.querying.llm.abstractions import ocel_ocdfg_descr
return ocel_ocdfg_descr.apply(ocel, parameters=parameters)
def abstract_ocel_features(ocel: OCEL, obj_type: str, include_header: bool = True, max_len: int = constants.OPENAI_MAX_LEN, debug: bool = False, enable_object_lifecycle_paths: bool = True) -> str:
"""
Obtains the abstraction of an object-centric event log, representing in text the features and their values.
:param ocel: object-centric event log
:param obj_type: the object type that should be considered in the feature extraction
:param include_header: (boolean) includes the header in the abstraction
:param max_len: maximum length of the abstraction
:param debug: enables debugging mode (reports the current stage of the feature extraction)
:param enable_object_lifecycle_paths: enables the "lifecycle paths" feature
:rtype: ``str``
.. code-block:: python3
import pm4py
ocel = pm4py.read_ocel("tests/input_data/ocel/example_log.jsonocel")
obj_type = pm4py.ocel_get_object_types(ocel)[0]
print(pm4py.llm.abstract_ocel_features(ocel, obj_type))
"""
parameters = {}
parameters["include_header"] = include_header
parameters["max_len"] = max_len
parameters["debug"] = debug
parameters["enable_object_lifecycle_paths"] = enable_object_lifecycle_paths
from pm4py.algo.querying.llm.abstractions import ocel_fea_descr
return ocel_fea_descr.apply(ocel, obj_type, parameters=parameters)
def abstract_event_stream(log_obj: Union[pd.DataFrame, EventLog, EventStream], max_len: int = constants.OPENAI_MAX_LEN, response_header: bool = True, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> str:
"""
Obtains the event stream abstraction of a traditional event log
:param log_obj: log object
:param max_len: maximum length of the (string) abstraction
:param response_header: includes a short header before the events, pointing to the description of the abstraction
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:param case_id_key: the column to be used as case identifier
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
print(pm4py.llm.abstract_event_stream(log))
"""
__event_log_deprecation_warning(log_obj)
parameters = get_properties(
log_obj, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["max_len"] = max_len
parameters["response_header"] = response_header
from pm4py.algo.querying.llm.abstractions import stream_to_descr
return stream_to_descr.apply(log_obj, parameters=parameters)
def abstract_petri_net(net: PetriNet, im: Marking, fm: Marking, response_header: bool = True) -> str:
"""
Obtains an abstraction of a Petri net
:param net: Petri net
:param im: Initial marking
:param fm: Final marking
:param response_header: includes the header of the response
:rtype: ``str``
.. code-block:: python3
import pm4py
net, im, fm = pm4py.read_pnml('tests/input_data/running-example.pnml')
print(pm4py.llm.abstract_petri_net(net, im, fm))
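A model discovered from an event log can be abstracted in the same way (an illustrative sketch):
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/running-example.xes")
net, im, fm = pm4py.discover_petri_net_inductive(log)
print(pm4py.llm.abstract_petri_net(net, im, fm, response_header=False))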
"""
parameters = {}
parameters["response_header"] = response_header
from pm4py.algo.querying.llm.abstractions import net_to_descr
return net_to_descr.apply(net, im, fm, parameters=parameters)
def abstract_log_attributes(log_obj: Union[pd.DataFrame, EventLog, EventStream], max_len: int = constants.OPENAI_MAX_LEN, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> str:
"""
Abstracts the attributes of a log (reporting their name, their type, and the top values)
:param log_obj: log object
:param max_len: maximum length of the (string) abstraction
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:param case_id_key: the column to be used as case identifier
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
print(pm4py.llm.abstract_log_attributes(log))
"""
__event_log_deprecation_warning(log_obj)
parameters = get_properties(
log_obj, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["max_len"] = max_len
from pm4py.algo.querying.llm.abstractions import log_to_cols_descr
return log_to_cols_descr.apply(log_obj, parameters=parameters)
def abstract_log_features(log_obj: Union[pd.DataFrame, EventLog, EventStream], max_len: int = constants.OPENAI_MAX_LEN, include_header: bool = True, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> str:
"""
Abstracts the machine learning features obtained from a log (reporting the top features until the desired length is obtained)
:param log_obj: log object
:param max_len: maximum length of the (string) abstraction
:param include_header: (boolean) includes a short header before the list of features
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:param case_id_key: the column to be used as case identifier
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes")
print(pm4py.llm.abstract_log_features(log))
"""
__event_log_deprecation_warning(log_obj)
parameters = get_properties(
log_obj, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["max_len"] = max_len
parameters["include_header"] = include_header
from pm4py.algo.querying.llm.abstractions import log_to_fea_descr
return log_to_fea_descr.apply(log_obj, parameters=parameters)
def abstract_temporal_profile(temporal_profile: Dict[Tuple[str, str], Tuple[float, float]], include_header: bool = True) -> str:
"""
Abstracts a temporal profile model to a string.
:param temporal_profile: temporal profile model
:param include_header: includes a header in the response, describing the temporal profile
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
temporal_profile = pm4py.discover_temporal_profile(log)
text_abstr = pm4py.llm.abstract_temporal_profile(temporal_profile, include_header=True)
print(text_abstr)
"""
parameters = {}
parameters["include_header"] = include_header
from pm4py.algo.querying.llm.abstractions import tempprofile_to_descr
return tempprofile_to_descr.apply(temporal_profile, parameters=parameters)
def abstract_case(case: Trace, include_case_attributes: bool = True, include_event_attributes: bool = True, include_timestamp: bool = True, include_header: bool = True, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp") -> str:
"""
Textually abstracts a case
:param case: case object
:param include_case_attributes: (boolean) include or not the attributes at the case level
:param include_event_attributes: (boolean) include or not the attributes at the event level
:param include_timestamp: (boolean) include or not the event timestamp in the abstraction
:param include_header: (boolean) includes the header of the response
:param activity_key: the column to be used as activity
:param timestamp_key: the column to be used as timestamp
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
print(pm4py.llm.abstract_case(log[0]))
"""
parameters = {}
parameters["include_case_attributes"] = include_case_attributes
parameters["include_event_attributes"] = include_event_attributes
parameters["include_timestamp"] = include_timestamp
parameters["include_header"] = include_header
parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = activity_key
parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = timestamp_key
from pm4py.algo.querying.llm.abstractions import case_to_descr
return case_to_descr.apply(case, parameters=parameters)
def abstract_declare(declare_model, include_header: bool = True) -> str:
"""
Textually abstracts a DECLARE model
:param declare_model: DECLARE model
:param include_header: (boolean) includes the header of the response
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
log_ske = pm4py.discover_declare(log)
print(pm4py.llm.abstract_declare(log_ske))
"""
parameters = {}
parameters["include_header"] = include_header
from pm4py.algo.querying.llm.abstractions import declare_to_descr
return declare_to_descr.apply(declare_model, parameters=parameters)
def abstract_log_skeleton(log_skeleton, include_header: bool = True) -> str:
"""
Textually abstracts a log skeleton process model
:param log_skeleton: log skeleton
:param include_header: (boolean) includes the header of the response
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/roadtraffic100traces.xes", return_legacy_log_object=True)
log_ske = pm4py.discover_log_skeleton(log)
print(pm4py.llm.abstract_log_skeleton(log_ske))
"""
parameters = {}
parameters["include_header"] = include_header
from pm4py.algo.querying.llm.abstractions import logske_to_descr
return logske_to_descr.apply(log_skeleton, parameters=parameters)
def explain_visualization(vis_saver, *args, connector=openai_query, **kwargs) -> str:
"""
Explains a process mining visualization using LLMs: the visualization is first saved as a .png image, and the .png file is then provided to the
Large Language Model, possibly along with a textual description of the visualization.
:param vis_saver: the visualizer (saving to disk) to be used
:param args: the mandatory arguments that should be provided to the visualization
:param connector: the connector method to the large language model
:param kwargs: optional parameters of the visualization or the connector (for example, the annotation of the visualization, or the API key)
:rtype: ``str``
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/running-example.xes")
descr = pm4py.llm.explain_visualization(pm4py.save_vis_dotted_chart, log, api_key="sk-5HN", show_legend=False)
print(descr)
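Any pm4py visualization saver with a compatible signature can be passed; a sketch with a Petri net visualization (the API key is a placeholder):
.. code-block:: python3
import pm4py
log = pm4py.read_xes("tests/input_data/running-example.xes")
net, im, fm = pm4py.discover_petri_net_inductive(log)
descr = pm4py.llm.explain_visualization(pm4py.save_vis_petri_net, net, im, fm, api_key="sk-5HN")
print(descr)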
"""
# create a temporary path for the .png image; the handle is closed so the visualizer can write to it
F = NamedTemporaryFile(suffix=".png")
image_path = F.name
F.close()
# save the visualization to the temporary path; the returned text (if any) describes the visualization and is appended to the prompt
description = vis_saver(*args, image_path, **kwargs)
parameters = copy(kwargs) if kwargs is not None else {}
parameters["image_path"] = image_path
return connector("Could you explain the included process mining visualization?\n\n" + description, **parameters)