Spaces:
Running
Running
File size: 4,158 Bytes
e60e568 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
from typing import Optional, Dict, Any
import pandas as pd
from datetime import datetime
from pm4py.util import pandas_utils
import importlib.util
def _wmi_property_text(wmi_object, property_name: str) -> str:
    """Return the string form of the property ``property_name`` of a WMI object."""
    return str(wmi_object.Properties_(property_name))


def _wmi_timestamp(wmi_object, property_name: str) -> datetime:
    """
    Parse a WMI date/time property into a naive ``datetime``.

    Everything from the first "+" or "-" onwards is dropped before parsing
    (presumably the UTC-offset suffix of the CIM datetime format - TODO confirm),
    so no offset is applied to the returned value.
    """
    raw_value = _wmi_property_text(wmi_object, property_name)
    without_suffix = raw_value.split("+")[0].split("-")[0]
    return datetime.strptime(without_suffix, "%Y%m%d%H%M%S.%f")


def apply(parameters: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """
    Extract a process mining dataframe from the Windows events exposed by the
    WMI class Win32_NTLogEvent on the local computer.

    CASE ID (case:concept:name) => name of the computer emitting the events.
    ACTIVITY (concept:name) => concatenation of the source name of the event and the event identifier
    (see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
    TIMESTAMP (time:timestamp) => timestamp of generation of the event
    RESOURCE (org:resource) => username involved in the event

    Parameters
    ----------------
    parameters
        Optional dictionary of parameters; it is accepted for interface
        uniformity but no key is read by this function.

    Returns
    ----------------
    dataframe
        Pandas dataframe with one row per event. Besides the raw event
        attributes it contains the columns listed above, plus "@@index" and
        "@@case_index"; rows are ordered by case index, timestamp and index.

    Raises
    ----------------
    ImportError
        If the ``win32com`` package cannot be imported.
    """
    if parameters is None:
        parameters = {}

    # imported here so that merely importing this module does not require win32com
    import win32com.client

    print(":: executing SQL query against the Windows registry. this can take time.")

    wmi_locator = win32com.client.Dispatch("WbemScripting.SWbemLocator")
    # raw string: "\c" is not a recognized escape sequence, and relying on it
    # being kept verbatim is deprecated (recent Python versions emit a warning)
    wmi_services = wmi_locator.ConnectServer(".", r"root\cimv2")
    log_events = wmi_services.ExecQuery("Select * from Win32_NTLogEvent")

    # the progress bar is optional: it is shown only when tqdm is installed
    progress = None
    if importlib.util.find_spec("tqdm"):
        from tqdm.auto import tqdm
        progress = tqdm(total=len(log_events),
                        desc="extracting Windows events, progress :: ")

    events = []
    try:
        for log_event in log_events:
            # key order matters: it determines the column order of the dataframe
            events.append({
                "category": _wmi_property_text(log_event, "Category"),
                "categoryString": _wmi_property_text(log_event, "CategoryString"),
                "computerName": _wmi_property_text(log_event, "ComputerName"),
                "eventCode": _wmi_property_text(log_event, "EventCode"),
                "eventIdentifier": _wmi_property_text(log_event, "EventIdentifier"),
                "eventType": _wmi_property_text(log_event, "EventType"),
                "logFile": _wmi_property_text(log_event, "LogFile"),
                "message": _wmi_property_text(log_event, "Message"),
                "recordNumber": _wmi_property_text(log_event, "RecordNumber"),
                "sourceName": _wmi_property_text(log_event, "SourceName"),
                "timeGenerated": _wmi_timestamp(log_event, "TimeGenerated"),
                "timeWritten": _wmi_timestamp(log_event, "TimeWritten"),
                "type": _wmi_property_text(log_event, "Type"),
                "user": _wmi_property_text(log_event, "User"),
            })
            if progress is not None:
                progress.update()
    finally:
        # close the bar even when reading or parsing an event fails
        if progress is not None:
            progress.close()

    dataframe = pandas_utils.instantiate_dataframe(events)

    # map the raw event attributes onto the standard process mining columns
    dataframe["case:concept:name"] = dataframe["computerName"]
    dataframe["time:timestamp"] = dataframe["timeGenerated"]
    dataframe["concept:name"] = dataframe["sourceName"] + " " + dataframe["eventIdentifier"]
    dataframe["org:resource"] = dataframe["user"]

    # "@@index" is used below as a tie-breaker for events sharing a timestamp
    dataframe = pandas_utils.insert_index(dataframe, "@@index", copy_dataframe=False, reset_index=False)
    dataframe = dataframe.sort_values(["time:timestamp", "@@index"])
    # sort=False: cases are numbered in order of first appearance in the
    # time-sorted dataframe
    dataframe["@@case_index"] = dataframe.groupby("case:concept:name", sort=False).ngroup()
    dataframe = dataframe.sort_values(["@@case_index", "time:timestamp", "@@index"])

    return dataframe
|