kota
initial commit
e60e568
raw
history blame
4.16 kB
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
from typing import Optional, Dict, Any
import pandas as pd
from datetime import datetime
from pm4py.util import pandas_utils
import importlib.util
def apply(parameters: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
    """
    Extract a process mining dataframe from the events returned by the WMI query
    "Select * from Win32_NTLogEvent" executed against the local computer.

    CASE ID (case:concept:name) => name of the computer emitting the events.
    ACTIVITY (concept:name) => concatenation of the source name of the event and the event identifier
    (see https://learn.microsoft.com/en-us/previous-versions/windows/desktop/eventlogprov/win32-ntlogevent)
    TIMESTAMP (time:timestamp) => timestamp of generation of the event
    RESOURCE (org:resource) => username involved in the event

    Parameters
    ----------------
    parameters
        Optional dictionary of parameters (no key is read by this function at the moment)

    Returns
    ----------------
    dataframe
        Pandas dataframe, sorted by case index, then timestamp, then original event index
    """
    if parameters is None:
        parameters = {}

    # imported inside the function so that importing this module does not require win32com
    import win32com.client

    print(":: executing SQL query against the Windows registry. this can take time.")

    locator = win32com.client.Dispatch("WbemScripting.SWbemLocator")
    # "." addresses the local computer; the raw string keeps the backslash literal
    # ("\c" is an invalid escape sequence in a normal string literal)
    services = locator.ConnectServer(".", r"root\cimv2")
    log_events = services.ExecQuery("Select * from Win32_NTLogEvent")

    # the progress bar is optional: it is shown only when tqdm is installed
    progress = None
    if importlib.util.find_spec("tqdm"):
        from tqdm.auto import tqdm
        progress = tqdm(total=len(log_events),
                        desc="extracting Windows events, progress :: ")

    events = []
    try:
        for log_event in log_events:
            events.append(_event_to_record(log_event))
            if progress is not None:
                progress.update()
    finally:
        # close the bar even when reading or parsing an event fails
        if progress is not None:
            progress.close()

    dataframe = pandas_utils.instantiate_dataframe(events)

    # map the raw event attributes to the standard process mining columns
    dataframe["case:concept:name"] = dataframe["computerName"]
    dataframe["time:timestamp"] = dataframe["timeGenerated"]
    dataframe["concept:name"] = dataframe["sourceName"] + " " + dataframe["eventIdentifier"]
    dataframe["org:resource"] = dataframe["user"]

    # "@@index" keeps the extraction order and is used as tie-breaker for equal timestamps
    dataframe = pandas_utils.insert_index(dataframe, "@@index", copy_dataframe=False, reset_index=False)
    dataframe = dataframe.sort_values(["time:timestamp", "@@index"])
    # sort=False: cases are numbered in order of first appearance in the time-sorted frame
    dataframe["@@case_index"] = dataframe.groupby("case:concept:name", sort=False).ngroup()
    dataframe = dataframe.sort_values(["@@case_index", "time:timestamp", "@@index"])

    return dataframe


def _parse_wmi_datetime(value: str) -> datetime:
    """
    Parse the date/time part of a timestamp string such as "20230101120000.000000+060".

    Everything from the first "+" or "-" onwards (the offset suffix) is discarded,
    so the returned datetime is naive.
    """
    local_part = value.split("+")[0].split("-")[0]
    return datetime.strptime(local_part, "%Y%m%d%H%M%S.%f")


def _event_to_record(log_event: Any) -> Dict[str, Any]:
    """
    Convert one object returned by the Win32_NTLogEvent query into a flat dictionary.

    All the properties are stored as strings, except "timeGenerated" and "timeWritten"
    which are parsed to datetime objects. The key order defines the column order
    of the resulting dataframe.
    """
    def text(property_name: str) -> str:
        return str(log_event.Properties_(property_name))

    return {
        "category": text("Category"),
        "categoryString": text("CategoryString"),
        "computerName": text("ComputerName"),
        "eventCode": text("EventCode"),
        "eventIdentifier": text("EventIdentifier"),
        "eventType": text("EventType"),
        "logFile": text("LogFile"),
        "message": text("Message"),
        "recordNumber": text("RecordNumber"),
        "sourceName": text("SourceName"),
        "timeGenerated": _parse_wmi_datetime(text("TimeGenerated")),
        "timeWritten": _parse_wmi_datetime(text("TimeWritten")),
        "type": text("Type"),
        "user": text("User"),
    }