SOC-AI / format_input.py
karimaloulou's picture
Upload 3 files
a039a4b verified
raw
history blame
10.1 kB
import re
import sys
import os
# Add the project root to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from location.IP_Checker import get_all_details
# Human-readable explanation for each severity level recognised in log text.
_SEVERITY_DEFINITIONS = {
    "Information": "The event provides useful context but is not critical.",
    "Notice": "Signifies noteworthy event that do not require immediate action.",
    "Warning": "Indicates potential issue that should be addressed and should be investigated before escalation.",
    "Error": "Indicates a problem that needs to be resolved and impacts system functionality but doesn't cause immediate service disruption.",
    "Critical": "Indicates a severe issue that causes significant loss of service and requires immediate attention.",
}


def append_severity_definition(log_text):
    """Insert a definition line after the ``Severity:`` field of *log_text*.

    The first ``Severity: <level>`` token found in the text selects the
    level. If that level is one of the known names (Information, Notice,
    Warning, Error, Critical), a `` - Severity Definition: ...`` line is
    inserted after every ``Severity: <level>`` field with that same level
    that is followed by a newline or by the end of the text.

    Args:
        log_text: Formatted log text to annotate.

    Returns:
        The annotated text, or *log_text* unchanged when it has no severity
        field or the level is not recognised.
    """
    found = re.search(r"Severity:\s*(\S+)", log_text)
    if found is None:
        return log_text

    severity = found.group(1)
    definition = _SEVERITY_DEFINITIONS.get(severity)
    if definition is None:
        return log_text

    # Accept end-of-text as well as a newline so a severity field on the
    # last, unterminated line is annotated too.
    pattern = r"(Severity:\s*" + re.escape(severity) + r")(?:\n|\Z)"

    def _with_definition(hit):
        # A callable replacement keeps the definition text literal; it is
        # never parsed as a regex replacement template.
        return f"{hit.group(1)}\n - Severity Definition: {definition}\n"

    return re.sub(pattern, _with_definition, log_text)
def format_sophos_row(row):
    """Render a Sophos log row as a bulleted, multi-line text report.

    Each field is read with ``row.get`` and falls back to ``"Unknown"`` when
    the key is absent. For the two country fields the value ``'R1'`` is also
    reported as ``"Unknown"``.

    Args:
        row: Mapping holding the Sophos log fields (``device_name``,
            ``src_ip``, ``dst_ip``, ``severity``, ...).

    Returns:
        The report text, starting and ending with a newline.
    """
    def field(name):
        return row.get(name, "Unknown")

    def country(name):
        # 'R1' is shown as "Unknown", exactly like a missing key.
        value = field(name)
        return value if value != 'R1' else "Unknown"

    report_lines = [
        "",
        "- Device Information",
        f"- Device Name: {field('device_name')}",
        f"- Device Model: {field('device_model')}",
        f"- Device Serial ID: {field('device_serial_id')}",
        "- Connection Details",
        f"- Source IP: {field('src_ip')}",
        f"- Source Country: {country('src_country')}",
        f"- Destination IP: {field('dst_ip')}",
        f"- Destination Country: {country('dst_country')}",
        f"- Protocol: {field('protocol')}",
        f"- Source Port: {field('src_port')}",
        f"- Destination Port: {field('dst_port')}",
        f"- Timestamp: {field('timestamp')}",
        "- Log Information",
        f"- Log Type: {field('log_type')}",
        f"- Log Component: {field('log_component')}",
        f"- Log Subtype: {field('log_subtype')}",
        f"- Severity: {field('severity')}",
        "- Additional Information",
        f"- Heartbeat Status: {field('hb_status')}",
        f"- Application Resolved By: {field('app_resolved_by')}",
        f"- Application Is Cloud: {field('app_is_cloud')}",
        "",
    ]
    # The empty first and last entries yield the leading and trailing newline.
    return "\n".join(report_lines)
def format_azure_sign_in_logs(data_row):
    """Render an Azure sign-in record as a multi-line text report.

    Each field is read with ``data_row.get`` and falls back to ``"Unknown"``
    when the key is absent. The IP address is also handed to
    ``get_all_details`` and whatever that returns is embedded right after the
    ``More information about Location:`` label.

    Args:
        data_row: Mapping with keys such as ``userPrincipalName``,
            ``ipAddress``, ``deviceDetail_*`` and ``location_*``.

    Returns:
        The formatted report string.
    """
    def get_value(key, default="Unknown"):
        return data_row.get(key, default)

    ip_address = get_value('ipAddress')
    # Pass the IP value itself. Writing `{get_value(...)}` as the call
    # argument inside an f-string field builds a one-element set instead.
    # NOTE(review): assumes get_all_details expects a single IP string - confirm.
    ip_details = get_all_details(ip_address)

    device_detail = (
        f"\n- Device Detail:"
        f"\n Device ID: {get_value('deviceDetail_deviceId')}"
        f"\n Display Name: {get_value('deviceDetail_displayName')}"
        f"\n Operating System: {get_value('deviceDetail_operatingSystem')}"
        f"\n Browser: {get_value('deviceDetail_browser')}"
        f"\n Compliance: {get_value('deviceDetail_isCompliant')}"
        f"\n Managed: {get_value('deviceDetail_isManaged')}"
        f"\n Trust Type: {get_value('deviceDetail_trustType')}"
    )
    location = (
        f"\n- Location:"
        f"\n City: {get_value('location_city')}"
        f"\n State: {get_value('location_state')}"
        f"\n Country/Region: {get_value('location_countryOrRegion')}"
        f"\n Latitude: {get_value('location_geoCoordinates_latitude')}"
        f"\n Longitude: {get_value('location_geoCoordinates_longitude')}"
    )
    return (
        f"Created Date: {get_value('createdDateTime')}\n"
        f"User: {get_value('userDisplayName')}\n"
        f"User Principal Name: {get_value('userPrincipalName')}\n"
        f"User ID: {get_value('userId')}\n"
        f"Application ID: {get_value('appId')}\n"
        f"Application Display Name: {get_value('appDisplayName')}\n"
        f"IP Address: {ip_address}\n"
        # NOTE(review): no newline is emitted after the details; presumably
        # the text returned by get_all_details ends with one - confirm.
        f"More information about Location:{ip_details}"
        f"Client App Used: {get_value('clientAppUsed')}\n"
        f"Conditional Access Status: {get_value('conditionalAccessStatus')}\n"
        f"{device_detail}\n"
        f"{location}\n"
        f"- Status: {get_value('status_additionalDetails')}\n"
    )
def format_palo_alto_logs(data_row):
    """Render a Palo Alto log record as a sectioned text report.

    The report has six sections (identifiers, traffic volume, timing, network
    device, application/user, security), each a run of ``Label: value`` lines
    followed by one blank line. Keys absent from *data_row* are shown as
    ``"Unknown"``.

    Args:
        data_row: Mapping holding the Palo Alto log fields.

    Returns:
        The formatted report string.
    """
    def lookup(field):
        return data_row.get(field, "Unknown")

    def render(pairs):
        # One "Label: value" line per (label, key) pair.
        return "".join(f"{label}: {lookup(field)}\n" for label, field in pairs)

    identifiers = render((
        ("Source", "src"),
        ("Destination", "dest"),
        ("Source IP", "src_ip"),
        ("Destination IP", "dest_ip"),
        ("Source Port", "src_port"),
        ("Destination Port", "dest_port"),
        ("Protocol", "protocol"),
    ))
    volume = render((
        ("Bytes", "bytes"),
        ("Bytes In", "bytes_in"),
        ("Bytes Out", "bytes_out"),
        ("Packets", "packets"),
        ("Packets In", "packets_in"),
        ("Packets Out", "packets_out"),
    ))

    # The date line is assembled from six separate fields, so it cannot go
    # through the generic (label, key) renderer.
    calendar_part = f"{lookup('date_year')}-{lookup('date_month')}-{lookup('date_mday')}"
    clock_part = f"{lookup('date_hour')}:{lookup('date_minute')}:{lookup('date_second')}"
    timing = (
        render((("Start Time", "start_time"),))
        + f"Date: {calendar_part} {clock_part}\n"
        + render((("Duration", "duration"),))
    )

    device = render((
        ("Source Zone", "src_zone"),
        ("Destination Zone", "dest_zone"),
        ("Source Interface", "src_interface"),
        ("Destination Interface", "dest_interface"),
        ("Device ID", "dvc"),
        ("Device Name", "dvc_name"),
    ))
    app_user = render((
        ("Application", "application"),
        ("User", "user"),
        ("User Agent", "user_agent"),
    ))
    security = render((
        ("Action", "action"),
        ("Severity", "severity"),
        ("Threat", "threat"),
        ("Threat Category", "threat_category"),
        ("Signature", "signature"),
        ("Signature ID", "signature_id"),
    ))

    sections = (identifiers, volume, timing, device, app_user, security)
    # Each section is followed by an extra newline, i.e. a blank separator line.
    return "".join(section + "\n" for section in sections)
def format_office365_logs(data_row):
    """Render an Office 365 log record as a sectioned, bulleted report.

    Six sections are emitted in order (actor, device, operation, policy,
    event, additional), each a ``- <Title>:`` header followed by
    `` - Label: value`` lines and one blank line. Keys absent from
    *data_row* are shown as ``"Unknown"``.

    Args:
        data_row: Mapping holding the Office 365 log fields.

    Returns:
        The formatted report string.
    """
    def lookup(field):
        return data_row.get(field, "Unknown")

    layout = (
        ("Actor Information", (
            ("User ID", "UserId"),
            ("User Key", "UserKey"),
            ("User Type", "UserType"),
            ("User Principal Name", "UserPrincipalName"),
            ("Actor IP Address", "ActorIpAddress"),
        )),
        ("Device Information", (
            ("Client IP", "ClientIP"),
            ("Client App ID", "ClientAppId"),
            # NOTE(review): this is the only key containing a dot; verify it
            # matches how the caller names nested fields.
            ("Client App Name", "AppAccessContext.ClientAppName"),
            ("Device ID", "DeviceId"),
            ("Device Name", "DeviceName"),
            ("Device Operating System", "DeviceOperatingSystem"),
        )),
        ("Operation Information", (
            ("Operation", "Operation"),
            ("Operation Properties", "OperationProperties"),
            ("Object ID", "ObjectId"),
            ("Object Type", "ObjectType"),
            ("Object Name", "ObjectName"),
        )),
        ("Policy Information", (
            ("Policy Details", "PolicyDetails"),
            ("Policy Identifier", "PolicyIdentifierString"),
            ("Policy Last Updated Time", "PolicyLastUpdatedTime"),
        )),
        ("Event Information", (
            ("Creation Time", "CreationTime"),
            ("Result Status", "ResultStatus"),
            ("Record Type", "RecordType"),
            ("Request ID", "RequestId"),
            ("Organization ID", "OrganizationId"),
            ("Organization Name", "OrganizationName"),
            ("Tenant ID", "TenantId"),
        )),
        ("Additional Information", (
            ("App Display Name", "ApplicationDisplayName"),
            ("User Agent", "UserAgent"),
            ("Session ID", "SessionId"),
        )),
    )

    chunks = []
    for title, pairs in layout:
        chunks.append(f"- {title}:\n")
        for label, field in pairs:
            chunks.append(f" - {label}: {lookup(field)}\n")
        # Blank line closing the section.
        chunks.append("\n")
    return "".join(chunks)
def flatten_json(json_data, parent_key="", separator="_"):
    """Collapse nested dictionaries into one flat dictionary.

    Keys of nested dicts are joined to their parent key with *separator*.
    Only ``dict`` values are descended into; every other value (lists
    included) is stored as-is. A nested dict with no entries contributes
    nothing to the result.

    Args:
        json_data: Dictionary to flatten.
        parent_key: Prefix for top-level keys; when empty, top-level keys are
            kept unchanged.
        separator: String placed between a parent key and a child key.

    Returns:
        A new flat dictionary.
    """
    def _walk(node, prefix):
        for name, value in node.items():
            # With no prefix the key is used untouched (no string formatting).
            path = f"{prefix}{separator}{name}" if prefix else name
            if isinstance(value, dict):
                yield from _walk(value, path)
            else:
                yield path, value

    return dict(_walk(json_data, parent_key))
def detect_log_type(log_data):
    """Guess the log source from the keys present in *log_data*.

    Signatures are tried in a fixed order (sophos, azure-sign-in, palo-alto,
    office365) and the first one whose keys are all present wins.

    Args:
        log_data: Container supporting ``in`` tests for field names.

    Returns:
        ``"sophos"``, ``"azure-sign-in"``, ``"palo-alto"``, ``"office365"``,
        or ``"Unknown"`` when no signature matches.
    """
    signatures = (
        ("sophos", ("device_name", "src_ip", "dst_ip")),
        ("azure-sign-in", ("userPrincipalName", "ipAddress")),
        ("palo-alto", ("src", "dest", "severity")),
        ("office365", ("UserId", "ClientIP", "Operation")),
    )
    for log_type, required in signatures:
        if all(field in log_data for field in required):
            return log_type
    return "Unknown"