File size: 9,322 Bytes
1e8f979
527cbfe
1e8f979
ca30f38
ce32810
1e8f979
3235017
ce32810
 
 
182de3d
 
 
 
 
ce32810
347f661
182de3d
347f661
 
 
 
 
 
 
 
 
 
 
ce32810
 
 
 
 
 
 
347f661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182de3d
ca30f38
ce32810
 
 
 
 
 
ca30f38
 
347f661
ca30f38
182de3d
347f661
ca30f38
182de3d
347f661
ce32810
182de3d
347f661
ce32810
182de3d
347f661
 
182de3d
347f661
ca30f38
 
1e8f979
347f661
ca30f38
182de3d
ca30f38
0713ea2
ca30f38
 
 
0713ea2
ca30f38
 
 
 
 
 
347f661
ca30f38
182de3d
 
ce32810
0713ea2
ca30f38
ce32810
0713ea2
ca30f38
ce32810
0713ea2
ca30f38
ce32810
0713ea2
ca30f38
ce32810
0713ea2
ca30f38
ce32810
0713ea2
ca30f38
 
0713ea2
ca30f38
182de3d
 
ca30f38
182de3d
 
0713ea2
 
 
 
 
 
 
 
 
 
 
 
ca30f38
 
1e8f979
ca30f38
527cbfe
 
bec2869
527cbfe
 
 
bec2869
527cbfe
 
8cd8a06
ca30f38
 
8cd8a06
ca30f38
8cd8a06
ca30f38
1e8f979
 
ca30f38
1e8f979
347f661
527cbfe
f230245
ca30f38
1e8f979
527cbfe
 
182de3d
527cbfe
ca30f38
182de3d
1e8f979
25824c1
182de3d
1e8f979
 
182de3d
1e8f979
04e48f0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os
import pandas as pd
import gradio as gr
from datetime import datetime
from transformers import pipeline

# Question-answering pipeline from `transformers`, created once at module
# import and reused by prompt_based_analysis() for every audit section.
qa_pipeline = pipeline("question-answering", model="deepset/deberta-v3-large-squad2")

# Run a single question against a context string through the shared QA pipeline
def prompt_based_analysis(question, context):
    """Return the QA pipeline's answer for *question* over *context*.

    Any exception raised while calling the pipeline or reading its
    ``'answer'`` field is converted into an ``"Error analyzing data: ..."``
    string, so the caller always receives text rather than an exception.
    """
    try:
        return qa_pipeline(question=question, context=context)['answer']
    except Exception as err:
        return "Error analyzing data: " + str(err)

# Return the value unchanged when it is a dict, otherwise an empty dict
def _as_mapping(value):
    """Coerce a nested audit section to a dict so ``.get`` lookups never fail."""
    return value if isinstance(value, dict) else {}


# Render a list-like audit field as a comma-separated string
def _join_items(value):
    """Join the items of *value* with ``", "``, tolerating scalars and ``None``."""
    if value is None:
        return ""
    if isinstance(value, str):
        # A bare string is a single item; joining it would split it into characters.
        return value
    try:
        return ", ".join(str(item) for item in value)
    except TypeError:
        # Non-iterable scalar, e.g. a number read from a single CSV cell.
        return str(value)


# Function to analyze audit data for GDPR compliance using the GDPR framework
def analyze_gdpr_compliance(audit_data):
    """Analyze audit data section by section and derive recommendations.

    Each section of *audit_data* is rendered into a short context string and
    passed, together with a GDPR-principle prompt, to
    ``prompt_based_analysis``.

    Nested sections (``disk_usage``, ``network_info``, ``security_measures``)
    are expected to be dicts and list sections (``running_processes``,
    ``software_inventory``, ``interfaces``) to be iterables of items. Values
    of any other shape (for example scalars coming from a flattened CSV row)
    are tolerated instead of raising.

    Args:
        audit_data: Dictionary of audit values keyed by section name.

    Returns:
        A ``(findings, recommendations)`` tuple: ``findings`` maps each
        section name to the analysis text, ``recommendations`` is a list of
        recommendation strings (possibly empty).
    """
    # GDPR Principles ("Data Minimization" is listed but not used in any prompt below)
    principles = {
        "Lawfulness, Fairness, and Transparency": "Ensure that data processing is done lawfully, fairly, and in a transparent manner.",
        "Purpose Limitation": "Ensure that data collected is for specified, explicit, and legitimate purposes.",
        "Data Minimization": "Ensure that data collected is adequate, relevant, and limited to what is necessary.",
        "Accuracy": "Ensure that personal data is accurate and up to date.",
        "Storage Limitation": "Ensure that personal data is kept no longer than necessary.",
        "Integrity and Confidentiality": "Ensure that personal data is processed securely to prevent unauthorized access, loss, or destruction."
    }

    # Nested sections may be missing or scalar; normalise them once up front.
    disk_usage = _as_mapping(audit_data.get('disk_usage'))
    network_info = _as_mapping(audit_data.get('network_info'))
    security_measures = _as_mapping(audit_data.get('security_measures'))

    # Prompt context from audit data
    system_info_context = f"OS Version: {audit_data.get('os_version', 'Unknown')}, Architecture: {audit_data.get('architecture', 'Unknown')}, Memory: {audit_data.get('memory', 'Unknown')}"
    disk_usage_context = f"Disk Usage: {disk_usage.get('usage_percent', 'Unknown')}%"
    network_info_context = f"Interfaces: {_join_items(network_info.get('interfaces'))}"
    security_measures_context = f"Encryption: {security_measures.get('encryption', False)}, Data Anonymization: {security_measures.get('data_anonymization', False)}"
    processes_context = f"Running Processes: {_join_items(audit_data.get('running_processes'))}"
    software_inventory_context = f"Software Installed: {_join_items(audit_data.get('software_inventory'))}"

    # Prompts for GDPR principles applied to sections
    system_info_analysis = prompt_based_analysis(
        f"Evaluate the system information in terms of GDPR compliance focusing on {principles['Lawfulness, Fairness, and Transparency']} and {principles['Purpose Limitation']}.", system_info_context
    )
    disk_usage_analysis = prompt_based_analysis(
        f"Evaluate the disk usage under the {principles['Storage Limitation']} principle and ensure compliance.", disk_usage_context
    )
    network_info_analysis = prompt_based_analysis(
        f"Evaluate the network interfaces with respect to {principles['Integrity and Confidentiality']}, identifying any potential security risks.", network_info_context
    )
    security_measures_analysis = prompt_based_analysis(
        f"Analyze the encryption and anonymization methods under the {principles['Integrity and Confidentiality']} principle, identifying any weaknesses.", security_measures_context
    )
    processes_analysis = prompt_based_analysis(
        f"Evaluate the running processes for GDPR compliance under {principles['Lawfulness, Fairness, and Transparency']}, focusing on unauthorized or risky processes.", processes_context
    )
    software_inventory_analysis = prompt_based_analysis(
        f"Assess the installed software for GDPR compliance focusing on {principles['Accuracy']} and {principles['Integrity and Confidentiality']}.", software_inventory_context
    )

    # Findings organized by section
    findings = {
        "system_info": system_info_analysis,
        "disk_usage": disk_usage_analysis,
        "network_info": network_info_analysis,
        "security_measures": security_measures_analysis,
        "running_processes": processes_analysis,
        "software_inventory": software_inventory_analysis,
    }

    # Lower-case each analysis once; the keyword checks below are case-insensitive.
    security_text = security_measures_analysis.lower()
    software_text = software_inventory_analysis.lower()
    processes_text = processes_analysis.lower()

    # Detailed Recommendations based on the findings
    # NOTE(review): apart from the "Unknown" check, these rules match literal
    # phrases in the model's answer text; whether the QA model ever produces
    # such phrases should be confirmed against real outputs.
    recommendations = []
    if "Unknown" in system_info_context:
        recommendations.append("Review System Information: Ensure that the OS, architecture, and memory configurations are well documented and up to date in accordance with GDPR transparency requirements.")

    if 'not encrypted' in security_text or 'no encryption' in security_text:
        recommendations.append("Implement Encryption: Ensure that both stored and transmitted data are encrypted to meet GDPR security requirements.")

    if 'not anonymized' in security_text:
        recommendations.append("Implement Data Anonymization: Ensure that sensitive data is anonymized during storage to comply with GDPR's confidentiality principle.")

    if 'outdated' in software_text or 'vulnerable' in software_text:
        recommendations.append("Update Software: Ensure that all installed software is up to date and free from known vulnerabilities to maintain the integrity and confidentiality of personal data.")

    if 'vulnerable processes' in processes_text or 'unauthorized processes' in processes_text:
        recommendations.append("Review Running Processes: Regularly audit running processes and ensure that no unauthorized or risky processes are running to maintain GDPR compliance.")

    return findings, recommendations

# Generate GDPR Compliance Report with advanced prompts for each section using the GDPR framework
def generate_gdpr_report(audit_data, company_name="Company Name", system_name="System Name"):
    """Build the plain-text GDPR compliance report for *audit_data*.

    Args:
        audit_data: Dictionary of audit values passed to
            ``analyze_gdpr_compliance``.
        company_name: Name inserted into the report header, summary and
            conclusion.
        system_name: Accepted for backward compatibility; the report
            template does not currently reference it.

    Returns:
        The formatted report as a single string. Every line keeps the
        four-space indentation of the template.
    """
    findings, recommendations = analyze_gdpr_compliance(audit_data)

    # Join with the template's indentation so every bullet lines up with the
    # first one, and state explicitly when there is nothing to recommend
    # instead of leaving the section blank.
    if recommendations:
        recommendations_text = "\n    ".join(f"- {rec}" for rec in recommendations)
    else:
        recommendations_text = "- No specific recommendations were identified."

    # Single timestamp so the header and the references show the same date.
    report_date = datetime.now().strftime('%Y-%m-%d')

    report_content = """
    GDPR Compliance Evaluation Report
    
    Title: GDPR Compliance Evaluation Report
    Date: {date}
    Prepared by: [Your Name]
    For: {company_name}
    
    Executive Summary:
    This report evaluates the compliance of {company_name} with the General Data Protection Regulation (GDPR).
    Based on the system audit and analysis of data handling processes, this report provides findings, identifies compliance gaps,
    and suggests recommendations to enhance GDPR adherence based on the key principles of GDPR, such as Lawfulness, Fairness, Transparency, Purpose Limitation, Data Minimization, Accuracy, Storage Limitation, and Integrity & Confidentiality.
    
    Key Findings:
    
    System Information Analysis:
    {system_info}
    
    Disk Usage Analysis:
    {disk_usage}
    
    Network Info Analysis:
    {network_info}
    
    Security Measures Analysis:
    {security_measures}
    
    Running Processes Analysis:
    {running_processes}
    
    Software Inventory Analysis:
    {software_inventory}
    
    Recommendations:
    {recommendations}
    
    Conclusion:
    The analysis shows that while {company_name} has some strong protective measures in place, there are several areas for improvement. Implementing the suggested recommendations will enhance {company_name}'s compliance with GDPR and reduce potential risks of non-compliance.
    
    References:
    - GDPR Regulation (EU) 2016/679
    - System Audit Report, {date}
    """.format(
        date=report_date,
        company_name=company_name,
        system_info=findings['system_info'],
        disk_usage=findings['disk_usage'],
        network_info=findings['network_info'],
        security_measures=findings['security_measures'],
        running_processes=findings['running_processes'],
        software_inventory=findings['software_inventory'],
        recommendations=recommendations_text,
    )

    return report_content

# Analyze CSV file input and convert it to JSON-like dictionary for processing
def analyze_csv_file(file_obj):
    """Read an audit CSV and flatten its rows into a single dictionary.

    Rows are merged in order, so when several rows provide a value for the
    same column the last non-missing value wins. Missing cells (NaN) are
    skipped: they neither overwrite a value from an earlier row nor appear
    in the result, which lets callers fall back to their own defaults.

    Args:
        file_obj: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        Dictionary mapping column names to their merged values.

    Raises:
        ValueError: If the CSV cannot be read; the original exception is
            chained as the cause.
    """
    # Read the CSV file into a pandas DataFrame
    try:
        df = pd.read_csv(file_obj)
    except Exception as e:
        raise ValueError(f"Error reading CSV file: {str(e)}") from e

    # Merge the row records into one JSON-like structure suitable for analysis
    audit_data_json = {}
    for record in df.to_dict(orient='records'):
        audit_data_json.update(
            {key: value for key, value in record.items() if not pd.isna(value)}
        )

    return audit_data_json

# Gradio Interface
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# GDPR Compliance Evaluation\n### Upload Audit Data in CSV Format")
        csv_file = gr.File(label="Upload CSV file")

        gdpr_compliance = gr.Textbox(lines=10, placeholder="GDPR Compliance Analysis...", label="GDPR Compliance Analysis")

        def run_compliance_checks(csv_file):
            """Turn the uploaded CSV into the report text shown in the output box.

            Returns a short message instead of a report when no file was
            uploaded or the file cannot be read as CSV.
            """
            if csv_file is None:
                return "No file uploaded"
            try:
                audit_data = analyze_csv_file(csv_file)
            except ValueError as exc:
                # analyze_csv_file wraps read failures in ValueError; show the
                # message in the output box rather than letting the callback fail.
                return str(exc)
            return generate_gdpr_report(audit_data)

        check_compliance_btn = gr.Button("Run Compliance Checks")
        check_compliance_btn.click(run_compliance_checks, inputs=[csv_file], outputs=[gdpr_compliance])

        # Reset the output box to an empty string.
        clear_btn = gr.Button("Clear")
        clear_btn.click(lambda: "", None, [gdpr_compliance])

# Start the app when the module is executed; share=True requests a public link.
demo.launch(share=True)