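# NOTE: "Spaces: Sleeping / Sleeping" was hosting-page status text captured
# along with this source; it is not part of the program.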
import json
import os
from datetime import datetime

import gradio as gr
import pandas as pd
from transformers import pipeline
# Question-answering pipeline created once at import time and reused by
# every call to prompt_based_analysis.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/deberta-v3-large-squad2",
)
# Function to analyze each section with prompts
def prompt_based_analysis(question, context):
    """Ask the module-level QA pipeline one question about one context.

    Args:
        question: Text forwarded to the pipeline as ``question``.
        context: Text forwarded to the pipeline as ``context``.

    Returns:
        The ``'answer'`` entry of the pipeline response. If the call or the
        lookup raises, a string of the form ``"Error analyzing data: <msg>"``
        is returned instead, so callers always receive a value.
    """
    try:
        result = qa_pipeline(question=question, context=context)
        answer = result['answer']
    except Exception as exc:
        # Best-effort boundary: report the failure as text rather than raise.
        return f"Error analyzing data: {exc}"
    return answer
def _as_mapping(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_text_list(value):
    """Coerce an audit field into a list of strings that is safe to join."""
    if value is None:
        return []
    if isinstance(value, str):
        # A flat export can only carry a list as comma-delimited text.
        return [part.strip() for part in value.split(',') if part.strip()]
    if isinstance(value, (list, tuple, set)):
        return [str(item) for item in value]
    if value != value:  # NaN is the only value that differs from itself
        return []
    return [str(value)]


def _flag_enabled(value):
    """Interpret an on/off audit flag given as a bool, number, or text."""
    if isinstance(value, str):
        return value.strip().lower() in {"true", "yes", "y", "1", "on", "enabled"}
    if value is None or value != value:  # None or NaN
        return False
    return bool(value)


# Function to analyze audit data for GDPR compliance using the GDPR framework
def analyze_gdpr_compliance(audit_data):
    """Run the per-section GDPR prompts and derive recommendations.

    Args:
        audit_data: Dict of audit results. Keys read here, all optional:
            ``os_version``, ``architecture``, ``memory``, ``disk_usage``
            (dict with ``usage_percent``), ``network_info`` (dict with
            ``interfaces``), ``security_measures`` (dict with ``encryption``
            and ``data_anonymization``), ``running_processes`` and
            ``software_inventory``. Sections that are missing or not of the
            expected shape are treated as empty instead of raising.

    Returns:
        A ``(findings, recommendations)`` tuple: ``findings`` maps each
        section name to the text returned by ``prompt_based_analysis``;
        ``recommendations`` is a list of recommendation strings.
    """
    # GDPR Principles
    principles = {
        "Lawfulness, Fairness, and Transparency": "Ensure that data processing is done lawfully, fairly, and in a transparent manner.",
        "Purpose Limitation": "Ensure that data collected is for specified, explicit, and legitimate purposes.",
        "Data Minimization": "Ensure that data collected is adequate, relevant, and limited to what is necessary.",
        "Accuracy": "Ensure that personal data is accurate and up to date.",
        "Storage Limitation": "Ensure that personal data is kept no longer than necessary.",
        "Integrity and Confidentiality": "Ensure that personal data is processed securely to prevent unauthorized access, loss, or destruction.",
    }

    # Normalise the nested sections once so a scalar or blank value cannot
    # break the chained lookups and joins below.
    disk_usage = _as_mapping(audit_data.get('disk_usage'))
    network_info = _as_mapping(audit_data.get('network_info'))
    security_measures = _as_mapping(audit_data.get('security_measures'))
    encryption = security_measures.get('encryption', False)
    data_anonymization = security_measures.get('data_anonymization', False)
    interfaces = _as_text_list(network_info.get('interfaces'))
    running_processes = _as_text_list(audit_data.get('running_processes'))
    software_inventory = _as_text_list(audit_data.get('software_inventory'))

    # Prompt context from audit data
    system_info_context = f"OS Version: {audit_data.get('os_version', 'Unknown')}, Architecture: {audit_data.get('architecture', 'Unknown')}, Memory: {audit_data.get('memory', 'Unknown')}"
    disk_usage_context = f"Disk Usage: {disk_usage.get('usage_percent', 'Unknown')}%"
    network_info_context = f"Interfaces: {', '.join(interfaces)}"
    security_measures_context = f"Encryption: {encryption}, Data Anonymization: {data_anonymization}"
    processes_context = f"Running Processes: {', '.join(running_processes)}"
    software_inventory_context = f"Software Installed: {', '.join(software_inventory)}"

    # Prompts for GDPR principles applied to sections
    system_info_analysis = prompt_based_analysis(
        f"Evaluate the system information in terms of GDPR compliance focusing on {principles['Lawfulness, Fairness, and Transparency']} and {principles['Purpose Limitation']}.",
        system_info_context,
    )
    disk_usage_analysis = prompt_based_analysis(
        f"Evaluate the disk usage under the {principles['Storage Limitation']} principle and ensure compliance.",
        disk_usage_context,
    )
    network_info_analysis = prompt_based_analysis(
        f"Evaluate the network interfaces with respect to {principles['Integrity and Confidentiality']}, identifying any potential security risks.",
        network_info_context,
    )
    security_measures_analysis = prompt_based_analysis(
        f"Analyze the encryption and anonymization methods under the {principles['Integrity and Confidentiality']} principle, identifying any weaknesses.",
        security_measures_context,
    )
    processes_analysis = prompt_based_analysis(
        f"Evaluate the running processes for GDPR compliance under {principles['Lawfulness, Fairness, and Transparency']}, focusing on unauthorized or risky processes.",
        processes_context,
    )
    software_inventory_analysis = prompt_based_analysis(
        f"Assess the installed software for GDPR compliance focusing on {principles['Accuracy']} and {principles['Integrity and Confidentiality']}.",
        software_inventory_context,
    )

    # Findings organized by section
    findings = {
        "system_info": system_info_analysis,
        "disk_usage": disk_usage_analysis,
        "network_info": network_info_analysis,
        "security_measures": security_measures_analysis,
        "running_processes": processes_analysis,
        "software_inventory": software_inventory_analysis,
    }

    # Detailed Recommendations based on the findings
    security_text = security_measures_analysis.lower()
    software_text = software_inventory_analysis.lower()
    processes_text = processes_analysis.lower()

    recommendations = []
    if "Unknown" in system_info_context:
        recommendations.append("Review System Information: Ensure that the OS, architecture, and memory configurations are well documented and up to date in accordance with GDPR transparency requirements.")
    # The audit flags are checked directly as well as the model text: a
    # question-answering answer is drawn from the context ("Encryption:
    # False, ..."), so it is unlikely to ever contain "not encrypted".
    if (
        not _flag_enabled(encryption)
        or 'not encrypted' in security_text
        or 'no encryption' in security_text
    ):
        recommendations.append("Implement Encryption: Ensure that both stored and transmitted data are encrypted to meet GDPR security requirements.")
    if not _flag_enabled(data_anonymization) or 'not anonymized' in security_text:
        recommendations.append("Implement Data Anonymization: Ensure that sensitive data is anonymized during storage to comply with GDPR's confidentiality principle.")
    if 'outdated' in software_text or 'vulnerable' in software_text:
        recommendations.append("Update Software: Ensure that all installed software is up to date and free from known vulnerabilities to maintain the integrity and confidentiality of personal data.")
    if 'vulnerable processes' in processes_text or 'unauthorized processes' in processes_text:
        recommendations.append("Review Running Processes: Regularly audit running processes and ensure that no unauthorized or risky processes are running to maintain GDPR compliance.")
    return findings, recommendations
# Generate GDPR Compliance Report with advanced prompts for each section using the GDPR framework
def generate_gdpr_report(audit_data, company_name="Company Name", system_name="System Name"):
    """Build the plain-text GDPR compliance report for one audit.

    Args:
        audit_data: Audit dictionary passed to ``analyze_gdpr_compliance``.
        company_name: Name inserted wherever the report refers to the company.
        system_name: Name of the audited system, shown in the report header.

    Returns:
        The report as a single string, dated with today's date
        (``YYYY-MM-DD``).
    """
    findings, recommendations = analyze_gdpr_compliance(audit_data)

    if recommendations:
        recommendations_text = ''.join(f'- {rec}\n' for rec in recommendations)
    else:
        # Avoid a bare "Recommendations:" header with nothing under it.
        recommendations_text = '- No specific recommendations were identified.\n'

    # The findings are passed as format *values*, so braces inside model
    # output are never interpreted as placeholders.
    report_content = """
GDPR Compliance Evaluation Report
Title: GDPR Compliance Evaluation Report
Date: {date}
Prepared by: [Your Name]
For: {company_name}
System: {system_name}
Executive Summary:
This report evaluates the compliance of {company_name} with the General Data Protection Regulation (GDPR).
Based on the system audit and analysis of data handling processes, this report provides findings, identifies compliance gaps,
and suggests recommendations to enhance GDPR adherence based on the key principles of GDPR, such as Lawfulness, Fairness, Transparency, Purpose Limitation, Data Minimization, Accuracy, Storage Limitation, and Integrity & Confidentiality.
Key Findings:
System Information Analysis:
{system_info}
Disk Usage Analysis:
{disk_usage}
Network Info Analysis:
{network_info}
Security Measures Analysis:
{security_measures}
Running Processes Analysis:
{running_processes}
Software Inventory Analysis:
{software_inventory}
Recommendations:
{recommendations}
Conclusion:
The analysis shows that while {company_name} has some strong protective measures in place, there are several areas for improvement. Implementing the suggested recommendations will enhance {company_name}'s compliance with GDPR and reduce potential risks of non-compliance.
References:
- GDPR Regulation (EU) 2016/679
- System Audit Report, {date}
""".format(
        date=datetime.now().strftime('%Y-%m-%d'),
        company_name=company_name,
        system_name=system_name,
        system_info=findings['system_info'],
        disk_usage=findings['disk_usage'],
        network_info=findings['network_info'],
        security_measures=findings['security_measures'],
        running_processes=findings['running_processes'],
        software_inventory=findings['software_inventory'],
        recommendations=recommendations_text,
    )
    return report_content
def _decode_json_cell(value):
    """Decode a cell holding a JSON object/array; return anything else as is."""
    if not isinstance(value, str):
        return value
    text = value.strip()
    if not text.startswith(('{', '[')):
        return value
    try:
        return json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return value


# Analyze CSV file input and convert it to JSON-like dictionary for processing
def analyze_csv_file(file_obj):
    """Read an audit CSV and flatten its rows into one dictionary.

    Rows are merged in order, so a later row's value replaces an earlier
    one for the same column. Blank cells (read by pandas as NaN) are
    skipped, which keeps them from overwriting real values and lets
    consumers fall back to their own defaults. Cells containing a JSON
    object or array are decoded into dicts/lists; other cells are kept
    exactly as pandas parsed them.

    Args:
        file_obj: A path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A dict mapping column names to the merged cell values.

    Raises:
        ValueError: If pandas cannot read the input as CSV. The original
            exception is attached as ``__cause__``.
    """
    # Read the CSV file into a pandas DataFrame
    try:
        df = pd.read_csv(file_obj)
    except Exception as e:
        raise ValueError(f"Error reading CSV file: {str(e)}") from e

    # Merge the row records into a single JSON-like structure
    audit_data_json = {}
    for record in df.to_dict(orient='records'):
        for key, value in record.items():
            if pd.isna(value):
                continue
            audit_data_json[key] = _decode_json_cell(value)
    return audit_data_json
# Gradio Interface
# NOTE(review): the original indentation was lost; the widgets are assumed to
# sit inside the column and the buttons directly under the Blocks context —
# confirm against the deployed layout.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# GDPR Compliance Evaluation\n### Upload Audit Data in CSV Format")
        csv_file = gr.File(label="Upload CSV file")
        gdpr_compliance = gr.Textbox(lines=10, placeholder="GDPR Compliance Analysis...", label="GDPR Compliance Analysis")

    def run_compliance_checks(csv_file):
        """Turn the uploaded CSV into the report text shown in the textbox.

        Returns "No file uploaded" when nothing was provided, the CSV
        reader's error message when the file cannot be parsed, and the
        generated report otherwise.
        """
        if csv_file is None:
            return "No file uploaded"
        try:
            audit_data = analyze_csv_file(csv_file)
        except ValueError as exc:
            # analyze_csv_file raises ValueError for unreadable CSV input;
            # show that message instead of a generic UI failure.
            return str(exc)
        return generate_gdpr_report(audit_data)

    check_compliance_btn = gr.Button("Run Compliance Checks")
    check_compliance_btn.click(run_compliance_checks, inputs=[csv_file], outputs=[gdpr_compliance])

    clear_btn = gr.Button("Clear")
    # Resets only the report textbox; the uploaded file is left in place.
    clear_btn.click(lambda: "", None, [gdpr_compliance])

demo.launch(share=True)