"""Gradio app that turns a CSV system audit into a GDPR compliance report.

The uploaded CSV is flattened into a single dictionary, each audit section is
passed to a question-answering model together with a GDPR-principle prompt,
and the answers plus rule-based recommendations are rendered as a text report.
"""

import json
import os
from datetime import datetime

import gradio as gr
import pandas as pd
from transformers import pipeline

# Question-answering model shared by every analysis call; loaded once at import.
qa_pipeline = pipeline("question-answering", model="deepset/deberta-v3-large-squad2")

# GDPR principles referenced by the prompts below.
GDPR_PRINCIPLES = {
    "Lawfulness, Fairness, and Transparency": "Ensure that data processing is done lawfully, fairly, and in a transparent manner.",
    "Purpose Limitation": "Ensure that data collected is for specified, explicit, and legitimate purposes.",
    "Data Minimization": "Ensure that data collected is adequate, relevant, and limited to what is necessary.",
    "Accuracy": "Ensure that personal data is accurate and up to date.",
    "Storage Limitation": "Ensure that personal data is kept no longer than necessary.",
    "Integrity and Confidentiality": "Ensure that personal data is processed securely to prevent unauthorized access, loss, or destruction.",
}

# String spellings treated as "disabled" when a security flag arrives as text
# (CSV cells are frequently strings rather than real booleans).
_FALSE_STRINGS = frozenset({"", "false", "no", "n", "0", "none", "off", "disabled"})

# Report layout; placeholders are filled by generate_gdpr_report().
REPORT_TEMPLATE = """
GDPR Compliance Evaluation Report

Title: GDPR Compliance Evaluation Report
Date: {date}
Prepared by: [Your Name]
For: {company_name}

Executive Summary:
This report evaluates the compliance of {company_name} with the General Data Protection Regulation (GDPR). Based on the system audit and analysis of data handling processes, this report provides findings, identifies compliance gaps, and suggests recommendations to enhance GDPR adherence based on the key principles of GDPR, such as Lawfulness, Fairness, Transparency, Purpose Limitation, Data Minimization, Accuracy, Storage Limitation, and Integrity & Confidentiality.

Key Findings:
System Information Analysis: {system_info}
Disk Usage Analysis: {disk_usage}
Network Info Analysis: {network_info}
Security Measures Analysis: {security_measures}
Running Processes Analysis: {running_processes}
Software Inventory Analysis: {software_inventory}

Recommendations:
{recommendations}

Conclusion:
The analysis shows that while {company_name} has some strong protective measures in place, there are several areas for improvement. Implementing the suggested recommendations will enhance {company_name}'s compliance with GDPR and reduce potential risks of non-compliance.

References:
- GDPR Regulation (EU) 2016/679
- System Audit Report, {date}
"""


def _is_missing(value):
    """Return True for ``None`` and float NaN (how pandas marks empty cells)."""
    return value is None or (isinstance(value, float) and value != value)


def _as_mapping(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_name_list(value):
    """Coerce *value* into a list of strings suitable for ``', '.join``.

    Strings are split on commas/semicolons (joining a bare string would
    otherwise iterate it character by character); missing values give ``[]``.
    """
    if _is_missing(value):
        return []
    if isinstance(value, str):
        parts = value.replace(";", ",").split(",")
        return [part.strip() for part in parts if part.strip()]
    if isinstance(value, (list, tuple, set)):
        return [str(item) for item in value]
    return [str(value)]


def _flag_enabled(value):
    """Interpret a security flag that may be a bool, a number or text."""
    if _is_missing(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def _parse_cell(value):
    """Decode a CSV cell holding a JSON object/array; return other cells as-is."""
    if isinstance(value, str) and value.lstrip()[:1] in ("{", "["):
        try:
            return json.loads(value)
        except ValueError:
            # Not valid JSON after all; keep the raw text.
            return value
    return value


# Function to analyze each section with prompts
def prompt_based_analysis(question, context):
    """Ask the QA model *question* about *context* and return the answer text.

    Failures are returned as an ``"Error analyzing data: ..."`` string rather
    than raised, so one failing section does not abort the whole report.
    """
    try:
        response = qa_pipeline(question=question, context=context)
        return response['answer']
    except Exception as e:  # deliberate best-effort boundary around the model call
        return f"Error analyzing data: {str(e)}"


# Function to analyze audit data for GDPR compliance using the GDPR framework
def analyze_gdpr_compliance(audit_data):
    """Analyze *audit_data* section by section against GDPR principles.

    Args:
        audit_data: dict of audit values. Keys read here: ``os_version``,
            ``architecture``, ``memory``, ``disk_usage`` (dict with
            ``usage_percent``), ``network_info`` (dict with ``interfaces``),
            ``security_measures`` (dict with ``encryption`` and
            ``data_anonymization``), ``running_processes`` and
            ``software_inventory``. Missing or wrongly-typed entries fall
            back to ``'Unknown'`` / empty values instead of raising.

    Returns:
        ``(findings, recommendations)``: a dict mapping section name to the
        model's answer, and a list of recommendation strings.
    """
    principles = GDPR_PRINCIPLES

    # Nested sections may be absent or arrive as scalars from a flat CSV.
    disk_usage = _as_mapping(audit_data.get('disk_usage'))
    network_info = _as_mapping(audit_data.get('network_info'))
    security_measures = _as_mapping(audit_data.get('security_measures'))
    encryption = security_measures.get('encryption', False)
    data_anonymization = security_measures.get('data_anonymization', False)

    # Prompt context from audit data
    system_info_context = (
        f"OS Version: {audit_data.get('os_version', 'Unknown')}, "
        f"Architecture: {audit_data.get('architecture', 'Unknown')}, "
        f"Memory: {audit_data.get('memory', 'Unknown')}"
    )
    disk_usage_context = f"Disk Usage: {disk_usage.get('usage_percent', 'Unknown')}%"
    network_info_context = f"Interfaces: {', '.join(_as_name_list(network_info.get('interfaces')))}"
    security_measures_context = f"Encryption: {encryption}, Data Anonymization: {data_anonymization}"
    processes_context = f"Running Processes: {', '.join(_as_name_list(audit_data.get('running_processes')))}"
    software_inventory_context = f"Software Installed: {', '.join(_as_name_list(audit_data.get('software_inventory')))}"

    # Prompts for GDPR principles applied to sections
    system_info_analysis = prompt_based_analysis(
        f"Evaluate the system information in terms of GDPR compliance focusing on {principles['Lawfulness, Fairness, and Transparency']} and {principles['Purpose Limitation']}.",
        system_info_context,
    )
    disk_usage_analysis = prompt_based_analysis(
        f"Evaluate the disk usage under the {principles['Storage Limitation']} principle and ensure compliance.",
        disk_usage_context,
    )
    network_info_analysis = prompt_based_analysis(
        f"Evaluate the network interfaces with respect to {principles['Integrity and Confidentiality']}, identifying any potential security risks.",
        network_info_context,
    )
    security_measures_analysis = prompt_based_analysis(
        f"Analyze the encryption and anonymization methods under the {principles['Integrity and Confidentiality']} principle, identifying any weaknesses.",
        security_measures_context,
    )
    processes_analysis = prompt_based_analysis(
        f"Evaluate the running processes for GDPR compliance under {principles['Lawfulness, Fairness, and Transparency']}, focusing on unauthorized or risky processes.",
        processes_context,
    )
    software_inventory_analysis = prompt_based_analysis(
        f"Assess the installed software for GDPR compliance focusing on {principles['Accuracy']} and {principles['Integrity and Confidentiality']}.",
        software_inventory_context,
    )

    # Findings organized by section
    findings = {
        "system_info": system_info_analysis,
        "disk_usage": disk_usage_analysis,
        "network_info": network_info_analysis,
        "security_measures": security_measures_analysis,
        "running_processes": processes_analysis,
        "software_inventory": software_inventory_analysis,
    }

    security_text = security_measures_analysis.lower()
    software_text = software_inventory_analysis.lower()
    processes_text = processes_analysis.lower()

    # Detailed Recommendations based on the findings
    recommendations = []
    if "Unknown" in system_info_context:
        recommendations.append("Review System Information: Ensure that the OS, architecture, and memory configurations are well documented and up to date in accordance with GDPR transparency requirements.")
    # The QA model answers with text drawn from the context ("Encryption: False, ..."),
    # so phrases like "not encrypted" rarely appear; the audited flags are checked
    # directly as well so these recommendations actually fire.
    if not _flag_enabled(encryption) or 'not encrypted' in security_text or 'no encryption' in security_text:
        recommendations.append("Implement Encryption: Ensure that both stored and transmitted data are encrypted to meet GDPR security requirements.")
    if not _flag_enabled(data_anonymization) or 'not anonymized' in security_text:
        recommendations.append("Implement Data Anonymization: Ensure that sensitive data is anonymized during storage to comply with GDPR's confidentiality principle.")
    if 'outdated' in software_text or 'vulnerable' in software_text:
        recommendations.append("Update Software: Ensure that all installed software is up to date and free from known vulnerabilities to maintain the integrity and confidentiality of personal data.")
    if 'vulnerable processes' in processes_text or 'unauthorized processes' in processes_text:
        recommendations.append("Review Running Processes: Regularly audit running processes and ensure that no unauthorized or risky processes are running to maintain GDPR compliance.")

    return findings, recommendations


# Generate GDPR Compliance Report with advanced prompts for each section using the GDPR framework
def generate_gdpr_report(audit_data, company_name="Company Name", system_name="System Name"):
    """Render the GDPR compliance report for *audit_data* as plain text.

    Args:
        audit_data: audit dictionary, as accepted by analyze_gdpr_compliance().
        company_name: name inserted into the report text.
        system_name: accepted for interface compatibility; the report template
            does not currently reference it.

    Returns:
        The formatted report string.
    """
    findings, recommendations = analyze_gdpr_compliance(audit_data)
    report_content = REPORT_TEMPLATE.format(
        date=datetime.now().strftime('%Y-%m-%d'),
        company_name=company_name,
        system_info=findings['system_info'],
        disk_usage=findings['disk_usage'],
        network_info=findings['network_info'],
        security_measures=findings['security_measures'],
        running_processes=findings['running_processes'],
        software_inventory=findings['software_inventory'],
        recommendations=''.join(f'- {rec}\n' for rec in recommendations),
    )
    return report_content


# Analyze CSV file input and convert it to JSON-like dictionary for processing
def analyze_csv_file(file_obj):
    """Read an audit CSV and merge all of its rows into one dictionary.

    Args:
        file_obj: a path, a file-like object, or an upload wrapper exposing the
            temp-file path as ``.name``.

    Returns:
        Dict mapping column name to value. Later rows override earlier ones,
        empty cells are skipped (so they never overwrite a real value), and
        cells containing a JSON object/array are decoded.

    Raises:
        ValueError: if pandas cannot read the CSV.
    """
    # Upload wrappers carry the real path in ``.name``; prefer it so pandas
    # opens the file itself regardless of the wrapper's mode or position.
    source = getattr(file_obj, "name", file_obj)
    if not isinstance(source, (str, os.PathLike)):
        source = file_obj

    # Read the CSV file into a pandas DataFrame
    try:
        df = pd.read_csv(source)
    except Exception as e:
        raise ValueError(f"Error reading CSV file: {str(e)}") from e

    # Convert the rows to a JSON-like structure suitable for analysis
    audit_data_json = {}
    for record in df.to_dict(orient='records'):
        for key, value in record.items():
            if _is_missing(value):
                continue
            audit_data_json[key] = _parse_cell(value)
    return audit_data_json


def run_compliance_checks(csv_file):
    """Gradio callback: build the report for the uploaded CSV, or return a message."""
    if csv_file is None:
        return "No file uploaded"
    try:
        audit_data = analyze_csv_file(csv_file)
    except ValueError as e:
        # Show the parsing problem in the output box instead of a raw traceback.
        return str(e)
    gdpr_report = generate_gdpr_report(audit_data)
    return gdpr_report


# Gradio Interface
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# GDPR Compliance Evaluation\n### Upload Audit Data in CSV Format")
        csv_file = gr.File(label="Upload CSV file")
        gdpr_compliance = gr.Textbox(lines=10, placeholder="GDPR Compliance Analysis...", label="GDPR Compliance Analysis")

        check_compliance_btn = gr.Button("Run Compliance Checks")
        check_compliance_btn.click(run_compliance_checks, inputs=[csv_file], outputs=[gdpr_compliance])

        clear_btn = gr.Button("Clear")
        clear_btn.click(lambda: "", None, [gdpr_compliance])

# NOTE(review): share=True requests a public share link; confirm that is
# intended for a tool that receives system audit data.
demo.launch(share=True)