# Consultus_Legis / app.py
# michaelmc1618's picture
# Update app.py
# 3235017 verified
import os
import pandas as pd
import gradio as gr
from datetime import datetime
from transformers import pipeline
# Question-answering pipeline backed by the checkpoint named below. It is
# built once, when this module is imported, and reused by every call to
# prompt_based_analysis().
qa_pipeline = pipeline("question-answering", model="deepset/deberta-v3-large-squad2")
# Function to analyze each section with prompts
def prompt_based_analysis(question, context):
    """Ask the module-level QA pipeline one question about one context string.

    Args:
        question: The prompt passed to the pipeline as ``question``.
        context: The text passed to the pipeline as ``context``.

    Returns:
        The ``'answer'`` entry of the pipeline response. If the call or the
        lookup raises any ``Exception``, the text
        ``"Error analyzing data: <message>"`` is returned instead, so callers
        always receive a string-like result rather than an exception.
    """
    try:
        result = qa_pipeline(question=question, context=context)
        outcome = result['answer']
    except Exception as exc:
        # Best-effort by design: the failure is reported in-band as text.
        outcome = "Error analyzing data: " + str(exc)
    return outcome
# Helpers for analyze_gdpr_compliance: tolerate audit data that was flattened
# from a CSV row, where nested sections arrive as scalars, strings or NaN.

# Text values (lower-cased, stripped) that mean "this safeguard is off".
_DISABLED_FLAG_TEXT = frozenset(
    {"", "0", "false", "no", "n", "off", "none", "disabled", "nan"}
)


def _is_missing(value):
    """Return True for None and for NaN (the only value unequal to itself)."""
    if value is None:
        return True
    try:
        return bool(value != value)
    except (TypeError, ValueError):
        # Containers whose comparison is not a single truth value are present.
        return False


def _audit_section(audit_data, section):
    """Return ``audit_data[section]`` if it is a dict, otherwise an empty dict."""
    value = audit_data.get(section)
    return value if isinstance(value, dict) else {}


def _join_items(value):
    """Render an audit value as comma-separated text without raising."""
    if _is_missing(value):
        return ""
    if isinstance(value, str):
        # Joining a string would separate its characters; keep it whole.
        return value
    try:
        return ", ".join(str(item) for item in value)
    except TypeError:
        # Not iterable (e.g. a number): show it as-is.
        return str(value)


def _flag_enabled(value):
    """Interpret a safeguard flag given as a bool, a number or CSV text."""
    if _is_missing(value):
        return False
    if isinstance(value, str):
        return value.strip().lower() not in _DISABLED_FLAG_TEXT
    return bool(value)


# Function to analyze audit data for GDPR compliance using the GDPR framework
def analyze_gdpr_compliance(audit_data):
    """Analyze audit data section by section and derive recommendations.

    One context string is built per audit section, and
    ``prompt_based_analysis`` is asked a principle-focused question about
    each. Sections that are missing, or that are not shaped as expected
    (for example scalars from a flat CSV row), fall back to the same
    defaults as a missing section instead of raising.

    Args:
        audit_data: Dict of audit values. Recognised keys are ``os_version``,
            ``architecture``, ``memory``, ``disk_usage`` (dict with
            ``usage_percent``), ``network_info`` (dict with ``interfaces``),
            ``security_measures`` (dict with ``encryption`` and
            ``data_anonymization``), ``running_processes`` and
            ``software_inventory``.

    Returns:
        A ``(findings, recommendations)`` tuple: ``findings`` maps each
        section name to the analysis text, ``recommendations`` is a list of
        recommendation strings (possibly empty).
    """
    # GDPR Principles
    principles = {
        "Lawfulness, Fairness, and Transparency": "Ensure that data processing is done lawfully, fairly, and in a transparent manner.",
        "Purpose Limitation": "Ensure that data collected is for specified, explicit, and legitimate purposes.",
        "Data Minimization": "Ensure that data collected is adequate, relevant, and limited to what is necessary.",
        "Accuracy": "Ensure that personal data is accurate and up to date.",
        "Storage Limitation": "Ensure that personal data is kept no longer than necessary.",
        "Integrity and Confidentiality": "Ensure that personal data is processed securely to prevent unauthorized access, loss, or destruction."
    }

    # Nested sections, normalised so that keyed lookups cannot fail.
    disk_usage = _audit_section(audit_data, 'disk_usage')
    network_info = _audit_section(audit_data, 'network_info')
    security_measures = _audit_section(audit_data, 'security_measures')
    encryption = security_measures.get('encryption', False)
    data_anonymization = security_measures.get('data_anonymization', False)

    # Prompt context from audit data
    system_info_context = f"OS Version: {audit_data.get('os_version', 'Unknown')}, Architecture: {audit_data.get('architecture', 'Unknown')}, Memory: {audit_data.get('memory', 'Unknown')}"
    disk_usage_context = f"Disk Usage: {disk_usage.get('usage_percent', 'Unknown')}%"
    network_info_context = f"Interfaces: {_join_items(network_info.get('interfaces', []))}"
    security_measures_context = f"Encryption: {encryption}, Data Anonymization: {data_anonymization}"
    processes_context = f"Running Processes: {_join_items(audit_data.get('running_processes', []))}"
    software_inventory_context = f"Software Installed: {_join_items(audit_data.get('software_inventory', []))}"

    # Prompts for GDPR principles applied to sections
    system_info_analysis = prompt_based_analysis(
        f"Evaluate the system information in terms of GDPR compliance focusing on {principles['Lawfulness, Fairness, and Transparency']} and {principles['Purpose Limitation']}.", system_info_context
    )
    disk_usage_analysis = prompt_based_analysis(
        f"Evaluate the disk usage under the {principles['Storage Limitation']} principle and ensure compliance.", disk_usage_context
    )
    network_info_analysis = prompt_based_analysis(
        f"Evaluate the network interfaces with respect to {principles['Integrity and Confidentiality']}, identifying any potential security risks.", network_info_context
    )
    security_measures_analysis = prompt_based_analysis(
        f"Analyze the encryption and anonymization methods under the {principles['Integrity and Confidentiality']} principle, identifying any weaknesses.", security_measures_context
    )
    processes_analysis = prompt_based_analysis(
        f"Evaluate the running processes for GDPR compliance under {principles['Lawfulness, Fairness, and Transparency']}, focusing on unauthorized or risky processes.", processes_context
    )
    software_inventory_analysis = prompt_based_analysis(
        f"Assess the installed software for GDPR compliance focusing on {principles['Accuracy']} and {principles['Integrity and Confidentiality']}.", software_inventory_context
    )

    # Findings organized by section
    findings = {
        "system_info": system_info_analysis,
        "disk_usage": disk_usage_analysis,
        "network_info": network_info_analysis,
        "security_measures": security_measures_analysis,
        "running_processes": processes_analysis,
        "software_inventory": software_inventory_analysis,
    }

    security_text = security_measures_analysis.lower()
    software_text = software_inventory_analysis.lower()
    processes_text = processes_analysis.lower()

    # Detailed Recommendations based on the findings
    recommendations = []
    if "Unknown" in system_info_context:
        recommendations.append("Review System Information: Ensure that the OS, architecture, and memory configurations are well documented and up to date in accordance with GDPR transparency requirements.")
    # The QA answer is expected to be a span copied from the context
    # ("Encryption: False, ..."), which never reads "not encrypted"; the audit
    # flags themselves are therefore consulted as well as the answer text.
    if (
        not _flag_enabled(encryption)
        or 'not encrypted' in security_text
        or 'no encryption' in security_text
    ):
        recommendations.append("Implement Encryption: Ensure that both stored and transmitted data are encrypted to meet GDPR security requirements.")
    if not _flag_enabled(data_anonymization) or 'not anonymized' in security_text:
        recommendations.append("Implement Data Anonymization: Ensure that sensitive data is anonymized during storage to comply with GDPR's confidentiality principle.")
    if 'outdated' in software_text or 'vulnerable' in software_text:
        recommendations.append("Update Software: Ensure that all installed software is up to date and free from known vulnerabilities to maintain the integrity and confidentiality of personal data.")
    if 'vulnerable processes' in processes_text or 'unauthorized processes' in processes_text:
        recommendations.append("Review Running Processes: Regularly audit running processes and ensure that no unauthorized or risky processes are running to maintain GDPR compliance.")

    return findings, recommendations
# Generate GDPR Compliance Report with advanced prompts for each section using the GDPR framework
def generate_gdpr_report(audit_data, company_name="Company Name", system_name="System Name"):
    """Render the plain-text GDPR compliance report for one set of audit data.

    Args:
        audit_data: Audit dictionary handed to ``analyze_gdpr_compliance``.
        company_name: Name inserted wherever the report mentions the company.
        system_name: Accepted for callers that pass it; the current template
            does not reference it.

    Returns:
        The filled-in report text. The date fields hold today's date as
        ``YYYY-MM-DD`` and each recommendation is rendered as a ``- `` bullet
        on its own line.
    """
    section_findings, advice = analyze_gdpr_compliance(audit_data)

    template = """
GDPR Compliance Evaluation Report
Title: GDPR Compliance Evaluation Report
Date: {date}
Prepared by: [Your Name]
For: {company_name}
Executive Summary:
This report evaluates the compliance of {company_name} with the General Data Protection Regulation (GDPR).
Based on the system audit and analysis of data handling processes, this report provides findings, identifies compliance gaps,
and suggests recommendations to enhance GDPR adherence based on the key principles of GDPR, such as Lawfulness, Fairness, Transparency, Purpose Limitation, Data Minimization, Accuracy, Storage Limitation, and Integrity & Confidentiality.
Key Findings:
System Information Analysis:
{system_info}
Disk Usage Analysis:
{disk_usage}
Network Info Analysis:
{network_info}
Security Measures Analysis:
{security_measures}
Running Processes Analysis:
{running_processes}
Software Inventory Analysis:
{software_inventory}
Recommendations:
{recommendations}
Conclusion:
The analysis shows that while {company_name} has some strong protective measures in place, there are several areas for improvement. Implementing the suggested recommendations will enhance {company_name}'s compliance with GDPR and reduce potential risks of non-compliance.
References:
- GDPR Regulation (EU) 2016/679
- System Audit Report, {date}
"""

    # Values for the template placeholders, gathered in one place.
    fields = {
        'date': datetime.now().strftime('%Y-%m-%d'),
        'company_name': company_name,
        'system_info': section_findings['system_info'],
        'disk_usage': section_findings['disk_usage'],
        'network_info': section_findings['network_info'],
        'security_measures': section_findings['security_measures'],
        'running_processes': section_findings['running_processes'],
        'software_inventory': section_findings['software_inventory'],
        'recommendations': ''.join([f'- {item}\n' for item in advice]),
    }
    return template.format(**fields)
# Analyze CSV file input and convert it to JSON-like dictionary for processing
def analyze_csv_file(file_obj):
    """Read an audit CSV and flatten its rows into one ``{column: value}`` dict.

    Rows are merged top to bottom, so when several rows fill the same column
    the last non-empty value wins. Empty cells (which pandas reads as NaN)
    are skipped: they neither appear in the result nor overwrite a value
    taken from an earlier row, which lets callers' ``.get(key, default)``
    fallbacks apply to columns that were left blank.

    Args:
        file_obj: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        A dict mapping column names to the merged cell values; empty when the
        file has a header but no data.

    Raises:
        ValueError: If the CSV cannot be read or parsed. The original
            exception is attached as the cause.
    """
    # Read the CSV file into a pandas DataFrame
    try:
        df = pd.read_csv(file_obj)
    except Exception as e:
        raise ValueError(f"Error reading CSV file: {str(e)}") from e

    # Merge the per-row records, ignoring cells pandas reports as missing.
    audit_data_json = {}
    for record in df.to_dict(orient='records'):
        audit_data_json.update(
            {column: value for column, value in record.items() if not pd.isna(value)}
        )
    return audit_data_json
# Gradio Interface
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("# GDPR Compliance Evaluation\n### Upload Audit Data in CSV Format")
        csv_file = gr.File(label="Upload CSV file")
        gdpr_compliance = gr.Textbox(lines=10, placeholder="GDPR Compliance Analysis...", label="GDPR Compliance Analysis")

        def run_compliance_checks(csv_file):
            """Click handler: turn the uploaded CSV into the report text.

            Returns "No file uploaded" when nothing was uploaded, the
            ValueError message when analyze_csv_file rejects the file, and
            otherwise the text produced by generate_gdpr_report.
            """
            if csv_file is None:
                return "No file uploaded"
            try:
                audit_data = analyze_csv_file(csv_file)
            except ValueError as exc:
                # analyze_csv_file raises ValueError for unreadable CSVs; show
                # its message in the output box instead of failing the event.
                return str(exc)
            return generate_gdpr_report(audit_data)

        check_compliance_btn = gr.Button("Run Compliance Checks")
        check_compliance_btn.click(run_compliance_checks, inputs=[csv_file], outputs=[gdpr_compliance])
        clear_btn = gr.Button("Clear")
        # Clearing resets only the analysis textbox.
        clear_btn.click(lambda: "", None, [gdpr_compliance])

demo.launch(share=True)