Spaces:

Tirath5504
/

contract_compliance

Sleeping

App Files Files Community

Tirath5504 committed on Mar 21

Commit

30646ea

verified ·

1 Parent(s): 6074ede

Create app.py

Browse files

Files changed (1) hide show

app.py +390 -0

app.py ADDED Viewed

	@@ -0,0 +1,390 @@

+import os
+import gradio as gr
+import PyPDF2
+from groq import Groq
+import smtplib
+from fpdf import FPDF
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.base import MIMEBase
+from email import encoders
+import matplotlib.pyplot as plt
+import numpy as np
+import spacy
# Shared Groq API client. It is built with no arguments, so credentials must
# come from the SDK's defaults (presumably the GROQ_API_KEY environment
# variable -- confirm against the deployment).
client = Groq()

# Load the small English spaCy pipeline, downloading it on first use if it is
# not installed yet.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    import subprocess
    import sys

    # Run the download with the interpreter that is executing this app
    # (sys.executable) so the model is installed into the same environment,
    # and pass the command as an argument list so no shell is involved.
    # check=True reports a failed download right here instead of letting it
    # resurface as a second, less informative OSError from spacy.load().
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
# Global variables
# Task outputs produced so far, keyed first by file name and then by task
# name. process_files() writes the entries; email_summary() and
# answer_questions() read them back.
generated_summaries = {}

# Regulatory data
# Phrases that detect_risks() looks for (case-insensitively) in every chunk.
RISK_KEYWORDS = [
    "penalty",
    "breach",
    "liability",
    "default",
    "hidden obligations",
    "indemnity",
    "terms of service",
    "non-compliance",
    "force majeure",
]

# Regulations tracked by check_regulatory_compliance(); a chunk that mentions
# an entry's "name" is reported together with its description, status and
# last-updated date.
REGULATORY_UPDATES = [
    {
        "name": "GDPR",
        "last_updated": "2023-12-01",
        "description": "General Data Protection Regulation updates on user consent.",
        "status": "Compliant",
    },
    {
        "name": "PCI DSS",
        "last_updated": "2024-01-01",
        "description": "Payment Card Industry Data Security Standard updates for credit card transactions.",
        "status": "Pending Review",
    },
]

# Keyword groups used by check_regulatory_compliance_with_nlp(); a chunk that
# contains any keyword is reported under the owning category.
REGULATORY_CATEGORIES = [
    {
        "category": "Data Privacy",
        "keywords": [
            "personal data",
            "user consent",
            "data breach",
            "GDPR",
            "data protection",
        ],
        "description": "Regulations related to user data privacy and protection.",
    },
    {
        "category": "Financial Compliance",
        "keywords": [
            "payment card",
            "PCI DSS",
            "credit card security",
            "financial transactions",
        ],
        "description": "Regulations related to financial data security and transactions.",
    },
    {
        "category": "Health Information Compliance",
        "keywords": [
            "HIPAA",
            "health records",
            "patient data",
            "medical privacy",
        ],
        "description": "Regulations related to the security and privacy of health information.",
    },
]
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string holding the extracted text of all pages, with no
        separator inserted between pages. A page for which the extractor
        yields nothing contributes an empty string.
    """
    with open(file_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Defensive `or ""`: a page without a text layer must not abort the
        # whole extraction by handing a non-string to the concatenation.
        # join() also avoids rebuilding the string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)
def extract_text_from_txt(file_path):
    """Read a text file and return its content as a string.

    The file is decoded as UTF-8. Bytes that are not valid UTF-8 are replaced
    with U+FFFD instead of raising, so a single stray byte in an uploaded
    document cannot abort processing of the whole batch.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file content.
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
def split_text_into_chunks(text, chunk_size=2000):
    """Split text into consecutive, non-overlapping pieces of fixed length.

    Splitting is purely positional: pieces may begin or end in the middle of
    a word, and the last piece may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per piece; must be >= 1.

    Returns:
        A list of substrings that concatenate back to ``text``. Empty input
        yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. Without this check a
            negative size would silently return ``[]`` and drop the document.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
def process_with_groq(text_chunks, task_type, model="llama-3.3-70b-versatile"):
    """Run one chat completion per chunk and join the answers with spaces.

    This is best-effort: a failure on one chunk is recorded in the output as
    an ``"Error: ..."`` fragment and the remaining chunks are still processed.

    Args:
        text_chunks: Iterable of document pieces to send to the model.
        task_type: Task label inserted into the prompt (e.g. "Summarize").
        model: Identifier of the chat model to use.

    Returns:
        The per-chunk outputs joined by a single space; ``""`` when there are
        no chunks.
    """
    results = []
    for chunk in text_chunks:
        prompt = f"Task: {task_type}\n\nDocument:\n{chunk}\n\nOutput:"
        try:
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=model,
            )
            choices = getattr(response, "choices", None)
            content = choices[0].message.content if choices else None
        except Exception as e:
            results.append(f"Error: {str(e)}")
        else:
            # A missing answer (no choices, or a choice whose content is None)
            # must become text, otherwise the final join() raises TypeError.
            results.append(
                content if content is not None else "No response received from the model."
            )
    return " ".join(results)
def detect_risks(text_chunks):
    """Report every risk keyword that occurs in each non-blank chunk.

    Matching is a case-insensitive substring search against RISK_KEYWORDS.

    Args:
        text_chunks: Iterable of document pieces to scan.

    Returns:
        A list with one message per (chunk, keyword) hit, each quoting the
        first 150 characters of the chunk, or the single entry
        ``"No risks detected."`` when nothing matched.
    """
    risks_found = []
    for chunk in text_chunks:
        if not chunk.strip():
            continue
        # Lower-case the chunk once rather than once per keyword.
        lowered_chunk = chunk.lower()
        for keyword in RISK_KEYWORDS:
            if keyword.lower() in lowered_chunk:
                risks_found.append(f"Risk detected: '{keyword}' in text: {chunk[:150]}...")
    if not risks_found:
        risks_found.append("No risks detected.")
    return risks_found
def check_regulatory_compliance(text_chunks):
    """List the tracked regulations that the document mentions by name.

    Each entry of REGULATORY_UPDATES is matched by a case-insensitive
    substring search for its name. A regulation is reported once, at its
    first occurrence, however many chunks mention it.

    Args:
        text_chunks: Iterable of document pieces to scan.

    Returns:
        A list of two-line messages (name and description, then status and
        last-updated date), or the single entry
        ``"No regulatory issues detected."`` when nothing matched.
    """
    compliance_issues = []
    already_reported = set()
    for chunk in text_chunks:
        lowered_chunk = chunk.lower()
        # Check for regulatory updates
        for update in REGULATORY_UPDATES:
            name = update["name"]
            # Skip regulations that were already reported for an earlier
            # chunk so long documents do not produce repeated lines.
            if name in already_reported or name.lower() not in lowered_chunk:
                continue
            already_reported.add(name)
            compliance_issues.append(
                f"Regulatory Update Detected: {name} - {update['description']}\n"
                f"Status: {update.get('status', 'Unknown')} | Last Updated: {update.get('last_updated', 'N/A')}"
            )
    # Add a fallback message if no updates are found
    if not compliance_issues:
        compliance_issues.append("No regulatory issues detected.")
    return compliance_issues
def check_regulatory_compliance_with_nlp(text_chunks):
    """List the regulatory categories whose keywords appear in the document.

    A category from REGULATORY_CATEGORIES matches when any of its keywords
    occurs in a chunk (case-insensitive substring search). Each category is
    reported once, at its first match.

    NOTE(review): the previous version passed every chunk through the spaCy
    pipeline but only read ``doc.text`` back, which spaCy documents as the
    unchanged input text. The pipeline call therefore added cost without
    affecting the result and has been dropped; confirm that no caller relied
    on the pipeline being exercised here.

    Args:
        text_chunks: Iterable of document pieces to scan.

    Returns:
        A list of "Detected Regulatory Category" messages, or the single
        entry ``"No regulatory categories detected."`` when nothing matched.
    """
    compliance_issues = []
    already_reported = set()
    for chunk in text_chunks:
        lowered_chunk = chunk.lower()
        for category in REGULATORY_CATEGORIES:
            label = category["category"]
            if label in already_reported:
                continue
            # One hit is enough; any() stops at the first matching keyword so
            # a category is not listed once per keyword.
            if any(keyword.lower() in lowered_chunk for keyword in category["keywords"]):
                already_reported.add(label)
                compliance_issues.append(
                    f"Detected Regulatory Category: {label} - {category['description']}"
                )
    # Add a fallback message if no categories are matched
    if not compliance_issues:
        compliance_issues.append("No regulatory categories detected.")
    return compliance_issues
def answer_question_with_summary(summary, question, model="llama-3.3-70b-versatile"):
    """Ask the chat model a question, using a summary as the only context.

    Args:
        summary: Text supplied to the model as context.
        question: The user's question.
        model: Identifier of the chat model to use.

    Returns:
        The model's answer with surrounding whitespace removed,
        ``"No response received from the model."`` when the reply carries no
        usable content, or ``"Error: ..."`` when the request fails.
    """
    prompt = f"Context:\n{summary}\n\nQuestion: {question}\n\nAnswer:"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        choices = getattr(response, "choices", None)
        content = choices[0].message.content if choices else None
        # A choice with no content is "no response", not an AttributeError
        # message leaking out of the except branch below.
        if content is None:
            return "No response received from the model."
        return content.strip()
    except Exception as e:
        return f"Error: {str(e)}"
def send_email(receiver_email, email_subject, sender_email, sender_password, email_body, pdf_filename):
    """Render text into a PDF and e-mail it as an attachment via Gmail SMTP.

    The PDF is written to ``pdf_filename``, attached to a short plain-text
    message, sent through ``smtp.gmail.com:587`` with STARTTLS, and deleted
    again whether or not sending succeeded.

    Args:
        receiver_email: Address placed in the ``To`` header.
        email_subject: Subject line of the message.
        sender_email: Address used for the ``From`` header and the SMTP login.
        sender_password: Password used for the SMTP login.
        email_body: Text rendered into the PDF.
        pdf_filename: Path where the temporary PDF is written; its base name
            is used as the attachment name.

    Returns:
        A success message, or ``"Failed to send email: ..."`` describing the
        error. Exceptions are not propagated to the caller.
    """
    try:
        try:
            # Create PDF
            pdf = FPDF()
            pdf.add_page()
            pdf.set_font("Arial", size=12)
            # The built-in (core) PDF fonts are limited to Latin-1 as far as
            # the FPDF docs describe; characters outside that range (curly
            # quotes, emoji, ...) are replaced with "?" so that they cannot
            # abort PDF generation.
            printable_body = email_body.encode("latin-1", "replace").decode("latin-1")
            pdf.multi_cell(190, 10, printable_body)
            pdf.output(pdf_filename)

            # Set up email. From/To/Subject live in the headers only; they
            # are no longer repeated as raw text inside the message body.
            msg = MIMEMultipart()
            msg['From'] = sender_email
            msg['To'] = receiver_email
            msg['Subject'] = email_subject
            msg.attach(MIMEText("Please find the attached PDF summary for your review.\n", 'plain'))

            # Attach PDF
            with open(pdf_filename, "rb") as pdf_file:
                attachment = pdf_file.read()
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment)
            encoders.encode_base64(part)
            # Passing the name as a header parameter lets the email package
            # quote/encode it, so names with spaces or non-ASCII characters
            # stay intact. Only the base name is exposed to the recipient.
            part.add_header(
                "Content-Disposition",
                "attachment",
                filename=os.path.basename(pdf_filename),
            )
            msg.attach(part)

            # The context manager closes the connection even when login or
            # sending fails; the timeout keeps a stalled server from
            # blocking the request forever.
            with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
                server.starttls()
                server.login(sender_email, sender_password)
                server.send_message(msg)
        finally:
            # Never leave the temporary PDF behind, success or not.
            if os.path.exists(pdf_filename):
                os.remove(pdf_filename)
        return "📧 Email sent successfully with the attached PDF!"
    except Exception as e:
        return f"Failed to send email: {str(e)}"
def process_files(files, tasks):
    """Extract text from each uploaded file and run the selected tasks on it.

    Args:
        files: Iterable of uploads. Each item is either a path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute holds the
            path of the file on disk.
        tasks: Iterable of task names. "Risk Detection" and "Regulatory
            Update Tracker" are handled locally; every other name is passed
            to the language model as the task description.

    Returns:
        The string ``"Please upload files and select tasks."`` when either
        argument is empty. Otherwise a tuple ``(results, all_text_chunks)``:
        ``results`` maps each file's base name to a ``{task: output}`` dict,
        or to the string ``"Unsupported file format"`` for files that are
        neither ``.pdf`` nor ``.txt``; ``all_text_chunks`` maps each
        supported file's base name to its list of text chunks.

    Side effects:
        Stores each supported file's task outputs in the module-level
        ``generated_summaries`` cache.
    """
    if not files or not tasks:
        return "Please upload files and select tasks."
    results = {}
    all_text_chunks = {}
    for file in files:
        # Accept plain paths as well as upload objects. The path check comes
        # first because pathlib.Path also has a ``name`` attribute, which
        # holds only the final path component.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name
        file_name = os.path.basename(file_path)
        file_extension = os.path.splitext(file_name)[1].lower()
        if file_extension == '.pdf':
            text = extract_text_from_pdf(file_path)
        elif file_extension == '.txt':
            text = extract_text_from_txt(file_path)
        else:
            results[file_name] = "Unsupported file format"
            continue
        text_chunks = split_text_into_chunks(text)
        all_text_chunks[file_name] = text_chunks
        file_results = {}
        for task in tasks:
            if task == "Risk Detection":
                file_results[task] = "\n".join(detect_risks(text_chunks))
            elif task == "Regulatory Update Tracker":
                compliance_issues = check_regulatory_compliance(text_chunks)
                category_issues = check_regulatory_compliance_with_nlp(text_chunks)
                file_results[task] = "\n".join(compliance_issues) + "\n\n" + "\n".join(category_issues)
            else:
                file_results[task] = process_with_groq(text_chunks, task)
        results[file_name] = file_results
        generated_summaries[file_name] = file_results
    return results, all_text_chunks
def display_results(results):
    """Format processing results as a Markdown report.

    Args:
        results: Either a plain message string, which is returned unchanged,
            or a mapping of file name to that file's results. A file's
            results are normally a ``{task: output}`` dict; a non-dict value
            (such as the "Unsupported file format" marker) is shown as a
            single message under the file's heading.

    Returns:
        The Markdown text: one ``##`` heading per file followed by one
        ``###`` section per task, each closed by a horizontal rule.
    """
    if isinstance(results, str):
        return results
    sections = []
    for file_name, file_results in results.items():
        sections.append(f"## Results for {file_name}:\n\n")
        if isinstance(file_results, dict):
            sections.extend(
                f"### Task: {task}\n\n{result}\n\n---\n\n"
                for task, result in file_results.items()
            )
        else:
            # Per-file status message rather than task outputs; calling
            # .items() on it would raise AttributeError.
            sections.append(f"{file_results}\n\n---\n\n")
    return "".join(sections)
def email_summary(file_task_selection, receiver_email, email_subject, sender_email, sender_password):
    """E-mail one previously generated task result as a PDF attachment.

    Args:
        file_task_selection: Dropdown value of the form
            ``"<file name> - <task>"``.
        receiver_email: Recipient address.
        email_subject: Subject line of the message.
        sender_email: Sender address, also used for the SMTP login.
        sender_password: Password used for the SMTP login.

    Returns:
        The status message produced by ``send_email``,
        ``"Please fill in all required fields."`` when the selection, either
        address or the password is missing, or ``"Error: ..."`` when the
        selection cannot be resolved to a stored result.
    """
    if not file_task_selection or not receiver_email or not sender_email or not sender_password:
        return "Please fill in all required fields."
    try:
        # Split on the LAST separator: task names never contain " - ", but a
        # file name such as "NDA - draft.pdf" may.
        file_name, task = file_task_selection.rsplit(" - ", 1)
        email_body = generated_summaries[file_name][task]
        pdf_filename = f"{file_name}_{task}.pdf"
        return send_email(receiver_email, email_subject, sender_email, sender_password, email_body, pdf_filename)
    except Exception as e:
        return f"Error: {str(e)}"
def answer_questions(file_name, question):
    """Answer a question from the stored "Summarize" result of a file.

    Args:
        file_name: Name of a processed file, as used in the summary cache.
        question: The user's question.

    Returns:
        The model's answer, a prompt to complete the form when either input
        is empty, or a notice that the file has no stored summary.
    """
    if not (file_name and question):
        return "Please select a file and enter a question."

    stored_results = generated_summaries.get(file_name, {})
    if "Summarize" not in stored_results:
        return "No summary available for the selected file."

    return answer_question_with_summary(stored_results["Summarize"], question)
# Gradio user interface: three tabs (process, e-mail, Q&A) plus the event
# handlers that connect them to the processing functions of this module.
with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
    gr.Markdown("""
    # 📜 Advanced AI-Driven Legal Document Summarization and Risk Assessment
    **Welcome to the Enhanced Legal Document Assistant!**
    - 📜 Provide readable legal summaries
    - 📑 Extract key clauses from legal documents
    - ⚖️ Detect potential legal risks
    - 📰 Track regulatory updates
    - ✉️ Send summaries directly via email
    - ❓ Ask Questions Based on the Summary
    """)
    with gr.Tab("Process Documents"):
        with gr.Row():
            with gr.Column():
                # file_count="multiple" is the documented way to accept
                # several uploads (gr.File has no `multiple` parameter), and
                # extensions in file_types are written with a leading dot.
                files = gr.File(
                    label="Upload PDFs or Text Files",
                    file_types=[".pdf", ".txt"],
                    file_count="multiple",
                )
                task_checkboxes = gr.CheckboxGroup(
                    ["Summarize", "Extract Clauses", "Risk Detection", "Regulatory Update Tracker"],
                    label="Choose Tasks"
                )
                process_btn = gr.Button("Process Documents")
            with gr.Column():
                progress = gr.Plot(label="Processing Progress")
                results_md = gr.Markdown(label="Results")
    with gr.Tab("Email Summary"):
        with gr.Row():
            with gr.Column():
                file_task_dropdown = gr.Dropdown(label="Select a Task Summary to Send", choices=[])
                receiver_email = gr.Textbox(label="Receiver Email")
                email_subject = gr.Textbox(label="Email Subject", value="Legal Document Summary")
                # No pre-filled address: the previous default was the
                # literal text "[email protected]", which is not a usable
                # e-mail address.
                sender_email = gr.Textbox(label="Sender Email (Gmail)", placeholder="you@gmail.com")
                sender_password = gr.Textbox(label="Sender Email Password", type="password")
                send_email_btn = gr.Button("Send Email")
            with gr.Column():
                email_result = gr.Textbox(label="Email Status")
    with gr.Tab("Ask Questions"):
        with gr.Row():
            with gr.Column():
                file_dropdown = gr.Dropdown(label="Select a File Summary", choices=[])
                question = gr.Textbox(label="Ask a Question")
                ask_btn = gr.Button("Get Answer")
            with gr.Column():
                answer = gr.Textbox(label="Answer")

    def process_with_progress(files, tasks):
        """Process the uploads one file at a time, streaming progress to the UI.

        This is a generator: after each file it yields a 4-tuple matching the
        click handler's outputs -- the progress figure, the Markdown report
        for everything processed so far, and updates for the two dropdowns.

        Args:
            files: The uploaded files from the File component.
            tasks: The task names ticked in the checkbox group.
        """
        if not files or not tasks:
            # A `return <value>` inside a generator never reaches the UI, so
            # the message is yielded; gr.update() leaves the dropdowns as is.
            yield None, "Please upload files and select tasks.", gr.update(), gr.update()
            return

        # Create progress visualization
        fig, ax = plt.subplots(figsize=(8, 4))
        try:
            results = {}
            progress_data = []
            total_steps = len(files)
            for current_step, uploaded_file in enumerate(files, start=1):
                # One file per call, so the plot reflects work that has
                # actually completed instead of being drawn after the fact.
                file_results, _ = process_files([uploaded_file], tasks)
                results.update(file_results)

                progress_data.append((current_step / total_steps) * 100)
                ax.clear()
                ax.plot(progress_data, color="blue", marker="o")
                ax.set_title("Processing Progress")
                ax.set_xlabel("Steps")
                ax.set_ylabel("Progress (%)")
                ax.set_ylim(0, 100)
                ax.grid(True)

                # Only files with real task outputs (dicts) are offered in
                # the dropdowns; status strings such as "Unsupported file
                # format" have no tasks to choose from.
                processed = {
                    file_name: per_task
                    for file_name, per_task in results.items()
                    if isinstance(per_task, dict)
                }
                file_task_choices = [
                    f"{file_name} - {task}"
                    for file_name, per_task in processed.items()
                    for task in per_task
                ]
                file_choices = list(processed)
                yield (
                    fig,
                    display_results(results),
                    gr.update(choices=file_task_choices),
                    gr.update(choices=file_choices),
                )
        finally:
            # pyplot keeps every figure it creates registered until it is
            # closed; release it so repeated requests do not pile up figures.
            plt.close(fig)

    process_btn.click(
        process_with_progress,
        inputs=[files, task_checkboxes],
        outputs=[progress, results_md, file_task_dropdown, file_dropdown]
    )
    send_email_btn.click(
        email_summary,
        inputs=[file_task_dropdown, receiver_email, email_subject, sender_email, sender_password],
        outputs=[email_result]
    )
    ask_btn.click(
        answer_questions,
        inputs=[file_dropdown, question],
        outputs=[answer]
    )
if __name__ == "__main__":
    app.launch()