Spaces:

Kushalmanda
/

contract-risk-analyzer1

Sleeping

App Files Community

Kushalmanda committed on Jun 7

Commit

3069766

verified ·

1 Parent(s): 82a7d0e

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -179

app.py CHANGED Viewed

@@ -1,206 +1,211 @@
 import gradio as gr
-import pandas as pd
-import numpy as np
 import matplotlib.pyplot as plt
-from io import BytesIO
-import os
-import logging
-import base64
-from simple_salesforce import Salesforce
-from transformers import BertTokenizer, BertForSequenceClassification
-import torch
-# Configure logging to show detailed messages
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-# Salesforce credentials (use environment variables in production)
-SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME", "username")
-SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD", "password")
-SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN", "token")
-SALESFORCE_DOMAIN = os.getenv("SALESFORCE_DOMAIN", "login")
-# Load the BERT model and tokenizer for risk classification
-model = BertForSequenceClassification.from_pretrained('path_to_model')  # Replace with the path to your fine-tuned model
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-# Function to authenticate with Salesforce
-def get_salesforce_connection():
-    try:
-        sf = Salesforce(
-            username=SALESFORCE_USERNAME,
-            password=SALESFORCE_PASSWORD,
-            security_token=SALESFORCE_SECURITY_TOKEN,
-            domain=SALESFORCE_DOMAIN
-        )
-        return sf
-    except Exception as e:
-        logger.error(f"Failed to connect to Salesforce: {str(e)}", exc_info=True)
-        return None
-# Function to process the contract text and predict risk score using BERT
-def process_contract(contract_text):
-    inputs = tokenizer(contract_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    logits = outputs.logits
-    predicted_class = torch.argmax(logits, dim=1).item()
-    risk_labels = ["low", "medium", "high"]
-    risk_tag = risk_labels[predicted_class]
-    return risk_tag, logits.max().item()
-# Function to generate heatmap of risk levels across contract sections
-def generate_heatmap(contract_text):
-    sections = contract_text.split("\n\n")  # Split contract into sections (paragraphs)
-    risks = []
-    for section in sections:
-        risk_tag, score = process_contract(section)
-        risks.append((section, risk_tag, score))
-    # Create a heatmap
-    fig, ax = plt.subplots(figsize=(10, len(sections) * 0.5))
-    ax.barh(range(len(sections)), [r[2] for r in risks], color='red', height=0.4)
-    ax.set_yticks(range(len(sections)))
-    ax.set_yticklabels([r[0][:50] for r in risks])  # Display first 50 characters of each section
-    ax.set_xlabel('Risk Score')
-    ax.set_title('Risk Heatmap of Contract Sections')
     plt.tight_layout()
     return fig
-# Function to upload contract result (PDF, heatmap, etc.) to Salesforce
-def upload_file_to_salesforce(file_path, file_name, record_id=None):
-    sf = get_salesforce_connection()
-    if not sf:
-        logger.error("Salesforce connection failed. Cannot upload file.")
-        return None
-    with open(file_path, "rb") as f:
-        file_data = f.read()
-    encoded_file_data = base64.b64encode(file_data).decode('utf-8')
-    content_version_data = {
-        "Title": file_name,
-        "PathOnClient": file_name,
-        "VersionData": encoded_file_data,
-    }
-    if record_id:
-        content_version_data["FirstPublishLocationId"] = record_id
-    content_version = sf.ContentVersion.create(content_version_data)
-    return content_version["id"]
-# Function to generate a PDF report
-def generate_pdf_report(project_title, risk_tags, ai_plan_score, estimated_duration, location, weather, gantt_chart_path=None):
-    pdf_file = BytesIO()
-    doc = SimpleDocTemplate(pdf_file, pagesize=letter)
-    styles = getSampleStyleSheet()
-    elements = []
-    title_style = ParagraphStyle('Title', parent=styles['Heading1'], fontSize=18, alignment=1, spaceAfter=20)
-    elements.append(Paragraph(f"Project Report: {project_title}", title_style))
-    details_style = styles['BodyText']
-    details = [
-        f"<b>Location:</b> {location}",
-        f"<b>Weather:</b> {weather.capitalize()}",
-        f"<b>Estimated Duration:</b> {estimated_duration} days",
-        f"<b>AI Plan Score:</b> {ai_plan_score:.1f}%",
-    ]
-    for detail in details:
-        elements.append(Paragraph(detail, details_style))
-    elements.append(Spacer(1, 12))
-    elements.append(Paragraph("<b>Risk Assessment:</b>", styles['Heading2']))
-    for risk in risk_tags.split("\n"):
-        elements.append(Paragraph(f"• {risk}", details_style))
-    if gantt_chart_path:
-        elements.append(Spacer(1, 24))
-        elements.append(Paragraph("<b>Project Timeline:</b>", styles['Heading2']))
-        img = Image(gantt_chart_path, width=6 * inch, height=4 * inch)
-        elements.append(img)
-    doc.build(elements)
-    pdf_file.seek(0)
-    return pdf_file
-# Function to send project data to Salesforce
-def send_to_salesforce(project_title, gantt_chart_url, ai_plan_score, estimated_duration, risk_tags, status="Draft", record_id=None, location="", weather_type=""):
-    sf = get_salesforce_connection()
-    if not sf:
-        logger.error("Salesforce connection failed. Cannot proceed with record creation/update.")
-        return None
-    sf_data = {
-        "Name": project_title[:80],
-        "Project_Title__c": project_title,
-        "Estimated_Duration__c": estimated_duration,
-        "AI_Plan_Score__c": ai_plan_score,
-        "Status__c": status,
-        "Location__c": location,
-        "Weather_Type__c": weather_type,
-        "Risk_Tags__c": risk_tags,
-    }
-    if gantt_chart_url:
-        sf_data["Gantt_Chart_PDF__c"] = gantt_chart_url
-    if record_id:
-        sf.AI_Project_Timeline__c.update(record_id, sf_data)
-        return record_id
-    else:
-        project_record = sf.AI_Project_Timeline__c.create(sf_data)
-        return project_record['id']
-# Gradio interface function
-def gradio_interface(contract_file, weather, location, project_title):
     try:
-        contract_text = contract_file.read().decode("utf-8")  # Assuming it's a text file; adapt if PDF
-        fig = generate_heatmap(contract_text)
-        risk_tags = "Risk tags will be displayed here..."  # Logic for extracting risk tags based on contract analysis
-        ai_plan_score = 90  # Placeholder AI plan score based on risk level
-        # Generate PDF report
-        pdf_report = generate_pdf_report(project_title, risk_tags, ai_plan_score, estimated_duration=30, location=location, weather=weather)
-        # Upload to Salesforce
-        pdf_content_id, pdf_url = upload_file_to_salesforce(pdf_report, project_title)
-        return fig, risk_tags, pdf_url, pdf_report
     except Exception as e:
-        logger.error(f"Error in Gradio interface: {str(e)}")
-        return None, f"Error in Gradio interface: {str(e)}", None, None
-# Gradio interface setup
-demo = gr.Blocks()
-with demo:
-    gr.Markdown("## Contract Risk Analyzer")
-    gr.Markdown("Upload a contract, and the system will generate a heatmap and PDF report highlighting risk-prone clauses.")
     with gr.Row():
         with gr.Column():
-            contract_file = gr.File(label="Upload Contract (PDF or Text)")
-            weather = gr.Dropdown(label="Weather", choices=["sunny", "rainy", "cloudy"], value="sunny")
-            location = gr.Textbox(label="Location", placeholder="Enter project location")
-            project_title = gr.Textbox(label="Project Title", placeholder="Enter project title")
-            submit_btn = gr.Button("Analyze Contract")
         with gr.Column():
-            plot_output = gr.Plot(label="Heatmap Visualization")
-            risk_tags_output = gr.Textbox(label="Risk Tags")
-            download_pdf = gr.File(label="Download Full Report (PDF)")
-    submit_btn.click(fn=gradio_interface, inputs=[contract_file, weather, location, project_title], outputs=[plot_output, risk_tags_output, download_pdf])
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import pdfplumber
 import matplotlib.pyplot as plt
+import numpy as np
+from word2number import w2n
+import re
+from typing import Tuple, List, Dict
+# Custom CSS for styling
+css = """
+.risk-low { color: #28a745; font-weight: bold; }
+.risk-medium { color: #ffc107; font-weight: bold; }
+.risk-high { color: #dc3545; font-weight: bold; }
+.result-box { padding: 20px; border-radius: 5px; margin-bottom: 20px; }
+.penalty-box { background-color: #f8f9fa; }
+.obligation-box { background-color: #f8f9fa; }
+.delay-box { background-color: #f8f9fa; }
+"""
+def extract_text_from_pdf(pdf_path: str) -> str:
+    """Extract text from PDF using pdfplumber"""
+    text = ""
+    with pdfplumber.open(pdf_path) as pdf:
+        for page in pdf.pages:
+            text += page.extract_text() or ""
+    return text
+def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
+    """Count occurrences of keywords in text"""
+    counts = {}
+    for keyword in keywords:
+        counts[keyword] = len(re.findall(r'\b' + re.escape(keyword) + r'\b', text, flags=re.IGNORECASE))
+    return counts
+def find_penalty_values(text: str) -> List[float]:
+    """Find penalty amounts in the text"""
+    patterns = [
+        r'\$\s*[\d,]+(?:\.\d+)?',
+        r'(?:USD|usd)\s*[\d,]+(?:\.\d+)?',
+        r'\d+\s*(?:percent|%)',
+        r'(?:\b[a-z]+\s*)+dollars',
+    ]
+    penalties = []
+    for pattern in patterns:
+        matches = re.finditer(pattern, text, flags=re.IGNORECASE)
+        for match in matches:
+            penalty_text = match.group()
+            try:
+                if any(word in penalty_text.lower() for word in ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'hundred', 'thousand', 'million']):
+                    penalty_value = w2n.word_to_num(penalty_text.split('dollars')[0].strip())
+                else:
+                    penalty_value = float(re.sub(r'[^\d.]', '', penalty_text))
+                penalties.append(penalty_value)
+            except:
+                continue
+    return penalties
+def calculate_risk_score(penalty_count: int, penalty_values: List[float], obligation_count: int, delay_count: int) -> Tuple[float, str]:
+    """Calculate risk score based on various factors"""
+    score = 0
+    score += min(penalty_count * 5, 30)
+    if penalty_values:
+        avg_penalty = sum(penalty_values) / len(penalty_values)
+        if avg_penalty > 1000000:
+            score += 40
+        elif avg_penalty > 100000:
+            score += 25
+        elif avg_penalty > 10000:
+            score += 15
+        else:
+            score += 5
+    score += min(obligation_count * 2, 20)
+    score += min(delay_count * 10, 30)
+    score = min(score, 100)
+    if score < 30:
+        return score, "Low"
+    elif score < 70:
+        return score, "Medium"
+    else:
+        return score, "High"
+def generate_heatmap(risk_level: str):
+    """Generate a simple heatmap based on risk level"""
+    fig, ax = plt.subplots(figsize=(8, 2))
+    if risk_level == "Low":
+        cmap = plt.cm.Greens
+    elif risk_level == "Medium":
+        cmap = plt.cm.Oranges
+    else:
+        cmap = plt.cm.Reds
+    gradient = np.linspace(0, 1, 256).reshape(1, -1)
+    gradient = np.vstack((gradient, gradient))
+    ax.imshow(gradient, aspect='auto', cmap=cmap)
+    ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level == "High" else 'black',
+            ha='center', va='center', fontsize=24, fontweight='bold')
+    ax.set_axis_off()
     plt.tight_layout()
     return fig
+def analyze_pdf(file_obj) -> List:
+    """Main analysis function for Gradio interface"""
     try:
+        # Extract text from the uploaded file
+        text = extract_text_from_pdf(file_obj.name)
+        # Define keywords to search for
+        penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
+        obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
+        delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]
+        # Count keyword occurrences
+        penalty_counts = count_keywords(text, penalty_keywords)
+        obligation_counts = count_keywords(text, obligation_keywords)
+        delay_counts = count_keywords(text, delay_keywords)
+        # Find penalty values
+        penalty_values = find_penalty_values(text)
+        # Calculate total counts
+        total_penalties = sum(penalty_counts.values())
+        total_obligations = sum(obligation_counts.values())
+        total_delays = sum(delay_counts.values())
+        # Calculate risk score
+        risk_score, risk_level = calculate_risk_score(
+            total_penalties, penalty_values, total_obligations, total_delays
+        )
+        # Generate heatmap
+        heatmap = generate_heatmap(risk_level)
+        # Prepare results
+        penalty_details = "\n".join([f"- {kw}: {count}" for kw, count in penalty_counts.items()])
+        obligation_details = "\n".join([f"- {kw}: {count}" for kw, count in obligation_counts.items()])
+        delay_details = "\n".join([f"- {kw}: {count}" for kw, count in delay_counts.items()])
+        penalty_amounts = "\n".join([f"- ${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found"
+        # Find example sentences with penalties
+        penalty_sentences = []
+        for sentence in re.split(r'(?<=[.!?])\s+', text):
+            if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
+                penalty_sentences.append(sentence.strip())
+        penalty_examples = "\n\n".join([f"{i+1}. {sent}" for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "No penalty clauses found"
+        # Return all results
+        return [
+            f"<div class='risk-{risk_level.lower()}'>{risk_score:.1f}/100</div>",
+            f"<div class='risk-{risk_level.lower()}'>{risk_level}</div>",
+            heatmap,
+            f"Total: {total_penalties}\n\n{penalty_details}",
+            f"{len(penalty_values)} amounts found\n\n{penalty_amounts}",
+            f"Total: {total_obligations}\n\n{obligation_details}",
+            f"Total: {total_delays}\n\n{delay_details}",
+            penalty_examples
+        ]
     except Exception as e:
+        return [f"Error: {str(e)}"] * 8
+# Create Gradio interface
+with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
+    gr.Markdown("# 📄 PDF Contract Risk Analyzer")
+    gr.Markdown("Upload a contract PDF to analyze penalties, obligations, and delays.")
     with gr.Row():
         with gr.Column():
+            file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+            submit_btn = gr.Button("Analyze PDF", variant="primary")
         with gr.Column():
+            gr.Markdown("### 🔍 Overall Risk Assessment")
+            risk_score = gr.HTML(label="Risk Score")
+            risk_level = gr.HTML(label="Risk Level")
+            heatmap = gr.Plot(label="Risk Heatmap")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 📊 Penalties Analysis")
+            penalty_count = gr.Textbox(label="Penalty Clauses", lines=5)
+            penalty_amounts = gr.Textbox(label="Penalty Amounts", lines=5)
+        with gr.Column():
+            gr.Markdown("### ⚖️ Obligations Analysis")
+            obligation_count = gr.Textbox(label="Obligation Clauses", lines=5)
+        with gr.Column():
+            gr.Markdown("### ⏱️ Delays Analysis")
+            delay_count = gr.Textbox(label="Delay Clauses", lines=5)
+    with gr.Row():
+        gr.Markdown("### 🔎 Extracted Penalty Clauses")
+        penalty_examples = gr.Textbox(label="Example Penalty Clauses", lines=5)
+    submit_btn.click(
+        fn=analyze_pdf,
+        inputs=file_input,
+        outputs=[risk_score, risk_level, heatmap, penalty_count, penalty_amounts,
+                obligation_count, delay_count, penalty_examples]
+    )
 if __name__ == "__main__":
+    demo.launch()