Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,37 +5,118 @@ import numpy as np
|
|
5 |
from word2number import w2n
|
6 |
import re
|
7 |
from typing import Tuple, List, Dict
|
|
|
|
|
8 |
|
9 |
# Custom CSS for styling
|
10 |
css = """
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
.obligation-box { background-color: #f8f9fa; }
|
17 |
-
.delay-box { background-color: #f8f9fa; }
|
18 |
-
.risk-container {
|
19 |
-
display: flex;
|
20 |
-
justify-content: space-between;
|
21 |
-
margin-bottom: 20px;
|
22 |
-
gap: 10px;
|
23 |
}
|
24 |
-
.risk-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
padding: 15px;
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
.heatmap-container {
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
margin-bottom: 20px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
}
|
37 |
-
.
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
}
|
40 |
"""
|
41 |
|
@@ -105,66 +186,59 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
|
|
105 |
else:
|
106 |
return score, "High"
|
107 |
|
108 |
-
def
|
109 |
-
"""
|
110 |
-
fig, ax = plt.subplots(figsize=(6, 1))
|
111 |
-
|
112 |
-
if level == "Low":
|
113 |
-
cmap = plt.cm.Greens
|
114 |
-
color = '#28a745'
|
115 |
-
elif level == "Medium":
|
116 |
-
cmap = plt.cm.Oranges
|
117 |
-
color = '#ffc107'
|
118 |
-
else:
|
119 |
-
cmap = plt.cm.Reds
|
120 |
-
color = '#dc3545'
|
121 |
-
|
122 |
-
gradient = np.linspace(0, 1, 256).reshape(1, -1)
|
123 |
-
gradient = np.vstack((gradient, gradient))
|
124 |
-
|
125 |
-
ax.imshow(gradient, aspect='auto', cmap=cmap)
|
126 |
-
ax.text(128, 0.5, f"{level}: {score:.1f}%" if score > 0 else f"{level}: 0%",
|
127 |
-
color='white' if level == "High" else 'black',
|
128 |
-
ha='center', va='center', fontsize=14, fontweight='bold')
|
129 |
-
|
130 |
-
ax.set_axis_off()
|
131 |
-
plt.tight_layout()
|
132 |
-
return fig
|
133 |
-
|
134 |
-
def generate_risk_display(risk_score: float, risk_level: str) -> Tuple[str, plt.Figure, plt.Figure, plt.Figure]:
|
135 |
-
"""Generate HTML display and heatmaps for all three risk levels"""
|
136 |
risk_levels = ["Low", "Medium", "High"]
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
for level in risk_levels:
|
141 |
-
if level == risk_level:
|
142 |
-
score = risk_score
|
143 |
-
else:
|
144 |
-
score = 0
|
145 |
-
heatmaps.append(generate_heatmap(score, level))
|
146 |
|
147 |
-
# Generate HTML display
|
148 |
-
risk_html = """
|
149 |
-
<div class='risk-container'>
|
150 |
-
"""
|
151 |
for level in risk_levels:
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
else
|
156 |
-
value = "0%"
|
157 |
-
active_class = ""
|
158 |
|
159 |
-
|
160 |
-
<div class='risk-
|
161 |
-
<
|
162 |
-
<div
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
</div>
|
164 |
-
"""
|
165 |
|
166 |
-
|
167 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
def analyze_pdf(file_obj) -> List:
|
170 |
"""Main analysis function for Gradio interface"""
|
@@ -195,15 +269,8 @@ def analyze_pdf(file_obj) -> List:
|
|
195 |
total_penalties, penalty_values, total_obligations, total_delays
|
196 |
)
|
197 |
|
198 |
-
# Generate risk display
|
199 |
-
risk_display
|
200 |
-
|
201 |
-
# Prepare results
|
202 |
-
penalty_details = "\n".join([f"- {kw}: {count}" for kw, count in penalty_counts.items()])
|
203 |
-
obligation_details = "\n".join([f"- {kw}: {count}" for kw, count in obligation_counts.items()])
|
204 |
-
delay_details = "\n".join([f"- {kw}: {count}" for kw, count in delay_counts.items()])
|
205 |
-
|
206 |
-
penalty_amounts = "\n".join([f"- ${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found"
|
207 |
|
208 |
# Find example sentences with penalties
|
209 |
penalty_sentences = []
|
@@ -211,65 +278,93 @@ def analyze_pdf(file_obj) -> List:
|
|
211 |
if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
|
212 |
penalty_sentences.append(sentence.strip())
|
213 |
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
216 |
-
# Return all results
|
217 |
return [
|
218 |
risk_display,
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
f"Total: {total_obligations}\n\n{obligation_details}",
|
225 |
-
f"Total: {total_delays}\n\n{delay_details}",
|
226 |
-
penalty_examples
|
227 |
]
|
228 |
except Exception as e:
|
229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
# Create Gradio interface
|
232 |
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
|
233 |
-
gr.Markdown("
|
234 |
-
|
|
|
|
|
|
|
|
|
235 |
|
236 |
with gr.Row():
|
237 |
-
with gr.Column():
|
238 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
239 |
-
submit_btn = gr.Button("Analyze
|
240 |
|
241 |
-
with gr.Column():
|
242 |
-
gr.Markdown("### π
|
243 |
-
risk_display = gr.HTML(
|
244 |
-
with gr.Row():
|
245 |
-
low_heatmap = gr.Plot(label="Low Risk")
|
246 |
-
medium_heatmap = gr.Plot(label="Medium Risk")
|
247 |
-
high_heatmap = gr.Plot(label="High Risk")
|
248 |
|
249 |
with gr.Row():
|
250 |
with gr.Column():
|
251 |
-
gr.
|
252 |
-
|
253 |
-
penalty_amounts = gr.Textbox(label="Penalty Amounts", lines=5)
|
254 |
|
255 |
with gr.Column():
|
256 |
-
gr.
|
257 |
-
|
258 |
-
|
259 |
-
with gr.Column():
|
260 |
-
gr.Markdown("### β±οΈ Delays Analysis")
|
261 |
-
delay_count = gr.Textbox(label="Delay Clauses", lines=5)
|
262 |
|
263 |
-
|
264 |
-
gr.Markdown("### π Extracted Penalty Clauses")
|
265 |
-
penalty_examples = gr.Textbox(label="Example Penalty Clauses", lines=5)
|
266 |
|
267 |
submit_btn.click(
|
268 |
fn=analyze_pdf,
|
269 |
inputs=file_input,
|
270 |
-
outputs=[risk_display,
|
271 |
-
|
272 |
-
delay_count, penalty_examples]
|
273 |
)
|
274 |
|
275 |
if __name__ == "__main__":
|
|
|
5 |
from word2number import w2n
|
6 |
import re
|
7 |
from typing import Tuple, List, Dict
|
8 |
+
from io import BytesIO
|
9 |
+
import base64
|
10 |
|
11 |
# Custom CSS for styling
|
12 |
css = """
|
13 |
+
:root {
|
14 |
+
--low-color: #28a745;
|
15 |
+
--medium-color: #ffc107;
|
16 |
+
--high-color: #dc3545;
|
17 |
+
--inactive-color: #e9ecef;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
+
.risk-container {
|
20 |
+
display: flex;
|
21 |
+
flex-direction: column;
|
22 |
+
gap: 12px;
|
23 |
+
margin-bottom: 25px;
|
24 |
+
}
|
25 |
+
.risk-row {
|
26 |
+
display: flex;
|
27 |
+
align-items: center;
|
28 |
+
background: white;
|
29 |
+
border-radius: 8px;
|
30 |
padding: 15px;
|
31 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
32 |
+
transition: all 0.3s ease;
|
33 |
+
}
|
34 |
+
.risk-row.active {
|
35 |
+
transform: scale(1.02);
|
36 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.15);
|
37 |
+
}
|
38 |
+
.risk-label {
|
39 |
+
width: 100px;
|
40 |
+
font-weight: 600;
|
41 |
+
font-size: 16px;
|
42 |
+
color: #495057;
|
43 |
}
|
44 |
+
.risk-score {
|
45 |
+
width: 80px;
|
46 |
+
font-size: 20px;
|
47 |
+
font-weight: 700;
|
48 |
+
text-align: center;
|
49 |
+
}
|
50 |
+
.risk-low { color: var(--low-color); }
|
51 |
+
.risk-medium { color: var(--medium-color); }
|
52 |
+
.risk-high { color: var(--high-color); }
|
53 |
.heatmap-container {
|
54 |
+
flex-grow: 1;
|
55 |
+
height: 30px;
|
56 |
+
border-radius: 15px;
|
57 |
+
overflow: hidden;
|
58 |
+
position: relative;
|
59 |
+
}
|
60 |
+
.heatmap-bar {
|
61 |
+
height: 100%;
|
62 |
+
border-radius: 15px;
|
63 |
+
transition: width 0.5s ease;
|
64 |
+
}
|
65 |
+
.risk-meter {
|
66 |
+
position: absolute;
|
67 |
+
right: 10px;
|
68 |
+
top: 50%;
|
69 |
+
transform: translateY(-50%);
|
70 |
+
font-size: 12px;
|
71 |
+
font-weight: 600;
|
72 |
+
color: white;
|
73 |
+
text-shadow: 0 1px 2px rgba(0,0,0,0.3);
|
74 |
+
}
|
75 |
+
.result-section {
|
76 |
+
background: white;
|
77 |
+
border-radius: 8px;
|
78 |
+
padding: 20px;
|
79 |
margin-bottom: 20px;
|
80 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
81 |
+
}
|
82 |
+
.result-title {
|
83 |
+
font-size: 18px;
|
84 |
+
font-weight: 600;
|
85 |
+
margin-bottom: 15px;
|
86 |
+
color: #343a40;
|
87 |
+
display: flex;
|
88 |
+
align-items: center;
|
89 |
+
gap: 8px;
|
90 |
+
}
|
91 |
+
.clause-item {
|
92 |
+
margin-bottom: 8px;
|
93 |
+
padding-left: 15px;
|
94 |
+
position: relative;
|
95 |
+
}
|
96 |
+
.clause-item:before {
|
97 |
+
content: "β’";
|
98 |
+
position: absolute;
|
99 |
+
left: 0;
|
100 |
+
color: #6c757d;
|
101 |
+
}
|
102 |
+
.penalty-amount {
|
103 |
+
font-family: monospace;
|
104 |
+
background: #f8f9fa;
|
105 |
+
padding: 2px 6px;
|
106 |
+
border-radius: 4px;
|
107 |
+
margin-left: 5px;
|
108 |
}
|
109 |
+
.example-clause {
|
110 |
+
background: #f8f9fa;
|
111 |
+
padding: 12px;
|
112 |
+
border-radius: 6px;
|
113 |
+
margin-bottom: 10px;
|
114 |
+
border-left: 3px solid #6c757d;
|
115 |
+
}
|
116 |
+
.example-number {
|
117 |
+
font-weight: 600;
|
118 |
+
margin-right: 8px;
|
119 |
+
color: #6c757d;
|
120 |
}
|
121 |
"""
|
122 |
|
|
|
186 |
else:
|
187 |
return score, "High"
|
188 |
|
189 |
+
def create_risk_display(risk_score: float, risk_level: str) -> str:
    """Build the HTML summary showing one row per risk level.

    Three rows ("Low", "Medium", "High") are always rendered. Only the row
    whose name equals ``risk_level`` is marked active: it shows
    ``risk_score``, is drawn in that level's colour and gets the ``active``
    CSS class. The other rows show 0 and use the inactive colour at reduced
    opacity. If ``risk_level`` matches none of the three names, every row is
    rendered inactive.

    Args:
        risk_score: Score to display for the active level, as a percentage.
        risk_level: Name of the active level ("Low", "Medium" or "High").

    Returns:
        An HTML fragment wrapped in a ``risk-container`` div.
    """
    risk_levels = ["Low", "Medium", "High"]
    colors = {
        "Low": "var(--low-color)",
        "Medium": "var(--medium-color)",
        "High": "var(--high-color)",
    }

    html_parts = ["<div class='risk-container'>"]

    for level in risk_levels:
        active = level == risk_level
        score = risk_score if active else 0
        color = colors[level] if active else "var(--inactive-color)"
        opacity = "1" if active else "0.6"
        row_class = "risk-row active" if active else "risk-row"
        level_class = f"risk-{level.lower()}"
        # The bar width is a CSS percentage, so keep it inside 0-100 even if
        # the score falls outside that range; the text still shows the raw
        # score.
        bar_width = min(max(score, 0), 100)

        # Attribute quoting is kept consistent (single quotes throughout): a
        # mismatched quote on the heatmap-bar class previously swallowed the
        # style attribute, so the bar never received its width or colour.
        html_parts.append(f"""
        <div class='{row_class}'>
            <div class='risk-label {level_class}'>{level} Risk</div>
            <div class='risk-score {level_class}'>{score:.1f}%</div>
            <div class='heatmap-container'>
                <div class='heatmap-bar'
                     style='width: {bar_width:.1f}%; background: {color}; opacity: {opacity}'>
                    <span class='risk-meter'>{score:.1f}%</span>
                </div>
            </div>
        </div>
        """)

    html_parts.append("</div>")
    return "\n".join(html_parts)
|
222 |
+
|
223 |
+
def format_clauses(counts: Dict[str, int]) -> str:
    """Render keyword counts as a newline-joined list of HTML rows.

    Args:
        counts: Mapping of keyword to the number of times it was found.

    Returns:
        One ``clause-item`` div per entry, in the mapping's iteration order,
        joined with newlines. An empty mapping yields an empty string.
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    # Keywords are escaped so characters such as "&" or "<" are shown as text
    # instead of being parsed as markup.
    return "\n".join(
        f"<div class='clause-item'>{escape(str(kw))}: <strong>{count}</strong></div>"
        for kw, count in counts.items()
    )
|
226 |
+
|
227 |
+
def format_penalty_amounts(amounts: List[float], limit: int = 5) -> str:
    """Render penalty amounts as a newline-joined list of HTML rows.

    Args:
        amounts: Penalty amounts to display, formatted as dollars with
            thousands separators and two decimals.
        limit: Maximum number of amounts to render. Defaults to 5, the cap
            that was previously hard-coded; negative values are treated as 0.

    Returns:
        One ``clause-item`` div per rendered amount, or a muted placeholder
        message when ``amounts`` is empty.
    """
    if not amounts:
        return "<div style='color: #6c757d;'>No specific penalty amounts found</div>"

    # A negative slice bound would drop items from the end instead of capping
    # the list, so clamp it.
    shown = amounts[:max(limit, 0)]
    return "\n".join(
        f"<div class='clause-item'><span class='penalty-amount'>${amt:,.2f}</span></div>"
        for amt in shown
    )
|
232 |
+
|
233 |
+
def format_examples(sentences: List[str], limit: int = 3) -> str:
    """Render example sentences as numbered HTML blocks.

    Args:
        sentences: Sentences to show as examples.
        limit: Maximum number of sentences to render. Defaults to 3, the cap
            that was previously hard-coded; negative values are treated as 0.

    Returns:
        One ``example-clause`` div per rendered sentence, numbered from 1 and
        joined with newlines, or a muted placeholder message when
        ``sentences`` is empty.
    """
    # Imported locally so the block does not depend on a file-level import.
    from html import escape

    if not sentences:
        return "<div style='color: #6c757d;'>No penalty clauses found</div>"

    # The sentences are document text, so they are escaped before being
    # placed in the page; otherwise markup in the document would be rendered
    # rather than displayed.
    return "\n".join(
        f"""
        <div class='example-clause'>
            <span class='example-number'>{number}.</span> {escape(str(sent))}
        </div>
        """
        for number, sent in enumerate(sentences[:max(limit, 0)], start=1)
    )
|
242 |
|
243 |
def analyze_pdf(file_obj) -> List:
|
244 |
"""Main analysis function for Gradio interface"""
|
|
|
269 |
total_penalties, penalty_values, total_obligations, total_delays
|
270 |
)
|
271 |
|
272 |
+
# Generate risk display
|
273 |
+
risk_display = create_risk_display(risk_score, risk_level)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
# Find example sentences with penalties
|
276 |
penalty_sentences = []
|
|
|
278 |
if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
|
279 |
penalty_sentences.append(sentence.strip())
|
280 |
|
281 |
+
# Format all results
|
282 |
+
penalty_html = f"""
|
283 |
+
<div class='result-section'>
|
284 |
+
<div class='result-title'>π Penalty Clauses: <strong>{total_penalties}</strong> found</div>
|
285 |
+
{format_clauses(penalty_counts)}
|
286 |
+
</div>
|
287 |
+
"""
|
288 |
+
|
289 |
+
amounts_html = f"""
|
290 |
+
<div class='result-section'>
|
291 |
+
<div class='result-title'>π° Penalty Amounts: <strong>{len(penalty_values)}</strong> found</div>
|
292 |
+
{format_penalty_amounts(penalty_values)}
|
293 |
+
</div>
|
294 |
+
"""
|
295 |
+
|
296 |
+
obligation_html = f"""
|
297 |
+
<div class='result-section'>
|
298 |
+
<div class='result-title'>βοΈ Obligation Clauses: <strong>{total_obligations}</strong> found</div>
|
299 |
+
{format_clauses(obligation_counts)}
|
300 |
+
</div>
|
301 |
+
"""
|
302 |
+
|
303 |
+
delay_html = f"""
|
304 |
+
<div class='result-section'>
|
305 |
+
<div class='result-title'>β±οΈ Delay Clauses: <strong>{total_delays}</strong> found</div>
|
306 |
+
{format_clauses(delay_counts)}
|
307 |
+
</div>
|
308 |
+
"""
|
309 |
+
|
310 |
+
examples_html = f"""
|
311 |
+
<div class='result-section'>
|
312 |
+
<div class='result-title'>π Example Penalty Clauses</div>
|
313 |
+
{format_examples(penalty_sentences)}
|
314 |
+
</div>
|
315 |
+
"""
|
316 |
|
|
|
317 |
return [
|
318 |
risk_display,
|
319 |
+
penalty_html,
|
320 |
+
amounts_html,
|
321 |
+
obligation_html,
|
322 |
+
delay_html,
|
323 |
+
examples_html
|
|
|
|
|
|
|
324 |
]
|
325 |
except Exception as e:
|
326 |
+
error_html = f"""
|
327 |
+
<div class='result-section' style='background: #fff3cd;'>
|
328 |
+
<div class='result-title'>β Error</div>
|
329 |
+
<div>{str(e)}</div>
|
330 |
+
</div>
|
331 |
+
"""
|
332 |
+
return [error_html] * 6
|
333 |
|
334 |
# Create Gradio interface
|
335 |
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
|
336 |
+
gr.Markdown("""
|
337 |
+
<div style='text-align: center; margin-bottom: 30px;'>
|
338 |
+
<h1 style='margin-bottom: 10px;'>π PDF Contract Risk Analyzer</h1>
|
339 |
+
<p style='color: #6c757d;'>Upload a contract PDF to analyze penalties, obligations, and delays</p>
|
340 |
+
</div>
|
341 |
+
""")
|
342 |
|
343 |
with gr.Row():
|
344 |
+
with gr.Column(scale=1):
|
345 |
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
346 |
+
submit_btn = gr.Button("Analyze Contract", variant="primary")
|
347 |
|
348 |
+
with gr.Column(scale=3):
|
349 |
+
gr.Markdown("### π Risk Assessment Summary")
|
350 |
+
risk_display = gr.HTML()
|
|
|
|
|
|
|
|
|
351 |
|
352 |
with gr.Row():
|
353 |
with gr.Column():
|
354 |
+
penalty_count = gr.HTML()
|
355 |
+
penalty_amounts = gr.HTML()
|
|
|
356 |
|
357 |
with gr.Column():
|
358 |
+
obligation_count = gr.HTML()
|
359 |
+
delay_count = gr.HTML()
|
|
|
|
|
|
|
|
|
360 |
|
361 |
+
penalty_examples = gr.HTML()
|
|
|
|
|
362 |
|
363 |
submit_btn.click(
|
364 |
fn=analyze_pdf,
|
365 |
inputs=file_input,
|
366 |
+
outputs=[risk_display, penalty_count, penalty_amounts,
|
367 |
+
obligation_count, delay_count, penalty_examples]
|
|
|
368 |
)
|
369 |
|
370 |
if __name__ == "__main__":
|