Tirath5504 committed on
Commit
30646ea
·
verified ·
1 Parent(s): 6074ede

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +390 -0
app.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import PyPDF2
4
+ from groq import Groq
5
+ import smtplib
6
+ from fpdf import FPDF
7
+ from email.mime.multipart import MIMEMultipart
8
+ from email.mime.text import MIMEText
9
+ from email.mime.base import MIMEBase
10
+ from email import encoders
11
+ import matplotlib.pyplot as plt
12
+ import numpy as np
13
+ import spacy
14
+
15
# Groq API client shared by every model call in this module. No key is passed
# here, so the SDK has to find its credentials itself (presumably from the
# environment -- confirm against the deployment configuration).
client = Groq()

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model package is not installed yet. Download it with the very
    # interpreter that is running this app (a bare "python" on PATH may belong
    # to a different environment) and fail loudly if the download fails.
    import subprocess
    import sys

    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
23
+
24
# Module-level state: results of processed files, filled in by process_files()
# as {file name: {task name: result text}}.
generated_summaries = {}

# Phrases whose presence in a document is reported as a potential risk.
RISK_KEYWORDS = [
    "penalty",
    "breach",
    "liability",
    "default",
    "hidden obligations",
    "indemnity",
    "terms of service",
    "non-compliance",
    "force majeure",
]

# Regulations tracked by name, with their review status.
REGULATORY_UPDATES = [
    dict(
        name="GDPR",
        last_updated="2023-12-01",
        description="General Data Protection Regulation updates on user consent.",
        status="Compliant",
    ),
    dict(
        name="PCI DSS",
        last_updated="2024-01-01",
        description="Payment Card Industry Data Security Standard updates for credit card transactions.",
        status="Pending Review",
    ),
]

# Regulatory areas, each recognised through a list of indicative keywords.
REGULATORY_CATEGORIES = [
    dict(
        category="Data Privacy",
        keywords=["personal data", "user consent", "data breach", "GDPR", "data protection"],
        description="Regulations related to user data privacy and protection.",
    ),
    dict(
        category="Financial Compliance",
        keywords=["payment card", "PCI DSS", "credit card security", "financial transactions"],
        description="Regulations related to financial data security and transactions.",
    ),
    dict(
        category="Health Information Compliance",
        keywords=["HIPAA", "health records", "patient data", "medical privacy"],
        description="Regulations related to the security and privacy of health information.",
    ),
]
65
+
66
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        One string containing the extracted text of all pages in page order.
        A page for which nothing is extracted contributes an empty string.
    """
    with open(file_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` guards against a page yielding no text object at all, which
        # would otherwise break the concatenation with a TypeError.
        return "".join(page.extract_text() or "" for page in reader.pages)
73
+
74
def extract_text_from_txt(file_path):
    """Return the full contents of the text file at *file_path*.

    The file is decoded as UTF-8. Bytes that are not valid UTF-8 are replaced
    with U+FFFD instead of raising ``UnicodeDecodeError``, so one stray byte in
    an uploaded document does not abort processing of the whole batch.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents as a single string.
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        return file.read()
77
+
78
def split_text_into_chunks(text, chunk_size=2000):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings that concatenate back to *text*. Empty input
        yields an empty list.

    Raises:
        ValueError: If *chunk_size* is zero or negative (a negative size used
            to return an empty list silently, dropping the whole document).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
80
+
81
def process_with_groq(text_chunks, task_type):
    """Run *task_type* on every chunk through the Groq chat model.

    Each non-blank chunk is sent as its own request; the replies are joined
    with single spaces. A failing request does not abort the run: its error
    text is recorded in place of the reply.

    Args:
        text_chunks: Iterable of document text pieces.
        task_type: Task description placed at the top of the prompt.

    Returns:
        The per-chunk outputs joined into one string ("" when there is nothing
        to process).
    """
    results = []
    for chunk in text_chunks:
        # Whitespace-only chunks carry no content to analyse; skip them, as
        # detect_risks() does, rather than asking the model about nothing.
        if not chunk.strip():
            continue

        prompt = f"Task: {task_type}\n\nDocument:\n{chunk}\n\nOutput:"
        try:
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama-3.3-70b-versatile",
            )
            choices = getattr(response, "choices", None)
            content = choices[0].message.content if choices else None
            # A missing content must not reach " ".join() below, where it
            # would raise TypeError outside of this try block.
            if content is None:
                content = "No response received from the model."
            results.append(content)
        except Exception as e:
            results.append(f"Error: {str(e)}")
    return " ".join(results)
97
+
98
def detect_risks(text_chunks):
    """Report every risk keyword found in each chunk of text.

    Matching is a case-insensitive substring search against RISK_KEYWORDS.
    Blank chunks are ignored.

    Args:
        text_chunks: Iterable of document text pieces.

    Returns:
        A list with one message per (chunk, keyword) hit, each quoting the
        first 150 characters of the chunk, or the single entry
        "No risks detected." when nothing matched.
    """
    risks_found = []
    for chunk in text_chunks:
        if not chunk.strip():
            continue

        lowered_chunk = chunk.lower()  # lower-case once per chunk, not per keyword
        risks_found.extend(
            f"Risk detected: '{keyword}' in text: {chunk[:150]}..."
            for keyword in RISK_KEYWORDS
            if keyword.lower() in lowered_chunk
        )

    if not risks_found:
        risks_found.append("No risks detected.")

    return risks_found
112
+
113
def check_regulatory_compliance(text_chunks):
    """List the tracked regulations that the document mentions by name.

    Each entry of REGULATORY_UPDATES whose name occurs (case-insensitively) in
    any chunk is reported once, in order of first appearance, even when
    several chunks mention it.

    Args:
        text_chunks: Iterable of document text pieces.

    Returns:
        A list of messages giving the regulation name, description, status and
        last-updated date, or the single entry
        "No regulatory issues detected." when none is mentioned.
    """
    compliance_issues = []
    reported = set()  # regulation names already reported

    for chunk in text_chunks:
        lowered_chunk = chunk.lower()
        for update in REGULATORY_UPDATES:
            name = update["name"]
            if name in reported or name.lower() not in lowered_chunk:
                continue
            reported.add(name)
            compliance_issues.append(
                f"Regulatory Update Detected: {name} - {update['description']}\n"
                f"Status: {update.get('status', 'Unknown')} | Last Updated: {update.get('last_updated', 'N/A')}"
            )

    # Add a fallback message if no updates are found
    if not compliance_issues:
        compliance_issues.append("No regulatory issues detected.")

    return compliance_issues
130
+
131
def check_regulatory_compliance_with_nlp(text_chunks):
    """List the regulatory categories whose keywords occur in the document.

    A category from REGULATORY_CATEGORIES is reported once, in order of first
    appearance, as soon as any of its keywords is found (case-insensitively)
    in a chunk. The message does not name the keyword, so reporting it again
    for further keywords or chunks would only repeat the same line.

    Args:
        text_chunks: Iterable of document text pieces.

    Returns:
        A list of "Detected Regulatory Category" messages, or the single entry
        "No regulatory categories detected." when nothing matched.
    """
    compliance_issues = []
    reported = set()  # category names already reported

    for chunk in text_chunks:
        # NOTE(review): only doc.text is consulted below; none of the
        # pipeline's annotations are used by this function.
        doc = nlp(chunk)
        lowered_text = doc.text.lower()
        for category in REGULATORY_CATEGORIES:
            label = category["category"]
            if label in reported:
                continue
            if any(keyword.lower() in lowered_text for keyword in category["keywords"]):
                reported.add(label)
                compliance_issues.append(
                    f"Detected Regulatory Category: {label} - {category['description']}"
                )

    # Add a fallback message if no categories are matched
    if not compliance_issues:
        compliance_issues.append("No regulatory categories detected.")

    return compliance_issues
148
+
149
def answer_question_with_summary(summary, question):
    """Ask the Groq chat model *question*, using *summary* as the context.

    Args:
        summary: Text supplied to the model as context.
        question: The user's question.

    Returns:
        The model's answer with surrounding whitespace stripped,
        "No response received from the model." when the reply carries no
        choices or no content, or "Error: <details>" when the request fails.
    """
    prompt = f"Context:\n{summary}\n\nQuestion: {question}\n\nAnswer:"
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",
        )
        choices = getattr(response, "choices", None)
        content = choices[0].message.content if choices else None
        # Without this check a missing content would surface to the user as an
        # AttributeError message from .strip() instead of the intended text.
        if content is None:
            return "No response received from the model."
        return content.strip()
    except Exception as e:
        return f"Error: {str(e)}"
162
+
163
def send_email(receiver_email, email_subject, sender_email, sender_password, email_body, pdf_filename):
    """Render *email_body* to a PDF and mail it as an attachment through Gmail.

    The PDF is written to *pdf_filename*, attached to a short plain-text
    message, sent over smtp.gmail.com:587 with STARTTLS, and then deleted
    again whether or not sending succeeded.

    Args:
        receiver_email: Address the message is sent to.
        email_subject: Subject line of the message.
        sender_email: Address used both as sender and as SMTP login.
        sender_password: Password used for the SMTP login.
        email_body: Text rendered into the attached PDF.
        pdf_filename: Path of the temporary PDF; its base name is also the
            attachment's file name.

    Returns:
        A success message, or "Failed to send email: <details>" on any error.
    """
    try:
        # The built-in Arial font only covers Latin-1. Text that contains
        # other characters (typographic quotes, bullets, emoji) would make
        # PDF generation fail, so such characters are replaced with "?".
        printable_body = email_body.encode("latin-1", "replace").decode("latin-1")

        # Create PDF
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(190, 10, printable_body)
        pdf.output(pdf_filename)

        # Build the message. From/To/Subject are real headers, so they are not
        # repeated inside the body text.
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = receiver_email
        msg['Subject'] = email_subject
        msg.attach(MIMEText("Please find the attached PDF summary for your review.\n", 'plain'))

        # Attach PDF
        with open(pdf_filename, "rb") as pdf_file:
            attachment = pdf_file.read()

        part = MIMEBase('application', 'octet-stream')
        part.set_payload(attachment)
        encoders.encode_base64(part)
        # Passing the name as a parameter lets the email package quote/encode
        # it, so names with spaces or non-ASCII characters stay intact.
        part.add_header(
            "Content-Disposition",
            "attachment",
            filename=os.path.basename(pdf_filename),
        )
        msg.attach(part)

        # The context manager closes the connection even if login or sending
        # raises.
        with smtplib.SMTP("smtp.gmail.com", 587) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)

        return "📧 Email sent successfully with the attached PDF!"
    except Exception as e:
        return f"Failed to send email: {str(e)}"
    finally:
        # Best-effort cleanup of the temporary PDF; it may not exist if PDF
        # generation itself failed, and a cleanup problem must not mask the
        # result returned above.
        try:
            os.remove(pdf_filename)
        except OSError:
            pass
209
+
210
def process_files(files, tasks):
    """Extract text from each uploaded file and run the selected tasks on it.

    Supported inputs are ``.pdf`` and ``.txt`` files. "Risk Detection" and
    "Regulatory Update Tracker" are handled by the local keyword checks; any
    other task name is forwarded to the Groq model as the task description.
    Results of processed files are also stored in the module-level
    ``generated_summaries``.

    Args:
        files: Uploaded files, each either an object with a ``name``
            attribute holding its path or a plain path string.
        tasks: Names of the tasks to run.

    Returns:
        The string "Please upload files and select tasks." when *files* or
        *tasks* is empty; otherwise a tuple ``(results, all_text_chunks)``
        where ``results`` maps file name to ``{task: output}`` (or to the
        string "Unsupported file format") and ``all_text_chunks`` maps file
        name to its list of text chunks.
    """
    if not files or not tasks:
        return "Please upload files and select tasks."

    results = {}
    all_text_chunks = {}

    for file in files:
        # Uploads may arrive as file-like wrappers exposing .name or as bare
        # path strings; accept both.
        file_path = getattr(file, "name", file)
        file_name = os.path.basename(file_path)
        file_extension = os.path.splitext(file_name)[1].lower()

        if file_extension == '.pdf':
            text = extract_text_from_pdf(file_path)
        elif file_extension == '.txt':
            text = extract_text_from_txt(file_path)
        else:
            results[file_name] = "Unsupported file format"
            continue

        text_chunks = split_text_into_chunks(text)
        all_text_chunks[file_name] = text_chunks
        file_results = {}

        for task in tasks:
            if task == "Risk Detection":
                file_results[task] = "\n".join(detect_risks(text_chunks))
            elif task == "Regulatory Update Tracker":
                compliance_issues = check_regulatory_compliance(text_chunks)
                category_issues = check_regulatory_compliance_with_nlp(text_chunks)
                file_results[task] = "\n".join(compliance_issues) + "\n\n" + "\n".join(category_issues)
            else:
                file_results[task] = process_with_groq(text_chunks, task)

        results[file_name] = file_results
        generated_summaries[file_name] = file_results

    return results, all_text_chunks
247
+
248
def display_results(results):
    """Render processing results as a Markdown document.

    Args:
        results: Either a plain message string, which is returned unchanged,
            or a mapping of file name to that file's results. A file's results
            are normally a ``{task: output}`` mapping, but may be a plain
            message string (e.g. "Unsupported file format").

    Returns:
        Markdown with one "## Results for <file>:" section per file and one
        "### Task: <task>" subsection per task.
    """
    if isinstance(results, str):
        return results

    sections = []
    for file_name, file_results in results.items():
        sections.append(f"## Results for {file_name}:\n\n")
        if isinstance(file_results, dict):
            sections.extend(
                f"### Task: {task}\n\n{result}\n\n---\n\n"
                for task, result in file_results.items()
            )
        else:
            # Per-file message instead of task results; show it as-is rather
            # than failing on the missing .items().
            sections.append(f"{file_results}\n\n---\n\n")

    return "".join(sections)
259
+
260
def email_summary(file_task_selection, receiver_email, email_subject, sender_email, sender_password):
    """Email the stored result chosen in the "<file name> - <task>" dropdown.

    Args:
        file_task_selection: Dropdown value of the form "<file name> - <task>".
        receiver_email: Address the summary is sent to.
        email_subject: Subject line of the message.
        sender_email: Sender address, also used as SMTP login.
        sender_password: Password for the SMTP login.

    Returns:
        "Please fill in all required fields." when a required value is
        missing, the status string from send_email(), or "Error: <details>"
        when the selection cannot be resolved.
    """
    if not file_task_selection or not receiver_email or not sender_email or not sender_password:
        return "Please fill in all required fields."

    try:
        # Split on the LAST separator only: file names may themselves contain
        # " - ", whereas none of the task names do.
        file_name, task = file_task_selection.rsplit(" - ", 1)
        email_body = generated_summaries[file_name][task]
        pdf_filename = f"{file_name}_{task}.pdf"

        return send_email(receiver_email, email_subject, sender_email, sender_password, email_body, pdf_filename)
    except Exception as e:
        return f"Error: {str(e)}"
273
+
274
def answer_questions(file_name, question):
    """Answer *question* from the stored "Summarize" result of *file_name*.

    Returns a prompt to complete the inputs when either argument is empty, a
    notice when the file has no stored summary, and otherwise whatever
    answer_question_with_summary() returns.
    """
    if not (file_name and question):
        return "Please select a file and enter a question."

    try:
        stored_summary = generated_summaries[file_name]["Summarize"]
    except KeyError:
        # Unknown file, or the file was processed without the Summarize task.
        return "No summary available for the selected file."

    return answer_question_with_summary(stored_summary, question)
284
+
285
# Gradio UI: three tabs (process documents, email a result, ask questions),
# followed by the event wiring that connects the buttons to the functions above.
with gr.Blocks(title="AI-Driven Legal Document Analysis") as app:
    gr.Markdown("""
# 📜 Advanced AI-Driven Legal Document Summarization and Risk Assessment

**Welcome to the Enhanced Legal Document Assistant!**
- 📜 Provide readable legal summaries
- 📑 Extract key clauses from legal documents
- ⚖️ Detect potential legal risks
- 📰 Track regulatory updates
- ✉️ Send summaries directly via email
- ❓ Ask Questions Based on the Summary
""")

    with gr.Tab("Process Documents"):
        with gr.Row():
            with gr.Column():
                # Multi-file upload is selected with file_count; extensions in
                # file_types are written with their leading dot.
                files = gr.File(
                    label="Upload PDFs or Text Files",
                    file_types=[".pdf", ".txt"],
                    file_count="multiple",
                )
                task_checkboxes = gr.CheckboxGroup(
                    ["Summarize", "Extract Clauses", "Risk Detection", "Regulatory Update Tracker"],
                    label="Choose Tasks"
                )
                process_btn = gr.Button("Process Documents")

            with gr.Column():
                progress = gr.Plot(label="Processing Progress")
                results_md = gr.Markdown(label="Results")

    with gr.Tab("Email Summary"):
        with gr.Row():
            with gr.Column():
                file_task_dropdown = gr.Dropdown(label="Select a Task Summary to Send", choices=[])
                receiver_email = gr.Textbox(label="Receiver Email")
                email_subject = gr.Textbox(label="Email Subject", value="Legal Document Summary")
                sender_email = gr.Textbox(label="Sender Email (Gmail)", value="[email protected]")
                sender_password = gr.Textbox(label="Sender Email Password", type="password")
                send_email_btn = gr.Button("Send Email")

            with gr.Column():
                email_result = gr.Textbox(label="Email Status")

    with gr.Tab("Ask Questions"):
        with gr.Row():
            with gr.Column():
                file_dropdown = gr.Dropdown(label="Select a File Summary", choices=[])
                question = gr.Textbox(label="Ask a Question")
                ask_btn = gr.Button("Get Answer")

            with gr.Column():
                answer = gr.Textbox(label="Answer")

    def process_with_progress(files, tasks):
        """Process the uploads and stream plot/result/dropdown updates.

        This is a generator: every ``yield`` supplies one value for each of
        the four outputs wired below (plot, results markdown, file/task
        dropdown, file dropdown).
        """
        if not files or not tasks:
            # A generator's return value never reaches the UI, so the message
            # has to be yielded, with one value per output component.
            yield None, "Please upload files and select tasks.", gr.update(), gr.update()
            return

        total_steps = len(files) * len(tasks)

        results, _ = process_files(files, tasks)

        # Rejected files are stored as a plain message string instead of a
        # task mapping; they have nothing to email or to ask questions about.
        processed = {
            file_name: file_results
            for file_name, file_results in results.items()
            if isinstance(file_results, dict)
        }
        file_task_choices = [
            f"{file_name} - {task}"
            for file_name, file_results in processed.items()
            for task in file_results
        ]
        file_choices = list(processed)
        rendered_results = display_results(results)

        # Create progress visualization
        progress_data = []
        fig, ax = plt.subplots(figsize=(8, 4))
        try:
            # For each step, update progress
            for current_step in range(1, total_steps + 1):
                progress_data.append((current_step / total_steps) * 100)

                ax.clear()
                ax.plot(progress_data, color="blue", marker="o")
                ax.set_title("Processing Progress")
                ax.set_xlabel("Steps")
                ax.set_ylabel("Progress (%)")
                ax.set_ylim(0, 100)
                ax.grid(True)

                yield (
                    fig,
                    rendered_results,
                    gr.update(choices=file_task_choices),
                    gr.update(choices=file_choices),
                )
        finally:
            # pyplot keeps a reference to every figure it creates; close this
            # one so repeated runs do not accumulate figures in memory.
            plt.close(fig)

    process_btn.click(
        process_with_progress,
        inputs=[files, task_checkboxes],
        outputs=[progress, results_md, file_task_dropdown, file_dropdown]
    )

    send_email_btn.click(
        email_summary,
        inputs=[file_task_dropdown, receiver_email, email_subject, sender_email, sender_password],
        outputs=[email_result]
    )

    ask_btn.click(
        answer_questions,
        inputs=[file_dropdown, question],
        outputs=[answer]
    )

if __name__ == "__main__":
    app.launch()