Sunab committed on
Commit
8c6e2dd
·
1 Parent(s): a25e0db

Add application file

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import io
3
+ import fitz # PyMuPDF
4
+
5
def redact_submission_ids(input_pdf):
    """Redact Submission IDs and blank the band above 'Document Details'.

    On every page, each occurrence of the literal prefix
    ``Submission ID trn:oid:::`` is located and the hit rectangle, widened
    to the right by a fixed padding so it also spans the ID that follows
    the prefix, is turned into a redaction annotation. Applying the
    redactions removes the underlying text from the page instead of merely
    painting over it, so the ID can no longer be recovered by text
    extraction or copy/paste.

    On the first page only, a white rectangle spanning the full page width
    is drawn directly above every occurrence of ``Document Details``.

    Args:
        input_pdf: Uploaded file object exposing a ``.name`` attribute that
            holds the path of the PDF. A plain path string is accepted too.

    Returns:
        A ``(output_pdf, filename)`` tuple: ``output_pdf`` is an
        ``io.BytesIO`` positioned at offset 0 holding the processed PDF,
        ``filename`` is the path the PDF was read from.
    """
    white = (1, 1, 1)
    id_padding = 100   # extra width (points) to the right of the prefix hit
    band_height = 50   # height (points) of the band above "Document Details"

    # Fall back to the argument itself when it is already a path string.
    filename = getattr(input_pdf, "name", input_pdf)

    doc = fitz.open(filename)
    try:
        for page_num, page in enumerate(doc):
            # Run both searches before any content is removed, so that
            # applying the redactions cannot affect the second lookup.
            id_hits = page.search_for("Submission ID trn:oid:::")
            details_hits = page.search_for("Document Details") if page_num == 0 else []

            for hit in id_hits:
                target = fitz.Rect(hit.x0, hit.y0, hit.x1 + id_padding, hit.y1)
                page.add_redact_annot(target, fill=white)
            if id_hits:
                # Physically strips the content under the annotations.
                page.apply_redactions()

            for hit in details_hits:
                band = fitz.Rect(0, hit.y0 - band_height, page.rect.x1, hit.y0)
                # NOTE(review): this is only a visual cover; whatever lies
                # under the band stays extractable. Switch to a redaction
                # annotation if that content is sensitive as well.
                page.draw_rect(band, color=white, fill=white)

        # Serialise the processed document into memory.
        output_pdf = io.BytesIO()
        doc.save(output_pdf)
    finally:
        # Release the document even when searching or saving fails.
        doc.close()

    output_pdf.seek(0)
    # NOTE(review): Gradio's File output is documented as taking a file
    # path; confirm the installed version accepts a BytesIO here.
    return output_pdf, filename
34
+
35
# Gradio front end: one PDF upload in, the processed PDF plus its file name out.
# NOTE(review): gr.inputs / gr.outputs are the legacy component namespaces;
# confirm the pinned Gradio version still provides them.
_upload_field = gr.inputs.File(label="Upload PDF")
_result_fields = [
    gr.outputs.File(label="Download Redacted PDF"),
    gr.outputs.Textbox(label="File Name"),
]

iface = gr.Interface(
    title="Redact Submission IDs",
    fn=redact_submission_ids,
    inputs=_upload_field,
    outputs=_result_fields,
    live=False,
)

# Start the web app only when this file is executed directly.
if __name__ == '__main__':
    iface.launch(debug=True)