File size: 1,784 Bytes
8c6e2dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eff8079
 
8c6e2dd
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
import io
import fitz  # PyMuPDF

def redact_submission_ids(input_pdf):
    """Redact Submission IDs and blank the strip above 'Document Details' on page 1.

    Every occurrence of the text "Submission ID trn:oid:::" is redacted on all
    pages, with the covered area widened to the right so the ID that follows
    the label is included. On the first page only, a full-width band directly
    above each "Document Details" match is redacted as well.

    Redaction annotations are used instead of plain white drawings so the
    covered text is removed from the page content rather than merely hidden
    under a rectangle (where it would still be extractable).

    Args:
        input_pdf: The upload handed over by the Gradio File input: either a
            path string or an object exposing the path via ``.name``. ``None``
            (nothing uploaded) is accepted.

    Returns:
        A ``(output_path, filename)`` tuple. ``output_path`` is the path of
        the redacted PDF written to a fresh temporary directory, and
        ``filename`` is the input path as reported by the upload. When
        ``input_pdf`` is ``None`` the result is ``(None, "")``.
    """
    import os
    import tempfile

    # Nothing uploaded: give the UI empty outputs instead of crashing.
    if input_pdf is None:
        return None, ""

    # Horizontal padding (points) added to the right of the matched label so
    # the numeric ID following it is covered too.
    id_padding = 100
    # Height (points) of the band blanked above "Document Details".
    header_band_height = 50
    white = (1, 1, 1)

    # Accept both a plain path string and a file-like wrapper with `.name`.
    source_path = getattr(input_pdf, "name", input_pdf)
    filename = source_path

    # The File output component needs a path on disk, not an in-memory
    # stream; keep the original base name so the download is recognisable.
    output_dir = tempfile.mkdtemp(prefix="redacted_")
    output_path = os.path.join(output_dir, os.path.basename(str(source_path)))

    # The context manager guarantees the document is closed even on errors.
    with fitz.open(source_path) as doc:
        for page_num, page in enumerate(doc):
            targets = [
                fitz.Rect(hit.x0, hit.y0, hit.x1 + id_padding, hit.y1)
                for hit in page.search_for("Submission ID trn:oid:::")
            ]

            # Only the first page carries the header band to be blanked.
            if page_num == 0:
                targets.extend(
                    fitz.Rect(0, hit.y0 - header_band_height, page.rect.x1, hit.y0)
                    for hit in page.search_for("Document Details")
                )

            if not targets:
                continue

            for rect in targets:
                page.add_redact_annot(rect, fill=white)
            # Physically removes the covered content and paints the fill.
            page.apply_redactions()

        doc.save(output_path)

    return output_path, filename

# Gradio UI: a single PDF upload goes in; the redacted PDF and the reported
# file name come out.
iface = gr.Interface(
    title="Redact Submission IDs",
    fn=redact_submission_ids,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.File(label="Download Redacted PDF"),
        gr.Textbox(label="File Name"),
    ],
    live=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(debug=True)