Spaces:
Sleeping
Sleeping
File size: 1,784 Bytes
8c6e2dd eff8079 8c6e2dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import io
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr
def redact_submission_ids(input_pdf):
    """Redact Submission IDs from a PDF and mask the first-page header.

    On every page, each hit for the literal text ``Submission ID trn:oid:::``
    is widened by 100 points to the right (to cover the ID that follows the
    label) and removed with a real PDF redaction, so the text is deleted
    from the page content rather than merely painted over. On the first page
    a white band 50 points tall is additionally drawn across the full page
    width directly above every "Document Details" hit.

    Args:
        input_pdf: The uploaded file as handed over by ``gr.File``: either a
            filesystem path (``str`` / ``os.PathLike``) or an object that
            exposes the path through its ``.name`` attribute.

    Returns:
        A ``(output_path, filename)`` tuple. ``output_path`` is the path of
        the redacted PDF written to a fresh temporary directory, and
        ``filename`` is the path of the uploaded file as it was received.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if input_pdf is None:
        raise gr.Error("Please upload a PDF file.")

    # gr.File may deliver a plain path or a file wrapper; normalise to a path.
    if isinstance(input_pdf, (str, os.PathLike)):
        filename = os.fspath(input_pdf)
    else:
        filename = input_pdf.name

    doc = fitz.open(filename)
    try:
        for page_num, page in enumerate(doc):
            # Mark every Submission ID label (plus room for the ID itself).
            id_hits = page.search_for("Submission ID trn:oid:::")
            for inst in id_hits:
                rect = fitz.Rect(inst.x0, inst.y0, inst.x1 + 100, inst.y1)
                page.add_redact_annot(rect, fill=(1, 1, 1))
            if id_hits:
                # Deletes the covered text from the page; a drawn rectangle
                # alone would leave it extractable via copy/paste.
                page.apply_redactions()

            # First page only: white band above "Document Details". Drawn
            # after the redactions so applying them cannot disturb it.
            if page_num == 0:
                for inst in page.search_for("Document Details"):
                    rect = fitz.Rect(0, inst.y0 - 50, page.rect.x1, inst.y0)
                    page.draw_rect(rect, color=(1, 1, 1), fill=(1, 1, 1))

        # A gr.File output is served from a file path, so write the result
        # to disk. NOTE: the temporary directory is not removed here.
        out_dir = tempfile.mkdtemp(prefix="redacted_pdf_")
        output_path = os.path.join(
            out_dir, "redacted_" + os.path.basename(filename)
        )
        doc.save(output_path)
    finally:
        doc.close()

    return output_path, filename
# Gradio UI: a single PDF upload feeds redact_submission_ids; the results
# are shown as a downloadable file plus a text box.
iface = gr.Interface(
    title="Redact Submission IDs",
    fn=redact_submission_ids,
    inputs=gr.File(label="Upload PDF"),  # replaces the older gr.inputs.File
    outputs=[
        gr.File(label="Download Redacted PDF"),  # replaces gr.outputs.File
        gr.Textbox(label="File Name"),
    ],
    live=False,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(debug=True)
|