"""Gradio app that removes "Submission ID trn:oid:::" markers from a PDF."""

import io  # noqa: F401  (kept from the original import list)
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr

# Text that introduces each submission ID in the document.
SUBMISSION_ID_MARKER = "Submission ID trn:oid:::"
# Heading on the first page; the strip directly above it gets covered.
DETAILS_MARKER = "Document Details"
# The search only matches the marker text, not the digits that follow it, so
# the redaction box is widened to the right by this many points.
ID_PADDING = 100
# Height (in points) of the white strip placed above DETAILS_MARKER.
HEADER_COVER_HEIGHT = 50
WHITE = (1, 1, 1)


def redact_submission_ids(input_pdf):
    """Redact Submission IDs and blank the strip above 'Document Details'.

    Submission IDs are removed with real redaction annotations, so the text
    is deleted from the page content rather than merely hidden under a
    drawing. On the first page, a full-width white rectangle is additionally
    drawn directly above every "Document Details" match.

    Args:
        input_pdf: The uploaded file as delivered by ``gr.File``: either a
            path (``str`` / ``os.PathLike``) or an object exposing the path
            as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A ``(output_path, filename)`` tuple: the path of the redacted PDF
        written to a fresh temporary directory, and the base name of the
        uploaded file. ``(None, "")`` when no file was provided.
    """
    if input_pdf is None:
        return None, ""

    # Gradio hands over either a plain path or a tempfile-like wrapper
    # depending on version/configuration; normalise both to a path string.
    source_path = os.fspath(getattr(input_pdf, "name", input_pdf))
    filename = os.path.basename(source_path)

    with fitz.open(source_path) as doc:
        for page_num, page in enumerate(doc):
            # Locate everything first: applying redactions rewrites the page
            # text and could otherwise affect the second search.
            id_hits = page.search_for(SUBMISSION_ID_MARKER)
            details_hits = page.search_for(DETAILS_MARKER) if page_num == 0 else []

            for hit in id_hits:
                page.add_redact_annot(
                    fitz.Rect(hit.x0, hit.y0, hit.x1 + ID_PADDING, hit.y1),
                    fill=WHITE,
                )
            if id_hits:
                # Physically removes the text under the annotations.
                page.apply_redactions()

            # Drawn after the redactions so the cover rectangle itself can
            # never be touched by them.
            for hit in details_hits:
                page.draw_rect(
                    fitz.Rect(0, hit.y0 - HEADER_COVER_HEIGHT, page.rect.x1, hit.y0),
                    color=WHITE,
                    fill=WHITE,
                )

        # gr.File outputs expect a path on disk, not an in-memory stream.
        # The temporary directory is left for the OS to clean up.
        out_dir = tempfile.mkdtemp(prefix="redacted_pdf_")
        stem = os.path.splitext(filename)[0] or "document"
        output_path = os.path.join(out_dir, f"{stem}_redacted.pdf")
        # garbage/deflate drop unreferenced objects and compress the streams.
        doc.save(output_path, garbage=4, deflate=True)

    return output_path, filename


# Create Gradio interface
iface = gr.Interface(
    fn=redact_submission_ids,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.File(label="Download Redacted PDF"),
        gr.Textbox(label="File Name"),
    ],
    live=False,
    title="Redact Submission IDs",
)

if __name__ == "__main__":
    iface.launch(debug=True)