import csv
import os

import fitz  # PyMuPDF
import gradio as gr


def _resolve_pdf_path(pdf_file):
    """Return a filesystem path for the value Gradio hands to a callback.

    Args:
        pdf_file: Either a path (``str`` / ``os.PathLike``) or an object
            exposing the path as ``.name`` (e.g. a temporary-file wrapper).

    Returns:
        The path of the uploaded file as a string.

    Raises:
        gr.Error: If no file was supplied (``pdf_file`` is ``None``).
    """
    if pdf_file is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a PDF file.")
    # Check for real paths first: pathlib.Path also has a ``.name`` attribute,
    # but there it is only the final path component, not the full path.
    if isinstance(pdf_file, (str, os.PathLike)):
        return os.fspath(pdf_file)
    return pdf_file.name


def pdf_to_csv(pdf_file) -> str:
    """Write a one-column CSV describing the upload and return its path.

    The PDF's contents are not read here: the rows written are the upload's
    base file name followed by one hard-coded sample line.

    Args:
        pdf_file: The uploaded file, as a path or an object with ``.name``.

    Returns:
        The path of the CSV file that was written ("extracted_text.csv",
        relative to the current working directory).

    Raises:
        gr.Error: If no file was supplied.
    """
    file_name = os.path.basename(_resolve_pdf_path(pdf_file))
    text_lines = [
        f"File Name: {file_name}",
        ' 地区 (Region): 2010\n* 收入/支出金额 (Income/Expense Amount): +10,000.00\n* ',
    ]

    csv_filename = "extracted_text.csv"
    # utf-8-sig writes a BOM at the start of the file; newline="" is what the
    # csv module expects so that it controls line endings itself.
    with open(csv_filename, "w", newline="", encoding="utf-8-sig") as csvfile:
        # Each entry goes into its own single-cell row.
        csv.writer(csvfile).writerows([line] for line in text_lines)

    # Return the CSV file path so Gradio can offer it as a download.
    return csv_filename


def pdf_to_pngs(pdf_file) -> list:
    """Render every page of a PDF to a PNG file and return the file paths.

    Files are written as ``page_<n>.png`` (1-based) in the current working
    directory, so a later call overwrites the output of an earlier one.

    Args:
        pdf_file: The uploaded file, as a path or an object with ``.name``.

    Returns:
        A list of the PNG paths written, in page order.

    Raises:
        gr.Error: If no file was supplied.
    """
    outputs = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(_resolve_pdf_path(pdf_file)) as doc:
        for page_number, page in enumerate(doc, start=1):
            output_path = f'page_{page_number}.png'
            page.get_pixmap().save(output_path)  # Rasterize the page to PNG
            print(f'Saved {output_path}')
            outputs.append(output_path)
    return outputs


# Create a simple single-page Gradio interface.
# NOTE(review): the label, title and description below describe CSV output,
# but the wired callback is pdf_to_pngs, which returns PNG files. Confirm
# which behavior is intended and align the texts (or switch fn to pdf_to_csv).
demo = gr.Interface(
    fn=pdf_to_pngs,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs=gr.File(label="Download CSV"),
    title="PDF to CSV Converter",
    description="Upload a PDF file, extract its text line-by-line, and download a CSV.",
)

demo.launch()