|
|
|
|
|
import os |
|
|
|
import gradio as gr |
|
import csv |
|
import fitz |
|
|
|
|
|
def pdf_to_csv(pdf_file):
    """Write a small single-column CSV for an uploaded PDF and return its path.

    The CSV contains one row with the upload's base file name followed by one
    fixed sample row. No text is read from the PDF itself.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes its path as ``.name``.

    Returns:
        The name of the CSV file that was written ("extracted_text.csv"),
        relative to the current working directory.
    """
    # Accept a plain path as well as an upload wrapper with ``.name``; a bare
    # string has no ``.name`` attribute and used to raise AttributeError here.
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = pdf_file
    else:
        source_path = pdf_file.name
    file_name = os.path.basename(source_path)

    text_lines = [
        f"File Name: {file_name}",
        # NOTE(review): hard-coded sample row; nothing is extracted from the
        # PDF yet -- confirm whether real text extraction is intended.
        ' 地区 (Region): 2010\n* 收入/支出金额 (Income/Expense Amount): +10,000.00\n* ',
    ]

    csv_filename = "extracted_text.csv"

    # newline="" lets the csv module control line endings; "utf-8-sig" writes
    # a BOM (presumably so spreadsheet tools detect UTF-8 -- confirm).
    with open(csv_filename, "w", newline="", encoding="utf-8-sig") as csvfile:
        csv.writer(csvfile).writerows([line] for line in text_lines)

    return csv_filename
|
|
|
|
|
def pdf_to_pngs(pdf_file):
    """Render every page of a PDF to a PNG file and return the PNG paths.

    Pages are saved as ``page_1.png``, ``page_2.png``, ... in the current
    working directory, using the default ``get_pixmap()`` settings.

    Args:
        pdf_file: Path to the PDF (``str`` / ``os.PathLike``), or an upload
            object that exposes its path as ``.name``.

    Returns:
        List of the PNG file names written, in page order.
    """
    # Paths are passed through untouched; upload wrappers are unwrapped to
    # their on-disk path so both argument forms can be opened.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    outputs = []

    # The context manager closes the document even if rendering a page fails.
    with fitz.open(pdf_path) as doc:
        for page_num in range(doc.page_count):
            pix = doc.load_page(page_num).get_pixmap()

            # File names are 1-based while page indices are 0-based.
            output_path = f'page_{page_num + 1}.png'
            pix.save(output_path)
            print(f'Saved {output_path}')
            outputs.append(output_path)

    return outputs
|
|
|
|
|
# The wired callback is pdf_to_pngs, which returns a list of PNG paths, so the
# UI text describes PNG output rather than CSV text extraction.
# NOTE(review): if CSV output was the intent, wire fn=pdf_to_csv instead and
# restore the CSV wording.
demo = gr.Interface(
    fn=pdf_to_pngs,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs=gr.File(label="Download PNGs"),
    title="PDF to PNG Converter",
    description="Upload a PDF file, render each page as an image, and download the PNG files.",
)

demo.launch()
|
|
|
|