csv_test / app.py
JasonData's picture
Update app.py
422beae verified
raw
history blame
1.67 kB
import os
import gradio as gr
import csv
import fitz # PyMuPDF
def pdf_to_csv(pdf_file):
    """Write a two-row placeholder CSV for an uploaded PDF and return its path.

    No text is read from the PDF itself: the first row records the base name
    of the uploaded file and the second row is a fixed sample string.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes the path as its ``name`` attribute.

    Returns:
        The path of the CSV file that was written, ``"extracted_text.csv"``,
        relative to the current working directory.
    """
    # Accept a plain path as well as an object carrying the path in `.name`,
    # so the function works whichever form the caller hands over.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    text_lines = [
        f"File Name: {os.path.basename(pdf_path)}",
        ' 地区 (Region): 2010\n* 收入/支出金额 (Income/Expense Amount): +10,000.00\n* ',
    ]

    # NOTE: the output name is fixed, so every call overwrites the file
    # produced by the previous one.
    csv_filename = "extracted_text.csv"

    # One text line per CSV row. "utf-8-sig" prefixes the file with a BOM,
    # presumably so spreadsheet applications detect the encoding.
    with open(csv_filename, "w", newline="", encoding="utf-8-sig") as csvfile:
        csv.writer(csvfile).writerows([line] for line in text_lines)

    # Return the CSV file path so the caller (Gradio) can offer it as a download.
    return csv_filename
def pdf_to_pngs(pdf_file):
    """Render every page of a PDF to a PNG file and return the file paths.

    Pages are written as ``page_1.png``, ``page_2.png``, ... in the current
    working directory; existing files with those names are overwritten.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes the path as its ``name`` attribute.

    Returns:
        A list of the PNG paths in page order (empty when the document has
        no pages).
    """
    # Accept a plain path as well as an object carrying the path in `.name`;
    # anything else is handed to PyMuPDF unchanged.
    if isinstance(pdf_file, (str, os.PathLike)):
        source = pdf_file
    else:
        source = getattr(pdf_file, "name", pdf_file)

    outputs = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(source) as doc:
        for page_num in range(doc.page_count):
            # Rasterize the page with PyMuPDF's default settings.
            pix = doc.load_page(page_num).get_pixmap()
            output_path = f'page_{page_num + 1}.png'  # 1-based page numbering
            pix.save(output_path)
            print(f'Saved {output_path}')
            outputs.append(output_path)
    return outputs
# Single-page Gradio interface: upload one PDF and get back one PNG per page.
# The labels, title and description describe PNG output because the wired
# function is pdf_to_pngs, which returns a list of image paths.
demo = gr.Interface(
    fn=pdf_to_pngs,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs=gr.File(label="Download PNGs"),
    title="PDF to PNG Converter",
    description="Upload a PDF file, render each page as an image, and download the PNGs.",
)
demo.launch()