Spaces:

JasonData
/

csv_test

Runtime error

csv_test / app.py

Update app.py

422beae verified 4 months ago

1.67 kB



	import os

	import gradio as gr
	import csv
	import fitz # PyMuPDF


	def pdf_to_csv(pdf_file):
	    """Write a small CSV describing the uploaded file and return its path.

	    The PDF's contents are not read here: the CSV holds one row with the
	    upload's base file name followed by one fixed sample row.

	    Args:
	        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
	            object exposing the path as ``.name`` (e.g. a temp-file wrapper).
	            Depending on the Gradio version, the File input may hand over
	            either form.

	    Returns:
	        The relative path ``"extracted_text.csv"``, written in the current
	        working directory (overwritten on every call).
	    """
	    # A bare path has no usable ``.name`` (str) or only the last component
	    # (pathlib), so use it directly instead of dereferencing the attribute.
	    if isinstance(pdf_file, (str, os.PathLike)):
	        source_path = pdf_file
	    else:
	        source_path = pdf_file.name
	    file_name = os.path.basename(source_path)

	    text_lines = [
	        f"File Name: {file_name}",
	        ' 地区 (Region): 2010\n* 收入/支出金额 (Income/Expense Amount): +10,000.00\n* ',
	    ]

	    csv_filename = "extracted_text.csv"
	    # utf-8-sig adds a BOM so spreadsheet tools detect the encoding of the
	    # non-ASCII text; newline="" lets the csv module control line endings.
	    with open(csv_filename, "w", newline="", encoding="utf-8-sig") as csvfile:
	        writer = csv.writer(csvfile)
	        # One single-column row per collected line.
	        writer.writerows([line] for line in text_lines)

	    # Return the CSV file path so Gradio can offer it as a download
	    return csv_filename


	def pdf_to_pngs(pdf_file):
	    """Render every page of a PDF to a PNG file and return the file paths.

	    Args:
	        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
	            object exposing the path as ``.name`` (e.g. a temp-file wrapper).

	    Returns:
	        A list of the written PNG paths, ``page_1.png``, ``page_2.png``, ...
	        in page order. Files are written to the current working directory
	        and overwritten on every call.
	    """
	    # Resolve the input the same way pdf_to_csv does so both callbacks
	    # accept the same kinds of upload value.
	    if isinstance(pdf_file, (str, os.PathLike)):
	        source_path = pdf_file
	    else:
	        source_path = pdf_file.name

	    outputs = []
	    # The context manager closes the document even if rendering or saving
	    # a page raises, so the file handle is not leaked.
	    with fitz.open(source_path) as doc:
	        for page_number, page in enumerate(doc, start=1):
	            pix = page.get_pixmap()  # Rasterise the page with default settings
	            output_path = f'page_{page_number}.png'
	            pix.save(output_path)  # Format is taken from the .png extension
	            print(f'Saved {output_path}')
	            outputs.append(output_path)
	    return outputs

	# Assemble the single-page Gradio UI: one PDF upload in, one file download out.
	# NOTE(review): the output label, title and description all talk about CSV,
	# but the wired callback is pdf_to_pngs, which returns PNG paths. Confirm
	# which behaviour is intended before changing either side.
	_pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
	_file_output = gr.File(label="Download CSV")

	demo = gr.Interface(
	    fn=pdf_to_pngs,
	    inputs=_pdf_input,
	    outputs=_file_output,
	    title="PDF to CSV Converter",
	    description="Upload a PDF file, extract its text line-by-line, and download a CSV.",
	)

	# Start the web server for the interface.
	demo.launch()