Spaces:

DeepDiveDev
/

TransformoDocs-Demo

Sleeping

TransformoDocs-Demo / app.py

Update app.py

5523faf verified 3 months ago

1.29 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	import io

	# Function to convert PDF to text (handles both byte data and file uploads)
	def pdf_to_text(file_input):
	    """Extract the text of every page of a PDF and return it as one string.

	    Args:
	        file_input: The PDF to read. Supported forms:
	            * ``bytes`` / ``bytearray`` -- raw PDF content, wrapped in an
	              in-memory stream;
	            * an object exposing a ``.name`` attribute (e.g. an uploaded
	              file wrapper) -- ``.name`` is used as the file path;
	            * anything else (a plain path string, an open binary stream)
	              -- handed to ``PdfReader`` unchanged.

	    Returns:
	        The concatenated text of all pages, or a message starting with
	        ``"Error while processing the PDF:"`` if nothing was supplied or
	        the document could not be read. This function does not raise; the
	        result is always a string so the UI can display it directly.
	    """
	    # No upload at all: report it instead of failing on attribute access.
	    if file_input is None:
	        return "Error while processing the PDF: no file was provided"

	    if isinstance(file_input, (bytes, bytearray)):
	        # Raw PDF content: expose it through a file-like object.
	        pdf_source = io.BytesIO(file_input)
	    else:
	        # Upload wrappers carry the on-disk path in ``.name``; plain paths
	        # and streams have no such attribute and are used as they are.
	        pdf_source = getattr(file_input, "name", file_input)

	    try:
	        reader = PdfReader(pdf_source)
	        # ``or ""`` keeps one page without extractable text from aborting
	        # the whole document (in case extract_text() yields None).
	        return "".join(page.extract_text() or "" for page in reader.pages)
	    except Exception as e:
	        # UI boundary: surface any parsing failure as text, not a traceback.
	        return f"Error while processing the PDF: {str(e)}"

	# Web UI definition: one file-upload input wired to pdf_to_text, with the
	# string it returns shown through the "text" output.
	iface = gr.Interface(
	    title="PDF to Text Conversion",
	    description="Upload a PDF file or send byte data (Base64 encoded) to extract its text.",
	    fn=pdf_to_text,
	    inputs=gr.File(label="Upload PDF or send Byte data"),
	    outputs="text",
	)

	# Launch the interface only when this file is run as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    iface.launch()