Spaces:

eberhenriquez94
/

OCR

Sleeping

OCR / app.py

bc69584 verified 4 months ago

4.92 kB

	import gradio as gr
	import subprocess
	import logging
	from PyPDF2 import PdfReader
	import tempfile
	import os
	import shlex
	from gradio_pdf import PDF
	from pdf2image import convert_from_path # Importar para la función de mostrar_paginas

	# Logging setup: root handler at INFO with timestamp, level and message,
	# plus the module-level logger used by the functions below.
	logging.basicConfig(
	    format="%(asctime)s - %(levelname)s - %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	def verificar_comando(comando):
	    """Check that an executable is reachable on the system PATH.

	    The lookup uses ``shutil.which`` instead of spawning ``which`` through a
	    shell, so the command name is never interpolated into a shell string and
	    no external ``which`` binary is required.

	    Args:
	        comando: Name of the executable to look for (e.g. ``"qpdf"``).

	    Raises:
	        gr.Error: If the executable cannot be found.
	    """
	    import shutil  # local import keeps this block self-contained

	    if shutil.which(comando) is None:
	        raise gr.Error(f"El comando '{comando}' no está disponible. Por favor, instálalo antes de continuar.")
	    logger.info(f"Comando encontrado: {comando}")

	def ejecutar_comando(comando):
	    """Run an external command, log its output and return captured stdout.

	    Args:
	        comando: Either a shell command string (run through the shell, as
	            before) or a sequence of arguments (run directly, without a
	            shell, so no quoting is needed).

	    Returns:
	        The text the command wrote to standard output.

	    Raises:
	        RuntimeError: If the command exits with a non-zero status. The
	            original ``CalledProcessError`` is attached as ``__cause__``.
	    """
	    try:
	        resultado = subprocess.run(
	            comando,
	            # Strings keep the historical shell behaviour; argument lists
	            # bypass the shell entirely.
	            shell=isinstance(comando, str),
	            check=True,
	            capture_output=True,
	            text=True,
	            # Tool output is not guaranteed to be valid in the locale
	            # encoding; never let a decode error mask a successful run.
	            errors="replace",
	        )
	    except subprocess.CalledProcessError as e:
	        error_message = f"Error al ejecutar el comando: {comando}\nError: {e}\nSalida de error:\n{e.stderr}"
	        logger.error(error_message)
	        raise RuntimeError(error_message) from e
	    logger.info(f"Comando ejecutado: {comando}\nSalida:\n{resultado.stdout}")
	    return resultado.stdout

	def reparar_pdf(input_pdf, output_pdf):
	    """Rewrite a PDF with ``qpdf --linearize`` as a repair pass.

	    Args:
	        input_pdf: Path of the PDF to read.
	        output_pdf: Path where qpdf writes the rewritten PDF.

	    Raises:
	        gr.Error: If ``qpdf`` is not installed (from ``verificar_comando``).
	        RuntimeError: If qpdf fails (from ``ejecutar_comando``).
	    """
	    verificar_comando("qpdf")
	    # Per the qpdf manual, exit status 3 means the output was written but
	    # warnings were issued, which is the expected outcome when repairing a
	    # damaged file. --warning-exit-0 maps that case to status 0 so the
	    # pipeline does not abort on it.
	    # NOTE(review): requires a qpdf release that supports this flag - confirm
	    # the minimum version available in the deployment image.
	    comando = f"qpdf --warning-exit-0 --linearize {shlex.quote(input_pdf)} {shlex.quote(output_pdf)}"
	    ejecutar_comando(comando)

	def simplificar_pdf(input_pdf, output_pdf):
	    """Rewrite a PDF through Ghostscript's ``pdfwrite`` device.

	    The command uses compatibility level 1.4 and the ``/screen`` preset, in
	    batch mode without pauses.

	    Args:
	        input_pdf: Path of the PDF to read.
	        output_pdf: Path where Ghostscript writes the result.
	    """
	    verificar_comando("gs")
	    opciones = "-sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH"
	    destino = shlex.quote(output_pdf)
	    origen = shlex.quote(input_pdf)
	    ejecutar_comando(f"gs {opciones} -sOutputFile={destino} {origen}")

	def crear_pdf_con_texto_incrustado(pdf_original, archivo_salida, idioma="spa"):
	    """Run OCRmyPDF on a PDF, forcing OCR and deskewing pages.

	    Args:
	        pdf_original: Path of the PDF to process.
	        archivo_salida: Path where OCRmyPDF writes the result.
	        idioma: Language code passed to ``ocrmypdf -l`` (default ``"spa"``).

	    Raises:
	        gr.Error: If ``ocrmypdf`` is not installed (from ``verificar_comando``).
	        RuntimeError: If OCRmyPDF fails (from ``ejecutar_comando``).
	    """
	    verificar_comando("ocrmypdf")
	    # The language value originates from the UI, so it is quoted like the
	    # paths before being placed in the shell command line.
	    idioma_seguro = shlex.quote(str(idioma))
	    comando = f"ocrmypdf -l {idioma_seguro} --force-ocr --deskew --output-type pdf {shlex.quote(pdf_original)} {shlex.quote(archivo_salida)}"
	    ejecutar_comando(comando)

	def flujo_principal(pdf_file, idioma="spa"):
	    """Run the uploaded PDF through repair, simplification and OCR.

	    Args:
	        pdf_file: Path of the uploaded PDF; a falsy value means no upload.
	        idioma: OCR language code forwarded to the OCR step.

	    Returns:
	        A ``(input_pdf, output_pdf)`` tuple: the original path and the path
	        of the OCR result. The caller owns the returned output file.

	    Raises:
	        gr.Error: If no file was uploaded, or if a step raises it.
	        RuntimeError: Propagated unchanged from a failing external command.
	    """
	    if not pdf_file:
	        raise gr.Error("No se subió ningún archivo.")
	    input_pdf = pdf_file
	    # Reserve three temp paths and close each handle immediately; only the
	    # names are needed, the external tools write the files.
	    rutas = []
	    for _ in range(3):
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	            rutas.append(tmp.name)
	    reparado_pdf, simplificado_pdf, output_pdf = rutas
	    completado = False
	    try:
	        reparar_pdf(input_pdf, reparado_pdf)
	        simplificar_pdf(reparado_pdf, simplificado_pdf)
	        crear_pdf_con_texto_incrustado(simplificado_pdf, output_pdf, idioma)
	        completado = True
	        return input_pdf, output_pdf
	    except gr.Error:
	        logger.error("Error durante el procesamiento del PDF.")
	        raise
	    finally:
	        # Intermediate files are always removed; the final output is removed
	        # only when the pipeline failed, so it does not leak on errors.
	        descartables = [reparado_pdf, simplificado_pdf]
	        if not completado:
	            descartables.append(output_pdf)
	        for temp_file in descartables:
	            try:
	                os.remove(temp_file)
	            except FileNotFoundError:
	                pass

	def mostrar_pdf_zoom(pdf_path):
	    """Return iframe HTML pointing ``viewer.html`` at a PDF with ``#zoom=100``.

	    The path is percent-encoded before being placed in the ``file`` query
	    parameter, so spaces, quotes, ``&`` and ``#`` in a file name can neither
	    truncate the parameter nor break out of the quoted ``src`` attribute.
	    Paths made only of unreserved characters and ``/`` are emitted unchanged.

	    Args:
	        pdf_path: Path of the PDF to display.

	    Returns:
	        An HTML snippet containing a single ``<iframe>`` element.
	    """
	    from urllib.parse import quote  # local import keeps this block self-contained

	    archivo = quote(str(pdf_path))
	    return f"""
	<iframe
	src='viewer.html?file={archivo}#zoom=100'
	width='100%'
	height='800px'
	style='border: none;'
	></iframe>
	"""

	def mostrar_pdf(pdf_path):
	    """Return iframe HTML pointing ``viewer.html`` at a PDF.

	    The path is percent-encoded before being placed in the ``file`` query
	    parameter, so spaces, quotes, ``&`` and ``#`` in a file name can neither
	    truncate the parameter nor break out of the quoted ``src`` attribute.
	    Paths made only of unreserved characters and ``/`` are emitted unchanged.

	    Args:
	        pdf_path: Path of the PDF to display.

	    Returns:
	        An HTML snippet containing a single ``<iframe>`` element.
	    """
	    from urllib.parse import quote  # local import keeps this block self-contained

	    archivo = quote(str(pdf_path))
	    return f"""
	<iframe
	src='viewer.html?file={archivo}'
	width='100%'
	height='800px'
	style='border: none;'
	></iframe>
	"""

	def mostrar_paginas(pdf_path):
	    """Render every page of a PDF to a PNG temp file and return the paths.

	    Pages are rasterised at 150 dpi with ``convert_from_path``. The PNG files
	    are created with ``delete=False`` and are not removed here.

	    Args:
	        pdf_path: Path of the PDF to render; a falsy value means no file.

	    Returns:
	        A list of PNG file paths, one per page, in page order. Empty when no
	        path was given.
	    """
	    if not pdf_path:
	        # Nothing uploaded yet: show an empty gallery instead of failing.
	        return []
	    temp_images = []
	    for page in convert_from_path(pdf_path, dpi=150):
	        # Close the handle before saving by name so no descriptor is leaked
	        # and the file is not held open while it is being written.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
	            ruta = tmp.name
	        page.save(ruta, "PNG")
	        temp_images.append(ruta)
	    return temp_images

	# Gradio UI definition: a PDF upload, an OCR language dropdown and a button,
	# followed by two HTML panes and a page gallery.
	# NOTE(review): nesting indentation is missing in this copy of the file, so
	# which components sit inside each gr.Row() cannot be read from here -
	# confirm against the original layout before re-indenting.
	with gr.Blocks() as interfaz:
	gr.Markdown("## Visualizador de PDFs con OCR")

	with gr.Row():
	archivo_pdf = PDF(label="Sube tu archivo PDF")
	idioma_ocr = gr.Dropdown(["spa", "eng", "fra", "deu"], label="Idioma OCR", value="spa")
	boton_procesar = gr.Button("Procesar OCR")

	with gr.Row():
	pdf_original_vista = gr.HTML(label="PDF Original")
	pdf_ocr_vista = gr.HTML(label="PDF con OCR")
	# The `style` method was removed; columns and height are configured through predefined attributes
	imagenes = gr.Gallery(label="Páginas del PDF", show_label=True, elem_id="gallery-container")

	# The same button triggers two independent handlers: the OCR pipeline and
	# the page-image gallery.
	# NOTE(review): flujo_principal returns two file paths, which are delivered
	# to gr.HTML components here - confirm that is the intended rendering
	# (mostrar_pdf / mostrar_pdf_zoom are defined but not wired in).
	boton_procesar.click(fn=flujo_principal, inputs=[archivo_pdf, idioma_ocr], outputs=[pdf_original_vista, pdf_ocr_vista])
	boton_procesar.click(fn=mostrar_paginas, inputs=[archivo_pdf], outputs=[imagenes])

	# Start the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    interfaz.launch()