Spaces:

AIAcceleratorLab
/

ocr

Sleeping

ocr / image_route.py

msmhmorsi

change to v1

68f98f8 7 months ago

4.72 kB

	import cv2
	import fitz
	import numpy as np
	from io import BytesIO
	import matplotlib.pyplot as plt
	from skimage.color import rgb2gray
	from skimage.measure import label, regionprops
	from fastapi import APIRouter, UploadFile, File, HTTPException
	from fastapi.responses import StreamingResponse

	# Router that the endpoints in this module register on via @router.post.
	router = APIRouter()

	def convert_and_process_pdf(pdf_content: bytes, area_threshold: int = 100) -> BytesIO:
	    """
	    Render the first page of a PDF and return a cleaned-up black-on-white PNG.

	    The page is rasterised at 300 DPI, binarised with Otsu's threshold, and
	    every connected dark component smaller than ``area_threshold`` pixels is
	    dropped before the result is written out as a PNG.

	    Args:
	        pdf_content: The PDF file content as bytes.
	        area_threshold: Minimum pixel area a connected component must have
	            to be kept (default: 100).
	    Returns:
	        BytesIO: Enhanced PNG image content, positioned at offset 0.
	    Raises:
	        ValueError: If the rendered page cannot be decoded into an image.
	        Any exception raised by PyMuPDF/OpenCV while opening or rendering
	        the document propagates unchanged.
	    """
	    # Open the PDF in a context manager so the document handle is released
	    # even when loading or rendering the page fails.
	    with fitz.open(stream=pdf_content, filetype="pdf") as doc:
	        page = doc.load_page(0)
	        pix = page.get_pixmap(dpi=300)
	        png_image = pix.tobytes("png")

	    # Decode the rendered PNG with OpenCV (channels come back in BGR order)
	    np_array = np.frombuffer(png_image, dtype=np.uint8)
	    img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
	    if img is None:
	        raise ValueError("Rendered page could not be decoded as an image")

	    # rgb2gray weights the channels assuming RGB order, so swap from
	    # OpenCV's BGR first; otherwise red and blue weights are exchanged.
	    img_gray = rgb2gray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	    # Convert grayscale to binary using Otsu's threshold
	    _, img_binary = cv2.threshold(
	        (img_gray * 255).astype(np.uint8),
	        0,
	        255,
	        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
	    )

	    # Invert so that dark content becomes the non-zero foreground to label
	    img_binary = ~img_binary

	    # Label connected components and keep only those that are large enough
	    label_img = label(img_binary)
	    regions = regionprops(label_img)
	    valid_labels = [region.label for region in regions if region.area >= area_threshold]
	    img_filtered = np.isin(label_img, valid_labels)

	    # Save enhanced image to memory. vmin/vmax are pinned because
	    # matplotlib's autoscaling maps a constant array (e.g. a blank page
	    # where nothing survives the filter) to the low end of the colormap,
	    # which would render an empty page as solid black instead of white.
	    output_buffer = BytesIO()
	    plt.imsave(output_buffer, ~img_filtered, cmap="gray", format="png", vmin=0, vmax=1)
	    output_buffer.seek(0)
	    return output_buffer

	@router.post("/process-pdf/")
	async def process_pdf(
	    file: UploadFile = File(...),
	    area_threshold: int = 100
	):
	    """
	    Process a PDF file and return an enhanced PNG image.

	    Args:
	        file: The PDF file to process
	        area_threshold: Threshold for area filtering (default: 100)
	    Returns:
	        StreamingResponse: Enhanced PNG image, sent as an attachment named
	        after the uploaded file with an ``_enhanced.png`` suffix.
	    Raises:
	        HTTPException: 400 if the upload is empty; 500 if reading or
	        processing the PDF fails for any other reason.
	    """
	    # NOTE(review): convert_and_process_pdf is synchronous and runs directly
	    # inside this coroutine; consider offloading it to a thread pool if
	    # request latency under load becomes a concern.
	    try:
	        # Read PDF file content
	        pdf_content = await file.read()
	        if not pdf_content:
	            raise HTTPException(status_code=400, detail="Uploaded PDF file is empty")

	        # Process the PDF and get the enhanced image
	        enhanced_image = convert_and_process_pdf(pdf_content, area_threshold)

	        # The upload may carry no filename, and a header value must stay
	        # latin-1 encodable and free of quotes/control characters, so keep
	        # only safe printable ASCII in the suggested download name.
	        stem = (file.filename or "document").rsplit(".", 1)[0]
	        safe_stem = "".join(
	            ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
	            for ch in stem
	        ) or "document"

	        # Return the processed image as a StreamingResponse
	        return StreamingResponse(
	            enhanced_image,
	            media_type="image/png",
	            headers={"Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'},
	        )
	    except HTTPException:
	        # Deliberate HTTP errors (e.g. the 400 above) must not be rewrapped
	        # as a 500 by the generic handler below.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e

	@router.post("/process-image/")
	async def process_image(
	    file: UploadFile = File(...),
	    area_threshold: int = 100
	):
	    """
	    Process an image file and return an enhanced image.

	    The image is binarised with Otsu's threshold and every connected dark
	    component smaller than ``area_threshold`` pixels is dropped; the result
	    is returned as a black-on-white PNG.

	    Args:
	        file: The image file to process
	        area_threshold: Threshold for area filtering (default: 100)
	    Returns:
	        StreamingResponse: Enhanced PNG image, sent as an attachment named
	        after the uploaded file with an ``_enhanced.png`` suffix.
	    Raises:
	        HTTPException: 400 if the upload is empty or cannot be decoded as
	        an image; 500 if processing fails for any other reason.
	    """
	    # NOTE(review): the image processing below is synchronous and runs
	    # directly inside this coroutine; consider offloading it to a thread
	    # pool if request latency under load becomes a concern.
	    try:
	        # Read image file content
	        image_content = await file.read()
	        if not image_content:
	            raise HTTPException(status_code=400, detail="Uploaded image file is empty")

	        # Decode with OpenCV (channels come back in BGR order); imdecode
	        # returns None rather than raising when the data is not an image.
	        np_array = np.frombuffer(image_content, dtype=np.uint8)
	        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
	        if img is None:
	            raise HTTPException(
	                status_code=400,
	                detail="Uploaded file could not be decoded as an image",
	            )

	        # rgb2gray weights the channels assuming RGB order, so swap from
	        # OpenCV's BGR first; otherwise red and blue weights are exchanged.
	        img_gray = rgb2gray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	        # Convert grayscale to binary using Otsu's threshold
	        _, img_binary = cv2.threshold(
	            (img_gray * 255).astype(np.uint8),
	            0,
	            255,
	            cv2.THRESH_BINARY + cv2.THRESH_OTSU,
	        )

	        # Invert so that dark content becomes the non-zero foreground to label
	        img_binary = ~img_binary

	        # Label connected components and keep only those that are large enough
	        label_img = label(img_binary)
	        regions = regionprops(label_img)
	        valid_labels = [region.label for region in regions if region.area >= area_threshold]
	        img_filtered = np.isin(label_img, valid_labels)

	        # Save enhanced image to memory. vmin/vmax are pinned because
	        # matplotlib's autoscaling maps a constant array (e.g. a blank image
	        # where nothing survives the filter) to the low end of the colormap,
	        # which would render an empty result as solid black instead of white.
	        output_buffer = BytesIO()
	        plt.imsave(output_buffer, ~img_filtered, cmap="gray", format="png", vmin=0, vmax=1)
	        output_buffer.seek(0)

	        # The upload may carry no filename, and a header value must stay
	        # latin-1 encodable and free of quotes/control characters, so keep
	        # only safe printable ASCII in the suggested download name.
	        stem = (file.filename or "image").rsplit(".", 1)[0]
	        safe_stem = "".join(
	            ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
	            for ch in stem
	        ) or "image"

	        # Return the processed image as a StreamingResponse
	        return StreamingResponse(
	            output_buffer,
	            media_type="image/png",
	            headers={"Content-Disposition": f'attachment; filename="{safe_stem}_enhanced.png"'},
	        )
	    except HTTPException:
	        # Deliberate HTTP errors (the 400s above) must not be rewrapped as
	        # a 500 by the generic handler below.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e