File size: 5,215 Bytes
74f6a97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import json
import tempfile
from fastapi import FastAPI, UploadFile, File, HTTPException
from paddleocr import PPStructure
import logging
import paddle
# Configure logging: INFO level via basicConfig, plus a logger named after
# this module that the functions below use for their messages
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize FastAPI app; the endpoints in this file register on it through
# the @app.post / @app.get decorators
app = FastAPI()
# Global variable for OCR engine: starts as None and is filled in by
# init_ocr_engine() the first time it is called, then reused
ocr_engine = None
# Function to initialize the PaddleOCR engine based on GPU availability
def init_ocr_engine():
    """Return the shared PPStructure engine, building it on the first call.

    The engine is kept in the module-level ``ocr_engine`` variable, so later
    calls hand back the same instance instead of constructing a new one.
    Whether the GPU is used is decided at construction time from
    ``is_gpu_available()``.

    Returns:
        The PPStructure instance stored in ``ocr_engine``.
    """
    global ocr_engine

    # Already built by an earlier call: reuse it.
    if ocr_engine is not None:
        return ocr_engine

    gpu_enabled = is_gpu_available()
    device_message = (
        "NVIDIA GPU detected, running PaddleOCR on GPU."
        if gpu_enabled
        else "No GPU detected, running PaddleOCR on CPU."
    )
    logger.info(device_message)

    # Engine configuration; use_gpu carries the detection result from above.
    engine_options = {
        "table": True,
        "ocr": True,
        "show_log": True,
        "layout_score_threshold": 0.1,
        "structure_version": "PP-StructureV2",
        "use_gpu": gpu_enabled,
    }
    ocr_engine = PPStructure(**engine_options)
    return ocr_engine
# Function to check for GPU availability using Paddle
def is_gpu_available():
    """Report whether PaddlePaddle can run on a CUDA device.

    Returns:
        A truthy value only when Paddle reports that it was compiled with
        CUDA and that at least one CUDA device is present.
    """
    cuda_build = paddle.is_compiled_with_cuda()
    if not cuda_build:
        # Without CUDA support in the build, the device count is never queried.
        return cuda_build
    return paddle.device.cuda.device_count() > 0
# Function to perform OCR and save the structured result
def perform_ocr_and_save(pdf_path, save_folder='./output'):
    """Run the shared OCR engine on a PDF and return its raw result.

    Args:
        pdf_path: Path of the PDF; handed to the engine unchanged.
        save_folder: Currently unused by this function.

    Returns:
        Whatever the engine returned. A falsy result (for example ``None`` or
        an empty list) is logged as an error and still returned as-is.
    """
    engine = init_ocr_engine()

    # The PDF path goes straight to the engine; no conversion happens here.
    ocr_output = engine(pdf_path)
    if ocr_output:
        return ocr_output

    logger.error(f"OCR failed for {pdf_path}")
    return ocr_output
# Function to format results to strings and sort them
def format_to_strings_and_sort(results):
    """Flatten per-page OCR results into records ordered by page and position.

    Args:
        results: Iterable of pages, each an iterable of element dicts that
            provide ``'type'``, ``'bbox'`` and ``'res'``. For a ``'table'``
            element ``'res'`` is indexed by ``'html'``; for any other type it
            is iterated as entries providing ``'text'`` and ``'confidence'``.

    Returns:
        A list of dicts sorted by ``(page_num, y_coordinate)``, where
        ``page_num`` is 1-based and ``y_coordinate`` is ``bbox[1]`` of the
        element. Table elements yield one record carrying ``'html'``; other
        elements yield one record per entry carrying ``'text'`` and
        ``'confidence'``.
    """
    logger.info("Formatting and sorting OCR results.")
    records = []

    for page_num, page_elements in enumerate(results, start=1):
        for region in page_elements:
            kind = region['type']
            box = region['bbox']
            payload = region['res']

            if kind == 'table':
                # A table contributes a single record holding its HTML.
                records.append({
                    'page_num': page_num,
                    'type': kind,
                    'html': payload['html'],
                    'bbox': box,
                    'y_coordinate': box[1],
                })
                continue

            # Every recognized entry in the region shares the region's bbox,
            # and therefore the same sort position.
            records.extend(
                {
                    'page_num': page_num,
                    'type': kind,
                    'text': entry['text'],
                    'confidence': entry['confidence'],
                    'bbox': box,
                    'y_coordinate': box[1],
                }
                for entry in payload
            )

    # list.sort is stable, so records with equal keys keep insertion order.
    records.sort(key=lambda record: (record['page_num'], record['y_coordinate']))
    logger.info("Sorting completed.")
    return records
# Function to save results to a JSON file
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    The file is opened in text write mode, so any existing content at that
    path is replaced.

    Args:
        data: JSON-serializable object to write.
        filename: Destination path.
    """
    logger.info(f"Saving sorted results to {filename}.")
    with open(filename, mode="w") as out_file:
        json.dump(data, out_file, indent=4)
# FastAPI endpoint to process uploaded PDF
@app.post("/process-ocr/")
async def process_ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded PDF and return the formatted, sorted results.

    The upload is written to a temporary ``.pdf`` file, processed with
    ``perform_ocr_and_save``, flattened with ``format_to_strings_and_sort``,
    dumped to ``result_json.json`` for debugging, and returned. The temporary
    file is removed afterwards whether or not processing succeeded.

    Args:
        file: The uploaded file; its declared content type must be
            ``application/pdf``.

    Returns:
        The list produced by ``format_to_strings_and_sort``.

    Raises:
        HTTPException: 400 when the upload is not declared as a PDF; 500 when
            OCR returns ``None`` or an unexpected error occurs.
    """
    # Bound before the try block so the cleanup in ``finally`` is safe even
    # when the request is rejected before any temporary file exists.
    temp_file_path = None
    try:
        # Validate file type
        if file.content_type != "application/pdf":
            logger.warning(f"Invalid file type uploaded: {file.content_type}")
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Please upload a PDF file.",
            )

        # delete=False keeps the file on disk after the with-block closes it,
        # so the OCR engine can open it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path first: if reading or writing the upload fails,
            # the file is still removed in ``finally``.
            temp_file_path = temp_file.name
            contents = await file.read()
            temp_file.write(contents)
        logger.info(f"Temporary file created at: {temp_file_path}")

        # Perform OCR
        result = perform_ocr_and_save(temp_file_path)
        if result is None:
            raise HTTPException(
                status_code=500,
                detail="OCR processing failed. Check the input file.",
            )

        # Sort and format the results
        result_json = format_to_strings_and_sort(result)

        # Optionally, save the result JSON to a file (for debugging)
        save_to_json(result_json, 'result_json.json')

        return result_json
    except HTTPException:
        # Deliberate HTTP errors (the 400 and 500 raised above) must reach the
        # client unchanged instead of being rewrapped as a generic 500.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("An error occurred during OCR processing: %s", e)
        raise HTTPException(
            status_code=500,
            detail="An error occurred during OCR processing.",
        ) from e
    finally:
        # Clean up the temporary file, if one was created
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
            logger.info(f"Temporary file {temp_file_path} deleted.")
# Endpoint to check if GPU is available
@app.get("/check-gpu/")
def check_gpu():
    """Report GPU availability together with a human-readable message.

    Returns:
        A dict with a boolean ``gpu_available`` flag and a ``message`` string
        stating whether the GPU or the CPU will be used.
    """
    gpu_found = bool(is_gpu_available())
    message = (
        "NVIDIA GPU is available and will be used."
        if gpu_found
        else "NVIDIA GPU is not available, using CPU instead."
    )
    return {"gpu_available": gpu_found, "message": message}