|
import os |
|
import json |
|
import tempfile |
|
from fastapi import FastAPI, UploadFile, File, HTTPException |
|
from paddleocr import PPStructure |
|
import logging |
|
import paddle |
|
|
|
|
|
# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)

# Module-scoped logger shared by every function in this file.
logger = logging.getLogger(__name__)

# FastAPI application object; the route decorators below attach to it.
app = FastAPI()

# Shared PPStructure engine. Stays ``None`` until init_ocr_engine() builds it
# on first use, so importing this module does not construct the engine.
ocr_engine = None
|
|
|
|
|
def init_ocr_engine():
    """Return the shared PPStructure engine, building it on the first call.

    The engine is stored in the module-level ``ocr_engine`` global. Later
    calls return that same object without re-checking the GPU or
    constructing a new engine.

    Returns:
        The PPStructure instance held in ``ocr_engine``.
    """
    global ocr_engine

    # Already built by an earlier call: nothing to do.
    if ocr_engine is not None:
        return ocr_engine

    gpu_enabled = is_gpu_available()
    logger.info(
        "NVIDIA GPU detected, running PaddleOCR on GPU."
        if gpu_enabled
        else "No GPU detected, running PaddleOCR on CPU."
    )

    # Keyword options forwarded unchanged to the PPStructure constructor.
    engine_options = {
        "table": True,
        "ocr": True,
        "show_log": True,
        "layout_score_threshold": 0.1,
        "structure_version": 'PP-StructureV2',
        "use_gpu": gpu_enabled,
    }
    ocr_engine = PPStructure(**engine_options)
    return ocr_engine
|
|
|
|
|
def is_gpu_available():
    """Tell whether Paddle was built with CUDA and reports a CUDA device.

    Returns:
        A falsy value when ``paddle.is_compiled_with_cuda()`` is falsy;
        otherwise whether ``paddle.device.cuda.device_count()`` is above zero.
    """
    # Skip the device query entirely when the build has no CUDA support.
    if not paddle.is_compiled_with_cuda():
        # NOTE(review): assumes is_compiled_with_cuda() returns a real bool,
        # so returning False here matches the original `and` expression.
        return False
    return paddle.device.cuda.device_count() > 0
|
|
|
|
|
def perform_ocr_and_save(pdf_path, save_folder='./output'):
    """Run the shared OCR engine on ``pdf_path`` and return its raw output.

    Args:
        pdf_path: Path handed directly to the engine.
        save_folder: Accepted for interface compatibility; this function
            does not read it or write anything to it.

    Returns:
        Whatever the engine returns for ``pdf_path``. A falsy result is
        logged as an error but still returned unchanged to the caller.
    """
    engine = init_ocr_engine()
    ocr_output = engine(pdf_path)

    if ocr_output:
        return ocr_output

    # Falsy output (e.g. empty) is reported here; the caller decides what to do.
    logger.error(f"OCR failed for {pdf_path}")
    return ocr_output
|
|
|
|
|
def format_to_strings_and_sort(results):
    """Flatten per-page OCR elements into records sorted by page and position.

    Args:
        results: Iterable of pages, each an iterable of elements. Every
            element is indexed with the keys ``'type'``, ``'bbox'`` and
            ``'res'``. For a ``'table'`` element ``res`` is indexed with
            ``'html'``; for any other type ``res`` is iterated and each item
            is indexed with ``'text'`` and ``'confidence'``.

    Returns:
        A list of dicts ordered by ``(page_num, y_coordinate)``, where
        ``page_num`` is 1-based and ``y_coordinate`` is ``bbox[1]``. Table
        elements yield one record carrying ``'html'``; other elements yield
        one record per item of ``res``.
    """
    logger.info("Formatting and sorting OCR results.")
    records = []

    for page_num, page_elements in enumerate(results, start=1):
        for element in page_elements:
            kind = element['type']
            bbox = element['bbox']
            payload = element['res']

            if kind == 'table':
                # A table contributes a single record holding its HTML.
                records.append({
                    'page_num': page_num,
                    'type': kind,
                    'html': payload['html'],
                    'bbox': bbox,
                    'y_coordinate': bbox[1],
                })
                continue

            # Non-table elements contribute one record per recognised item,
            # all sharing the element's bounding box.
            for item in payload:
                top_edge = bbox[1]
                records.append({
                    'page_num': page_num,
                    'type': kind,
                    'text': item['text'],
                    'confidence': item['confidence'],
                    'bbox': bbox,
                    'y_coordinate': top_edge,
                })

    # Stable sort: records with equal keys keep their discovery order.
    records.sort(key=lambda rec: (rec['page_num'], rec['y_coordinate']))

    logger.info("Sorting completed.")
    return records
|
|
|
|
|
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Args:
        data: Object passed to ``json.dump``.
        filename: Destination path; opened in text write mode, so an
            existing file is overwritten.
    """
    logger.info(f"Saving sorted results to {filename}.")
    with open(filename, mode="w") as out_file:
        json.dump(data, fp=out_file, indent=4)
|
|
|
|
|
@app.post("/process-ocr/")
async def process_ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded PDF and return the formatted, sorted results.

    The upload is written to a temporary ``.pdf`` file, passed to
    ``perform_ocr_and_save``, and the output is flattened by
    ``format_to_strings_and_sort``. The formatted data is also written to
    ``result_json.json`` before being returned. The temporary file is
    removed on every exit path.

    Args:
        file: The uploaded file; its ``content_type`` must be
            ``application/pdf``.

    Returns:
        The list produced by ``format_to_strings_and_sort``.

    Raises:
        HTTPException: 400 when the upload is not declared as a PDF; 500
            when OCR yields no result (``None`` or empty, matching what
            ``perform_ocr_and_save`` logs as a failure) or when any
            unexpected error occurs.
    """
    # Bound before the ``try`` so the ``finally`` clause can always inspect
    # it, even when the request is rejected before a temp file exists.
    temp_file_path = None

    try:
        if file.content_type != "application/pdf":
            logger.warning(f"Invalid file type uploaded: {file.content_type}")
            raise HTTPException(status_code=400, detail="Invalid file type. Please upload a PDF file.")

        contents = await file.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Record the path before writing so a failed write still gets
            # cleaned up in ``finally``.
            temp_file_path = temp_file.name
            temp_file.write(contents)
        # The ``with`` block has closed the file, so the data is flushed to
        # disk before the OCR engine opens the path.
        logger.info(f"Temporary file created at: {temp_file_path}")

        result = perform_ocr_and_save(temp_file_path)

        # Treat ``None`` and empty output alike as a failed run.
        if not result:
            raise HTTPException(status_code=500, detail="OCR processing failed. Check the input file.")

        result_json = format_to_strings_and_sort(result)

        save_to_json(result_json, 'result_json.json')

        return result_json

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged rather than being rewritten as a generic 500.
        raise

    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"An error occurred during OCR processing: {e}")
        raise HTTPException(status_code=500, detail="An error occurred during OCR processing.") from e

    finally:
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
            logger.info(f"Temporary file {temp_file_path} deleted.")
|
|
|
|
|
@app.get("/check-gpu/")
def check_gpu():
    """Report whether ``is_gpu_available()`` currently detects a GPU.

    Returns:
        A dict with a boolean ``gpu_available`` flag and a human-readable
        ``message`` describing which device will be used.
    """
    if not is_gpu_available():
        return {"gpu_available": False, "message": "NVIDIA GPU is not available, using CPU instead."}

    return {"gpu_available": True, "message": "NVIDIA GPU is available and will be used."}