from src.document import PDF_Processing
from src.ocr_model import OCR
from src.llms import LLM
from src import logging
import os


class Pipeline:
    """Document pipeline: load a PDF/image, OCR it, and post-process with an LLM."""

    def __init__(self):
        # Working directory at construction time; not read by process().
        self.cwd = os.getcwd()

    @staticmethod
    def process(file, type: str):
        """Run a document through loading, OCR, and two LLM passes.

        Declared as a static method because it uses no instance state; this
        keeps ``Pipeline.process(file, type)`` working and also makes
        ``Pipeline().process(file, type)`` bind its arguments correctly.

        Args:
            file: The document to process. It is handed to
                ``PDF_Processing.pdf_to_image`` when ``type == "pdf"`` and to
                ``PDF_Processing.load_image`` otherwise (presumably a path or
                raw data; confirm against those helpers).
            type: Format of ``file``. Only the exact string ``"pdf"`` selects
                the PDF branch; any other value (e.g. ``"png"``, ``"jpg"``)
                is treated as an image. The name shadows the builtin but is
                kept so existing keyword callers continue to work.

        Returns:
            The value returned by the second ``LLM().get_json`` call, or
            ``None`` if any step raised (the error is logged, not re-raised).
        """
        try:
            print("startd")
            if type == "pdf":
                image = PDF_Processing.pdf_to_image(file)
            else:
                image = PDF_Processing.load_image(file)

            text = OCR.extract_text(image)

            # First pass is called with key="json"; its output is fed to a
            # second pass that uses get_json's default key.
            json_text = LLM().get_json(input_data=text, key="json")
            final = LLM().get_json(input_data=json_text)
            return final
        except Exception as e:
            # Best-effort boundary: failures are logged and reported to the
            # caller as None instead of propagating.
            logging.info(f"Error :{e} :Pipeline.process")
            return None


if __name__ == "__main__":
    path = "test_docs/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf"
    # process() requires the format explicitly; omitting it raised TypeError
    # before the pipeline could start.
    result = Pipeline.process(path, "pdf")