File size: 1,058 Bytes
2b3ce8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from src.document import PDF_Processing
from src.ocr_model import OCR
from src.llms import LLM
from src import logging
import os


class Pipeline:
    """Chain document loading, OCR and two LLM passes into a single call.

    ``process`` reads no instance state, so it is exposed as a static method
    and may be called either as ``Pipeline.process(...)`` or on an instance.
    """

    def __init__(self):
        # Working directory captured at construction time. Nothing in this
        # class reads it; it is kept for callers that may rely on it.
        self.cwd = os.getcwd()

    @staticmethod
    def process(file, type):  # noqa: A002 - name kept for caller compatibility
        """Run one document through the loading -> OCR -> LLM steps.

        Args:
            file: The input handed to ``PDF_Processing.pdf_to_image`` or
                ``PDF_Processing.load_image``. Presumably a path or raw
                data; confirm against those helpers.
            type: Format of ``file``. ``"pdf"`` (any letter case, optional
                leading dot or surrounding whitespace) selects the PDF
                loader; every other value selects the image loader.

        Returns:
            The result of the second ``LLM().get_json`` call, or ``None``
            when any step raises (the error is logged, not propagated).
        """
        try:
            print("startd")
            # Normalise before comparing so "PDF" or ".pdf" are not silently
            # sent to the image loader. Non-string values fall through to the
            # image branch, as they did with the plain equality check.
            is_pdf = (
                isinstance(type, str)
                and type.strip().lstrip(".").lower() == "pdf"
            )
            if is_pdf:
                image = PDF_Processing.pdf_to_image(file)
            else:
                image = PDF_Processing.load_image(file)
            text = OCR.extract_text(image)
            # First pass is called with key="json"; its output is fed to a
            # second pass that uses the helper's default key.
            json_text = LLM().get_json(input_data=text, key="json")
            final = LLM().get_json(input_data=json_text)
            return final
        except Exception as e:
            # Best-effort boundary: record the failure and report "no result"
            # to the caller instead of raising.
            logging.info(f"Error :{e} :Pipeline.process")
            return None

if __name__ == "__main__":
    # Manual smoke run against the bundled sample report.
    path = "test_docs/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf"
    # process() requires the input format as its second argument; omitting it
    # raises TypeError before any work is done. The sample is a PDF.
    result = Pipeline.process(path, "pdf")