Lalit1997's picture
Upload 7 files
2b3ce8d verified
from src.document import PDF_Processing
from src.ocr_model import OCR
from src.llms import LLM
from src import logging
import os
class Pipeline:
def __init__(self):
self.cwd = os.getcwd()
def process(file,type):
"""
file : data it can be image or pdf
Type : format of PDF / Image (png, jpg)
return : Clean Text.
"""
try:
print("startd")
if type == "pdf":
image = PDF_Processing.pdf_to_image(file)
else:
image = PDF_Processing.load_image(file)
text = OCR.extract_text(image)
json_text = LLM().get_json(input_data=text,key = "json")
final = LLM().get_json(input_data=json_text)
return final
except Exception as e:
logging.info(f"Error :{e} :Pipeline.process")
if __name__ == "__main__":
path = "test_docs/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf"
result = Pipeline.process(path)