Spaces:
Sleeping
Sleeping
File size: 1,058 Bytes
2b3ce8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
from src.document import PDF_Processing
from src.ocr_model import OCR
from src.llms import LLM
from src import logging
import os
class Pipeline:
    """Turn a PDF or image document into LLM-processed text.

    The work is done by ``process``: the input is converted or loaded into
    an image via ``PDF_Processing``, text is extracted with ``OCR``, and
    that text is passed through ``LLM().get_json`` twice.
    """

    def __init__(self):
        # Working directory captured at construction time.
        self.cwd = os.getcwd()

    @staticmethod
    def _resolve_kind(file, type):
        """Return the normalised input format (e.g. ``"pdf"``) or ``None``.

        An explicit *type* wins. When it is omitted and *file* is a path,
        the format is taken from the file extension. The result is
        lower-cased and stripped of a leading dot so ``"PDF"`` and
        ``".pdf"`` both resolve to ``"pdf"``.
        """
        if type is None:
            if not isinstance(file, (str, os.PathLike)):
                # Raw data carries no extension to inspect.
                return None
            type = os.path.splitext(os.fsdecode(file))[1]
        return str(type).strip().lstrip(".").lower() or None

    @staticmethod
    def process(file, type=None):
        """Run the full pipeline on *file* and return the final LLM output.

        Declared as a static method so it can be called both as
        ``Pipeline.process(...)`` and on an instance.

        Args:
            file: The document to process; passed unchanged to
                ``PDF_Processing.pdf_to_image`` or
                ``PDF_Processing.load_image``.
            type: Input format such as ``"pdf"``, ``"png"`` or ``"jpg"``.
                Optional: when omitted it is inferred from the extension
                of *file* if *file* is a path. Anything that does not
                resolve to ``"pdf"`` is handled by the image loader.
                (The name shadows the builtin but is kept so existing
                keyword callers keep working.)

        Returns:
            The value returned by the second ``LLM().get_json`` call
            (described by the original author as the clean text), or
            ``None`` if any step raised; the error is logged.
        """
        try:
            print("startd")
            kind = Pipeline._resolve_kind(file, type)
            if kind == "pdf":
                image = PDF_Processing.pdf_to_image(file)
            else:
                image = PDF_Processing.load_image(file)
            text = OCR.extract_text(image)
            # First pass is requested with key="json"; the second pass
            # post-processes that result with the default key.
            # NOTE(review): a fresh LLM() is built for each call, as in the
            # original - confirm whether one shared instance would do.
            json_text = LLM().get_json(input_data=text, key="json")
            final = LLM().get_json(input_data=json_text)
            return final
        except Exception as e:
            # Best-effort boundary: report the failure and signal it to the
            # caller with an explicit None instead of an implicit one.
            logging.info(f"Error :{e} :Pipeline.process")
            return None
if __name__ == "__main__":
    # Manual smoke test against a sample lab report shipped with the repo.
    path = "test_docs/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf"
    # process() takes the input format as its second argument; the sample
    # document is a PDF, so state that explicitly.
    result = Pipeline.process(path, "pdf")
    # Show the outcome so running the module produces visible output.
    print(result)