Spaces:
Sleeping
Sleeping
from src.document import PDF_Processing | |
from src.ocr_model import OCR | |
from src.llms import LLM | |
from src import logging | |
import os | |
class Pipeline:
    """Turn a PDF or image document into LLM-post-processed text.

    The pipeline loads the document as an image via ``PDF_Processing``,
    runs ``OCR.extract_text`` on it, and then sends the OCR output through
    two ``LLM().get_json`` passes.
    """

    def __init__(self):
        # Working directory captured at construction time.
        self.cwd = os.getcwd()

    @staticmethod
    def process(file, type=None):
        """Run the full document -> text pipeline.

        Declared as a static method so that both ``Pipeline.process(...)``
        and ``Pipeline().process(...)`` work; the method never used
        instance state.

        Args:
            file: The document to process; passed unchanged to
                ``PDF_Processing.pdf_to_image`` or
                ``PDF_Processing.load_image``.
            type: Document format, e.g. ``"pdf"``, ``"png"``, ``"jpg"``.
                Compared case-insensitively, with surrounding whitespace and
                leading dots ignored. When omitted and ``file`` is a
                path, the format is taken from the file extension. Anything
                other than ``"pdf"`` is handled as an image.

        Returns:
            The result of the second ``LLM().get_json`` call, or ``None``
            if any step raised (the error is logged, not re-raised).
        """
        # Infer the format from the extension when the caller did not
        # supply one and ``file`` is a text path.
        if type is None and isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
            if isinstance(path, str):
                type = os.path.splitext(path)[1]
        # Normalize so that "PDF", ".pdf" and " pdf " all select the PDF branch.
        kind = str(type or "").strip().lower().lstrip(".")

        try:
            print("startd")
            if kind == "pdf":
                image = PDF_Processing.pdf_to_image(file)
            else:
                image = PDF_Processing.load_image(file)
            text = OCR.extract_text(image)
            # First pass is keyed with "json"; the second pass consumes the
            # first pass's output. A fresh LLM instance is used for each
            # call, as in the original flow.
            json_text = LLM().get_json(input_data=text, key="json")
            final = LLM().get_json(input_data=json_text)
            return final
        except Exception as e:
            # Best-effort boundary: callers receive None on failure.
            logging.info(f"Error :{e} :Pipeline.process")
            return None
if __name__ == "__main__":
    # Manual smoke run against a sample lab report.
    path = "test_docs/CBC-test-report-format-example-sample-template-Drlogy-lab-report.pdf"
    # Pipeline.process needs the document format; derive it from the file
    # extension ("pdf" here) instead of omitting the argument.
    doc_type = os.path.splitext(path)[1].lstrip(".").lower()
    result = Pipeline.process(path, doc_type)