import cv2
from run_yolo import get_layout_results
from order_text_blocks import get_ordered_data
from run_ocr import OCR
from tqdm import tqdm
import time

# Article sections whose boxes are cropped out of the page and sent to OCR.
_OCR_SECTIONS = ('Headlines', 'Sub-headlines', 'Text Block')


def _int_box(coord):
    """Return the first four values of *coord* as an (x1, y1, x2, y2) int tuple."""
    return int(coord[0]), int(coord[1]), int(coord[2]), int(coord[3])


def _box_key(box):
    """Join the box coordinates with underscores, e.g. (1, 2, 3, 4) -> "1_2_3_4"."""
    return '_'.join(str(value) for value in box)


def driver(img, language_name, st):
    """Run layout analysis on a page image, then OCR every text region per article.

    Args:
        img: Page image. It must support ``.copy()``, ``.shape[:2]`` and 2-D
            slicing (presumably a NumPy/OpenCV image array -- confirm with
            the caller).
        language_name: Passed through unchanged to ``OCR`` as ``lang``.
        st: Object providing ``markdown(...)`` and ``spinner(...)``
            (presumably the ``streamlit`` module -- confirm with the caller).

    Returns:
        A ``(output_dict, article_wise_ocr)`` tuple. ``output_dict`` is the
        value returned by ``get_ordered_data``. ``article_wise_ocr`` maps an
        article key ``"x1_y1_x2_y2"`` (built from the article's first
        'Articles' box, or ``""`` when the article has none) to a dict of
        ``{section_name: [{"x1_y1_x2_y2": ocr_text}, ...]}``.
    """
    onnx_path = "./best.onnx"
    # The layout model gets its own copy of the image; crops below are taken
    # from the caller's ``img``.
    img_ori = img.copy()
    labels = get_layout_results(img_ori, onnx_path)
    output_dict = get_ordered_data(labels, img)
    # NOTE(review): unsafe_allow_html=True suggests this message once carried
    # HTML markup; only the plain text is visible here -- confirm.
    st.markdown("\n\nLayout Analysis Completed!\n\n", unsafe_allow_html=True)

    article_wise_ocr = {}
    h, w = img.shape[:2]

    with st.spinner('Performing OCR...'):
        # Wrap the sequence itself so tqdm can pick up its length when it
        # has one.
        for article in tqdm(output_dict['Articles']):
            ocr_dict = {}
            article_key = ""
            for key in article:
                boxes = article[key]
                if boxes == []:
                    continue

                if key == 'Articles':
                    # The article is identified by its first bounding box.
                    article_key = _box_key(_int_box(boxes[0]))

                if key in _OCR_SECTIONS:
                    for coord in boxes:
                        x1, y1, x2, y2 = _int_box(coord)
                        # Skip boxes that fall outside the image, and also
                        # zero-area or inverted boxes, which would yield an
                        # empty crop for OCR.
                        if not (0 <= x1 < x2 <= w and 0 <= y1 < y2 <= h):
                            continue
                        crop = img[y1:y2, x1:x2]
                        output_text = OCR(crop, lang=language_name)
                        ocr_dict.setdefault(key, []).append(
                            {_box_key((x1, y1, x2, y2)): output_text}
                        )

            # NOTE(review): articles without an 'Articles' box all share the
            # key "" and overwrite each other -- confirm this is intended.
            article_wise_ocr[article_key] = ocr_dict

    st.markdown("\n\nOCR Completed!\n\n", unsafe_allow_html=True)
    return output_dict, article_wise_ocr