|
import cv2 |
|
import os |
|
import pytesseract |
|
import gradio as gr |
|
from gradio import Interface, Image, Text |
|
import numpy as np |
|
from PIL import Image |
|
from PIL import UnidentifiedImageError |
|
|
|
def process_image(input_image):
    """Detect text blocks in an image, OCR each one, and outline them.

    Args:
        input_image: A ``PIL.Image.Image`` or a filesystem path readable by
            ``cv2.imread``. ``None`` (nothing supplied) is reported as an
            error instead of being handed to OpenCV.

    Returns:
        A ``(text, image)`` tuple. On success ``text`` holds the recognised
        text of every non-empty block, one block per line in top-to-bottom
        order, and ``image`` is an RGB ``uint8`` array with a green rectangle
        around each detected block. On failure ``text`` is a message starting
        with ``"Could not process the image. Error: "`` and ``image`` is
        ``None``; no exception escapes this function.
    """
    if input_image is None:
        return "Could not process the image. Error: No image was provided.", None

    try:
        rgb = _load_rgb(input_image)
        boxes = _find_text_regions(rgb)

        # Loop-invariant Tesseract options (languages eng+khm, OEM 3, PSM 6).
        ocr_config = r'-l eng+khm --oem 3 --psm 6'

        recognized_text = []
        annotated = rgb.copy()  # draw on a copy so OCR crops stay clean
        for top, bottom, left, right in boxes:
            # pytesseract treats arrays as RGB, so crop from the RGB image.
            crop = rgb[top:bottom, left:right]
            text = pytesseract.image_to_string(crop, config=ocr_config).strip()
            if text:
                # Skip regions that produced no text (e.g. dilated noise) so
                # the output has no stray blank lines.
                recognized_text.append(text)
            cv2.rectangle(annotated, (left, top), (right, bottom), (0, 255, 0), 2)

        # The annotated image is returned in RGB channel order.
        return "\n".join(recognized_text), annotated
    except Exception as e:
        # UI boundary: report the problem as text rather than crashing the app.
        return f"Could not process the image. Error: {e}", None


def _load_rgb(input_image):
    """Return the input as an 8-bit, 3-channel RGB array."""
    if isinstance(input_image, Image.Image):
        # convert("RGB") normalises grayscale, palette and alpha modes so the
        # colour conversions downstream always see three channels.
        return np.array(input_image.convert("RGB"))

    # cv2.imread signals failure by returning None and yields BGR data.
    bgr = cv2.imread(input_image)
    if bgr is None or bgr.dtype != np.uint8:
        raise ValueError("Could not read the image. Please check the image format.")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _find_text_regions(rgb):
    """Return (top, bottom, left, right) boxes of text-like blocks in reading order."""
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Inverted Otsu threshold: dark ink on a light page becomes white foreground.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    # Dilating with a large rectangular kernel fuses nearby glyphs into blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
    merged = cv2.dilate(binary, kernel, iterations=1)

    # [-2] picks the contour list on both OpenCV 3.x (3 return values) and
    # OpenCV 4.x (2 return values).
    contours = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    boxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        boxes.append((y, y + h, x, x + w))

    # Top-to-bottom, then left-to-right for blocks that start on the same row.
    boxes.sort(key=lambda box: (box[0], box[2]))
    return boxes
|
|
|
# Web UI: one uploaded image in; recognised text and the annotated image out.
iface = gr.Interface(
    process_image,
    # The upload widget was labelled "Processed Image", the same as the
    # output; label it as the input so the two panes are distinguishable.
    inputs=[gr.Image(type="pil", label="Input Image")],
    outputs=[
        gr.Text(label="Detected Labels"),
        gr.Image(type="pil", label="Processed Image"),
    ],
    title="Bank Statement OCR",
    flagging_options=["blurry", "incorrect", "other"],
)

# NOTE(review): per the Gradio docs, share=True requests a public share link;
# confirm that exposing this app (it handles bank statements) is intended.
iface.launch(debug=True, share=True)