# OCR-image-to-text-ZeroGPU
#
# Running
#
# File size: 5,238 Bytes

import gradio as gr
import requests
import tensorflow as tf
import keras_ocr
import cv2
import os
import numpy as np
import pandas as pd
from datetime import datetime
import scipy.ndimage.interpolation as inter
import easyocr
from PIL import Image
from paddleocr import PaddleOCR
import socket
from send_email_user import send_user_email

# Processed input images are archived under ./images (see save_details).
# exist_ok=True makes creation idempotent, so there is no window between an
# "is it there?" check and the mkdir in which another process can create it.
os.makedirs('images', exist_ok=True)

def get_device_ip_address():
    """Describe the machine this app is running on as a short text block.

    Returns:
        str: On Windows (``os.name == "nt"``), the hostname and the address
        it resolves to. On POSIX, the local IPv4 address of the interface
        used to reach the default gateway plus the hostname; the address is
        reported as ``unknown`` when no default gateway can be determined or
        reached. For any other ``os.name``, a "not supported yet" message.
    """
    if os.name == "nt":
        hostname = socket.gethostname()
        host = socket.gethostbyname(hostname)
        return ("Running on Windows"
                + "\nHostname:  " + hostname
                + "\nHost-IP-Address:" + host)

    if os.name == "posix":
        ipaddr = "unknown"
        # Expected output looks like "default via <gateway> dev <iface> ...".
        # The pipe is closed by the context manager even if read() fails.
        with os.popen("ip -4 route show default") as pipe:
            route = pipe.read().split()

        # Look the gateway up by keyword instead of a fixed position so an
        # empty result (no default route, `ip` missing) cannot raise.
        if "via" in route[:-1]:
            gateway = route[route.index("via") + 1]
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                    # Connecting a datagram socket only selects the outgoing
                    # interface; its local address is what we report.
                    s.connect((gateway, 0))
                    ipaddr = s.getsockname()[0]
            except OSError:
                # Best effort: this string is informational only, so an
                # unreachable gateway must not abort the caller.
                pass

        host = socket.gethostname()
        return "\nIP address:\t\t" + ipaddr + "\r\nHost:\t\t" + host

    return os.name + " not supported yet."
            
   
"""
Paddle OCR
"""
def ocr_with_paddle(img):
    """Extract text from *img* with PaddleOCR (English, angle classifier on).

    Args:
        img: Image passed unchanged to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognised text fragment, each preceded by one space, in
        the order PaddleOCR reports them. ``''`` when nothing is detected.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # One entry per input image. That entry can be None or empty when no
    # text is found, so guard before iterating.
    lines = result[0] if result else None
    if not lines:
        return ''

    # line[1][0] is the recognised string of each detected line.
    return ''.join(' ' + line[1][0] for line in lines)

"""
Keras OCR
"""
def ocr_with_keras(img):
    """Extract text from *img* with the keras-ocr pipeline.

    Args:
        img: Image source passed to ``keras_ocr.tools.read``.

    Returns:
        str: The recognised words, each preceded by one space.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # recognize() yields one list of (text, box) pairs per submitted image;
    # exactly one image is submitted here.
    words_and_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in words_and_boxes)

"""
easy OCR
"""
# Grayscale conversion
def get_grayscale(image):
    """Return *image* converted with OpenCV's ``COLOR_BGR2GRAY``.

    NOTE(review): the conversion code assumes BGR channel order - confirm
    that callers do not hand in RGB arrays.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

# Fixed-level thresholding
def thresholding(src):
    """Return *src* thresholded with ``cv2.THRESH_TOZERO`` at level 127.

    ``cv2.threshold`` is called with thresh=127 and maxval=255; only the
    resulting image (second element of its return value) is handed back.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded

def ocr_with_easy(img):
    """Extract Thai/English text from *img* with EasyOCR.

    The image is converted to grayscale and thresholded before recognition.

    Args:
        img: Colour image array accepted by ``get_grayscale``.

    Returns:
        str: The recognised paragraphs joined by single spaces.
    """
    gray_scale_image = get_grayscale(img)
    # thresholding() returns a new array rather than modifying its input,
    # so the result has to be kept for the step to have any effect.
    preprocessed = thresholding(gray_scale_image)

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly instead of going through a fixed
    # 'image.png' scratch file, which concurrent requests would overwrite.
    # paragraph must be a real bool: the string "False" is truthy, so
    # paragraph grouping was always active; that is now spelled out.
    bounds = reader.readtext(preprocessed, paragraph=True, detail=0)
    # Separate paragraphs so their boundary words do not run together.
    return ' '.join(bounds)
"""
Generate OCR
"""
def generate_ocr(Method,img):
    """Run the selected OCR engine on *img* and record the outcome.

    Args:
        Method: Engine name - ``'EasyOCR'``, ``'KerasOCR'`` or ``'PaddleOCR'``.
        img: Image to read, passed unchanged to the selected engine.

    Returns:
        str: The recognised text, or ``"Something went wrong"`` when the
        method is unknown, no image was supplied, or the engine failed.
    """
    try:
        print("Method___________________",Method)
        if img is None:
            raise ValueError("No image provided")
        if Method == 'EasyOCR':
            text_output = ocr_with_easy(img)
        elif Method == 'KerasOCR':
            text_output = ocr_with_keras(img)
        elif Method == 'PaddleOCR':
            text_output = ocr_with_paddle(img)
        else:
            raise ValueError("Unsupported OCR method: " + repr(Method))
    except Exception as e:
        print("Error in ocr generation ==>",e)
        return "Something went wrong"

    # Record keeping (image archive, CSV log, notification) is secondary:
    # a failure there must not throw away text that was read successfully.
    try:
        save_details(Method,text_output,img)
    except Exception as e:
        print("Error while saving OCR details ==>",e)

    return text_output
"""
Save generated details
"""
def save_details(Method,text_output,img):
    """Archive the input image, log the OCR result and notify by e-mail.

    Nothing is written or sent when *text_output* is empty.

    Args:
        Method: Name of the OCR engine that produced the text.
        text_output: Recognised text.
        img: Image array that was processed; written with ``cv2.imwrite``.

    Returns:
        Whatever ``send_user_email`` returns, or ``None`` when *text_output*
        is empty.
    """
    # Guard first: every later step needs the saved image path and host
    # info, which only exist when there is a result to record.
    if not text_output:
        return None

    curr_datetime = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    stem, extension = os.path.splitext("image.jpg")
    input_img = 'images/' + stem + curr_datetime + extension
    cv2.imwrite(input_img, img)

    new_row = pd.DataFrame(
        [{'method': Method, 'input_img': input_img, 'generated_text': text_output}]
    )
    try:
        history = pd.read_csv("AllDetails.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run (or an empty file): the log starts with this row.
        # Any other read error propagates so that an existing log is never
        # silently replaced.
        history = new_row
    else:
        # DataFrame.append no longer exists in pandas 2.x; concat is the
        # supported way to add rows.
        history = pd.concat([history, new_row], ignore_index=True)
    # Always written without the index so the column layout stays the same
    # from the first row onwards.
    history.to_csv("AllDetails.csv", index=False)

    hostname = get_device_ip_address()
    return send_user_email(input_img,hostname,text_output,Method)

"""
Create user interface for OCR demo
"""

# Input and output widgets. The "radio_div" element id is the hook used by
# the custom CSS passed to the interface below.
image = gr.Image(shape=(224, 224), elem_id="img_div")
method = gr.Radio(
    ["EasyOCR", "KerasOCR", "PaddleOCR"],
    elem_id="radio_div",
)
output = gr.Textbox(label="Output")

# Wire generate_ocr(Method, img) to the widgets; the input order must match
# the function's parameter order.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image],
    outputs=output,
    title="Optical Character Recognition",
    description="Try OCR with different methods",
    theme="darkpeach",
    css=(
        ".gradio-container {background-color: lightgray} "
        "#radio_div {background-color: #FFD8B4; font-size: 40px;}"
    ),
)
# Other CSS rules that were tried:
# .gradio-container.gap-2 {gap: 10rem;row-gap: 10rem;column-gap: 10rem;}
# .gradio-container {background-color: lightgray}
demo.launch()

# with gr.Blocks(css=".gradio-container {background-color: red}") as demo:
#     input = [gr.Image(shape=(224, 224)), gr.Radio(["EasyOCR", "KerasOCR", "PaddleOCR"],text_color="blue")]
#     sub_btn = gr.Button("Submit")
#     output = gr.Textbox(label="Output")
#     event = sub_btn.click(generate_ocr, inputs=input, outputs=output)

# demo.launch()