OCR-image-to-text-ZeroGPU

Running

App Files Files Community

OCR-image-to-text-ZeroGPU / app.py

pragnakalp

Update app.py

ad311aa almost 2 years ago

raw

history blame

4.83 kB

	import gradio as gr
	import requests
	import tensorflow as tf
	import keras_ocr
	import cv2
	import os
	import numpy as np
	import pandas as pd
	from datetime import datetime
	import scipy.ndimage.interpolation as inter
	import easyocr
	from PIL import Image
	from paddleocr import PaddleOCR
	import socket
	from send_email_user import send_user_email

	# if not os.path.isdir('images'):
	# os.mkdir('images')
	# print("create folder--->")
	def get_device_ip_address():
	    """Describe the machine this app is running on.

	    Returns:
	        str: A human-readable description.
	            * Windows (``os.name == "nt"``): host name plus the address that
	              ``socket.gethostbyname`` resolves for it.
	            * POSIX: the local IPv4 address used to reach the default gateway
	              (``"unknown"`` when it cannot be determined) plus the host name.
	            * Anything else: ``"<os.name> not supported yet."``.
	    """
	    if os.name == "nt":
	        hostname = socket.gethostname()
	        host = socket.gethostbyname(hostname)
	        return "Running on Windows" + "\nHostname: " + hostname + "\nHost-IP-Address:" + host

	    if os.name == "posix":
	        host = socket.gethostname()
	        ipaddr = "unknown"

	        # The third whitespace-separated token of the command output is used
	        # as the gateway address. Close the pipe as soon as it has been read.
	        with os.popen("ip -4 route show default") as pipe:
	            route = pipe.read().split()

	        # No default route (or no `ip` binary) yields fewer than three tokens;
	        # report "unknown" instead of raising IndexError.
	        if len(route) > 2:
	            # Connecting a UDP socket only selects the outgoing interface;
	            # the context manager guarantees the socket is closed afterwards.
	            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
	                try:
	                    probe.connect((route[2], 0))
	                    ipaddr = probe.getsockname()[0]
	                except OSError as err:
	                    # This is diagnostic information only: keep going with
	                    # "unknown" rather than failing the caller.
	                    print("Could not determine local IP address ==>", err)

	        return "\nIP address:\t\t" + ipaddr + "\r\nHost:\t\t" + host

	    return os.name + " not supported yet."



	"""
	Paddle OCR
	"""
	def ocr_with_paddle(img):
	    """Recognise English text in ``img`` with PaddleOCR.

	    Args:
	        img: The image to read; it is handed to ``PaddleOCR.ocr`` unchanged.

	    Returns:
	        str: Every recognised text fragment of the first result page, each
	        prefixed with a single space. Empty string when nothing was detected.
	    """
	    ocr = PaddleOCR(lang='en', use_angle_cls=True)
	    result = ocr.ocr(img)

	    # Only the first page of the result is read. Some PaddleOCR releases
	    # report "no text found" as [None] rather than [[]], which used to blow
	    # up in len(); treat both as an empty detection list.
	    detections = result[0] if result else None
	    if not detections:
	        return ''

	    # Each detection is indexed as [box, (text, ...)]; keep only the text.
	    return ''.join(' ' + detection[1][0] for detection in detections)

	"""
	Keras OCR
	"""
	def ocr_with_keras(img):
	    """Recognise text in ``img`` with the keras-ocr pipeline.

	    Args:
	        img: The image to read; loaded through ``keras_ocr.tools.read``.

	    Returns:
	        str: Every recognised word of the single submitted image, each
	        prefixed with one space (empty string if there are none).
	    """
	    recognizer = keras_ocr.pipeline.Pipeline()
	    batch = [keras_ocr.tools.read(img)]

	    # One image goes in, so only the first prediction list is of interest.
	    words_and_boxes = recognizer.recognize(batch)[0]

	    # Each prediction unpacks as (text, box); the box is not needed here.
	    return ''.join(' ' + word for word, _box in words_and_boxes)

	"""
	EasyOCR
	"""
	# gray scale image
	def get_grayscale(image):
	    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` code."""
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    return gray

	# Thresholding or Binarization
	def thresholding(src):
	    """Apply a ``THRESH_TOZERO`` threshold (thresh=127, maxval=255) to ``src``.

	    Returns the thresholded image, i.e. the second element of the
	    ``cv2.threshold`` result; ``src`` itself is not reassigned here.
	    """
	    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
	    return thresholded

	def ocr_with_easy(img):
	    """Recognise Thai/English text in ``img`` with EasyOCR.

	    The image is converted to grayscale and thresholded before recognition.

	    Args:
	        img: Colour image array accepted by ``get_grayscale``.

	    Returns:
	        str: The recognised text fragments concatenated without a separator.
	    """
	    gray_scale_image = get_grayscale(img)

	    # thresholding() returns a new image; the result used to be thrown away,
	    # so the preprocessing step never took effect.
	    preprocessed = thresholding(gray_scale_image)

	    reader = easyocr.Reader(['th','en'])

	    # The array is passed directly instead of going through a fixed
	    # 'image.png' on disk, which concurrent requests would overwrite.
	    # paragraph was the string "False", which is truthy, so paragraph
	    # grouping has always been on; spell that out as a real boolean.
	    fragments = reader.readtext(preprocessed, paragraph=True, detail=0)
	    return ''.join(fragments)
	"""
	Generate OCR
	"""
	def generate_ocr(Method,img):
	    """Run the selected OCR engine on ``img`` and record the outcome.

	    Args:
	        Method: Engine name; one of 'EasyOCR', 'KerasOCR' or 'PaddleOCR'.
	            Any other value yields an empty result.
	        img: The image to read, passed unchanged to the chosen engine.

	    Returns:
	        str: The recognised text, or "Something went wrong" when the OCR
	        engine itself raised.
	    """
	    print("Method___________________",Method)

	    engines = {
	        'EasyOCR': ocr_with_easy,
	        'KerasOCR': ocr_with_keras,
	        'PaddleOCR': ocr_with_paddle,
	    }

	    text_output = ''
	    try:
	        engine = engines.get(Method)
	        if engine is not None:
	            text_output = engine(img)
	    except Exception as e:
	        print("Error in ocr generation ==>",e)
	        return "Something went wrong"

	    # Bookkeeping (image copy, CSV log, notification e-mail) is kept apart
	    # from recognition so that a failure there no longer throws away a
	    # result the user is waiting for.
	    # NOTE(review): the source lost its indentation; save_details is assumed
	    # to run for every method, not only for 'PaddleOCR' - confirm.
	    try:
	        save_details(Method,text_output,img)
	    except Exception as e:
	        print("Error while saving ocr details ==>",e)

	    return text_output
	"""
	Save generated details
	"""
	def save_details(Method,text_output,img):
	    """Persist one OCR request and notify by e-mail.

	    When ``text_output`` is non-empty the input image is written next to the
	    app as ``image<timestamp>.jpg``. A row (method, image path, text) is then
	    appended to ``AllDetails.csv`` and the details are e-mailed.

	    Args:
	        Method: Name of the OCR engine that was used.
	        text_output: Text produced by the engine (may be empty).
	        img: Image array to store with ``cv2.imwrite``.

	    Returns:
	        Whatever ``send_user_email`` returns.
	    """
	    csv_path = "AllDetails.csv"
	    picture_path = "image.jpg"
	    curr_datetime = datetime.now().strftime('%Y-%m-%d %H-%M-%S')

	    # Stays empty when there was no text, i.e. no image copy is stored.
	    input_img = ''
	    if text_output:
	        stem, extension = os.path.splitext(picture_path)
	        modified_picture_path = stem + curr_datetime + extension
	        cv2.imwrite(modified_picture_path, img)
	        input_img = modified_picture_path

	    # NOTE(review): the source lost its indentation; logging and e-mail are
	    # assumed to happen for every call, not only when text was found - confirm.
	    new_row = pd.DataFrame(
	        [{'method': Method, 'input_img': input_img, 'generated_text': text_output}]
	    )
	    try:
	        existing = pd.read_csv(csv_path)
	    except (FileNotFoundError, pd.errors.EmptyDataError):
	        # First record: start a fresh log.
	        combined = new_row
	    else:
	        # DataFrame.append no longer exists in pandas 2.x; under the old bare
	        # `except` that error silently replaced the whole log with one row.
	        combined = pd.concat([existing, new_row], ignore_index=True)
	    # Always omit the index so the file layout is the same on every write.
	    combined.to_csv(csv_path, index=False)

	    hostname = get_device_ip_address()
	    return send_user_email(input_img,hostname,text_output,Method)

	"""
	Create user interface for OCR demo
	"""

	# Input widgets: the picture to read and the OCR engine to read it with.
	image = gr.Image(shape=(224, 224), elem_id="img_div")
	method = gr.Radio(
	    choices=["EasyOCR", "KerasOCR", "PaddleOCR"],
	    elem_id="radio_div",
	)

	# Output widget: the recognised text.
	output = gr.Textbox(label="Output")

	# generate_ocr receives the inputs in the order listed: (method, image).
	demo = gr.Interface(
	    fn=generate_ocr,
	    inputs=[method, image],
	    outputs=output,
	    title="Optical Character Recognition",
	    description="Try OCR with different methods",
	    theme="darkpeach",
	    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
	)

	# Start the web app with request queueing switched off.
	demo.launch(enable_queue=False)