# Newspapers-OCR-Demo / run_ocr.py
# Author: Devesh Pant
# Version: v0 (commit 1b870f4)
import cv2
import numpy
import argparse
from pytesseract import*
from PIL import Image, ImageFont, ImageDraw
import numpy as np
def OCR(img, lang='hin', min_conf=0.25):
    """Run Tesseract OCR on a BGR image and return the recognized text.

    Parameters
    ----------
    img : numpy.ndarray
        Image in OpenCV's BGR channel order (as returned by ``cv2.imread``).
    lang : str
        Tesseract language code (default ``'hin'`` for Hindi).
    min_conf : float
        Minimum word confidence required to keep a localization.
        NOTE(review): Tesseract reports confidence on a 0-100 scale, so the
        default of 0.25 keeps nearly everything; pass e.g. ``25`` to filter
        weak detections. Default preserved for backward compatibility.

    Returns
    -------
    str
        The kept words, each followed by a single space (empty string if
        nothing passes the confidence filter).
    """
    # Tesseract expects RGB input; OpenCV decodes images as BGR.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang=lang)

    words = []
    for text, conf in zip(results["text"], results["conf"]):
        # Newer Tesseract versions report confidence as a float string
        # (e.g. '96.33'); int(conf) would raise ValueError on those,
        # so parse with float() instead.
        if float(conf) > min_conf:
            word = text.strip()
            if word:  # skip empty/whitespace-only localizations
                words.append(word)

    # Join once instead of repeated string concatenation in the loop.
    return "".join(w + " " for w in words)