# Tanaanan's picture
# Upload app.py
# 1848c0a
import easyocr as ocr #OCR
import streamlit as st #Web App
from PIL import Image, ImageOps #Image Processing
import time
from unittest import result
import editdistance
from pythainlp.util import isthai
import numpy as np
# Page header: application title.
st.title("Thai-Identification Card (OCR) Webapp.")
# Subtitle (Thai text): says the system detects data from ID cards
# (first/last name and ID number).
st.markdown("ระบบตรวจจับข้อมูลจากบัตรประชาชน (ชื่อ-นามสกุล , เลขบัตรประชาชน).")
st.markdown("")  # empty markdown element; presumably a visual spacer
# Mode selector: the chosen label is compared against the "Detection" label
# further down to decide between the upload flow and the bundled example image.
pages_name = ['Detection (ตรวจจับ)', 'Example image (ภาพตัวอย่าง)']
page = st.radio('Select option mode :', pages_name)
# ``st.cache`` is deprecated in recent Streamlit releases; ``st.cache_resource``
# is the replacement intended for shared, unserialisable objects such as models.
# Fall back to the legacy decorator (without output-mutation checks, which are
# not meaningful for a model object) when running on an older Streamlit.
_cache_model = getattr(st, "cache_resource", None)
if _cache_model is None:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Create the EasyOCR reader once and reuse it across Streamlit reruns.

    Returns:
        An ``easyocr.Reader`` configured for English (``['en']``) whose model
        files are stored in / loaded from the current directory.
    """
    return ocr.Reader(['en'], model_storage_directory='.')


reader = load_model()  # shared OCR reader used by the functions below
# Callers in this file pass 1280, i.e. a 1280 x 1280 canvas.
def img_resize(input_path, img_size):  # padding
    """Open an image and fit it onto a square ``img_size`` x ``img_size`` canvas.

    The image is rotated according to its EXIF orientation, scaled so that its
    longer side equals ``img_size`` (aspect ratio preserved) and pasted centred
    on a new RGB canvas.

    Args:
        input_path: Anything ``PIL.Image.open`` accepts (a path or a file-like
            object).
        img_size: Side length, in pixels, of the square output.

    Returns:
        The EXIF-corrected image itself when it already measures
        ``img_size`` x ``img_size``; otherwise a new RGB image of that size.
    """
    desired_size = img_size
    im = Image.open(input_path)
    im = ImageOps.exif_transpose(im)  # fix image rotating

    # Compare against the requested size rather than a hard-coded 1280 so the
    # shortcut is also correct for other values of ``img_size``.
    if im.size == (desired_size, desired_size):
        return im

    old_size = im.size  # (width, height)
    ratio = float(desired_size) / max(old_size)
    # Clamp to 1 px: a very elongated image could otherwise truncate to a zero
    # dimension, which ``resize`` rejects.
    new_size = tuple(max(1, int(side * ratio)) for side in old_size)

    # ``Image.ANTIALIAS`` was removed in Pillow 10; ``LANCZOS`` is the same filter.
    im = im.resize(new_size, Image.LANCZOS)

    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = ((desired_size - new_size[0]) // 2,
              (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)
    return new_im
def Get_OCR(input_image):
    """Run the shared OCR reader on an image and show the raw results.

    Writes two things to the Streamlit page: the list made of the second field
    of every OCR result, and the elapsed time of the OCR step.

    Args:
        input_image: Anything ``PIL.Image.open`` accepts.
    """
    opened = Image.open(input_image)  # read image
    with st.spinner("On working... "):
        started = time.perf_counter()
        detections = reader.readtext(np.array(opened))
        # Keep only the second field of each OCR result.
        texts = [detection[1] for detection in detections]
        st.write(texts)
        finished = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(finished - started))
# Side length (px) of the square canvas the card image is fitted onto before OCR.
_OCR_CANVAS_SIZE = 1280

# Single characters treated as OCR noise: a token that is exactly one of them
# (or empty) is dropped, and they are stripped from inside every other token.
_NOISE_CHARS = "#{}=/@$—|%-()¥[]‘:;"
_NOISE_TOKENS = frozenset(_NOISE_CHARS) | {""}
# One-pass clean-up table: "!" -> "l", "," -> ".", spaces and noise removed.
_TOKEN_CLEANUP = str.maketrans("!,", "l.", " " + _NOISE_CHARS)

_NAME_KEYWORDS = ("name", "lastname")
_GENDER_KEYWORDS = ("mr.", "mrs.", "master", "miss")

# Expected token lengths of the ID number: 1-4-5-2-1, 13 digits in total.
_ID_GROUP_LENGTHS = (1, 4, 5, 2, 1)
_ID_DIGIT_COUNT = 13


def _keep_non_thai(tokens):
    """Return the tokens that ``isthai`` does not classify as Thai text."""
    return [token for token in tokens if not isthai(token)]


def _normalise_tokens(tokens):
    """Drop pure-noise tokens and normalise the rest.

    Each kept token has "!" replaced by "l" and "," by "." (common OCR
    confusions), spaces and noise characters removed, and is lower-cased.
    Tokens that become empty through this clean-up are kept, as before; they
    are filtered out by a later stage.
    """
    return [
        token.translate(_TOKEN_CLEANUP).lower()
        for token in tokens
        if token not in _NOISE_TOKENS
    ]


def _find_id_number(tokens):
    """Locate the 13-digit ID number in the token list.

    Returns:
        ``{"id_num": <digits>}``, or ``{"id_num": "None"}`` when nothing is found.
    """
    # 1. Normal check: five consecutive tokens of lengths 1-4-5-2-1, all numeric.
    width = len(_ID_GROUP_LENGTHS)
    for start in range(len(tokens) - width + 1):
        group = tokens[start:start + width]
        if tuple(map(len, group)) == _ID_GROUP_LENGTHS:
            candidate = "".join(group)
            if candidate.isnumeric():
                return {"id_num": candidate}

    # 2. Fallback: a run of consecutive numeric tokens totalling 13 digits.
    #    ``None`` (not 0) marks "no run in progress", so a run starting at the
    #    first token keeps its start, and the total is checked right after each
    #    token so a run that ends on the last token is still recognised.
    run_start = None
    digits = 0
    for index, token in enumerate(tokens):
        if not token.isnumeric():
            run_start, digits = None, 0
            continue
        if run_start is None:
            run_start = index
        digits += len(token)
        if digits == _ID_DIGIT_COUNT:
            return {"id_num": "".join(tokens[run_start:index + 1])}

    return {"id_num": "None"}


def _repair_keywords(tokens):
    """Fix near-miss spellings of the field keywords in place and return the list.

    Step 1 rewrites tokens within edit distance 2 of "name"/"lastname", unless
    both keywords are already present. Step 2 rewrites at most ONE token to a
    gender title: the first token with the same length as the title and an edit
    distance within the limit.
    """
    # 1. name / lastname
    if not ("name" in tokens and "lastname" in tokens):
        for keyword in _NAME_KEYWORDS:
            for pos, token in enumerate(tokens):
                if editdistance.eval(keyword, token) <= 2:
                    tokens[pos] = keyword

    # 2. gender title
    # NOTE(review): the guard below inspects the token at the *keyword's* index,
    # not the candidate token, and the edit-distance limit depends on whether
    # that index exists. This mirrors the existing heuristic exactly; it looks
    # unintended (a limit of 3 accepts any 3-character token as "mr.") and
    # should be revisited with real OCR samples before being changed.
    for key_index, keyword in enumerate(_GENDER_KEYWORDS):
        if key_index < len(tokens):
            if tokens[key_index] in _NAME_KEYWORDS:
                continue
            limit = 3
        else:
            limit = 2 if keyword in ("mr.", "mrs.") else 3
        for pos, token in enumerate(tokens):
            if len(token) == len(keyword) and editdistance.eval(keyword, token) <= limit:
                tokens[pos] = keyword
                return tokens
    return tokens


def _first_index(tokens, candidates):
    """Return the index of the first token found in ``candidates``, else ``None``."""
    return next((pos for pos, token in enumerate(tokens) if token in candidates), None)


def _select_name_window(tokens):
    """Return the slice of tokens around the English name.

    The window is anchored on the first available keyword, tried in this order:
    "name", "lastname", a male title, a female title. Example result:
    ``['name', 'master', 'tanaanan', 'lastname', 'chalermpan']``.
    Returns an empty list (and prints a message) when no anchor exists.
    """
    # (anchor keywords, tokens kept before the anchor, tokens kept from the anchor on)
    anchors = (
        (("name",), 0, 6),  # first "name" wins when it appears more than once
        (("lastname",), 3, 3),
        (("mr.", "master"), 1, 5),
        (("miss", "mrs."), 1, 5),
    )
    for keywords, before, after in anchors:
        pos = _first_index(tokens, keywords)
        if pos is not None:
            # Clamp the start: a negative start would wrap around and slice
            # from the end of the list instead of from its beginning.
            return tokens[max(0, pos - before):pos + after]
    print("Can't find eng name!!")
    return []


def _strip_gender_prefix(tokens):
    """Remove a gender title glued to a name, e.g. "missjate" -> "jate".

    A token matches a title when its prefix equals the title or is within edit
    distance 1 of it at the same length. Only one token is modified; empty
    strings are removed from the result.
    """
    match_pos = None  # ``None`` (not 0) so that a match at index 0 is honoured
    prefix_len = 0
    for keyword in ('mrs.', 'mr.', 'master', 'miss'):
        size = len(keyword)
        for pos, token in enumerate(tokens):
            if token in _NAME_KEYWORDS:
                continue
            head = token[:size]
            if head == keyword or (len(head) == size and editdistance.eval(head, keyword) <= 1):
                # NOTE(review): a later keyword overwrites an earlier match
                # (existing behaviour, kept unchanged).
                match_pos, prefix_len = pos, size
                break
    if match_pos is not None:
        tokens[match_pos] = tokens[match_pos][prefix_len:]
    return [token for token in tokens if token != ""]


def _drop_short_or_numeric(tokens):
    """Return the tokens longer than two characters that are not purely numeric."""
    return [token for token in tokens if len(token) > 2 and not token.isnumeric()]


def _extract_name(tokens):
    """Build ``{"name": ..., "lastname": ...}`` from the cleaned name window.

    The value of each field is the token following its keyword. When only one
    field is found, the other is guessed from the position two tokens away
    (layout: name, <name>, lastname, <lastname>). Missing fields stay "None".
    """
    info = {"name": "None",
            "lastname": "None"}
    tokens = list(tokens)
    for title in ('mr.', 'mrs.', 'master', 'miss', 'mrs', 'mr'):
        if title in tokens:
            tokens.remove(title)  # removes the first occurrence only

    name_pos = None
    lastname_pos = None
    # The last token cannot be followed by a value, so it is not inspected.
    for pos, token in enumerate(tokens[:-1]):
        if token == "name":
            info["name"] = tokens[pos + 1]
            name_pos = pos + 1
        elif token == "lastname":
            info["lastname"] = tokens[pos + 1]
            lastname_pos = pos + 1

    if info["name"] != "None" and info["lastname"] == "None":
        guess = name_pos + 2
        if guess < len(tokens):
            info["lastname"] = tokens[guess]
    elif info["lastname"] != "None" and info["name"] == "None":
        guess = lastname_pos - 2
        # Skip negative positions: they would wrap around to the list's end.
        if guess >= 0:
            info["name"] = tokens[guess]

    # Remove "." left over from "mr." / "mrs."
    info["name"] = info["name"].replace(".", "")
    info["lastname"] = info["lastname"].replace(".", "")
    return info


def Get_Idcard_detail(file_path):
    """OCR an ID-card image and display the ID number and English name.

    The image is fitted onto a 1280 x 1280 canvas with ``img_resize``, read
    with the module-level ``reader``, and the resulting strings are cleaned and
    searched for the ID number and the name/lastname fields. Results are
    written to the Streamlit page; nothing is returned.

    Args:
        file_path: Anything ``PIL.Image.open`` accepts (path or file-like object).
    """
    prepared = img_resize(file_path, _OCR_CANVAS_SIZE)
    detections = reader.readtext(np.array(prepared))
    tokens = [detection[1] for detection in detections]  # second field of each result

    tokens = _keep_non_thai(tokens)
    tokens = _normalise_tokens(tokens)
    id_num = _find_id_number(tokens)

    tokens = _repair_keywords(tokens)
    tokens = _select_name_window(tokens)
    tokens = _strip_gender_prefix(tokens)
    tokens = _drop_short_or_numeric(tokens)

    st.success("Process Completed!.....")
    st.write(id_num)
    st.write(_extract_name(tokens))
# Pick the image source for the selected mode: an upload widget on the
# detection page, the bundled example picture otherwise.
detect_mode = page == "Detection (ตรวจจับ)"
if detect_mode:
    #image uploader
    image = st.file_uploader(label = "upload Idcard image here.. OwO",type=['png','jpg','jpeg'])
else:
    image = "./id_ex.jpg"

if image is None:
    # Only reachable on the detection page while nothing has been uploaded.
    st.write("## Waiting for image.. U w U")
    st.image('spy-x-family-anya-heh-anime.jpg')
else:
    # Show the padded image, then run the extraction and report its duration.
    new_img = img_resize(image, 1280)
    st.image(new_img)
    with st.spinner("On working..."):
        t1 = time.perf_counter()
        Get_Idcard_detail(image)
        t2 = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(t2-t1))

# Mode-specific notice shown under the result.
if detect_mode:
    st.warning("""ระบบไม่มีการเก็บข้อมูลบัตรประชาชนจากผู้ใช้งาน ใช้ในการศึกษา และ เป็นแนวทางในการพัฒนาต่อเท่านั้น \n
(No data kept in this system used for education and development guildlines only)""")
else:
    st.warning("""ภาพตัวอย่างบัตรประชาชนเป็นภาพสังเคราะห์ที่ได้มาจาก [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg) \n
Example Identification Card is synthetic image from [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg)""")

st.caption("Developed by Tanaanan .M [email protected]")