|
import easyocr as ocr |
|
import streamlit as st |
|
from PIL import Image, ImageOps |
|
import time |
|
from unittest import result |
|
import editdistance |
|
from pythainlp.util import isthai |
|
import numpy as np |
|
|
|
# Page header: the app title followed by a short Thai description
# ("System for detecting data from the national ID card
# (first name - last name, ID card number).").
st.title("Thai-Identification Card (OCR) Webapp.")

st.markdown("ระบบตรวจจับข้อมูลจากบัตรประชาชน (ชื่อ-นามสกุล , เลขบัตรประชาชน).")

# Empty markdown element placed between the description and the selector.
st.markdown("")

# Mode selector. The chosen label is compared against the first entry of
# this list further down the script to decide between upload and sample mode.
pages_name = ['Detection (ตรวจจับ)', 'Example image (ภาพตัวอย่าง)']
page = st.radio('Select option mode :', pages_name)
|
|
|
|
|
|
|
|
|
def _cache_model(func):
    """Wrap *func* with the Streamlit cache suited to long-lived model objects.

    Newer Streamlit releases provide ``st.cache_resource`` for unserialisable
    global resources such as ML models. On older releases fall back to the
    legacy ``st.cache`` with ``allow_output_mutation=True`` so the returned
    reader is not hashed on every rerun to look for mutations.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    return st.cache(allow_output_mutation=True)(func)


@_cache_model
def load_model():
    """Create the EasyOCR reader used by the whole app.

    Returns:
        An ``easyocr.Reader`` configured for English (``'en'``) whose model
        files are stored in the current working directory.
    """
    return ocr.Reader(['en'], model_storage_directory='.')


# Shared reader instance used by the OCR functions below.
reader = load_model()
|
|
|
|
|
def img_resize(input_path, img_size):
    """Load an image and letterbox it onto a square canvas.

    The image is rotated according to its EXIF orientation, scaled so that
    its longest side equals ``img_size`` (aspect ratio preserved) and pasted
    centred on a new ``img_size`` x ``img_size`` RGB canvas. Padding uses
    Pillow's default fill colour.

    Args:
        input_path: Anything ``PIL.Image.open`` accepts; callers in this
            file pass both a path string and an uploaded file object.
        img_size: Side length, in pixels, of the square result.

    Returns:
        A ``PIL.Image.Image``. An image that is already exactly
        ``img_size`` x ``img_size`` is returned as loaded (after EXIF
        transposition) without being re-encoded onto a new canvas.
    """
    desired_size = img_size
    im = ImageOps.exif_transpose(Image.open(input_path))

    # Compare against the requested size rather than a hard-coded 1280 so the
    # shortcut stays correct for any img_size.
    if im.size == (desired_size, desired_size):
        return im

    ratio = float(desired_size) / max(im.size)
    # Clamp to 1 px so extreme aspect ratios never produce a zero-sized side.
    new_size = tuple(max(1, int(side * ratio)) for side in im.size)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im = im.resize(new_size, Image.LANCZOS)

    new_im = Image.new("RGB", (desired_size, desired_size))
    offset = ((desired_size - new_size[0]) // 2,
              (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)
    return new_im
|
|
|
def Get_OCR(input_image):
    """Run OCR on one image and show the raw text plus the elapsed time.

    Args:
        input_image: Anything ``PIL.Image.open`` accepts.

    The recognised strings and the timing line are written to the Streamlit
    page; nothing is returned.
    """
    picture = Image.open(input_image)

    with st.spinner("On working... "):
        started = time.perf_counter()
        detections = reader.readtext(np.array(picture))

        # Element 1 of every detection is the recognised string.
        recognised = [entry[1] for entry in detections]
        st.write(recognised)

        finished = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(finished - started))
|
|
|
|
|
|
|
# Side length of the square image handed to the OCR reader.
_OCR_SIZE = 1280

# OCR noise characters: stripped from inside every token; a token made only
# of them ends up empty and is dropped.
_NOISE_CHARS = "#{}=/@$—|%-()¥[]‘:;"
# Single-pass clean-up table: "!" -> "l", "," -> ".", spaces and noise removed.
_TOKEN_TABLE = str.maketrans("!,", "l.", " " + _NOISE_CHARS)

# Printed layout of the ID number: groups of 1, 4, 5, 2 and 1 digits.
_ID_GROUP_LENGTHS = (1, 4, 5, 2, 1)
_ID_LENGTH = 13

# Field labels and titles looked for among the cleaned tokens.
_NAME_LABELS = ("name", "lastname")
_TITLES = ("mr.", "mrs.", "master", "miss")
# Longest "mr"-style title first so "mrs." is tried before "mr.".
_FUSED_TITLES = ("mrs.", "mr.", "master", "miss")

# (anchor tokens, tokens kept before the anchor, tokens kept from the anchor
# onwards), tried in priority order.
_WINDOW_RULES = (
    (("name",), 0, 6),
    (("lastname",), 3, 3),
    (("mr.", "master"), 1, 5),
    (("miss", "mrs."), 1, 5),
)


def _clean_tokens(tokens):
    """Normalise OCR tokens: fix look-alikes, lowercase, strip noise, drop empties."""
    cleaned = (token.translate(_TOKEN_TABLE).lower() for token in tokens)
    # Tokens reduced to nothing carry no information and would otherwise
    # break up adjacent digit groups and occupy slots in the name window.
    return [token for token in cleaned if token]


def _find_id_number(tokens):
    """Return the 13-digit ID number found in *tokens*, or ``"None"``."""
    # Preferred: five consecutive tokens matching the printed 1-4-5-2-1 layout.
    group_count = len(_ID_GROUP_LENGTHS)
    for start in range(len(tokens) - group_count + 1):
        window = tokens[start:start + group_count]
        if all(len(part) == size
               for part, size in zip(window, _ID_GROUP_LENGTHS)):
            candidate = "".join(window)
            if candidate.isnumeric():
                return candidate

    # Fallback: a run of consecutive numeric tokens totalling 13 characters,
    # counted from the first numeric token after a non-numeric one.
    run_start = None  # None (not 0) marks "no run", so a run may start at index 0
    digits = 0
    for index, token in enumerate(tokens):
        if not token.isnumeric():
            run_start, digits = None, 0
            continue
        if run_start is None:
            run_start = index
        digits += len(token)
        # Checked right after adding, so a run ending on the last token counts.
        if digits == _ID_LENGTH:
            return "".join(tokens[run_start:index + 1])
    return "None"


def _normalise_labels(tokens):
    """Repair misread "name"/"lastname" labels and at most one title, in place."""
    if not all(label in tokens for label in _NAME_LABELS):
        for label in _NAME_LABELS:
            for index, token in enumerate(tokens):
                if editdistance.eval(label, token) <= 2:
                    tokens[index] = label

    # Only the first matching token is rewritten; titles are tried in order.
    # NOTE(review): with equal lengths required, a distance of <= 3 accepts
    # every 3-character token as "mr." - confirm this threshold is intended.
    for title in _TITLES:
        for index, token in enumerate(tokens):
            if token in _NAME_LABELS:
                continue
            if (len(token) == len(title)
                    and editdistance.eval(title, token) <= 3):
                tokens[index] = title
                return tokens
    return tokens


def _name_window(tokens):
    """Return the slice of *tokens* expected to contain the English name."""
    for anchors, before, after in _WINDOW_RULES:
        position = next(
            (i for i, token in enumerate(tokens) if token in anchors), None)
        if position is not None:
            # Clamp the start: a negative slice start would wrap around to
            # the end of the list and usually produce an empty window.
            return tokens[max(0, position - before):position + after]
    print("Can't find eng name!!")
    return []


def _strip_fused_title(tokens):
    """Cut a title glued to the front of a token ("mr.somchai" -> "somchai").

    Only the first match is stripped. Empty tokens are removed from the result.
    """
    stripped = list(tokens)
    for title in _FUSED_TITLES:
        size = len(title)
        matched = False
        for index, token in enumerate(stripped):
            if token in _NAME_LABELS:
                continue
            prefix = token[:size]
            if prefix == title or (
                    len(prefix) == size
                    and editdistance.eval(prefix, title) <= 1):
                stripped[index] = token[size:]
                matched = True
                break
        if matched:
            # Stop at the first title: otherwise "mrs.x" is matched again by
            # "mr." and only three characters are removed.
            break
    return [token for token in stripped if token]


def _drop_short_and_numeric(tokens):
    """Keep only tokens longer than two characters that are not numeric."""
    return [token for token in tokens
            if len(token) > 2 and not token.isnumeric()]


def _extract_names(tokens):
    """Build ``{"name": ..., "lastname": ...}`` from the name-window tokens.

    A value that cannot be determined stays the string ``"None"``.
    """
    info = {"name": "None", "lastname": "None"}
    remaining = list(tokens)

    # Remove one stand-alone occurrence of each title.
    for title in ('mr.', 'mrs.', 'master', 'miss', 'mrs', 'mr'):
        if title in remaining:
            remaining.remove(title)

    # The value is the token right after its label; a label in last position
    # has no value, so the final token is never treated as a label.
    name_pos = None
    lastname_pos = None
    for index, token in enumerate(remaining[:-1]):
        if token == "name":
            info["name"] = remaining[index + 1]
            name_pos = index + 1
        elif token == "lastname":
            info["lastname"] = remaining[index + 1]
            lastname_pos = index + 1

    # If only one label was recognised, guess the other value from the
    # expected layout: name-label, NAME, lastname-label, LASTNAME.
    if name_pos is not None and lastname_pos is None:
        guess = name_pos + 2
        if guess < len(remaining):
            info["lastname"] = remaining[guess]
    elif lastname_pos is not None and name_pos is None:
        guess = lastname_pos - 2
        # A negative index would silently pick a token from the end.
        if guess >= 0:
            info["name"] = remaining[guess]

    info["name"] = info["name"].replace(".", "")
    info["lastname"] = info["lastname"].replace(".", "")
    return info


def Get_Idcard_detail(file_path):
    """Extract the ID number and English name from an ID-card image.

    The image is letterboxed to 1280 x 1280 and read with the shared OCR
    reader. The recognised strings are then filtered and cleaned, and the
    ID number and English first/last name are located heuristically.

    Args:
        file_path: Anything ``img_resize`` accepts; callers in this file pass
            a path string or an uploaded file object.

    The results are written to the Streamlit page as two dictionaries,
    ``{"id_num": ...}`` and ``{"name": ..., "lastname": ...}``. Missing
    values are the string ``"None"``. Nothing is returned.
    """
    card = img_resize(file_path, _OCR_SIZE)
    detections = reader.readtext(np.array(card))

    # Element 1 of every detection is the recognised string; Thai-only
    # strings are discarded because only the English fields are parsed.
    tokens = [entry[1] for entry in detections]
    tokens = [token for token in tokens if not isthai(token)]
    tokens = _clean_tokens(tokens)

    # The ID number is searched before any label/title repair touches tokens.
    id_num = {"id_num": _find_id_number(tokens)}

    tokens = _normalise_labels(tokens)
    tokens = _name_window(tokens)
    tokens = _strip_fused_title(tokens)
    tokens = _drop_short_and_numeric(tokens)

    st.success("Process Completed!.....")
    st.write(id_num)
    st.write(_extract_names(tokens))
|
|
|
|
|
# Choose the image source for the selected mode: a user upload on the
# detection page, the bundled sample card on the example page.
detection_mode = page == "Detection (ตรวจจับ)"
if detection_mode:
    image = st.file_uploader(label = "upload Idcard image here.. OwO",type=['png','jpg','jpeg'])
else:
    image = "./id_ex.jpg"

if image is None:
    # Nothing uploaded yet: show the placeholder instead of results.
    st.write("## Waiting for image.. U w U")
    st.image('spy-x-family-anya-heh-anime.jpg')
else:
    new_img = img_resize(image, 1280)
    st.image(new_img)

    with st.spinner("On working..."):
        t1 = time.perf_counter()
        Get_Idcard_detail(image)
        t2 = time.perf_counter()
        st.write('time taken to run: {:.2f} sec'.format(t2-t1))

# Mode-specific disclaimer shown below the results.
if detection_mode:
    st.warning("""ระบบไม่มีการเก็บข้อมูลบัตรประชาชนจากผู้ใช้งาน ใช้ในการศึกษา และ เป็นแนวทางในการพัฒนาต่อเท่านั้น \n
(No data kept in this system used for education and development guildlines only)""")
else:
    st.warning("""ภาพตัวอย่างบัตรประชาชนเป็นภาพสังเคราะห์ที่ได้มาจาก [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg) \n
Example Identification Card is synthetic image from [AI for Thai.](https://aiforthai.in.th/files/iappIDcr-front-ex.jpg)""")

st.caption("Developed by Tanaanan .M [email protected]")
|
|