Spaces:
Sleeping
Sleeping
pawandev
committed on
Commit
·
3951fa7
1
Parent(s):
6d78d98
Added a new model and made some modifications to the PAN data extraction script
Browse files
app/__init__.py
CHANGED
@@ -10,7 +10,7 @@ def create_app():
|
|
10 |
# Load model once
|
11 |
app.models = {
|
12 |
'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
|
13 |
-
'panModel': YOLO('models/
|
14 |
}
|
15 |
|
16 |
return app
|
|
|
10 |
# Load model once
|
11 |
app.models = {
|
12 |
'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
|
13 |
+
'panModel': YOLO('models/PanYolo_v4.pt') # Load additional models as needed
|
14 |
}
|
15 |
|
16 |
return app
|
app/services/panServices/panDataExtractor.py
CHANGED
@@ -1,4 +1,18 @@
|
|
1 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
def extract_panData(data):
|
4 |
unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
|
@@ -6,7 +20,10 @@ def extract_panData(data):
|
|
6 |
"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
|
7 |
'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
|
8 |
'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
|
9 |
-
"Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name"]
|
|
|
|
|
|
|
10 |
|
11 |
# Initialize result object
|
12 |
result = {
|
@@ -27,7 +44,8 @@ def extract_panData(data):
|
|
27 |
for item in data:
|
28 |
if item not in unwanted_words and not combination_pattern.search(item):
|
29 |
cleaned_data.append(item)
|
30 |
-
|
|
|
31 |
# Check and extract PAN number
|
32 |
pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
|
33 |
for item in cleaned_data:
|
@@ -45,13 +63,14 @@ def extract_panData(data):
|
|
45 |
break
|
46 |
|
47 |
# If only two values are left, assume they are name and father's name
|
|
|
48 |
if len(cleaned_data) == 2:
|
49 |
result["data"]["name"] = cleaned_data[0]
|
50 |
result["data"]["fatherName"] = cleaned_data[1]
|
51 |
else:
|
52 |
# Further cleaning of the data array to extract name and father's name
|
53 |
cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
|
54 |
-
|
55 |
# Check and extract name
|
56 |
name_pattern = re.compile(r'^[A-Za-z .]+$')
|
57 |
if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
|
@@ -72,4 +91,4 @@ def extract_panData(data):
|
|
72 |
result["error"] = f"{key} value is not found due to bad image."
|
73 |
break
|
74 |
|
75 |
-
return result
|
|
|
1 |
import re
|
2 |
+
def filter_array(arr):
    """Return the items of *arr* that look like plain name text.

    An item is kept only when every character is an ASCII letter, a dot,
    or whitespace. Anything containing digits, punctuation other than a
    dot, or non-ASCII script is dropped.

    Dots are allowed so that names with initials (e.g. "A. KUMAR") are not
    discarded; the earlier pattern rejected any item containing a dot. An
    item that contains a dot but no letter at all (e.g. ".") is still
    dropped.

    Args:
        arr: Iterable of strings to filter.

    Returns:
        A new list with the surviving items in their original order.
    """
    # One whitelist check replaces the three overlapping blacklist patterns:
    # digits and special characters all fall outside this character class.
    name_like = re.compile(r'[A-Za-z.\s]*')

    filtered_array = []
    for item in arr:
        if not name_like.fullmatch(item):
            continue
        # A dot is only meaningful next to letters; bare dots are noise.
        if '.' in item and not any(ch.isalpha() for ch in item):
            continue
        filtered_array.append(item)
    return filtered_array
|
16 |
|
17 |
def extract_panData(data):
|
18 |
unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
|
|
|
20 |
"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
|
21 |
'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
|
22 |
'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
|
23 |
+
"Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name']
|
24 |
+
|
25 |
+
|
26 |
+
|
27 |
|
28 |
# Initialize result object
|
29 |
result = {
|
|
|
44 |
for item in data:
|
45 |
if item not in unwanted_words and not combination_pattern.search(item):
|
46 |
cleaned_data.append(item)
|
47 |
+
|
48 |
+
|
49 |
# Check and extract PAN number
|
50 |
pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
|
51 |
for item in cleaned_data:
|
|
|
63 |
break
|
64 |
|
65 |
# If only two values are left, assume they are name and father's name
|
66 |
+
cleaned_data = filter_array(cleaned_data)
|
67 |
if len(cleaned_data) == 2:
|
68 |
result["data"]["name"] = cleaned_data[0]
|
69 |
result["data"]["fatherName"] = cleaned_data[1]
|
70 |
else:
|
71 |
# Further cleaning of the data array to extract name and father's name
|
72 |
cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
|
73 |
+
print(cleaned_data, "after cleaning")
|
74 |
# Check and extract name
|
75 |
name_pattern = re.compile(r'^[A-Za-z .]+$')
|
76 |
if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
|
|
|
91 |
result["error"] = f"{key} value is not found due to bad image."
|
92 |
break
|
93 |
|
94 |
+
return result
|
app/services/panServices/panOcr.py
CHANGED
@@ -2,7 +2,7 @@ from io import BytesIO
|
|
2 |
from ...utils.azureOCR import analyze_image
|
3 |
from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
|
4 |
from .panDataExtractor import extract_panData
|
5 |
-
from collections import defaultdict
|
6 |
|
7 |
def process_results(results, img):
|
8 |
label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
|
|
|
2 |
from ...utils.azureOCR import analyze_image
|
3 |
from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
|
4 |
from .panDataExtractor import extract_panData
|
5 |
+
# from collections import defaultdict
|
6 |
|
7 |
def process_results(results, img):
|
8 |
label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
|