Spaces:
Sleeping
Sleeping
pawandev
committed on
Commit
·
6d78d98
1
Parent(s):
0b1d1c8
Update the PAN data extractor to extract data from PAN cards
Browse files
app/services/panServices/panDataExtractor.py
CHANGED
@@ -1,16 +1,12 @@
|
|
1 |
import re
|
2 |
|
3 |
def extract_panData(data):
|
4 |
-
unwanted_words = ["Name","/Name",'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
for item in data:
|
12 |
-
if item not in unwanted_words and not combination_pattern.search(item):
|
13 |
-
cleaned_data.append(item)
|
14 |
|
15 |
# Initialize result object
|
16 |
result = {
|
@@ -24,33 +20,50 @@ def extract_panData(data):
|
|
24 |
}
|
25 |
}
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Check and extract PAN number
|
28 |
-
print(cleaned_data, "cleaned data")
|
29 |
pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
name_pattern = re.compile(r'^[A-Za-z .]+$')
|
37 |
-
if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
|
38 |
-
result["data"]["name"] = cleaned_data[1]
|
39 |
-
else:
|
40 |
-
result["data"]["name"] = ''
|
41 |
-
|
42 |
-
# Check and extract father's name
|
43 |
-
if len(cleaned_data) > 2 and name_pattern.match(cleaned_data[2]):
|
44 |
-
result["data"]["fatherName"] = cleaned_data[2]
|
45 |
-
else:
|
46 |
-
result["data"]["fatherName"] = ''
|
47 |
-
|
48 |
# Check and extract date of birth
|
49 |
dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
else:
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
# Check if any value is empty and set error message
|
56 |
for key, value in result["data"].items():
|
@@ -59,4 +72,4 @@ def extract_panData(data):
|
|
59 |
result["error"] = f"{key} value is not found due to bad image."
|
60 |
break
|
61 |
|
62 |
-
return result
|
|
|
1 |
import re
|
2 |
|
3 |
def extract_panData(data):
|
4 |
+
unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
|
5 |
+
"पिता का नाम", 'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',
|
6 |
+
"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
|
7 |
+
'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
|
8 |
+
'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
|
9 |
+
"Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name"]
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Initialize result object
|
12 |
result = {
|
|
|
20 |
}
|
21 |
}
|
22 |
|
23 |
+
# Clean the array by removing unwanted words and invalid entries
|
24 |
+
cleaned_data = []
|
25 |
+
combination_pattern = re.compile(r'(?=.*[0-9])(?=.*[!@#$%^&*(),?":{}|<>])')
|
26 |
+
|
27 |
+
for item in data:
|
28 |
+
if item not in unwanted_words and not combination_pattern.search(item):
|
29 |
+
cleaned_data.append(item)
|
30 |
+
|
31 |
# Check and extract PAN number
|
|
|
32 |
pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
|
33 |
+
for item in cleaned_data:
|
34 |
+
if pan_pattern.match(item):
|
35 |
+
result["data"]["panNo"] = item
|
36 |
+
cleaned_data.remove(item)
|
37 |
+
break
|
38 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Check and extract date of birth
|
40 |
dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
|
41 |
+
for item in cleaned_data:
|
42 |
+
if dob_pattern.match(item):
|
43 |
+
result["data"]["dob"] = item
|
44 |
+
cleaned_data.remove(item)
|
45 |
+
break
|
46 |
+
|
47 |
+
# If only two values are left, assume they are name and father's name
|
48 |
+
if len(cleaned_data) == 2:
|
49 |
+
result["data"]["name"] = cleaned_data[0]
|
50 |
+
result["data"]["fatherName"] = cleaned_data[1]
|
51 |
else:
|
52 |
+
# Further cleaning of the data array to extract name and father's name
|
53 |
+
cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
|
54 |
+
|
55 |
+
# Check and extract name
|
56 |
+
name_pattern = re.compile(r'^[A-Za-z .]+$')
|
57 |
+
if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
|
58 |
+
result["data"]["name"] = cleaned_data[0]
|
59 |
+
else:
|
60 |
+
result["data"]["name"] = ''
|
61 |
+
|
62 |
+
# Check and extract father's name
|
63 |
+
if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
|
64 |
+
result["data"]["fatherName"] = cleaned_data[1]
|
65 |
+
else:
|
66 |
+
result["data"]["fatherName"] = ''
|
67 |
|
68 |
# Check if any value is empty and set error message
|
69 |
for key, value in result["data"].items():
|
|
|
72 |
result["error"] = f"{key} value is not found due to bad image."
|
73 |
break
|
74 |
|
75 |
+
return result
|