pawandev committed on
Commit
6d78d98
·
1 Parent(s): 0b1d1c8

Update the PAN data extractor to find the PAN number and date of birth on the PAN card by pattern match rather than by fixed list position

Browse files
app/services/panServices/panDataExtractor.py CHANGED
@@ -1,16 +1,12 @@
1
  import re
2
 
3
  def extract_panData(data):
4
- unwanted_words = ["Name","/Name",'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
5
- "पिता का नाम",'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',"VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe',"पिता का नाम / Father's Na", 'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name',"पिता का नाम / Father's Name", 'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth", "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name"]
6
-
7
- # Clean the array by removing unwanted words and invalid entries
8
- cleaned_data = []
9
- combination_pattern = re.compile(r'(?=.*[A-Za-z])(?=.*[0-9])(?=.*[!@#$%^&*(),?":{}|<>])')
10
-
11
- for item in data:
12
- if item not in unwanted_words and not combination_pattern.search(item):
13
- cleaned_data.append(item)
14
 
15
  # Initialize result object
16
  result = {
@@ -24,33 +20,50 @@ def extract_panData(data):
24
  }
25
  }
26
 
 
 
 
 
 
 
 
 
27
  # Check and extract PAN number
28
- print(cleaned_data, "cleaned data")
29
  pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
30
- if len(cleaned_data) > 0 and pan_pattern.match(cleaned_data[0]):
31
- result["data"]["panNo"] = cleaned_data[0]
32
- else:
33
- result["data"]["panNo"] = ''
34
-
35
- # Check and extract name
36
- name_pattern = re.compile(r'^[A-Za-z .]+$')
37
- if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
38
- result["data"]["name"] = cleaned_data[1]
39
- else:
40
- result["data"]["name"] = ''
41
-
42
- # Check and extract father's name
43
- if len(cleaned_data) > 2 and name_pattern.match(cleaned_data[2]):
44
- result["data"]["fatherName"] = cleaned_data[2]
45
- else:
46
- result["data"]["fatherName"] = ''
47
-
48
  # Check and extract date of birth
49
  dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
50
- if len(cleaned_data) > 3 and dob_pattern.match(cleaned_data[3]):
51
- result["data"]["dob"] = cleaned_data[3]
 
 
 
 
 
 
 
 
52
  else:
53
- result["data"]["dob"] = ''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  # Check if any value is empty and set error message
56
  for key, value in result["data"].items():
@@ -59,4 +72,4 @@ def extract_panData(data):
59
  result["error"] = f"{key} value is not found due to bad image."
60
  break
61
 
62
- return result
 
1
  import re
2
 
3
  def extract_panData(data):
4
+ unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
5
+ "पिता का नाम", 'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',
6
+ "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
7
+ 'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
8
+ 'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
9
+ "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name"]
 
 
 
 
10
 
11
  # Initialize result object
12
  result = {
 
20
  }
21
  }
22
 
23
+ # Clean the array by removing unwanted words and invalid entries
24
+ cleaned_data = []
25
+ combination_pattern = re.compile(r'(?=.*[0-9])(?=.*[!@#$%^&*(),?":{}|<>])')
26
+
27
+ for item in data:
28
+ if item not in unwanted_words and not combination_pattern.search(item):
29
+ cleaned_data.append(item)
30
+
31
  # Check and extract PAN number
 
32
  pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
33
+ for item in cleaned_data:
34
+ if pan_pattern.match(item):
35
+ result["data"]["panNo"] = item
36
+ cleaned_data.remove(item)
37
+ break
38
+
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Check and extract date of birth
40
  dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
41
+ for item in cleaned_data:
42
+ if dob_pattern.match(item):
43
+ result["data"]["dob"] = item
44
+ cleaned_data.remove(item)
45
+ break
46
+
47
+ # If only two values are left, assume they are name and father's name
48
+ if len(cleaned_data) == 2:
49
+ result["data"]["name"] = cleaned_data[0]
50
+ result["data"]["fatherName"] = cleaned_data[1]
51
  else:
52
+ # Further cleaning of the data array to extract name and father's name
53
+ cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
54
+
55
+ # Check and extract name
56
+ name_pattern = re.compile(r'^[A-Za-z .]+$')
57
+ if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
58
+ result["data"]["name"] = cleaned_data[0]
59
+ else:
60
+ result["data"]["name"] = ''
61
+
62
+ # Check and extract father's name
63
+ if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
64
+ result["data"]["fatherName"] = cleaned_data[1]
65
+ else:
66
+ result["data"]["fatherName"] = ''
67
 
68
  # Check if any value is empty and set error message
69
  for key, value in result["data"].items():
 
72
  result["error"] = f"{key} value is not found due to bad image."
73
  break
74
 
75
+ return result