pawandev committed on
Commit
3951fa7
·
1 Parent(s): 6d78d98

Added a new model and made some modifications to the PAN data extraction script

Browse files
app/__init__.py CHANGED
@@ -10,7 +10,7 @@ def create_app():
10
  # Load model once
11
  app.models = {
12
  'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
13
- 'panModel': YOLO('models/PanModal_v3.pt') # Load additional models as needed
14
  }
15
 
16
  return app
 
10
  # Load model once
11
  app.models = {
12
  'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
13
+ 'panModel': YOLO('models/PanYolo_v4.pt') # Load additional models as needed
14
  }
15
 
16
  return app
app/services/panServices/panDataExtractor.py CHANGED
@@ -1,4 +1,18 @@
1
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def extract_panData(data):
4
  unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
@@ -6,7 +20,10 @@ def extract_panData(data):
6
  "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
7
  'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
8
  'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
9
- "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name"]
 
 
 
10
 
11
  # Initialize result object
12
  result = {
@@ -27,7 +44,8 @@ def extract_panData(data):
27
  for item in data:
28
  if item not in unwanted_words and not combination_pattern.search(item):
29
  cleaned_data.append(item)
30
-
 
31
  # Check and extract PAN number
32
  pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
33
  for item in cleaned_data:
@@ -45,13 +63,14 @@ def extract_panData(data):
45
  break
46
 
47
  # If only two values are left, assume they are name and father's name
 
48
  if len(cleaned_data) == 2:
49
  result["data"]["name"] = cleaned_data[0]
50
  result["data"]["fatherName"] = cleaned_data[1]
51
  else:
52
  # Further cleaning of the data array to extract name and father's name
53
  cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
54
-
55
  # Check and extract name
56
  name_pattern = re.compile(r'^[A-Za-z .]+$')
57
  if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
@@ -72,4 +91,4 @@ def extract_panData(data):
72
  result["error"] = f"{key} value is not found due to bad image."
73
  break
74
 
75
- return result
 
1
  import re
2
def filter_array(arr):
    """Return the entries of *arr* that none of the rejection patterns match.

    An entry is dropped when any of the following holds:

    * it contains word characters directly adjacent to a "special"
      character (anything that is not a dot, whitespace or a word
      character);
    * it consists solely of the digits 0-9;
    * it contains at least one character that is neither an ASCII letter
      nor whitespace.

    The relative order of the surviving entries is preserved.

    NOTE(review): the third pattern also matches a literal ``.``, so an
    entry such as ``"S. KUMAR"`` is removed even though the first pattern
    deliberately exempts dots. Confirm whether names with initials are
    meant to be discarded here.

    Args:
        arr: Iterable of strings (e.g. OCR text fragments).

    Returns:
        A new list holding only the entries that passed every check.
    """
    # Every rule is applied with ``search``; the numeric rule is anchored
    # with ``^``, so searching is equivalent to matching from the start.
    rejection_rules = (
        re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+'),
        re.compile(r'^[0-9]+$'),
        re.compile(r'[^.\s]*[^a-zA-Z\s][^.\s]*'),
    )

    survivors = []
    for entry in arr:
        if any(rule.search(entry) for rule in rejection_rules):
            continue
        survivors.append(entry)
    return survivors
16
 
17
  def extract_panData(data):
18
  unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
 
20
  "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
21
  'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
22
  'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
23
+ "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name']
24
+
25
+
26
+
27
 
28
  # Initialize result object
29
  result = {
 
44
  for item in data:
45
  if item not in unwanted_words and not combination_pattern.search(item):
46
  cleaned_data.append(item)
47
+
48
+
49
  # Check and extract PAN number
50
  pan_pattern = re.compile(r'^[A-Z]{5}[0-9]{4}[A-Z]$')
51
  for item in cleaned_data:
 
63
  break
64
 
65
  # If only two values are left, assume they are name and father's name
66
+ cleaned_data = filter_array(cleaned_data)
67
  if len(cleaned_data) == 2:
68
  result["data"]["name"] = cleaned_data[0]
69
  result["data"]["fatherName"] = cleaned_data[1]
70
  else:
71
  # Further cleaning of the data array to extract name and father's name
72
  cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
73
+ print(cleaned_data, "after cleaning")
74
  # Check and extract name
75
  name_pattern = re.compile(r'^[A-Za-z .]+$')
76
  if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
 
91
  result["error"] = f"{key} value is not found due to bad image."
92
  break
93
 
94
+ return result
app/services/panServices/panOcr.py CHANGED
@@ -2,7 +2,7 @@ from io import BytesIO
2
  from ...utils.azureOCR import analyze_image
3
  from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
4
  from .panDataExtractor import extract_panData
5
- from collections import defaultdict
6
 
7
  def process_results(results, img):
8
  label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
 
2
  from ...utils.azureOCR import analyze_image
3
  from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
4
  from .panDataExtractor import extract_panData
5
+ # from collections import defaultdict
6
 
7
  def process_results(results, img):
8
  label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}