Spaces:

pawanmau01
/

TestAPI

Sleeping

App Files Files Community

Pavan+2-at-244075126032 committed on Sep 18, 2024

Commit

70c5320

1 Parent(s): d25f036

Revert OCR changes

Browse files

Files changed (9) hide show

app/__init__.py +7 -7
app/api.py +8 -9
app/routes/adhaarApi.py +60 -60
app/routes/panApi.py +64 -64
app/services/adhaarServices/adhaarDataExtractor.py +47 -47
app/services/adhaarServices/ocr.py +37 -37
app/services/panServices/panDataExtractor.py +80 -80
app/services/panServices/panOcr.py +53 -53
gunicorn.conf.py +1 -1

app/__init__.py CHANGED Viewed

@@ -1,16 +1,16 @@
 from flask import Flask
-# from ultralytics import YOLO
 def create_app():
     app = Flask(__name__)
     from .api import ocr_bp
     app.register_blueprint(ocr_bp)
-    # with app.app_context():
-    #     # Load model once
-    #     app.models = {
-    #         'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
-    #         'panModel': YOLO('models/PanModel_v6.pt')  # Load additional models as needed
-    #     }
     return app

 from flask import Flask
+from ultralytics import YOLO
 def create_app():
     app = Flask(__name__)
     from .api import ocr_bp
     app.register_blueprint(ocr_bp)
+    with app.app_context():
+        # Load model once
+        app.models = {
+            'adhaarModel': YOLO('models/aadhaarYolov8.pt'),
+            'panModel': YOLO('models/PanModel_v6.pt')  # Load additional models as needed
+        }
     return app

app/api.py CHANGED Viewed

@@ -1,22 +1,21 @@
-# from app.services.UidaiServices.uidaiServices import generate_captcha, validate_aadhaar
 from app.routes.uidaiServices import generate_captcha, validate_aadhaar
 from flask import Blueprint, request, jsonify
 import requests
 import os
-# from .routes.adhaarApi import ocrAdhaar
-# from .routes.panApi import ocrPan
 import uuid
 ocr_bp = Blueprint('ocr', __name__)
 session = requests.Session()
 mode = os.getenv("PROJECT_MODE")
-# @ocr_bp.route('/ocrPan', methods=['POST'])
-# def getResponse_Pan():
-#     return ocrPan(mode, session)
-# @ocr_bp.route('/ocrAdhaar', methods=['POST'])
-# def getResponse_Adhaar():
-#     return ocrAdhaar(mode, session)
 # Route to generate captcha
 @ocr_bp.route('/generateCaptcha', methods=['POST'])

 from app.routes.uidaiServices import generate_captcha, validate_aadhaar
 from flask import Blueprint, request, jsonify
 import requests
 import os
+from .routes.adhaarApi import ocrAdhaar
+from .routes.panApi import ocrPan
 import uuid
 ocr_bp = Blueprint('ocr', __name__)
 session = requests.Session()
 mode = os.getenv("PROJECT_MODE")
+@ocr_bp.route('/ocrPan', methods=['POST'])
+def getResponse_Pan():
+    return ocrPan(mode, session)
+@ocr_bp.route('/ocrAdhaar', methods=['POST'])
+def getResponse_Adhaar():
+    return ocrAdhaar(mode, session)
 # Route to generate captcha
 @ocr_bp.route('/generateCaptcha', methods=['POST'])

app/routes/adhaarApi.py CHANGED Viewed

@@ -1,70 +1,70 @@
-# import re
-# from flask import request, jsonify, current_app
-# from PIL import Image, UnidentifiedImageError
-# from io import BytesIO
-# import base64
-# import requests
-# from  ..services.adhaarServices.ocr import process_results
-# import io
-# def ocrAdhaar(mode, session):
-#     try:
-#         print("API HIT ************* AADHAAROCR")
-#         data = request.get_json()
-#         if not data:
-#             return jsonify({"error": "Invalid request payload"}), 400
-#         if mode == "prod":
-#             if not data.get('image'):
-#                 return jsonify({"error": "Image data/buffer is required"}), 400
-#             #removing 'data:image/png,base64 ' from buffer.
-#             imgBuffer = data.get('image')
-#             imgBuffer = re.sub("^data:image/.+;base64,", "", imgBuffer)
-#             # Adjust base64 string padding
-#             if len(imgBuffer) % 4:
-#                 imgBuffer += '=' * (4 - len(imgBuffer) % 4)
-#             try:
-#                 img_data = base64.b64decode(imgBuffer)
-#                 img = Image.open(BytesIO(img_data))
-#                 img.verify()  # Verify image format
-#                 img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
-#             except (base64.binascii.Error, ValueError) as decode_err:
-#                 return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
-#             except UnidentifiedImageError:
-#                 return jsonify({"error": "Unable to identify image format."}), 400
-#         elif mode == "dev":
-#             if not data.get('imgUrl'):
-#                 return jsonify({"error": "Image URL is required"}), 400
-#             img_url = data.get('imgUrl')
-#             response = session.get(img_url)
-#             response.raise_for_status()
-#             img = Image.open(BytesIO(response.content))
-#             img.verify()  # Verify image format
-#             img = Image.open(BytesIO(response.content))  # Re-open image after verification
-#         else:
-#             return jsonify({"error": "Invalid mode configuration"}), 500
-#         # Check image format
-#         if img.format not in ['JPEG', 'JPG', 'PNG']:
-#             return jsonify({"error": "Invalid image format. Only JPG and PNG are supported."}), 400
-#         # Run detection
-#         model = current_app.models.get('adhaarModel')
-#         results = model.predict(source=img, save=False)
-#         extracted_data = process_results(results, img)
-#         if extracted_data.get('statusCode') == 400:
-#             return jsonify(extracted_data), 400
-#         return jsonify(extracted_data), 200
-#     except requests.RequestException as req_err:
-#         return jsonify({"error": f"Image download failed: {str(req_err)}"}), 500
-#     except UnidentifiedImageError:
-#         return jsonify({"error": "Unable to identify image format."}), 400
-#     except Exception as e:
-#         current_app.logger.error(f"Unexpected error: {str(e)}")
-#         return jsonify({"error": "An unexpected error occurred."}), 500

+import re
+from flask import request, jsonify, current_app
+from PIL import Image, UnidentifiedImageError
+from io import BytesIO
+import base64
+import requests
+from  ..services.adhaarServices.ocr import process_results
+import io
+def ocrAdhaar(mode, session):
+    try:
+        print("API HIT ************* AADHAAROCR")
+        data = request.get_json()
+        if not data:
+            return jsonify({"error": "Invalid request payload"}), 400
+        if mode == "prod":
+            if not data.get('image'):
+                return jsonify({"error": "Image data/buffer is required"}), 400
+            #removing 'data:image/png,base64 ' from buffer.
+            imgBuffer = data.get('image')
+            imgBuffer = re.sub("^data:image/.+;base64,", "", imgBuffer)
+            # Adjust base64 string padding
+            if len(imgBuffer) % 4:
+                imgBuffer += '=' * (4 - len(imgBuffer) % 4)
+            try:
+                img_data = base64.b64decode(imgBuffer)
+                img = Image.open(BytesIO(img_data))
+                img.verify()  # Verify image format
+                img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
+            except (base64.binascii.Error, ValueError) as decode_err:
+                return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
+            except UnidentifiedImageError:
+                return jsonify({"error": "Unable to identify image format."}), 400
+        elif mode == "dev":
+            if not data.get('imgUrl'):
+                return jsonify({"error": "Image URL is required"}), 400
+            img_url = data.get('imgUrl')
+            response = session.get(img_url)
+            response.raise_for_status()
+            img = Image.open(BytesIO(response.content))
+            img.verify()  # Verify image format
+            img = Image.open(BytesIO(response.content))  # Re-open image after verification
+        else:
+            return jsonify({"error": "Invalid mode configuration"}), 500
+        # Check image format
+        if img.format not in ['JPEG', 'JPG', 'PNG']:
+            return jsonify({"error": "Invalid image format. Only JPG and PNG are supported."}), 400
+        # Run detection
+        model = current_app.models.get('adhaarModel')
+        results = model.predict(source=img, save=False)
+        extracted_data = process_results(results, img)
+        if extracted_data.get('statusCode') == 400:
+            return jsonify(extracted_data), 400
+        return jsonify(extracted_data), 200
+    except requests.RequestException as req_err:
+        return jsonify({"error": f"Image download failed: {str(req_err)}"}), 500
+    except UnidentifiedImageError:
+        return jsonify({"error": "Unable to identify image format."}), 400
+    except Exception as e:
+        current_app.logger.error(f"Unexpected error: {str(e)}")
+        return jsonify({"error": "An unexpected error occurred."}), 500

app/routes/panApi.py CHANGED Viewed

@@ -1,74 +1,74 @@
-# import re
-# from flask import request, jsonify, current_app
-# from PIL import Image, UnidentifiedImageError
-# from io import BytesIO
-# import base64
-# import requests
-# from  ..services.panServices.panOcr import process_results
-# import io
-# def ocrPan(mode, session):
-#     try:
-#         print("API HIT ************* PANOCR")
-#         data = request.get_json()
-#         if not data:
-#             return jsonify({"error": "Invalid request payload"}), 400
-#         if mode == "prod":
-#             if not data.get('image'):
-#                 return jsonify({"error": "Image data/buffer is required"}), 400
-#             #removing 'data:image/png,base64 ' from buffer.
-#             imgBuffer = data.get('image')
-#             imgBuffer = re.sub("^data:image/.+;base64,", "", imgBuffer)
-#             # Adjust base64 string padding
-#             if len(imgBuffer) % 4:
-#                 imgBuffer += '=' * (4 - len(imgBuffer) % 4)
-#             try:
-#                 img_data = base64.b64decode(imgBuffer)
-#                 img = Image.open(BytesIO(img_data))
-#                 img.verify()  # Verify image format
-#                 print(img, "img")
-#                 img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
-#             except (base64.binascii.Error, ValueError) as decode_err:
-#                 return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
-#             except UnidentifiedImageError:
-#                 return jsonify({"error": "Unable to identify image format."}), 400
-#         elif mode == "dev":
-#             if not data.get('imgUrl'):
-#                 return jsonify({"error": "Image URL is required"}), 400
-#             img_url = data.get('imgUrl')
-#             response = session.get(img_url)
-#             response.raise_for_status()
-#             img = Image.open(BytesIO(response.content))
-#             print(img, "img")
-#             img.verify()  # Verify image format
-#             img = Image.open(BytesIO(response.content))  # Re-open image after verification
-#         else:
-#             return jsonify({"error": "Invalid mode configuration"}), 500
-#         # Check image format
-#         if img.format not in ['JPEG', 'JPG', 'PNG']:
-#             return jsonify({"error": "Invalid image format. Only JPG and PNG are supported."}), 400
-#         # Run detection
-#         model = current_app.models.get('panModel')
-#         results = model.predict(source=img, imgsz=680, iou=0.7, augment=True)
-#         # print(results,"model result")
-#         extracted_data = process_results(results, img)
-#         # print(extracted_data, "extracted data")
-#         if extracted_data.get('statusCode') == 400:
-#             return jsonify(extracted_data), 400
-#         return jsonify(extracted_data), 200
-#     except requests.RequestException as req_err:
-#         return jsonify({"error": f"Image download failed: {str(req_err)}"}), 500
-#     except UnidentifiedImageError:
-#         return jsonify({"error": "Unable to identify image format."}), 400
-#     except Exception as e:
-#         current_app.logger.error(f"Unexpected error: {str(e)}")
-#         return jsonify({"error": "An unexpected error occurred on api call."}), 500

+import re
+from flask import request, jsonify, current_app
+from PIL import Image, UnidentifiedImageError
+from io import BytesIO
+import base64
+import requests
+from  ..services.panServices.panOcr import process_results
+import io
+def ocrPan(mode, session):
+    try:
+        print("API HIT ************* PANOCR")
+        data = request.get_json()
+        if not data:
+            return jsonify({"error": "Invalid request payload"}), 400
+        if mode == "prod":
+            if not data.get('image'):
+                return jsonify({"error": "Image data/buffer is required"}), 400
+            #removing 'data:image/png,base64 ' from buffer.
+            imgBuffer = data.get('image')
+            imgBuffer = re.sub("^data:image/.+;base64,", "", imgBuffer)
+            # Adjust base64 string padding
+            if len(imgBuffer) % 4:
+                imgBuffer += '=' * (4 - len(imgBuffer) % 4)
+            try:
+                img_data = base64.b64decode(imgBuffer)
+                img = Image.open(BytesIO(img_data))
+                img.verify()  # Verify image format
+                print(img, "img")
+                img = Image.open(io.BytesIO(img_data))  # Re-open image after verification
+            except (base64.binascii.Error, ValueError) as decode_err:
+                return jsonify({"error": f"Image decoding failed: {str(decode_err)}"}), 400
+            except UnidentifiedImageError:
+                return jsonify({"error": "Unable to identify image format."}), 400
+        elif mode == "dev":
+            if not data.get('imgUrl'):
+                return jsonify({"error": "Image URL is required"}), 400
+            img_url = data.get('imgUrl')
+            response = session.get(img_url)
+            response.raise_for_status()
+            img = Image.open(BytesIO(response.content))
+            print(img, "img")
+            img.verify()  # Verify image format
+            img = Image.open(BytesIO(response.content))  # Re-open image after verification
+        else:
+            return jsonify({"error": "Invalid mode configuration"}), 500
+        # Check image format
+        if img.format not in ['JPEG', 'JPG', 'PNG']:
+            return jsonify({"error": "Invalid image format. Only JPG and PNG are supported."}), 400
+        # Run detection
+        model = current_app.models.get('panModel')
+        results = model.predict(source=img, imgsz=680, iou=0.7, augment=True)
+        # print(results,"model result")
+        extracted_data = process_results(results, img)
+        # print(extracted_data, "extracted data")
+        if extracted_data.get('statusCode') == 400:
+            return jsonify(extracted_data), 400
+        return jsonify(extracted_data), 200
+    except requests.RequestException as req_err:
+        return jsonify({"error": f"Image download failed: {str(req_err)}"}), 500
+    except UnidentifiedImageError:
+        return jsonify({"error": "Unable to identify image format."}), 400
+    except Exception as e:
+        current_app.logger.error(f"Unexpected error: {str(e)}")
+        return jsonify({"error": "An unexpected error occurred on api call."}), 500

app/services/adhaarServices/adhaarDataExtractor.py CHANGED Viewed

@@ -1,59 +1,59 @@
-# import datetime
-# def extract_details(texts):
-#     details = {'name': '', 'gender': '', 'dob': '', 'aadhaarNo': ''}
-#     current_year = datetime.datetime.now().year
-#     for text in texts:
-#         # Check if colon exists in text and split accordingly
-#         if ':' in text:
-#             text = text.split(':')[1].strip()
-#         cleaned_text = text.replace(':', '').strip()
-#         # Remove leading non-alphabetic characters for gender detection and strip spaces
-#         cleaned_gender = cleaned_text.lstrip('.-/').strip()
-#         # Check if the text is the name (only alphabets, spaces, and possibly dots)
-#         if (all(char.isalpha() or char.isspace() or char == '.' for char in cleaned_text)
-#                 and cleaned_gender.lower() not in ['male', 'female']):
-#             details['name'] = cleaned_text
-#         # Check if the text is the DOB (format: dd/mm/yyyy or yyyy)
-#         elif (len(cleaned_text) == 4 and
-#               cleaned_text.isdigit() and
-#               1900 < int(cleaned_text) < current_year):
-#             details['dob'] = cleaned_text
-#         # Check if the text is the DOB (format: dd/mm/yyyy or dd-mm-yyyy)
-#         elif (len(cleaned_text) == 10 and
-#               (cleaned_text[2] in ['/', '-']) and
-#               (cleaned_text[5] in ['/', '-']) and
-#               cleaned_text.replace('/', '').replace('-', '').isdigit()):
-#             details['dob'] = cleaned_text
-#         # Check if the text is the gender (either 'Male' or 'Female')
-#         elif cleaned_gender.lower() in ['male', 'female']:
-#             details['gender'] = cleaned_gender.capitalize()
-#         # Check if the text is the Aadhaar number (12 digits after removing spaces)
-#         elif cleaned_text.replace(' ', '').isdigit() and len(cleaned_text.replace(' ', '')) == 12:
-#             details['aadhaarNo'] = cleaned_text
-#     # Check if any key's value is empty
-#     if any(value == '' for value in details.values()):
-#         error_key = next(key for key, value in details.items() if value == '')
-#         result = {
-#             'statusCode': 400,
-#             'result': details,
-#             'error': f'{error_key} value is not found due to bad image.'
-#         }
-#     else:
-#         result = {
-#             'statusCode': 200,
-#             'result': details,
-#             'error': ''
-#         }
-#     return result

+import datetime
+def extract_details(texts):
+    details = {'name': '', 'gender': '', 'dob': '', 'aadhaarNo': ''}
+    current_year = datetime.datetime.now().year
+    for text in texts:
+        # Check if colon exists in text and split accordingly
+        if ':' in text:
+            text = text.split(':')[1].strip()
+        cleaned_text = text.replace(':', '').strip()
+        # Remove leading non-alphabetic characters for gender detection and strip spaces
+        cleaned_gender = cleaned_text.lstrip('.-/').strip()
+        # Check if the text is the name (only alphabets, spaces, and possibly dots)
+        if (all(char.isalpha() or char.isspace() or char == '.' for char in cleaned_text)
+                and cleaned_gender.lower() not in ['male', 'female']):
+            details['name'] = cleaned_text
+        # Check if the text is the DOB (format: dd/mm/yyyy or yyyy)
+        elif (len(cleaned_text) == 4 and
+              cleaned_text.isdigit() and
+              1900 < int(cleaned_text) < current_year):
+            details['dob'] = cleaned_text
+        # Check if the text is the DOB (format: dd/mm/yyyy or dd-mm-yyyy)
+        elif (len(cleaned_text) == 10 and
+              (cleaned_text[2] in ['/', '-']) and
+              (cleaned_text[5] in ['/', '-']) and
+              cleaned_text.replace('/', '').replace('-', '').isdigit()):
+            details['dob'] = cleaned_text
+        # Check if the text is the gender (either 'Male' or 'Female')
+        elif cleaned_gender.lower() in ['male', 'female']:
+            details['gender'] = cleaned_gender.capitalize()
+        # Check if the text is the Aadhaar number (12 digits after removing spaces)
+        elif cleaned_text.replace(' ', '').isdigit() and len(cleaned_text.replace(' ', '')) == 12:
+            details['aadhaarNo'] = cleaned_text
+    # Check if any key's value is empty
+    if any(value == '' for value in details.values()):
+        error_key = next(key for key, value in details.items() if value == '')
+        result = {
+            'statusCode': 400,
+            'result': details,
+            'error': f'{error_key} value is not found due to bad image.'
+        }
+    else:
+        result = {
+            'statusCode': 200,
+            'result': details,
+            'error': ''
+        }
+    return result

app/services/adhaarServices/ocr.py CHANGED Viewed

@@ -1,44 +1,44 @@
-# from io import BytesIO
-# from ...utils.azureOCR import analyze_image
-# from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
-# from app.services.adhaarServices.adhaarDataExtractor import extract_details
-# def process_results(results, img):
-#     precision_data = {label: {"correct": 0, "total": 0} for label in ["aadharNo", "name", "dob", "gender", "address"]}
-#     confidence_threshold = 0.3
-#     input_image_format = img.format if img.format else "PNG"
-#     valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
-#     input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
-#     label_to_image = {}
-#     extracted_data = {"adhaarNo": "", "dob": "", "gender": "", "name": "", "address": ""}
-#     for result in results:
-#         for bbox, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
-#             label = ["aadharNo", "dob", "gender", "name", "address"][int(cls)]
-#             print(label, conf)
-#             if conf < confidence_threshold or label == "address":
-#                 continue
-#             x1, y1, x2, y2 = map(int, bbox.tolist())
-#             crop_img = img.crop((x1, y1, x2, y2))
-#             crop_img = resize_if_needed(crop_img)
-#             if label not in label_to_image or label_to_image[label][1] < conf:
-#                 label_to_image[label] = (crop_img, conf)
-#                 precision_data[label]["total"] += 1
-#                 precision_data[label]["correct"] += 1  # Replace with actual OCR validation check
-#     cropped_images = [img for label, (img, conf) in sorted(label_to_image.items()) if label != "address"]
-#     final_image = all_cropped_images_to_one_image(cropped_images, separator_image_path='app/utils/seprator3.png')
-#     buffer = BytesIO()
-#     final_image.save(buffer, format=input_image_format)
-#     buffer.seek(0)
-#     response = analyze_image(buffer.getvalue(), input_image_format)
-#     # print(response)
-#     lines = response['readResult']['blocks'][0]['lines']
-#     texts = [line['text'] for line in lines]
-#     print(texts)
-#     extracted_data = extract_details(texts)
-#     return extracted_data

+from io import BytesIO
+from ...utils.azureOCR import analyze_image
+from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
+from app.services.adhaarServices.adhaarDataExtractor import extract_details
+def process_results(results, img):
+    precision_data = {label: {"correct": 0, "total": 0} for label in ["aadharNo", "name", "dob", "gender", "address"]}
+    confidence_threshold = 0.3
+    input_image_format = img.format if img.format else "PNG"
+    valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
+    input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
+    label_to_image = {}
+    extracted_data = {"adhaarNo": "", "dob": "", "gender": "", "name": "", "address": ""}
+    for result in results:
+        for bbox, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
+            label = ["aadharNo", "dob", "gender", "name", "address"][int(cls)]
+            print(label, conf)
+            if conf < confidence_threshold or label == "address":
+                continue
+            x1, y1, x2, y2 = map(int, bbox.tolist())
+            crop_img = img.crop((x1, y1, x2, y2))
+            crop_img = resize_if_needed(crop_img)
+            if label not in label_to_image or label_to_image[label][1] < conf:
+                label_to_image[label] = (crop_img, conf)
+                precision_data[label]["total"] += 1
+                precision_data[label]["correct"] += 1  # Replace with actual OCR validation check
+    cropped_images = [img for label, (img, conf) in sorted(label_to_image.items()) if label != "address"]
+    final_image = all_cropped_images_to_one_image(cropped_images, separator_image_path='app/utils/seprator3.png')
+    buffer = BytesIO()
+    final_image.save(buffer, format=input_image_format)
+    buffer.seek(0)
+    response = analyze_image(buffer.getvalue(), input_image_format)
+    # print(response)
+    lines = response['readResult']['blocks'][0]['lines']
+    texts = [line['text'] for line in lines]
+    print(texts)
+    extracted_data = extract_details(texts)
+    return extracted_data

app/services/panServices/panDataExtractor.py CHANGED Viewed

@@ -1,95 +1,95 @@
-# import re
-# def filter_array(arr):
-#     # Define the regex patterns
-#     pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
-#     pattern_numeric = re.compile(r'^[0-9]+$')
-#     pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')
-#     # Filter the array
-#     filtered_array = [
-#         item for item in arr
-#         if not (pattern_alphanumeric_special.search(item) or
-#                 pattern_numeric.match(item) or
-#                 pattern_special_chars.search(item))
-#     ]
-#     return filtered_array
-# def extract_panData(data):
-#     unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
-#                       "पिता का नाम", 'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',
-#                       "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
-#                       'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
-#                       'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
-#                       "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
-#                       'ugr Name']
-#     # Initialize result object
-#     result = {
-#         "statusCode": 200,
-#         "error": '',
-#         "data": {
-#             "panNo": '',
-#             "name": '',
-#             "fatherName": '',
-#             "dob": ''
-#         }
-#     }
-#     # Clean the array by removing unwanted words and invalid entries
-#     cleaned_data = []
-#     combination_pattern = re.compile(r'(?=.*[0-9])(?=.*[!@#$%^&*(),?":{}|<>])')
-#     for item in data:
-#         if item not in unwanted_words and not combination_pattern.search(item):
-#             cleaned_data.append(item)
-#     # Check and extract PAN number
-#     pan_pattern = re.compile(r'^[A-Z]{5}\s*[0-9]{4}\s*[A-Z]$')
-#     for item in cleaned_data:
-#         if pan_pattern.match(item):
-#             result["data"]["panNo"] = item
-#             cleaned_data.remove(item)
-#             break
-#     # Check and extract date of birth
-#     dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
-#     for item in cleaned_data:
-#         if dob_pattern.match(item):
-#             result["data"]["dob"] = item
-#             cleaned_data.remove(item)
-#             break
-#     # If only two values are left, assume they are name and father's name
-#     cleaned_data = filter_array(cleaned_data)
-#     if len(cleaned_data) == 2:
-#         result["data"]["name"] = cleaned_data[0]
-#         result["data"]["fatherName"] = cleaned_data[1]
-#     else:
-#         # Further cleaning of the data array to extract name and father's name
-#         cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
-#         print(cleaned_data, "after cleaning")
-#         # Check and extract name
-#         name_pattern = re.compile(r'^[A-Za-z .]+$')
-#         if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
-#             result["data"]["name"] = cleaned_data[0]
-#         else:
-#             result["data"]["name"] = ''
-#         # Check and extract father's name
-#         if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
-#             result["data"]["fatherName"] = cleaned_data[1]
-#         else:
-#             result["data"]["fatherName"] = ''
-#     # Check if any value is empty and set error message
-#     for key, value in result["data"].items():
-#         if value == '':
-#             result["statusCode"] = 400
-#             result["error"] = f"{key} value is not found due to bad image."
-#             break
-#     return result

+import re
+def filter_array(arr):
+    # Define the regex patterns
+    pattern_alphanumeric_special = re.compile(r'[\w]+[^.\s\w]+|[^.\s\w]+[\w]+')
+    pattern_numeric = re.compile(r'^[0-9]+$')
+    pattern_special_chars = re.compile(r'[^a-zA-Z.\s]+')
+    # Filter the array
+    filtered_array = [
+        item for item in arr
+        if not (pattern_alphanumeric_special.search(item) or
+                pattern_numeric.match(item) or
+                pattern_special_chars.search(item))
+    ]
+    return filtered_array
+def extract_panData(data):
+    unwanted_words = ["Name", "/Name", 'Permanent', 'Account', 'Number', 'Card', 'नाम', '/Name',
+                      "पिता का नाम", 'नाम / Name', "पिता का नाम/ Father's Name", '414 / Name', 'पिता का नाम / Fath',
+                      "VIT VE Hra / Father's Nama", 'पिता का नाम/ Fal', 'पिता का नाम / Fathe', "पिता का नाम / Father's Na",
+                      'जन्म की तारीख /।', 'जन्म का ताराख', "पिता का नाम/ Father's Nam", 'नाम /Name', "पिता का नाम / Father's Name",
+                      'जन्म का वाराज़', 'Date of Birth', 'Permanent Account Number Card', "Date of Birth", "/Date of Birth",
+                      "Permanent Account Number", "Father's Name", "14 /Name", "/Father's Name", 'HTH / Name',"inent Account Number", "anent Account Number C","Permanent Account Number Car",
+                      'ugr Name']
+    # Initialize result object
+    result = {
+        "statusCode": 200,
+        "error": '',
+        "data": {
+            "panNo": '',
+            "name": '',
+            "fatherName": '',
+            "dob": ''
+        }
+    }
+    # Clean the array by removing unwanted words and invalid entries
+    cleaned_data = []
+    combination_pattern = re.compile(r'(?=.*[0-9])(?=.*[!@#$%^&*(),?":{}|<>])')
+    for item in data:
+        if item not in unwanted_words and not combination_pattern.search(item):
+            cleaned_data.append(item)
+    # Check and extract PAN number
+    pan_pattern = re.compile(r'^[A-Z]{5}\s*[0-9]{4}\s*[A-Z]$')
+    for item in cleaned_data:
+        if pan_pattern.match(item):
+            result["data"]["panNo"] = item
+            cleaned_data.remove(item)
+            break
+    # Check and extract date of birth
+    dob_pattern = re.compile(r'^\d{2}[-/]\d{2}[-/]\d{4}$')
+    for item in cleaned_data:
+        if dob_pattern.match(item):
+            result["data"]["dob"] = item
+            cleaned_data.remove(item)
+            break
+    # If only two values are left, assume they are name and father's name
+    cleaned_data = filter_array(cleaned_data)
+    if len(cleaned_data) == 2:
+        result["data"]["name"] = cleaned_data[0]
+        result["data"]["fatherName"] = cleaned_data[1]
+    else:
+        # Further cleaning of the data array to extract name and father's name
+        cleaned_data = [item for item in cleaned_data if not combination_pattern.search(item) and item not in unwanted_words]
+        print(cleaned_data, "after cleaning")
+        # Check and extract name
+        name_pattern = re.compile(r'^[A-Za-z .]+$')
+        if len(cleaned_data) > 0 and name_pattern.match(cleaned_data[0]):
+            result["data"]["name"] = cleaned_data[0]
+        else:
+            result["data"]["name"] = ''
+        # Check and extract father's name
+        if len(cleaned_data) > 1 and name_pattern.match(cleaned_data[1]):
+            result["data"]["fatherName"] = cleaned_data[1]
+        else:
+            result["data"]["fatherName"] = ''
+    # Check if any value is empty and set error message
+    for key, value in result["data"].items():
+        if value == '':
+            result["statusCode"] = 400
+            result["error"] = f"{key} value is not found due to bad image."
+            break
+    return result

app/services/panServices/panOcr.py CHANGED Viewed

@@ -1,66 +1,66 @@
-# from io import BytesIO
-# from ...utils.azureOCR import analyze_image
-# from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
-# from .panDataExtractor import extract_panData
-# # from collections import defaultdict
-# def process_results(results, img):
-#     label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
-#     confidence_threshold = 0.3
-#     input_image_format = img.format if img.format else "PNG"
-#     valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
-#     input_image_format = input_image_format if input_image_format in valid_formats else "PNG"
-#     best_crops = {label: (None, -1) for label in label_indices.keys()}  # Store best (image, confidence) pairs
-#     precision_data = {label: {"correct": 0, "total": 0} for label in label_indices.keys()}
-#     for result in results:
-#         for bbox, cls, conf in zip(result.boxes.xyxy, result.boxes.cls, result.boxes.conf):
-#             # Ensure the class index is within the bounds of the label list
-#             if int(cls) >= len(label_indices):
-#                 print(f"Warning: Class index {cls} is out of range. Skipping this bbox.")
-#                 continue
-#             label = list(label_indices.keys())[int(cls)]
-#             print(label, conf)
-#             if conf < confidence_threshold:
-#                 continue
-#             x1, y1, x2, y2 = map(int, bbox.tolist())
-#             crop_img = img.crop((x1, y1, x2, y2))
-#             crop_img = resize_if_needed(crop_img)
-#             crop_img.save(f"temp_{label}.png")
-#             # Replace old crop if new one has higher confidence
-#             _, best_conf = best_crops[label]
-#             if conf > best_conf:
-#                 best_crops[label] = (crop_img, conf)
-#                 precision_data[label]["total"] += 1
-#                 precision_data[label]["correct"] += 1  # Replace with actual OCR validation check
-#     # Extract the images for final processing
-#     cropped_images_with_labels = [(img, label_indices[label], conf) for label, (img, conf) in best_crops.items() if img is not None]
-#     # Sort the images by their label indices in ascending order
-#     cropped_images_with_labels.sort(key=lambda x: x[1])
-#     print(cropped_images_with_labels, "cropped images with labels")
-#     if not cropped_images_with_labels:
-#         raise ValueError("No images were cropped.")
-#     # Extract only the images for concatenation
-#     cropped_images = [img for img, _, _ in cropped_images_with_labels]
-#     final_image = all_cropped_images_to_one_image(cropped_images, separator_image_path='app/utils/seprator3.png')
-#     buffer = BytesIO()
-#     final_image.save(buffer, format=input_image_format)
-#     buffer.seek(0)
-#     response = analyze_image(buffer.getvalue(), input_image_format)
-#     print(response, "response")
-#     lines = response['readResult']['blocks'][0]['lines']
-#     texts = [line['text'] for line in lines]
-#     print(texts, "text after microsoft ocr")
-#     extracted_data = extract_panData(texts)
-#     return extracted_data

+from io import BytesIO
+from ...utils.azureOCR import analyze_image
+from ...utils.imageUtils import resize_if_needed, all_cropped_images_to_one_image
+from .panDataExtractor import extract_panData
+# from collections import defaultdict
+def process_results(results, img):
+    """Crop the best detection per PAN field, OCR the crops and parse the text.
+
+    For each label (``pan_num``, ``name``, ``father``, ``dob``) the detection
+    with the highest confidence at or above the threshold is kept. The kept
+    crops are ordered by label index, merged into a single image, sent to
+    ``analyze_image`` and the recognised text lines are passed to
+    ``extract_panData``.
+
+    Args:
+        results: Iterable of detection results; each must expose
+            ``boxes.xyxy``, ``boxes.cls`` and ``boxes.conf`` (presumably
+            Ultralytics YOLO results -- confirm against the caller).
+        img: Source image exposing ``format`` and ``crop`` (presumably a PIL
+            image -- confirm against the caller).
+
+    Returns:
+        Whatever ``extract_panData`` returns for the recognised text lines.
+
+    Raises:
+        ValueError: If no detection reached the confidence threshold.
+    """
+    label_indices = {"pan_num": 0, "name": 1, "father": 2, "dob": 3}
+    # Class index -> label name; built once instead of once per bounding box.
+    labels = list(label_indices)
+    confidence_threshold = 0.3
+    valid_formats = ["JPEG", "PNG", "BMP", "GIF", "TIFF"]
+    # Fall back to PNG when the source format is missing or unsupported.
+    input_image_format = img.format if img.format in valid_formats else "PNG"
+    # Best (image, confidence) pair seen so far for every label.
+    best_crops = {label: (None, -1) for label in labels}
+    for result in results:
+        boxes = result.boxes
+        for bbox, cls, conf in zip(boxes.xyxy, boxes.cls, boxes.conf):
+            # Ensure the class index is within the bounds of the label list
+            if int(cls) >= len(labels):
+                print(f"Warning: Class index {cls} is out of range. Skipping this bbox.")
+                continue
+            label = labels[int(cls)]
+            print(label, conf)
+            if conf < confidence_threshold:
+                continue
+            x1, y1, x2, y2 = map(int, bbox.tolist())
+            crop_img = resize_if_needed(img.crop((x1, y1, x2, y2)))
+            # NOTE(review): debug artifact written to the working directory on
+            # every accepted detection; kept to preserve existing side effects.
+            crop_img.save(f"temp_{label}.png")
+            # Replace old crop if new one has higher confidence
+            if conf > best_crops[label][1]:
+                best_crops[label] = (crop_img, conf)
+    # Keep only the labels that produced a crop, in ascending label-index order.
+    cropped_images_with_labels = [
+        (crop, label_indices[label], conf)
+        for label, (crop, conf) in best_crops.items()
+        if crop is not None
+    ]
+    cropped_images_with_labels.sort(key=lambda x: x[1])
+    print(cropped_images_with_labels, "cropped images with labels")
+    if not cropped_images_with_labels:
+        raise ValueError("No images were cropped.")
+    # Extract only the images for concatenation
+    cropped_images = [crop for crop, _, _ in cropped_images_with_labels]
+    final_image = all_cropped_images_to_one_image(cropped_images, separator_image_path='app/utils/seprator3.png')
+    buffer = BytesIO()
+    final_image.save(buffer, format=input_image_format)
+    # getvalue() returns the whole buffer regardless of the stream position.
+    response = analyze_image(buffer.getvalue(), input_image_format)
+    print(response, "response")
+    # The block list may be empty when no text is recognised, in which case
+    # indexing blocks[0] would raise IndexError; gather lines from every block.
+    blocks = response['readResult']['blocks']
+    texts = [line['text'] for block in blocks for line in block['lines']]
+    print(texts, "text after microsoft ocr")
+    return extract_panData(texts)

gunicorn.conf.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # gunicorn.conf.py
 bind = '0.0.0.0:7860'
-workers = 15
 timeout = 120
 loglevel = 'info'
 accesslog = '-'

 # gunicorn.conf.py
 bind = '0.0.0.0:7860'  # listen on all interfaces, port 7860
+workers = 1  # NOTE(review): presumably a single worker so the models are loaded only once -- confirm
 timeout = 120  # seconds a worker may stay silent before gunicorn restarts it
 loglevel = 'info'
 accesslog = '-'  # '-' sends the access log to stdout