Update app.py
Browse files
app.py
CHANGED
@@ -49,9 +49,10 @@ def extract_text(image):
|
|
49 |
def clean_extracted_text(text):
|
50 |
# Replace carriage returns and normalize newlines
|
51 |
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
52 |
-
#
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
# Function to extract attributes using regex
|
57 |
def extract_attributes(extracted_text):
|
@@ -64,10 +65,10 @@ def extract_attributes(extracted_text):
|
|
64 |
|
65 |
# Patterns for extracting personal information
|
66 |
patterns = {
|
67 |
-
"Name": r"Name\s*[:\-]?\s*([\w\s\-\.\']+)(?=\s*(?:Age|Gender|Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
|
68 |
-
"Age": r"Age[:\-]?\s*(\d{1,3})",
|
69 |
-
"Gender": r"Gender[:\-]?\s*(Male|Female|Other)",
|
70 |
-
"Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)[:\-]?\s*(?:\+91)?([6-9]\d{9})"
|
71 |
}
|
72 |
|
73 |
for readable_attr, pattern in patterns.items():
|
|
|
49 |
def clean_extracted_text(text):
|
50 |
# Replace carriage returns and normalize newlines
|
51 |
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
52 |
+
# Split into lines, clean each line, then join back
|
53 |
+
lines = text.split('\n')
|
54 |
+
cleaned_lines = [re.sub(r'\s+', ' ', line.strip()) for line in lines]
|
55 |
+
return '\n'.join(cleaned_lines)
|
56 |
|
57 |
# Function to extract attributes using regex
|
58 |
def extract_attributes(extracted_text):
|
|
|
65 |
|
66 |
# Patterns for extracting personal information
|
67 |
patterns = {
|
68 |
+
"Name": r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)(?=\s*(?:Age|Gender|Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
|
69 |
+
"Age": r"Age\s*[:\-]?\s*(\d{1,3})(?=\s*(?:Gender|Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
|
70 |
+
"Gender": r"Gender\s*[:\-]?\s*(Male|Female|Other)(?=\s*(?:Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
|
71 |
+
"Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91)?([6-9]\d{9})(?=\s*(?:$|\n|\r\n|\Z))"
|
72 |
}
|
73 |
|
74 |
for readable_attr, pattern in patterns.items():
|