Pranay25 committed on
Commit
dabb465
·
verified ·
1 Parent(s): b85d243

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -63,21 +63,23 @@ def extract_attributes(extracted_text):
63
  print(f"Raw extracted text: '{extracted_text}'")
64
  print(f"Cleaned extracted text: '{cleaned_text}'")
65
 
66
- # Patterns for extracting personal information
67
  patterns = {
68
- "Name": r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)(?=\s*(?:Age|Gender|Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
69
- "Age": r"Age\s*[:\-]?\s*(\d{1,3})(?=\s*(?:Gender|Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
70
- "Gender": r"Gender\s*[:\-]?\s*(Male|Female|Other)(?=\s*(?:Phone Number|Phone|Mobile|$|\n|\r\n|\Z))",
71
- "Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91)?([6-9]\d{9})(?=\s*(?:$|\n|\r\n|\Z))"
72
  }
73
 
74
- for readable_attr, pattern in patterns.items():
75
- match = re.search(pattern, cleaned_text, re.IGNORECASE)
76
- if match:
77
- attributes[readable_attr] = match.group(1).strip()
78
- print(f"Extracted {readable_attr}: '{attributes[readable_attr]}'")
79
- else:
80
- print(f"No match for {readable_attr} with pattern: {pattern}")
 
 
81
 
82
  if "Gender" in attributes:
83
  attributes["Gender"] = GENDER_MAPPING.get(attributes["Gender"], attributes["Gender"])
 
63
  print(f"Raw extracted text: '{extracted_text}'")
64
  print(f"Cleaned extracted text: '{cleaned_text}'")
65
 
66
+ # Patterns for extracting personal information (simplified for line-by-line matching)
67
  patterns = {
68
+ "Name": r"Name\s*[:\-]?\s*([\w\s\-\.\',]+)",
69
+ "Age": r"Age\s*[:\-]?\s*(\d{1,3})",
70
+ "Gender": r"Gender\s*[:\-]?\s*(Male|Female|Other)",
71
+ "Phone Number": r"(?:(?:Phone Number)|Phone|Mobile|Phonenumber)\s*[:\-]?\s*(?:\+91)?([6-9]\d{9})"
72
  }
73
 
74
+ # Process each line separately
75
+ lines = cleaned_text.split('\n')
76
+ for line in lines:
77
+ for readable_attr, pattern in patterns.items():
78
+ match = re.search(pattern, line, re.IGNORECASE)
79
+ if match:
80
+ attributes[readable_attr] = match.group(1).strip()
81
+ print(f"Extracted {readable_attr}: '{attributes[readable_attr]}' from line: '{line}'")
82
+ break # Move to the next line once a match is found
83
 
84
  if "Gender" in attributes:
85
  attributes["Gender"] = GENDER_MAPPING.get(attributes["Gender"], attributes["Gender"])