Nassiraaa committed on
Commit
d0016c1
·
verified ·
1 Parent(s): f507f50

Create personal_information.py

Browse files
Files changed (1) hide show
  1. personal_information.py +76 -0
personal_information.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from openai_utils import get_ai_response
4
+ from ocr_extractor import process_file
5
+ from cv_prompt import get_personal_info_prompt
6
+
7
# Load the scoring data once, at import time. ``score_data`` is indexed by
# the keys 'email', 'phone', 'city' and 'country' in calculate_score().
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (JSON text is UTF-8 by specification).
with open('personal_info_scores.json', 'r', encoding='utf-8') as f:
    score_data = json.load(f)
10
+
11
def extract_email(text):
    """Return the first e-mail address found in *text*, or ``None``.

    Args:
        text: Text to scan. ``None`` or an empty string is treated as
            "nothing to find" instead of raising.

    Returns:
        The first substring matching the e-mail pattern, or ``None`` when
        there is no match.
    """
    if not text:
        return None
    # The top-level-domain class is ``[A-Za-z]``. Writing it as
    # ``[A-Z|a-z]`` also admits a literal ``|`` (alternation has no meaning
    # inside a character class), which lets matches run across separators
    # such as ``a@b.com|next``.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    match = re.search(email_pattern, text)
    return match.group(0) if match else None
15
+
16
def extract_phone(text):
    """Return the first North-American-style phone number in *text*.

    The pattern accepts an optional ``+1``/``1`` prefix, a three-digit area
    code with or without parentheses, then 3 + 4 digits, with ``-``, ``.``
    or whitespace as optional separators.

    Args:
        text: Text to scan. ``None`` or an empty string is treated as
            "nothing to find" instead of raising.

    Returns:
        The first matching substring, or ``None`` when there is no match.
    """
    if not text:
        return None
    # A leading ``\b`` can never match directly before ``(`` or ``+`` when
    # the preceding character is also a non-word character (or the start of
    # the string), so "(555) 123-4567" was missed entirely and "+1 ..." lost
    # its "+". ``(?<!\w)`` expresses the intended "not glued to a preceding
    # word character" without requiring the first character to be a word
    # character.
    phone_pattern = (
        r'(?<!\w)(?:\+?1[-.\s]?)?(?:\(\d{3}\)|\d{3})'
        r'[-.\s]?\d{3}[-.\s]?\d{4}\b'
    )
    match = re.search(phone_pattern, text)
    return match.group(0) if match else None
20
+
21
def extract_location(text):
    """Ask the AI helper for the location mentioned in *text*.

    Builds a prompt with ``get_personal_info_prompt``, sends it through
    ``get_ai_response`` and parses the reply as JSON.

    NOTE(review): the reply appears to be expected in the form
    ``{"city": {<name>: <flag>}, "country": {<name>: <flag>}}`` -- this is
    inferred from how the fields are read below; confirm against the prompt.

    Args:
        text: Text to analyse.

    Returns:
        A ``(city, country)`` tuple. ``city`` is the first key of the
        ``"city"`` mapping or ``None``; ``country`` is ``True`` when any
        value of the ``"country"`` mapping is truthy, else ``False``.
        An empty reply, unparseable JSON, or a payload of an unexpected
        shape yields ``(None, False)`` instead of raising.
    """
    prompt = get_personal_info_prompt(text)
    messages = [{"role": "user", "content": prompt}]
    response = get_ai_response(messages)

    if not response:
        return None, False

    try:
        location_data = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a reply that is not str/bytes at all.
        print("Failed to parse JSON from response")
        return None, False

    # Valid JSON is not necessarily an object (it may be a list, string,
    # number, ...); calling .get() on those would raise AttributeError.
    if not isinstance(location_data, dict):
        print("Unexpected JSON structure in response")
        return None, False

    # Fields that are missing, null, or not mappings are treated as absent
    # rather than guessed at.
    city_field = location_data.get('city')
    country_field = location_data.get('country')

    city = None
    if isinstance(city_field, dict) and city_field:
        # The first key is taken as the city name, whatever its flag value.
        city = next(iter(city_field))

    country = False
    if isinstance(country_field, dict):
        country = any(country_field.values())

    return city, country
41
+
42
def calculate_score(email_exists, phone_exists, city_exists, country_exists):
    """Add up the ``score_data`` entries for every item that is present.

    Args:
        email_exists: Truthy when an e-mail address was found.
        phone_exists: Truthy when a phone number was found.
        city_exists: Truthy when a city was found.
        country_exists: Truthy when a country was found.

    Returns:
        The sum of ``score_data[key]`` over the keys whose flag is truthy
        (``0`` when none are). Entries for falsy flags are never looked up.
    """
    field_flags = (
        ('email', email_exists),
        ('phone', phone_exists),
        ('city', city_exists),
        ('country', country_exists),
    )
    return sum(score_data[field] for field, present in field_flags if present)
53
+
54
def analyze_personal_info(file_path):
    """Extract contact details from a file and score their presence.

    The file is converted to text with ``process_file`` (using
    ``'ocr_weights.json'``), then scanned for an e-mail address, a phone
    number and a location.

    Args:
        file_path: Path of the file to analyse, passed to ``process_file``.

    Returns:
        A dict with the keys ``"email"`` and ``"phone"`` (booleans telling
        whether each was found), ``"city"`` (the city value or ``None``),
        ``"country"`` (the flag returned by ``extract_location``) and
        ``"personal_info_score"`` (the result of ``calculate_score``).
    """
    text = process_file(file_path, 'ocr_weights.json')

    email = extract_email(text)
    phone = extract_phone(text)
    city, country = extract_location(text)

    found = {
        "email": email is not None,
        "phone": phone is not None,
    }
    score = calculate_score(
        found["email"], found["phone"], city is not None, country
    )

    # ``city`` is already None whenever no city was found, so it can be
    # reported as-is.
    return {
        **found,
        "city": city,
        "country": country,
        "personal_info_score": score,
    }