Nassiraaa committed on
Commit
f507f50
·
verified ·
1 Parent(s): 67c8748

Delete personal_information.py

Browse files
Files changed (1) hide show
  1. personal_information.py +0 -76
personal_information.py DELETED
@@ -1,76 +0,0 @@
1
- import json
2
- import re
3
- from openai_utils import get_ai_response
4
- from ocr_extractor import process_file
5
- from cv_prompt import get_personal_info_prompt
6
-
7
# Load the scoring data once, at import time.
# calculate_score() looks up the keys 'email', 'phone', 'city' and 'country'
# in this mapping and adds the corresponding values to the score.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's locale default.
with open('personal_info_scores.json', 'r', encoding='utf-8') as f:
    score_data = json.load(f)
10
-
11
# Compiled once at import time. The TLD class is [A-Za-z]; a '|' inside a
# character class is a literal pipe, not alternation, so it must not appear.
_EMAIL_PATTERN = re.compile(
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
)


def extract_email(text):
    """Return the first email address found in *text*, or None.

    Args:
        text: The text to search. Empty or ``None`` input yields ``None``
            instead of raising.

    Returns:
        The first substring that looks like an email address, or ``None``
        when there is no match.
    """
    if not text:
        return None
    # Only the first hit is needed, so stop at the first match rather than
    # collecting every match in the text.
    match = _EMAIL_PATTERN.search(text)
    return match.group(0) if match else None
15
-
16
# Compiled once at import time. A leading r'\b' cannot match in front of
# '(' or '+' (both are non-word characters), which made numbers such as
# "(555) 123-4567" unmatchable and dropped the '+' from "+1 ...". The
# negative lookbehind keeps the "not glued to a preceding word character"
# guarantee while allowing those leading symbols.
_PHONE_PATTERN = re.compile(
    r'(?<!\w)(?:\+?1[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}\b'
)


def extract_phone(text):
    """Return the first North-American-style phone number in *text*, or None.

    Recognises an optional ``+1``/``1`` prefix, a three-digit area code with
    or without parentheses, and ``-``, ``.`` or whitespace separators.

    Args:
        text: The text to search. Empty or ``None`` input yields ``None``
            instead of raising.

    Returns:
        The matched phone number exactly as it appears in *text*, or
        ``None`` when there is no match.
    """
    if not text:
        return None
    match = _PHONE_PATTERN.search(text)
    return match.group(0) if match else None
20
-
21
def extract_location(text):
    """Ask the AI helper for location data and reduce it to (city, country).

    The prompt is built with ``get_personal_info_prompt`` and sent as a
    single user message through ``get_ai_response``. The reply is parsed as
    JSON.

    NOTE(review): the reply schema is not visible here. The parsing below
    assumes an object with optional ``'city'`` and ``'country'`` keys whose
    values are themselves objects. Confirm against the prompt definition.

    Args:
        text: The text the prompt is built from.

    Returns:
        A ``(city, country)`` tuple. ``city`` is the first key of the
        ``'city'`` object, or ``None`` when it is missing, empty or not an
        object. ``country`` is ``True`` when any value of the ``'country'``
        object is truthy, otherwise ``False``. An empty reply, invalid JSON
        or an unexpected JSON shape all yield ``(None, False)``.
    """
    prompt = get_personal_info_prompt(text)
    messages = [
        {"role": "user", "content": prompt}
    ]

    response = get_ai_response(messages)
    if not response:
        return None, False

    try:
        location_data = json.loads(response)
    except json.JSONDecodeError:
        print("Failed to parse JSON from response")
        return None, False

    # Valid JSON is not necessarily the expected object (it may be a list,
    # a string, null, ...). Degrade the same way as a parse failure instead
    # of raising AttributeError on .get().
    if not isinstance(location_data, dict):
        print("Unexpected location data format in response")
        return None, False

    city_info = location_data.get('city')
    country_info = location_data.get('country')

    # First key of the city object; None for an empty or non-object value.
    city = next(iter(city_info), None) if isinstance(city_info, dict) else None
    country = isinstance(country_info, dict) and any(country_info.values())

    return city, country
41
-
42
def calculate_score(email_exists, phone_exists, city_exists, country_exists):
    """Sum the configured scores for every piece of information present.

    Each truthy flag contributes the value stored under the matching key
    of the module-level ``score_data`` mapping. Falsy flags contribute
    nothing, and their key is not looked up.

    Args:
        email_exists: Whether an email address was found.
        phone_exists: Whether a phone number was found.
        city_exists: Whether a city was found.
        country_exists: Whether a country was found.

    Returns:
        The total of the applicable scores, starting from 0.
    """
    flags_by_key = (
        ('email', email_exists),
        ('phone', phone_exists),
        ('city', city_exists),
        ('country', country_exists),
    )
    return sum(score_data[key] for key, present in flags_by_key if present)
53
-
54
def analyze_personal_info(file_path):
    """Extract personal information from a file and score its completeness.

    The file is turned into text with ``process_file`` (using
    ``'ocr_weights.json'``). The text is then searched for an email
    address, a phone number and a location, and the findings are scored
    with ``calculate_score``.

    Args:
        file_path: Path of the file to analyse, passed through to
            ``process_file``.

    Returns:
        A dict with the keys ``"email"`` and ``"phone"`` (booleans),
        ``"city"`` (the extracted city or ``None``), ``"country"`` (the
        country flag returned by ``extract_location``) and
        ``"personal_info_score"`` (the computed score).
    """
    extracted_text = process_file(file_path, 'ocr_weights.json')

    found_email = extract_email(extracted_text)
    found_phone = extract_phone(extracted_text)
    found_city, has_country = extract_location(extracted_text)

    # Only presence is reported for email and phone, not the values.
    has_email = found_email is not None
    has_phone = found_phone is not None
    has_city = found_city is not None

    total = calculate_score(has_email, has_phone, has_city, has_country)

    return {
        "email": has_email,
        "phone": has_phone,
        "city": found_city if has_city else None,
        "country": has_country,
        "personal_info_score": total,
    }