umarigan committed on
Commit
feb0faa
·
verified ·
1 Parent(s): 260c8c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -98,13 +98,13 @@ def create_mask_dict(entities, additional_masks=None):
98
  entity_counters = {}
99
 
100
  for entity in entities:
101
- if entity['entity_group'] not in ['CARDINAL', 'EVENT', 'PERCENT', 'QUANTITY']:
102
- if entity['word'] not in mask_dict:
103
- if entity['entity_group'] not in entity_counters:
104
- entity_counters[entity['entity_group']] = 1
105
- else:
106
- entity_counters[entity['entity_group']] += 1
107
- mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
108
 
109
  if additional_masks:
110
  for word, replacement in additional_masks.items():
@@ -129,7 +129,9 @@ def mask_patterns(text):
129
  masks[email] = "<EMAIL>"
130
 
131
  # Phone pattern (Turkish)
132
- phone_pattern = r"\+90\d{10}|\b\d{3}[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}\b"
 
 
133
  phones = re.findall(phone_pattern, text)
134
  for phone in phones:
135
  masks[phone] = "<PHONE>"
 
98
  entity_counters = {}
99
 
100
  for entity in entities:
101
+ if entity['entity_group'] not in ['CARDINAL', 'EVENT', 'PERCENT', 'QUANTITY']:
102
+ if entity['word'] not in mask_dict:
103
+ if entity['entity_group'] not in entity_counters:
104
+ entity_counters[entity['entity_group']] = 1
105
+ else:
106
+ entity_counters[entity['entity_group']] += 1
107
+ mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
108
 
109
  if additional_masks:
110
  for word, replacement in additional_masks.items():
 
129
  masks[email] = "<EMAIL>"
130
 
131
  # Phone pattern (Turkish)
132
+ #phone_pattern = r"\+90\d{10}|\b\d{3}[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}\b"
133
+ phone_pattern = r"\b(0?5\d{2}[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}|\b5\d{3}[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}|\b\d{3}[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2})\b"
134
+
135
  phones = re.findall(phone_pattern, text)
136
  for phone in phones:
137
  masks[phone] = "<PHONE>"