umarigan committed on
Commit
8bd5af2
·
verified ·
1 Parent(s): 3547909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -13
app.py CHANGED
@@ -105,12 +105,18 @@ def create_mask_dict(entities):
105
  entity_counters[entity['entity_group']] += 1
106
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
107
  return mask_dict
 
 
 
 
 
 
 
108
  def export_masked_text(masked_text, file_type):
109
  if file_type == "txt":
110
  return masked_text.encode("utf-8")
111
  elif file_type == "pdf":
112
  pdf_buffer = io.BytesIO()
113
- from fpdf import FPDF
114
  pdf = FPDF()
115
  pdf.add_page()
116
  pdf.set_font("Arial", size=12)
@@ -157,7 +163,7 @@ if Run_Button and input_text:
157
  # Create mask dictionary
158
  mask_dict = create_mask_dict(output_comb)
159
 
160
- masked_text = " ".join(entity['masked_word'] for entity in output_comb)
161
 
162
  # Apply masking and add masked_word column
163
  for entity in output_comb:
@@ -183,20 +189,36 @@ if Run_Button and input_text:
183
  else:
184
  label = entity['entity_group']
185
  spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
188
  st.write(html, unsafe_allow_html=True)
189
 
 
190
  export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
191
- if st.button("Download Masked Text"):
192
- masked_file_content = export_masked_text(masked_text, export_file_type)
193
- if masked_file_content:
194
- st.download_button(
195
- label="Download",
196
- data=masked_file_content,
197
- file_name=f"masked_output.{export_file_type}",
198
- mime=f"application/{export_file_type}" if export_file_type != "txt" else "text/plain"
199
- )
200
 
201
  st.subheader("Masking Dictionary")
202
- st.json(mask_dict)
 
 
 
 
105
  entity_counters[entity['entity_group']] += 1
106
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
107
  return mask_dict
108
def create_masked_text(input_text, entities, mask_dict):
    """Return *input_text* with each maskable entity span replaced by its placeholder.

    Args:
        input_text: The original text the entity offsets refer to.
        entities: Iterable of dicts with ``'entity_group'``, ``'word'``,
            ``'start'`` and ``'end'`` keys; ``start``/``end`` are character
            offsets into ``input_text``.
        mask_dict: Mapping from an entity's ``'word'`` to its placeholder.

    Returns:
        The masked text. Entities whose group is ``CARDINAL`` or ``EVENT``
        are left untouched.

    Raises:
        KeyError: If a maskable entity's ``'word'`` has no entry in
            ``mask_dict``.
    """
    unmasked_groups = ('CARDINAL', 'EVENT')
    maskable = [e for e in entities if e['entity_group'] not in unmasked_groups]
    # Left-to-right; at equal starts the longest span goes first so it wins.
    maskable.sort(key=lambda e: (e['start'], -e['end']))

    pieces = []
    cursor = 0  # Everything before this offset of input_text is already emitted.
    for entity in maskable:
        # Duplicate, nested or partially overlapping spans must not be
        # replaced twice: that would shift offsets and slice into a
        # placeholder that was already inserted. Only the part of the span
        # that is not yet covered gets masked.
        span_start = max(entity['start'], cursor)
        span_end = entity['end']
        if span_start >= span_end:
            continue  # Fully covered by a previously masked span.
        pieces.append(input_text[cursor:span_start])
        pieces.append(mask_dict[entity['word']])
        cursor = span_end
    pieces.append(input_text[cursor:])
    return "".join(pieces)
114
+
115
  def export_masked_text(masked_text, file_type):
116
  if file_type == "txt":
117
  return masked_text.encode("utf-8")
118
  elif file_type == "pdf":
119
  pdf_buffer = io.BytesIO()
 
120
  pdf = FPDF()
121
  pdf.add_page()
122
  pdf.set_font("Arial", size=12)
 
163
  # Create mask dictionary
164
  mask_dict = create_mask_dict(output_comb)
165
 
166
+ masked_text = create_masked_text(input_text, output_comb, mask_dict)
167
 
168
  # Apply masking and add masked_word column
169
  for entity in output_comb:
 
189
  else:
190
  label = entity['entity_group']
191
  spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
192
+ # Custom CSS to prevent label overlap
193
+ custom_css = """
194
+ <style>
195
+ .entity-label {
196
+ font-size: 0.7em;
197
+ line-height: 1;
198
+ padding: 0.25em;
199
+ border-radius: 0.25em;
200
+ top: -1.5em;
201
+ position: relative;
202
+ }
203
+ </style>
204
+ """
205
 
206
+ html = custom_css + spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
207
  st.write(html, unsafe_allow_html=True)
208
 
209
+ # Download button
210
  export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
211
+ masked_file_content = export_masked_text(masked_text, export_file_type)
212
+ if masked_file_content:
213
+ st.download_button(
214
+ label="Download Masked Text",
215
+ data=masked_file_content,
216
+ file_name=f"masked_output.{export_file_type}",
217
+ mime=f"application/{export_file_type}" if export_file_type != "txt" else "text/plain"
218
+ )
 
219
 
220
  st.subheader("Masking Dictionary")
221
+ st.json(mask_dict)
222
+
223
+ st.subheader("Masked Text Preview")
224
+ st.text(masked_text)