umarigan committed on
Commit
20bef1c
·
verified ·
1 Parent(s): 8c045a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -1
app.py CHANGED
@@ -105,7 +105,29 @@ def create_mask_dict(entities):
105
  entity_counters[entity['entity_group']] += 1
106
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
107
  return mask_dict
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  Run_Button = st.button("Run")
110
 
111
  if Run_Button and input_text:
@@ -147,6 +169,8 @@ if Run_Button and input_text:
147
 
148
  st.subheader("Recognized Entities")
149
  st.dataframe(df_final)
 
 
150
 
151
  # Spacy display logic with entity numbering
152
  spacy_display = {"ents": [], "text": input_text, "title": None}
@@ -160,5 +184,16 @@ if Run_Button and input_text:
160
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
161
  st.write(html, unsafe_allow_html=True)
162
 
 
 
 
 
 
 
 
 
 
 
 
163
  st.subheader("Masking Dictionary")
164
  st.json(mask_dict)
 
105
  entity_counters[entity['entity_group']] += 1
106
  mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
107
  return mask_dict
108
def export_masked_text(masked_text, file_type):
    """Serialize ``masked_text`` into the bytes of a downloadable file.

    Args:
        masked_text: The text to export.
        file_type: Target format: ``"txt"``, ``"pdf"`` or ``"docx"``.
            Matching is case-insensitive and ignores a leading dot. The MIME
            subtypes ``"plain"`` and
            ``"vnd.openxmlformats-officedocument.wordprocessingml.document"``
            are accepted as aliases for ``"txt"`` and ``"docx"``.

    Returns:
        The file content as ``bytes``, or ``None`` (after reporting the
        problem through ``st.error``) when ``file_type`` is not supported.

    Note:
        The PDF is rendered with a core font that only covers Latin-1, so
        characters outside Latin-1 are written as ``?``.
    """
    import io

    # The caller derives file_type from the upload's MIME type
    # ("text/plain" -> "plain", the Word MIME type -> "vnd.openxml..."), so
    # map those subtypes onto the short names handled below.
    mime_aliases = {
        "plain": "txt",
        "vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    }
    kind = file_type
    if isinstance(file_type, str):
        kind = file_type.strip().lower().lstrip(".")
        kind = mime_aliases.get(kind, kind)

    if kind == "txt":
        return masked_text.encode("utf-8")

    if kind == "pdf":
        from fpdf import FPDF

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # Core fonts cannot encode characters outside Latin-1 and raise on
        # them; substitute those characters instead of failing the export.
        # NOTE(review): registering a Unicode TTF font with add_font() would
        # preserve them, but needs a font file shipped with the app.
        printable = masked_text.encode("latin-1", errors="replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)
        pdf_buffer = io.BytesIO()
        pdf.output(pdf_buffer)
        # getvalue() returns the whole buffer regardless of stream position.
        return pdf_buffer.getvalue()

    if kind == "docx":
        # Imported locally, like fpdf above, so this branch does not rely on
        # a module-level import.
        import docx

        doc = docx.Document()
        doc.add_paragraph(masked_text)
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    st.error("Unsupported file type for export")
    return None
131
  Run_Button = st.button("Run")
132
 
133
  if Run_Button and input_text:
 
169
 
170
  st.subheader("Recognized Entities")
171
  st.dataframe(df_final)
172
+
173
+
174
 
175
  # Spacy display logic with entity numbering
176
  spacy_display = {"ents": [], "text": input_text, "title": None}
 
184
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
185
  st.write(html, unsafe_allow_html=True)
186
 
187
+ export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
188
+ if st.button("Download Masked Text"):
189
+ masked_file_content = export_masked_text(masked_text, export_file_type)
190
+ if masked_file_content:
191
+ st.download_button(
192
+ label="Download",
193
+ data=masked_file_content,
194
+ file_name=f"masked_output.{export_file_type}",
195
+ mime=f"application/{export_file_type}" if export_file_type != "txt" else "text/plain"
196
+ )
197
+
198
  st.subheader("Masking Dictionary")
199
  st.json(mask_dict)