Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -105,12 +105,18 @@ def create_mask_dict(entities):
|
|
105 |
entity_counters[entity['entity_group']] += 1
|
106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
107 |
return mask_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
def export_masked_text(masked_text, file_type):
|
109 |
if file_type == "txt":
|
110 |
return masked_text.encode("utf-8")
|
111 |
elif file_type == "pdf":
|
112 |
pdf_buffer = io.BytesIO()
|
113 |
-
from fpdf import FPDF
|
114 |
pdf = FPDF()
|
115 |
pdf.add_page()
|
116 |
pdf.set_font("Arial", size=12)
|
@@ -157,7 +163,7 @@ if Run_Button and input_text:
|
|
157 |
# Create mask dictionary
|
158 |
mask_dict = create_mask_dict(output_comb)
|
159 |
|
160 |
-
masked_text =
|
161 |
|
162 |
# Apply masking and add masked_word column
|
163 |
for entity in output_comb:
|
@@ -183,20 +189,36 @@ if Run_Button and input_text:
|
|
183 |
else:
|
184 |
label = entity['entity_group']
|
185 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
|
187 |
-
html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
188 |
st.write(html, unsafe_allow_html=True)
|
189 |
|
|
|
190 |
export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
)
|
200 |
|
201 |
st.subheader("Masking Dictionary")
|
202 |
-
st.json(mask_dict)
|
|
|
|
|
|
|
|
105 |
entity_counters[entity['entity_group']] += 1
|
106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
107 |
return mask_dict
|
108 |
+
def create_masked_text(input_text, entities, mask_dict):
    """Return *input_text* with every maskable entity span replaced by its mask label.

    Args:
        input_text: The original, unmasked text.
        entities: Iterable of entity dicts. Each one is read for the keys
            ``'start'`` and ``'end'`` (character offsets into *input_text*),
            ``'word'`` and ``'entity_group'``. The iterable is not modified.
        mask_dict: Mapping from an entity's ``'word'`` to its mask label.

    Returns:
        The masked text. Entities whose group is ``CARDINAL`` or ``EVENT``
        are left untouched.

    Notes:
        * Duplicate or overlapping spans are merged into a single masked
          region labelled by the earliest (and, on ties, longest) entity.
          Splicing them one after another would apply offsets of the original
          text to an already-modified string and corrupt the result.
        * If an entity's word has no entry in *mask_dict*, its
          ``'entity_group'`` is used as the label, so the span is still
          masked instead of raising ``KeyError``.
    """
    unmasked_groups = {'CARDINAL', 'EVENT'}
    maskable = [e for e in entities if e['entity_group'] not in unmasked_groups]
    # Earliest start first; on equal starts the longest span goes first so
    # that it provides the label for the merged region.
    maskable.sort(key=lambda e: (e['start'], -e['end']))

    pieces = []
    cursor = 0  # end offset (in input_text) of the last masked region
    for entity in maskable:
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps the region that was just masked: swallow it, extending
            # the region so no part of a detected entity stays readable.
            if end > cursor:
                cursor = end
            continue
        pieces.append(input_text[cursor:start])
        pieces.append(mask_dict.get(entity['word'], entity['entity_group']))
        cursor = end
    pieces.append(input_text[cursor:])
    return ''.join(pieces)
|
114 |
+
|
115 |
def export_masked_text(masked_text, file_type):
|
116 |
if file_type == "txt":
|
117 |
return masked_text.encode("utf-8")
|
118 |
elif file_type == "pdf":
|
119 |
pdf_buffer = io.BytesIO()
|
|
|
120 |
pdf = FPDF()
|
121 |
pdf.add_page()
|
122 |
pdf.set_font("Arial", size=12)
|
|
|
163 |
# Create mask dictionary
|
164 |
mask_dict = create_mask_dict(output_comb)
|
165 |
|
166 |
+
masked_text = create_masked_text(input_text, output_comb, mask_dict)
|
167 |
|
168 |
# Apply masking and add masked_word column
|
169 |
for entity in output_comb:
|
|
|
189 |
else:
|
190 |
label = entity['entity_group']
|
191 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
192 |
+
# Custom CSS to prevent label overlap
|
193 |
+
custom_css = """
|
194 |
+
<style>
|
195 |
+
.entity-label {
|
196 |
+
font-size: 0.7em;
|
197 |
+
line-height: 1;
|
198 |
+
padding: 0.25em;
|
199 |
+
border-radius: 0.25em;
|
200 |
+
top: -1.5em;
|
201 |
+
position: relative;
|
202 |
+
}
|
203 |
+
</style>
|
204 |
+
"""
|
205 |
|
206 |
+
html = custom_css + spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
207 |
st.write(html, unsafe_allow_html=True)
|
208 |
|
209 |
+
# Download button
|
210 |
export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
|
211 |
+
masked_file_content = export_masked_text(masked_text, export_file_type)
|
212 |
+
if masked_file_content:
|
213 |
+
st.download_button(
|
214 |
+
label="Download Masked Text",
|
215 |
+
data=masked_file_content,
|
216 |
+
file_name=f"masked_output.{export_file_type}",
|
217 |
+
mime=f"application/{export_file_type}" if export_file_type != "txt" else "text/plain"
|
218 |
+
)
|
|
|
219 |
|
220 |
st.subheader("Masking Dictionary")
|
221 |
+
st.json(mask_dict)
|
222 |
+
|
223 |
+
st.subheader("Masked Text Preview")
|
224 |
+
st.text(masked_text)
|