Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import
|
2 |
import warnings
|
3 |
|
4 |
import matplotlib.colors as mcolors
|
@@ -119,7 +119,7 @@ st.markdown(
|
|
119 |
)
|
120 |
|
121 |
# Initialization for German Legal NER
|
122 |
-
tkn =
|
123 |
tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
|
124 |
model = AutoModelForTokenClassification.from_pretrained(
|
125 |
"harshildarji/JuraBERT", use_auth_token=tkn
|
@@ -262,19 +262,17 @@ st.markdown("<hr>", unsafe_allow_html=True)
|
|
262 |
|
263 |
uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
|
264 |
|
|
|
265 |
if uploaded_file is not None:
|
266 |
try:
|
267 |
-
# Read raw content of the file
|
268 |
raw_content = uploaded_file.read()
|
269 |
|
270 |
-
# Dynamically detect encoding
|
271 |
detected = detect(raw_content)
|
272 |
encoding = detected["encoding"]
|
273 |
|
274 |
if encoding is None:
|
275 |
raise ValueError("Unable to detect file encoding.")
|
276 |
|
277 |
-
# Decode file content with the detected encoding
|
278 |
lines = raw_content.decode(encoding).splitlines()
|
279 |
|
280 |
anonymize_mode = st.checkbox("Anonymize")
|
@@ -283,6 +281,9 @@ if uploaded_file is not None:
|
|
283 |
unsafe_allow_html=True,
|
284 |
)
|
285 |
|
|
|
|
|
|
|
286 |
for line_number, line in enumerate(lines, start=1):
|
287 |
if line.strip():
|
288 |
results = ner(line)
|
@@ -290,13 +291,32 @@ if uploaded_file is not None:
|
|
290 |
|
291 |
if anonymize_mode:
|
292 |
anonymized_text = anonymize_text(line, merged_results)
|
293 |
-
|
|
|
|
|
294 |
else:
|
295 |
colored_html = color_substrings(line, merged_results)
|
296 |
st.markdown(f"{colored_html}", unsafe_allow_html=True)
|
297 |
-
|
298 |
else:
|
299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
|
301 |
if not anonymize_mode:
|
302 |
st.markdown(
|
|
|
1 |
+
import re
|
2 |
import warnings
|
3 |
|
4 |
import matplotlib.colors as mcolors
|
|
|
119 |
)
|
120 |
|
121 |
# Initialization for German Legal NER
|
122 |
+
# Read the Hugging Face access token from a local file.
# The context manager closes the file handle deterministically, and strip()
# removes the trailing newline/whitespace that token files usually end with,
# which would otherwise be passed on as part of the credential.
with open("./token", encoding="utf-8") as token_file:
    tkn = token_file.read().strip()
|
123 |
# Load the tokenizer for the "harshildarji/JuraBERT" checkpoint, passing the
# access token held in `tkn` for authentication.
# NOTE(review): `use_auth_token` is reported as deprecated in newer transformers
# releases in favour of `token` — confirm against the pinned version before changing.
tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
|
124 |
model = AutoModelForTokenClassification.from_pretrained(
|
125 |
"harshildarji/JuraBERT", use_auth_token=tkn
|
|
|
262 |
|
263 |
uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
|
264 |
|
265 |
+
|
266 |
if uploaded_file is not None:
|
267 |
try:
|
|
|
268 |
raw_content = uploaded_file.read()
|
269 |
|
|
|
270 |
detected = detect(raw_content)
|
271 |
encoding = detected["encoding"]
|
272 |
|
273 |
if encoding is None:
|
274 |
raise ValueError("Unable to detect file encoding.")
|
275 |
|
|
|
276 |
lines = raw_content.decode(encoding).splitlines()
|
277 |
|
278 |
anonymize_mode = st.checkbox("Anonymize")
|
|
|
281 |
unsafe_allow_html=True,
|
282 |
)
|
283 |
|
284 |
+
anonymized_lines = []
|
285 |
+
displayed_lines = []
|
286 |
+
|
287 |
for line_number, line in enumerate(lines, start=1):
|
288 |
if line.strip():
|
289 |
results = ner(line)
|
|
|
291 |
|
292 |
if anonymize_mode:
|
293 |
anonymized_text = anonymize_text(line, merged_results)
|
294 |
+
displayed_lines.append(anonymized_text)
|
295 |
+
plain_text = re.sub(r"<.*?>", "", anonymized_text)
|
296 |
+
anonymized_lines.append(plain_text.strip())
|
297 |
else:
|
298 |
colored_html = color_substrings(line, merged_results)
|
299 |
st.markdown(f"{colored_html}", unsafe_allow_html=True)
|
|
|
300 |
else:
|
301 |
+
displayed_lines.append("<br>")
|
302 |
+
anonymized_lines.append("")
|
303 |
+
|
304 |
+
if anonymize_mode:
|
305 |
+
original_file_name = uploaded_file.name
|
306 |
+
download_file_name = f"Anon_{original_file_name}"
|
307 |
+
|
308 |
+
anonymized_content = "\n".join(anonymized_lines)
|
309 |
+
|
310 |
+
for displayed_line in displayed_lines:
|
311 |
+
st.markdown(f"{displayed_line}", unsafe_allow_html=True)
|
312 |
+
|
313 |
+
st.markdown("<hr>", unsafe_allow_html=True)
|
314 |
+
st.download_button(
|
315 |
+
label="Download Anonymized Text",
|
316 |
+
data=anonymized_content,
|
317 |
+
file_name=download_file_name,
|
318 |
+
mime="text/plain",
|
319 |
+
)
|
320 |
|
321 |
if not anonymize_mode:
|
322 |
st.markdown(
|