harshildarji committed on
Commit
504150b
·
verified ·
1 Parent(s): 56fe3c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -8
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import warnings
3
 
4
  import matplotlib.colors as mcolors
@@ -119,7 +119,7 @@ st.markdown(
119
  )
120
 
121
  # Initialization for German Legal NER
122
- tkn = os.getenv("tkn")
123
  tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
124
  model = AutoModelForTokenClassification.from_pretrained(
125
  "harshildarji/JuraBERT", use_auth_token=tkn
@@ -262,19 +262,17 @@ st.markdown("<hr>", unsafe_allow_html=True)
262
 
263
  uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
264
 
 
265
  if uploaded_file is not None:
266
  try:
267
- # Read raw content of the file
268
  raw_content = uploaded_file.read()
269
 
270
- # Dynamically detect encoding
271
  detected = detect(raw_content)
272
  encoding = detected["encoding"]
273
 
274
  if encoding is None:
275
  raise ValueError("Unable to detect file encoding.")
276
 
277
- # Decode file content with the detected encoding
278
  lines = raw_content.decode(encoding).splitlines()
279
 
280
  anonymize_mode = st.checkbox("Anonymize")
@@ -283,6 +281,9 @@ if uploaded_file is not None:
283
  unsafe_allow_html=True,
284
  )
285
 
 
 
 
286
  for line_number, line in enumerate(lines, start=1):
287
  if line.strip():
288
  results = ner(line)
@@ -290,13 +291,32 @@ if uploaded_file is not None:
290
 
291
  if anonymize_mode:
292
  anonymized_text = anonymize_text(line, merged_results)
293
- st.markdown(f"{anonymized_text}", unsafe_allow_html=True)
 
 
294
  else:
295
  colored_html = color_substrings(line, merged_results)
296
  st.markdown(f"{colored_html}", unsafe_allow_html=True)
297
-
298
  else:
299
- st.markdown("<br>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
  if not anonymize_mode:
302
  st.markdown(
 
1
+ import re
2
  import warnings
3
 
4
  import matplotlib.colors as mcolors
 
119
  )
120
 
121
  # Initialization for German Legal NER
122
+ tkn = open("./token").read()
123
  tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
124
  model = AutoModelForTokenClassification.from_pretrained(
125
  "harshildarji/JuraBERT", use_auth_token=tkn
 
262
 
263
  uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
264
 
265
+
266
  if uploaded_file is not None:
267
  try:
 
268
  raw_content = uploaded_file.read()
269
 
 
270
  detected = detect(raw_content)
271
  encoding = detected["encoding"]
272
 
273
  if encoding is None:
274
  raise ValueError("Unable to detect file encoding.")
275
 
 
276
  lines = raw_content.decode(encoding).splitlines()
277
 
278
  anonymize_mode = st.checkbox("Anonymize")
 
281
  unsafe_allow_html=True,
282
  )
283
 
284
+ anonymized_lines = []
285
+ displayed_lines = []
286
+
287
  for line_number, line in enumerate(lines, start=1):
288
  if line.strip():
289
  results = ner(line)
 
291
 
292
  if anonymize_mode:
293
  anonymized_text = anonymize_text(line, merged_results)
294
+ displayed_lines.append(anonymized_text)
295
+ plain_text = re.sub(r"<.*?>", "", anonymized_text)
296
+ anonymized_lines.append(plain_text.strip())
297
  else:
298
  colored_html = color_substrings(line, merged_results)
299
  st.markdown(f"{colored_html}", unsafe_allow_html=True)
 
300
  else:
301
+ displayed_lines.append("<br>")
302
+ anonymized_lines.append("")
303
+
304
+ if anonymize_mode:
305
+ original_file_name = uploaded_file.name
306
+ download_file_name = f"Anon_{original_file_name}"
307
+
308
+ anonymized_content = "\n".join(anonymized_lines)
309
+
310
+ for displayed_line in displayed_lines:
311
+ st.markdown(f"{displayed_line}", unsafe_allow_html=True)
312
+
313
+ st.markdown("<hr>", unsafe_allow_html=True)
314
+ st.download_button(
315
+ label="Download Anonymized Text",
316
+ data=anonymized_content,
317
+ file_name=download_file_name,
318
+ mime="text/plain",
319
+ )
320
 
321
  if not anonymize_mode:
322
  st.markdown(