Spaces:

harshildarji
/

Juristische-NER

Running

App Files Community

harshildarji committed on Jan 13

Commit

504150b

verified ·

1 Parent(s): 56fe3c0

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -8

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import warnings
 import matplotlib.colors as mcolors
@@ -119,7 +119,7 @@ st.markdown(
 )
 # Initialization for German Legal NER
-tkn = os.getenv("tkn")
 tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
 model = AutoModelForTokenClassification.from_pretrained(
     "harshildarji/JuraBERT", use_auth_token=tkn
@@ -262,19 +262,17 @@ st.markdown("<hr>", unsafe_allow_html=True)
 uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
 if uploaded_file is not None:
     try:
-        # Read raw content of the file
         raw_content = uploaded_file.read()
-        # Dynamically detect encoding
         detected = detect(raw_content)
         encoding = detected["encoding"]
         if encoding is None:
             raise ValueError("Unable to detect file encoding.")
-        # Decode file content with the detected encoding
         lines = raw_content.decode(encoding).splitlines()
         anonymize_mode = st.checkbox("Anonymize")
@@ -283,6 +281,9 @@ if uploaded_file is not None:
             unsafe_allow_html=True,
         )
         for line_number, line in enumerate(lines, start=1):
             if line.strip():
                 results = ner(line)
@@ -290,13 +291,32 @@ if uploaded_file is not None:
                 if anonymize_mode:
                     anonymized_text = anonymize_text(line, merged_results)
-                    st.markdown(f"{anonymized_text}", unsafe_allow_html=True)
                 else:
                     colored_html = color_substrings(line, merged_results)
                     st.markdown(f"{colored_html}", unsafe_allow_html=True)
             else:
-                st.markdown("<br>", unsafe_allow_html=True)
         if not anonymize_mode:
             st.markdown(

+import re
 import warnings
 import matplotlib.colors as mcolors
 )
 # Initialization for German Legal NER
+tkn = open("./token").read()
 tokenizer = AutoTokenizer.from_pretrained("harshildarji/JuraBERT", use_auth_token=tkn)
 model = AutoModelForTokenClassification.from_pretrained(
     "harshildarji/JuraBERT", use_auth_token=tkn
 uploaded_file = st.file_uploader("Upload a .txt file", type="txt")
 if uploaded_file is not None:
     try:
         raw_content = uploaded_file.read()
         detected = detect(raw_content)
         encoding = detected["encoding"]
         if encoding is None:
             raise ValueError("Unable to detect file encoding.")
         lines = raw_content.decode(encoding).splitlines()
         anonymize_mode = st.checkbox("Anonymize")
             unsafe_allow_html=True,
         )
+        anonymized_lines = []
+        displayed_lines = []
         for line_number, line in enumerate(lines, start=1):
             if line.strip():
                 results = ner(line)
                 if anonymize_mode:
                     anonymized_text = anonymize_text(line, merged_results)
+                    displayed_lines.append(anonymized_text)
+                    plain_text = re.sub(r"<.*?>", "", anonymized_text)
+                    anonymized_lines.append(plain_text.strip())
                 else:
                     colored_html = color_substrings(line, merged_results)
                     st.markdown(f"{colored_html}", unsafe_allow_html=True)
             else:
+                displayed_lines.append("<br>")
+                anonymized_lines.append("")
+        if anonymize_mode:
+            original_file_name = uploaded_file.name
+            download_file_name = f"Anon_{original_file_name}"
+            anonymized_content = "\n".join(anonymized_lines)
+            for displayed_line in displayed_lines:
+                st.markdown(f"{displayed_line}", unsafe_allow_html=True)
+            st.markdown("<hr>", unsafe_allow_html=True)
+            st.download_button(
+                label="Download Anonymized Text",
+                data=anonymized_content,
+                file_name=download_file_name,
+                mime="text/plain",
+            )
         if not anonymize_mode:
             st.markdown(