Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
|
3 |
import json
|
4 |
from json import JSONEncoder
|
5 |
-
|
6 |
import pandas as pd
|
7 |
import streamlit as st
|
8 |
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
|
@@ -54,6 +54,24 @@ def anonymize(text, analyze_results):
|
|
54 |
res = anonymizer_engine().anonymize(text, analyze_results)
|
55 |
return res.text
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
st.set_page_config(page_title="Presidio demo (English)", layout="wide")
|
59 |
|
@@ -88,34 +106,35 @@ engine = analyzer_engine()
|
|
88 |
analyzer_load_state.empty()
|
89 |
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
# Before:
|
95 |
-
col1.subheader("Input string:")
|
96 |
-
st_text = col1.text_area(
|
97 |
-
label="Enter text",
|
98 |
-
value="Type in some text, "
|
99 |
"like a phone number (212-141-4544) "
|
100 |
-
"or a name (Lebron James)
|
101 |
-
|
102 |
-
|
103 |
-
"<table><tr><th>nationality</th><td>American</td></tr></table>",
|
104 |
-
height=400,
|
105 |
)
|
106 |
|
107 |
# After
|
108 |
-
|
109 |
-
|
110 |
-
st_analyze_results = analyze(
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
)
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
|
121 |
# table result
|
|
|
2 |
|
3 |
import json
|
4 |
from json import JSONEncoder
|
5 |
+
from annotated_text import annotated_text
|
6 |
import pandas as pd
|
7 |
import streamlit as st
|
8 |
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
|
|
|
54 |
res = anonymizer_engine().anonymize(text, analyze_results)
|
55 |
return res.text
|
56 |
|
57 |
+
def annotate(text, st_analyze_results, st_entities):
    """Split ``text`` into tokens suitable for ``annotated_text(*tokens)``.

    Plain stretches of text are returned as ``str`` items; each detected
    entity is returned as a ``(entity_text, entity_type)`` tuple. Joining the
    string parts of all tokens in order always reproduces ``text`` exactly.

    Args:
        text: The original input string that was analyzed.
        st_analyze_results: Iterable of result objects exposing ``start``,
            ``end`` and ``entity_type`` attributes (offsets into ``text``).
        st_entities: Unused; kept so existing callers keep working.

    Returns:
        A list of ``str`` and ``(str, str)`` tuples in text order. Empty
        plain-text tokens are not emitted. When there are no results the
        whole text is returned as a single plain token (or an empty list
        for empty text).
    """
    tokens = []
    cursor = 0  # index of the first character of `text` not yet emitted

    # Walk the results in text order so plain text and entities interleave.
    for res in sorted(st_analyze_results, key=lambda r: r.start):
        # A result entirely inside an already-emitted span would only
        # duplicate text, so skip it.
        if res.end <= cursor:
            continue

        # Clip a partially overlapping result so no character repeats.
        start = max(res.start, cursor)

        # Plain text between the previous entity (or the beginning) and
        # this entity.
        if start > cursor:
            tokens.append(text[cursor:start])

        # Entity text paired with its entity type.
        tokens.append((text[start:res.end], res.entity_type))
        cursor = res.end

    # Remaining text after the last entity; this is the whole text when
    # nothing was detected.
    if cursor < len(text):
        tokens.append(text[cursor:])

    return tokens
|
75 |
|
76 |
st.set_page_config(page_title="Presidio demo (English)", layout="wide")
|
77 |
|
|
|
106 |
analyzer_load_state.empty()
|
107 |
|
108 |
|
109 |
+
st_text = st.text_area(
|
110 |
+
label="Type in some text",
|
111 |
+
value=
|
|
|
|
|
|
|
|
|
|
|
112 |
"like a phone number (212-141-4544) "
|
113 |
+
"or a name (Lebron James).",
|
114 |
+
height=200,
|
115 |
+
# label_visibility="collapsed",
|
|
|
|
|
116 |
)
|
117 |
|
118 |
# After
|
119 |
+
# Run the analyzer on the text entered above and render the detected
# entities inline, highlighted by entity type.
st.subheader("Analyzed")
with st.spinner("Analyzing..."):
    st_analyze_results = analyze(
        text=st_text,
        entities=st_entities,
        language="en",
        score_threshold=st_threshold,
        return_decision_process=st_return_decision_process,
    )
    annotated_tokens = annotate(st_text, st_analyze_results, st_entities)
    annotated_text(*annotated_tokens)
# Empty text element used as a vertical spacer before the next section.
st.text("")

st.subheader("Anonymized")

# Anonymize the same text using the analyzer results computed above.
with st.spinner("Anonymizing..."):
    st_anonymize_results = anonymize(st_text, st_analyze_results)
    # Write explicitly instead of relying on Streamlit "magic" (a bare
    # expression statement), which displays nothing when magic is disabled.
    st.write(st_anonymize_results)
|
138 |
|
139 |
|
140 |
# table result
|