Spaces:
Running
Running
Commit
·
40a9cf9
1
Parent(s):
dfd2cc1
switch to en_student_name_detector
Browse files
- app.py +3 -3
- requirements.txt +1 -1
- spacy_recognizer.py +2 -2
app.py
CHANGED
@@ -26,7 +26,7 @@ def analyzer_engine():
|
|
26 |
configuration = {
|
27 |
"nlp_engine_name": "spacy",
|
28 |
"models": [
|
29 |
-
{"lang_code": "en", "model_name": "
|
30 |
}
|
31 |
|
32 |
# Create NLP engine based on configuration
|
@@ -96,7 +96,7 @@ st.set_page_config(page_title="Student Name Detector (English)", layout="wide")
|
|
96 |
|
97 |
# Side bar
|
98 |
st.sidebar.markdown(
|
99 |
-
"""Detect and anonymize PII in text using an [NLP model](https://huggingface.co/langdonholmes/
|
100 |
"""
|
101 |
)
|
102 |
|
@@ -127,7 +127,7 @@ analyzer_load_state.empty()
|
|
127 |
|
128 |
st_text = st.text_area(
|
129 |
label="Type in some text",
|
130 |
-
value="Learning Reflection\n\nJohn Williams\n\nIn this course I learned many things. As Liedtke (2004) said, \"Students grow when they learn\"
|
131 |
height=200,
|
132 |
)
|
133 |
|
|
|
26 |
configuration = {
|
27 |
"nlp_engine_name": "spacy",
|
28 |
"models": [
|
29 |
+
{"lang_code": "en", "model_name": "en_student_name_detector"}],
|
30 |
}
|
31 |
|
32 |
# Create NLP engine based on configuration
|
|
|
96 |
|
97 |
# Side bar
|
98 |
st.sidebar.markdown(
|
99 |
+
"""Detect and anonymize PII in text using an [NLP model](https://huggingface.co/langdonholmes/en_student_name_recognizer) [trained](https://github.com/aialoe/deidentification-pipeline) on student-generated text collected by Coursera.
|
100 |
"""
|
101 |
)
|
102 |
|
|
|
127 |
|
128 |
st_text = st.text_area(
|
129 |
label="Type in some text",
|
130 |
+
value="Learning Reflection\n\nJohn Williams\n\nIn this course I learned many things. As Liedtke (2004) said, \"Students grow when they learn\"\n\nBy John H. Williams",
|
131 |
height=200,
|
132 |
)
|
133 |
|
requirements.txt
CHANGED
@@ -4,4 +4,4 @@ presidio-anonymizer
|
|
4 |
presidio-analyzer
|
5 |
torch
|
6 |
st-annotated-text
|
7 |
-
https://huggingface.co/langdonholmes/en_pipeline/resolve/main/
|
|
|
4 |
presidio-analyzer
|
5 |
torch
|
6 |
st-annotated-text
|
7 |
+
https://huggingface.co/langdonholmes/en_pipeline/resolve/main/en_student_name_detector-any-py3-none-any.whl
|
spacy_recognizer.py
CHANGED
@@ -28,14 +28,14 @@ class CustomSpacyRecognizer(LocalRecognizer):
|
|
28 |
CHECK_LABEL_GROUPS = [
|
29 |
({"LOCATION"}, {"LOC", "LOCATION", "STREET_ADDRESS", "COORDINATE"}),
|
30 |
({"PERSON"}, {"PER", "PERSON"}),
|
31 |
-
({"STUDENT"}, {"STUDENT"})
|
32 |
({"NRP"}, {"NORP", "NRP"}),
|
33 |
({"ORGANIZATION"}, {"ORG"}),
|
34 |
({"DATE_TIME"}, {"DATE_TIME"}),
|
35 |
]
|
36 |
|
37 |
MODEL_LANGUAGES = {
|
38 |
-
"en": "langdonholmes/
|
39 |
}
|
40 |
|
41 |
PRESIDIO_EQUIVALENCES = {
|
|
|
28 |
CHECK_LABEL_GROUPS = [
|
29 |
({"LOCATION"}, {"LOC", "LOCATION", "STREET_ADDRESS", "COORDINATE"}),
|
30 |
({"PERSON"}, {"PER", "PERSON"}),
|
31 |
+
({"STUDENT"}, {"STUDENT"}),
|
32 |
({"NRP"}, {"NORP", "NRP"}),
|
33 |
({"ORGANIZATION"}, {"ORG"}),
|
34 |
({"DATE_TIME"}, {"DATE_TIME"}),
|
35 |
]
|
36 |
|
37 |
MODEL_LANGUAGES = {
|
38 |
+
"en": "langdonholmes/en_student_name_detector",
|
39 |
}
|
40 |
|
41 |
PRESIDIO_EQUIVALENCES = {
|