langdonholmes committed on
Commit
40a9cf9
·
1 Parent(s): dfd2cc1

switch to en_student_name_detector

Browse files
Files changed (3) hide show
  1. app.py +3 -3
  2. requirements.txt +1 -1
  3. spacy_recognizer.py +2 -2
app.py CHANGED
@@ -26,7 +26,7 @@ def analyzer_engine():
26
  configuration = {
27
  "nlp_engine_name": "spacy",
28
  "models": [
29
- {"lang_code": "en", "model_name": "en_pipeline"}],
30
  }
31
 
32
  # Create NLP engine based on configuration
@@ -96,7 +96,7 @@ st.set_page_config(page_title="Student Name Detector (English)", layout="wide")
96
 
97
  # Side bar
98
  st.sidebar.markdown(
99
- """Detect and anonymize PII in text using an [NLP model](https://huggingface.co/langdonholmes/en_pipeline) [trained](https://github.com/aialoe/deidentification-pipeline) on student-generated text collected by Coursera.
100
  """
101
  )
102
 
@@ -127,7 +127,7 @@ analyzer_load_state.empty()
127
 
128
  st_text = st.text_area(
129
  label="Type in some text",
130
- value="Learning Reflection\n\nJohn Williams\n\nIn this course I learned many things. As Liedtke (2004) said, \"Students grow when they learn\" \n\nBy John H. Williams",
131
  height=200,
132
  )
133
 
 
26
  configuration = {
27
  "nlp_engine_name": "spacy",
28
  "models": [
29
+ {"lang_code": "en", "model_name": "en_student_name_detector"}],
30
  }
31
 
32
  # Create NLP engine based on configuration
 
96
 
97
  # Side bar
98
  st.sidebar.markdown(
99
+ """Detect and anonymize PII in text using an [NLP model](https://huggingface.co/langdonholmes/en_student_name_detector) [trained](https://github.com/aialoe/deidentification-pipeline) on student-generated text collected by Coursera.
100
  """
101
  )
102
 
 
127
 
128
  st_text = st.text_area(
129
  label="Type in some text",
130
+ value="Learning Reflection\n\nJohn Williams\n\nIn this course I learned many things. As Liedtke (2004) said, \"Students grow when they learn\"\n\nBy John H. Williams",
131
  height=200,
132
  )
133
 
requirements.txt CHANGED
@@ -4,4 +4,4 @@ presidio-anonymizer
4
  presidio-analyzer
5
  torch
6
  st-annotated-text
7
- https://huggingface.co/langdonholmes/en_pipeline/resolve/main/en_pipeline-any-py3-none-any.whl
 
4
  presidio-analyzer
5
  torch
6
  st-annotated-text
7
+ https://huggingface.co/langdonholmes/en_pipeline/resolve/main/en_student_name_detector-any-py3-none-any.whl
spacy_recognizer.py CHANGED
@@ -28,14 +28,14 @@ class CustomSpacyRecognizer(LocalRecognizer):
28
  CHECK_LABEL_GROUPS = [
29
  ({"LOCATION"}, {"LOC", "LOCATION", "STREET_ADDRESS", "COORDINATE"}),
30
  ({"PERSON"}, {"PER", "PERSON"}),
31
- ({"STUDENT"}, {"STUDENT"})
32
  ({"NRP"}, {"NORP", "NRP"}),
33
  ({"ORGANIZATION"}, {"ORG"}),
34
  ({"DATE_TIME"}, {"DATE_TIME"}),
35
  ]
36
 
37
  MODEL_LANGUAGES = {
38
- "en": "langdonholmes/en_pipeline",
39
  }
40
 
41
  PRESIDIO_EQUIVALENCES = {
 
28
  CHECK_LABEL_GROUPS = [
29
  ({"LOCATION"}, {"LOC", "LOCATION", "STREET_ADDRESS", "COORDINATE"}),
30
  ({"PERSON"}, {"PER", "PERSON"}),
31
+ ({"STUDENT"}, {"STUDENT"}),
32
  ({"NRP"}, {"NORP", "NRP"}),
33
  ({"ORGANIZATION"}, {"ORG"}),
34
  ({"DATE_TIME"}, {"DATE_TIME"}),
35
  ]
36
 
37
  MODEL_LANGUAGES = {
38
+ "en": "langdonholmes/en_student_name_detector",
39
  }
40
 
41
  PRESIDIO_EQUIVALENCES = {