tdubon committed on
Commit
3e8d135
·
1 Parent(s): 4fb44eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -70
app.py CHANGED
@@ -7,78 +7,56 @@ from spacy.tokens import Span
7
 
8
  nlp = spacy.load("en_core_web_md")
9
 
10
-
11
-
12
-
13
- user_input = input(str(""))
14
- doc1 = nlp(user_input)
15
-
16
- print list of entities captured by pretrained model
17
- for ent in doc1.ents:
18
- print(ent.text, ent.label_)
19
 
20
- inspect labels and their meaning
21
- for ent in doc1.ents:
22
- print(ent.label_, spacy.explain(ent.label_))
23
-
24
- Use PhraseMatcher to find all references of interest
25
- Define the different references to Covid
26
- user_entries = input(str("")) #gradio text box here to enter sample terms
27
- pattern_list = []
28
-
29
- for i in user_entries.strip().split():
30
- pattern_list.append(i)
31
-
32
- patterns = list(nlp.pipe(pattern_list))
33
- print("patterns:", patterns)
34
-
35
- #Instantiate PhraseMatcher
36
- matcher = PhraseMatcher(nlp.vocab)
37
-
38
- #Create label for pattern
39
- user_named = input(str("").strip()) #gradio text box here to enter pattern label
40
- matcher.add(user_named, patterns)
41
-
42
- # Define the custom component
43
- @Language.component("covid_component")
44
- def covid_component_function(doc):
45
- #Apply the matcher to the doc
46
- matches = matcher(doc)
47
- #Create a Span for each match and assign the label
48
- spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
49
- # Overwrite the doc.ents with the matched spans
50
- doc.ents = spans
51
- return doc
52
-
53
- # Add the component to the pipeline after the "ner" component
54
- nlp.add_pipe((user_named + "component"), after="ner")
55
- print(nlp.pipe_names)
56
-
57
-
58
- #Verify that your model now detects all specified mentions of Covid on another text
59
- user_doc = input(str("").strip())
60
- apply_doc = nlp(user_doc)
61
- print([(ent.text, ent.label_) for ent in apply_doc.ents])
62
-
63
- #Count total mentions of label COVID in the 3rd document
64
- from collections import Counter
65
- labels = [ent.label_ for ent in apply_doc.ents]
66
- Counter(labels)
67
-
68
- iface = gr.Interface(
69
- process_text,
70
- [gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition")],
71
 
72
- entities,
73
- [gr.inputs.Textbox(lines=3, default= ("Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2"), label="Enter entity references")],
 
74
 
75
- run,
76
- [gr.inputs.Textbox(lines=1, default= ("COVID"), label="Enter entity label")],
77
- gr.outputs.HighlightedText(),
78
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- test,
81
- [gr.inputs.Textbox(lines=1, default= ("The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2."), label="Test: Enter new sentence containing named entity")],
82
- gr.outputs.HighlightedText(),
 
 
 
 
 
 
 
 
83
  )
84
- iface.launch()
 
7
 
8
  nlp = spacy.load("en_core_web_md")
9
 
10
def load(txt1, txt2, txt3, txt4):
    """Compare stock NER with a user-defined, phrase-based entity label.

    Args:
        txt1: Text run through the shared ``nlp`` pipeline as-is.
        txt2: Entity references (phrases) to match. Comma- or
            newline-separated; a value containing neither separator is
            split on whitespace instead.
        txt3: Label assigned to every phrase matched in ``txt4``.
        txt4: Text on which the custom label is applied.

    Returns:
        A 3-tuple ``(entities, entities2, lab_counts)``:
        ``entities`` is a list of ``(text, label)`` pairs found in ``txt1``;
        ``entities2`` is a list of ``(text, label)`` pairs for the phrases
        matched in ``txt4``; ``lab_counts`` is a ``Counter`` of the labels
        in ``entities2``.
    """
    from collections import Counter

    doc1 = nlp(txt1.strip())
    entities = [(ent.text, ent.label_) for ent in doc1.ents]

    # The UI default is a comma-separated list, so splitting on whitespace
    # alone would leave trailing commas inside the patterns ("Coronavirus,")
    # and they would never match. Whitespace splitting is kept only as a
    # fallback for input that contains no comma/newline separator at all.
    normalized = txt2.replace("\n", ",")
    raw_terms = normalized.split(",") if "," in normalized else normalized.split()
    pattern_list = [term.strip() for term in raw_terms if term.strip()]

    user_named = txt3.strip()
    apply_doc = nlp(txt4.strip())

    # Match on the doc directly instead of through a pipeline component:
    # a component added to the shared `nlp` once would keep the matcher and
    # label of the first request, and would also change what the "before"
    # pass above reports on every later request.
    spans = []
    if pattern_list and user_named:
        matcher = PhraseMatcher(nlp.vocab)
        # PhraseMatcher compares token text by default, so tokenizing the
        # patterns is sufficient; the full pipeline is not needed for them.
        matcher.add(user_named, [nlp.make_doc(term) for term in pattern_list])
        spans = [
            Span(apply_doc, start, end, label=user_named)
            for _match_id, start, end in matcher(apply_doc)
        ]

    # doc.ents rejects overlapping spans; filter_spans keeps the longest
    # span of each overlapping group. The matched spans replace the
    # entities that were set when `nlp` processed txt4.
    apply_doc.ents = spacy.util.filter_spans(spans)
    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]

    lab_counts = Counter(ent.label_ for ent in apply_doc.ents)

    return (entities, entities2, lab_counts)
45
+
46
+
47
+
48
# Short blurb passed to the Gradio interface as its description.
description = "Use this space to produce and test your own customized NER"

# Four text inputs are passed positionally to `load` (txt1..txt4) and its
# three return values are rendered in the three output text boxes, in order.
iface = gr.Interface(
    title="Customized Named Entity Recognition",
    description=description,
    fn=load,
    # NOTE(review): confirm the installed Gradio version supports "shap"
    # interpretation for plain Textbox outputs.
    interpretation="shap",
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="COVID",
            label="Enter entity label",
        ),
        gr.inputs.Textbox(
            lines=10,
            default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
            label="Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="str", label="Entities recognized before"),
        # Label typo fixed: "Entites" -> "Entities".
        gr.outputs.Textbox(type="str", label="Entities recognized after"),
        gr.outputs.Textbox(type="str", label="Count of entities captured for new label"),
    ],
    theme="dark",
)
iface.launch()