from collections import Counter

import gradio as gr
import spacy
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

nlp = spacy.load("en_core_web_md")
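# Note: the en_core_web_md model must be installed first:
#   python -m spacy download en_core_web_md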




user_input = input("Enter a sample text: ")
doc1 = nlp(user_input)

# Print the list of entities captured by the pretrained model
for ent in doc1.ents:
    print(ent.text, ent.label_)
    
# Inspect the labels and their meanings
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))
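# e.g. spacy.explain("GPE") -> "Countries, cities, states"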

# Use PhraseMatcher to find all references of interest
# Define the different references to COVID
user_entries = input("Enter the entity terms, separated by commas: ")  # gradio text box here to enter sample terms
# Split on commas (not whitespace) so multi-word terms survive intact
pattern_list = [term.strip() for term in user_entries.split(",") if term.strip()]
  
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate the PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)
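# PhraseMatcher compares verbatim token text (ORTH) by default;
# PhraseMatcher(nlp.vocab, attr="LOWER") would make matching case-insensitive.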

# Create a label for the pattern
user_named = input("Enter a label for the pattern: ").strip()  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)

# Define the custom component
@Language.component("covid_component")
def covid_component_function(doc):
    # Apply the matcher to the doc
    matches = matcher(doc)
    # Create a Span for each match and assign the label
    spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
    # Overwrite doc.ents with the matched spans; filter_spans drops overlapping matches
    doc.ents = spacy.util.filter_spans(spans)
    return doc

# Add the component to the pipeline after the "ner" component
# (use the name it was registered under, "covid_component")
nlp.add_pipe("covid_component", after="ner")
print(nlp.pipe_names)
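# Expect something like:
# ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner', 'covid_component']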


# Verify that your model now detects all specified mentions of COVID in another text
user_doc = input("Enter a new sentence to test: ").strip()
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])

# Count the total mentions of the custom label in the test document
labels = [ent.label_ for ent in apply_doc.ents]
print(Counter(labels))
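
# The interface below needs a handler, which was never defined. A minimal
# sketch of one, assuming the pipeline built above: it rebuilds the global
# matcher from the textbox values (the parameter names are illustrative),
# then returns the (text, label) pairs HighlightedText expects, with None
# marking unhighlighted stretches.
def process_text(text, entity_terms, entity_label, test_sentence):
    global matcher, user_named
    # Rebuild the matcher from the comma-separated terms in the textbox
    user_named = entity_label.strip()
    matcher = PhraseMatcher(nlp.vocab)
    terms = [t.strip() for t in entity_terms.split(",") if t.strip()]
    matcher.add(user_named, list(nlp.pipe(terms)))

    def highlight(doc):
        # Interleave plain stretches (label None) with labeled entity spans
        pairs, last = [], 0
        for ent in doc.ents:
            if ent.start_char > last:
                pairs.append((doc.text[last:ent.start_char], None))
            pairs.append((ent.text, ent.label_))
            last = ent.end_char
        if last < len(doc.text):
            pairs.append((doc.text[last:], None))
        return pairs

    return highlight(nlp(text)), highlight(nlp(test_sentence))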

iface = gr.Interface(
    fn=process_text,
    inputs=[
        gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
        gr.inputs.Textbox(lines=3, default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"),
        gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"),
        gr.inputs.Textbox(lines=1, default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Test: Enter new sentence containing named entity"),
    ],
    outputs=[gr.outputs.HighlightedText(), gr.outputs.HighlightedText()],
)
iface.launch()
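# Optional: iface.launch(share=True) serves the app over a temporary public link.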