import gradio as gr
import spacy
from collections import Counter
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

nlp = spacy.load("en_core_web_md")
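#Note: en_core_web_md does not ship with spaCy itself; if it is missing,
#install it once with: python -m spacy download en_core_web_md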

#Text 1: print the list of entities captured by the pretrained model,
#then inspect each label and its meaning
def process_text(text1):
  doc1 = nlp(str(text1).strip())
  entities = [(ent.text, ent.label_) for ent in doc1.ents]
  meanings = [(ent.label_, spacy.explain(ent.label_)) for ent in doc1.ents]
  return entities, meanings
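#For reference, spacy.explain maps a label code to a plain-English gloss,
#e.g. spacy.explain("GPE") returns "Countries, cities, states".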

#Text 2: use PhraseMatcher to find all references of interest;
#define the different references to Covid from the user's entries
def named_ents(text2):
  #Split on whitespace and strip trailing commas so entries like
  #"COVID-19," still match "COVID-19"
  pattern_list = [term.strip(",") for term in text2.strip().split()]
  patterns = list(nlp.pipe(pattern_list))
  return patterns
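#Each pattern produced above is a spaCy Doc, which is the input type
#PhraseMatcher.add expects for exact phrase matching.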

#Text 3: register a custom component that tags every pattern match with
#the user-chosen label, then add it to the pipeline after "ner"
def pipe(text3, patterns):
  matcher = PhraseMatcher(nlp.vocab)
  #Create label for pattern (entered in the Gradio label textbox)
  user_named = str(text3).strip()
  matcher.add(user_named, patterns)

  # Define the custom component; register it only once, since spaCy
  # rejects duplicate factory names (so the first label and patterns of
  # the session are the ones the component keeps)
  if not Language.has_factory("covid_component"):
    @Language.component("covid_component")
    def covid_component_function(doc):
      # Apply the matcher to the doc
      matches = matcher(doc)
      # Create a Span for each match and assign the user-chosen label
      spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
      # Overwrite the doc.ents with the matched spans
      doc.ents = spans
      return doc

  # Add the component to the pipeline after the "ner" component;
  # add_pipe takes the registered component name, not the function
  if "covid_component" not in nlp.pipe_names:
    nlp.add_pipe("covid_component", after="ner")
  return nlp.pipe_names
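#Illustrative check (assumes the component was registered with the default
#label "COVID" and the default reference terms):
#  doc = nlp("Coronavirus is another name for COVID-19.")
#  [(ent.text, ent.label_) for ent in doc.ents]
#  #-> [('Coronavirus', 'COVID'), ('COVID-19', 'COVID')]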
  
#Text 4: verify that the model now detects all specified mentions of
#Covid on another text, then count total mentions per label
def new_sample(text4):
  apply_doc = nlp(str(text4).strip())
  found = [(ent.text, ent.label_) for ent in apply_doc.ents]
  #Count total mentions of each label (e.g. COVID) in the new document
  labels = [ent.label_ for ent in apply_doc.ents]
  return found, Counter(labels)
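#Counter tallies how often each label occurs, e.g. labels like
#["COVID", "COVID", "ORG"] give Counter({'COVID': 2, 'ORG': 1}).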


#Chain the four steps so a single Gradio call runs the whole workflow
def analyze(text1, text2, text3, text4):
  entities, meanings = process_text(text1)
  patterns = named_ents(text2)
  pipe_names = pipe(text3, patterns)
  found, label_counts = new_sample(text4)
  return (
    entities,
    str(meanings),
    "patterns: " + ", ".join(p.text for p in patterns),
    ", ".join(pipe_names),
    found,
    str(dict(label_counts)),
  )

iface = gr.Interface(
    analyze,
    [
      gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
      gr.inputs.Textbox(lines=3, default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"),
      gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"),
      gr.inputs.Textbox(lines=1, default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Test: Enter new sentence containing named entity"),
    ],
    [
      gr.outputs.HighlightedText(label="Entities from pretrained model"),
      gr.outputs.Textbox(label="Label meanings"),
      gr.outputs.Textbox(label="Patterns"),
      gr.outputs.Textbox(label="Pipeline components"),
      gr.outputs.HighlightedText(label="Entities in test sentence"),
      gr.outputs.Textbox(label="Label counts"),
    ],
)
iface.launch()
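#launch() serves the app locally (Gradio's default is http://127.0.0.1:7860);
#pass share=True to iface.launch() to get a temporary public link.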