"""Gradio demo: teach a spaCy pipeline a custom entity label via PhraseMatcher.

The UI takes four text inputs:

1. a text that is run through the pretrained NER model,
2. a list of terms (entity references) that should be recognised,
3. the label to assign to those terms,
4. a new sentence used to verify that the custom label is detected.

It shows the entities of (1) and (4) as highlighted text.
"""

from collections import Counter

import gradio as gr
import spacy
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.pipeline import EntityRuler
from spacy.tokens import Doc, Span
from spacy.util import filter_spans

nlp = spacy.load("en_core_web_md")

# Name under which the custom component is registered AND added to the
# pipeline (the two must be identical for ``nlp.add_pipe`` to find it).
_COMPONENT_NAME = "covid_component"

# State shared between the helper functions and the pipeline component.
# NOTE: this is process-wide, so concurrent users of the demo share it.
_state = {"patterns": [], "matcher": None, "label": ""}


@Language.component(_COMPONENT_NAME)
def covid_component_function(doc: Doc) -> Doc:
    """Replace ``doc.ents`` with the spans found by the current PhraseMatcher.

    The matcher and label are looked up at call time so that ``pipe`` can swap
    them without re-registering or re-adding the component. If no matcher has
    been configured yet, the doc is returned untouched.
    """
    matcher = _state["matcher"]
    if matcher is None or len(matcher) == 0:
        return doc
    label = _state["label"]
    spans = [
        Span(doc, start, end, label=label)
        for _match_id, start, end in matcher(doc)
    ]
    # Matches may overlap (e.g. one term contained in another); doc.ents does
    # not accept overlapping spans, so keep only the longest non-overlapping.
    doc.ents = filter_spans(spans)
    return doc


def _highlight(doc: Doc) -> list:
    """Convert a doc into ``(text, label)`` segments covering the whole text.

    Entity segments carry their label; the text between entities is emitted
    with ``None`` as label. This is the list-of-pairs shape used by the
    HighlightedText output.
    """
    segments = []
    cursor = 0
    text = doc.text
    for ent in doc.ents:
        if ent.start_char > cursor:
            segments.append((text[cursor:ent.start_char], None))
        segments.append((ent.text, ent.label_))
        cursor = ent.end_char
    if cursor < len(text):
        segments.append((text[cursor:], None))
    return segments


# Text 1
def load(text: str) -> Doc:
    """Run *text* through the pretrained pipeline and return the doc.

    The custom component is skipped so that the result always reflects the
    pretrained model only, even after ``pipe`` has modified the pipeline.
    """
    return nlp(str(text).strip(), disable=[_COMPONENT_NAME])


def process_text(text1: str) -> list:
    """Return ``[entities, explanations]`` for *text1*.

    ``entities`` is a list of ``(entity text, label)`` pairs and
    ``explanations`` a list of ``(label, spacy.explain(label))`` pairs.
    """
    doc = load(text1)
    return [
        [(ent.text, ent.label_) for ent in doc.ents],
        [(ent.label_, spacy.explain(ent.label_)) for ent in doc.ents],
    ]


# Text 2
def named_ents(text: str) -> list:
    """Build PhraseMatcher patterns from the user's entity references.

    If the input contains commas it is split on commas (so multi-word terms
    are possible); otherwise it is split on whitespace. Empty and duplicate
    terms are dropped. The patterns are remembered for a later ``pipe`` call
    and also returned.
    """
    raw = str(text).strip()
    if "," in raw:
        terms = [term.strip() for term in raw.split(",")]
    else:
        terms = raw.split()
    terms = list(dict.fromkeys(term for term in terms if term))
    # The matcher only needs tokenised docs, so skip the rest of the pipeline.
    patterns = [nlp.make_doc(term) for term in terms]
    _state["patterns"] = patterns
    return patterns


def entities(text2: str) -> list:
    """Register the entity references in *text2*; return the pattern texts."""
    return [pattern.text for pattern in named_ents(text2)]


# Text 3
def pipe(text: str, patterns=None) -> list:
    """Configure the custom component to tag the patterns with label *text*.

    Args:
        text: Label to assign to every matched span.
        patterns: Pattern docs to match; defaults to the patterns built by
            the most recent ``named_ents`` call.

    Returns:
        The pipeline's component names after the update.

    Raises:
        ValueError: If the label is empty after stripping whitespace.
    """
    label = str(text).strip()
    if not label:
        raise ValueError("Entity label must not be empty.")
    if patterns is None:
        patterns = _state["patterns"]

    matcher = PhraseMatcher(nlp.vocab)
    if patterns:
        matcher.add(label, patterns)
    _state["matcher"] = matcher
    _state["label"] = label

    # Add the component to the pipeline after the "ner" component, once only;
    # later calls just swap the matcher/label it reads.
    if _COMPONENT_NAME not in nlp.pipe_names:
        nlp.add_pipe(_COMPONENT_NAME, after="ner")
    return list(nlp.pipe_names)


def run(text3: str) -> list:
    """Apply label *text3* to the registered patterns; return pipe names."""
    return pipe(text3)


# Text 4
def new_sample(text: str) -> Doc:
    """Run *text* through the full pipeline, including the custom component."""
    return nlp(str(text).strip())


def test(text4: str) -> list:
    """Return ``[entities, label_counts]`` for the verification sentence.

    ``entities`` is a list of ``(entity text, label)`` pairs and
    ``label_counts`` a ``Counter`` of how often each label occurs.
    """
    doc = new_sample(text4)
    found = [(ent.text, ent.label_) for ent in doc.ents]
    return [found, Counter(label for _text, label in found)]


def analyze(text1: str, text2: str, text3: str, text4: str):
    """Gradio handler: run all four steps and return both highlight lists."""
    pretrained_doc = load(text1)
    pipe(text3, named_ents(text2))
    custom_doc = new_sample(text4)
    return _highlight(pretrained_doc), _highlight(custom_doc)


iface = gr.Interface(
    fn=analyze,
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="COVID",
            label="Enter entity label",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
            label="Test: Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.HighlightedText(label="Entities found by the pretrained model"),
        gr.outputs.HighlightedText(label="Entities found with the custom label"),
    ],
)

iface.launch()