from collections import Counter

import gradio as gr
import spacy
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

nlp = spacy.load("en_core_web_md")
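# Note: the en_core_web_md model must be installed first:
#   python -m spacy download en_core_web_md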




user_input = input("Enter a sample text: ")
doc1 = nlp(user_input)

# Print the list of entities captured by the pretrained model
for ent in doc1.ents:
    print(ent.text, ent.label_)
    
# Inspect the labels and their meanings
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))
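# e.g. spacy.explain("GPE") -> "Countries, cities, states"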

# Use PhraseMatcher to find all references of interest
# Define the different references to COVID
user_entries = input("Enter the entity terms, separated by commas: ")  # gradio text box here to enter sample terms
# Split on commas (not whitespace) so multi-word terms survive intact
pattern_list = [term.strip() for term in user_entries.split(",") if term.strip()]
  
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate the PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)
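# PhraseMatcher compares verbatim token text (ORTH) by default;
# PhraseMatcher(nlp.vocab, attr="LOWER") would make matching case-insensitive.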

# Create a label for the pattern
user_named = input("Enter a label for the pattern: ").strip()  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)

# Define the custom component
@Language.component("covid_component")
def covid_component_function(doc):
    # Apply the matcher to the doc
    matches = matcher(doc)
    # Create a Span for each match and assign the label
    spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
    # Overwrite doc.ents with the matched spans; filter_spans drops overlapping matches
    doc.ents = spacy.util.filter_spans(spans)
    return doc

# Add the component to the pipeline after the "ner" component
# (use the name it was registered under, "covid_component")
nlp.add_pipe("covid_component", after="ner")
print(nlp.pipe_names)
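# Expect something like:
# ['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner', 'covid_component']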


# Verify that your model now detects all specified mentions of COVID in another text
user_doc = input("Enter a new sentence to test: ").strip()
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])

# Count the total mentions of the custom label in the test document
labels = [ent.label_ for ent in apply_doc.ents]
print(Counter(labels))
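
# The interface below needs a handler, which was never defined. A minimal
# sketch of one, assuming the pipeline built above: it rebuilds the global
# matcher from the textbox values (the parameter names are illustrative),
# then returns the (text, label) pairs HighlightedText expects, with None
# marking unhighlighted stretches.
def process_text(text, entity_terms, entity_label, test_sentence):
    global matcher, user_named
    # Rebuild the matcher from the comma-separated terms in the textbox
    user_named = entity_label.strip()
    matcher = PhraseMatcher(nlp.vocab)
    terms = [t.strip() for t in entity_terms.split(",") if t.strip()]
    matcher.add(user_named, list(nlp.pipe(terms)))

    def highlight(doc):
        # Interleave plain stretches (label None) with labeled entity spans
        pairs, last = [], 0
        for ent in doc.ents:
            if ent.start_char > last:
                pairs.append((doc.text[last:ent.start_char], None))
            pairs.append((ent.text, ent.label_))
            last = ent.end_char
        if last < len(doc.text):
            pairs.append((doc.text[last:], None))
        return pairs

    return highlight(nlp(text)), highlight(nlp(test_sentence))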

iface = gr.Interface(
    fn=process_text,
    inputs=[
        gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
        gr.inputs.Textbox(lines=3, default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"),
        gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"),
        gr.inputs.Textbox(lines=1, default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Test: Enter new sentence containing named entity"),
    ],
    outputs=[gr.outputs.HighlightedText(), gr.outputs.HighlightedText()],
)
iface.launch()
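# Optional: iface.launch(share=True) serves the app over a temporary public link.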