import gradio as gr
import spacy
from collections import Counter
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

nlp = spacy.load("en_core_web_md")
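#Note: en_core_web_md does not ship with spaCy itself; if it is missing,
#install it once with: python -m spacy download en_core_web_md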

#Text 1: print the list of entities captured by the pretrained model,
#then inspect each label and its meaning
def process_text(text1):
  doc1 = nlp(str(text1).strip())
  entities = [(ent.text, ent.label_) for ent in doc1.ents]
  meanings = [(ent.label_, spacy.explain(ent.label_)) for ent in doc1.ents]
  return entities, meanings
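#For reference, spacy.explain maps a label code to a plain-English gloss,
#e.g. spacy.explain("GPE") returns "Countries, cities, states".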

#Text 2: use PhraseMatcher to find all references of interest;
#define the different references to Covid from the user's entries
def named_ents(text2):
  #Split on whitespace and strip trailing commas so entries like
  #"COVID-19," still match "COVID-19"
  pattern_list = [term.strip(",") for term in text2.strip().split()]
  patterns = list(nlp.pipe(pattern_list))
  return patterns
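#Each pattern produced above is a spaCy Doc, which is the input type
#PhraseMatcher.add expects for exact phrase matching.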

#Text 3: register a custom component that tags every pattern match with
#the user-chosen label, then add it to the pipeline after "ner"
def pipe(text3, patterns):
  matcher = PhraseMatcher(nlp.vocab)
  #Create label for pattern (entered in the Gradio label textbox)
  user_named = str(text3).strip()
  matcher.add(user_named, patterns)

  # Define the custom component; register it only once, since spaCy
  # rejects duplicate factory names (so the first label and patterns of
  # the session are the ones the component keeps)
  if not Language.has_factory("covid_component"):
    @Language.component("covid_component")
    def covid_component_function(doc):
      # Apply the matcher to the doc
      matches = matcher(doc)
      # Create a Span for each match and assign the user-chosen label
      spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
      # Overwrite the doc.ents with the matched spans
      doc.ents = spans
      return doc

  # Add the component to the pipeline after the "ner" component;
  # add_pipe takes the registered component name, not the function
  if "covid_component" not in nlp.pipe_names:
    nlp.add_pipe("covid_component", after="ner")
  return nlp.pipe_names
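#Illustrative check (assumes the component was registered with the default
#label "COVID" and the default reference terms):
#  doc = nlp("Coronavirus is another name for COVID-19.")
#  [(ent.text, ent.label_) for ent in doc.ents]
#  #-> [('Coronavirus', 'COVID'), ('COVID-19', 'COVID')]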
  
#Text 4: verify that the model now detects all specified mentions of
#Covid on another text, then count total mentions per label
def new_sample(text4):
  apply_doc = nlp(str(text4).strip())
  found = [(ent.text, ent.label_) for ent in apply_doc.ents]
  #Count total mentions of each label (e.g. COVID) in the new document
  labels = [ent.label_ for ent in apply_doc.ents]
  return found, Counter(labels)
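#Counter tallies how often each label occurs, e.g. labels like
#["COVID", "COVID", "ORG"] give Counter({'COVID': 2, 'ORG': 1}).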


#Chain the four steps so a single Gradio call runs the whole workflow
def analyze(text1, text2, text3, text4):
  entities, meanings = process_text(text1)
  patterns = named_ents(text2)
  pipe_names = pipe(text3, patterns)
  found, label_counts = new_sample(text4)
  return (
    entities,
    str(meanings),
    "patterns: " + ", ".join(p.text for p in patterns),
    ", ".join(pipe_names),
    found,
    str(dict(label_counts)),
  )

iface = gr.Interface(
    analyze,
    [
      gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
      gr.inputs.Textbox(lines=3, default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"),
      gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"),
      gr.inputs.Textbox(lines=1, default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Test: Enter new sentence containing named entity"),
    ],
    [
      gr.outputs.HighlightedText(label="Entities from pretrained model"),
      gr.outputs.Textbox(label="Label meanings"),
      gr.outputs.Textbox(label="Patterns"),
      gr.outputs.Textbox(label="Pipeline components"),
      gr.outputs.HighlightedText(label="Entities in test sentence"),
      gr.outputs.Textbox(label="Label counts"),
    ],
)
iface.launch()
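#launch() serves the app locally (Gradio's default is http://127.0.0.1:7860);
#pass share=True to iface.launch() to get a temporary public link.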