wzkariampuzha committed on
Commit
b892550
·
1 Parent(s): 32ef4d9

Update epi_pipeline.py

Browse files
Files changed (1) hide show
  1. epi_pipeline.py +24 -1
epi_pipeline.py CHANGED
@@ -11,6 +11,24 @@ from typing import List, Dict, Union, Optional, Set, Tuple
11
  # The code was compiled into a single python file to make adding additional features and importing into other modules easy.
12
  # Each section has its own import statements to facilitate clean code reuse, except for typing which applies to all.
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ## Section: GATHER ABSTRACTS FROM APIs
15
  import requests
16
  import xml.etree.ElementTree as ET
@@ -41,6 +59,7 @@ def PMID_getAb(PMID:Union[int,str]) -> str:
41
  # 'strict' - must have some exact match to at least one of search terms/phrases in text
42
  # 'lenient' - part of the abstract must match at least one word in the search term phrases.
43
  # 'none'
 
44
  def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
45
  #set of all pmids
46
  pmids = set()
@@ -141,6 +160,7 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
141
 
142
  #This is a streamlit version of search_getAbs. Refer to search_getAbs for documentation
143
  import streamlit as st
 
144
  def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
145
  pmids = set()
146
 
@@ -237,7 +257,7 @@ class Classify_Pipeline:
237
 
238
  def __call__(self, abstract:str) -> Tuple[float,bool]:
239
  return self.getTextPredictions(abstract)
240
-
241
  def getTextPredictions(self, abstract:str) -> Tuple[float,bool]:
242
  if len(abstract)>5:
243
  # input_ids
@@ -318,6 +338,7 @@ class GARD_Search:
318
 
319
  #Works much faster if broken down into sentences.
320
  #compares every phrase in a sentence to see if it matches anything in the GARD dictionary of diseases.
 
321
  def get_diseases(self, sentence:str) -> Tuple[List[str], List[str]]:
322
  tokens = [s.lower().strip() for s in nltk_tokenize.word_tokenize(sentence)]
323
  diseases = []
@@ -641,6 +662,7 @@ class NER_Pipeline:
641
  return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
642
 
643
  #Custom pipeline by WKariampuzha @NCATS (not Huggingface/Google/NVIDIA copyright)
 
644
  def __call__(self, text:str, rd_identify:Union[GARD_Search,None] = None):
645
  output_dict = {label:[] for label in self.labels}
646
 
@@ -896,6 +918,7 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
896
  percent_at_step = 100/len(pmid_abs)
897
  for pmid, abstract in pmid_abs.items():
898
  epi_prob, isEpi = epi_classify(abstract)
 
899
  if isEpi:
900
  if extract_diseases:
901
  extraction = epi_ner(abstract, GARD_Search)
 
11
  # The code was compiled into a single python file to make adding additional features and importing into other modules easy.
12
  # Each section has its own import statements to facilitate clean code reuse, except for typing which applies to all.
13
 
14
+ ## SECTION: PERFORMANCE (Adding a timer decorator for functions)
15
+ # Use @timeit decorator at the beginning of class methods or functions
16
+ # https://dev.to/kcdchennai/python-decorator-to-measure-execution-time-54hk
17
+ from functools import wraps
18
+ import time
19
+
20
def timeit(func):
    """Decorator that prints the wall-clock duration of every call to *func*.

    Usage: place ``@timeit`` directly above a function or method definition.

    The wrapped callable is invoked with the arguments it was given and its
    return value is passed through unchanged. After each call a line of the
    form ``Function <name><args> took <seconds> seconds to execute`` is
    printed, where ``<args>`` is the tuple of positional arguments (keyword
    arguments are not shown). When a method is decorated, the first entry of
    that tuple is the instance (``self``).

    The timing line is printed even when *func* raises; the exception then
    propagates to the caller unchanged.
    """
    @wraps(func)  # keep the wrapped function's __name__ and __doc__
    def timeit_wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # Runs on both the normal and the exceptional path, so slow
            # failing calls are reported as well.
            total_time = time.perf_counter() - start_time
            print(f'Function {func.__name__}{args} took {total_time:.4f} seconds to execute')
    return timeit_wrapper
31
+
32
  ## Section: GATHER ABSTRACTS FROM APIs
33
  import requests
34
  import xml.etree.ElementTree as ET
 
59
  # 'strict' - must have some exact match to at least one of search terms/phrases in text
60
  # 'lenient' - part of the abstract must match at least one word in the search term phrases.
61
  # 'none'
62
+ @timeit
63
  def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
64
  #set of all pmids
65
  pmids = set()
 
160
 
161
  #This is a streamlit version of search_getAbs. Refer to search_getAbs for documentation
162
  import streamlit as st
163
+ @timeit
164
  def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
165
  pmids = set()
166
 
 
257
 
258
  def __call__(self, abstract:str) -> Tuple[float,bool]:
259
  return self.getTextPredictions(abstract)
260
+ @timeit
261
  def getTextPredictions(self, abstract:str) -> Tuple[float,bool]:
262
  if len(abstract)>5:
263
  # input_ids
 
338
 
339
  #Works much faster if broken down into sentences.
340
  #compares every phrase in a sentence to see if it matches anything in the GARD dictionary of diseases.
341
+ @timeit
342
  def get_diseases(self, sentence:str) -> Tuple[List[str], List[str]]:
343
  tokens = [s.lower().strip() for s in nltk_tokenize.word_tokenize(sentence)]
344
  diseases = []
 
662
  return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
663
 
664
  #Custom pipeline by WKariampuzha @NCATS (not Huggingface/Google/NVIDIA copyright)
665
+ @timeit
666
  def __call__(self, text:str, rd_identify:Union[GARD_Search,None] = None):
667
  output_dict = {label:[] for label in self.labels}
668
 
 
918
  percent_at_step = 100/len(pmid_abs)
919
  for pmid, abstract in pmid_abs.items():
920
  epi_prob, isEpi = epi_classify(abstract)
921
+ print(f"Abstract with PMID: {pmid} was classified as {isEpi}")
922
  if isEpi:
923
  if extract_diseases:
924
  extraction = epi_ner(abstract, GARD_Search)