Spaces:

ncats
/

EpiPipeline4RD

Sleeping

App Files Files Community

wzkariampuzha committed on Apr 28, 2023

Commit

b892550

1 Parent(s): 32ef4d9

Update epi_pipeline.py

Browse files

Files changed (1) hide show

epi_pipeline.py +24 -1

epi_pipeline.py CHANGED Viewed

@@ -11,6 +11,24 @@ from typing import List, Dict, Union, Optional, Set, Tuple
 # The code was compiled into a single python file to make adding additional features and importing into other modules easy.
 # Each section has its own import statements to facilitate clean code reuse, except for typing which applies to all.
 ## Section: GATHER ABSTRACTS FROM APIs
 import requests
 import xml.etree.ElementTree as ET
@@ -41,6 +59,7 @@ def PMID_getAb(PMID:Union[int,str]) -> str:
 #   'strict' - must have some exact match to at least one of search terms/phrases in text
 #   'lenient' - part of the abstract must match at least one word in the search term phrases.
 #   'none'
 def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
     #set of all pmids
     pmids = set()
@@ -141,6 +160,7 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
 #This is a streamlit version of search_getAbs. Refer to search_getAbs for documentation
 import streamlit as st
 def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
     pmids = set()
@@ -237,7 +257,7 @@ class Classify_Pipeline:
     def __call__(self, abstract:str) -> Tuple[float,bool]:
         return self.getTextPredictions(abstract)
     def getTextPredictions(self, abstract:str) -> Tuple[float,bool]:
         if len(abstract)>5:
             # input_ids
@@ -318,6 +338,7 @@ class GARD_Search:
     #Works much faster if broken down into sentences.
     #compares every phrase in a sentence to see if it matches anything in the GARD dictionary of diseases.
     def get_diseases(self, sentence:str) -> Tuple[List[str], List[str]]:
         tokens = [s.lower().strip() for s in nltk_tokenize.word_tokenize(sentence)]
         diseases = []
@@ -641,6 +662,7 @@ class NER_Pipeline:
         return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
     #Custom pipeline by WKariampuzha @NCATS (not Huggingface/Google/NVIDIA copyright)
     def __call__(self, text:str, rd_identify:Union[GARD_Search,None] = None):
         output_dict = {label:[] for label in self.labels}
@@ -896,6 +918,7 @@ def streamlit_extraction(search_term:Union[int,str], maxResults:int, filtering:s
         percent_at_step = 100/len(pmid_abs)
         for pmid, abstract in pmid_abs.items():
             epi_prob, isEpi = epi_classify(abstract)
             if isEpi:
                 if extract_diseases:
                     extraction = epi_ner(abstract, GARD_Search)

 # The code was compiled into a single python file to make adding additional features and importing into other modules easy.
 # Each section has its own import statements to facilitate clean code reuse, except for typing which applies to all.
+## SECTION: PERFORMANCE (Adding a timer decorator for functions)
+# Use @timeit decorator at the beginning of class methods or functions
+# https://dev.to/kcdchennai/python-decorator-to-measure-execution-time-54hk
+from functools import wraps
+import time
+# Decorator that prints how long each call to the wrapped function or method takes.
+# The wrapped callable's return value is passed through unchanged.
+def timeit(func):
+    # wraps() keeps func's metadata (e.g. __name__) on the returned wrapper
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        # perf_counter() readings are only meaningful as a difference between two calls
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        # For decorated methods `args[0]` is `self`; for plain functions it is the first positional argument.
+        # The whole positional-args tuple is printed (kwargs are not), so long arguments show up in full.
+        # NOTE: if func raises, the exception propagates and no timing line is printed.
+        print(f'Function {func.__name__}{args} took {total_time:.4f} seconds to execute')
+        return result
+    return timeit_wrapper
 ## Section: GATHER ABSTRACTS FROM APIs
 import requests
 import xml.etree.ElementTree as ET
 #   'strict' - must have some exact match to at least one of search terms/phrases in text
 #   'lenient' - part of the abstract must match at least one word in the search term phrases.
 #   'none'
+@timeit
 def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
     #set of all pmids
     pmids = set()
 #This is a streamlit version of search_getAbs. Refer to search_getAbs for documentation
 import streamlit as st
+@timeit
 def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int, filtering:str) -> Dict[str,str]:
     pmids = set()
     def __call__(self, abstract:str) -> Tuple[float,bool]:
         return self.getTextPredictions(abstract)
+    @timeit
     def getTextPredictions(self, abstract:str) -> Tuple[float,bool]:
         if len(abstract)>5:
             # input_ids
     #Works much faster if broken down into sentences.
     #compares every phrase in a sentence to see if it matches anything in the GARD dictionary of diseases.
+    @timeit
     def get_diseases(self, sentence:str) -> Tuple[List[str], List[str]]:
         tokens = [s.lower().strip() for s in nltk_tokenize.word_tokenize(sentence)]
         diseases = []
         return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
     #Custom pipeline by WKariampuzha @NCATS (not Huggingface/Google/NVIDIA copyright)
+    @timeit
     def __call__(self, text:str, rd_identify:Union[GARD_Search,None] = None):
         output_dict = {label:[] for label in self.labels}
         percent_at_step = 100/len(pmid_abs)
         for pmid, abstract in pmid_abs.items():
             epi_prob, isEpi = epi_classify(abstract)
+            print(f"Abstract with PMID: {pmid} was classified as {isEpi}")
             if isEpi:
                 if extract_diseases:
                     extraction = epi_ner(abstract, GARD_Search)