Spaces:

ncats
/

EpiPipeline4RD

Sleeping

App Files Files Community

wzkariampuzha committed on May 3, 2022

Commit

f51313e

1 Parent(s): d548708

Update epi_pipeline.py

Browse files

Files changed (1) hide show

epi_pipeline.py +6 -4

epi_pipeline.py CHANGED Viewed

@@ -73,7 +73,7 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
         query = term[:-3]
         ## get pmid results from searching for disease name through PubMed API
-        url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(maxResults)
         r = requests.get(url)
         root = ET.fromstring(r.content)
@@ -129,7 +129,8 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
             #do filtering here
             if filtering == 'strict':
                 uncased_ab = abstract.lower()
-                for term in searchterm_list:
                     if term.lower() in uncased_ab:
                         pmid_abs[pmid] = abstract
                         break
@@ -169,7 +170,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
             for word in dz_words:
                 term += word + '%20'
             query = term[:-3]
             url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
             r = requests.get(url)
             root = ET.fromstring(r.content)
@@ -208,7 +209,8 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
                 #do filtering here
                 if filtering == 'strict':
                     uncased_ab = abstract.lower()
-                    for term in searchterm_list:
                         if term.lower() in uncased_ab:
                             pmid_abs[pmid] = abstract
                             break

         query = term[:-3]
         ## get pmid results from searching for disease name through PubMed API
+        url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
         r = requests.get(url)
         root = ET.fromstring(r.content)
             #do filtering here
             if filtering == 'strict':
                 uncased_ab = abstract.lower()
+                #Reversing the list hopefully cuts down on the number of if statements bc the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
+                for term in reversed(searchterm_list):
                     if term.lower() in uncased_ab:
                         pmid_abs[pmid] = abstract
                         break
             for word in dz_words:
                 term += word + '%20'
             query = term[:-3]
+            #dividing maxResults by the len() of the searchterm_list so each term's query requests an equal share of retmax
             url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
             r = requests.get(url)
             root = ET.fromstring(r.content)
                 #do filtering here
                 if filtering == 'strict':
                     uncased_ab = abstract.lower()
+                    #Reversing the list hopefully cuts down on the number of if statements bc the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
+                    for term in reversed(searchterm_list):
                         if term.lower() in uncased_ab:
                             pmid_abs[pmid] = abstract
                             break