Spaces:

ncats
/

EpiPipeline4RD

Running

wzkariampuzha committed on May 3, 2022

Commit

d548708

1 Parent(s): 5ea9a7b

Update epi_pipeline.py

Files changed (1) hide show

epi_pipeline.py CHANGED Viewed

@@ -170,7 +170,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
                 term += word + '%20'
             query = term[:-3]
-            url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(maxResults)
             r = requests.get(url)
             root = ET.fromstring(r.content)
@@ -202,7 +202,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
         if filtering !='none' or filtering !='strict':
             filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
-        for pmid in pmids:
             abstract = PMID_getAb(pmid)
             if len(abstract)>5:
                 #do filtering here
@@ -219,7 +219,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
                     #Else and if are separated for readability and to better understand logical flow.
                     if set(filter_terms).intersection(set(nltk_tokenize.word_tokenize(abstract))):
                         pmid_abs[pmid] = abstract
-            abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
         abstracts_bar.empty()
     found = len(pmids)
     relevant = len(pmid_abs)

                 term += word + '%20'
             query = term[:-3]
+            url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
             r = requests.get(url)
             root = ET.fromstring(r.content)
         if filtering !='none' or filtering !='strict':
             filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
+        for i, pmid in enumerate(pmids):
             abstract = PMID_getAb(pmid)
             if len(abstract)>5:
                 #do filtering here
                     #Else and if are separated for readability and to better understand logical flow.
                     if set(filter_terms).intersection(set(nltk_tokenize.word_tokenize(abstract))):
                         pmid_abs[pmid] = abstract
+            abstracts_bar.progress(min(round(i*percent_by_step,1),1.0))
         abstracts_bar.empty()
     found = len(pmids)
     relevant = len(pmid_abs)