Spaces:

ncats
/

EpiPipeline4RD

Sleeping

App Files Files Community

wzkariampuzha committed on Mar 30, 2022

Commit

71c29ef

1 Parent(s): 490c9b8

Update classify_abs.py

Browse files

Files changed (1) hide show

classify_abs.py +8 -9

classify_abs.py CHANGED Viewed

@@ -290,7 +290,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
         else:
             searchterm_list = list(searchterm_list)
     #maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
-    percent_by_step = 1/(maxResults*1.05)
     with st.spinner("Gathering PubMed IDs..."):
         PMIDs_bar = st.progress(0)
         for dz in searchterm_list:
@@ -328,7 +328,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
     with st.spinner("Found "+str(len(pmids))+" PMIDs. Gathering Abstracts and Filtering..."):
         abstracts_bar = st.progress(0)
-        percent_by_step = 1/(maxResults)
         if filtering !='none' or filtering !='strict':
             filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
@@ -340,23 +340,22 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
                     uncased_ab = abstract.lower()
                     for term in searchterm_list:
                         if term.lower() in uncased_ab:
-                            pmid_abs[pmid] = abstract
-                            abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
                             break
                 elif filtering =='none':
                     pmid_abs[pmid] = abstract
-                    abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
                 #Default filtering is 'lenient'.
                 else:
                     #Else and if are separated for readability and to better understand logical flow.
                     if set(filter_terms).intersection(set(word_tokenize(abstract))):
                         pmid_abs[pmid] = abstract
-                        abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
         abstracts_bar.empty()
-    st.success('Found '+str(len(pmids))+' PMIDs. Gathered '+str(len(pmid_abs))+' Relevant Abstracts. Classifying and extracting epidemiology information...')
-    return pmid_abs, (len(pmids),len(pmid_abs))
 # Generate predictions for a PubMed Id
 # nlp: en_core_web_lg

         else:
             searchterm_list = list(searchterm_list)
     #maxResults is no longer padded by a small factor here, even though the number of results returned can sometimes exceed maxResults
+    percent_by_step = 1/maxResults
     with st.spinner("Gathering PubMed IDs..."):
         PMIDs_bar = st.progress(0)
         for dz in searchterm_list:
     with st.spinner("Found "+str(len(pmids))+" PMIDs. Gathering Abstracts and Filtering..."):
         abstracts_bar = st.progress(0)
+        percent_by_step = 1/maxResults
         if filtering !='none' or filtering !='strict':
             filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
                     uncased_ab = abstract.lower()
                     for term in searchterm_list:
                         if term.lower() in uncased_ab:
+                            pmid_abs[pmid] = abstract
                             break
                 elif filtering =='none':
                     pmid_abs[pmid] = abstract
                 #Default filtering is 'lenient'.
                 else:
                     #Else and if are separated for readability and to better understand logical flow.
                     if set(filter_terms).intersection(set(word_tokenize(abstract))):
                         pmid_abs[pmid] = abstract
+            abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
         abstracts_bar.empty()
+    found = len(pmids)
+    relevant = len(pmid_abs)
+    st.success('Found '+str(found)+' PMIDs. Gathered '+str(relevant)+' Relevant Abstracts. Classifying and extracting epidemiology information...')
+    return pmid_abs, (found, relevant)
 # Generate predictions for a PubMed Id
 # nlp: en_core_web_lg