wzkariampuzha committed on
Commit
26da213
·
1 Parent(s): 256dcea

Update classify_abs.py

Browse files
Files changed (1) hide show
  1. classify_abs.py +29 -28
classify_abs.py CHANGED
@@ -291,6 +291,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
291
  searchterm_list = list(searchterm_list)
292
  #maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
293
  percent_by_step = 1/(maxResults*1.05)
 
294
  PMIDs_bar = st.progress(0)
295
  for dz in searchterm_list:
296
  term = ''
@@ -325,36 +326,36 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
325
  PMIDs_bar.progress(min(round(len(pmids)*percent_by_step,1),1.0))
326
  PMIDs_bar.empty()
327
 
328
- with st.success('Found '+str(len(pmids))+' PMIDs. Gathering Abstracts and Filtering...'):
329
- abstracts_bar = st.progress(0)
330
- percent_by_step = 1/(maxResults)
331
- if filtering !='none' or filtering !='strict':
332
- filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
333
-
334
- for pmid in pmids:
335
- abstract = PMID_getAb(pmid)
336
- if len(abstract)>5:
337
- #do filtering here
338
- if filtering == 'strict':
339
- uncased_ab = abstract.lower()
340
- for term in searchterm_list:
341
- if term.lower() in uncased_ab:
342
- pmid_abs[pmid] = abstract
343
- abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
344
- break
345
- elif filtering =='none':
346
- pmid_abs[pmid] = abstract
347
- abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
348
-
349
- #Default filtering is 'lenient'.
350
- else:
351
- #Else and if are separated for readability and to better understand logical flow.
352
- if set(filter_terms).intersection(set(word_tokenize(abstract))):
353
  pmid_abs[pmid] = abstract
354
  abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
355
- abstracts_bar.empty()
356
-
357
- st.success('Found '+str(len(pmids))+' PMIDs. Gathered '+str(len(pmid_abs))+' Relevant Abstracts.')
 
 
 
 
 
 
 
 
 
 
 
358
 
359
  return pmid_abs, (len(pmids),len(pmid_abs))
360
 
 
291
  searchterm_list = list(searchterm_list)
292
  #maxResults is multiplied by a little bit because sometimes the results returned is more than maxResults
293
  percent_by_step = 1/(maxResults*1.05)
294
+ API_Loading = st.spinner("Gathering PubMed IDs...")
295
  PMIDs_bar = st.progress(0)
296
  for dz in searchterm_list:
297
  term = ''
 
326
  PMIDs_bar.progress(min(round(len(pmids)*percent_by_step,1),1.0))
327
  PMIDs_bar.empty()
328
 
329
+ API_Loading = st.spinner("Found "+str(len(pmids))+" PMIDs. Gathering Abstracts and Filtering...")
330
+ abstracts_bar = st.progress(0)
331
+ percent_by_step = 1/(maxResults)
332
+ if filtering !='none' or filtering !='strict':
333
+ filter_terms = set(searchterm_list).union(set(str(re.sub(',','',' '.join(searchterm_list))).split()).difference(STOPWORDS))
334
+
335
+ for pmid in pmids:
336
+ abstract = PMID_getAb(pmid)
337
+ if len(abstract)>5:
338
+ #do filtering here
339
+ if filtering == 'strict':
340
+ uncased_ab = abstract.lower()
341
+ for term in searchterm_list:
342
+ if term.lower() in uncased_ab:
 
 
 
 
 
 
 
 
 
 
 
343
  pmid_abs[pmid] = abstract
344
  abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
345
+ break
346
+ elif filtering =='none':
347
+ pmid_abs[pmid] = abstract
348
+ abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
349
+
350
+ #Default filtering is 'lenient'.
351
+ else:
352
+ #Else and if are separated for readability and to better understand logical flow.
353
+ if set(filter_terms).intersection(set(word_tokenize(abstract))):
354
+ pmid_abs[pmid] = abstract
355
+ abstracts_bar.progress(min(round(len(pmid_abs)*percent_by_step,1),1.0))
356
+ abstracts_bar.empty()
357
+ API_Loading.empty()
358
+ st.success('Found '+str(len(pmids))+' PMIDs. Gathered '+str(len(pmid_abs))+' Relevant Abstracts. Classifying and extracting epidemiology information...')
359
 
360
  return pmid_abs, (len(pmids),len(pmid_abs))
361