wzkariampuzha committed on
Commit
f51313e
1 Parent(s): d548708

Update epi_pipeline.py

Browse files
Files changed (1) hide show
  1. epi_pipeline.py +6 -4
epi_pipeline.py CHANGED
@@ -73,7 +73,7 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
73
  query = term[:-3]
74
 
75
  ## get pmid results from searching for disease name through PubMed API
76
- url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(maxResults)
77
  r = requests.get(url)
78
  root = ET.fromstring(r.content)
79
 
@@ -129,7 +129,8 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
129
  #do filtering here
130
  if filtering == 'strict':
131
  uncased_ab = abstract.lower()
132
- for term in searchterm_list:
 
133
  if term.lower() in uncased_ab:
134
  pmid_abs[pmid] = abstract
135
  break
@@ -169,7 +170,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
169
  for word in dz_words:
170
  term += word + '%20'
171
  query = term[:-3]
172
-
173
  url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
174
  r = requests.get(url)
175
  root = ET.fromstring(r.content)
@@ -208,7 +209,8 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
208
  #do filtering here
209
  if filtering == 'strict':
210
  uncased_ab = abstract.lower()
211
- for term in searchterm_list:
 
212
  if term.lower() in uncased_ab:
213
  pmid_abs[pmid] = abstract
214
  break
 
73
  query = term[:-3]
74
 
75
  ## get pmid results from searching for disease name through PubMed API
76
+ url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
77
  r = requests.get(url)
78
  root = ET.fromstring(r.content)
79
 
 
129
  #do filtering here
130
  if filtering == 'strict':
131
  uncased_ab = abstract.lower()
132
+ #Iterating in reverse hopefully cuts down on the number of substring checks, because the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
133
+ for term in reversed(searchterm_list):
134
  if term.lower() in uncased_ab:
135
  pmid_abs[pmid] = abstract
136
  break
 
170
  for word in dz_words:
171
  term += word + '%20'
172
  query = term[:-3]
173
+ #dividing maxResults by len(searchterm_list) splits the retmax budget evenly across the search terms
174
  url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
175
  r = requests.get(url)
176
  root = ET.fromstring(r.content)
 
209
  #do filtering here
210
  if filtering == 'strict':
211
  uncased_ab = abstract.lower()
212
+ #Iterating in reverse hopefully cuts down on the number of substring checks, because the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
213
+ for term in reversed(searchterm_list):
214
  if term.lower() in uncased_ab:
215
  pmid_abs[pmid] = abstract
216
  break