Spaces:
Sleeping
Sleeping
wzkariampuzha
committed on
Commit
•
f51313e
1
Parent(s):
d548708
Update epi_pipeline.py
Browse files- epi_pipeline.py +6 -4
epi_pipeline.py
CHANGED
@@ -73,7 +73,7 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
|
|
73 |
query = term[:-3]
|
74 |
|
75 |
## get pmid results from searching for disease name through PubMed API
|
76 |
-
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(maxResults)
|
77 |
r = requests.get(url)
|
78 |
root = ET.fromstring(r.content)
|
79 |
|
@@ -129,7 +129,8 @@ def search_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:int
|
|
129 |
#do filtering here
|
130 |
if filtering == 'strict':
|
131 |
uncased_ab = abstract.lower()
|
132 |
-
|
|
|
133 |
if term.lower() in uncased_ab:
|
134 |
pmid_abs[pmid] = abstract
|
135 |
break
|
@@ -169,7 +170,7 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
|
|
169 |
for word in dz_words:
|
170 |
term += word + '%20'
|
171 |
query = term[:-3]
|
172 |
-
|
173 |
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
|
174 |
r = requests.get(url)
|
175 |
root = ET.fromstring(r.content)
|
@@ -208,7 +209,8 @@ def streamlit_getAbs(searchterm_list:Union[List[str],List[int],str], maxResults:
|
|
208 |
#do filtering here
|
209 |
if filtering == 'strict':
|
210 |
uncased_ab = abstract.lower()
|
211 |
-
|
|
|
212 |
if term.lower() in uncased_ab:
|
213 |
pmid_abs[pmid] = abstract
|
214 |
break
|
|
|
73 |
query = term[:-3]
|
74 |
|
75 |
## get pmid results from searching for disease name through PubMed API
|
76 |
+
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
|
77 |
r = requests.get(url)
|
78 |
root = ET.fromstring(r.content)
|
79 |
|
|
|
129 |
#do filtering here
|
130 |
if filtering == 'strict':
|
131 |
uncased_ab = abstract.lower()
|
132 |
+
#Reversing the list hopefully cuts down on the number of if statements, because the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
|
133 |
+
for term in reversed(searchterm_list):
|
134 |
if term.lower() in uncased_ab:
|
135 |
pmid_abs[pmid] = abstract
|
136 |
break
|
|
|
170 |
for word in dz_words:
|
171 |
term += word + '%20'
|
172 |
query = term[:-3]
|
173 |
+
#dividing maxResults by len(searchterm_list) to get the retmax value for each search term's query
|
174 |
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='+query+"&retmax="+str(int(maxResults/len(searchterm_list)))
|
175 |
r = requests.get(url)
|
176 |
root = ET.fromstring(r.content)
|
|
|
209 |
#do filtering here
|
210 |
if filtering == 'strict':
|
211 |
uncased_ab = abstract.lower()
|
212 |
+
#Reversing the list hopefully cuts down on the number of if statements, because the search terms are ordered longest to shortest and shorter terms are more likely to be in the abstract
|
213 |
+
for term in reversed(searchterm_list):
|
214 |
if term.lower() in uncased_ab:
|
215 |
pmid_abs[pmid] = abstract
|
216 |
break
|