Spaces:
Running
Running
Update pipeline.py
Browse files
- pipeline.py +3 -0
pipeline.py
CHANGED
@@ -200,6 +200,7 @@ def pipeline_with_gemini(accessions):
|
|
200 |
# first way: ncbi method
|
201 |
if country.lower() != "unknown":
|
202 |
stand_country = standardize_location.smart_country_lookup(country.lower())
|
|
|
203 |
if stand_country.lower() != "not found":
|
204 |
acc_score["country"][stand_country.lower()] = ["ncbi"]
|
205 |
else: acc_score["country"][country.lower()] = ["ncbi"]
|
@@ -211,6 +212,7 @@ def pipeline_with_gemini(accessions):
|
|
211 |
acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
|
212 |
# second way: LLM model
|
213 |
# Preprocess the input token
|
|
|
214 |
accession, isolate = None, None
|
215 |
if acc != "unknown": accession = acc
|
216 |
if iso != "unknown": isolate = iso
|
@@ -218,6 +220,7 @@ def pipeline_with_gemini(accessions):
|
|
218 |
if doi != "unknown":
|
219 |
link = 'https://doi.org/' + doi
|
220 |
# get the file to create listOfFile for each id
|
|
|
221 |
html = extractHTML.HTML("",link)
|
222 |
jsonSM = html.getSupMaterial()
|
223 |
article_text = html.getListSection()
|
|
|
200 |
# first way: ncbi method
|
201 |
if country.lower() != "unknown":
|
202 |
stand_country = standardize_location.smart_country_lookup(country.lower())
|
203 |
+
print("stand_country: ", stand_country)
|
204 |
if stand_country.lower() != "not found":
|
205 |
acc_score["country"][stand_country.lower()] = ["ncbi"]
|
206 |
else: acc_score["country"][country.lower()] = ["ncbi"]
|
|
|
212 |
acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
|
213 |
# second way: LLM model
|
214 |
# Preprocess the input token
|
215 |
+
print(acc_score)
|
216 |
accession, isolate = None, None
|
217 |
if acc != "unknown": accession = acc
|
218 |
if iso != "unknown": isolate = iso
|
|
|
220 |
if doi != "unknown":
|
221 |
link = 'https://doi.org/' + doi
|
222 |
# get the file to create listOfFile for each id
|
223 |
+
print("link of doi: ", link)
|
224 |
html = extractHTML.HTML("",link)
|
225 |
jsonSM = html.getSupMaterial()
|
226 |
article_text = html.getListSection()
|