VyLala committed on
Commit
72ec8ca
·
verified ·
1 Parent(s): b29bcf1

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +3 -0
pipeline.py CHANGED
@@ -200,6 +200,7 @@ def pipeline_with_gemini(accessions):
200
  # first way: ncbi method
201
  if country.lower() != "unknown":
202
  stand_country = standardize_location.smart_country_lookup(country.lower())
 
203
  if stand_country.lower() != "not found":
204
  acc_score["country"][stand_country.lower()] = ["ncbi"]
205
  else: acc_score["country"][country.lower()] = ["ncbi"]
@@ -211,6 +212,7 @@ def pipeline_with_gemini(accessions):
211
  acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
212
  # second way: LLM model
213
  # Preprocess the input token
 
214
  accession, isolate = None, None
215
  if acc != "unknown": accession = acc
216
  if iso != "unknown": isolate = iso
@@ -218,6 +220,7 @@ def pipeline_with_gemini(accessions):
218
  if doi != "unknown":
219
  link = 'https://doi.org/' + doi
220
  # get the file to create listOfFile for each id
 
221
  html = extractHTML.HTML("",link)
222
  jsonSM = html.getSupMaterial()
223
  article_text = html.getListSection()
 
200
  # first way: ncbi method
201
  if country.lower() != "unknown":
202
  stand_country = standardize_location.smart_country_lookup(country.lower())
203
+ print("stand_country: ", stand_country)
204
  if stand_country.lower() != "not found":
205
  acc_score["country"][stand_country.lower()] = ["ncbi"]
206
  else: acc_score["country"][country.lower()] = ["ncbi"]
 
212
  acc_score["sample_type"][sample_type.lower()] = ["ncbi"]
213
  # second way: LLM model
214
  # Preprocess the input token
215
+ print(acc_score)
216
  accession, isolate = None, None
217
  if acc != "unknown": accession = acc
218
  if iso != "unknown": isolate = iso
 
220
  if doi != "unknown":
221
  link = 'https://doi.org/' + doi
222
  # get the file to create listOfFile for each id
223
+ print("link of doi: ", link)
224
  html = extractHTML.HTML("",link)
225
  jsonSM = html.getSupMaterial()
226
  article_text = html.getListSection()