VyLala committed on
Commit
46c6480
·
verified ·
1 Parent(s): 9dd2023

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +8 -6
pipeline.py CHANGED
@@ -403,6 +403,7 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
403
  print(f"🛑 Stop processing {accession}, aborting early...")
404
  return {}
405
  # check doi first
 
406
  if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
407
  if doi != "unknown":
408
  link = 'https://doi.org/' + doi
@@ -445,14 +446,15 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
445
  links = unique_preserve_order(links)
446
  acc_score["source"] = links
447
  else:
 
448
  try:
449
  temp_source = False
450
  if save_df is not None and not save_df.empty:
451
  print("save df not none")
452
- print(str(chunks_filename))
453
  print(str(all_filename))
454
- if str(chunks_filename) != "":
455
- link = save_df.loc[save_df["file_chunk"]==str(chunks_filename),"Sources"].iloc[0]
456
  #link = row["Sources"].iloc[0]
457
  if "http" in link:
458
  print("yeah http in save df source")
@@ -558,9 +560,9 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
558
  if not chunk and not all_output:
559
  print("not chunk and all output")
560
  # else: check if we can reuse these chunk and all output of existed accession to find another
561
- if str(chunks_filename) != "":
562
- print("first time have chunk path: ", str(chunks_filename))
563
- acc_score["file_chunk"] = str(chunks_filename)
564
  if str(all_filename) != "":
565
  print("first time have all path: ", str(all_filename))
566
  acc_score["file_all_output"] = str(all_filename)
 
403
  print(f"🛑 Stop processing {accession}, aborting early...")
404
  return {}
405
  # check doi first
406
+ print("acc sscore for file all output and chunk: ", acc_score["file_all_output"], acc_score["file_chunk"])
407
  if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
408
  if doi != "unknown":
409
  link = 'https://doi.org/' + doi
 
446
  links = unique_preserve_order(links)
447
  acc_score["source"] = links
448
  else:
449
+ print("no chunk or all output")
450
  try:
451
  temp_source = False
452
  if save_df is not None and not save_df.empty:
453
  print("save df not none")
454
+ print(str(chunk_filename))
455
  print(str(all_filename))
456
+ if str(chunk_filename) != "":
457
+ link = save_df.loc[save_df["file_chunk"]==str(chunk_filename),"Sources"].iloc[0]
458
  #link = row["Sources"].iloc[0]
459
  if "http" in link:
460
  print("yeah http in save df source")
 
560
  if not chunk and not all_output:
561
  print("not chunk and all output")
562
  # else: check if we can reuse these chunk and all output of existed accession to find another
563
+ if str(chunk_filename) != "":
564
+ print("first time have chunk path: ", str(chunk_filename))
565
+ acc_score["file_chunk"] = str(chunk_filename)
566
  if str(all_filename) != "":
567
  print("first time have all path: ", str(all_filename))
568
  acc_score["file_all_output"] = str(all_filename)