Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +8 -6
pipeline.py
CHANGED
@@ -403,6 +403,7 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
403 |
print(f"🛑 Stop processing {accession}, aborting early...")
|
404 |
return {}
|
405 |
# check doi first
|
|
|
406 |
if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
|
407 |
if doi != "unknown":
|
408 |
link = 'https://doi.org/' + doi
|
@@ -445,14 +446,15 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
445 |
links = unique_preserve_order(links)
|
446 |
acc_score["source"] = links
|
447 |
else:
|
|
|
448 |
try:
|
449 |
temp_source = False
|
450 |
if save_df is not None and not save_df.empty:
|
451 |
print("save df not none")
|
452 |
-
print(str(
|
453 |
print(str(all_filename))
|
454 |
-
if str(
|
455 |
-
link = save_df.loc[save_df["file_chunk"]==str(
|
456 |
#link = row["Sources"].iloc[0]
|
457 |
if "http" in link:
|
458 |
print("yeah http in save df source")
|
@@ -558,9 +560,9 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
558 |
if not chunk and not all_output:
|
559 |
print("not chunk and all output")
|
560 |
# else: check if we can reuse these chunk and all output of existed accession to find another
|
561 |
-
if str(
|
562 |
-
print("first time have chunk path: ", str(
|
563 |
-
acc_score["file_chunk"] = str(
|
564 |
if str(all_filename) != "":
|
565 |
print("first time have all path: ", str(all_filename))
|
566 |
acc_score["file_all_output"] = str(all_filename)
|
|
|
403 |
print(f"🛑 Stop processing {accession}, aborting early...")
|
404 |
return {}
|
405 |
# check doi first
|
406 |
+
print("acc sscore for file all output and chunk: ", acc_score["file_all_output"], acc_score["file_chunk"])
|
407 |
if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
|
408 |
if doi != "unknown":
|
409 |
link = 'https://doi.org/' + doi
|
|
|
446 |
links = unique_preserve_order(links)
|
447 |
acc_score["source"] = links
|
448 |
else:
|
449 |
+
print("no chunk or all output")
|
450 |
try:
|
451 |
temp_source = False
|
452 |
if save_df is not None and not save_df.empty:
|
453 |
print("save df not none")
|
454 |
+
print(str(chunk_filename))
|
455 |
print(str(all_filename))
|
456 |
+
if str(chunk_filename) != "":
|
457 |
+
link = save_df.loc[save_df["file_chunk"]==str(chunk_filename),"Sources"].iloc[0]
|
458 |
#link = row["Sources"].iloc[0]
|
459 |
if "http" in link:
|
460 |
print("yeah http in save df source")
|
|
|
560 |
if not chunk and not all_output:
|
561 |
print("not chunk and all output")
|
562 |
# else: check if we can reuse these chunk and all output of existed accession to find another
|
563 |
+
if str(chunk_filename) != "":
|
564 |
+
print("first time have chunk path: ", str(chunk_filename))
|
565 |
+
acc_score["file_chunk"] = str(chunk_filename)
|
566 |
if str(all_filename) != "":
|
567 |
print("first time have all path: ", str(all_filename))
|
568 |
acc_score["file_all_output"] = str(all_filename)
|