Spaces:

VyLala
/

mtDNALocation

Running

App Files Files Community

VyLala committed on Jul 8

Commit

bdbda26

verified ·

1 Parent(s): e9baf52

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +32 -16

pipeline.py CHANGED Viewed

@@ -250,23 +250,39 @@ def pipeline_with_gemini(accessions):
       # Define local temp paths for reading/writing
       # import tempfile
       # tmp_dir = tempfile.mkdtemp()
-      tmp_dir = "/mnt/data/generated_docs"
-      os.makedirs(tmp_dir, exist_ok=True)
-      file_chunk_path = os.path.join(tmp_dir, chunk_filename)
-      file_all_path = os.path.join(tmp_dir, all_filename)
       # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)
       # file_all_path = os.path.join(tempfile.gettempdir(), all_filename)
       print(file_chunk_path)
       # 🔥 Remove the local file first if it exists
-      if os.path.exists(file_chunk_path):
-        os.remove(file_chunk_path)
-        print("remove chunk path")
-      if os.path.exists(file_all_path):
-        os.remove(file_all_path)
-        print("remove all path")
       # Try to download if already exists on Drive
-      chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
-      all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
       print("chunk exist: ", chunk_exists)
       # first way: ncbi method
       print("country.lower: ",country.lower())
@@ -405,11 +421,11 @@ def pipeline_with_gemini(accessions):
             all_output = all_output[:1*1024*1024]
         print("chunk len: ", len(chunk))
         print("all output len: ", len(all_output))
-        # data_preprocess.save_text_to_docx(chunk, file_chunk_path)
-        # data_preprocess.save_text_to_docx(all_output, file_all_path)
         # Later when saving new files
-        data_preprocess.save_text_to_docx(chunk, chunk_filename, sample_folder_id)
-        data_preprocess.save_text_to_docx(all_output, all_filename, sample_folder_id)
         # Upload to Drive
         upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)

       # Define local temp paths for reading/writing
       # import tempfile
       # tmp_dir = tempfile.mkdtemp()
+      LOCAL_TEMP_DIR = "/mnt/data/generated_docs"
+      os.makedirs(LOCAL_TEMP_DIR, exist_ok=True)
+      file_chunk_path = os.path.join(LOCAL_TEMP_DIR, chunk_filename)
+      file_all_path = os.path.join(LOCAL_TEMP_DIR, all_filename)
       # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)
       # file_all_path = os.path.join(tempfile.gettempdir(), all_filename)
       print(file_chunk_path)
+      chunk_id = find_drive_file(chunk_filename, sample_folder_id)
+      all_id = find_drive_file(all_filename, sample_folder_id)
+      if chunk_id and all_id:
+        print("✅ Files already exist in Google Drive. Downloading them...")
+        chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
+        all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
+        # Read and parse these into `chunk` and `all_output`
+      else:
+        # 🔥 Remove any stale local copies
+        if os.path.exists(file_chunk_path):
+            os.remove(file_chunk_path)
+            print(f"🗑️ Removed stale: {file_chunk_path}")
+        if os.path.exists(file_all_path):
+            os.remove(file_all_path)
+            print(f"🗑️ Removed stale: {file_all_path}")
       # 🔥 Remove the local file first if it exists
+      # if os.path.exists(file_chunk_path):
+      #   os.remove(file_chunk_path)
+      #   print("remove chunk path")
+      # if os.path.exists(file_all_path):
+      #   os.remove(file_all_path)
+      #   print("remove all path")
       # Try to download if already exists on Drive
+        chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
+        all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
       print("chunk exist: ", chunk_exists)
       # first way: ncbi method
       print("country.lower: ",country.lower())
             all_output = all_output[:1*1024*1024]
         print("chunk len: ", len(chunk))
         print("all output len: ", len(all_output))
+        data_preprocess.save_text_to_docx(chunk, file_chunk_path)
+        data_preprocess.save_text_to_docx(all_output, file_all_path)
         # Later when saving new files
+        # data_preprocess.save_text_to_docx(chunk, chunk_filename, sample_folder_id)
+        # data_preprocess.save_text_to_docx(all_output, all_filename, sample_folder_id)
         # Upload to Drive
         upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)