VyLala committed on
Commit
bdbda26
·
verified ·
1 Parent(s): e9baf52

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +32 -16
pipeline.py CHANGED
@@ -250,23 +250,39 @@ def pipeline_with_gemini(accessions):
250
  # Define local temp paths for reading/writing
251
  # import tempfile
252
  # tmp_dir = tempfile.mkdtemp()
253
- tmp_dir = "/mnt/data/generated_docs"
254
- os.makedirs(tmp_dir, exist_ok=True)
255
- file_chunk_path = os.path.join(tmp_dir, chunk_filename)
256
- file_all_path = os.path.join(tmp_dir, all_filename)
257
  # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)
258
  # file_all_path = os.path.join(tempfile.gettempdir(), all_filename)
259
  print(file_chunk_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  # 🔥 Remove the local file first if it exists
261
- if os.path.exists(file_chunk_path):
262
- os.remove(file_chunk_path)
263
- print("remove chunk path")
264
- if os.path.exists(file_all_path):
265
- os.remove(file_all_path)
266
- print("remove all path")
267
  # Try to download if already exists on Drive
268
- chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
269
- all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
270
  print("chunk exist: ", chunk_exists)
271
  # first way: ncbi method
272
  print("country.lower: ",country.lower())
@@ -405,11 +421,11 @@ def pipeline_with_gemini(accessions):
405
  all_output = all_output[:1*1024*1024]
406
  print("chunk len: ", len(chunk))
407
  print("all output len: ", len(all_output))
408
- # data_preprocess.save_text_to_docx(chunk, file_chunk_path)
409
- # data_preprocess.save_text_to_docx(all_output, file_all_path)
410
  # Later when saving new files
411
- data_preprocess.save_text_to_docx(chunk, chunk_filename, sample_folder_id)
412
- data_preprocess.save_text_to_docx(all_output, all_filename, sample_folder_id)
413
 
414
  # Upload to Drive
415
  upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)
 
250
  # Define local temp paths for reading/writing
251
  # import tempfile
252
  # tmp_dir = tempfile.mkdtemp()
253
+ LOCAL_TEMP_DIR = "/mnt/data/generated_docs"
254
+ os.makedirs(LOCAL_TEMP_DIR, exist_ok=True)
255
+ file_chunk_path = os.path.join(LOCAL_TEMP_DIR, chunk_filename)
256
+ file_all_path = os.path.join(LOCAL_TEMP_DIR, all_filename)
257
  # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)
258
  # file_all_path = os.path.join(tempfile.gettempdir(), all_filename)
259
  print(file_chunk_path)
260
+ chunk_id = find_drive_file(chunk_filename, sample_folder_id)
261
+ all_id = find_drive_file(all_filename, sample_folder_id)
262
+
263
+ if chunk_id and all_id:
264
+ print("✅ Files already exist in Google Drive. Downloading them...")
265
+ chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
266
+ all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
267
+ # Read and parse these into `chunk` and `all_output`
268
+ else:
269
+ # 🔥 Remove any stale local copies
270
+ if os.path.exists(file_chunk_path):
271
+ os.remove(file_chunk_path)
272
+ print(f"🗑️ Removed stale: {file_chunk_path}")
273
+ if os.path.exists(file_all_path):
274
+ os.remove(file_all_path)
275
+ print(f"🗑️ Removed stale: {file_all_path}")
276
  # 🔥 Remove the local file first if it exists
277
+ # if os.path.exists(file_chunk_path):
278
+ # os.remove(file_chunk_path)
279
+ # print("remove chunk path")
280
+ # if os.path.exists(file_all_path):
281
+ # os.remove(file_all_path)
282
+ # print("remove all path")
283
  # Try to download if already exists on Drive
284
+ chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
285
+ all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
286
  print("chunk exist: ", chunk_exists)
287
  # first way: ncbi method
288
  print("country.lower: ",country.lower())
 
421
  all_output = all_output[:1*1024*1024]
422
  print("chunk len: ", len(chunk))
423
  print("all output len: ", len(all_output))
424
+ data_preprocess.save_text_to_docx(chunk, file_chunk_path)
425
+ data_preprocess.save_text_to_docx(all_output, file_all_path)
426
  # Later when saving new files
427
+ # data_preprocess.save_text_to_docx(chunk, chunk_filename, sample_folder_id)
428
+ # data_preprocess.save_text_to_docx(all_output, all_filename, sample_folder_id)
429
 
430
  # Upload to Drive
431
  upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)