VyLala committed on
Commit
632fff4
·
verified ·
1 Parent(s): 488822a

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +9 -16
pipeline.py CHANGED
@@ -76,7 +76,10 @@ def find_drive_file(filename, parent_id):
76
  # return file["id"]
77
  def upload_file_to_drive(local_path, remote_name, folder_id):
78
  try:
79
- # Check if file already exists and delete it
 
 
 
80
  existing = drive_service.files().list(
81
  q=f"name='{remote_name}' and '{folder_id}' in parents and trashed = false",
82
  fields="files(id)"
@@ -86,7 +89,6 @@ def upload_file_to_drive(local_path, remote_name, folder_id):
86
  drive_service.files().delete(fileId=existing[0]["id"]).execute()
87
  print(f"🗑️ Deleted existing '{remote_name}' in Drive folder {folder_id}")
88
 
89
- # Now upload the file
90
  file_metadata = {"name": remote_name, "parents": [folder_id]}
91
  media = MediaFileUpload(local_path, resumable=True)
92
  file = drive_service.files().create(
@@ -94,19 +96,8 @@ def upload_file_to_drive(local_path, remote_name, folder_id):
94
  media_body=media,
95
  fields="id"
96
  ).execute()
97
- print(f"✅ Uploaded '{remote_name}' to Google Drive folder ID: {folder_id}")
98
-
99
- # Optional verification after upload
100
- uploaded = drive_service.files().list(
101
- q=f"name='{remote_name}' and '{folder_id}' in parents and trashed = false",
102
- fields="files(id, name)"
103
- ).execute().get("files", [])
104
-
105
- if not uploaded:
106
- print(f"❌ Upload verification failed: '{remote_name}' not found after upload.")
107
- else:
108
- print(f"✅ Verified upload: {remote_name} (ID: {uploaded[0]['id']})")
109
 
 
110
  return file["id"]
111
 
112
  except Exception as e:
@@ -257,8 +248,10 @@ def pipeline_with_gemini(accessions):
257
  all_filename = f"{saveName}_all_merged_document.docx"
258
  print(chunk_filename, all_filename)
259
  # Define local temp paths for reading/writing
260
- import tempfile
261
- tmp_dir = tempfile.mkdtemp()
 
 
262
  file_chunk_path = os.path.join(tmp_dir, chunk_filename)
263
  file_all_path = os.path.join(tmp_dir, all_filename)
264
  # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)
 
76
  # return file["id"]
77
  def upload_file_to_drive(local_path, remote_name, folder_id):
78
  try:
79
+ if not os.path.exists(local_path):
80
+ raise FileNotFoundError(f"❌ Local file does not exist: {local_path}")
81
+
82
+ # Delete existing file on Drive if present
83
  existing = drive_service.files().list(
84
  q=f"name='{remote_name}' and '{folder_id}' in parents and trashed = false",
85
  fields="files(id)"
 
89
  drive_service.files().delete(fileId=existing[0]["id"]).execute()
90
  print(f"🗑️ Deleted existing '{remote_name}' in Drive folder {folder_id}")
91
 
 
92
  file_metadata = {"name": remote_name, "parents": [folder_id]}
93
  media = MediaFileUpload(local_path, resumable=True)
94
  file = drive_service.files().create(
 
96
  media_body=media,
97
  fields="id"
98
  ).execute()
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
+ print(f"✅ Uploaded '{remote_name}' to Google Drive folder ID: {folder_id}")
101
  return file["id"]
102
 
103
  except Exception as e:
 
248
  all_filename = f"{saveName}_all_merged_document.docx"
249
  print(chunk_filename, all_filename)
250
  # Define local temp paths for reading/writing
251
+ # import tempfile
252
+ # tmp_dir = tempfile.mkdtemp()
253
+ tmp_dir = "/mnt/data/generated_docs"
254
+ os.makedirs(tmp_dir, exist_ok=True)
255
  file_chunk_path = os.path.join(tmp_dir, chunk_filename)
256
  file_all_path = os.path.join(tmp_dir, all_filename)
257
  # file_chunk_path = os.path.join(tempfile.gettempdir(), chunk_filename)