Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +54 -9
pipeline.py
CHANGED
@@ -61,14 +61,58 @@ def find_drive_file(filename, parent_id):
|
|
61 |
return None
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def upload_file_to_drive(local_path, remote_name, folder_id):
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
def download_file_from_drive(remote_name, folder_id, local_path):
|
74 |
results = drive_service.files().list(q=f"name='{remote_name}' and '{folder_id}' in parents", fields="files(id)").execute()
|
@@ -223,10 +267,10 @@ def pipeline_with_gemini(accessions):
|
|
223 |
# Remove the local file first if it exists
|
224 |
if os.path.exists(file_chunk_path):
|
225 |
os.remove(file_chunk_path)
|
226 |
-
|
227 |
if os.path.exists(file_all_path):
|
228 |
-
|
229 |
os.remove(file_all_path)
|
|
|
230 |
# Try to download if already exists on Drive
|
231 |
chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
|
232 |
all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
|
@@ -378,6 +422,7 @@ def pipeline_with_gemini(accessions):
|
|
378 |
upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)
|
379 |
upload_file_to_drive(file_all_path, all_filename, sample_folder_id)
|
380 |
print("here 1")
|
|
|
381 |
# else:
|
382 |
# final_input = ""
|
383 |
# if all_output:
|
|
|
61 |
return None
|
62 |
|
63 |
|
64 |
+
# def upload_file_to_drive(local_path, remote_name, folder_id):
|
65 |
+
# file_metadata = {"name": remote_name, "parents": [folder_id]}
|
66 |
+
# media = MediaFileUpload(local_path, resumable=True)
|
67 |
+
# existing = drive_service.files().list(q=f"name='{remote_name}' and '{folder_id}' in parents", fields="files(id)").execute().get("files", [])
|
68 |
+
# if existing:
|
69 |
+
# drive_service.files().delete(fileId=existing[0]["id"]).execute()
|
70 |
+
# file = drive_service.files().create(body=file_metadata, media_body=media, fields="id").execute()
|
71 |
+
# result = drive_service.files().list(q=f"name='{remote_name}' and '{folder_id}' in parents", fields="files(id)").execute()
|
72 |
+
# if not result.get("files"):
|
73 |
+
# print(f"❌ Upload failed: File '{remote_name}' not found in folder after upload.")
|
74 |
+
# else:
|
75 |
+
# print(f"✅ Verified upload: {remote_name}")
|
76 |
+
# return file["id"]
|
77 |
def upload_file_to_drive(local_path, remote_name, folder_id):
    """Upload a local file into a Drive folder, replacing older same-named copies.

    The new copy is uploaded first and previously existing files with the
    same name are removed only afterwards, so a missing local file or a
    failed upload never destroys the copy that is already on Drive.

    Relies on the module-level ``drive_service`` client and the
    ``MediaFileUpload`` class, both defined/imported elsewhere in this file
    (presumably a google-api-python-client Drive v3 service -- confirm).

    Args:
        local_path: Path of the local file to upload.
        remote_name: Name the file should have inside the Drive folder.
        folder_id: ID of the destination Drive folder.

    Returns:
        The ID of the newly created Drive file, or ``None`` if listing or
        uploading failed (the error is printed, not raised).
    """
    try:
        # Drive query literals are single-quoted; escape backslashes and
        # quotes so names such as "it's.xlsx" do not break the query.
        escaped_name = remote_name.replace("\\", "\\\\").replace("'", "\\'")
        query = f"name='{escaped_name}' and '{folder_id}' in parents and trashed = false"

        # Remember every copy that exists *before* the upload; these are the
        # ones that become stale once the new file is in place.
        stale_files = drive_service.files().list(
            q=query,
            fields="files(id)"
        ).execute().get("files", [])

        # Upload the new copy before touching the old ones.
        file_metadata = {"name": remote_name, "parents": [folder_id]}
        media = MediaFileUpload(local_path, resumable=True)
        created = drive_service.files().create(
            body=file_metadata,
            media_body=media,
            fields="id"
        ).execute()
        new_id = created["id"]
        print(f"✅ Uploaded '{remote_name}' to Google Drive folder ID: {folder_id}")

        # Drive allows several files with the same name in one folder, so
        # remove all older copies, not just the first match.
        for stale in stale_files:
            try:
                drive_service.files().delete(fileId=stale["id"]).execute()
                print(f"🗑️ Deleted existing '{remote_name}' in Drive folder {folder_id}")
            except Exception as cleanup_error:
                # The upload itself succeeded; a cleanup failure must not
                # turn the overall result into a reported upload error.
                print(f"❌ Could not delete old copy {stale['id']} of '{remote_name}': {cleanup_error}")

        # Optional verification: the freshly created ID must be listed.
        listed = drive_service.files().list(
            q=query,
            fields="files(id, name)"
        ).execute().get("files", [])

        if not any(item["id"] == new_id for item in listed):
            print(f"❌ Upload verification failed: '{remote_name}' not found after upload.")
        else:
            print(f"✅ Verified upload: {remote_name} (ID: {new_id})")

        return new_id

    except Exception as e:
        # Best-effort boundary: callers ignore the return value, so report
        # the failure instead of propagating it.
        print(f"❌ Error during upload: {e}")
        return None
|
115 |
+
|
116 |
|
117 |
def download_file_from_drive(remote_name, folder_id, local_path):
|
118 |
results = drive_service.files().list(q=f"name='{remote_name}' and '{folder_id}' in parents", fields="files(id)").execute()
|
|
|
267 |
# Remove the local file first if it exists
|
268 |
if os.path.exists(file_chunk_path):
|
269 |
os.remove(file_chunk_path)
|
270 |
+
print("remove chunk path")
|
271 |
if os.path.exists(file_all_path):
|
|
|
272 |
os.remove(file_all_path)
|
273 |
+
print("remove all path")
|
274 |
# Try to download if already exists on Drive
|
275 |
chunk_exists = download_file_from_drive(chunk_filename, sample_folder_id, file_chunk_path)
|
276 |
all_exists = download_file_from_drive(all_filename, sample_folder_id, file_all_path)
|
|
|
422 |
upload_file_to_drive(file_chunk_path, chunk_filename, sample_folder_id)
|
423 |
upload_file_to_drive(file_all_path, all_filename, sample_folder_id)
|
424 |
print("here 1")
|
425 |
+
|
426 |
# else:
|
427 |
# final_input = ""
|
428 |
# if all_output:
|