Spaces:
Running
Running
Update pipeline.py
Browse files- pipeline.py +48 -20
pipeline.py
CHANGED
@@ -155,15 +155,46 @@ def download_drive_file_content(file_id):
|
|
155 |
# if isinstance(result, Exception):
|
156 |
# raise result
|
157 |
# return True, result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
def run_with_timeout(func, args=(), kwargs={}, timeout=30):
|
159 |
-
|
160 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
161 |
-
future = executor.submit(func, *args, **kwargs)
|
162 |
try:
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
def time_it(func, *args, **kwargs):
|
169 |
"""
|
@@ -354,18 +385,15 @@ def pipeline_with_gemini(accessions,niche_cases=None):
|
|
354 |
print("tem link before filtering: ", tem_links)
|
355 |
# filter the quality link
|
356 |
print("saveLinkFolder as sample folder id: ", sample_folder_id)
|
357 |
-
print("start the smart filter link")
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
#
|
366 |
-
# except:
|
367 |
-
# links = tem_links
|
368 |
-
links = smart_fallback.filter_links_by_metadata(tem_links, saveLinkFolder=sample_folder_id, accession=acc)
|
369 |
print("this is links: ",links)
|
370 |
links = unique_preserve_order(links)
|
371 |
acc_score["source"] = links
|
@@ -410,7 +438,7 @@ def pipeline_with_gemini(accessions,niche_cases=None):
|
|
410 |
if iso != "unknown": query_kw = iso
|
411 |
else: query_kw = acc
|
412 |
#text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
|
413 |
-
success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw,"accession":acc},timeout=
|
414 |
if success_process:
|
415 |
text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
|
416 |
print("yes succeed for process document")
|
|
|
155 |
# if isinstance(result, Exception):
|
156 |
# raise result
|
157 |
# return True, result
|
158 |
+
# def run_with_timeout(func, args=(), kwargs={}, timeout=30):
|
159 |
+
# import concurrent.futures
|
160 |
+
# with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
161 |
+
# future = executor.submit(func, *args, **kwargs)
|
162 |
+
# try:
|
163 |
+
# return True, future.result(timeout=timeout)
|
164 |
+
# except concurrent.futures.TimeoutError:
|
165 |
+
# print(f"⏱️ Timeout exceeded ({timeout} sec) — function killed.")
|
166 |
+
# return False, None
|
167 |
+
|
168 |
+
import multiprocessing
|
169 |
+
|
170 |
def _run_with_timeout_worker(conn, func, args, kwargs):
    """Child-process entry point for ``run_with_timeout``.

    Calls ``func(*args, **kwargs)`` and sends ``(True, result)`` through
    ``conn``, or ``(False, exception)`` when ``func`` raises an ``Exception``.
    It lives at module level (not as a closure) so that it can be pickled
    when the multiprocessing start method is ``spawn`` or ``forkserver``.
    """
    try:
        try:
            outcome = (True, func(*args, **kwargs))
        except Exception as exc:
            outcome = (False, exc)
        # If the outcome cannot be pickled, send() raises here and nothing is
        # written; the parent then sees EOF and reports (False, None).
        conn.send(outcome)
    finally:
        conn.close()


def run_with_timeout(func, args=(), kwargs=None, timeout=30):
    """Run ``func(*args, **kwargs)`` in a separate process with a time limit.

    Args:
        func: Callable to execute. It, its arguments and its return value
            must be picklable because they cross a process boundary.
        args: Positional arguments for ``func``.
        kwargs: Keyword arguments for ``func``; ``None`` means no keyword
            arguments.
        timeout: Maximum number of seconds to wait for a result. ``None``
            waits indefinitely.

    Returns:
        ``(True, result)`` when ``func`` returned in time.
        ``(False, None)`` when the time limit was exceeded (the child process
        is terminated) or when the child exited without reporting a result.

    Raises:
        Exception: whatever ``Exception`` subclass ``func`` raised in the
            child process is re-raised in the caller.
    """
    call_kwargs = {} if kwargs is None else dict(kwargs)

    receiver, sender = multiprocessing.Pipe(duplex=False)
    worker = multiprocessing.Process(
        target=_run_with_timeout_worker,
        args=(sender, func, tuple(args), call_kwargs),
    )
    worker.start()
    # Drop the parent's copy of the write end so that a child which dies
    # without sending anything is seen as EOF instead of a full-length wait.
    sender.close()

    grace = 1  # seconds the child gets to exit on its own after reporting
    try:
        # Wait for the result *before* joining: a child that is still writing
        # a large result cannot exit until the parent has read it.
        if not receiver.poll(timeout):
            grace = 0
            print(f"⏱️ Timeout exceeded ({timeout} sec) — function killed.")
            return False, None
        try:
            success, result = receiver.recv()
        except EOFError:
            # Child exited (crash, sys.exit, unpicklable result) without
            # sending an outcome.
            return False, None
    finally:
        receiver.close()
        worker.join(grace)
        if worker.is_alive():
            worker.terminate()
            worker.join()

    if success:
        return True, result
    raise result  # re-raise the exception captured in the child process
|
197 |
+
|
198 |
|
199 |
def time_it(func, *args, **kwargs):
|
200 |
"""
|
|
|
385 |
print("tem link before filtering: ", tem_links)
|
386 |
# filter the quality link
|
387 |
print("saveLinkFolder as sample folder id: ", sample_folder_id)
|
388 |
+
print("start the smart filter link")
|
389 |
+
success_process, output_process = run_with_timeout(smart_fallback.filter_links_by_metadata,args=(tem_links,sample_folder_id),kwargs={"accession":acc})
|
390 |
+
if success_process:
|
391 |
+
links = output_process
|
392 |
+
print("yes succeed for smart filter link")
|
393 |
+
else:
|
394 |
+
print("no suceed, fallback to all tem links")
|
395 |
+
links = tem_links
|
396 |
+
#links = smart_fallback.filter_links_by_metadata(tem_links, saveLinkFolder=sample_folder_id, accession=acc)
|
|
|
|
|
|
|
397 |
print("this is links: ",links)
|
398 |
links = unique_preserve_order(links)
|
399 |
acc_score["source"] = links
|
|
|
438 |
if iso != "unknown": query_kw = iso
|
439 |
else: query_kw = acc
|
440 |
#text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
|
441 |
+
success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw,"accession":acc},timeout=100)
|
442 |
if success_process:
|
443 |
text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
|
444 |
print("yes succeed for process document")
|