VyLala committed on
Commit
4ed9bfa
·
verified ·
1 Parent(s): a65b531

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +48 -20
pipeline.py CHANGED
@@ -155,15 +155,46 @@ def download_drive_file_content(file_id):
155
  # if isinstance(result, Exception):
156
  # raise result
157
  # return True, result
 
 
 
 
 
 
 
 
 
 
 
 
158
  def run_with_timeout(func, args=(), kwargs={}, timeout=30):
159
- import concurrent.futures
160
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
161
- future = executor.submit(func, *args, **kwargs)
162
  try:
163
- return True, future.result(timeout=timeout)
164
- except concurrent.futures.TimeoutError:
165
- print(f"⏱️ Timeout exceeded ({timeout} sec) — function killed.")
166
- return False, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  def time_it(func, *args, **kwargs):
169
  """
@@ -354,18 +385,15 @@ def pipeline_with_gemini(accessions,niche_cases=None):
354
  print("tem link before filtering: ", tem_links)
355
  # filter the quality link
356
  print("saveLinkFolder as sample folder id: ", sample_folder_id)
357
- print("start the smart filter link")
358
- # try:
359
- # success_process, output_process = run_with_timeout(smart_fallback.filter_links_by_metadata,args=(tem_links,sample_folder_id),kwargs={"accession":acc})
360
- # if success_process:
361
- # links = output_process
362
- # print("yes succeed for smart filter link")
363
- # else:
364
- # print("no suceed, fallback to all tem links")
365
- # links = tem_links
366
- # except:
367
- # links = tem_links
368
- links = smart_fallback.filter_links_by_metadata(tem_links, saveLinkFolder=sample_folder_id, accession=acc)
369
  print("this is links: ",links)
370
  links = unique_preserve_order(links)
371
  acc_score["source"] = links
@@ -410,7 +438,7 @@ def pipeline_with_gemini(accessions,niche_cases=None):
410
  if iso != "unknown": query_kw = iso
411
  else: query_kw = acc
412
  #text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
413
- success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw,"accession":acc},timeout=180)
414
  if success_process:
415
  text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
416
  print("yes succeed for process document")
 
155
  # if isinstance(result, Exception):
156
  # raise result
157
  # return True, result
158
+ # def run_with_timeout(func, args=(), kwargs={}, timeout=30):
159
+ # import concurrent.futures
160
+ # with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
161
+ # future = executor.submit(func, *args, **kwargs)
162
+ # try:
163
+ # return True, future.result(timeout=timeout)
164
+ # except concurrent.futures.TimeoutError:
165
+ # print(f"⏱️ Timeout exceeded ({timeout} sec) — function killed.")
166
+ # return False, None
167
+
168
+ import multiprocessing
169
+
170
  def run_with_timeout(func, args=(), kwargs={}, timeout=30):
171
+ def wrapper(q, *args, **kwargs):
 
 
172
  try:
173
+ result = func(*args, **kwargs)
174
+ q.put((True, result))
175
+ except Exception as e:
176
+ q.put((False, e))
177
+
178
+ q = multiprocessing.Queue()
179
+ p = multiprocessing.Process(target=wrapper, args=(q, *args), kwargs=kwargs)
180
+ p.start()
181
+ p.join(timeout)
182
+
183
+ if p.is_alive():
184
+ p.terminate()
185
+ p.join()
186
+ print(f"⏱️ Timeout exceeded ({timeout} sec) — function killed.")
187
+ return False, None
188
+
189
+ if not q.empty():
190
+ success, result = q.get()
191
+ if success:
192
+ return True, result
193
+ else:
194
+ raise result # re-raise exception if needed
195
+
196
+ return False, None
197
+
198
 
199
  def time_it(func, *args, **kwargs):
200
  """
 
385
  print("tem link before filtering: ", tem_links)
386
  # filter the quality link
387
  print("saveLinkFolder as sample folder id: ", sample_folder_id)
388
+ print("start the smart filter link")
389
+ success_process, output_process = run_with_timeout(smart_fallback.filter_links_by_metadata,args=(tem_links,sample_folder_id),kwargs={"accession":acc})
390
+ if success_process:
391
+ links = output_process
392
+ print("yes succeed for smart filter link")
393
+ else:
394
+ print("no suceed, fallback to all tem links")
395
+ links = tem_links
396
+ #links = smart_fallback.filter_links_by_metadata(tem_links, saveLinkFolder=sample_folder_id, accession=acc)
 
 
 
397
  print("this is links: ",links)
398
  links = unique_preserve_order(links)
399
  acc_score["source"] = links
 
438
  if iso != "unknown": query_kw = iso
439
  else: query_kw = acc
440
  #text_link, tables_link, final_input_link = data_preprocess.preprocess_document(link,saveLinkFolder, isolate=query_kw)
441
+ success_process, output_process = run_with_timeout(data_preprocess.preprocess_document,args=(link,sample_folder_id),kwargs={"isolate":query_kw,"accession":acc},timeout=100)
442
  if success_process:
443
  text_link, tables_link, final_input_link = output_process[0], output_process[1], output_process[2]
444
  print("yes succeed for process document")