libokj committed on
Commit
3d30dc9
·
verified ·
1 Parent(s): c72f721

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -30
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import concurrent.futures
2
  import glob
3
  import smtplib
4
- from datetime import datetime
5
  import itertools
6
  import textwrap
7
  from email.mime.multipart import MIMEMultipart
@@ -52,9 +52,9 @@ from deepscreen.predict import predict
52
  sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
53
  import sascorer
54
 
55
- UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
56
  DATASET_MAX_LEN = 10_000
57
  SERVER_DATA_DIR = os.getenv('DATA') # '/data'
 
58
 
59
  CSS = """
60
  .help-tip {
@@ -244,20 +244,32 @@ for job in jobs:
244
  scheduler = BackgroundScheduler()
245
 
246
 
 
 
 
 
 
 
 
 
 
 
247
  def check_expiry():
248
  Job = Query()
249
  jobs = db.all()
250
 
251
  for job in jobs:
252
  # Check if the job has expired
253
- if job['expiry_time'] < time():
254
- # Delete the job from the database
255
- db.remove(Job.id == job['id'])
256
- # Delete the corresponding file
257
- files = glob.glob(f"/data/{job['id']}*")
258
- for file_path in files:
259
- if os.path.exists(file_path):
260
- os.remove(file_path)
 
 
261
  elif job['status'] == 'RUNNING' and time() - job['start_time'] > 4 * 60 * 60: # 4 hours
262
  # Mark the job as failed
263
  db.update({'status': 'FAILED',
@@ -699,7 +711,7 @@ def submit_predict(predict_filepath, task, preset, target_family, state):
699
  job_query = (Job.id == job_id)
700
 
701
  end_time = time()
702
- expiry_time = end_time + 48 * 60 * 60 # Add 48 hours
703
 
704
  db.update({'end_time': end_time,
705
  'expiry_time': expiry_time,
@@ -758,7 +770,7 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
758
  raw_df: df,
759
  report_df: df.copy(),
760
  analyze_btn: gr.Button(interactive=True),
761
- report_task: gr.Dropdown(value=task)} # pie_chart
762
  else:
763
  return {analyze_btn: gr.Button(interactive=False)}
764
 
@@ -1567,7 +1579,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1567
  with gr.Column():
1568
  file_for_report = gr.File(interactive=True, type='filepath')
1569
  report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
1570
- label='Specify the Task for the Labels in the Upload Dataset')
1571
  raw_df = gr.State(value=pd.DataFrame())
1572
  report_df = gr.State(value=pd.DataFrame())
1573
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
@@ -1660,6 +1672,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1660
 
1661
 
1662
  def uniprot_query(input_type, uid, gene, organism='Human'):
 
1663
  fasta_rec = ''
1664
 
1665
  match input_type:
@@ -1670,11 +1683,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1670
  query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
1671
 
1672
  try:
1673
- fasta = session.get(UNIPROT_ENDPOINT.format(query=query))
1674
  fasta.raise_for_status()
1675
  if fasta.text:
1676
  fasta_rec = next(SeqIO.parse(io.StringIO(fasta.text), format='fasta'))
1677
- fasta_rec = f"{fasta_rec.description}\n{fasta_rec.seq}"
1678
 
1679
  except Exception as e:
1680
  raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
@@ -1698,18 +1711,21 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
1698
 
1699
 
1700
  def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
1701
- aligner = PairwiseAligner(scoring='blastp', mode='local')
1702
- alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')
1703
-
1704
- def align_score(query):
1705
- return aligner.align(process_target_fasta(fasta), query).score
1706
-
1707
- alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
1708
- desc="Detecting protein family of the target...").apply(align_score)
1709
- row = alignment_df.loc[alignment_df['score'].idxmax()]
1710
- return gr.Dropdown(value=row['protein_family'].capitalize(),
1711
- info=f"Reason: Best BLASTP score ({row['score']}) "
1712
- f"with {row['ID2']} from family {row['protein_family']}")
 
 
 
1713
 
1714
 
1715
  target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
@@ -2243,11 +2259,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2243
  return None
2244
 
2245
 
2246
- def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
2247
  try:
2248
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2249
  filename = f"/data/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
2250
- create_html_report(df, filename)
2251
  return gr.File(filename, visible=True)
2252
  except Exception as e:
2253
  gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
@@ -2261,7 +2277,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
2261
  outputs=csv_download_file, show_progress='full')
2262
  html_generate.click(
2263
  lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
2264
- ).then(fn=create_html_report_file, inputs=[report_df, file_for_report],
2265
  outputs=html_download_file, show_progress='full')
2266
 
2267
  if __name__ == "__main__":
 
1
  import concurrent.futures
2
  import glob
3
  import smtplib
4
+ from datetime import datetime, timedelta
5
  import itertools
6
  import textwrap
7
  from email.mime.multipart import MIMEMultipart
 
52
  sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
53
  import sascorer
54
 
 
55
  DATASET_MAX_LEN = 10_000
56
  SERVER_DATA_DIR = os.getenv('DATA') # '/data'
57
+ DB_EXPIRY = timedelta(hours=48).total_seconds()
58
 
59
  CSS = """
60
  .help-tip {
 
244
  scheduler = BackgroundScheduler()
245
 
246
 
247
def remove_job_record(job_id):
    """Delete a job's database record and every file stored under its ID.

    Args:
        job_id: Identifier of the job. It is matched against the ``id``
            field of the records in ``db`` and used as the filename prefix
            for the files to delete under ``/data/``.

    Returns:
        None. Nothing is done when ``job_id`` is ``None`` or empty.
    """
    # An empty ID would turn the pattern below into "/data/*" (and None into
    # "/data/None*"), deleting files that belong to other jobs.
    if job_id is None or job_id == "":
        return

    # Build the query here instead of depending on a ``Job`` name being
    # bound in an enclosing scope.
    db.remove(Query().id == job_id)

    # Escape the ID so any glob metacharacters in it are matched literally.
    pattern = f"/data/{glob.escape(str(job_id))}*"
    for file_path in glob.glob(pattern):
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # The file vanished between the glob and the delete; that is
            # already the state we want.
            pass
255
+
256
+
257
  def check_expiry():
258
  Job = Query()
259
  jobs = db.all()
260
 
261
  for job in jobs:
262
  # Check if the job has expired
263
+ if job['status'] != 'RUNNING':
264
+ expiry_time = job['expiry_time'] if job['expiry_time'] is not None else job['start_time'] + DB_EXPIRY
265
+ if expiry_time < time():
266
+ # Delete the job from the database
267
+ db.remove(Job.id == job['id'])
268
+ # Delete the corresponding file
269
+ files = glob.glob(f"/data/{job['id']}*")
270
+ for file_path in files:
271
+ if os.path.exists(file_path):
272
+ os.remove(file_path)
273
  elif job['status'] == 'RUNNING' and time() - job['start_time'] > 4 * 60 * 60: # 4 hours
274
  # Mark the job as failed
275
  db.update({'status': 'FAILED',
 
711
  job_query = (Job.id == job_id)
712
 
713
  end_time = time()
714
+ expiry_time = end_time + DB_EXPIRY
715
 
716
  db.update({'end_time': end_time,
717
  'expiry_time': expiry_time,
 
770
  raw_df: df,
771
  report_df: df.copy(),
772
  analyze_btn: gr.Button(interactive=True),
773
+ report_task: task} # pie_chart
774
  else:
775
  return {analyze_btn: gr.Button(interactive=False)}
776
 
 
1579
  with gr.Column():
1580
  file_for_report = gr.File(interactive=True, type='filepath')
1581
  report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
1582
+ label='Specify the Task Labels in the Upload Dataset')
1583
  raw_df = gr.State(value=pd.DataFrame())
1584
  report_df = gr.State(value=pd.DataFrame())
1585
  scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
 
1672
 
1673
 
1674
  def uniprot_query(input_type, uid, gene, organism='Human'):
1675
+ uniprot_endpoint = 'https://rest.uniprot.org/uniprotkb/{query}'
1676
  fasta_rec = ''
1677
 
1678
  match input_type:
 
1683
  query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'
1684
 
1685
  try:
1686
+ fasta = session.get(uniprot_endpoint.format(query=query))
1687
  fasta.raise_for_status()
1688
  if fasta.text:
1689
  fasta_rec = next(SeqIO.parse(io.StringIO(fasta.text), format='fasta'))
1690
+ fasta_rec = f">{fasta_rec.description}\n{fasta_rec.seq}"
1691
 
1692
  except Exception as e:
1693
  raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
 
1711
 
1712
 
1713
  def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
1714
+ try:
1715
+ aligner = PairwiseAligner(scoring='blastp', mode='local')
1716
+ alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')
1717
+
1718
+ def align_score(query):
1719
+ return aligner.align(process_target_fasta(fasta), query).score
1720
+
1721
+ alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
1722
+ desc="Detecting protein family of the target...").apply(align_score)
1723
+ row = alignment_df.loc[alignment_df['score'].idxmax()]
1724
+ return gr.Dropdown(value=row['protein_family'].capitalize(),
1725
+ info=f"Reason: Best BLASTP score ({row['score']}) "
1726
+ f"with {row['ID2']} from family {row['protein_family']}")
1727
+ except Exception as e:
1728
+ gr.Warning("Failed to detect the protein family due to error: " + str(e))
1729
 
1730
 
1731
  target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
 
2259
  return None
2260
 
2261
 
2262
+ def create_html_report_file(df, file_report, task, progress=gr.Progress(track_tqdm=True)):
2263
  try:
2264
  now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
2265
  filename = f"/data/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
2266
+ create_html_report(df, filename, task)
2267
  return gr.File(filename, visible=True)
2268
  except Exception as e:
2269
  gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
 
2277
  outputs=csv_download_file, show_progress='full')
2278
  html_generate.click(
2279
  lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
2280
+ ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task],
2281
  outputs=html_download_file, show_progress='full')
2282
 
2283
  if __name__ == "__main__":