Update app.py
app.py
CHANGED
@@ -1,7 +1,7 @@
 import concurrent.futures
 import glob
 import smtplib
-from datetime import datetime
+from datetime import datetime, timedelta
 import itertools
 import textwrap
 from email.mime.multipart import MIMEMultipart
@@ -52,9 +52,9 @@ from deepscreen.predict import predict
 sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
 import sascorer

-UNIPROT_ENDPOINT = 'https://rest.uniprot.org/uniprotkb/{query}'
 DATASET_MAX_LEN = 10_000
 SERVER_DATA_DIR = os.getenv('DATA') # '/data'
+DB_EXPIRY = timedelta(hours=48).total_seconds()

 CSS = """
 .help-tip {
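`DB_EXPIRY` is stored as a plain float of seconds, which makes it directly comparable to the Unix timestamps that `time.time()` produces later in the file. A minimal sketch of that arithmetic (variable names below are illustrative, not taken from app.py):

from datetime import timedelta
from time import time

DB_EXPIRY = timedelta(hours=48).total_seconds()   # 172800.0 seconds

end_time = time()                    # a hypothetical job finishing now
expiry_time = end_time + DB_EXPIRY   # its record would expire 48 hours later
print(expiry_time - end_time)        # 172800.0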
@@ -244,20 +244,32 @@ for job in jobs:
 scheduler = BackgroundScheduler()


+def remove_job_record(job_id):
+    # Delete the job from the database
+    db.remove(Job.id == job_id)
+    # Delete the corresponding files
+    files = glob.glob(f"/data/{job_id}*")
+    for file_path in files:
+        if os.path.exists(file_path):
+            os.remove(file_path)
+
+
 def check_expiry():
     Job = Query()
     jobs = db.all()

     for job in jobs:
         # Check if the job has expired
-        if job['
-
-
-
-
-
-
-
+        if job['status'] != 'RUNNING':
+            expiry_time = job['expiry_time'] if job['expiry_time'] is not None else job['start_time'] + DB_EXPIRY
+            if expiry_time < time():
+                # Delete the job from the database
+                db.remove(Job.id == job['id'])
+                # Delete the corresponding file
+                files = glob.glob(f"/data/{job['id']}*")
+                for file_path in files:
+                    if os.path.exists(file_path):
+                        os.remove(file_path)
         elif job['status'] == 'RUNNING' and time() - job['start_time'] > 4 * 60 * 60: # 4 hours
             # Mark the job as failed
             db.update({'status': 'FAILED',
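The hunk defines the expiry sweep and the per-job cleanup, but the call that actually schedules `check_expiry` on the `BackgroundScheduler` sits outside the visible context. Assuming the usual APScheduler plus TinyDB setup (the database path and the sweep interval below are assumptions, not taken from app.py), the pieces would typically be wired together like this:

from time import time

from apscheduler.schedulers.background import BackgroundScheduler
from tinydb import TinyDB, Query

db = TinyDB('db.json')   # hypothetical path; the app's actual DB file is not shown in the diff
Job = Query()

def check_expiry():
    # Sweep all job records and drop the ones past their expiry timestamp.
    for job in db.all():
        expiry_time = job.get('expiry_time') or job['start_time'] + 48 * 3600
        if job['status'] != 'RUNNING' and expiry_time < time():
            db.remove(Job.id == job['id'])

scheduler = BackgroundScheduler()
scheduler.add_job(check_expiry, 'interval', hours=1)  # interval is an assumption
scheduler.start()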
@@ -699,7 +711,7 @@ def submit_predict(predict_filepath, task, preset, target_family, state):
     job_query = (Job.id == job_id)

     end_time = time()
-    expiry_time = end_time +
+    expiry_time = end_time + DB_EXPIRY

     db.update({'end_time': end_time,
                'expiry_time': expiry_time,
@@ -758,7 +770,7 @@ def update_df(file, progress=gr.Progress(track_tqdm=True)):
                 raw_df: df,
                 report_df: df.copy(),
                 analyze_btn: gr.Button(interactive=True),
-                report_task:
+                report_task: task} # pie_chart
     else:
         return {analyze_btn: gr.Button(interactive=False)}

@@ -1567,7 +1579,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
         with gr.Column():
             file_for_report = gr.File(interactive=True, type='filepath')
             report_task = gr.Dropdown(list(TASK_MAP.keys()), visible=False, value=None,
-                                      label='Specify the Task
+                                      label='Specify the Task Labels in the Upload Dataset')
             raw_df = gr.State(value=pd.DataFrame())
             report_df = gr.State(value=pd.DataFrame())
             scores = gr.CheckboxGroup(list(SCORE_MAP.keys()), label='Scores')
@@ -1660,6 +1672,7 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48


     def uniprot_query(input_type, uid, gene, organism='Human'):
+        uniprot_endpoint = 'https://rest.uniprot.org/uniprotkb/{query}'
         fasta_rec = ''

         match input_type:
@@ -1670,11 +1683,11 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48
                query = f'search?query=organism_name:{organism.strip()}+AND+gene:{gene.strip()}&format=fasta'

        try:
-            fasta = session.get(
+            fasta = session.get(uniprot_endpoint.format(query=query))
            fasta.raise_for_status()
            if fasta.text:
                fasta_rec = next(SeqIO.parse(io.StringIO(fasta.text), format='fasta'))
-                fasta_rec = f"{fasta_rec.description}\n{fasta_rec.seq}"
+                fasta_rec = f">{fasta_rec.description}\n{fasta_rec.seq}"

        except Exception as e:
            raise gr.Warning(f"Failed to query FASTA from UniProt database due to {str(e)}")
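The UniProt request that `uniprot_query` builds can be reproduced standalone. This sketch uses plain `requests` and Biopython's `SeqIO`; the gene in the example query (EGFR) is illustrative, not something the diff queries:

import io

import requests
from Bio import SeqIO

uniprot_endpoint = 'https://rest.uniprot.org/uniprotkb/{query}'
query = 'search?query=organism_name:Human+AND+gene:EGFR&format=fasta'

resp = requests.get(uniprot_endpoint.format(query=query))
resp.raise_for_status()
if resp.text:
    rec = next(SeqIO.parse(io.StringIO(resp.text), format='fasta'))
    # Same '>' header format the new app.py code emits
    fasta_rec = f">{rec.description}\n{rec.seq}"
    print(fasta_rec[:80])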
@@ -1698,18 +1711,21 @@ with gr.Blocks(theme=theme, title='DeepSEQreen', css=CSS, delete_cache=(3600, 48


     def target_family_detect(fasta, progress=gr.Progress(track_tqdm=True)):
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            aligner = PairwiseAligner(scoring='blastp', mode='local')
+            alignment_df = pd.read_csv('data/target_libraries/ChEMBL33_all_spe_single_prot_info.csv')
+
+            def align_score(query):
+                return aligner.align(process_target_fasta(fasta), query).score
+
+            alignment_df['score'] = alignment_df['X2'].swifter.progress_bar(
+                desc="Detecting protein family of the target...").apply(align_score)
+            row = alignment_df.loc[alignment_df['score'].idxmax()]
+            return gr.Dropdown(value=row['protein_family'].capitalize(),
+                               info=f"Reason: Best BLASTP score ({row['score']}) "
+                                    f"with {row['ID2']} from family {row['protein_family']}")
+        except Exception as e:
+            gr.Warning("Failed to detect the protein family due to error: " + str(e))


     target_family_detect_btn.click(fn=target_family_detect, inputs=target_fasta, outputs=drug_screen_target_family)
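The new `target_family_detect` scores the query sequence against a ChEMBL-derived library with Biopython's `PairwiseAligner` and returns the best-scoring family. A self-contained sketch of that scoring step, reusing the same `PairwiseAligner(scoring='blastp', mode='local')` call but with a tiny made-up candidate table instead of ChEMBL33_all_spe_single_prot_info.csv and plain pandas `.apply` instead of swifter:

import pandas as pd
from Bio.Align import PairwiseAligner

# BLASTP-style local alignment, as in the new app.py code
aligner = PairwiseAligner(scoring='blastp', mode='local')

query_seq = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"   # illustrative query sequence
candidates = pd.DataFrame({                      # stand-in for the ChEMBL library
    'ID2': ['P1', 'P2'],
    'protein_family': ['kinase', 'protease'],
    'X2': ["MKTAYIAKQRQISFVKSHFSRQLEERLGLIEV", "MSTNPKPQRKTKRNTNRRPQDVKFPGG"],
})

# Score every candidate and keep the best hit, mirroring idxmax() in the diff
candidates['score'] = candidates['X2'].apply(lambda seq: aligner.score(query_seq, seq))
best = candidates.loc[candidates['score'].idxmax()]
print(best['protein_family'], best['score'])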
@@ -2243,11 +2259,11 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
         return None


-    def create_html_report_file(df, file_report, progress=gr.Progress(track_tqdm=True)):
+    def create_html_report_file(df, file_report, task, progress=gr.Progress(track_tqdm=True)):
         try:
             now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
             filename = f"/data/{Path(file_report.name).stem}_DeepSEQreen_report_{now}.html"
-            create_html_report(df, filename)
+            create_html_report(df, filename, task)
             return gr.File(filename, visible=True)
         except Exception as e:
             gr.Warning(f"Failed to generate HTML due to error: {str(e)}")
@@ -2261,7 +2277,7 @@ QALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES
                       outputs=csv_download_file, show_progress='full')
     html_generate.click(
         lambda: [gr.Button(visible=False), gr.File(visible=True)], outputs=[html_generate, html_download_file],
-    ).then(fn=create_html_report_file, inputs=[report_df, file_for_report],
+    ).then(fn=create_html_report_file, inputs=[report_df, file_for_report, report_task],
           outputs=html_download_file, show_progress='full')

 if __name__ == "__main__":
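The `.click(...).then(...)` chain here runs the cheap UI update (hide the button, reveal the file slot) before the slower report generation. A minimal Gradio sketch of the same chaining, with placeholder component and function names rather than the ones from app.py:

import gradio as gr

def slow_step():
    # Stand-in for create_html_report_file: return the file component update
    return gr.File(value=None, visible=True)

with gr.Blocks() as demo:
    generate = gr.Button("Generate report")
    download = gr.File(visible=False)

    generate.click(
        lambda: [gr.Button(visible=False), gr.File(visible=True)],
        outputs=[generate, download],
    ).then(fn=slow_step, outputs=download)

demo.launch()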