# Hugging Face Spaces status banner captured with the page ("Spaces: Sleeping");
# it is not part of the program.
import streamlit as st | |
import zipfile | |
import urllib.request | |
import glob | |
import SigProfilerMatrixGenerator | |
from SigProfilerMatrixGenerator import install as genInstall | |
import shutil | |
import os | |
from SigProfilerExtractor import sigpro as sig | |
import sys | |
import numpy as np | |
import base64 | |
import streamlit.components.v1 as components | |
# Working directory at the moment the script runs; nothing in the visible
# code reads this value afterwards.
curdir = os.getcwd()
def remove_old_vcf():
    """Delete leftover ``.vcf`` files from the upload staging folders.

    ``input/`` is scanned first and ``input/input/`` second (both relative to
    the current working directory); every match is removed with
    ``os.remove``.  Folders that do not exist simply produce no matches.
    """
    for pattern in ('input/*.vcf', 'input/input/*.vcf'):
        for stale_vcf in glob.glob(pattern):
            os.remove(stale_vcf)
def show_pdf(file_path):
    """Embed the PDF stored at ``file_path`` in the Streamlit page.

    The file is read in binary mode, base64-encoded, and placed in a
    ``data:application/pdf`` URI inside a 1500x1000 ``<iframe>`` that is
    handed to ``st.markdown`` with raw HTML enabled.
    """
    with open(file_path, "rb") as pdf_handle:
        raw_pdf = pdf_handle.read()
    encoded_pdf = base64.b64encode(raw_pdf).decode('utf-8')
    iframe_html = (
        f'<iframe src="data:application/pdf;base64,{encoded_pdf}" '
        'width="1500" height="1000" type="application/pdf"></iframe>'
    )
    st.markdown(iframe_html, unsafe_allow_html=True)
#@st.cache_data(experimental_allow_widgets=True) | |
def showdl(file_to_lookat, to_dl_sbs, to_dl_indel, to_dl_dbs,
           to_dl_sbs_text, to_dl_indel_text, to_dl_dbs_text):
    """Render download links for the per-file signature results.

    Links are emitted with ``st.markdown`` in three passes: all single base
    substitution (SBS) results, then all indel results, then all double base
    substitution (DBS) results.  Within a pass, entry ``k`` of each result
    list is labelled with ``file_to_lookat[k].name``.

    Args:
        file_to_lookat: Uploaded-file objects; only ``.name`` is read.
        to_dl_sbs, to_dl_indel, to_dl_dbs: Per-file PDF contents as bytes,
            or ``[]`` when that result does not exist (such entries are
            skipped together with their table).
        to_dl_sbs_text, to_dl_indel_text, to_dl_dbs_text: Per-file table
            contents as bytes, positionally matching the PDF lists.

    Returns:
        None.
    """
    link_groups = (
        (to_dl_sbs, to_dl_sbs_text, 'SBS', 'Single Base Substitution'),
        (to_dl_indel, to_dl_indel_text, 'Indel', 'indel'),
        (to_dl_dbs, to_dl_dbs_text, 'DBS', 'Double Base Substitution'),
    )
    for pdf_payloads, table_payloads, suffix, label in link_groups:
        for uploaded, pdf_bytes, table_bytes in zip(
                file_to_lookat, pdf_payloads, table_payloads):
            # ``[]`` is the placeholder used when a result was not produced.
            if pdf_bytes == []:
                continue
            name = uploaded.name
            st.markdown(
                _download_link(pdf_bytes, f'{name}{suffix}.pdf',
                               f'Download {name} {label} pdf'),
                unsafe_allow_html=True,
            )
            st.markdown(
                _download_link(table_bytes, f'{name}{suffix}.txt',
                               f'Download {name} {label} table'),
                unsafe_allow_html=True,
            )


def _download_link(payload, filename, label):
    """Return an ``<a>`` tag that downloads ``payload`` under ``filename``."""
    # Local import: the file does not import ``html`` at module level.
    from html import escape

    encoded = base64.b64encode(payload).decode()
    # The tag is assembled without line continuations inside the literals, so
    # no stray whitespace ends up in the data URI or the download file name.
    # File name and label come from the upload, hence the HTML escaping.
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{escape(filename, quote=True)}">{escape(label)}</a>'
    )
def dl(valforkey):
    """Collect the result files of the latest run, then delete the run folders.

    Every PDF found in the SBS96 decomposed-solution folder is announced with
    ``st.write``.  The SBS96 plot PDF and ``output/SBS96/Samples.txt`` are
    always read; the ID83 and DBS78 counterparts are read only when their
    plot PDF exists.  Afterwards the ``output`` and ``input`` directories are
    removed.

    Args:
        valforkey: Not used by this function; kept for existing callers.

    Returns:
        A 6-tuple ``(sbs_pdf, id_pdf, dbs_pdf, sbs_table, id_table,
        dbs_table)``.  Each item is the file content as bytes; the ID83 and
        DBS78 items are ``[]`` when the corresponding plot PDF is absent.

    Raises:
        FileNotFoundError: If an SBS96 file is missing, or if an ID83/DBS78
            plot PDF exists but its ``Samples.txt`` does not.  Nothing is
            deleted in that case.
    """
    sbs_dir = 'output/SBS96/Suggested_Solution/COSMIC_SBS96_Decomposed_Solution'
    for found_pdf in glob.glob(sbs_dir + '/*pdf'):
        st.write('pdf file with sbs96 output is here: ' + found_pdf)

    PDFbyte1, Txtbyte1 = _read_result_pair(
        sbs_dir + '/SBS96_Decomposition_Plots.pdf',
        'output/SBS96/Samples.txt',
        required=True,
    )
    PDFbyte2, Txtbyte2 = _read_result_pair(
        'output/ID83/Suggested_Solution/COSMIC_ID83_Decomposed_Solution/ID83_Decomposition_Plots.pdf',
        'output/ID83/Samples.txt',
    )
    PDFbyte3, Txtbyte3 = _read_result_pair(
        'output/DBS78/Suggested_Solution/COSMIC_DBS78_Decomposed_Solution/DBS78_Decomposition_Plots.pdf',
        'output/DBS78/Samples.txt',
    )

    # Best-effort cleanup without spawning a shell; a missing directory is
    # not an error, matching the previous ``rm -r`` behaviour.
    shutil.rmtree('output', ignore_errors=True)
    shutil.rmtree('input', ignore_errors=True)
    return PDFbyte1, PDFbyte2, PDFbyte3, Txtbyte1, Txtbyte2, Txtbyte3


def _read_result_pair(pdf_path, table_path, required=False):
    """Return ``(pdf_bytes, table_bytes)``, or ``([], [])`` if an optional PDF is absent."""
    if not required and not os.path.isfile(pdf_path):
        return [], []
    with open(pdf_path, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    with open(table_path, "rb") as table_file:
        table_bytes = table_file.read()
    return pdf_bytes, table_bytes
#st.write(glob.glob(os.path.join(os.path.dirname(SigProfilerMatrixGenerator.__file__),'references/*txt'))) | |
# Input form: reports whether a GRCh37/GRCh38 reference is already installed
# next to the SigProfilerMatrixGenerator package, then collects the genome
# choice and the VCF uploads.
with st.form('get signature'):
    _reference_dirs = os.path.join(
        os.path.dirname(SigProfilerMatrixGenerator.__file__),
        'references/chromosomes/tsb/GRCh3[78]/',
    )
    # True when no ``*txt`` file exists under either genome folder.
    refdownload = not glob.glob(_reference_dirs + '*txt')
    if refdownload:
        st.write('There is no reference genome, we need to download this')
    else:
        st.write('using reference from here:' + glob.glob(_reference_dirs)[0])
    referencegenome = st.radio('reference', ['hg19', 'GRCh38'])
    file_to_lookat = st.file_uploader(
        'VCF upload here', type=[".vcf"], accept_multiple_files=True
    )
    # Clear VCF files left behind in the staging folders.
    remove_old_vcf()
    sub = st.form_submit_button('submit input')
# Runs after the form is submitted with at least one upload: makes sure the
# selected reference genome is installed, extracts signatures for each VCF
# separately, and finally renders the download links.
# NOTE(review): the nesting of this section was reconstructed from data flow
# because the original indentation was lost; in particular, confirm that the
# COSMIC iframe at the end is meant to appear only after a submission.
if file_to_lookat and sub:
    dirtest = os.path.dirname(SigProfilerMatrixGenerator.__file__)

    # 'hg19' on the form corresponds to the 'GRCh37' reference folder.
    genome_build = 'GRCh37' if referencegenome == 'hg19' else referencegenome
    genome_dir = os.path.join(
        dirtest, 'references', 'chromosomes', 'tsb', genome_build
    )
    # Check the *selected* build: having only the other build installed must
    # still trigger a download.
    if not glob.glob(os.path.join(genome_dir, '*txt')):
        if referencegenome == 'hg19':
            with st.spinner('downloading hg19 reference'):
                urllib.request.urlretrieve('https://dl.dropboxusercontent.com/s/et97ewsct862x7m/references.zip?dl=0', 'references.zip')
                with zipfile.ZipFile('references.zip', 'r') as zip_ref:
                    zip_ref.extractall(dirtest)
        elif referencegenome == 'GRCh38':
            with st.spinner('downloading GRCh38 reference'):
                genInstall.install('GRCh38')

    to_dl_sbs = []
    to_dl_indel = []
    to_dl_dbs = []
    to_dl_sbs_text = []
    to_dl_indel_text = []
    to_dl_dbs_text = []
    for j, uploaded in enumerate(file_to_lookat):
        # dl() deletes 'input' after every successful run, so recreate it.
        os.makedirs('input/input', exist_ok=True)
        remove_old_vcf()
        # basename() keeps the upload inside 'input' whatever its name is.
        staged_path = os.path.join("input", os.path.basename(uploaded.name))
        with open(staged_path, "wb") as f:
            f.write(uploaded.read())
        with st.spinner('computing signatures'):
            # Pass the chosen build explicitly; otherwise the library default
            # is used regardless of the form selection.
            sig.sigProfilerExtractor(
                "vcf", "output", "input",
                reference_genome=genome_build,
                minimum_signatures=1, maximum_signatures=3,
                nmf_test_conv=1000, nmf_tolerance=1e-10,
                max_nmf_iterations=100000, min_nmf_iterations=1000,
            )
        if glob.glob('output/SBS96/Suggested_Solution/COSMIC_SBS96_Decomposed_Solution/*pdf'):
            sbs_result, indel_result, dbs_result, sbs_text, indel_text, dbs_text = dl(j)
        else:
            # Keep every result list aligned with file_to_lookat: showdl()
            # pairs entries by position and skips the ``[]`` placeholder.
            sbs_result, indel_result, dbs_result = [], [], []
            sbs_text, indel_text, dbs_text = [], [], []
        to_dl_sbs.append(sbs_result)
        to_dl_sbs_text.append(sbs_text)
        to_dl_indel.append(indel_result)
        to_dl_indel_text.append(indel_text)
        to_dl_dbs.append(dbs_result)
        to_dl_dbs_text.append(dbs_text)
        remove_old_vcf()

    showdl(file_to_lookat, to_dl_sbs, to_dl_indel, to_dl_dbs,
           to_dl_sbs_text, to_dl_indel_text, to_dl_dbs_text)
    components.iframe("https://cancer.sanger.ac.uk/signatures/sbs/", height=3000, width=800)