PreMode / analysis /Hsu.et.al.git /prepare.DeepSequence.from.EVE.R
gzhong's picture
Upload folder using huggingface_hub
7718235 verified
# Export pre-computed EVE scores for every gene / fold / subset split.
#
# For each gene the EVE table is read once from `eve.folder`; for each data
# split found under data/<split>/data.csv the matching `evol_indices` values are
# looked up by mutant name and written to
# inference/<split>/DeepSequence/vae_predictions.csv.
# All relative paths are resolved against the current working directory.
eve.folder <- '/share/vault/Users/gz2294/EVE/results/evol_indices/'
genes <- c('PTEN', 'NUDT15', 'CCR5', 'CXCR4', 'GCK', 'CYP2C9', 'ASPA', 'SNCA')

# Write the EVE predictions for one data split.
#
#   eve            data frame read from the EVE csv; its `mutations` column is
#                  matched against the split's `mutant` column and its
#                  `evol_indices` column supplies the score.
#   split.name     directory name of the split, shared by data/ and inference/.
#   report.missing when TRUE, print the number of NA cells in the output table.
#
# Returns the written data frame invisibly.
write.eve.predictions <- function(eve, split.name, report.missing = FALSE) {
  data.gene <- read.csv(file.path('data', split.name, 'data.csv'))
  # match() yields NA for mutants absent from the EVE table, so those rows are
  # kept in the output with an NA score rather than being dropped.
  data.gene.eve <- data.frame(
    mutant = data.gene$mutant,
    mutation_effect_prediction_vae_ensemble =
      eve$evol_indices[match(data.gene$mutant, eve$mutations)]
  )
  if (report.missing) {
    print(sum(is.na(data.gene.eve)))
  }
  out.dir <- file.path('inference', split.name, 'DeepSequence')
  # showWarnings = FALSE keeps re-runs quiet when the directory already exists.
  dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)
  write.csv(data.gene.eve, file.path(out.dir, 'vae_predictions.csv'))
  invisible(data.gene.eve)
}

for (gene in genes) {
  # read EVE all scores
  eve <- read.csv(paste0(eve.folder, gene, '_HUMAN_20000_samples.csv'))
  for (fold in 0:4) {
    # Full fold: the NA count is printed as a quick coverage check.
    write.eve.predictions(eve, paste0(gene, '.fold.', fold, '.score.1'),
                          report.missing = TRUE)
    # Subsets of the same fold: written silently.
    for (subset in c(1, 2, 4, 6)) {
      write.eve.predictions(
        eve,
        paste0(gene, '.subset.', subset, '.fold.', fold, '.score.1')
      )
    }
  }
}