# Export pre-computed EVE scores into per-run `DeepSequence/vae_predictions.csv`
# files.
#
# For every gene, the EVE score table is read once from `eve.folder`. Then, for
# each cross-validation fold (0-4) and each subset (1, 2, 4, 6) of that fold,
# the run's `data/<run>/data.csv` is read, each of its `mutant` entries is
# looked up in the EVE table (`mutations` -> `evol_indices`), and the result is
# written to `inference/<run>/DeepSequence/vae_predictions.csv`.
#
# NOTE(review): the output column name and the `DeepSequence/` directory
# presumably mimic the DeepSequence output layout so downstream code can read
# EVE scores through the same path -- confirm against the consumer.

# Directory holding one `<GENE>_HUMAN_20000_samples.csv` EVE score file per gene.
eve.folder <- '/share/vault/Users/gz2294/EVE/results/evol_indices/'

# Genes to process.
genes <- c('PTEN', 'NUDT15', 'CCR5', 'CXCR4', 'GCK', 'CYP2C9', 'ASPA', 'SNCA')

# Look up EVE scores for the mutants of one run and write them to disk.
#
# eve_scores:     data frame with columns `mutations` and `evol_indices`.
# run_name:       run directory name shared by `data/` and `inference/`,
#                 e.g. 'PTEN.fold.0.score.1'.
# report_missing: if TRUE, print the number of NA cells in the result
#                 (mutants without a matching EVE entry show up as NA scores).
#
# Returns the written data frame invisibly.
write_eve_predictions <- function(eve_scores, run_name, report_missing = FALSE) {
  run_data <- read.csv(paste0('data/', run_name, '/data.csv'))

  # match() yields NA for mutants absent from the EVE table, so the output
  # always has exactly one row per input mutant, in the input order.
  predictions <- data.frame(
    mutant = run_data$mutant,
    mutation_effect_prediction_vae_ensemble =
      eve_scores$evol_indices[match(run_data$mutant, eve_scores$mutations)]
  )

  if (report_missing) {
    print(sum(is.na(predictions)))
  }

  out_dir <- file.path('inference', run_name, 'DeepSequence')
  # showWarnings = FALSE keeps reruns quiet when the directory already exists.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # write.csv defaults are kept on purpose (row names are written as the first
  # column), so the file layout is unchanged for existing consumers.
  write.csv(predictions, file.path(out_dir, 'vae_predictions.csv'))

  invisible(predictions)
}

for (gene in genes) {
  # Read all EVE scores for this gene once; reused for every fold and subset.
  eve <- read.csv(paste0(eve.folder, gene, '_HUMAN_20000_samples.csv'))

  for (fold in 0:4) {
    # Full-data run for this fold: the only place the NA count is reported.
    write_eve_predictions(
      eve,
      paste0(gene, '.fold.', fold, '.score.1'),
      report_missing = TRUE
    )

    # Subset runs of the same fold.
    for (subset in c(1, 2, 4, 6)) {
      write_eve_predictions(
        eve,
        paste0(gene, '.subset.', subset, '.fold.', fold, '.score.1')
      )
    }
  }
}