# Listing metadata (from the code viewer): file size 2,464 bytes; revision 3290550.
import click
import cooler
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool
# Silence NumPy's divide-by-zero and invalid-operation floating-point warnings.
# This is a module-level side effect: it runs at import time and applies to
# the whole process.
# NOTE(review): no floating-point division is visible in this file — confirm
# the setting is still needed.
np.seterr(divide='ignore', invalid='ignore')
def process_chrom(args):
    """Sum the intra-chromosomal contact counts of a single chromosome.

    Takes one tuple argument so it can be mapped over a list of work items
    by a ``multiprocessing`` pool.

    Args:
        args: Tuple ``(chrom_name, input_file, resol, mindis, exclude_self)``:
            the chromosome to fetch, the path of the multi-resolution cooler
            file, the resolution group to open, the minimum
            ``bin2_id - bin1_id`` a pixel must have to be counted, and a flag
            that forces that minimum to be at least 1.

    Returns:
        The sum of the ``count`` column over the retained pixels, or ``0``
        if anything goes wrong. Failures are printed, not raised, so one bad
        chromosome does not abort the whole run; the caller cannot tell a
        failed chromosome from an empty one.
    """
    chrom_name, input_file, resol, mindis, exclude_self = args
    try:
        C = cooler.Cooler(f"{input_file}::resolutions/{resol}")
        pixels = C.matrix(
            balance=False, sparse=True, as_pixels=True).fetch(chrom_name)
        bin_diff = pixels['bin2_id'] - pixels['bin1_id']
        # With exclude_self the threshold is never below 1, so same-bin
        # pixels (difference 0) are dropped even when mindis <= 0.
        min_diff = max(mindis, 1) if exclude_self else mindis
        mask = bin_diff >= min_diff
        # Select the column together with the mask: filtering the whole
        # frame first would copy every column only to sum one of them.
        return pixels.loc[mask, 'count'].sum()
    except Exception as e:
        # Worker boundary: deliberately best-effort, report and count as 0.
        print(f"Error processing {chrom_name}: {e}")
        return 0
# CLI entry point: sums, over the selected chromosomes, the intra-chromosomal
# contacts whose bin distance is at least --mindis, and prints a summary.
# The function docstring below is kept to one line because click shows it
# verbatim as the command's --help text.
@click.command()
@click.option('-c', '--chrom', type=str, default=None,
              help='Comma separated chroms [all chromosomes in the file]')
@click.option('-md', '--mindis', type=int, default=0,
              help='Min genomic distance in bins [0]')
@click.option('-r', '--resol', type=int, required=True, help='Resolution (bp)')
@click.option('-i', '--input', type=str, required=True, help='mcool file path')
@click.option('--exclude-self', is_flag=True, help='Exclude bin_diff=0 contacts')
def depth(input, resol, mindis, chrom, exclude_self):
    """Calculate intra-chromosomal contacts with bin distance >= mindis"""
    # Parameters (populated by click from the options above):
    #   input        -- path of the mcool file
    #   resol        -- resolution group to open inside the file
    #   mindis       -- minimum bin2_id - bin1_id for a contact to be counted
    #   chrom        -- comma-separated chromosome names, or None for all
    #   exclude_self -- when set, same-bin contacts are never counted
    # Raises ValueError for an unknown resolution, unknown chromosome names,
    # or an empty chromosome selection.
    print('\n[polaris] Depth calculation START')

    try:
        C = cooler.Cooler(f"{input}::resolutions/{resol}")
    except (KeyError, ValueError) as err:
        # cooler reports a missing group with KeyError; ValueError is still
        # caught so anything the previous handler covered keeps working.
        available_res = cooler.fileops.list_coolers(input)
        raise ValueError(
            f"Resolution {resol} not found. Available: {available_res}"
        ) from err

    known_chroms = C.chromnames
    if chrom:
        # Tolerate "chr1, chr2" and trailing commas; drop duplicates (keeping
        # first-seen order) so no chromosome is counted twice.
        requested = (name.strip() for name in chrom.split(','))
        chrom_list = list(dict.fromkeys(name for name in requested if name))
    else:
        chrom_list = list(known_chroms)

    known_set = set(known_chroms)
    invalid_chroms = [c for c in chrom_list if c not in known_set]
    if invalid_chroms:
        raise ValueError(f"Invalid chromosomes: {invalid_chroms}. Valid: {known_chroms}")
    if not chrom_list:
        # Pool(processes=0) would fail with an unrelated-looking ValueError.
        raise ValueError(f"No chromosomes to process. Valid: {known_chroms}")

    # Process chromosomes in parallel, with at most four worker processes.
    args_list = [(name, input, resol, mindis, exclude_self) for name in chrom_list]
    with Pool(processes=min(len(chrom_list), 4)) as pool:
        results = list(tqdm(pool.imap(process_chrom, args_list),
                            total=len(args_list), dynamic_ncols=True))
    total_contacts = sum(results)

    print("\n[polaris] Depth calculation FINISHED")
    print(f"File: {input} (res={resol}bp)")
    print(f"Chromosomes: {chrom_list}")
    print(f"Minimum bin distance: {mindis}{', exclude self' if exclude_self else ''}")
    print(f"Total intra contacts: {total_contacts:,}")
# Run the click command only when the file is executed as a script.
if __name__ == '__main__':
    depth()