|
import click |
|
import cooler |
|
import numpy as np |
|
from tqdm import tqdm |
|
from multiprocessing import Pool |
|
|
|
# Tell NumPy to ignore divide-by-zero and invalid-value floating-point
# errors instead of warning about them. Runs once, at import time.
np.seterr(invalid="ignore", divide="ignore")
|
|
|
def process_chrom(args):
    """Sum the raw contact counts of one chromosome beyond a bin distance.

    Args:
        args: A single 5-tuple ``(chrom_name, input_file, resol, mindis,
            exclude_self)``. Everything is packed into one argument so the
            function can be mapped over a list of work items.

    Returns:
        The sum of the ``count`` column over the pixels fetched for
        ``chrom_name`` whose ``bin2_id - bin1_id`` is at least the
        threshold. The threshold is ``mindis``, raised to at least 1 when
        ``exclude_self`` is set. If anything inside the ``try`` raises, the
        error is printed and ``0`` is returned instead.
    """
    chrom_name, input_file, resol, mindis, exclude_self = args

    try:
        uri = f"{input_file}::resolutions/{resol}"
        clr = cooler.Cooler(uri)

        # Unbalanced counts, returned as a pixel table rather than a matrix.
        selector = clr.matrix(balance=False, sparse=True, as_pixels=True)
        pixels = selector.fetch(chrom_name)

        # Excluding self-contacts means a separation of 0 is never allowed.
        if exclude_self:
            threshold = max(mindis, 1)
        else:
            threshold = mindis

        separation = pixels['bin2_id'] - pixels['bin1_id']
        kept = pixels[separation >= threshold]
        return kept['count'].sum()
    except Exception as e:
        # Best effort: report the failure and contribute nothing to the total.
        print(f"Error processing {chrom_name}: {e}")
        return 0
|
|
|
# CLI entry point: sums, over the selected chromosomes, the contact counts
# returned by process_chrom() and prints a short report.
#
# Parameters (filled in by click from the options below):
#   input        path of the mcool file
#   resol        resolution in bp; selects the "resolutions/<resol>" group
#   mindis       minimum bin distance forwarded to process_chrom()
#   chrom        comma-separated chromosome names, or None for every
#                chromosome listed in the cooler
#   exclude_self flag forwarded to process_chrom()
#
# Raises ValueError when the resolution cannot be opened or when a requested
# chromosome is not present in the cooler.
@click.command()
@click.option('-c', '--chrom', type=str, default=None,
              help='Comma separated chroms [all chromosomes in file]')
@click.option('-md', '--mindis', type=int, default=0,
              help='Min genomic distance in bins [0]')
@click.option('-r', '--resol', type=int, required=True, help='Resolution (bp)')
@click.option('-i', '--input', type=str, required=True, help='mcool file path')
@click.option('--exclude-self', is_flag=True, help='Exclude bin_diff=0 contacts')
def depth(input, resol, mindis, chrom, exclude_self):
    """Calculate intra-chromosomal contacts with bin distance >= mindis"""
    # NOTE: the docstring above is shown by click as the command's --help
    # text, so it is kept short; details live in the comment block above.
    print('\n[polaris] Depth calculation START')

    try:
        C = cooler.Cooler(f"{input}::resolutions/{resol}")
    except (KeyError, ValueError) as err:
        # cooler reports a missing group with KeyError; ValueError is kept
        # for backward compatibility with the original handler.
        available_res = cooler.fileops.list_coolers(input)
        raise ValueError(
            f"Resolution {resol} not found. Available: {available_res}"
        ) from err

    if chrom:
        # Tolerate "chr1, chr2" and drop repeated names so that no
        # chromosome is counted twice in the total.
        requested = (name.strip() for name in chrom.split(','))
        chrom_list = list(dict.fromkeys(requested))
    else:
        chrom_list = list(C.chromnames)

    known_chroms = set(C.chromnames)
    invalid_chroms = [c for c in chrom_list if c not in known_chroms]
    if invalid_chroms:
        raise ValueError(
            f"Invalid chromosomes: {invalid_chroms}. Valid: {C.chromnames}"
        )

    args_list = [
        (name, input, resol, mindis, exclude_self) for name in chrom_list
    ]
    # At most 4 workers, but never 0: Pool rejects processes < 1.
    n_workers = max(1, min(len(args_list), 4))
    with Pool(processes=n_workers) as pool:
        # Results must be consumed inside the with-block, before the pool
        # is torn down on exit.
        results = list(
            tqdm(
                pool.imap(process_chrom, args_list),
                total=len(args_list),
                dynamic_ncols=True,
            )
        )
    total_contacts = sum(results)

    print("\n[polaris] Depth calculation FINISHED")
    print(f"File: {input} (res={resol}bp)")
    print(f"Chromosomes: {chrom_list}")
    print(f"Minimum bin distance: {mindis}{', exclude self' if exclude_self else ''}")
    print(f"Total intra contacts: {total_contacts:,}")
|
|
|
# Run the click command only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    depth()