File size: 2,464 Bytes
3290550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import click
import cooler
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool

# Suppress NumPy's divide-by-zero and invalid-operation floating-point warnings.
np.seterr(divide='ignore', invalid='ignore')

def process_chrom(args):
    """Sum one chromosome's raw contact counts at or beyond a bin distance.

    Args:
        args: A 5-tuple ``(chrom_name, input_file, resol, mindis,
            exclude_self)``, packed into a single argument so the function
            can be mapped over a list of work items.

    Returns:
        The sum of the ``count`` column over the fetched pixels whose
        ``bin2_id - bin1_id`` is at least the threshold. If anything raises
        while opening, fetching or filtering, the error is printed and ``0``
        is returned instead.
    """
    chrom_name, input_file, resol, mindis, exclude_self = args
    try:
        clr = cooler.Cooler(f"{input_file}::resolutions/{resol}")
        selector = clr.matrix(balance=False, sparse=True, as_pixels=True)
        table = selector.fetch(chrom_name)
        separation = table['bin2_id'] - table['bin1_id']
        # With exclude_self the threshold never drops below 1, which removes
        # pixels whose two bins coincide.
        if exclude_self:
            threshold = max(mindis, 1)
        else:
            threshold = mindis
        keep = separation >= threshold
        kept_counts = table[keep]['count']
        return kept_counts.sum()
    except Exception as err:
        print(f"Error processing {chrom_name}: {err}")
        return 0

# CLI entry point: sums intra-chromosomal raw contacts of an mcool file at one
# resolution, counting only pixels whose bin separation is >= --mindis (and
# >= 1 when --exclude-self is given).
#
# Parameters (supplied by click from the options below):
#   input        path to the .mcool file
#   resol        resolution in bp; selects the "resolutions/<resol>" group
#   mindis       minimum bin distance
#   chrom        comma-separated chromosome names, or None for every
#                chromosome in the file
#   exclude_self when set, contacts with bin distance 0 are dropped
#
# Raises ValueError if the resolution cannot be opened or a requested
# chromosome is not present in the file.
#
# NOTE: the function docstring is kept to a single line because click shows it
# verbatim as the command's --help text.
@click.command()
@click.option('-c','--chrom', type=str, default=None, help='Comma separated chroms [all chromosomes in the file]')
@click.option('-md','--mindis', type=int, default=0, help='Min genomic distance in bins [0]')
@click.option('-r','--resol',type=int,required=True,help='Resolution (bp)')
@click.option('-i','--input', type=str,required=True,help='mcool file path')
@click.option('--exclude-self', is_flag=True, help='Exclude bin_diff=0 contacts')
def depth(input, resol, mindis, chrom, exclude_self):
    """Calculate intra-chromosomal contacts with bin distance >= mindis"""
    print('\n[polaris] Depth calculation START')

    try:
        C = cooler.Cooler(f"{input}::resolutions/{resol}")
    except (KeyError, ValueError) as err:
        # cooler signals a missing group path with KeyError (to be confirmed
        # against the installed cooler version); ValueError is still caught so
        # the previous behaviour is retained.
        available_res = cooler.fileops.list_coolers(input)
        raise ValueError(
            f"Resolution {resol} not found. Available: {available_res}"
        ) from err

    valid_chroms = C.chromnames
    if chrom:
        # Tolerate whitespace around the commas, e.g. "chr1, chr2".
        chrom_list = [name.strip() for name in chrom.split(',')]
    else:
        chrom_list = valid_chroms

    known = set(valid_chroms)
    invalid_chroms = [c for c in chrom_list if c not in known]
    if invalid_chroms:
        raise ValueError(f"Invalid chromosomes: {invalid_chroms}. Valid: {valid_chroms}")

    # Process chromosomes in parallel, with at most 4 workers. Pool() rejects
    # a worker count of 0, so keep at least one even for an empty list.
    n_workers = max(1, min(len(chrom_list), 4))
    args_list = [(name, input, resol, mindis, exclude_self) for name in chrom_list]
    with Pool(processes=n_workers) as pool:
        results = list(tqdm(pool.imap(process_chrom, args_list), total=len(chrom_list), dynamic_ncols=True))
    total_contacts = sum(results)

    print("\n[polaris] Depth calculation FINISHED")
    print(f"File: {input} (res={resol}bp)")
    print(f"Chromosomes: {chrom_list}")
    print(f"Minimum bin distance: {mindis}{', exclude self' if exclude_self else ''}")
    print(f"Total intra contacts: {total_contacts:,}")

# Run the click command when this file is executed as a script.
if __name__ == '__main__':
    depth()