|
|
|
""" |
|
Created on Mon Jun 15 11:24:45 2020 |
|
|
|
@author: luol2 |
|
""" |
|
|
|
import io |
|
def _keep_best_scoring(rows, key_of):
    """Return one row per key, keeping the row with the highest score.

    The score is read from index 4 of each row and compared as a float.
    On a tie the row seen first is kept, and the result preserves the
    order in which each key was first encountered.
    """
    best = {}
    for row in rows:
        key = key_of(row)
        # The score is only parsed when two rows compete for the same key,
        # so a lone row is passed through untouched.
        if key not in best or float(row[4]) > float(best[key][4]):
            best[key] = row
    return list(best.values())


def nest_overlap_entity(nest_list):
    """Resolve a group of nested/overlapping entity rows.

    Each row is a sequence of strings where indexes 0 and 1 hold the span
    boundaries, index 3 holds the identifier (named ``hpoid`` in the
    original code) and index 4 holds a score parseable by ``float``.

    Two passes are applied in order:

    1. rows sharing the same identifier are reduced to the best-scoring one;
    2. of the survivors, rows sharing the same ``(row[0], row[1])`` span are
       reduced to the best-scoring one.

    The second pass now compares the score of the row actually being
    examined; previously it read the score from the unfiltered input at
    the same index, which could keep a lower-scoring row.

    Args:
        nest_list: list of entity rows (lists of strings).

    Returns:
        A new list containing the surviving rows (the same row objects
        that were passed in), in first-seen order.
    """
    by_identifier = _keep_best_scoring(nest_list, lambda row: row[3])
    return _keep_best_scoring(
        by_identifier, lambda row: row[0] + ' ' + row[1]
    )
|
def _split_documents(tsv):
    """Split a result string into per-document chunks on blank lines."""
    # None is treated as empty text, matching what io.StringIO(None) gave.
    text = '' if tsv is None else tsv
    return text.strip().split('\n\n')


def _resolve_group(group):
    """Return the rows to keep from one run of overlapping entity rows."""
    # A lone row is kept verbatim; only real overlaps need arbitration.
    if len(group) == 1:
        return group
    return nest_overlap_entity(group)


def combine_ml_dict(dict_tsv, ml_tsv, nest=True):
    """Merge two tab-separated entity result strings document by document.

    Both inputs consist of documents separated by a blank line. Within a
    document the first line is a header and every following line is an
    entity row whose first two tab-separated columns are integers (used
    here as span start and end).

    For each document the rows from both inputs are pooled (identical
    lines are kept once), ordered by ``(start, end)``, and partitioned
    into runs: a row starts a new run when its start is greater than the
    largest end seen so far. Runs of one row are emitted unchanged; longer
    runs are passed to ``nest_overlap_entity``.

    Args:
        dict_tsv: result string whose documents drive the iteration.
        ml_tsv: result string read in parallel; it must contain at least
            as many documents as ``dict_tsv`` (otherwise ``IndexError``).
            Its header line is the one written to the output.
        nest: accepted for interface compatibility; not used by this
            function.

    Returns:
        The merged results as one string: for every document the header
        line, the kept rows (one per line), then an empty line.
    """
    dict_docs = _split_documents(dict_tsv)
    ml_docs = _split_documents(ml_tsv)

    pieces = []
    for doc_idx, dict_doc in enumerate(dict_docs):
        dict_lines = dict_doc.split('\n')
        ml_lines = ml_docs[doc_idx].split('\n')

        # Keyed by the raw line so a row present in both inputs appears once.
        spans = {}
        for line in dict_lines[1:] + ml_lines[1:]:
            cols = line.split('\t')
            spans[line] = [int(cols[0]), int(cols[1])]

        # sorted() is stable, so rows with equal spans keep insertion order
        # (dictionary rows before ML rows).
        ordered = sorted(spans, key=spans.get)

        kept = []
        group = []
        max_end = None
        for line in ordered:
            start, end = spans[line]
            if group and start > max_end:
                kept.extend(_resolve_group(group))
                group = []
            group.append(line.split('\t'))
            # The running maximum is never reset between runs.
            if max_end is None or end > max_end:
                max_end = end
        if group:
            kept.extend(_resolve_group(group))

        pieces.append(ml_lines[0] + '\n')
        pieces.extend('\t'.join(cols) + '\n' for cols in kept)
        pieces.append('\n')

    return ''.join(pieces)
|
|
|
|