"""Bucket the records of ``humaneval_new.json`` into six levels per metric.

For each of the metrics ``line``, ``token`` and ``cyclomatic_complexity`` the
records are sorted ascending by that metric and every record receives a level
from 0 to 5 according to its rank. The levels are stored under ``line_diff``,
``token_diff`` and ``CC_diff`` respectively. The records are then put back in
``id`` order and written to ``QS.json``.
"""

import json
from itertools import accumulate

INPUT_PATH = "humaneval_new.json"
OUTPUT_PATH = "QS.json"

# Percentage of the records assigned to each level (0..5), lowest metric
# values first. The final entry is informational only: the last level simply
# receives every record that is left over after the first five.
line_counts = [17, 17, 17, 17, 17, 15]
token_counts = [17, 17, 17, 17, 17, 15]
cyclomatic_complexity = [17, 17, 17, 17, 17, 15]

# (key to rank by, key to store the level under, percentages per level).
# Order matters: each sort is stable, so ties in a later metric keep the
# order produced by the previous metric's sort.
METRICS = (
    ("line", "line_diff", line_counts),
    ("token", "token_diff", token_counts),
    ("cyclomatic_complexity", "CC_diff", cyclomatic_complexity),
)


def bucket_boundaries(percentages, total):
    """Return the cumulative upper rank bound of every level but the last.

    Args:
        percentages: Share of ``total`` (in percent) for each level.
        total: Number of records being bucketed.

    Returns:
        A list of ``len(percentages) - 1`` floats (empty when there are no
        percentages); a record with rank ``r`` belongs to the first level
        whose bound is greater than ``r``.
    """
    sizes = [pct * 0.01 * total for pct in percentages[:-1]]
    return list(accumulate(sizes))


def level_for_rank(rank, boundaries):
    """Return the level for a zero-based ``rank`` given cumulative bounds.

    Ranks at or beyond the last bound fall into the final level,
    ``len(boundaries)``.
    """
    for level, upper in enumerate(boundaries):
        if rank < upper:
            return level
    return len(boundaries)


def assign_levels(items, metric_key, level_key, percentages):
    """Rank ``items`` by ``metric_key`` and store each level in ``level_key``.

    ``items`` is sorted in place (ascending, stable) and every dict in it is
    mutated. Bucket sizes are derived from ``len(items)`` rather than a fixed
    dataset size, so the percentages hold for any number of records.

    Args:
        items: List of dicts, each containing ``metric_key``.
        metric_key: Key whose value determines the ranking.
        level_key: Key under which the computed level is written.
        percentages: Share of the records (in percent) for each level.
    """
    boundaries = bucket_boundaries(percentages, len(items))
    items.sort(key=lambda item: item[metric_key])
    for rank, item in enumerate(items):
        item[level_key] = level_for_rank(rank, boundaries)


def main():
    """Load the records, annotate all metrics, and write the result."""
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    for metric_key, level_key, percentages in METRICS:
        assign_levels(data, metric_key, level_key, percentages)

    # Restore the original ordering before saving.
    data.sort(key=lambda item: item["id"])

    with open(OUTPUT_PATH, "w", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()