|
import json
|
|
# Percentage of the records placed in tier 0 / tier 1 / tier 2 for each metric.
# Only the first two entries are used to compute cut-offs; the last tier simply
# receives whatever records remain.
line_counts = [33, 33, 34]
token_counts = [33, 33, 34]
cyclomatic_complexity = [33, 33, 34]


def assign_tiers(data, metric_key, label_key, percentages):
    """Sort *data* in place by ``metric_key`` and store a 0/1/2 tier per record.

    Args:
        data: List of dicts; every dict must contain ``metric_key``.
        metric_key: Key whose value determines the ascending sort order.
        label_key: Key under which the tier (0, 1 or 2) is written.
        percentages: Sequence whose first two entries are the percentage of
            records assigned to tier 0 and tier 1 respectively.

    Raises:
        KeyError: If a record lacks ``metric_key``.
    """
    total = len(data)
    # The cut-offs are intentionally left as floats (e.g. 54.12 for 164
    # records) so that the rank comparison below reproduces the original
    # rounding behaviour: a fractional cut-off rounds the tier size up.
    first_cut = percentages[0] * 0.01 * total
    second_cut = first_cut + percentages[1] * 0.01 * total

    # list.sort is stable, so ties keep the order left by the previous pass.
    data.sort(key=lambda record: record[metric_key])
    for rank, record in enumerate(data):
        if rank < first_cut:
            record[label_key] = 0
        elif rank < second_cut:
            record[label_key] = 1
        else:
            record[label_key] = 2


def annotate(data):
    """Add ``line_diff``, ``token_diff`` and ``CC_diff`` tiers to every record.

    The records are ranked separately by their ``line``, ``token`` and
    ``cyclomatic_complexity`` values (in that order), and finally re-sorted by
    ``id``. The list is modified in place and also returned for convenience.

    Args:
        data: List of dicts containing the keys ``line``, ``token``,
            ``cyclomatic_complexity`` and ``id``.

    Returns:
        The same list object, sorted by ``id``.
    """
    assign_tiers(data, 'line', 'line_diff', line_counts)
    assign_tiers(data, 'token', 'token_diff', token_counts)
    assign_tiers(data, 'cyclomatic_complexity', 'CC_diff', cyclomatic_complexity)
    data.sort(key=lambda record: record['id'])
    return data


def main(input_path="humaneval_new.json", output_path='QS.json'):
    """Read the records from *input_path*, annotate them, write *output_path*.

    Args:
        input_path: UTF-8 JSON file containing a list of record dicts.
        output_path: Destination for the annotated list (UTF-8, indent of 4,
            non-ASCII characters written unescaped).
    """
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    annotate(data)

    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
|
|
|
|
|