#!/usr/bin/env python
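"""Experiment runner for seq2struct.

Reads a jsonnet experiment config and dispatches to the preprocess, train, or
eval command. In eval mode, it runs inference for every checkpoint step listed
in the experiment config and aggregates exact-match scores per hardness level
into CSV files.
"""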
import _jsonnet
import json
import argparse
import collections
import attr
from seq2struct.commands import preprocess, train, infer, eval
import crash_on_ipy
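
# Lightweight attrs containers passed to each command's main() in place of its
# parsed command-line arguments.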
@attr.s
class PreprocessConfig:
    config = attr.ib()
    config_args = attr.ib()


@attr.s
class TrainConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()


@attr.s
class InferConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    beam_size = attr.ib()
    output = attr.ib()
    step = attr.ib()
    use_heuristic = attr.ib(default=False)
    mode = attr.ib(default="infer")
    limit = attr.ib(default=None)
    output_history = attr.ib(default=False)


@attr.s
class EvalConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    inferred = attr.ib()
    output = attr.ib()
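
# main() evaluates the experiment jsonnet file, then evaluates the model jsonnet
# config with model_config_args passed as a top-level argument (tla_codes), and
# finally dispatches on the requested mode.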
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help="preprocess/train/eval")
    parser.add_argument('exp_config_file', help="jsonnet file for experiments")
    args = parser.parse_args()

    exp_config = json.loads(_jsonnet.evaluate_file(args.exp_config_file))
    model_config_file = exp_config["model_config"]
    if "model_config_args" in exp_config:
        model_config_args = json.dumps(exp_config["model_config_args"])
    else:
        model_config_args = None
    other_config = json.loads(
        _jsonnet.evaluate_file(model_config_file, tla_codes={'args': model_config_args}))

    if args.mode == "preprocess":
        preprocess_config = PreprocessConfig(model_config_file, model_config_args)
        preprocess.main(preprocess_config)
    elif args.mode == "train":
        train_config = TrainConfig(model_config_file, model_config_args, exp_config["logdir"])
        train.main(train_config)
    elif args.mode == "eval":
        # Write the CSV header for the aggregated results file.
        result = open(f"{exp_config['eval_output']}/eval-results.csv", "a", encoding='utf8')
        result.write("checkpoint;type;easy;medium;hard;extra;all\n")
        result.close()
        first_loop = True

        # File with gold queries from dev.json.
        gold = open(f"{exp_config['eval_output']}/gold.txt", "w", encoding='utf8')
        print(f"Open file {other_config['data']['val']['paths'][0]}")
        with open(f"{other_config['data']['val']['paths'][0]}", encoding='utf8') as json_data_file:
            data = json.load(json_data_file)
            # Had to iterate by length because the .json file starts with a blank [ ].
            length = len(data)
            for i in range(length):
                gold.write(f"{data[i]['query']}\t{data[i]['db_id']}\n")
        gold.close()
        for step in exp_config["eval_steps"]:
            infer_output_path = "{}/{}-step{}".format(
                exp_config["eval_output"],
                exp_config["eval_name"],
                step)
            infer_config = InferConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                exp_config["eval_beam_size"],
                infer_output_path,
                step,
                use_heuristic=exp_config["eval_use_heuristic"]
            )
            infer.main(infer_config)

            eval_output_path = "{}/{}-step{}.eval".format(
                exp_config["eval_output"],
                exp_config["eval_name"],
                step)
            eval_config = EvalConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                f"{infer_output_path}.infer",
                eval_output_path
            )
            eval.main(eval_config)

            with open(eval_output_path) as eval_file:
                res_json = json.load(eval_file)
            print(step, res_json['total_scores']['all']['exact'])
            print(f"*;count;{res_json['total_scores']['easy']['count']};{res_json['total_scores']['medium']['count']};{res_json['total_scores']['hard']['count']};{res_json['total_scores']['extra']['count']};{res_json['total_scores']['all']['count']}")
            print("checkpoint;type;easy;medium;hard;extra;all")
            print(f"{step};exact match;{res_json['total_scores']['easy']['exact']:.3f};{res_json['total_scores']['medium']['exact']:.3f};{res_json['total_scores']['hard']['exact']:.3f};{res_json['total_scores']['extra']['exact']:.3f};{res_json['total_scores']['all']['exact']:.3f}")
            # Open, write, close each time - to leave memory free.
            result = open(f"{exp_config['eval_output']}/eval-results.csv", "a", encoding='utf8')
            if first_loop:
                result.write(f"*;count;{res_json['total_scores']['easy']['count']};{res_json['total_scores']['medium']['count']};{res_json['total_scores']['hard']['count']};{res_json['total_scores']['extra']['count']};{res_json['total_scores']['all']['count']}\n")
                first_loop = False
            result.write(f"{step};exact match;{res_json['total_scores']['easy']['exact']:.3f};{res_json['total_scores']['medium']['exact']:.3f};{res_json['total_scores']['hard']['exact']:.3f};{res_json['total_scores']['extra']['exact']:.3f};{res_json['total_scores']['all']['exact']:.3f}\n")
            result.close()
            # Clean version of the original .eval file.
            eval_clean = open(f"{exp_config['eval_output']}/{exp_config['eval_name']}-step{step}.csv", "w", encoding='utf8')
            for per_item in res_json['per_item']:
                # In the original .eval file the exact-match flag appears either as
                # 0/"false" or as 1/"true"; normalize both forms to a single string.
                if per_item['exact'] == 0 or per_item['exact'] == "false":
                    exact = "false"
                elif per_item['exact'] == 1 or per_item['exact'] == "true":
                    exact = "true"
                eval_clean.write(f"{exact};{per_item['hardness']};{per_item['gold']};{per_item['predicted']}\n")
            eval_clean.close()

if __name__ == "__main__":
    main()