#!/usr/bin/env python
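"""Experiment runner: dispatches seq2struct's preprocess/train/eval commands
based on a single jsonnet experiment config file."""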
import argparse
import json

import _jsonnet
import attr

from seq2struct.commands import preprocess, train, infer, eval
import crash_on_ipy  # debug helper: drops into IPython on uncaught exceptions


@attr.s
class PreprocessConfig:
    config = attr.ib()
    config_args = attr.ib()


@attr.s
class TrainConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()


@attr.s
class InferConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    beam_size = attr.ib()
    output = attr.ib()
    step = attr.ib()
    use_heuristic = attr.ib(default=False)
    mode = attr.ib(default="infer")
    limit = attr.ib(default=None)
    output_history = attr.ib(default=False)


@attr.s
class EvalConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    inferred = attr.ib()
    output = attr.ib()
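

# These config classes mirror the argument namespaces consumed by the
# seq2struct command modules (preprocess/train/infer/eval) called below.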


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help="preprocess/train/eval")
    parser.add_argument('exp_config_file', help="jsonnet file for experiments")
    args = parser.parse_args()
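
    # Example invocation (hypothetical config path):
    #   python run.py eval experiments/spider-run.jsonnet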
    exp_config = json.loads(_jsonnet.evaluate_file(args.exp_config_file))
    model_config_file = exp_config["model_config"]
    if "model_config_args" in exp_config:
        model_config_args = json.dumps(exp_config["model_config_args"])
    else:
        model_config_args = None
    other_config = json.loads(_jsonnet.evaluate_file(
        model_config_file, tla_codes={'args': model_config_args}))
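    # The model config is itself jsonnet; model_config_args is passed to it as
    # a top-level argument (TLA), so one model config can serve many experiments.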

    if args.mode == "preprocess":
        preprocess_config = PreprocessConfig(model_config_file, model_config_args)
        preprocess.main(preprocess_config)
    elif args.mode == "train":
        train_config = TrainConfig(model_config_file, model_config_args,
                                   exp_config["logdir"])
        train.main(train_config)
    elif args.mode == "eval":
        # Write the CSV header once, before the per-step loop.
        with open(f"{exp_config['eval_output']}/eval-results.csv", "a",
                  encoding='utf8') as result:
            result.write("checkpoint;type;easy;medium;hard;extra;all\n")
        first_loop = True

        # File with gold queries from dev.json.
        print(f"Open file {other_config['data']['val']['paths'][0]}")
        with open(other_config['data']['val']['paths'][0],
                  encoding='utf8') as json_data_file, \
             open(f"{exp_config['eval_output']}/gold.txt", "w",
                  encoding='utf8') as gold:
            data = json.load(json_data_file)
            # Iterate by index: the .json file is one top-level list.
            for i in range(len(data)):
                gold.write(f"{data[i]['query']}\t{data[i]['db_id']}\n")
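
        # gold.txt now holds one "<query>\t<db_id>" line per dev example, the
        # tab-separated format the Spider evaluation script reads as gold.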

        for step in exp_config["eval_steps"]:
            infer_output_path = "{}/{}-step{}".format(
                exp_config["eval_output"],
                exp_config["eval_name"],
                step)
            infer_config = InferConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                exp_config["eval_beam_size"],
                infer_output_path,
                step,
                use_heuristic=exp_config["eval_use_heuristic"])
            infer.main(infer_config)

            eval_output_path = "{}/{}-step{}.eval".format(
                exp_config["eval_output"],
                exp_config["eval_name"],
                step)
            eval_config = EvalConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                f"{infer_output_path}.infer",
                eval_output_path)
            eval.main(eval_config)
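
            # eval.main writes a JSON report (total_scores per hardness bucket
            # plus per_item details) to eval_output_path; summarize it below.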
            with open(eval_output_path, encoding='utf8') as eval_file:
                res_json = json.load(eval_file)
            scores = res_json['total_scores']
            print(step, scores['all']['exact'])
            print(f"*;count;{scores['easy']['count']};{scores['medium']['count']};{scores['hard']['count']};{scores['extra']['count']};{scores['all']['count']}")
            print("checkpoint;type;easy;medium;hard;extra;all")
            print(f"{step};exact match;{scores['easy']['exact']:.3f};{scores['medium']['exact']:.3f};{scores['hard']['exact']:.3f};{scores['extra']['exact']:.3f};{scores['all']['exact']:.3f}")
            # Open, write, and close on each step to keep memory free.
            with open(f"{exp_config['eval_output']}/eval-results.csv", "a",
                      encoding='utf8') as result:
                if first_loop:
                    result.write(f"*;count;{scores['easy']['count']};{scores['medium']['count']};{scores['hard']['count']};{scores['extra']['count']};{scores['all']['count']}\n")
                    first_loop = False
                result.write(f"{step};exact match;{scores['easy']['exact']:.3f};{scores['medium']['exact']:.3f};{scores['hard']['exact']:.3f};{scores['extra']['exact']:.3f};{scores['all']['exact']:.3f}\n")

            # Clean, ";"-delimited version of the original .eval file:
            # exact flag; hardness; gold SQL; predicted SQL.
            with open(f"{exp_config['eval_output']}/{exp_config['eval_name']}-step{step}.csv",
                      "w", encoding='utf8') as eval_clean:
                for per_item in res_json['per_item']:
                    # The .eval file encodes exact match inconsistently (0/1 in
                    # some entries, "false"/"true" in others); normalize them.
                    exact = "true" if per_item['exact'] in (1, "true") else "false"
                    eval_clean.write(f"{exact};{per_item['hardness']};{per_item['gold']};{per_item['predicted']}\n")


if __name__ == "__main__":
    main()