#!/usr/bin/env python
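"""Experiment runner for seq2struct.

Reads a jsonnet experiment config and dispatches to the preprocess, train, or
eval command. Example invocation (file and config names are illustrative, not
defined by this script):

    python run.py eval experiments/spider.jsonnet
"""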

import _jsonnet
import json
import argparse
import collections
import attr
from seq2struct.commands import preprocess, train, infer, eval
import crash_on_ipy

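# Plain attrs containers that mimic the argparse namespaces the seq2struct
# command modules normally receive, carrying only the fields set below.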
@attr.s
class PreprocessConfig:
    config = attr.ib()
    config_args = attr.ib()

@attr.s
class TrainConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()

@attr.s
class InferConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    beam_size = attr.ib()
    output = attr.ib()
    step = attr.ib()
    use_heuristic = attr.ib(default=False)
    mode = attr.ib(default="infer")
    limit = attr.ib(default=None)
    output_history = attr.ib(default=False)

@attr.s
class EvalConfig:
    config = attr.ib()
    config_args = attr.ib()
    logdir = attr.ib()
    section = attr.ib()
    inferred = attr.ib()
    output = attr.ib()


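# Parse the experiment-level jsonnet config, build the per-command config,
# and dispatch on the requested mode.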
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help="preprocess/train/eval")
    parser.add_argument('exp_config_file', help="jsonnet file for experiments")
    args = parser.parse_args()
        
    exp_config = json.loads(_jsonnet.evaluate_file(args.exp_config_file))
    model_config_file = exp_config["model_config"]
    if "model_config_args" in exp_config:
        model_config_args = json.dumps(exp_config["model_config_args"])
    else:
        model_config_args = None
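    # The model config is itself jsonnet; the serialized model_config_args are
    # passed in as a top-level argument so the config file can use them.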
    other_config = json.loads(_jsonnet.evaluate_file(model_config_file, tla_codes={'args': model_config_args}))
    
    if args.mode == "preprocess":
        preprocess_config = PreprocessConfig(model_config_file, model_config_args)
        preprocess.main(preprocess_config)
    elif args.mode == "train":
        train_config = TrainConfig(model_config_file, model_config_args, exp_config["logdir"])
        train.main(train_config)
    elif args.mode == "eval":
        # Write the CSV header once; per-step rows are appended inside the loop below.
        with open(f"{exp_config['eval_output']}/eval-results.csv", "a", encoding='utf8') as result:
            result.write("checkpoint;type;easy;medium;hard;extra;all\n")
        first_loop = True

        # File with gold queries from dev.json
        print(f"Open file {other_config['data']['val']['paths'][0]}")
        with open(f"{other_config['data']['val']['paths'][0]}", encoding='utf8') as json_data_file:
            data = json.load(json_data_file)
        with open(f"{exp_config['eval_output']}/gold.txt", "w", encoding='utf8') as gold:
            # Iterate by index: the .json file is a plain top-level list of examples.
            for i in range(len(data)):
                gold.write(f"{data[i]['query']}\t{data[i]['db_id']}\n")
      
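        # For every requested checkpoint step: run inference, evaluate the
        # predictions, and append a summary row to eval-results.csv.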
        for step in exp_config["eval_steps"]:
            infer_output_path = "{}/{}-step{}".format(
                exp_config["eval_output"],
                exp_config["eval_name"],
                step)
            infer_config = InferConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                exp_config["eval_beam_size"],
                infer_output_path,
                step,
                use_heuristic=exp_config["eval_use_heuristic"]
            )
            infer.main(infer_config)

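            # Score the .infer output produced above against the gold queries.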
            eval_output_path = "{}/{}-step{}.eval".format(
                exp_config["eval_output"], 
                exp_config["eval_name"], 
                step)
            eval_config = EvalConfig(
                model_config_file,
                model_config_args,
                exp_config["logdir"],
                exp_config["eval_section"],
                f"{infer_output_path}.infer",
                eval_output_path
            )
            eval.main(eval_config)

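            # Print a per-difficulty summary of exact-match scores for this step.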
            with open(eval_output_path, encoding='utf8') as eval_file:
                res_json = json.load(eval_file)
            print(step, res_json['total_scores']['all']['exact'])
            print(f"*;count;{res_json['total_scores']['easy']['count']};{res_json['total_scores']['medium']['count']};{res_json['total_scores']['hard']['count']};{res_json['total_scores']['extra']['count']};{res_json['total_scores']['all']['count']}") 
            print(f"checkpoint;type;easy;medium;hard;extra;all") 
            print(f"{step};exact match;{res_json['total_scores']['easy']['exact']:.3f};{res_json['total_scores']['medium']['exact']:.3f};{res_json['total_scores']['hard']['exact']:.3f};{res_json['total_scores']['extra']['exact']:.3f};{res_json['total_scores']['all']['exact']:.3f}") 
            
            # Open, append, and close on every step so results reach disk immediately.
            result = open(f"{exp_config['eval_output']}/eval-results.csv", "a", encoding='utf8')
            if first_loop:
                result.write(f"*;count;{res_json['total_scores']['easy']['count']};{res_json['total_scores']['medium']['count']};{res_json['total_scores']['hard']['count']};{res_json['total_scores']['extra']['count']};{res_json['total_scores']['all']['count']}\n")
            first_loop = False
            result.write(f"{step};exact match;{res_json['total_scores']['easy']['exact']:.3f};{res_json['total_scores']['medium']['exact']:.3f};{res_json['total_scores']['hard']['exact']:.3f};{res_json['total_scores']['extra']['exact']:.3f};{res_json['total_scores']['all']['exact']:.3f}\n") 
            result.close()
            
            # Clean CSV version of the original .eval file.
            with open(f"{exp_config['eval_output']}/{exp_config['eval_name']}-step{step}.csv", "w", encoding='utf8') as eval_clean:
                for per_item in res_json['per_item']:
                    # In the original .eval file 'exact' appears as 0/1 for some items
                    # and "false"/"true" for others; normalize everything to strings.
                    exact = "true" if per_item['exact'] in (1, True, "true") else "false"
                    eval_clean.write(f"{exact};{per_item['hardness']};{per_item['gold']};{per_item['predicted']}\n")

if __name__ == "__main__":
    main()