Spaces:

jiangjiechen
/

loren-fact-checking

Build error

App Files Files Community

loren-fact-checking / src /mrc_client /seq2seq /run_eval_search.py

jiangjiechen

init loren for spaces

7f7285f almost 3 years ago

raw

history blame

5.34 kB

	#!/usr/bin/env python

	import argparse
	import itertools
	import operator
	import sys
	from collections import OrderedDict

	from run_eval import datetime_now, run_generate
	from seq2seq_utils import ROUGE_KEYS


	# A table of supported tasks and the list of scores in the order of importance to be sorted by.
	# To add a new task, simply list the score names that `run_eval.run_generate()` returns
	task_score_names = {
	"translation": ["bleu"],
	"summarization": ROUGE_KEYS,
	}


	def parse_search_arg(search):
	groups = search.split()
	entries = {k: vs for k, vs in (g.split("=") for g in groups)}
	entry_names = list(entries.keys())
	sets = [list((f"--{k} {v}") for v in vs.split(":")) for k, vs in entries.items()]
	matrix = [list(x) for x in itertools.product(*sets)]
	return matrix, entry_names


	def run_search():
	"""
	Run parametric search over the desired hparam space with help of ``run_eval.py``.

	All the arguments except ``--search`` are passed to ``run_eval.py`` as is. The values inside of "--search" are parsed, reformatted and fed to ``run_eval.py`` as additional args.

	The format for the ``--search`` value is a simple string with hparams and colon separated values to try, e.g.:
	```
	--search "num_beams=5:10 length_penalty=0.8:1.0:1.2 early_stopping=true:false"
	```
	which will generate ``12`` ``(232)`` searches for a product of each hparam. For example the example that was just used will invoke ``run_eval.py`` repeatedly with:

	```
	--num_beams 5 --length_penalty 0.8 --early_stopping true
	--num_beams 5 --length_penalty 0.8 --early_stopping false
	[...]
	--num_beams 10 --length_penalty 1.2 --early_stopping false
	```

	On completion, this function prints a markdown table of the results sorted by the best BLEU score and the winning arguments.


	"""
	prog = sys.argv[0]

	parser = argparse.ArgumentParser(
	usage="\n\nImportant: this script accepts all arguments `run_eval.py` accepts and then a few extra, therefore refer to `run_eval.py -h` for the complete list."
	)
	parser.add_argument(
	"--search",
	type=str,
	required=False,
	help='param space to search, e.g. "num_beams=5:10 length_penalty=0.8:1.0:1.2"',
	)
	parser.add_argument(
	"--bs", type=int, default=8, required=False, help="initial batch size (may get reduced if it's too big)"
	)
	parser.add_argument("--task", type=str, help="used for task_specific_params + metrics")
	parser.add_argument(
	"--info",
	nargs="?",
	type=str,
	const=datetime_now(),
	help="add custom notes to be printed before the results table. If no value is passed, the current datetime string will be used.",
	)
	args, args_main = parser.parse_known_args()
	# we share some of the args
	args_main.extend(["--task", args.task])
	args_normal = [prog] + args_main

	# to support variations like translation_en_to_de"
	task = "translation" if "translation" in args.task else "summarization"

	matrix, col_names = parse_search_arg(args.search)
	col_names[0:0] = task_score_names[task] # score cols first
	col_widths = {col: len(str(col)) for col in col_names}
	results = []
	for r in matrix:
	hparams = {k: v for k, v in (x.replace("--", "").split() for x in r)}
	args_exp = " ".join(r).split()
	args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM
	sys.argv = args_normal + args_exp

	# XXX: need to trap CUDA OOM and lower args.bs if that happens and retry

	scores = run_generate(verbose=False)
	# make sure scores are first in the table
	result = OrderedDict()
	for score in task_score_names[task]:
	result[score] = scores[score]
	result.update(hparams)
	results.append(result)

	# find widest entries
	for k, v in result.items():
	l = len(str(v))
	if l > col_widths[k]:
	col_widths[k] = l

	results_sorted = sorted(results, key=operator.itemgetter(*task_score_names[task]), reverse=True)
	print(" \| ".join([f"{col:{col_widths[col]}}" for col in col_names]))
	print(" \| ".join([f"{'-'*col_widths[col]}" for col in col_names]))
	for row in results_sorted:
	print(" \| ".join([f"{row[col]:{col_widths[col]}}" for col in col_names]))

	best = results_sorted[0]
	for score in task_score_names[task]:
	del best[score]
	best_args = [f"--{k} {v}" for k, v in best.items()]
	dyn_args = ["--bs", str(args.bs)]
	if args.info:
	print(f"\nInfo: {args.info}")
	print("\nBest score args:")
	print(" ".join(args_main + best_args + dyn_args))

	return results_sorted


	if __name__ == "__main__":
	# Usage:
	# [normal-run_eval_search.py cmd plus] \
	# --search="num_beams=1:5:10 length_penalty=0.8:1:1.2 early_stopping=true:false"
	#
	# Example:
	# PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_NAME \
	# $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target \
	# --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation \
	# --search="num_beams=1:5:10 length_penalty=0.8:1:1.2 early_stopping=true:false"
	run_search()