FUXI
/

yuyan-10b

Model card Files Files and versions Community

yuyan-10b / tasks /msdp /evaluate.py

Shawn001's picture

Upload 53 files

c2c125c over 1 year ago

1.84 kB

	# coding=utf-8
	# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Model evaluation"""

	from megatron import get_args
	from megatron import print_rank_0
	from tasks.msdp.metrics import F1Metric
	from tqdm import tqdm


	def evaluate_f1(guess_file, answer_file):
	    """Compute and report precision, recall and F1 for two line-aligned text files.

	    Line ``k`` of ``guess_file`` is scored against line ``k`` of ``answer_file``.
	    Both files are read fully into memory before scoring.

	    Args:
	        guess_file: Path to the file of generated lines. Each line is stripped of
	            surrounding whitespace and any ``<|endoftext|>`` marker is removed.
	        answer_file: Path to the file of reference lines. Each line is stripped of
	            surrounding whitespace; a line equal to ``no_passages_used`` is
	            replaced by the empty string before scoring.

	    Returns:
	        None. The scores are only reported through ``print_rank_0``.

	    Raises:
	        AssertionError: If the two files do not contain the same number of lines.
	            (As a plain ``assert``, this check is skipped under ``python -O``.)
	    """
	    # The marker must be spelled with bare pipes. The previous spelling contained
	    # backslashes ("\|"), which Python keeps verbatim in the string value, so it
	    # never matched the marker and it was left inside the scored text.
	    end_of_text = "<|endoftext|>"

	    print_rank_0('reading %s' % guess_file)
	    with open(guess_file, "r") as f:
	        # Strip first, then remove the marker (same order as before the fix);
	        # str.replace is a no-op when the marker is absent.
	        guess_list = [line.strip().replace(end_of_text, "") for line in tqdm(f)]

	    print_rank_0('reading %s' % answer_file)
	    answer_list = []
	    with open(answer_file, "r") as f:
	        for line in tqdm(f):
	            line = line.strip()
	            # The placeholder line is scored as an empty reference.
	            answer_list.append("" if line == "no_passages_used" else line)

	    # Scoring is pairwise by position, so the files must be the same length.
	    assert len(guess_list) == len(answer_list), \
	        "lengths of guess and answer are different!"

	    precision, recall, f1 = F1Metric.compute_all_pairs(guess_list, answer_list)
	    print_rank_0('Precision: %.4f; recall: %.4f; f1: %.4f' % (precision, recall, f1))

	    print_rank_0('done :-)')


	def main():
	    """Run the F1 evaluation on the files named in the global arguments.

	    Fetches the arguments object from ``get_args()`` and passes its
	    ``guess_file`` and ``answer_file`` attributes to ``evaluate_f1``.
	    """
	    cli_args = get_args()
	    guess_path, answer_path = cli_args.guess_file, cli_args.answer_file
	    evaluate_f1(guess_path, answer_path)