# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exact Match metric."""
import re
import string
import datasets
import numpy as np
import evaluate
_DESCRIPTION = """
Token-based similarity between a generated bash command and a reference command.
Each pair is scored as a weighted sum of three components: whether the command names
match, the positional overlap of the options (tokens starting with '-'), and the
positional overlap of the remaining arguments. The per-pair scores are averaged over
the dataset; a perfect match scores the sum of the three weights.
"""
_KWARGS_DESCRIPTION = """
Args:
    predictions: List of predicted bash commands (strings).
    references: List of reference bash commands (strings).
    cmd_weight: Float, defaults to 0.65. Weight given to the command name (the first
        token) matching exactly.
    opt_weight: Float, defaults to 0.25. Weight given to the positional overlap between
        the predicted and reference options (tokens starting with '-').
    arg_weight: Float, defaults to 0.15. Weight given to the positional overlap between
        the predicted and reference arguments (the remaining tokens).
    ignore_case: Boolean, defaults to False. If true, lowercases predictions and
        references so that capitalization differences are ignored.
    ignore_numbers: Boolean, defaults to False. If true, removes all digits before
        comparing predictions and references.
Returns:
    nl2bash_m: Dictionary containing the score averaged over all prediction/reference
        pairs. A perfect match scores cmd_weight + opt_weight + arg_weight.
Examples:
    >>> nl2bash_m_metric = evaluate.load("Josh98/nl2bash_m")  # repo id may differ depending on where the metric is hosted
    >>> refs = ["ls -l /home"]
    >>> preds = ["ls -l /tmp"]
    >>> results = nl2bash_m_metric.compute(references=refs, predictions=preds)
    >>> print(round(results["nl2bash_m"], 2))
    0.9
"""
_CITATION = """
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class nl2bash_m(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}
),
reference_urls=[],
)
    def get_score(self, pred, ref):
        """Fraction of positions at which `pred` and `ref` hold the same token."""
        if not pred and not ref:
            return 1.0
        cor = 0
        for i in range(min(len(pred), len(ref))):
            if pred[i] == ref[i]:
                cor += 1
        return cor / max(len(pred), len(ref))
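    # Illustration of get_score with hypothetical inputs: comparing ["-a", "-l"]
    # against ["-a", "-n", "-l"] matches position 0 only ("-l" sits at a different
    # position in each list), so the overlap is 1 / max(2, 3) = 1/3.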
    def _compute(
        self,
        predictions,
        references,
        cmd_weight=0.65,
        opt_weight=0.25,
        arg_weight=0.15,
        ignore_case=False,
        ignore_numbers=False,
    ):
        predictions = np.asarray(predictions)
        references = np.asarray(references)

        if ignore_case:
            predictions = np.char.lower(predictions)
            references = np.char.lower(references)

        if ignore_numbers:
            repl_table = string.digits.maketrans("", "", string.digits)
            predictions = np.char.translate(predictions, table=repl_table)
            references = np.char.translate(references, table=repl_table)

        final_score = 0.0
        for pred, ref in zip(predictions, references):
            pred_words, ref_words = pred.split(), ref.split()
            # The command name is the first token of each sequence.
            cmd_corr = 1 if pred_words.pop(0) == ref_words.pop(0) else 0
            # Options are the remaining tokens that start with '-'.
            pred_option = [x for x in pred_words if x[0] == "-"]
            ref_option = [x for x in ref_words if x[0] == "-"]
            # Arguments are the remaining tokens that do not start with '-'.
            pred_args = [x for x in pred_words if x[0] != "-"]
            ref_args = [x for x in ref_words if x[0] != "-"]
            # Weighted combination of command, option, and argument agreement.
            cmd_score = cmd_weight * cmd_corr
            opt_score = opt_weight * self.get_score(pred_option, ref_option)
            arg_score = arg_weight * self.get_score(pred_args, ref_args)
            final_score += cmd_score + opt_score + arg_score

        final_score = final_score / len(predictions)
        return {"nl2bash_m": final_score}