# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""nl2bash metric."""
import string

import datasets
import numpy as np

import evaluate
_DESCRIPTION = """
Scores how close a generated bash command is to a reference command. Each prediction is
compared against its references as a weighted combination of three parts: whether the
command name matches (cmd_weight), positional overlap of the option tokens (opt_weight),
and positional overlap of the argument tokens (arg_weight).
"""
_KWARGS_DESCRIPTION = """
Args:
    predictions: List of predicted bash commands.
    references: List of reference bash commands (one or more references per prediction).
    cmd_weight: Weight given to matching the command name. Defaults to 0.65.
    opt_weight: Weight given to matching the option tokens. Defaults to 0.25.
    arg_weight: Weight given to matching the argument tokens. Defaults to 0.15.
    ignore_case: If True, lowercase predictions and references before comparing. Defaults to True.
    ignore_numbers: If True, strip digits from predictions and references before comparing. Defaults to True.
Returns:
    nl2bash_m: Dictionary containing the nl2bash_m score. Scores range from 0.0 to the sum of
        the three weights (1.05 with the default weights).
Examples:
    >>> metric = evaluate.load("Josh98/nl2bash_m")
    >>> preds = ["ls -l /home/userr", "ls -l /home/josh", "lss /home/josh some argument"]
    >>> refs = [["ls -l /home/user"], ["ls -l --v /home/josh"], ["ls /home/josh"]]
    >>> results = metric.compute(references=refs, predictions=preds)
    >>> print(round(results["nl2bash_m"], 3))
    0.708
"""
_CITATION = """
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class nl2bash_m(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=[
datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
}
),
datasets.Features(
{
"predictions": datasets.Value("string", id="sequence"),
"references": datasets.Value("string", id="sequence"),
}
),
],
reference_urls=[],
)
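    # Token classification used by _compute below: the first whitespace-separated
    # token is the command name, tokens beginning with "-" are options, and the
    # remaining tokens are arguments. Options and arguments are compared
    # positionally via get_score.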
    def get_score(self, pred, ref):
        """Fraction of tokens that match at the same position, normalized by the longer list."""
        if not pred and not ref:
            return 1
        correct = 0
        for i in range(min(len(pred), len(ref))):
            if pred[i] == ref[i]:
                correct += 1
        return correct / max(len(pred), len(ref))
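    # For example, get_score(["-l", "-a"], ["-l"]) returns 0.5: the first tokens
    # match, and the count is normalized by the longer list's length (2).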
def _compute(
self,
predictions,
references,
        cmd_weight=0.65,
        opt_weight=0.25,
        arg_weight=0.15,
ignore_case=True,
ignore_numbers=True,
):
predictions = np.asarray(predictions)
references = np.asarray(references)
if ignore_case:
predictions = np.char.lower(predictions)
references = np.char.lower(references)
if ignore_numbers:
            repl_table = str.maketrans("", "", string.digits)
predictions = np.char.translate(predictions, table=repl_table)
references = np.char.translate(references, table=repl_table)
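        # e.g. with ignore_numbers, "cat file1.txt" is compared as "cat file.txt"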
        final_score = 0
        for pred, refs in zip(predictions, references):
            # Flat string references (the second feature config) are wrapped in a list.
            if isinstance(refs, str):
                refs = [refs]
            best_score = 0
            for ref in refs:
                # Empty commands are handled per reference: two empty strings match
                # perfectly, while a single empty string cannot match anything.
                if len(pred) == 0 and len(ref) == 0:
                    score = 1
                elif len(pred) == 0 or len(ref) == 0:
                    score = 0
                else:
                    pred_words, ref_words = pred.split(), ref.split()
                    # The first token of each command is the command name.
                    cmd_corr = 1 if pred_words.pop(0) == ref_words.pop(0) else 0
                    # Tokens starting with "-" are options (e.g. "-l", "--v").
                    pred_option = [x for x in pred_words if x[0] == '-']
                    ref_option = [x for x in ref_words if x[0] == '-']
                    # All remaining tokens are arguments.
                    pred_args = [x for x in pred_words if x[0] != '-']
                    ref_args = [x for x in ref_words if x[0] != '-']
                    # Weighted combination of command, option, and argument scores.
                    cmd_score = cmd_weight * cmd_corr
                    opt_score = opt_weight * self.get_score(pred_option, ref_option)
                    arg_score = arg_weight * self.get_score(pred_args, ref_args)
                    score = cmd_score + opt_score + arg_score
                # Keep the best score over all references for this prediction.
                best_score = max(best_score, score)
            final_score += best_score
        return {"nl2bash_m": final_score / len(predictions)}