Spaces:
Running
Running
# This script computes COMET metrics and also performs significance testing on the evaluation set | |
# where each subdirectory contains En-X pair | |
echo `date` | |
devtest_data_dir=$1 # path to the evaluation directory | |
model_name=${2-"Unbabel/wmt22-comet-da"} # name of the model checkpoint | |
# predefined list of languages supported by COMET | |
langs=(asm_Beng ben_Beng guj_Gujr hin_Deva kan_Knda mal_Mlym mar_Deva ory_Orya pan_Guru tam_Taml tel_Telu urd_Arab) | |
# we predefine a set of systems which we consider for evaluation | |
# feel free to change the below line in case you want to add or remove any system | |
system=(google azure nllb mbart50 m2m100 it1 it2) | |
# iterate over the list of predefined languages | |
for lang in "${langs[@]}"; do | |
mkdir -p "$devtest_data_dir/eng_Latn-$lang/comet" | |
# -------------------------------------------------------------- | |
# COMET score computation | |
# -------------------------------------------------------------- | |
# iterate over the list of predefined systems | |
for sys in "${system[@]}"; do | |
echo "${sys}" | |
# en - indic direction | |
if [ -f "$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.$sys" ]; then | |
echo "eng_Latn-${lang}" | |
src_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn | |
pred_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.$sys | |
ref_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang | |
out_fname=$devtest_data_dir/eng_Latn-$lang/comet/eng_Latn_${lang}_${sys}_comet.txt | |
# Compute COMET scores using the `comet-score` | |
comet-score -s $src_fname -t $pred_fname -r $ref_fname --gpus 1 --model $model_name --quiet --only_system > $out_fname | |
fi | |
# indic - en direction | |
if [ -f "$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.$sys" ]; then | |
echo "${lang}-eng_Latn" | |
src_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang | |
pred_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.$sys | |
ref_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn | |
out_fname=$devtest_data_dir/eng_Latn-$lang/comet/${lang}_eng_Latn_${sys}_comet.txt | |
# Compute COMET scores using the `comet-score` | |
comet-score -s $src_fname -t $pred_fname -r $ref_fname --gpus 1 --model $model_name --quiet --only_system > $out_fname | |
fi | |
done | |
# -------------------------------------------------------------- | |
# COMET significance testing | |
# -------------------------------------------------------------- | |
# en - indic direction | |
src_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn | |
pred_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang.pred.* | |
ref_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang | |
out_fname=$devtest_data_dir/eng_Latn-$lang/comet/eng_Latn_${lang}_comet_stat.txt | |
# Compute COMET significance scores using the `comet-compare` | |
comet-compare -s $src_fname -t $pred_fname -r $ref_fname > $out_fname | |
# indic-en direction | |
src_fname=$devtest_data_dir/eng_Latn-$lang/test.$lang | |
pred_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn.pred.* | |
ref_fname=$devtest_data_dir/eng_Latn-$lang/test.eng_Latn | |
out_fname=$devtest_data_dir/eng_Latn-$lang/comet/${lang}_eng_Latn_comet_stat.txt | |
# Compute COMET significance scores using the `comet-compare` | |
comet-compare -s $src_fname -t $pred_fname -r $ref_fname > $out_fname | |
done | |