Spaces:
Running
Running
# This script normalizes punctuation and strips extra spaces in the input text.
# Directly sourced from https://github.com/pluiez/NLLB-inference
# Usage: <this-script> lang
#
# Maps the given language token to the code listed in
# utils.map_token_lang.tsv (column 1 = lookup key, column 2 = mapped value)
# and runs normalize-punctuation.perl with the mapped code as its argument.
# Both files are looked up in the directory containing this script.
# stdin/stdout are inherited by the perl process.
set -euo pipefail

# Resolve sibling files relative to the script itself, not the caller's cwd.
root=$(dirname -- "$0")
lang_map_path=$root/utils.map_token_lang.tsv

# Print the usage line to stderr and exit with status 1.
usage() {
  echo "usage: $0 lang" >&2
  exit 1
}

[[ $# -eq 1 ]] || usage
lang=$1

# Load the mapping table. Fields are split with parameter expansion rather
# than one `cut` process per field; like `cut -f`, a line without a tab
# yields the whole line for both key and value.
declare -A lang_map=()
while IFS= read -r line || [[ -n $line ]]; do  # also accept a last line without newline
  key=${line%%$'\t'*}
  rest=${line#*$'\t'}
  val=${rest%%$'\t'*}
  # An empty key is not a valid associative-array subscript; skip such lines
  # instead of aborting under `set -e`.
  [[ -n $key ]] || continue
  lang_map[$key]=$val
done < "$lang_map_path"

# Look up the full token first, then its first three characters.
# ${arr[key]+set} expands to "set" whenever the element exists (even if its
# value is empty); the non-empty guard keeps an empty subscript from ever
# being evaluated.
prefix=${lang:0:3}
if [[ -n $lang && -n ${lang_map[$lang]+set} ]]; then
  lang=${lang_map[$lang]}
elif [[ -n $prefix && -n ${lang_map[$prefix]+set} ]]; then
  lang=${lang_map[$prefix]}
else
  echo "undefined mapping: ${lang}, falling back to: en" >&2
  lang=en
fi

perl "$root/normalize-punctuation.perl" "$lang"