Spaces:

mohdelgaar
/

LingConv

Running

upload lng

b028d48 8 months ago

1.14 kB

	# Per-language parser settings, selected by the value of $lang
	# (presumably set by the script that sources this file -- confirm against caller).
	#
	# Variables defined here:
	#   lang_opts   language specific options for both training and document parsing
	#   parse_opts  options for parsing input documents (used by lexparser-lang.sh)
	#   tlp         parser-params class name under edu.stanford.nlp.parser.lexparser
	#
	# An unset, empty or unrecognised $lang leaves lang_opts and parse_opts empty
	# and tlp at the bare package prefix.

	#Language specific options for both training and document parsing
	lang_opts=

	#Options for parsing input documents (used by lexparser-lang.sh)
	parse_opts=

	tlp=edu.stanford.nlp.parser.lexparser

	# A quoted case selector replaces the former `[ $lang == ... ]` chain: the
	# unquoted expansion made `[` fail with "unary operator expected" when lang
	# was empty and "too many arguments" when it contained whitespace, and `==`
	# inside `[ ]` is not portable to POSIX sh. "${lang-}" also keeps this file
	# usable under `set -u` when lang has not been set at all.
	case "${lang-}" in
		Arabic)
			tlp="${tlp}.ArabicTreebankParserParams"
			lang_opts="-encoding UTF-8 -arabicFactored"
			parse_opts="-tokenized"
			;;
		ArabicUTM)
			tlp="${tlp}.ArabicUTMTreebankParserParams"
			lang_opts="-encoding UTF-8 -arabicFactored"
			parse_opts="-tokenized"
			;;
		English)
			# No extra options for English: only the params class is selected.
			tlp="${tlp}.EnglishTreebankParserParams"
			;;
		German)
			tlp="${tlp}.NegraPennTreebankParserParams"
			lang_opts="-hMarkov 1 -vMarkov 2 -vSelSplitCutOff 300 -uwm 1 -unknownSuffixSize 2 -nodeCleanup 2"
			;;
		Chinese)
			tlp="${tlp}.ChineseTreebankParserParams"
			lang_opts="-chineseFactored -encoding GB18030"
			parse_opts="-tokenized -sentences newline -escaper edu.stanford.nlp.trees.international.pennchinese.ChineseEscaper"
			;;
		French)
			tlp="${tlp}.FrenchTreebankParserParams"
			lang_opts="-frenchFactored -encoding UTF-8"
			parse_opts="-tokenized"
			;;
		*)
			# Unknown or empty language: keep the defaults assigned above.
			;;
	esac