Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- language_tool_python/__main__.py +178 -0
- language_tool_python/config_file.py +96 -0
- language_tool_python/console_mode.py +63 -0
- language_tool_python/download_lt.py +180 -0
- language_tool_python/language_tag.py +38 -0
- language_tool_python/match.py +119 -0
- language_tool_python/server.py +399 -0
- language_tool_python/utils.py +179 -0
- language_tool_python/which.py +75 -0
language_tool_python/__main__.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""LanguageTool command line."""
|
2 |
+
|
3 |
+
import argparse
|
4 |
+
import locale
|
5 |
+
import re
|
6 |
+
import sys
|
7 |
+
|
8 |
+
from .server import LanguageTool
|
9 |
+
from .utils import LanguageToolError
|
10 |
+
|
11 |
+
import pkg_resources
|
12 |
+
__version__ = pkg_resources.require("language_tool_python")[0].version
|
13 |
+
|
14 |
+
|
15 |
+
def parse_args():
    """Build the command-line parser, parse ``sys.argv`` and validate it.

    Returns:
        argparse.Namespace: the parsed options.

    The process exits through ``parser.error`` when ``--enabled-only`` is
    combined with ``--disable`` or is given without any ``--enable`` rule.
    """
    summary = __doc__.strip() if __doc__ else None
    cli = argparse.ArgumentParser(description=summary,
                                  prog='language_tool_python')

    cli.add_argument('files', nargs='+',
                     help='plain text file or "-" for stdin')
    cli.add_argument('-c', '--encoding', help='input encoding')
    cli.add_argument('-l', '--language', metavar='CODE',
                     help='language code of the input or "auto"')
    cli.add_argument('-m', '--mother-tongue', metavar='CODE',
                     help='language code of your first language')

    # --disable and --enable are parsed and accumulated the same way; each
    # one gets its own fresh set as the default value.
    rule_options = (
        ('-d', '--disable', 'list of rule IDs to be disabled'),
        ('-e', '--enable', 'list of rule IDs to be enabled'),
    )
    for short_flag, long_flag, help_text in rule_options:
        cli.add_argument(short_flag, long_flag, metavar='RULES',
                         type=get_rules, action=RulesAction, default=set(),
                         help=help_text)

    cli.add_argument('--enabled-only', action='store_true',
                     help='disable all rules except those specified in '
                          '--enable')
    cli.add_argument('--version', action='version',
                     version='%(prog)s {}'.format(__version__),
                     help='show version')
    cli.add_argument('-a', '--apply', action='store_true',
                     help='automatically apply suggestions if available')
    cli.add_argument('-s', '--spell-check-off', dest='spell_check',
                     action='store_false',
                     help='disable spell-checking rules')
    cli.add_argument('--ignore-lines',
                     help='ignore lines that match this regular expression')
    cli.add_argument('--remote-host',
                     help='hostname of the remote LanguageTool server')
    cli.add_argument('--remote-port',
                     help='port of the remote LanguageTool server')

    args = cli.parse_args()

    # parser.error() terminates the process, so at most one message is shown.
    if args.enabled_only and args.disable:
        cli.error('--enabled-only cannot be used with --disable')
    if args.enabled_only and not args.enable:
        cli.error('--enabled-only requires --enable')

    return args
|
62 |
+
|
63 |
+
|
64 |
+
class RulesAction(argparse.Action):
    """argparse action that merges parsed rule IDs into the option's set.

    Each occurrence of the option adds its values to the set already stored
    on the namespace instead of replacing it.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        collected = getattr(namespace, self.dest)
        collected.update(values)
|
67 |
+
|
68 |
+
|
69 |
+
def get_rules(rules: str) -> set:
    """Split a free-form list of rule IDs into a set of upper-cased IDs.

    An ID is any run of word characters and hyphens; everything else acts
    as a separator.
    """
    rule_ids = re.findall(r"[\w\-]+", rules)
    return set(map(str.upper, rule_ids))
|
71 |
+
|
72 |
+
|
73 |
+
def get_text(filename, encoding, ignore):
    """Read a text file, blanking out the lines that match ``ignore``.

    Args:
        filename: path (or open file descriptor) handed to ``open``.
        encoding: text encoding used to decode the file.
        ignore: regular expression, or a falsy value to keep every line.
            A line whose start matches is replaced by a bare newline, so
            the line count of the returned text is unchanged.

    Returns:
        str: the file content with ignored lines emptied.
    """
    # Compile once instead of resolving the pattern again for every line.
    pattern = re.compile(ignore) if ignore else None
    with open(filename, encoding=encoding) as f:
        if pattern is None:
            return f.read()
        # Iterate the file lazily; no intermediate list of lines is needed.
        return ''.join('\n' if pattern.match(line) else line for line in f)
|
78 |
+
|
79 |
+
|
80 |
+
def print_unicode(text):
    """Print ``text`` to standard output.

    The former Python 2 byte-encoding branch is gone: this module already
    relies on Python 3-only syntax (function annotations and
    ``print(..., file=...)``), so that branch could never run.
    """
    print(text)
|
86 |
+
|
87 |
+
|
88 |
+
def main():
    """Check (or auto-correct) every file named on the command line.

    Returns:
        int: 0 when nothing was reported, 2 when at least one rule matched,
        1 when language auto-detection was requested but the
        ``guess_language`` package is missing or detected nothing.
    """
    args = parse_args()
    exit_code = 0
    several_files = len(args.files) > 1

    for filename in args.files:
        if several_files:
            print(filename, file=sys.stderr)

        if filename != '-':
            encoding = args.encoding or 'utf-8'
        else:
            # From here on ``filename`` holds the stdin file descriptor, so
            # that is also what later messages print for this input.
            filename = sys.stdin.fileno()
            if args.encoding:
                encoding = args.encoding
            elif sys.stdin.isatty():
                encoding = sys.stdin.encoding
            else:
                encoding = locale.getpreferredencoding()

        # "host" or "host:port"; None means no remote server was requested.
        remote_server = args.remote_host
        if remote_server is not None and args.remote_port is not None:
            remote_server += ':{}'.format(args.remote_port)

        lang_tool = LanguageTool(
            motherTongue=args.mother_tongue,
            remote_server=remote_server,
        )

        try:
            text = get_text(filename, encoding, ignore=args.ignore_lines)
        except UnicodeError as exception:
            print('{}: {}'.format(filename, exception), file=sys.stderr)
            continue

        if args.language:
            if args.language.lower() != 'auto':
                lang_tool.language = args.language
            else:
                # Optional dependency, imported only when it is needed.
                try:
                    from guess_language import guess_language
                except ImportError:
                    print('guess_language is unavailable.', file=sys.stderr)
                    return 1
                language = guess_language(text)
                print('Detected language: {}'.format(language),
                      file=sys.stderr)
                if not language:
                    return 1
                lang_tool.language = language

        if not args.spell_check:
            lang_tool.disable_spellchecking()

        lang_tool.disabled_rules.update(args.disable)
        lang_tool.enabled_rules.update(args.enable)
        lang_tool.enabled_rules_only = args.enabled_only

        try:
            if args.apply:
                print_unicode(lang_tool.correct(text))
            else:
                for match in lang_tool.check(text):
                    quoted = ("'{}'".format(word)
                              for word in match.replacements)
                    replacement_text = ', '.join(quoted).strip()
                    message = match.message

                    # Messages that end with punctuation already include
                    # the suggestion.
                    needs_suggestions = (
                        replacement_text
                        and not message.endswith(('.', '?'))
                    )
                    if needs_suggestions:
                        message += '; suggestions: ' + replacement_text

                    print_unicode('{}: {}: {}'.format(
                        filename, match.ruleId, message))

                    exit_code = 2
        except LanguageToolError as exception:
            print('{}: {}'.format(filename, exception), file=sys.stderr)
            continue

    return exit_code
|
176 |
+
|
177 |
+
|
178 |
+
# Run the command-line interface and use its return value as exit status.
raise SystemExit(main())
|
language_tool_python/config_file.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict
|
2 |
+
|
3 |
+
import atexit
|
4 |
+
import os
|
5 |
+
import tempfile
|
6 |
+
|
7 |
+
# Keys accepted in a LanguageToolConfig dictionary.
# 'blockedReferrers' and 'premiumOnly' used to be fused into the single
# string 'blockedReferrerspremiumOnly' by a missing comma, so neither key
# was accepted. Repeated entries were dropped; they had no effect in a set.
ALLOWED_CONFIG_KEYS = {
    'maxTextLength', 'maxTextHardLength', 'secretTokenKey',
    'maxCheckTimeMillis', 'maxErrorsPerWordRate', 'maxSpellingSuggestions',
    'maxCheckThreads', 'cacheSize', 'cacheTTLSeconds', 'requestLimit',
    'requestLimitInBytes', 'timeoutRequestLimit',
    'requestLimitPeriodInSeconds', 'languageModel', 'word2vecModel',
    'fasttextModel', 'fasttextBinary', 'maxWorkQueueSize', 'rulesFile',
    'warmUp', 'blockedReferrers', 'premiumOnly', 'disabledRuleIds',
    'pipelineCaching', 'maxPipelinePoolSize', 'pipelineExpireTimeInSeconds',
    'pipelinePrewarming',
}
|
15 |
+
class LanguageToolConfig:
    """A validated configuration dictionary written to a temporary file.

    The constructor checks the keys against ``ALLOWED_CONFIG_KEYS``, rejects
    an empty dictionary, and writes the entries as ``key=value`` lines to a
    temporary file that is removed when the interpreter exits.
    """

    # The configuration dictionary exactly as passed in.
    config: Dict[str, Any]
    # Filesystem path of the generated key=value file.
    path: str

    def __init__(self, config: Dict[str, Any]):
        assert set(config.keys()) <= ALLOWED_CONFIG_KEYS, f"unexpected keys in config: {set(config.keys()) - ALLOWED_CONFIG_KEYS}"
        assert len(config), "config cannot be empty"
        self.config = config
        self.path = self._create_temp_file()

    def _create_temp_file(self) -> str:
        """Write the configuration to a temp file and return its path."""
        # delete=False keeps the file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            # Write key=value entries as lines in the temporary file.
            handle.writelines(
                f'{key}={value}\n'.encode()
                for key, value in self.config.items()
            )
            temp_path = handle.name

        # Remove the file when the program exits.
        atexit.register(os.unlink, temp_path)

        return temp_path
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
+
"""
|
41 |
+
❯ /usr/bin/java -cp /Users/johnmorris/.cache/language_tool_python/LanguageTool-5.6/languagetool-server.jar org.languagetool.server.HTTPServer --help
|
42 |
+
Usage: HTTPServer [--config propertyFile] [--port|-p port] [--public]
|
43 |
+
--config FILE a Java property file (one key=value entry per line) with values for:
|
44 |
+
'maxTextLength' - maximum text length, longer texts will cause an error (optional)
|
45 |
+
'maxTextHardLength' - maximum text length, applies even to users with a special secret 'token' parameter (optional)
|
46 |
+
'secretTokenKey' - secret JWT token key, if set by user and valid, maxTextLength can be increased by the user (optional)
|
47 |
+
'maxCheckTimeMillis' - maximum time in milliseconds allowed per check (optional)
|
48 |
+
'maxErrorsPerWordRate' - checking will stop with error if there are more rules matches per word (optional)
|
49 |
+
'maxSpellingSuggestions' - only this many spelling errors will have suggestions for performance reasons (optional,
|
50 |
+
affects Hunspell-based languages only)
|
51 |
+
'maxCheckThreads' - maximum number of threads working in parallel (optional)
|
52 |
+
'cacheSize' - size of internal cache in number of sentences (optional, default: 0)
|
53 |
+
'cacheTTLSeconds' - how many seconds sentences are kept in cache (optional, default: 300 if 'cacheSize' is set)
|
54 |
+
'requestLimit' - maximum number of requests per requestLimitPeriodInSeconds (optional)
|
55 |
+
'requestLimitInBytes' - maximum aggregated size of requests per requestLimitPeriodInSeconds (optional)
|
56 |
+
'timeoutRequestLimit' - maximum number of timeout request (optional)
|
57 |
+
'requestLimitPeriodInSeconds' - time period to which requestLimit and timeoutRequestLimit applies (optional)
|
58 |
+
'languageModel' - a directory with '1grams', '2grams', '3grams' sub directories which contain a Lucene index
|
59 |
+
each with ngram occurrence counts; activates the confusion rule if supported (optional)
|
60 |
+
'word2vecModel' - a directory with word2vec data (optional), see
|
61 |
+
https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
|
62 |
+
'fasttextModel' - a model file for better language detection (optional), see
|
63 |
+
https://fasttext.cc/docs/en/language-identification.html
|
64 |
+
'fasttextBinary' - compiled fasttext executable for language detection (optional), see
|
65 |
+
https://fasttext.cc/docs/en/support.html
|
66 |
+
'maxWorkQueueSize' - reject request if request queue gets larger than this (optional)
|
67 |
+
'rulesFile' - a file containing rules configuration, such as .langugagetool.cfg (optional)
|
68 |
+
'warmUp' - set to 'true' to warm up server at start, i.e. run a short check with all languages (optional)
|
69 |
+
'blockedReferrers' - a comma-separated list of HTTP referrers (and 'Origin' headers) that are blocked and will not be served (optional)
|
70 |
+
'premiumOnly' - activate only the premium rules (optional)
|
71 |
+
'disabledRuleIds' - a comma-separated list of rule ids that are turned off for this server (optional)
|
72 |
+
'pipelineCaching' - set to 'true' to enable caching of internal pipelines to improve performance
|
73 |
+
'maxPipelinePoolSize' - cache size if 'pipelineCaching' is set
|
74 |
+
'pipelineExpireTimeInSeconds' - time after which pipeline cache items expire
|
75 |
+
'pipelinePrewarming' - set to 'true' to fill pipeline cache on start (can slow down start a lot)
|
76 |
+
Spellcheck-only languages: You can add simple spellcheck-only support for languages that LT doesn't
|
77 |
+
support by defining two optional properties:
|
78 |
+
'lang-xx' - set name of the language, use language code instead of 'xx', e.g. lang-tr=Turkish
|
79 |
+
'lang-xx-dictPath' - absolute path to the hunspell .dic file, use language code instead of 'xx', e.g.
|
80 |
+
lang-tr-dictPath=/path/to/tr.dic. Note that the same directory also needs to
|
81 |
+
contain a common_words.txt file with the most common 10,000 words (used for better language detection)
|
82 |
+
--port, -p PRT port to bind to, defaults to 8081 if not specified
|
83 |
+
--public allow this server process to be connected from anywhere; if not set,
|
84 |
+
it can only be connected from the computer it was started on
|
85 |
+
--allow-origin [ORIGIN] set the Access-Control-Allow-Origin header in the HTTP response,
|
86 |
+
used for direct (non-proxy) JavaScript-based access from browsers.
|
87 |
+
Example: --allow-origin "https://my-website.org"
|
88 |
+
Don't set a parameter for `*`, i.e. access from all websites.
|
89 |
+
--verbose, -v in case of exceptions, log the input text (up to 500 characters)
|
90 |
+
--languageModel a directory with '1grams', '2grams', '3grams' sub directories (per language)
|
91 |
+
which contain a Lucene index (optional, overwrites 'languageModel'
|
92 |
+
parameter in properties files)
|
93 |
+
--word2vecModel a directory with word2vec data (optional), see
|
94 |
+
https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
|
95 |
+
--premiumAlways activate the premium rules even when user has no username/password - useful for API servers
|
96 |
+
"""
|
language_tool_python/console_mode.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""Write to stdout without causing UnicodeEncodeError."""
|
3 |
+
|
4 |
+
import sys
|
5 |
+
|
6 |
+
|
7 |
+
# Only wrap stdout when it encodes strictly to a non-UTF encoding, i.e. when
# printing arbitrary Unicode could raise UnicodeEncodeError.
if (getattr(sys.stdout, 'errors', '') == 'strict'
        and not getattr(sys.stdout, 'encoding', '').lower().startswith('utf')):
    try:
        # Preferred: the optional ``translit`` package supplies the filter.
        import translit
        sys.stdout = translit.StreamFilter(sys.stdout)
    except ImportError:
        import codecs
        import unicodedata
        import warnings

        # Typographic quotes mapped to their plain ASCII counterparts.
        TRANSLIT_MAP = {
            0x2018: "'",
            0x2019: "'",
            0x201c: '"',
            0x201d: '"',
        }

        def simplify(s):
            """Replace curly quotes and drop combining marks from ``s``."""
            decomposed = unicodedata.normalize(
                'NFKD', s.translate(TRANSLIT_MAP))
            return ''.join(
                char for char in decomposed
                if not unicodedata.combining(char))

        def simple_translit_error_handler(error):
            """Codec error handler: simplify the characters that failed.

            Anything still unencodable after simplification is written as
            a backslash escape. Errors other than UnicodeEncodeError are
            re-raised.
            """
            if not isinstance(error, UnicodeEncodeError):
                raise error
            failed = error.object[error.start:error.end]
            encoded = simplify(failed).encode(
                error.encoding, 'backslashreplace')
            return encoded.decode(error.encoding), error.end

        class SimpleTranslitStreamFilter:

            """Filter a stream through simple transliteration."""

            # Name under which the error handler is registered with codecs.
            errors = 'simple_translit'

            def __init__(self, target):
                self.target = target

            def __getattr__(self, name):
                # Anything not overridden here is served by the real stream.
                return getattr(self.target, name)

            def write(self, s):
                self.target.write(self.downgrade(s))

            def writelines(self, lines):
                downgraded = [self.downgrade(line) for line in lines]
                self.target.writelines(downgraded)

            def downgrade(self, s):
                """Round-trip ``s`` through the target encoding."""
                target_encoding = self.target.encoding
                encoded = s.encode(target_encoding, self.errors)
                return encoded.decode(target_encoding)

        codecs.register_error(SimpleTranslitStreamFilter.errors,
                              simple_translit_error_handler)
        sys.stdout = SimpleTranslitStreamFilter(sys.stdout)
        warnings.warn('translit is unavailable', ImportWarning)
|
language_tool_python/download_lt.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""Download latest LanguageTool distribution."""
|
4 |
+
|
5 |
+
import logging
|
6 |
+
import os
|
7 |
+
import re
|
8 |
+
import requests
|
9 |
+
import subprocess
|
10 |
+
import sys
|
11 |
+
import tempfile
|
12 |
+
import tqdm
|
13 |
+
from typing import Optional
|
14 |
+
import zipfile
|
15 |
+
|
16 |
+
from distutils.spawn import find_executable
|
17 |
+
from urllib.parse import urljoin
|
18 |
+
from .utils import (
|
19 |
+
find_existing_language_tool_downloads,
|
20 |
+
get_language_tool_download_path,
|
21 |
+
LTP_JAR_DIR_PATH_ENV_VAR
|
22 |
+
)
|
23 |
+
|
24 |
+
# Module logger: bare-message format, INFO level and above.
logging.basicConfig(format='%(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


# Download host; the LTP_DOWNLOAD_HOST environment variable overrides it.
BASE_URL = os.environ.get(
    'LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/')
# Archive name template, filled in with the LanguageTool version.
FILENAME = 'LanguageTool-{version}.zip'

# LanguageTool version downloaded when the caller does not choose one.
LTP_DOWNLOAD_VERSION = '6.4'
|
35 |
+
|
36 |
+
# Quoted form printed by ``java -version``, e.g. ``openjdk version "1.8.0_60"``
# or ``java version "17"``. The second number is optional and any suffix up
# to the closing quote is tolerated, so strings such as "11.0" and "17-ea"
# are accepted as well.
JAVA_VERSION_REGEX = re.compile(
    r'^(?:java|openjdk) version "(?P<major1>\d+)(?:\.(?P<major2>\d+))?[^"]*"',
    re.MULTILINE)

# Unquoted form, e.g. ``openjdk 11.0.2 2019-01-15``. The word "version" is
# optional as a whole; the previous ``[version ]?`` was a character class
# that matched at most one single letter.
JAVA_VERSION_REGEX_UPDATED = re.compile(
    r'^(?:java|openjdk) (?:version )?(?P<major1>\d+)\.(?P<major2>\d+)',
    re.MULTILINE)


def parse_java_version(version_text):
    """Return Java version (major1, major2).

    ``major2`` is 0 when the version string has a single number.

    Raises:
        SystemExit: if no known version format is found in ``version_text``.

    >>> parse_java_version('''java version "1.6.0_65"
    ... Java(TM) SE Runtime Environment (build 1.6.0_65-b14-462-11M4609)
    ... Java HotSpot(TM) 64-Bit Server VM (build 20.65-b04-462, mixed mode))
    ... ''')
    (1, 6)

    >>> parse_java_version('''
    ... openjdk version "1.8.0_60"
    ... OpenJDK Runtime Environment (build 1.8.0_60-b27)
    ... OpenJDK 64-Bit Server VM (build 25.60-b23, mixed mode))
    ... ''')
    (1, 8)

    """
    # The quoted format takes precedence; the unquoted one is the fallback.
    match = (
        JAVA_VERSION_REGEX.search(version_text)
        or JAVA_VERSION_REGEX_UPDATED.search(version_text)
    )
    if not match:
        raise SystemExit(
            'Could not parse Java version from """{}""".'.format(version_text))
    major1 = int(match.group('major1'))
    second = match.group('major2')
    major2 = int(second) if second else 0
    return (major1, major2)
|
73 |
+
|
74 |
+
|
75 |
+
def confirm_java_compatibility():
    """Confirm that a ``java`` executable with major version >= 8 exists.

    Returns:
        bool: True when the detected version is acceptable.

    Raises:
        ModuleNotFoundError: if no ``java`` executable is found.
        SystemError: if the detected Java version is older than 8.
    """
    java_path = find_executable('java')
    if not java_path:
        raise ModuleNotFoundError(
            'No java install detected. '
            'Please install java to use language-tool-python.'
        )

    # stderr is merged into stdout so the whole banner is captured.
    version_output = subprocess.check_output(
        [java_path, '-version'],
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    major_version, minor_version = parse_java_version(version_output)

    # Some installs of java show the version number like `14.0.1`
    # and others show `1.14.0.1` (with a leading 1). Both are supported,
    # as long as the major version is >= 8.
    # (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x)
    legacy_scheme_ok = major_version == 1 and minor_version >= 8
    if legacy_scheme_ok or major_version >= 8:
        return True
    raise SystemError('Detected java {}.{}. LanguageTool requires Java >= 8.'.format(major_version, minor_version))
|
100 |
+
|
101 |
+
|
102 |
+
def get_common_prefix(z):
    """Return the first archive member if every other name starts with it.

    Args:
        z: object exposing ``namelist()``, such as a ``zipfile.ZipFile``.

    Returns:
        The first listed name when it prefixes all the others, else None.
        An empty archive also yields None.
    """
    names = z.namelist()
    if not names:
        return None
    candidate = names[0]
    for name in names[1:]:
        if not name.startswith(candidate):
            return None
    return candidate
|
108 |
+
|
109 |
+
|
110 |
+
def http_get(url, out_file, proxies=None):
    """Stream the resource at ``url`` into ``out_file`` with a progress bar.

    Args:
        url: address to download.
        out_file: binary file-like object that receives the content.
        proxies: optional proxy mapping handed to ``requests.get``.

    Raises:
        Exception: if the server answers 403 (the download host reports a
            missing file that way).
        requests.HTTPError: for any other 4xx/5xx response, so an error
            page is never written out as if it were the archive.
    """
    # Context managers release the connection and close the progress bar
    # even when the download fails half-way.
    with requests.get(url, stream=True, proxies=proxies) as req:
        content_length = req.headers.get('Content-Length')
        total = int(content_length) if content_length is not None else None
        if req.status_code == 403:  # Not found on AWS
            raise Exception('Could not find at URL {}.'.format(url))
        req.raise_for_status()
        with tqdm.tqdm(unit="B", unit_scale=True, total=total,
                       desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}') as progress:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    progress.update(len(chunk))
                    out_file.write(chunk)
|
125 |
+
|
126 |
+
|
127 |
+
def unzip_file(temp_file, directory_to_extract_to):
    """Extract the zip archive behind ``temp_file`` into a directory.

    Args:
        temp_file: object whose ``name`` attribute is the archive path.
        directory_to_extract_to: destination directory.
    """
    archive_path = temp_file.name
    logger.info(
        'Unzipping {} to {}.'.format(archive_path, directory_to_extract_to)
    )
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(directory_to_extract_to)
|
134 |
+
|
135 |
+
|
136 |
+
def download_zip(url, directory):
    """Download the zip file at ``url`` and extract it into ``directory``.

    The archive is staged in a temporary file, which is closed and removed
    even when the download or the extraction raises.
    """
    # delete=False: the file must survive close() so it can be reopened by
    # name for extraction.
    downloaded_file = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
    try:
        try:
            http_get(url, downloaded_file)
        finally:
            # Close the file so we can extract it.
            downloaded_file.close()
        # Extract zip file to path.
        unzip_file(downloaded_file, directory)
    finally:
        # Remove the temporary file, whether or not the steps succeeded.
        os.remove(downloaded_file.name)
    # Tell the user the download path.
    logger.info('Downloaded {} to {}.'.format(url, directory))
|
149 |
+
|
150 |
+
|
151 |
+
def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION):
    """Download and unpack the requested LanguageTool version if missing.

    Java compatibility is checked first. Nothing is downloaded when the
    environment variable named by ``LTP_JAR_DIR_PATH_ENV_VAR`` is set, when
    ``language_tool_version`` is falsy, or when the target directory is
    already among the existing downloads.
    """
    confirm_java_compatibility()

    download_folder = get_language_tool_download_path()

    # A jar directory supplied through the environment replaces the download.
    if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR):
        return

    # Make download path, if it doesn't exist.
    os.makedirs(download_folder, exist_ok=True)
    assert os.path.isdir(download_folder)

    old_path_list = find_existing_language_tool_downloads(download_folder)

    if not language_tool_version:
        return

    archive_name = FILENAME.format(version=language_tool_version)
    # The archive unpacks into a directory named after it, minus ".zip".
    extract_path = os.path.join(
        download_folder, os.path.splitext(archive_name)[0])
    if extract_path in old_path_list:
        return

    download_zip(urljoin(BASE_URL, archive_name), download_folder)
|
177 |
+
|
178 |
+
|
179 |
+
if __name__ == '__main__':
    # Run as a script: perform the download and exit with its result.
    raise SystemExit(download_lt())
|
language_tool_python/language_tag.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
from functools import total_ordering
|
4 |
+
|
5 |
+
@total_ordering
class LanguageTag:
    """Language tag supported by LanguageTool.

    A tag is normalized against ``languages``, the collection of supported
    tags: matching ignores case and treats ``-`` and ``_`` alike, and a
    tag with an unsupported region falls back to its bare language code.

    Instances compare against plain strings and other ``LanguageTag``
    objects by normalized tag. Because ``__eq__`` is defined without
    ``__hash__``, instances are unhashable.
    """
    _LANGUAGE_RE = re.compile(r"^([a-z]{2,3})(?:[_-]([a-z]{2}))?$", re.I)

    def __init__(self, tag, languages):
        self.tag = tag
        self.languages = languages
        self.normalized_tag = self._normalize(tag)

    def __eq__(self, other_tag):
        return self.normalized_tag == self._normalize_other(other_tag)

    def __lt__(self, other_tag):
        # Previously referenced an undefined name and raised NameError.
        return str(self) < self._normalize_other(other_tag)

    def __str__(self):
        return self.normalized_tag

    def __repr__(self):
        return '<LanguageTag "{}">'.format(str(self))

    def _normalize_other(self, other_tag):
        """Normalize a comparison operand, accepting LanguageTag objects."""
        if isinstance(other_tag, LanguageTag):
            # A LanguageTag has no str methods; compare by its text form.
            other_tag = str(other_tag)
        return self._normalize(other_tag)

    def _normalize(self, tag):
        """Return the supported spelling of ``tag``.

        Raises:
            ValueError: if ``tag`` is empty or matches no supported language.
        """
        if not tag:
            raise ValueError('empty language tag')
        languages = {language.lower().replace('-', '_'): language
                     for language in self.languages}
        try:
            return languages[tag.lower().replace('-', '_')]
        except KeyError:
            try:
                # Fall back to the bare language code ("fr-CA" -> "fr").
                return languages[self._LANGUAGE_RE.match(tag).group(1).lower()]
            except (KeyError, AttributeError):
                raise ValueError('unsupported language: {!r}'.format(tag))
|
language_tool_python/match.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unicodedata
|
2 |
+
from collections import OrderedDict
|
3 |
+
from functools import total_ordering
|
4 |
+
|
5 |
+
def get_match_ordered_dict():
    """Return a new ordered mapping of Match field name to its type.

    The key order is the field order used when a Match is iterated,
    compared or printed; each value is the callable applied to a value
    before it is stored on a Match.
    """
    return OrderedDict(
        ruleId=str,
        message=str,
        replacements=list,
        offsetInContext=int,
        context=str,
        offset=int,
        errorLength=int,
        category=str,
        ruleIssueType=str,
        sentence=str,
    )
|
19 |
+
|
20 |
+
""" Sample match JSON:
|
21 |
+
{
|
22 |
+
'message': 'Possible spelling mistake found.',
|
23 |
+
'shortMessage': 'Spelling mistake',
|
24 |
+
'replacements': [{'value': 'newt'}, {'value': 'not'}, {'value': 'new', 'shortDescription': 'having just been made'}, {'value': 'news'}, {'value': 'foot', 'shortDescription': 'singular'}, {'value': 'root', 'shortDescription': 'underground organ of a plant'}, {'value': 'boot'}, {'value': 'noon'}, {'value': 'loot', 'shortDescription': 'plunder'}, {'value': 'moot'}, {'value': 'Root'}, {'value': 'soot', 'shortDescription': 'carbon black'}, {'value': 'newts'}, {'value': 'nook'}, {'value': 'Lieut'}, {'value': 'coot'}, {'value': 'hoot'}, {'value': 'toot'}, {'value': 'snoot'}, {'value': 'neut'}, {'value': 'nowt'}, {'value': 'Noor'}, {'value': 'noob'}],
|
25 |
+
'offset': 8,
|
26 |
+
'length': 4,
|
27 |
+
'context': {'text': 'This is noot okay. ', 'offset': 8, 'length': 4}, 'sentence': 'This is noot okay.',
|
28 |
+
'type': {'typeName': 'Other'},
|
29 |
+
'rule': {'id': 'MORFOLOGIK_RULE_EN_US', 'description': 'Possible spelling mistake', 'issueType': 'misspelling', 'category': {'id': 'TYPOS', 'name': 'Possible Typo'}},
|
30 |
+
'ignoreForIncompleteSentence': False,
|
31 |
+
'contextForSureMatch': 0
|
32 |
+
}
|
33 |
+
|
34 |
+
"""
|
35 |
+
def auto_type(obj):
    """Convert ``obj`` to an int if possible, else a float, else leave it.

    Only ``ValueError`` counts as "cannot convert"; any other exception
    raised by a conversion propagates to the caller.
    """
    for convert in (int, float):
        try:
            return convert(obj)
        except ValueError:
            continue
    return obj
|
43 |
+
|
44 |
+
@total_ordering
class Match:
    """Hold information about where a rule matches text.

    Built from one match dictionary of a LanguageTool response. Only the
    fields listed by ``get_match_ordered_dict`` are stored, each coerced
    to its declared type; every other key is ignored. Equality and
    ordering compare those fields in declaration order.
    """

    def __init__(self, attrib):
        # Work on a shallow copy: the caller's dictionary used to be
        # modified in place, so the same payload could not be reused.
        fields = dict(attrib)
        # Process rule.
        rule = fields.pop('rule')
        fields['category'] = rule['category']['id']
        fields['ruleId'] = rule['id']
        fields['ruleIssueType'] = rule['issueType']
        # Process context.
        context = fields['context']
        fields['offsetInContext'] = context['offset']
        fields['context'] = context['text']
        # Process replacements.
        fields['replacements'] = [r['value'] for r in fields['replacements']]
        # Rename error length.
        fields['errorLength'] = fields['length']
        # Normalize unicode
        fields['message'] = unicodedata.normalize("NFKC", fields['message'])
        # Store objects on self; __setattr__ drops the unknown keys.
        for key, value in fields.items():
            setattr(self, key, value)

    def __repr__(self):
        # Declared fields first, then any other instance attribute.
        slots = list(get_match_ordered_dict())
        slots += list(set(self.__dict__).difference(slots))
        shown = [slot for slot in slots
                 if slot in self.__dict__ and not slot.startswith('_')]
        body = ', '.join(
            '{!r}: {!r}'.format(attr, getattr(self, attr))
            for attr in shown
        )
        return '{}({{{}}})'.format(self.__class__.__name__, body)

    def __str__(self):
        ruleId = self.ruleId
        s = 'Offset {}, length {}, Rule ID: {}'.format(
            self.offset, self.errorLength, ruleId)
        if self.message:
            s += '\nMessage: {}'.format(self.message)
        if self.replacements:
            s += '\nSuggestion: {}'.format('; '.join(self.replacements))
        # Context line followed by carets underlining the matched span.
        s += '\n{}\n{}'.format(
            self.context, ' ' * self.offsetInContext + '^' * self.errorLength
        )
        return s

    @property
    def matchedText(self):
        """ Returns the text that garnered the error (without its surrounding context).
        """
        return self.context[self.offsetInContext:self.offsetInContext+self.errorLength]

    def __eq__(self, other):
        return list(self) == list(other)

    def __lt__(self, other):
        return list(self) < list(other)

    def __iter__(self):
        # Yields the field values in declaration order.
        return iter(getattr(self, attr) for attr in get_match_ordered_dict())

    def __setattr__(self, key, value):
        # Coerce declared fields to their type; silently ignore other keys.
        try:
            value = get_match_ordered_dict()[key](value)
        except KeyError:
            return
        super().__setattr__(key, value)

    def __getattr__(self, name):
        # Called only when normal lookup fails. A declared field that was
        # never set falls through and yields None; any other name is an
        # AttributeError.
        if name not in get_match_ordered_dict():
            raise AttributeError('{!r} object has no attribute {!r}'
                                 .format(self.__class__.__name__, name))
|
language_tool_python/server.py
ADDED
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List
|
2 |
+
|
3 |
+
import atexit
|
4 |
+
import http.client
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
import re
|
8 |
+
import requests
|
9 |
+
import socket
|
10 |
+
import subprocess
|
11 |
+
import threading
|
12 |
+
import urllib.parse
|
13 |
+
|
14 |
+
from .config_file import LanguageToolConfig
|
15 |
+
from .download_lt import download_lt, LTP_DOWNLOAD_VERSION
|
16 |
+
from .language_tag import LanguageTag
|
17 |
+
from .match import Match
|
18 |
+
from .utils import (
|
19 |
+
correct,
|
20 |
+
parse_url, get_locale_language,
|
21 |
+
get_language_tool_directory, get_server_cmd,
|
22 |
+
FAILSAFE_LANGUAGE, startupinfo,
|
23 |
+
LanguageToolError, ServerError, PathError
|
24 |
+
)
|
25 |
+
|
26 |
+
|
27 |
+
# When True, the server code prints diagnostic messages (queries, ports,
# spelling-file updates) to stdout.
DEBUG_MODE = False

# Every local server process that gets started is recorded in this
# module-level list so that all of them can be terminated at interpreter exit.
RUNNING_SERVER_PROCESSES: List[subprocess.Popen] = []
|
32 |
+
|
33 |
+
|
34 |
+
class LanguageTool:
    """Main class used for checking text against different rules.

    Requests go either to a remote LanguageTool HTTP server (when
    ``remote_server`` is given) or to a local server process that this
    class starts on a free port.

    LanguageTool v2 API documentation:
    https://languagetool.org/http-api/swagger-ui/#!/default/post_check
    """
    _MIN_PORT = 8081
    _MAX_PORT = 8999
    _TIMEOUT = 5 * 60  # passed as ``timeout`` to every HTTP request
    _remote = False
    _port = _MIN_PORT
    _server: subprocess.Popen = None
    _consumer_thread: threading.Thread = None
    # Extracts the port number from a line of server output.
    _PORT_RE = re.compile(r"(?:https?://.*:|port\s+)(\d+)", re.I)

    def __init__(
        self, language=None, motherTongue=None,
        remote_server=None, newSpellings=None,
        new_spellings_persist=True,
        host=None, config=None,
        language_tool_download_version: str = LTP_DOWNLOAD_VERSION
    ):
        """Connect to (or start) a LanguageTool server and set defaults.

        Args:
            language: language code to check against; when None the locale
                language is used, falling back to ``FAILSAFE_LANGUAGE``.
            motherTongue: optional language code of the user's mother tongue.
            remote_server: URL of an existing server; when None a local
                server is started.
            newSpellings: optional iterable of words appended to the
                spelling file.
            new_spellings_persist: when False, ``close()`` removes the
                words added through ``newSpellings``.
            host: host used in the local server URL; defaults to the
                address ``localhost`` resolves to.
            config: settings wrapped in a ``LanguageToolConfig``; not
                allowed together with ``remote_server``.
            language_tool_download_version: version passed to
                ``download_lt`` before a local server is started.
        """
        self.language_tool_download_version = language_tool_download_version
        self._new_spellings = None
        self._new_spellings_persist = new_spellings_persist
        self._host = host or socket.gethostbyname('localhost')

        if remote_server:
            assert config is None, "cannot pass config file to remote server"
        self.config = LanguageToolConfig(config) if config else None

        if remote_server is not None:
            self._remote = True
            self._url = parse_url(remote_server)
            self._url = urllib.parse.urljoin(self._url, 'v2/')
            self._update_remote_server_config(self._url)
        elif not self._server_is_alive():
            self._start_server_on_free_port()
        if language is None:
            try:
                language = get_locale_language()
            except ValueError:
                language = FAILSAFE_LANGUAGE
        if newSpellings:
            self._new_spellings = newSpellings
            self._register_spellings(self._new_spellings)
        self._language = LanguageTag(language, self._get_languages())
        self.motherTongue = motherTongue
        self.disabled_rules = set()
        self.enabled_rules = set()
        self.disabled_categories = set()
        self.enabled_categories = set()
        self.enabled_rules_only = False
        self.preferred_variants = set()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __del__(self):
        self.close()

    def __repr__(self):
        return '{}(language={!r}, motherTongue={!r})'.format(
            self.__class__.__name__, self.language, self.motherTongue)

    def close(self):
        """Stop the local server (if running) and undo temporary spellings."""
        if self._server_is_alive():
            self._terminate_server()
        if not self._new_spellings_persist and self._new_spellings:
            self._unregister_spellings()
            self._new_spellings = []

    @property
    def language(self):
        """The language to be used."""
        return self._language

    @language.setter
    def language(self, language):
        self._language = LanguageTag(language, self._get_languages())
        # Changing the language clears the enabled/disabled rule sets.
        self.disabled_rules.clear()
        self.enabled_rules.clear()

    @property
    def motherTongue(self):
        """The user's mother tongue or None.
        The mother tongue may also be used as a source language for
        checking bilingual texts.
        """
        return self._motherTongue

    @motherTongue.setter
    def motherTongue(self, motherTongue):
        self._motherTongue = (
            None if motherTongue is None
            else LanguageTag(motherTongue, self._get_languages())
        )

    @property
    def _spell_checking_categories(self):
        return {'TYPOS'}

    def check(self, text: str) -> List[Match]:
        """Match text against enabled rules."""
        url = urllib.parse.urljoin(self._url, 'check')
        response = self._query_server(url, self._create_params(text))
        matches = response['matches']
        return [Match(match) for match in matches]

    def _create_params(self, text: str) -> Dict[str, str]:
        """Build the query parameters for a ``check`` request."""
        params = {'language': str(self.language), 'text': text}
        if self.motherTongue is not None:
            # Stringify like ``language`` so every value is a plain str.
            params['motherTongue'] = str(self.motherTongue)
        if self.disabled_rules:
            params['disabledRules'] = ','.join(self.disabled_rules)
        if self.enabled_rules:
            params['enabledRules'] = ','.join(self.enabled_rules)
        if self.enabled_rules_only:
            params['enabledOnly'] = 'true'
        if self.disabled_categories:
            params['disabledCategories'] = ','.join(self.disabled_categories)
        if self.enabled_categories:
            params['enabledCategories'] = ','.join(self.enabled_categories)
        if self.preferred_variants:
            params['preferredVariants'] = ','.join(self.preferred_variants)
        return params

    def correct(self, text: str) -> str:
        """Automatically apply suggestions to the text."""
        return correct(text, self.check(text))

    def enable_spellchecking(self):
        """Enable spell-checking rules."""
        self.disabled_categories.difference_update(
            self._spell_checking_categories
        )

    def disable_spellchecking(self):
        """Disable spell-checking rules."""
        self.disabled_categories.update(self._spell_checking_categories)

    @staticmethod
    def _get_valid_spelling_file_path() -> str:
        """Return the path of the spelling file.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        library_path = get_language_tool_directory()
        spelling_file_path = os.path.join(
            library_path, "org/languagetool/resource/en/hunspell/spelling.txt"
        )
        if not os.path.exists(spelling_file_path):
            raise FileNotFoundError(
                "Failed to find the spellings file at {}\n "
                "Please file an issue at "
                "https://github.com/jxmorris12/language_tool_python/issues"
                .format(spelling_file_path))
        return spelling_file_path

    def _register_spellings(self, spellings):
        """Append each word of ``spellings`` on its own line to the spelling file."""
        spelling_file_path = self._get_valid_spelling_file_path()
        with (
            open(spelling_file_path, "a+", encoding='utf-8')
        ) as spellings_file:
            spellings_file.write("\n" + "\n".join(spellings))
        if DEBUG_MODE:
            print("Registered new spellings at {}".format(spelling_file_path))

    def _unregister_spellings(self):
        """Truncate the spelling file, dropping the lines added at registration.

        Walks backwards from the end of the file over one newline per
        registered word and truncates there.
        """
        spelling_file_path = self._get_valid_spelling_file_path()
        with (
            open(spelling_file_path, 'r+', encoding='utf-8')
        ) as spellings_file:
            spellings_file.seek(0, os.SEEK_END)
            for _ in range(len(self._new_spellings)):
                while spellings_file.read(1) != '\n':
                    spellings_file.seek(spellings_file.tell() - 2, os.SEEK_SET)
                spellings_file.seek(spellings_file.tell() - 2, os.SEEK_SET)
            spellings_file.seek(spellings_file.tell() + 1, os.SEEK_SET)
            spellings_file.truncate()
        if DEBUG_MODE:
            print(
                "Unregistered new spellings at {}".format(spelling_file_path)
            )

    def _get_languages(self) -> set:
        """Get supported languages (by querying the server)."""
        self._start_server_if_needed()
        url = urllib.parse.urljoin(self._url, 'languages')
        languages = set()
        for e in self._query_server(url, num_tries=1):
            languages.add(e.get('code'))
            languages.add(e.get('longCode'))
        languages.add("auto")
        return languages

    def _start_server_if_needed(self):
        """Start a local server unless one is alive or a remote one is used."""
        if not self._server_is_alive() and self._remote is False:
            self._start_server_on_free_port()

    def _update_remote_server_config(self, url):
        self._url = url
        self._remote = True

    def _query_server(self, url, params=None, num_tries=2):
        """GET ``url`` and return the decoded JSON response.

        On an I/O or HTTP error the local server (if any) is restarted and
        the request retried, up to ``num_tries`` attempts in total.

        Raises:
            LanguageToolError: if the response is not valid JSON, or the
                last attempt fails with an I/O or HTTP error.
        """
        if DEBUG_MODE:
            print('_query_server url:', url, 'params:', params)
        for n in range(num_tries):
            try:
                with (
                    requests.get(url, params=params, timeout=self._TIMEOUT)
                ) as response:
                    try:
                        return response.json()
                    except json.decoder.JSONDecodeError as e:
                        if DEBUG_MODE:
                            print(
                                'URL {} and params {} '
                                'returned invalid JSON response: {}'
                                .format(url, params, e)
                            )
                            print(response)
                            print(response.content)
                        raise LanguageToolError(response.content.decode())
            except (IOError, http.client.HTTPException) as e:
                if self._remote is False:
                    self._terminate_server()
                    self._start_local_server()
                if n + 1 >= num_tries:
                    raise LanguageToolError(
                        '{}: {}'.format(self._url, e)
                    ) from e

    def _start_server_on_free_port(self):
        """Start a local server, trying successive ports up to ``_MAX_PORT``."""
        while True:
            self._url = 'http://{}:{}/v2/'.format(self._host, self._port)
            try:
                self._start_local_server()
                break
            except ServerError:
                if self._MIN_PORT <= self._port < self._MAX_PORT:
                    self._port += 1
                else:
                    raise

    def _start_local_server(self):
        """Launch the server process on ``self._port``.

        Raises:
            LanguageToolError: if the server reports a different port or
                fails without mentioning the requested port.
            ServerError: if the process could not be kept running, which is
                taken to mean that a server is already running there.
            Exception: wrapping the PathError when LanguageTool is not found.
        """
        # Before starting local server, download language tool if needed.
        download_lt(self.language_tool_download_version)
        err = None
        try:
            if DEBUG_MODE:
                if self._port:
                    print(
                        'language_tool_python initializing with port:',
                        self._port
                    )
                if self.config:
                    print(
                        'language_tool_python initializing '
                        'with temporary config file:',
                        self.config.path
                    )
            server_cmd = get_server_cmd(self._port, self.config)
        except PathError as e:
            # Can't find path to LanguageTool.
            err = e
        else:
            # Need to PIPE all handles: http://bugs.python.org/issue3905
            self._server = subprocess.Popen(
                server_cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
                startupinfo=startupinfo
            )
            RUNNING_SERVER_PROCESSES.append(self._server)

            # Read server output until a line mentions the port.
            match = None
            while True:
                line = self._server.stdout.readline()
                if not line:
                    break
                match = self._PORT_RE.search(line)
                if match:
                    port = int(match.group(1))
                    if port != self._port:
                        raise LanguageToolError(
                            'requested port {}, but got {}'
                            .format(self._port, port)
                        )
                    break
            if not match:
                # stdout ended without a port line: inspect stderr instead.
                err_msg = self._terminate_server()
                match = self._PORT_RE.search(err_msg)
                if not match:
                    raise LanguageToolError(err_msg)
                port = int(match.group(1))
                if port != self._port:
                    raise LanguageToolError(err_msg)

        if self._server:
            # Bind the pipe now: the thread must not look up self._server
            # later, because it may have been reset to None by then.
            server_stdout = self._server.stdout
            self._consumer_thread = threading.Thread(
                target=_consume, args=(server_stdout,))
            self._consumer_thread.daemon = True
            self._consumer_thread.start()
        else:
            # Couldn't start the server, so maybe there is already one running.
            if err:
                raise Exception(err)
            else:
                raise ServerError(
                    'Server running; don\'t start a server here.'
                )

    def _server_is_alive(self):
        """Return a truthy value while the local server process is running."""
        return self._server and self._server.poll() is None

    def _terminate_server(self):
        """Stop the local server process and return its stripped stderr text.

        Returns an empty string when there is no server process or its
        output could not be read.
        """
        LanguageToolError_message = ''
        server = self._server
        if server is None:
            # Nothing to stop (e.g. the tool is used again after close()).
            return LanguageToolError_message
        try:
            server.terminate()
        except OSError:
            pass
        try:
            LanguageToolError_message = server.communicate()[1].strip()
        except (IOError, ValueError):
            pass
        for pipe in (server.stdout, server.stdin, server.stderr):
            try:
                pipe.close()
            except IOError:
                pass
        self._server = None
        # Forget processes that have exited so the module-level registry
        # does not grow with every restart.
        if server.poll() is not None:
            try:
                RUNNING_SERVER_PROCESSES.remove(server)
            except ValueError:
                pass
        return LanguageToolError_message
|
376 |
+
|
377 |
+
|
378 |
+
class LanguageToolPublicAPI(LanguageTool):
    """Language tool client of the official API."""

    def __init__(self, *args, **kwargs):
        """Create a client bound to ``https://languagetool.org/api/``.

        All positional and keyword arguments are forwarded to
        ``LanguageTool.__init__``; ``remote_server`` is supplied here.
        """
        super().__init__(
            *args,
            remote_server='https://languagetool.org/api/',
            **kwargs
        )
|
384 |
+
|
385 |
+
|
386 |
+
@atexit.register
def terminate_server():
    """Terminate every process recorded in ``RUNNING_SERVER_PROCESSES``.

    Registered with ``atexit`` so it runs when the interpreter exits.
    """
    for server_process in RUNNING_SERVER_PROCESSES:
        server_process.terminate()
|
391 |
+
|
392 |
+
|
393 |
+
def _consume(stdout):
    """Read and discard lines from ``stdout`` until it is exhausted.

    Without this, the server will end up hanging due to the buffer
    filling up.
    """
    line = stdout.readline()
    while line:
        line = stdout.readline()
|
language_tool_python/utils.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Tuple
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import locale
|
5 |
+
import os
|
6 |
+
import subprocess
|
7 |
+
import urllib.parse
|
8 |
+
import urllib.request
|
9 |
+
|
10 |
+
from .config_file import LanguageToolConfig
|
11 |
+
from .match import Match
|
12 |
+
from .which import which
|
13 |
+
|
14 |
+
# File-name patterns of the LanguageTool jar, in the order they are tried.
JAR_NAMES = [
    'languagetool-server.jar',
    'languagetool-standalone*.jar',  # 2.1
    'LanguageTool.jar',
    'LanguageTool.uno.jar',
]
FAILSAFE_LANGUAGE = 'en'

# Environment variable holding the LanguageTool download path.
LTP_PATH_ENV_VAR = "LTP_PATH"

# Environment variable holding the directory that contains the
# LanguageTool jar file.
LTP_JAR_DIR_PATH_ENV_VAR = "LTP_JAR_DIR_PATH"

# https://mail.python.org/pipermail/python-dev/2011-July/112551.html
# Only Windows gets a STARTUPINFO object; elsewhere the value stays None.
startupinfo = None
if os.name == 'nt':
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
34 |
+
|
35 |
+
|
36 |
+
class LanguageToolError(Exception):
    """Base class of the errors raised by this package."""


class ServerError(LanguageToolError):
    """Error related to the LanguageTool server."""


class JavaError(LanguageToolError):
    """Error related to Java (e.g. the executable cannot be found)."""


class PathError(LanguageToolError):
    """Error related to locating LanguageTool files."""
|
50 |
+
|
51 |
+
|
52 |
+
def parse_url(url_str):
    """Parse a URL string, prepending ``http://`` when it has no HTTP scheme.

    The scheme is detected as a case-insensitive ``http://`` or
    ``https://`` prefix.  (A plain substring test for ``'http'`` would
    skip hosts such as ``httpbin.org`` and double-prefix ``HTTP://...``.)

    Args:
        url_str: URL or bare ``host[:port][/path]`` string.

    Returns:
        The URL as re-assembled by ``urllib.parse.urlparse(...).geturl()``.
    """
    if not url_str.lower().startswith(('http://', 'https://')):
        url_str = 'http://' + url_str

    return urllib.parse.urlparse(url_str).geturl()
|
58 |
+
|
59 |
+
|
60 |
+
def _4_bytes_encoded_positions(text: str) -> List[int]:
    """Return a list of positions of 4-byte encoded characters in the text.

    Positions are expressed in LanguageTool's indexing, where such a
    character counts as two units instead of one as in Python.
    """
    positions = []
    char_index = 0
    for char in text:
        # Code points above U+FFFF are exactly those needing 4 UTF-8 bytes.
        if ord(char) > 0xFFFF:
            positions.append(char_index)
            # Adding 1 to the index because 4 byte characters are
            # 2 units in length in LanguageTool, instead of 1 in Python.
            char_index += 1
        char_index += 1
    return positions


def correct(text: str, matches: "List[Match]") -> str:
    """Automatically apply suggestions to the text.

    For every match that has replacements, the first replacement is
    substituted for the matched span.  A match is skipped when the text at
    its (shifted) span no longer equals what was there originally, e.g.
    because an earlier, overlapping correction already changed it.

    The match objects are not modified, so the same list can be applied
    repeatedly.

    Args:
        text: the text that was checked.
        matches: matches with ``offset``, ``errorLength`` and
            ``replacements`` attributes, offsets in LanguageTool units.

    Returns:
        The corrected text.
    """
    # Computed once: match offsets count each 4-byte character twice.
    wide_positions = _4_bytes_encoded_positions(text)

    def to_python_index(index):
        # Subtract one for every 4-byte character lying entirely before it.
        return index - sum(1 for pos in wide_positions if pos < index)

    ltext = list(text)
    pending = []
    for match in matches:
        if not match.replacements:
            continue
        start = to_python_index(match.offset)
        end = to_python_index(match.offset + match.errorLength)
        pending.append((start, end, ltext[start:end], match.replacements[0]))

    shift = 0  # net length change caused by the corrections applied so far
    for start, end, original, replacement in pending:
        frompos, topos = start + shift, end + shift
        if ltext[frompos:topos] != original:
            continue
        ltext[frompos:topos] = list(replacement)
        shift += len(replacement) - len(original)
    return ''.join(ltext)
|
94 |
+
|
95 |
+
|
96 |
+
def get_language_tool_download_path() -> str:
    """Return the download path from the environment, or the default.

    The default is ``~/.cache/language_tool_python``.
    """
    default_path = os.path.join(
        os.path.expanduser("~"), ".cache", "language_tool_python"
    )
    return os.environ.get(LTP_PATH_ENV_VAR, default_path)


def find_existing_language_tool_downloads(download_folder: str) -> List[str]:
    """Return the directories in ``download_folder`` named ``LanguageTool*``."""
    pattern = os.path.join(download_folder, 'LanguageTool*')
    return [path for path in glob.glob(pattern) if os.path.isdir(path)]


def _version_sort_key(path: str) -> list:
    """Sort key that orders directory names by their embedded numbers.

    Digit runs compare numerically, so ``LanguageTool-5.10`` sorts after
    ``LanguageTool-5.9`` (plain string comparison would invert them).
    """
    import re  # local import: only needed here

    name = os.path.basename(os.path.normpath(path))
    key = []
    # re.split with a capturing group alternates text / digit-run tokens.
    for index, token in enumerate(re.split(r'(\d+)', name)):
        if index % 2:
            key.append((0, int(token), ''))
        else:
            key.append((1, 0, token))
    return key


def get_language_tool_directory() -> str:
    """Get LanguageTool directory.

    Returns:
        The path of the highest-versioned ``LanguageTool*`` directory in
        the download folder.

    Raises:
        NotADirectoryError: if the download folder is not a directory.
        FileNotFoundError: if it holds no ``LanguageTool*`` directory.
    """
    download_folder = get_language_tool_download_path()
    if not os.path.isdir(download_folder):
        raise NotADirectoryError(
            "LanguageTool directory path is not a valid directory {}."
            .format(download_folder)
        )
    language_tool_path_list = find_existing_language_tool_downloads(
        download_folder
    )

    if not language_tool_path_list:
        raise FileNotFoundError(
            'LanguageTool not found in {}.'.format(download_folder)
        )

    # Return the latest version found in the directory.
    return max(language_tool_path_list, key=_version_sort_key)
|
133 |
+
|
134 |
+
|
135 |
+
def get_server_cmd(
    port: int = None, config: LanguageToolConfig = None
) -> List[str]:
    """Build the argument list that launches the LanguageTool HTTP server.

    Args:
        port: when not None, passed to the server with ``-p``.
        config: when not None, its ``path`` is passed with ``--config``.

    Returns:
        The command as a list of strings, starting with the Java executable.
    """
    java_path, jar_path = get_jar_info()
    command = [
        java_path, '-cp', jar_path,
        'org.languagetool.server.HTTPServer',
    ]
    if port is not None:
        command.extend(['-p', str(port)])
    if config is not None:
        command.extend(['--config', config.path])
    return command
|
149 |
+
|
150 |
+
|
151 |
+
def get_jar_info() -> Tuple[str, str]:
    """Locate the Java executable and the LanguageTool jar.

    Returns:
        ``(java_path, jar_path)``.

    Raises:
        JavaError: if no ``java`` executable is found.
        PathError: if none of ``JAR_NAMES`` matches a file in the jar
            directory.
    """
    java_path = which('java')
    if not java_path:
        raise JavaError("can't find Java")

    # Use the env var to the jar directory if it is defined
    # otherwise look in the download directory.  The download directory is
    # resolved only when needed: it raises when nothing was downloaded,
    # which must not happen while the env var already names a directory.
    jar_dir_name = os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR)
    if jar_dir_name is None:
        jar_dir_name = get_language_tool_directory()

    # First pattern (in JAR_NAMES order) that matches a regular file wins.
    for jar_name in JAR_NAMES:
        for candidate in glob.glob(os.path.join(jar_dir_name, jar_name)):
            if os.path.isfile(candidate):
                return java_path, candidate
    raise PathError("can't find languagetool-standalone in {!r}"
                    .format(jar_dir_name))
|
175 |
+
|
176 |
+
|
177 |
+
def get_locale_language():
    """Get the language code for the current locale setting.

    Uses ``locale.getlocale()`` and falls back to
    ``locale.getdefaultlocale()`` when that yields no language.
    """
    language = locale.getlocale()[0]
    if not language:
        language = locale.getdefaultlocale()[0]
    return language
|
language_tool_python/which.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
"""Cross-platform which command."""
|
4 |
+
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
|
8 |
+
|
9 |
+
__all__ = ['which']

# Consulted by the Windows variant of get_path_list().
WIN_ALLOW_CROSS_ARCH = True


def which(program):
    """Identify the location of an executable file.

    A ``program`` containing a directory part is checked as given;
    otherwise it is looked up in each directory of ``get_path_list()``.
    Returns the first path accepted by ``find_exe``, or None.
    """
    if os.path.split(program)[0]:
        candidates = [program]
    else:
        candidates = (
            os.path.join(directory, program) for directory in get_path_list()
        )
    for candidate in candidates:
        found = find_exe(candidate)
        if found:
            return found
    return None
|
25 |
+
|
26 |
+
|
27 |
+
def is_exe(path):
    """Return True if ``path`` is a regular file with execute permission."""
    if not os.path.isfile(path):
        return False
    return os.access(path, os.X_OK)
|
29 |
+
|
30 |
+
|
31 |
+
def _get_path_list():
    """Return the directories of the ``PATH`` environment variable.

    When ``PATH`` is not set, ``os.defpath`` is used instead of raising
    KeyError.
    """
    return os.environ.get('PATH', os.defpath).split(os.pathsep)
|
33 |
+
|
34 |
+
|
35 |
+
if os.name == 'nt':
    def find_exe(program):
        """Return an executable path for ``program``, or None.

        With an extension, ``program`` is checked as given; without one,
        each (lower-cased) extension from ``PATHEXT`` is tried in order.
        """
        stem, suffix = os.path.splitext(program)
        if suffix:
            return program if is_exe(program) else None
        for path_ext in os.environ['PATHEXT'].split(os.pathsep):
            candidate = stem + path_ext.lower()
            if is_exe(candidate):
                return candidate
        return None

    def get_path_list():
        """Return the PATH directories, plus an alternate system directory.

        NOTE(review): nesting reconstructed from a flattened source —
        SysWOW64 is taken to be tried only when Sysnative does not exist,
        and neither when WIN_ALLOW_CROSS_ARCH is false; confirm.
        """
        paths = _get_path_list()
        if not WIN_ALLOW_CROSS_ARCH:
            return paths
        sysnative_dir = os.path.expandvars(r"$WINDIR\Sysnative")
        if os.path.isdir(sysnative_dir):
            paths.insert(0, sysnative_dir)
            return paths
        syswow64_dir = os.path.expandvars(r"$WINDIR\SysWOW64")
        if os.path.isdir(syswow64_dir):
            paths.append(syswow64_dir)
        return paths

else:
    def find_exe(program):
        """Return ``program`` if it is an executable file, else None."""
        if is_exe(program):
            return program
        return None

    get_path_list = _get_path_list
|
65 |
+
|
66 |
+
|
67 |
+
def main():
    """Print the location of each program named on the command line.

    Programs that cannot be found produce no output.
    """
    for location in filter(None, map(which, sys.argv[1:])):
        print(location)


if __name__ == '__main__':
    sys.exit(main())
|