Spaces:
Sleeping
Sleeping
from typing import List, Tuple | |
import glob | |
import locale | |
import os | |
import subprocess | |
import urllib.parse | |
import urllib.request | |
from .config_file import LanguageToolConfig | |
from .match import Match | |
from .which import which | |
# File names under which the LanguageTool jar may be present; get_jar_info()
# tries them in this order and glob-expands the wildcard entry.
JAR_NAMES = [
    'languagetool-server.jar',
    'languagetool-standalone*.jar',  # 2.1
    'LanguageTool.jar',
    'LanguageTool.uno.jar'
]
FAILSAFE_LANGUAGE = 'en'

# Environment variable that overrides the LanguageTool download path.
LTP_PATH_ENV_VAR = "LTP_PATH"
# Environment variable naming the directory containing the LanguageTool jar
# file.
LTP_JAR_DIR_PATH_ENV_VAR = "LTP_JAR_DIR_PATH"

# https://mail.python.org/pipermail/python-dev/2011-July/112551.html
# Only Windows gets a STARTUPINFO object; every other platform keeps None.
# NOTE(review): presumably passed to subprocess calls elsewhere so that no
# console window is shown on Windows -- confirm against the callers.
startupinfo = None
if os.name == 'nt':
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
class LanguageToolError(Exception):
    """Common base class of the LanguageTool errors defined in this module."""
class ServerError(LanguageToolError):
    """LanguageTool error subclass for server-related failures."""
class JavaError(LanguageToolError):
    """Raised when no Java executable can be located."""
class PathError(LanguageToolError):
    """Raised when no LanguageTool jar is found in the searched directory."""
def parse_url(url_str):
    """Parse a URL string, adding ``http://`` when no scheme is given.

    Only a leading ``http://`` or ``https://`` (compared case-insensitively)
    counts as an existing scheme.  A plain substring test would be fooled by
    hosts or paths that merely contain the letters "http" (for example
    ``myhttpserver.com``) and would double-prefix upper-case schemes.

    Args:
        url_str: A full URL or a bare ``host[:port][/path]`` string.

    Returns:
        The URL as re-serialised by ``urllib.parse.urlparse(...).geturl()``.
    """
    if not url_str.lower().startswith(('http://', 'https://')):
        url_str = 'http://' + url_str
    return urllib.parse.urlparse(url_str).geturl()
def _4_bytes_encoded_positions(text: str) -> List[int]:
    """Locate every character of ``text`` that takes four bytes in UTF-8.

    The returned positions are not plain Python indices: each four-byte
    character advances the running index by two and every other character
    by one, which is the way LanguageTool counts them.
    """
    found = []
    units_seen = 0
    for symbol in text:
        is_four_byte = len(symbol.encode('utf-8')) == 4
        if is_four_byte:
            found.append(units_seen)
        # A four-byte character occupies two index units in LanguageTool,
        # but only one in a Python string.
        units_seen += 2 if is_four_byte else 1
    return found
def correct(text: str, matches: List[Match]) -> str:
    """Automatically apply suggestions to the text.

    The first replacement of every match that has one is applied, in the
    order the matches are given.  A match is skipped when the span it points
    at no longer holds the text it was computed for (for example because an
    earlier, overlapping match already rewrote it).

    Args:
        text: The text the matches refer to.
        matches: Matches whose ``offset`` and ``errorLength`` use
            LanguageTool's indexing, in which a 4-byte encoded character
            counts as two units.  The match objects are not modified, so the
            same list can safely be passed again.

    Returns:
        The corrected text.
    """
    # Positions (in LanguageTool units) of the 4-byte encoded characters.
    # Computed once: it depends on the text only, not on the match.
    wide_positions = _4_bytes_encoded_positions(text)

    def to_text_index(lt_index: int) -> int:
        """Convert a LanguageTool index into an index into ``text``."""
        # Only characters lying strictly before the index shift it; a
        # 4-byte character starting exactly at the index must not count.
        return lt_index - sum(1 for pos in wide_positions if pos < lt_index)

    ltext = list(text)

    # Resolve every span against the untouched text first, remembering what
    # it originally contained so that stale spans can be detected later.
    planned = []
    for match in matches:
        if not match.replacements:
            continue
        start = to_text_index(match.offset)
        stop = to_text_index(match.offset + match.errorLength)
        planned.append((start, stop, ltext[start:stop], match.replacements[0]))

    # Running difference in length caused by the replacements applied so far.
    correct_offset = 0
    for start, stop, error, repl in planned:
        frompos, topos = correct_offset + start, correct_offset + stop
        if ltext[frompos:topos] != error:
            continue
        ltext[frompos:topos] = list(repl)
        correct_offset += len(repl) - len(error)
    return ''.join(ltext)
def get_language_tool_download_path() -> str:
    """Return the folder in which LanguageTool downloads are kept.

    The environment variable named by ``LTP_PATH_ENV_VAR`` wins when it is
    set; otherwise ``~/.cache/language_tool_python`` is used.
    """
    default_path = os.path.join(
        os.path.expanduser("~"), ".cache", "language_tool_python"
    )
    return os.environ.get(LTP_PATH_ENV_VAR, default_path)
def find_existing_language_tool_downloads(download_folder: str) -> List[str]:
    """List the ``LanguageTool*`` entries of ``download_folder`` that are
    directories.
    """
    pattern = os.path.join(download_folder, 'LanguageTool*')
    return list(filter(os.path.isdir, glob.glob(pattern)))
def _language_tool_version_key(path: str) -> list:
    """Build a sort key that orders folder names by their numeric parts."""
    import re  # local import: ``re`` is not among the module-level imports

    folder_name = os.path.basename(os.path.normpath(path))
    # Splitting on a capturing group alternates text runs (even indices) and
    # digit runs (odd indices), so two keys always compare str with str and
    # int with int.
    return [
        int(part) if index % 2 else part
        for index, part in enumerate(re.split(r'(\d+)', folder_name))
    ]


def get_language_tool_directory() -> str:
    """Get LanguageTool directory.

    Returns:
        The path of the newest ``LanguageTool*`` folder inside the download
        folder.  Version numbers are compared numerically, so ``6.10`` ranks
        above ``6.9`` (a plain string comparison would rank it below).

    Raises:
        NotADirectoryError: If the download folder is not a directory.
        FileNotFoundError: If it contains no ``LanguageTool*`` folder.
    """
    download_folder = get_language_tool_download_path()
    if not os.path.isdir(download_folder):
        raise NotADirectoryError(
            "LanguageTool directory path is not a valid directory {}."
            .format(download_folder)
        )
    language_tool_path_list = find_existing_language_tool_downloads(
        download_folder
    )

    if not language_tool_path_list:
        raise FileNotFoundError(
            'LanguageTool not found in {}.'.format(download_folder)
        )

    # Return the latest version found in the directory.
    return max(language_tool_path_list, key=_language_tool_version_key)
def get_server_cmd(
    port: int = None, config: LanguageToolConfig = None
) -> List[str]:
    """Assemble the command line for the LanguageTool HTTP server.

    The command runs ``org.languagetool.server.HTTPServer`` from the jar
    reported by ``get_jar_info()``.  ``-p <port>`` is appended when ``port``
    is given and ``--config <config.path>`` when ``config`` is given.
    """
    java_path, jar_path = get_jar_info()
    server_cmd = [
        java_path, '-cp', jar_path, 'org.languagetool.server.HTTPServer',
    ]
    if port is not None:
        server_cmd.extend(('-p', str(port)))
    if config is not None:
        server_cmd.extend(('--config', config.path))
    return server_cmd
def get_jar_info() -> Tuple[str, str]:
    """Locate the Java executable and the LanguageTool jar file.

    The jar is searched in the directory named by the
    ``LTP_JAR_DIR_PATH_ENV_VAR`` environment variable when that is set, and
    in the LanguageTool download directory otherwise.  ``JAR_NAMES`` is
    tried in order and the first existing file wins.

    Returns:
        A ``(java_path, jar_path)`` tuple.

    Raises:
        JavaError: If ``which('java')`` finds nothing.
        PathError: If no name in ``JAR_NAMES`` matches a file in the jar
            directory.
        NotADirectoryError, FileNotFoundError: Propagated from
            ``get_language_tool_directory()`` when the environment variable
            is not set and the download directory is unusable.
    """
    java_path = which('java')
    if not java_path:
        raise JavaError("can't find Java")

    # Use the env var to the jar directory if it is defined, otherwise look
    # in the download directory.  The fallback is resolved lazily: passing it
    # as the default of ``os.environ.get`` would evaluate it (and let it
    # raise) even when the variable is set.
    jar_dir_name = os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR)
    if jar_dir_name is None:
        jar_dir_name = get_language_tool_directory()

    for jar_name in JAR_NAMES:
        for candidate in glob.glob(os.path.join(jar_dir_name, jar_name)):
            if os.path.isfile(candidate):
                return java_path, candidate

    raise PathError("can't find languagetool-standalone in {!r}"
                    .format(jar_dir_name))
def get_locale_language():
    """Return the language code of the current locale setting.

    Falls back to the default locale's code when the current locale does
    not report one.
    """
    current_language = locale.getlocale()[0]
    if current_language:
        return current_language
    return locale.getdefaultlocale()[0]