from typing import List, Optional, Tuple

import glob
import locale
import os
import re
import subprocess
import urllib.parse
import urllib.request

from .config_file import LanguageToolConfig
from .match import Match
from .which import which

JAR_NAMES = [
    'languagetool-server.jar',
    'languagetool-standalone*.jar',  # 2.1
    'LanguageTool.jar',
    'LanguageTool.uno.jar'
]
FAILSAFE_LANGUAGE = 'en'

LTP_PATH_ENV_VAR = "LTP_PATH"  # LanguageTool download path

# Directory containing the LanguageTool jar file:
LTP_JAR_DIR_PATH_ENV_VAR = "LTP_JAR_DIR_PATH"

# https://mail.python.org/pipermail/python-dev/2011-July/112551.html

if os.name == 'nt':
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
else:
    startupinfo = None


class LanguageToolError(Exception):
    """Base class for the errors defined in this module."""


class ServerError(LanguageToolError):
    """LanguageTool error subclass named for server failures."""


class JavaError(LanguageToolError):
    """Raised when the Java executable cannot be found."""


class PathError(LanguageToolError):
    """Raised when no LanguageTool jar file can be located."""


def parse_url(url_str: str) -> str:
    """Parse a URL string, adding an ``http://`` scheme if it has none.

    The scheme is detected by prefix (case-insensitively) rather than by
    substring, so a host that merely contains ``http`` in its name
    (e.g. ``http-proxy.local:8081``) still gets a scheme prepended.

    :param url_str: URL or bare ``host[:port]`` string.
    :return: the URL as re-assembled by :func:`urllib.parse.urlparse`.
    """
    if not url_str.lower().startswith(('http://', 'https://')):
        url_str = 'http://' + url_str

    return urllib.parse.urlparse(url_str).geturl()


def _4_bytes_encoded_positions(text: str) -> List[int]:
    """Return a list of positions of 4-byte encoded characters in the text.

    Positions are expressed in LanguageTool's indexing, where each such
    character counts as two units instead of one.
    """
    positions = []
    char_index = 0
    for char in text:
        # Code points above U+FFFF are exactly those that UTF-8 encodes on
        # 4 bytes. Testing the code point avoids calling ``encode``, which
        # raises UnicodeEncodeError on lone surrogates.
        if ord(char) > 0xFFFF:
            positions.append(char_index)
            # Adding 1 to the index because 4 byte characters are
            # 2 units in length in LanguageTool, instead of 1 in Python.
            char_index += 1
        char_index += 1
    return positions


def correct(text: str, matches: List[Match]) -> str:
    """Automatically apply suggestions to the text.

    For every match that has at least one replacement, the matched span is
    replaced by the first replacement. A match whose span no longer holds
    its original content (because an earlier replacement overlapped it) is
    skipped.

    The ``matches`` objects are not modified.

    :param text: the text that was checked.
    :param matches: matches exposing ``offset``, ``errorLength`` and
        ``replacements``; offsets are in LanguageTool's indexing.
    :return: the corrected text.
    """
    # Get the positions of 4-byte encoded characters in the text because
    # without carrying out this step, the offsets of the matches could be
    # incorrect. Computed once, not once per match.
    wide_char_positions = _4_bytes_encoded_positions(text)

    def to_python_index(lt_index: int) -> int:
        # Each 4-byte character located strictly before ``lt_index`` takes
        # one extra unit in LanguageTool's indexing. A character located
        # exactly at ``lt_index`` has not been passed yet, hence ``<``.
        return lt_index - sum(
            1 for position in wide_char_positions if position < lt_index
        )

    ltext = list(text)

    # (start, end, replacement) in Python indices. Both ends of the span are
    # converted so that spans containing 4-byte characters keep the right
    # length.
    spans = [
        (
            to_python_index(match.offset),
            to_python_index(match.offset + match.errorLength),
            match.replacements[0],
        )
        for match in matches
        if match.replacements
    ]
    # Snapshot the original content of each span before editing anything.
    errors = [ltext[start:end] for start, end, _ in spans]

    correct_offset = 0  # cumulative length change from earlier edits
    for (start, end, repl), error in zip(spans, errors):
        frompos, topos = correct_offset + start, correct_offset + end
        if ltext[frompos:topos] != error:
            continue
        ltext[frompos:topos] = list(repl)
        correct_offset += len(repl) - len(error)
    return ''.join(ltext)


def get_language_tool_download_path() -> str:
    """Return the LanguageTool download path.

    The value of the ``LTP_PATH`` environment variable is used when it is
    set; otherwise ``~/.cache/language_tool_python``.
    """
    # Get download path from environment or use default.
    default_path = os.path.join(
        os.path.expanduser("~"), ".cache", "language_tool_python"
    )
    return os.environ.get(LTP_PATH_ENV_VAR, default_path)


def find_existing_language_tool_downloads(download_folder: str) -> List[str]:
    """Return the directories in ``download_folder`` named ``LanguageTool*``."""
    pattern = os.path.join(download_folder, 'LanguageTool*')
    return [path for path in glob.glob(pattern) if os.path.isdir(path)]


def _natural_sort_key(path: str) -> list:
    """Return a sort key for ``path`` that compares digit runs numerically."""
    # With a capturing group, re.split alternates text (even indices) and
    # digit runs (odd indices), so two keys always compare str with str and
    # int with int.
    return [
        int(chunk) if index % 2 else chunk
        for index, chunk in enumerate(re.split(r'([0-9]+)', path))
    ]


def get_language_tool_directory() -> str:
    """Get LanguageTool directory.

    :return: the highest-sorting ``LanguageTool*`` directory found in the
        download folder.
    :raises NotADirectoryError: if the download folder is not a directory.
    :raises FileNotFoundError: if it holds no ``LanguageTool*`` directory.
    """
    download_folder = get_language_tool_download_path()
    if not os.path.isdir(download_folder):
        raise NotADirectoryError(
            "LanguageTool directory path is not a valid directory {}."
            .format(download_folder)
        )
    language_tool_path_list = find_existing_language_tool_downloads(
        download_folder
    )

    if not language_tool_path_list:
        raise FileNotFoundError(
            'LanguageTool not found in {}.'.format(download_folder)
        )

    # Return the latest version found in the directory. Digit runs are
    # compared as numbers so that e.g. "6.10" ranks above "6.9", which a
    # plain string comparison gets wrong.
    return max(language_tool_path_list, key=_natural_sort_key)


def get_server_cmd(
    port: Optional[int] = None, config: Optional[LanguageToolConfig] = None
) -> List[str]:
    """Build the command line that runs the LanguageTool HTTP server.

    :param port: passed as ``-p`` when not ``None``.
    :param config: its ``path`` is passed as ``--config`` when not ``None``.
    :return: the command as an argument list.
    """
    java_path, jar_path = get_jar_info()
    cmd = [java_path, '-cp', jar_path, 'org.languagetool.server.HTTPServer']

    if port is not None:
        cmd += ['-p', str(port)]

    if config is not None:
        cmd += ['--config', config.path]

    return cmd


def get_jar_info() -> Tuple[str, str]:
    """Locate the Java executable and the LanguageTool jar file.

    :return: ``(java_path, jar_path)``.
    :raises JavaError: if ``java`` is not found.
    :raises PathError: if no file matching ``JAR_NAMES`` is found.
    """
    java_path = which('java')
    if not java_path:
        raise JavaError("can't find Java")

    # Use the env var to the jar directory if it is defined
    # otherwise look in the download directory. The download directory is
    # looked up only when needed: the lookup raises when nothing has been
    # downloaded, which must not break an explicitly configured jar
    # directory.
    jar_dir_name = os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR)
    if jar_dir_name is None:
        jar_dir_name = get_language_tool_directory()

    # JAR_NAMES is in priority order; the first existing file wins.
    for jar_name in JAR_NAMES:
        for candidate in glob.glob(os.path.join(jar_dir_name, jar_name)):
            if os.path.isfile(candidate):
                return java_path, candidate

    raise PathError("can't find languagetool-standalone in {!r}"
                    .format(jar_dir_name))


def get_locale_language():
    """Get the language code for the current locale setting.

    Falls back to the default locale when the current locale has no
    language code; the result may still be ``None``.
    """
    # NOTE(review): locale.getdefaultlocale() is deprecated since
    # Python 3.11; kept as-is to preserve behaviour.
    return locale.getlocale()[0] or locale.getdefaultlocale()[0]