File size: 5,215 Bytes
7b96a1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from typing import List, Tuple

import glob
import locale
import os
import subprocess
import urllib.parse
import urllib.request

from .config_file import LanguageToolConfig
from .match import Match
from .which import which

# Jar file name patterns that get_jar_info() tries, in this order, inside the
# LanguageTool directory. Entries are passed to glob, so '*' is a wildcard.
JAR_NAMES = [
    'languagetool-server.jar',
    'languagetool-standalone*.jar',  # 2.1
    'LanguageTool.jar',
    'LanguageTool.uno.jar'
]
# Not referenced in this part of the file.
# NOTE(review): presumably the language code callers fall back to when no
# language can be determined - confirm against the callers.
FAILSAFE_LANGUAGE = 'en'

# Name of the environment variable read by get_language_tool_download_path().
LTP_PATH_ENV_VAR = "LTP_PATH"  # LanguageTool download path

# Directory containing the LanguageTool jar file:
# (name of the environment variable read by get_jar_info()).
LTP_JAR_DIR_PATH_ENV_VAR = "LTP_JAR_DIR_PATH"

# https://mail.python.org/pipermail/python-dev/2011-July/112551.html

# On Windows, prepare a STARTUPINFO with the STARTF_USESHOWWINDOW flag set;
# on every other platform `startupinfo` is None.
# NOTE(review): this object is not used in this part of the file. Presumably
# it is handed to subprocess.Popen elsewhere so that no console window pops
# up for the Java process - confirm against the caller.
if os.name == 'nt':
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
else:
    startupinfo = None


class LanguageToolError(Exception):
    """Common base class of the exceptions defined in this module."""


class ServerError(LanguageToolError):
    """LanguageTool error subtype; it is not raised in this module.

    NOTE(review): presumably signals a failure of the LanguageTool server -
    confirm against the code that raises it.
    """


class JavaError(LanguageToolError):
    """Raised by ``get_jar_info`` when no Java executable can be found."""


class PathError(LanguageToolError):
    """Raised by ``get_jar_info`` when no LanguageTool jar can be found."""


def parse_url(url_str):
    """Parse a URL string, adding an ``http://`` scheme if necessary.

    Args:
        url_str: URL or bare ``host[:port][/path]`` string.

    Returns:
        The URL as rebuilt by ``urllib.parse.urlparse(...).geturl()``.
        ``http://`` is prepended first unless the input already starts with
        ``http://`` or ``https://`` (compared case-insensitively).
    """
    # Look for an actual scheme prefix. A plain substring test for 'http'
    # also matches host names and paths such as 'httpbin.org' or
    # 'example.com/http-guide', which would then be left without a scheme.
    if not url_str.lower().startswith(('http://', 'https://')):
        url_str = 'http://' + url_str

    return urllib.parse.urlparse(url_str).geturl()


def _4_bytes_encoded_positions(text: str) -> List[int]:
    """Return a list of positions of 4-byte encoded characters in the text."""
    positions = []
    char_index = 0
    for char in text:
        if len(char.encode('utf-8')) == 4:
            positions.append(char_index)
            # Adding 1 to the index because 4 byte characters are
            # 2 bytes in length in LanguageTool, instead of 1 byte in Python.
            char_index += 1
        char_index += 1
    return positions


def correct(text: str, matches: List[Match]) -> str:
    """Automatically apply suggestions to the text.

    Every match that offers at least one replacement has its span in ``text``
    replaced by the first replacement. A match is skipped when, after the
    earlier replacements have been applied, its span no longer holds the
    characters it held in the original text.

    ``match.offset`` and ``match.errorLength`` are interpreted in the units
    used by ``_4_bytes_encoded_positions``, where a 4-byte (UTF-8) character
    occupies two positions. Both ends of the span are converted to Python
    string indices before slicing.
    NOTE(review): this assumes ``errorLength`` is expressed in the same units
    as ``offset`` - confirm against ``Match``.

    Args:
        text: The text the matches were computed for.
        matches: Matches to apply; the objects are read but never modified,
            so the same list can safely be passed again.

    Returns:
        The text with the applicable replacements applied.
    """
    # Computed once; it depends only on the text, not on the match.
    four_byte_positions = _4_bytes_encoded_positions(text)

    def to_index(position: int) -> int:
        # Each 4-byte character that starts strictly before `position` takes
        # one position more than it takes Python indices. A character that
        # starts exactly at `position` must not shift it.
        return position - sum(
            1 for start in four_byte_positions if start < position
        )

    ltext = list(text)
    # (start, end, original characters, replacement) for each usable match,
    # all taken from the unmodified text.
    edits = []
    for match in matches:
        if not match.replacements:
            continue
        start = to_index(match.offset)
        end = to_index(match.offset + match.errorLength)
        edits.append((start, end, ltext[start:end], match.replacements[0]))

    # Running difference in length caused by the replacements applied so far.
    correct_offset = 0
    for start, end, error, repl in edits:
        frompos, topos = start + correct_offset, end + correct_offset
        if ltext[frompos:topos] != error:
            # An earlier replacement changed this span; leave it alone.
            continue
        ltext[frompos:topos] = list(repl)
        correct_offset += len(repl) - len(error)
    return ''.join(ltext)


def get_language_tool_download_path() -> str:
    """Return the folder in which LanguageTool downloads are kept.

    The value of the environment variable named by ``LTP_PATH_ENV_VAR`` is
    used when that variable is set; otherwise the result is
    ``.cache/language_tool_python`` below the current user's home directory.
    """
    fallback_path = os.path.join(
        os.path.expanduser("~"), ".cache", "language_tool_python"
    )
    return os.environ.get(LTP_PATH_ENV_VAR, fallback_path)


def find_existing_language_tool_downloads(download_folder: str) -> List[str]:
    """List the directories in ``download_folder`` named ``LanguageTool*``.

    Entries that match the pattern but are not directories are left out.
    The list is empty when nothing matches.
    """
    pattern = os.path.join(download_folder, 'LanguageTool*')
    return list(filter(os.path.isdir, glob.glob(pattern)))


def get_language_tool_directory() -> str:
    """Return the path of a LanguageTool directory in the download folder.

    Raises:
        NotADirectoryError: the download folder is not an existing directory.
        FileNotFoundError: the download folder holds no LanguageTool
            directory.
    """
    download_folder = get_language_tool_download_path()
    if not os.path.isdir(download_folder):
        raise NotADirectoryError(
            "LanguageTool directory path is not a valid directory {}."
            .format(download_folder)
        )

    candidates = find_existing_language_tool_downloads(download_folder)
    if candidates:
        # Intended to pick the latest version found in the directory.
        # NOTE(review): max() compares the path strings, so this is the
        # alphabetically greatest path rather than a numeric version order.
        return max(candidates)

    raise FileNotFoundError(
        'LanguageTool not found in {}.'.format(download_folder)
    )


def get_server_cmd(
        port: int = None, config: LanguageToolConfig = None
) -> List[str]:
    """Build the argument list that runs the LanguageTool HTTP server.

    The command runs ``org.languagetool.server.HTTPServer`` with the Java
    executable and jar reported by ``get_jar_info``.

    Args:
        port: When not None, appended as ``-p <port>``.
        config: When not None, its ``path`` attribute is appended as
            ``--config <path>``.

    Returns:
        The command as a list of arguments.
    """
    java_path, jar_path = get_jar_info()
    port_args = [] if port is None else ['-p', str(port)]
    config_args = [] if config is None else ['--config', config.path]
    return [
        java_path, '-cp', jar_path,
        'org.languagetool.server.HTTPServer',
        *port_args,
        *config_args,
    ]


def get_jar_info() -> Tuple[str, str]:
    """Locate the Java executable and the LanguageTool jar file.

    The jar is searched for in the directory given by the environment
    variable named by ``LTP_JAR_DIR_PATH_ENV_VAR``. Only when that variable
    is not set is the LanguageTool download directory looked up and used.

    Returns:
        A ``(java_path, jar_path)`` tuple.

    Raises:
        JavaError: ``which('java')`` found nothing.
        PathError: no file in the directory matches any of ``JAR_NAMES``.
        NotADirectoryError, FileNotFoundError: propagated from
            ``get_language_tool_directory`` when the environment variable is
            not set and no downloaded LanguageTool can be found.
    """
    java_path = which('java')
    if not java_path:
        raise JavaError("can't find Java")

    # Use the env var to the jar directory if it is defined, otherwise look
    # in the download directory. The lookup is done lazily: passing it as the
    # default of os.environ.get() would run it (and let it raise) even when
    # the env var is defined.
    jar_dir_name = os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR)
    if jar_dir_name is None:
        jar_dir_name = get_language_tool_directory()

    # JAR_NAMES is ordered by preference: take the first existing file of
    # the first pattern that has one.
    for jar_name in JAR_NAMES:
        for candidate in glob.glob(os.path.join(jar_dir_name, jar_name)):
            if os.path.isfile(candidate):
                return java_path, candidate

    raise PathError("can't find languagetool-standalone in {!r}"
                    .format(jar_dir_name))


def get_locale_language():
    """Get the language code for the current locale setting.

    The language code reported by ``locale.getlocale()`` is returned when
    there is one; otherwise the one from ``locale.getdefaultlocale()`` is
    returned, which may be None.
    """
    language_code, _encoding = locale.getlocale()
    if language_code:
        return language_code
    return locale.getdefaultlocale()[0]