File size: 6,031 Bytes
7b96a1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download latest LanguageTool distribution."""

import logging
import os
import re
import requests
import subprocess
import sys
import tempfile
import tqdm
from typing import Optional
import zipfile

from distutils.spawn import find_executable
from urllib.parse import urljoin
from .utils import (
    find_existing_language_tool_downloads,
    get_language_tool_download_path,
    LTP_JAR_DIR_PATH_ENV_VAR
)

# Module-level logger. Records are formatted as the bare message text and
# this logger's own threshold is set to INFO.
logging.basicConfig(format='%(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


# Base URL the archive name is joined onto; the LTP_DOWNLOAD_HOST
# environment variable, when set, replaces the default host.
BASE_URL = os.getenv('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/')
# Archive file-name template; `{version}` is substituted via str.format.
FILENAME = 'LanguageTool-{version}.zip'

# Default value of download_lt()'s `language_tool_version` argument.
LTP_DOWNLOAD_VERSION = '6.4'

# Classic banner, e.g. `java version "1.8.0_60"` or `openjdk version "10"`:
# a quoted version that is either a bare major number or has at least three
# dot-separated components (`major2` is then the second component).
JAVA_VERSION_REGEX = re.compile(
    r'^(?:java|openjdk) version "(?P<major1>\d+)(|\.(?P<major2>\d+)\.[^"]+)"',
    re.MULTILINE)

# Fallback for banners the pattern above rejects, e.g. `openjdk 11.0.2`
# (no `version` keyword, no quotes) or `openjdk version "17.0"` (only two
# components). The `version ` keyword and the opening quote are each
# optional. The previous `[version ]?` was a character class matching a
# single optional character, so it never actually skipped the word.
JAVA_VERSION_REGEX_UPDATED = re.compile(
    r'^(?:java|openjdk) (?:version )?"?(?P<major1>\d+)\.(?P<major2>\d+)',
    re.MULTILINE)


def parse_java_version(version_text):
    """Return Java version (major1, major2).

    The classic banner pattern is tried first and the fallback pattern
    second. When the banner carries only a single number, ``major2`` is
    reported as 0.

    Args:
        version_text: Text printed by ``java -version`` (may be multi-line).

    Returns:
        A tuple ``(major1, major2)`` of ints.

    Raises:
        SystemExit: if neither pattern matches ``version_text``.

    >>> parse_java_version('''java version "1.6.0_65"
    ... Java(TM) SE Runtime Environment (build 1.6.0_65-b14-462-11M4609)
    ... Java HotSpot(TM) 64-Bit Server VM (build 20.65-b04-462, mixed mode))
    ... ''')
    (1, 6)

    >>> parse_java_version('''
    ... openjdk version "1.8.0_60"
    ... OpenJDK Runtime Environment (build 1.8.0_60-b27)
    ... OpenJDK 64-Bit Server VM (build 25.60-b23, mixed mode))
    ... ''')
    (1, 8)

    >>> parse_java_version('openjdk 11.0.2 2019-01-15')
    (11, 0)

    >>> parse_java_version('openjdk version "17.0" 2021-09-14')
    (17, 0)

    """
    match = (
        JAVA_VERSION_REGEX.search(version_text)
        or JAVA_VERSION_REGEX_UPDATED.search(version_text)
    )
    if not match:
        raise SystemExit(
            'Could not parse Java version from """{}""".'.format(version_text))
    major1 = int(match.group('major1'))
    # `major2` is None when the classic pattern matched a bare major number.
    minor_text = match.group('major2')
    major2 = int(minor_text) if minor_text else 0
    return (major1, major2)


def confirm_java_compatibility():
    """Confirm that the `java` found on PATH reports a version >= 8.

    Runs ``java -version`` and parses its banner with
    ``parse_java_version``.

    Returns:
        True when the detected version is supported.

    Raises:
        ModuleNotFoundError: if no ``java`` executable can be located.
        SystemError: if the detected Java version is older than 8.
        subprocess.CalledProcessError: if ``java -version`` exits non-zero.
        SystemExit: propagated from ``parse_java_version`` when the banner
            cannot be parsed.
    """
    # distutils (the home of find_executable) was removed in Python 3.12
    # (PEP 632); shutil.which is the supported way to locate an executable.
    import shutil

    java_path = shutil.which('java')
    if not java_path:
        raise ModuleNotFoundError(
            'No java install detected. '
            'Please install java to use language-tool-python.'
        )

    # `java -version` prints its banner on stderr, so fold it into stdout.
    output = subprocess.check_output([java_path, '-version'],
                                     stderr=subprocess.STDOUT,
                                     universal_newlines=True)

    major_version, minor_version = parse_java_version(output)
    # Some installs of java show the version number like `14.0.1`
    # and others show `1.14.0.1`
    # (with a leading 1). We want to support both,
    # as long as the major version is >= 8.
    # (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x)
    legacy_scheme_ok = major_version == 1 and minor_version >= 8
    if legacy_scheme_ok or major_version >= 8:
        return True
    raise SystemError('Detected java {}.{}. LanguageTool requires Java >= 8.'.format(major_version, minor_version))


def get_common_prefix(z):
    """Get common directory in a zip file if any."""
    name_list = z.namelist()
    if name_list and all(n.startswith(name_list[0]) for n in name_list[1:]):
        return name_list[0]
    return None


def http_get(url, out_file, proxies=None):
    """Stream the contents of `url` into `out_file`, showing a progress bar.

    Args:
        url: Address to download.
        out_file: Writable binary file object; chunks are written to it in
            the order they arrive. It is not closed here.
        proxies: Optional proxies mapping passed straight to `requests.get`.

    Raises:
        Exception: if the server answers 403.
        requests.HTTPError: for any other 4xx/5xx answer, so that an error
            page is never written to `out_file` as if it were the payload.
    """
    req = requests.get(url, stream=True, proxies=proxies)
    try:
        if req.status_code == 403:  # Not found on AWS
            raise Exception('Could not find at URL {}.'.format(url))
        # Reject every other error status before touching out_file.
        req.raise_for_status()

        content_length = req.headers.get('Content-Length')
        # Without a Content-Length header the bar runs with unknown total.
        total = int(content_length) if content_length is not None else None
        progress = tqdm.tqdm(unit="B", unit_scale=True, total=total,
                             desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}')
        try:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    progress.update(len(chunk))
                    out_file.write(chunk)
        finally:
            # Close the bar even if the transfer fails part-way.
            progress.close()
    finally:
        # Release the streamed connection on success and on failure.
        req.close()


def unzip_file(temp_file, directory_to_extract_to):
    """Extract the zip archive at `temp_file.name` into the given folder.

    `temp_file` is only used for its ``name`` attribute (the archive path);
    the extraction is logged at INFO level before it starts.
    """
    archive_path = temp_file.name
    message = 'Unzipping {} to {}.'.format(archive_path, directory_to_extract_to)
    logger.info(message)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(directory_to_extract_to)


def download_zip(url, directory):
    """Download the zip file at `url` and extract it into `directory`.

    The archive is first written to a named temporary ``.zip`` file, which
    is removed again whether or not the download and extraction succeed.

    Args:
        url: Address of the zip archive.
        directory: Folder the archive is extracted into.

    Raises:
        Whatever `http_get` or `unzip_file` raise; the temporary file is
        cleaned up before the exception propagates.
    """
    # delete=False: the file must outlive close() so it can be reopened by
    # path for extraction; removal is handled explicitly below.
    downloaded_file = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
    try:
        # Download file; leaving the `with` closes the handle (also on
        # error) so the archive can be opened for extraction.
        with downloaded_file:
            http_get(url, downloaded_file)
        # Extract zip file to path.
        unzip_file(downloaded_file, directory)
    finally:
        # Remove the temporary file, even if a step above failed.
        os.remove(downloaded_file.name)
    # Tell the user the download path.
    logger.info('Downloaded {} to {}.'.format(url, directory))


def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION):
    """Download and unpack the requested LanguageTool release if needed.

    Java compatibility is checked first. Nothing is downloaded when the
    environment variable named by ``LTP_JAR_DIR_PATH_ENV_VAR`` is set, when
    `language_tool_version` is empty/None, or when the versioned folder is
    already among the existing downloads. Always returns None.
    """
    confirm_java_compatibility()

    target_dir = get_language_tool_download_path()

    # An explicitly configured jar directory takes precedence over the
    # download directory, so there is nothing to fetch.
    if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR):
        return

    # Ensure the download directory is present.
    os.makedirs(target_dir, exist_ok=True)

    assert os.path.isdir(target_dir)
    existing_installs = find_existing_language_tool_downloads(target_dir)

    if not language_tool_version:
        return

    archive_name = FILENAME.format(version=language_tool_version)
    archive_url = urljoin(BASE_URL, archive_name)
    # The archive name minus its extension is the expected install folder.
    install_dir = os.path.join(target_dir, os.path.splitext(archive_name)[0])

    if install_dir not in existing_installs:
        download_zip(archive_url, target_dir)


if __name__ == '__main__':
    # Script entry point: run the download and hand its result to
    # sys.exit as the process exit status.
    exit_status = download_lt()
    sys.exit(exit_status)