#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download latest LanguageTool distribution."""

import logging
import os
import re
import requests
import subprocess
import sys
import tempfile
import tqdm
from typing import Optional
import zipfile

try:
    from distutils.spawn import find_executable
except ImportError:
    # distutils was removed from the standard library in Python 3.12;
    # shutil.which performs the same PATH lookup.
    from shutil import which as find_executable
from urllib.parse import urljoin
from .utils import (
    find_existing_language_tool_downloads,
    get_language_tool_download_path,
    LTP_JAR_DIR_PATH_ENV_VAR
)

# Create logger for this file.
logging.basicConfig(format='%(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Get download host from environment or default.
BASE_URL = os.environ.get('LTP_DOWNLOAD_HOST', 'https://www.languagetool.org/download/')
FILENAME = 'LanguageTool-{version}.zip'

LTP_DOWNLOAD_VERSION = '6.4'

# The named groups `major1` / `major2` are read by parse_java_version().
JAVA_VERSION_REGEX = re.compile(
    r'^(?:java|openjdk) version "(?P<major1>\d+)(|\.(?P<major2>\d+)\.[^"]+)"',
    re.MULTILINE)

# Updated for later versions of java
# NOTE(review): `[version ]?` is a character class (one optional character),
# not the optional word "version " -- kept as-is to preserve matching.
JAVA_VERSION_REGEX_UPDATED = re.compile(
    r'^(?:java|openjdk) [version ]?(?P<major1>\d+)\.(?P<major2>\d+)',
    re.MULTILINE)


def parse_java_version(version_text):
    """Return Java version (major1, major2).

    Both elements are ints; `major2` is 0 when the matched version string
    has no second component. Raises SystemExit when neither version
    pattern matches `version_text`.

    >>> parse_java_version('''java version "1.6.0_65"
    ... Java(TM) SE Runtime Environment (build 1.6.0_65-b14-462-11M4609)
    ... Java HotSpot(TM) 64-Bit Server VM (build 20.65-b04-462, mixed mode))
    ... ''')
    (1, 6)

    >>> parse_java_version('''
    ... openjdk version "1.8.0_60"
    ... OpenJDK Runtime Environment (build 1.8.0_60-b27)
    ... OpenJDK 64-Bit Server VM (build 25.60-b23, mixed mode))
    ... ''')
    (1, 8)

    """
    match = (
        re.search(JAVA_VERSION_REGEX, version_text)
        or re.search(JAVA_VERSION_REGEX_UPDATED, version_text)
    )
    if not match:
        raise SystemExit(
            'Could not parse Java version from """{}""".'.format(version_text))
    major1 = int(match.group('major1'))
    # `major2` is None when the first pattern matched a bare major ("14").
    major2 = int(match.group('major2')) if match.group('major2') else 0
    return (major1, major2)


def confirm_java_compatibility():
    """Confirm that the installed Java has major version >= 8.

    Returns True when the detected version is acceptable.
    Raises ModuleNotFoundError when no `java` executable is found, and
    SystemError when the detected version is older than Java 8.
    """
    java_path = find_executable('java')
    if not java_path:
        raise ModuleNotFoundError(
            'No java install detected. '
            'Please install java to use language-tool-python.'
        )

    # stderr is merged into the captured output so the version banner is
    # available to parse regardless of which stream java writes it to.
    output = subprocess.check_output([java_path, '-version'],
                                     stderr=subprocess.STDOUT,
                                     universal_newlines=True)

    major_version, minor_version = parse_java_version(output)
    # Some installs of java show the version number like `14.0.1`
    # and others show `1.14.0.1`
    # (with a leading 1). We want to support both,
    # as long as the major version is >= 8.
    # (See softwareengineering.stackexchange.com/questions/175075/why-is-java-version-1-x-referred-to-as-java-x)
    if major_version >= 8 or (major_version == 1 and minor_version >= 8):
        return True
    raise SystemError(
        'Detected java {}.{}. LanguageTool requires Java >= 8.'.format(
            major_version, minor_version))


def get_common_prefix(z):
    """Get common directory in a zip file if any.

    Returns the first entry of `z.namelist()` when every other entry
    starts with it, otherwise None (also None for an empty archive).
    """
    name_list = z.namelist()
    if name_list and all(n.startswith(name_list[0]) for n in name_list[1:]):
        return name_list[0]
    return None


def http_get(url, out_file, proxies=None):
    """Get contents of a URL and save to a file.

    Streams the response body into `out_file` (a writable binary file
    object) in 1024-byte chunks while showing a progress bar.

    Raises Exception on HTTP 403 and requests.HTTPError on any other
    4xx/5xx status, so an error page is never written out as the download.
    """
    req = requests.get(url, stream=True, proxies=proxies)
    if req.status_code == 403:  # Not found on AWS
        raise Exception('Could not find at URL {}.'.format(url))
    # Fail early on every other HTTP error instead of saving its body.
    req.raise_for_status()

    content_length = req.headers.get('Content-Length')
    total = int(content_length) if content_length is not None else None
    progress = tqdm.tqdm(unit="B", unit_scale=True, total=total,
                         desc=f'Downloading LanguageTool {LTP_DOWNLOAD_VERSION}')
    try:
        for chunk in req.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                progress.update(len(chunk))
                out_file.write(chunk)
    finally:
        # Close the bar even if the transfer is interrupted.
        progress.close()


def unzip_file(temp_file, directory_to_extract_to):
    """Unzip a .zip file to folder path.

    `temp_file` only needs a `.name` attribute holding the archive path.
    """
    logger.info(
        'Unzipping {} to {}.'.format(temp_file.name, directory_to_extract_to)
    )
    with zipfile.ZipFile(temp_file.name, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)


def download_zip(url, directory):
    """Download and unzip the zip file from `url` to `directory`.

    The archive is written to a temporary file that is removed afterwards,
    including when the download or the extraction raises.
    """
    # Download file.
    downloaded_file = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
    try:
        try:
            http_get(url, downloaded_file)
        finally:
            # Close the file so we can extract it.
            downloaded_file.close()
        # Extract zip file to path.
        unzip_file(downloaded_file, directory)
    finally:
        # Remove the temporary file.
        os.remove(downloaded_file.name)
    # Tell the user the download path.
    logger.info('Downloaded {} to {}.'.format(url, directory))


def download_lt(language_tool_version: Optional[str] = LTP_DOWNLOAD_VERSION):
    """Download and extract LanguageTool unless it is already available.

    Does nothing beyond the Java check when the jar-directory environment
    variable is set, when `language_tool_version` is falsy, or when the
    target directory is already among the existing downloads.
    Returns None.
    """
    confirm_java_compatibility()

    download_folder = get_language_tool_download_path()

    # Use the env var to the jar directory if it is defined
    # otherwise look in the download directory
    if os.environ.get(LTP_JAR_DIR_PATH_ENV_VAR):
        return

    # Make download path, if it doesn't exist.
    os.makedirs(download_folder, exist_ok=True)

    assert os.path.isdir(download_folder)
    old_path_list = find_existing_language_tool_downloads(download_folder)

    if language_tool_version:
        version = language_tool_version
        filename = FILENAME.format(version=version)
        language_tool_download_url = urljoin(BASE_URL, filename)
        # The archive name without `.zip` is the directory it is expected
        # to occupy inside the download folder.
        dirname, _ = os.path.splitext(filename)
        extract_path = os.path.join(download_folder, dirname)

        if extract_path in old_path_list:
            return
        download_zip(language_tool_download_url, download_folder)


if __name__ == '__main__':
    sys.exit(download_lt())