File size: 5,151 Bytes
c5db80e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
"""Download files with progress indicators.
"""
import cgi
import logging
import mimetypes
import os
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
from pip._internal.cli.progress_bars import DownloadProgressProvider
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.index import PyPI
from pip._internal.network.cache import is_from_cache
from pip._internal.network.utils import (
HEADERS,
raise_for_status,
response_chunks,
)
from pip._internal.utils.misc import (
format_size,
redact_auth_from_url,
splitext,
)
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
if MYPY_CHECK_RUNNING:
from typing import Iterable, Optional
from pip._vendor.requests.models import Response
from pip._internal.models.link import Link
from pip._internal.network.session import PipSession
logger = logging.getLogger(__name__)
def _get_http_response_size(resp):
# type: (Response) -> Optional[int]
try:
return int(resp.headers['content-length'])
except (ValueError, KeyError, TypeError):
return None
def _prepare_download(
resp, # type: Response
link, # type: Link
progress_bar # type: str
):
# type: (...) -> Iterable[bytes]
total_length = _get_http_response_size(resp)
if link.netloc == PyPI.file_storage_domain:
url = link.show_url
else:
url = link.url_without_fragment
logged_url = redact_auth_from_url(url)
if total_length:
logged_url = '{} ({})'.format(logged_url, format_size(total_length))
if is_from_cache(resp):
logger.info("Using cached %s", logged_url)
else:
logger.info("Downloading %s", logged_url)
if logger.getEffectiveLevel() > logging.INFO:
show_progress = False
elif is_from_cache(resp):
show_progress = False
elif not total_length:
show_progress = True
elif total_length > (40 * 1000):
show_progress = True
else:
show_progress = False
chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
if not show_progress:
return chunks
return DownloadProgressProvider(
progress_bar, max=total_length
)(chunks)
def sanitize_content_filename(filename):
# type: (str) -> str
"""
Sanitize the "filename" value from a Content-Disposition header.
"""
return os.path.basename(filename)
def parse_content_disposition(content_disposition, default_filename):
# type: (str, str) -> str
"""
Parse the "filename" value from a Content-Disposition header, and
return the default filename if the result is empty.
"""
_type, params = cgi.parse_header(content_disposition)
filename = params.get('filename')
if filename:
# We need to sanitize the filename to prevent directory traversal
# in case the filename contains ".." path parts.
filename = sanitize_content_filename(filename)
return filename or default_filename
def _get_http_response_filename(resp, link):
# type: (Response, Link) -> str
"""Get an ideal filename from the given HTTP response, falling back to
the link filename if not provided.
"""
filename = link.filename # fallback
# Have a look at the Content-Disposition header for a better guess
content_disposition = resp.headers.get('content-disposition')
if content_disposition:
filename = parse_content_disposition(content_disposition, filename)
ext = splitext(filename)[1] # type: Optional[str]
if not ext:
ext = mimetypes.guess_extension(
resp.headers.get('content-type', '')
)
if ext:
filename += ext
if not ext and link.url != resp.url:
ext = os.path.splitext(resp.url)[1]
if ext:
filename += ext
return filename
def _http_get_download(session, link):
# type: (PipSession, Link) -> Response
target_url = link.url.split('#', 1)[0]
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
return resp
class Download(object):
def __init__(
self,
response, # type: Response
filename, # type: str
chunks, # type: Iterable[bytes]
):
# type: (...) -> None
self.response = response
self.filename = filename
self.chunks = chunks
class Downloader(object):
def __init__(
self,
session, # type: PipSession
progress_bar, # type: str
):
# type: (...) -> None
self._session = session
self._progress_bar = progress_bar
def __call__(self, link):
# type: (Link) -> Download
try:
resp = _http_get_download(self._session, link)
except NetworkConnectionError as e:
assert e.response is not None
logger.critical(
"HTTP error %s while getting %s", e.response.status_code, link
)
raise
return Download(
resp,
_get_http_response_filename(resp, link),
_prepare_download(resp, link, self._progress_bar),
)
|