|
from typing import ( |
|
Any, |
|
AsyncIterable, |
|
AsyncIterator, |
|
Iterable, |
|
Iterator, |
|
List, |
|
Mapping, |
|
MutableMapping, |
|
Optional, |
|
Sequence, |
|
Tuple, |
|
Union, |
|
) |
|
from urllib.parse import urlparse |
|
|
|
|
|
|
|
|
|
# Headers given as a sequence of (name, value) pairs; each element may be
# either bytes or str.
HeadersAsSequence = Sequence[Tuple[Union[bytes, str], Union[bytes, str]]]
# Headers given as a name -> value mapping; keys and values may be either
# bytes or str.
HeadersAsMapping = Mapping[Union[bytes, str], Union[bytes, str]]
# Every accepted way of passing headers, including `None`.
HeaderTypes = Union[HeadersAsSequence, HeadersAsMapping, None]

# String-keyed mapping of optional extra information attached to a request
# or a response.
Extensions = MutableMapping[str, Any]
|
|
|
|
|
def enforce_bytes(value: Union[bytes, str], *, name: str) -> bytes:
    """
    Coerce an argument that is ultimately represented as bytes into `bytes`.

    Such arguments can be specified either as bytes or as strings. However we
    enforce that any string arguments must only contain characters in the
    plain ASCII range, chr(0)...chr(127). If you need to use characters
    outside that range then be precise, and use a byte-wise argument.

    Parameters:
        value: The bytes or str to coerce.
        name: How the argument is referred to in error messages.

    Returns:
        `value` itself if it is bytes, otherwise its ASCII encoding.

    Raises:
        TypeError: If `value` is a str containing non-ASCII characters, or if
            it is neither bytes nor str.
    """
    if isinstance(value, bytes):
        return value

    if isinstance(value, str):
        try:
            return value.encode("ascii")
        except UnicodeEncodeError as exc:
            # Chain explicitly: the encode error (which pinpoints the
            # offending character) is the direct cause, not an unrelated
            # failure that happened while handling it.
            raise TypeError(
                f"{name} strings may not include unicode characters."
            ) from exc

    seen_type = type(value).__name__
    raise TypeError(f"{name} must be bytes or str, but got {seen_type}.")
|
|
|
|
|
def enforce_url(value: Union["URL", bytes, str], *, name: str) -> "URL":
    """
    Type check for URL parameters.

    bytes and str values are wrapped in a new `URL`; an existing `URL`
    instance is returned unchanged. Any other type raises `TypeError`, with
    `name` identifying the argument in the message.
    """
    if not isinstance(value, (bytes, str, URL)):
        seen_type = type(value).__name__
        raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.")

    # Raw bytes/str input takes precedence and is always parsed into a URL.
    return URL(value) if isinstance(value, (bytes, str)) else value
|
|
|
|
|
def enforce_headers(
    value: Union[HeadersAsMapping, HeadersAsSequence, None] = None, *, name: str
) -> List[Tuple[bytes, bytes]]:
    """
    Convenience function that ensures all items in request or response
    headers are either bytes or strings in the plain ASCII range.

    `None` yields an empty list. A mapping contributes its items, a sequence
    contributes its two-element entries, and every name and value is passed
    through `enforce_bytes`. Any other type raises `TypeError`, with `name`
    identifying the argument in the message.
    """
    if value is None:
        return []

    # Pick the iterable of (name, value) pairs first, then convert in one place.
    if isinstance(value, Mapping):
        pairs = value.items()
    elif isinstance(value, Sequence):
        pairs = value
    else:
        seen_type = type(value).__name__
        raise TypeError(
            f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}."
        )

    return [
        (
            enforce_bytes(key, name="header name"),
            enforce_bytes(val, name="header value"),
        )
        for key, val in pairs
    ]
|
|
|
|
|
def enforce_stream(
    value: Union[bytes, Iterable[bytes], AsyncIterable[bytes], None], *, name: str
) -> Union[Iterable[bytes], AsyncIterable[bytes]]:
    """
    Normalise body content into something that can be iterated as a stream.

    `None` is treated as an empty body and plain bytes are wrapped in a
    `ByteStream`. Any other value is returned untouched. The `name` argument
    is accepted but not used by this function.
    """
    if value is None:
        value = b""
    if isinstance(value, bytes):
        return ByteStream(value)
    return value
|
|
|
|
|
|
|
|
|
|
|
# Default port for each URL scheme, keyed by the scheme as bytes.
# `include_request_headers` consults this to decide whether the port can be
# left out of the `Host` header.
DEFAULT_PORTS = {
    b"ftp": 21,
    b"http": 80,
    b"https": 443,
    b"ws": 80,
    b"wss": 443,
}
|
|
|
|
|
def include_request_headers(
    headers: List[Tuple[bytes, bytes]],
    *,
    url: "URL",
    content: Union[None, bytes, Iterable[bytes], AsyncIterable[bytes]],
) -> List[Tuple[bytes, bytes]]:
    """
    Return the request headers with any missing `Host` and body-framing
    headers filled in.

    Parameters:
        headers: The request headers as a list of `(name, value)` byte pairs.
            This list is never modified.
        url: The request URL. Only consulted when no `Host` header is present.
        content: The request body, or `None` if there is none.

    Returns:
        A new list of headers. A `Host` header is prepended if none is
        present; the port is left out when it is `None` or the default for
        the URL scheme. When `content` is not `None` and neither
        `Content-Length` nor `Transfer-Encoding` is present, a
        `Content-Length` header is appended for bytes content, and
        `Transfer-Encoding: chunked` for any other content.
    """
    # Work on a copy. Appending to the argument itself would modify the
    # caller's list, but only in the case where a `Host` header was already
    # present - a surprising and inconsistent side effect.
    headers = list(headers)
    # Header names are matched case-insensitively.
    present = {key.lower() for key, _ in headers}

    if b"host" not in present:
        default_port = DEFAULT_PORTS.get(url.scheme)
        if url.port is None or url.port == default_port:
            header_value = url.host
        else:
            header_value = b"%b:%d" % (url.host, url.port)
        headers.insert(0, (b"Host", header_value))

    if (
        content is not None
        and b"content-length" not in present
        and b"transfer-encoding" not in present
    ):
        if isinstance(content, bytes):
            # The full body is in memory, so its length is known up front.
            content_length = str(len(content)).encode("ascii")
            headers.append((b"Content-Length", content_length))
        else:
            headers.append((b"Transfer-Encoding", b"chunked"))

    return headers
|
|
|
|
|
|
|
|
|
|
|
class ByteStream:
    """
    Holds a complete, non-streaming body and exposes it as a stream that can
    be iterated either synchronously or asynchronously.
    """

    def __init__(self, content: bytes) -> None:
        self._content = content

    def __iter__(self) -> Iterator[bytes]:
        # The whole body is produced as one single chunk.
        yield self._content

    async def __aiter__(self) -> AsyncIterator[bytes]:
        # Async counterpart of `__iter__`: one chunk holding the whole body.
        yield self._content

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        size = len(self._content)
        return f"<{class_name} [{size} bytes]>"
|
|
|
|
|
class Origin:
    """
    A scheme, host and port triple.

    Two origins compare equal when all three components match. The string
    form is `scheme://host:port`.
    """

    def __init__(self, scheme: bytes, host: bytes, port: int) -> None:
        self.scheme = scheme
        self.host = host
        self.port = port

    def __eq__(self, other: Any) -> bool:
        # Anything that is not an Origin is never equal.
        if not isinstance(other, Origin):
            return False
        return (
            self.scheme == other.scheme
            and self.host == other.host
            and self.port == other.port
        )

    def __str__(self) -> str:
        return (
            f"{self.scheme.decode('ascii')}://"
            f"{self.host.decode('ascii')}:{self.port!s}"
        )
|
|
|
|
|
class URL:
    """
    Represents the URL against which an HTTP request may be made.

    The URL may either be specified as a plain string, for convenience:

    ```python
    url = httpcore.URL("https://www.example.com/")
    ```

    Or be constructed with explicitly pre-parsed components:

    ```python
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/')
    ```

    Using this second more explicit style allows integrations that are using
    `httpcore` to pass through URLs that have already been parsed in order to use
    libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures
    that URL parsing is treated identically at both the networking level and at any
    higher layers of abstraction.

    The four components are important here, as they allow the URL to be precisely
    specified in a pre-parsed format. They also allow certain types of request to
    be created that could not otherwise be expressed.

    For example, an HTTP request to `http://www.example.com/` forwarded via a proxy
    at `http://localhost:8080`...

    ```python
    # Constructs an HTTP request with a complete URL as the target:
    # GET http://www.example.com/ HTTP/1.1
    url = httpcore.URL(
        scheme=b'http',
        host=b'localhost',
        port=8080,
        target=b'http://www.example.com/'
    )
    request = httpcore.Request(
        method="GET",
        url=url
    )
    ```

    Another example is constructing an `OPTIONS *` request...

    ```python
    # Constructs an 'OPTIONS *' HTTP request:
    # OPTIONS * HTTP/1.1
    url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*')
    request = httpcore.Request(method="OPTIONS", url=url)
    ```

    This kind of request is not possible to formulate with a URL string,
    because the `/` delimiter is always used to demark the target from the
    host/port portion of the URL.

    For convenience, string-like arguments may be specified either as strings or
    as bytes. However, once a request is being issued over-the-wire, the URL
    components are always ultimately required to be a bytewise representation.

    In order to avoid any ambiguity over character encodings, when strings are used
    as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`.
    If you require a bytewise representation that is outside this range you must
    handle the character encoding directly, and pass a bytes instance.
    """

    def __init__(
        self,
        url: Union[bytes, str] = "",
        *,
        scheme: Union[bytes, str] = b"",
        host: Union[bytes, str] = b"",
        port: Optional[int] = None,
        target: Union[bytes, str] = b"",
    ) -> None:
        """
        Parameters:
            url: The complete URL as a string or bytes. When given (non-empty),
                the keyword components below are ignored.
            scheme: The URL scheme as a string or bytes.
                Typically either `"http"` or `"https"`.
            host: The URL host as a string or bytes. Such as `"www.example.com"`.
            port: The port to connect to. Either an integer or `None`.
            target: The target of the HTTP request. Such as `"/items?search=red"`.
        """
        if not url:
            # No complete URL: take the pre-parsed components as given.
            self.scheme = enforce_bytes(scheme, name="scheme")
            self.host = enforce_bytes(host, name="host")
            self.port = port
            self.target = enforce_bytes(target, name="target")
            return

        # Parse bytewise, so that every component comes back as bytes.
        parsed = urlparse(enforce_bytes(url, name="url"))
        self.scheme = parsed.scheme
        self.host = parsed.hostname or b""
        self.port = parsed.port
        # The target is the path (defaulting to "/") plus any query string.
        path = parsed.path or b"/"
        query = b"?" + parsed.query if parsed.query else b""
        self.target = path + query

    @property
    def origin(self) -> Origin:
        """
        The `Origin` for this URL, using the scheme's default port when no
        port is set.

        Raises `KeyError` for a scheme without a known default port, even
        when an explicit port is set.
        """
        known_default_ports = {
            b"http": 80,
            b"https": 443,
            b"ws": 80,
            b"wss": 443,
            b"socks5": 1080,
        }
        default_port = known_default_ports[self.scheme]
        port = self.port or default_port
        return Origin(scheme=self.scheme, host=self.host, port=port)

    def __eq__(self, other: Any) -> bool:
        # Anything that is not a URL is never equal.
        if not isinstance(other, URL):
            return False
        return (
            other.scheme == self.scheme
            and other.host == self.host
            and other.port == self.port
            and other.target == self.target
        )

    def __bytes__(self) -> bytes:
        # The ":port" part only appears when a port is explicitly set.
        if self.port is None:
            netloc = self.host
        else:
            netloc = b"%b:%d" % (self.host, self.port)
        return b"%b://%b%b" % (self.scheme, netloc, self.target)

    def __repr__(self) -> str:
        components = (
            f"scheme={self.scheme!r}, "
            f"host={self.host!r}, port={self.port!r}, target={self.target!r}"
        )
        return f"{self.__class__.__name__}({components})"
|
|
|
|
|
class Request:
    """
    An HTTP request.
    """

    def __init__(
        self,
        method: Union[bytes, str],
        url: Union[URL, bytes, str],
        *,
        headers: HeaderTypes = None,
        content: Union[bytes, Iterable[bytes], AsyncIterable[bytes], None] = None,
        extensions: Optional[Extensions] = None,
    ) -> None:
        """
        Parameters:
            method: The HTTP request method, either as a string or bytes.
                For example: `GET`.
            url: The request URL, either as a `URL` instance, or as a string or bytes.
                For example: `"https://www.example.com".`
            headers: The HTTP request headers.
            content: The content of the request body.
            extensions: A dictionary of optional extra information included on
                the request. Possible keys include `"timeout"`, and `"trace"`.
        """
        self.method: bytes = enforce_bytes(method, name="method")
        self.url: URL = enforce_url(url, name="url")
        self.headers: List[Tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: Union[Iterable[bytes], AsyncIterable[bytes]] = enforce_stream(
            content, name="content"
        )
        if extensions is None:
            extensions = {}
        self.extensions = extensions

        # A "target" extension overrides the request target, while the
        # scheme, host and port of the URL are kept.
        if "target" in self.extensions:
            base = self.url
            self.url = URL(
                scheme=base.scheme,
                host=base.host,
                port=base.port,
                target=self.extensions["target"],
            )

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name} [{self.method!r}]>"
|
|
|
|
|
class Response:
    """
    An HTTP response.
    """

    def __init__(
        self,
        status: int,
        *,
        headers: HeaderTypes = None,
        content: Union[bytes, Iterable[bytes], AsyncIterable[bytes], None] = None,
        extensions: Optional[Extensions] = None,
    ) -> None:
        """
        Parameters:
            status: The HTTP status code of the response. For example `200`.
            headers: The HTTP response headers.
            content: The content of the response body.
            extensions: A dictionary of optional extra information included on
                the response. Possible keys include `"http_version"`,
                `"reason_phrase"`, and `"network_stream"`.
        """
        self.status: int = status
        self.headers: List[Tuple[bytes, bytes]] = enforce_headers(
            headers, name="headers"
        )
        self.stream: Union[Iterable[bytes], AsyncIterable[bytes]] = enforce_stream(
            content, name="content"
        )
        if extensions is None:
            extensions = {}
        self.extensions = extensions

        # Set once iteration over the stream has been started.
        self._stream_consumed = False

    @property
    def content(self) -> bytes:
        """
        The response body, available once `read()` or `aread()` has been
        called. Raises `RuntimeError` before that.
        """
        if hasattr(self, "_content"):
            return self._content

        # Not read yet: point the caller at the matching read method.
        if isinstance(self.stream, Iterable):
            hint = "Call 'response.read()' first."
        else:
            hint = "Call 'await response.aread()' first."
        raise RuntimeError(
            "Attempted to access 'response.content' on a streaming response. " + hint
        )

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name} [{self.status}]>"

    # Sync interface.

    def read(self) -> bytes:
        """
        Read the whole body from a synchronous stream, keep it on the
        response, and return it. Later calls return the stored body.
        """
        if not isinstance(self.stream, Iterable):
            raise RuntimeError(
                "Attempted to read an asynchronous response using 'response.read()'. "
                "You should use 'await response.aread()' instead."
            )
        if not hasattr(self, "_content"):
            self._content = b"".join(self.iter_stream())
        return self._content

    def iter_stream(self) -> Iterator[bytes]:
        """
        Yield the body chunk by chunk from a synchronous stream. May only be
        iterated once.
        """
        if not isinstance(self.stream, Iterable):
            raise RuntimeError(
                "Attempted to stream an asynchronous response using 'for ... in "
                "response.iter_stream()'. "
                "You should use 'async for ... in response.aiter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'for ... in response.iter_stream()' more than once."
            )
        self._stream_consumed = True
        for part in self.stream:
            yield part

    def close(self) -> None:
        """
        Close a synchronous stream, if the stream has a `close` method.
        """
        if not isinstance(self.stream, Iterable):
            raise RuntimeError(
                "Attempted to close an asynchronous response using 'response.close()'. "
                "You should use 'await response.aclose()' instead."
            )
        if hasattr(self.stream, "close"):
            self.stream.close()

    # Async interface.

    async def aread(self) -> bytes:
        """
        Read the whole body from an asynchronous stream, keep it on the
        response, and return it. Later calls return the stored body.
        """
        if not isinstance(self.stream, AsyncIterable):
            raise RuntimeError(
                "Attempted to read an synchronous response using "
                "'await response.aread()'. "
                "You should use 'response.read()' instead."
            )
        if not hasattr(self, "_content"):
            parts = [part async for part in self.aiter_stream()]
            self._content = b"".join(parts)
        return self._content

    async def aiter_stream(self) -> AsyncIterator[bytes]:
        """
        Yield the body chunk by chunk from an asynchronous stream. May only
        be iterated once.
        """
        if not isinstance(self.stream, AsyncIterable):
            raise RuntimeError(
                "Attempted to stream an synchronous response using 'async for ... in "
                "response.aiter_stream()'. "
                "You should use 'for ... in response.iter_stream()' instead."
            )
        if self._stream_consumed:
            raise RuntimeError(
                "Attempted to call 'async for ... in response.aiter_stream()' "
                "more than once."
            )
        self._stream_consumed = True
        async for part in self.stream:
            yield part

    async def aclose(self) -> None:
        """
        Close an asynchronous stream, if the stream has an `aclose` method.
        """
        if not isinstance(self.stream, AsyncIterable):
            raise RuntimeError(
                "Attempted to close a synchronous response using "
                "'await response.aclose()'. "
                "You should use 'response.close()' instead."
            )
        if hasattr(self.stream, "aclose"):
            await self.stream.aclose()
|
|