diff options
Diffstat (limited to 'urllib3')
-rw-r--r-- | urllib3/__init__.py | 2 | ||||
-rw-r--r-- | urllib3/_collections.py | 2 | ||||
-rw-r--r-- | urllib3/connection.py | 119 | ||||
-rw-r--r-- | urllib3/connectionpool.py | 83 | ||||
-rw-r--r-- | urllib3/fields.py | 22 | ||||
-rw-r--r-- | urllib3/filepost.py | 9 | ||||
-rw-r--r-- | urllib3/poolmanager.py | 2 | ||||
-rw-r--r-- | urllib3/request.py | 36 | ||||
-rw-r--r-- | urllib3/response.py | 55 | ||||
-rw-r--r-- | urllib3/util/connection.py | 6 | ||||
-rw-r--r-- | urllib3/util/request.py | 14 | ||||
-rw-r--r-- | urllib3/util/timeout.py | 31 | ||||
-rw-r--r-- | urllib3/util/url.py | 10 |
13 files changed, 216 insertions, 175 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py index bd237a6..c80d5da 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8.2' +__version__ = '1.8.3' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 9cea3a4..ccf0d5f 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -116,7 +116,7 @@ class HTTPHeaderDict(MutableMapping): A ``dict`` like container for storing HTTP Headers. Field names are stored and compared case-insensitively in compliance with - RFC 2616. Iteration provides the first case-sensitive key seen for each + RFC 7230. Iteration provides the first case-sensitive key seen for each case-insensitive pair. Using ``__setitem__`` syntax overwrites fields that compare equal diff --git a/urllib3/connection.py b/urllib3/connection.py index de7b925..fbb63ed 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -8,32 +8,27 @@ import sys import socket from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection as _HTTPSConnection - except ImportError: - from httplib import HTTPSConnection as _HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, @@ -58,12 +53,34 @@ class HTTPConnection(_HTTPConnection, object): """ Based on httplib.HTTPConnection but provides an extra constructor backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ default_port = port_by_scheme['http'] - # By default, disable Nagle's Algorithm. - tcp_nodelay = 1 + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] def __init__(self, *args, **kw): if six.PY3: # Python 3 @@ -74,30 +91,54 @@ class HTTPConnection(_HTTPConnection, object): # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + #: The socket options provided by the user. If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + # Superclass also sets self.source_address in Python 2.7+. - _HTTPConnection.__init__(self, *args, **kw) + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): """ Establish a socket connection and set nodelay settings on it. - :return: a new socket connection + :return: New socket connection. """ extra_args = [] if self.source_address: # Python 2.7+ extra_args.append(self.source_address) - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) - conn.setsockopt( - socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + try: + conn = socket.create_connection( + (self.host, self.port), self.timeout, *extra_args) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + # Set options on the socket. + self._set_options_on(conn) return conn def _prepare_conn(self, conn): self.sock = conn - if self._tunnel_host: + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def _set_options_on(self, conn): + # Disable all socket options if the user passes ``socket_options=None`` + if self.socket_options is None: + return + + for opt in self.socket_options: + conn.setsockopt(*opt) def connect(self): conn = self._new_conn() @@ -134,7 +175,6 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None - conn_kw = {} def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -149,37 +189,32 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - - try: - sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, - **self.conn_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. + hostname = self.host if getattr(self, '_tunnel_host', None): - self.sock = sock + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. + hostname = self._tunnel_host # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) if resolved_cert_reqs != ssl.CERT_NONE: @@ -188,7 +223,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + self.assert_hostname or hostname) if ssl: diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 95a53a7..ab205fa 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -11,7 +11,7 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -21,7 +21,6 @@ except ImportError: from .exceptions import ( ClosedPoolError, ConnectionError, - ConnectTimeoutError, EmptyPoolError, HostChangedError, LocationParseError, @@ -54,8 +53,8 @@ log = logging.getLogger(__name__) _Default = object() -## Pool objects +## Pool objects class ConnectionPool(object): """ Base class for all connection pools, such as @@ -82,6 +81,7 @@ class ConnectionPool(object): # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -133,6 +133,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. """ scheme = 'http' @@ -166,11 +170,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 - - if sys.version_info < (2, 7): # Python 2.6 and older - conn_kw.pop('source_address', None) self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -182,10 +189,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, strict=self.strict, **self.conn_kw) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 return conn def _get_conn(self, timeout=None): @@ -204,7 +207,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -218,6 +221,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -237,7 +245,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass @@ -283,16 +291,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj = self._get_timeout(timeout) - try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout @@ -310,14 +313,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except SocketTimeout: raise ReadTimeoutError( @@ -333,7 +336,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise - except SocketError as e: # Platform-specific: Python 2 + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: @@ -364,7 +367,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() except Empty: - pass # Done. + pass # Done. def is_same_host(self, url): """ @@ -605,11 +608,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=None, assert_fingerprint=None, **conn_kw): - if sys.version_info < (2, 7): # Python 2.6 or older - conn_kw.pop('source_address', None) - HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, **conn_kw) + block, headers, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -617,7 +618,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -633,7 +633,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -641,7 +640,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. conn.connect() @@ -667,18 +671,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - extra_params.update(self.conn_kw) - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) diff --git a/urllib3/fields.py b/urllib3/fields.py index ed01765..dceafb4 100644 --- a/urllib3/fields.py +++ b/urllib3/fields.py @@ -15,7 +15,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ @@ -78,9 +78,10 @@ class RequestField(object): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example: :: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +126,8 @@ class RequestField(object): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. + A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +159,8 @@ class RequestField(object): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. @@ -172,6 +174,10 @@ class RequestField(object): """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e8b30bd..c3db30c 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -5,7 +5,6 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,10 +37,10 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index f18ff2b..3945f5d 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -161,7 +161,7 @@ class PoolManager(RequestMethods): # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' diff --git a/urllib3/request.py b/urllib3/request.py index 2a92cc2..7a46f1b 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -26,8 +26,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -51,7 +51,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -61,8 +61,8 @@ class RequestMethods(object): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. """ @@ -70,12 +70,12 @@ class RequestMethods(object): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -94,14 +94,14 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where @@ -119,17 +119,17 @@ class RequestMethods(object): When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. - Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) + body, content_type = encode_multipart_formdata( + fields or {}, boundary=multipart_boundary) else: body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') + 'application/x-www-form-urlencoded') if headers is None: headers = self.headers diff --git a/urllib3/response.py b/urllib3/response.py index db44182..13ffba4 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -5,19 +5,16 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging import zlib import io +from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError +from .exceptions import DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed -log = logging.getLogger(__name__) - - class DeflateDecoder(object): def __init__(self): @@ -163,8 +160,8 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -178,23 +175,29 @@ class HTTPResponse(io.IOBase): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. - self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') self._fp_bytes_read += len(data) @@ -204,8 +207,7 @@ class HTTPResponse(io.IOBase): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." % content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -242,7 +244,6 @@ class HTTPResponse(io.IOBase): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -297,7 +298,7 @@ class HTTPResponse(io.IOBase): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 8deeab5..c67ef04 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -8,6 +8,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms except ImportError: # `select` doesn't exist on AppEngine. select = False + def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -22,7 +23,7 @@ def is_connection_dropped(conn): # Platform-specific if sock is False: # Platform-specific: AppEngine return False if sock is None: # Connection already closed (such as by httplib). - return False + return True if not poll: if not select: # Platform-specific: AppEngine @@ -40,6 +41,3 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True - - - diff --git a/urllib3/util/request.py b/urllib3/util/request.py index d48d651..bfd7a98 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -7,7 +7,7 @@ ACCEPT_ENCODING = 'gzip,deflate' def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): + basic_auth=None, proxy_basic_auth=None, disable_cache=None): """ Shortcuts for generating request headers. @@ -29,8 +29,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. + Colon-separated username:password string for + 'proxy-authorization: basic ...' auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. Example: :: @@ -63,6 +66,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['proxy-authorization'] = 'Basic ' + \ b64encode(six.b(proxy_basic_auth)).decode('utf-8') - return headers - + if disable_cache: + headers['cache-control'] = 'no-cache' + return headers diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index 4f947cb..aaadc12 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -94,17 +94,16 @@ class Timeout(object): return '%s(connect=%r, read=%r, total=%r)' % ( type(self).__name__, self._connect, self._read, self.total) - @classmethod def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid + """ Check that a timeout attribute is valid. :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. """ if value is _Default: return cls.DEFAULT_TIMEOUT @@ -123,7 +122,7 @@ class Timeout(object): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than 0." % (name, value)) - except TypeError: # Python 3 + except TypeError: # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int or float." % (name, value)) @@ -135,12 +134,12 @@ class Timeout(object): The timeout value used by httplib.py sets the same timeout on the connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. + object that sets the individual timeouts to the ``timeout`` value + passed to this function. - :param timeout: The legacy timeout value + :param timeout: The legacy timeout value. :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object + :return: Timeout object :rtype: :class:`Timeout` """ return Timeout(read=timeout, connect=timeout) @@ -174,7 +173,7 @@ class Timeout(object): def get_connect_duration(self): """ Gets the time elapsed since the call to :meth:`start_connect`. - :return: the elapsed time + :return: Elapsed time. :rtype: float :raises urllib3.exceptions.TimeoutStateError: if you attempt to get duration for a timer that hasn't been started. @@ -191,7 +190,7 @@ class Timeout(object): This will be a positive float or integer, the value None (never timeout), or the default system timeout. - :return: the connect timeout + :return: Connect timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None """ if self.total is None: @@ -214,7 +213,7 @@ class Timeout(object): established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be raised. - :return: the value to use for the read timeout + :return: Value to use for the read timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` has not yet been called on this object. @@ -223,7 +222,7 @@ class Timeout(object): self.total is not self.DEFAULT_TIMEOUT and self._read is not None and self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. + # In case the connect timeout has not yet been established. if self._start_connect is None: return self._read return max(0, min(self.total - self.get_connect_duration(), diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 362d216..122108b 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,16 +2,20 @@ from collections import namedtuple from ..exceptions import LocationParseError +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + +class Url(namedtuple('Url', url_attrs)): """ Datastructure for representing an HTTP URL. Used as a return value for :func:`parse_url`. """ slots = () - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) @property def hostname(self): |