34 files changed, 345 insertions(+), 197 deletions(-)
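Before the file-by-file diff, a minimal usage sketch of the two user-visible additions announced in the 1.8.3 changelog below (the ``disable_cache`` option of ``urllib3.util.make_headers`` and the ``socket_options`` keyword that replaces ``HTTPConnection.tcp_nodelay``); the host name and the extra keep-alive option are illustrative assumptions, not taken from the diff::

    # Illustrative sketch against the urllib3 1.8.3 API shipped by this upload;
    # 'example.org' and the SO_KEEPALIVE option are assumptions for the example.
    import socket

    from urllib3 import HTTPConnectionPool
    from urllib3.connection import HTTPConnection
    from urllib3.util import make_headers

    # New in 1.8.3: disable_cache=True adds 'cache-control: no-cache' (Issue #393).
    headers = make_headers(keep_alive=True, disable_cache=True)

    # New in 1.8.3: socket_options replaces HTTPConnection.tcp_nodelay (Issue #397).
    # Keep the default TCP_NODELAY entry and additionally enable keep-alive probes.
    options = HTTPConnection.default_socket_options + [
        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
    ]

    pool = HTTPConnectionPool('example.org', port=80, headers=headers,
                              socket_options=options)
    response = pool.request('GET', '/')
    print(response.status)
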
diff --git a/CHANGES.rst b/CHANGES.rst index 3f836e9..4d90ce2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,28 @@ Changes ======= +1.8.3 (2014-06-23) +++++++++++++++++++ + +* Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + +* Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + +* Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + +* Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + +* Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + +* Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + +* Fixed ``TypeError`` bug in Python 2.6.4. (Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ diff --git a/MANIFEST.in b/MANIFEST.in index 3f344d1..3c2189a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include README.rst CHANGES.rst LICENSE.txt CONTRIBUTORS.txt test-requirements.txt recursive-include dummyserver *.* +prune *.pyc @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.2 +Version: 1.8.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -28,7 +28,14 @@ Description: ======= - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests <http://python-requests.org/>`_ which is also powered by urllib3. + `Requests <http://python-requests.org/>`_ which is also powered by ``urllib3``. + + You might already be using urllib3! + =================================== + + ``urllib3`` powers `many great Python libraries <https://sourcegraph.com/search?q=package+urllib3>`_, + including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -99,6 +106,7 @@ Description: ======= py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. @@ -121,6 +129,28 @@ Description: ======= Changes ======= + 1.8.3 (2014-06-23) + ++++++++++++++++++ + + * Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + + * Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + + * Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + + * Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + + * Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + + * Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + + * Fixed ``TypeError`` bug in Python 2.6.4. (Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ @@ -20,7 +20,14 @@ Highlights - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests <http://python-requests.org/>`_ which is also powered by urllib3. + `Requests <http://python-requests.org/>`_ which is also powered by ``urllib3``. + +You might already be using urllib3! 
+=================================== + +``urllib3`` powers `many great Python libraries <https://sourcegraph.com/search?q=package+urllib3>`_, +including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -91,6 +98,7 @@ analysis while running test suite. Easiest way to run the tests is thusly the py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. diff --git a/debian/changelog b/debian/changelog index 827b7bb..78a0a64 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +python-urllib3 (1.8.3-1) unstable; urgency=medium + + * New upstream release (Closes: #754090) + * debian/patches/01_do-not-use-embedded-python-six.patch + - Refresh + * debian/patches/04_relax_nosetests_options.patch + - Refresh + + -- Daniele Tricoli <eriol@mornie.org> Mon, 07 Jul 2014 16:09:06 +0200 + python-urllib3 (1.8.2-1) unstable; urgency=medium * New upstream release diff --git a/debian/patches/01_do-not-use-embedded-python-six.patch b/debian/patches/01_do-not-use-embedded-python-six.patch index f95a1e1..024180f 100644 --- a/debian/patches/01_do-not-use-embedded-python-six.patch +++ b/debian/patches/01_do-not-use-embedded-python-six.patch @@ -1,7 +1,7 @@ Description: Do not use embedded copy of python-six. Author: Daniele Tricoli <eriol@mornie.org> Forwarded: not-needed -Last-Update: 2014-05-24 +Last-Update: 2014-07-7 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -16,7 +16,7 @@ Last-Update: 2014-05-24 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py -@@ -32,7 +32,7 @@ +@@ -31,7 +31,7 @@ ProxyError, ) from .packages.ssl_match_hostname import CertificateError @@ -27,7 +27,7 @@ Last-Update: 2014-05-24 DummyConnection, --- a/urllib3/filepost.py +++ b/urllib3/filepost.py -@@ -10,8 +10,8 @@ +@@ -9,8 +9,8 @@ from uuid import uuid4 from io import BytesIO @@ -43,7 +43,7 @@ Last-Update: 2014-05-24 @@ -11,7 +11,7 @@ from ._collections import HTTPHeaderDict - from .exceptions import DecodeError + from .exceptions import DecodeError, ReadTimeoutError -from .packages.six import string_types as basestring, binary_type +from six import string_types as basestring, binary_type from .util import is_fp_closed @@ -106,7 +106,7 @@ Last-Update: 2014-05-24 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] --- a/urllib3/connection.py +++ b/urllib3/connection.py -@@ -39,7 +39,7 @@ +@@ -34,7 +34,7 @@ ConnectTimeoutError, ) from .packages.ssl_match_hostname import match_hostname diff --git a/debian/patches/04_relax_nosetests_options.patch b/debian/patches/04_relax_nosetests_options.patch index 29e6910..2f7f8ff 100644 --- a/debian/patches/04_relax_nosetests_options.patch +++ b/debian/patches/04_relax_nosetests_options.patch @@ -3,7 +3,7 @@ Description: Do not use logging-clear-handlers to see all logging output and it will be easier to backport python-urllib3 to Wheezy. 
Author: Daniele Tricoli <eriol@mornie.org> Forwarded: not-needed -Last-Update: 2013-10-16 +Last-Update: 2014-7-7 --- a/setup.cfg +++ b/setup.cfg @@ -17,4 +17,4 @@ Last-Update: 2013-10-16 +# cover-min-percentage = 100 cover-erase = true - [egg_info] + [flake8] diff --git a/dummyserver/__init__.pyc b/dummyserver/__init__.pyc Binary files differdeleted file mode 100644 index b017ac5..0000000 --- a/dummyserver/__init__.pyc +++ /dev/null diff --git a/dummyserver/__pycache__/__init__.cpython-33.pyc b/dummyserver/__pycache__/__init__.cpython-33.pyc Binary files differdeleted file mode 100644 index d1e84e3..0000000 --- a/dummyserver/__pycache__/__init__.cpython-33.pyc +++ /dev/null diff --git a/dummyserver/__pycache__/handlers.cpython-33.pyc b/dummyserver/__pycache__/handlers.cpython-33.pyc Binary files differdeleted file mode 100644 index e3bab97..0000000 --- a/dummyserver/__pycache__/handlers.cpython-33.pyc +++ /dev/null diff --git a/dummyserver/__pycache__/proxy.cpython-33.pyc b/dummyserver/__pycache__/proxy.cpython-33.pyc Binary files differdeleted file mode 100644 index 4cca456..0000000 --- a/dummyserver/__pycache__/proxy.cpython-33.pyc +++ /dev/null diff --git a/dummyserver/__pycache__/server.cpython-33.pyc b/dummyserver/__pycache__/server.cpython-33.pyc Binary files differdeleted file mode 100644 index 49504c9..0000000 --- a/dummyserver/__pycache__/server.cpython-33.pyc +++ /dev/null diff --git a/dummyserver/__pycache__/testcase.cpython-33.pyc b/dummyserver/__pycache__/testcase.cpython-33.pyc Binary files differdeleted file mode 100644 index 21e3d10..0000000 --- a/dummyserver/__pycache__/testcase.cpython-33.pyc +++ /dev/null diff --git a/dummyserver/handlers.pyc b/dummyserver/handlers.pyc Binary files differdeleted file mode 100644 index ddf66d2..0000000 --- a/dummyserver/handlers.pyc +++ /dev/null diff --git a/dummyserver/proxy.pyc b/dummyserver/proxy.pyc Binary files differdeleted file mode 100644 index a23689e..0000000 --- a/dummyserver/proxy.pyc +++ /dev/null diff --git a/dummyserver/server.pyc b/dummyserver/server.pyc Binary files differdeleted file mode 100644 index c0df815..0000000 --- a/dummyserver/server.pyc +++ /dev/null diff --git a/dummyserver/testcase.pyc b/dummyserver/testcase.pyc Binary files differdeleted file mode 100644 index a1f9bdf..0000000 --- a/dummyserver/testcase.pyc +++ /dev/null @@ -5,6 +5,9 @@ cover-package = urllib3 cover-min-percentage = 100 cover-erase = true +[flake8] +max-line-length = 99 + [egg_info] tag_build = tag_date = 0 diff --git a/test/test_util.py b/test/test_util.py index 5dcaeab..944d90f 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,5 +1,6 @@ import logging import unittest +import ssl from mock import patch @@ -11,6 +12,7 @@ from urllib3.util import ( parse_url, Timeout, Url, + resolve_cert_reqs, ) from urllib3.exceptions import LocationParseError, TimeoutStateError @@ -177,6 +179,10 @@ class TestUtil(unittest.TestCase): make_headers(proxy_basic_auth='foo:bar'), {'proxy-authorization': 'Basic Zm9vOmJhcg=='}) + self.assertEqual( + make_headers(disable_cache=True), + {'cache-control': 'no-cache'}) + def test_split_first(self): test_cases = { ('abcd', 'b'): ('a', 'cd', 'b'), @@ -294,4 +300,11 @@ class TestUtil(unittest.TestCase): current_time.return_value = TIMEOUT_EPOCH + 37 self.assertEqual(timeout.get_connect_duration(), 37) + def test_resolve_cert_reqs(self): + self.assertEqual(resolve_cert_reqs(None), ssl.CERT_NONE) + self.assertEqual(resolve_cert_reqs(ssl.CERT_NONE), ssl.CERT_NONE) + + 
self.assertEqual(resolve_cert_reqs(ssl.CERT_REQUIRED), ssl.CERT_REQUIRED) + self.assertEqual(resolve_cert_reqs('REQUIRED'), ssl.CERT_REQUIRED) + self.assertEqual(resolve_cert_reqs('CERT_REQUIRED'), ssl.CERT_REQUIRED) diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 0021e34..8e4fc2f 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.8.2 +Version: 1.8.3 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -28,7 +28,14 @@ Description: ======= - Tested on Python 2.6+ and Python 3.2+, 100% unit test coverage. - Small and easy to understand codebase perfect for extending and building upon. For a more comprehensive solution, have a look at - `Requests <http://python-requests.org/>`_ which is also powered by urllib3. + `Requests <http://python-requests.org/>`_ which is also powered by ``urllib3``. + + You might already be using urllib3! + =================================== + + ``urllib3`` powers `many great Python libraries <https://sourcegraph.com/search?q=package+urllib3>`_, + including ``pip`` and ``requests``. + What's wrong with urllib and urllib2? ===================================== @@ -99,6 +106,7 @@ Description: ======= py27: commands succeeded py32: commands succeeded py33: commands succeeded + py34: commands succeeded Note that code coverage less than 100% is regarded as a failing run. @@ -121,6 +129,28 @@ Description: ======= Changes ======= + 1.8.3 (2014-06-23) + ++++++++++++++++++ + + * Fix TLS verification when using a proxy in Python 3.4.1. (Issue #385) + + * Add ``disable_cache`` option to ``urllib3.util.make_headers``. (Issue #393) + + * Wrap ``socket.timeout`` exception with + ``urllib3.exceptions.ReadTimeoutError``. (Issue #399) + + * Fixed proxy-related bug where connections were being reused incorrectly. + (Issues #366, #369) + + * Added ``socket_options`` keyword parameter which allows to define + ``setsockopt`` configuration of new sockets. (Issue #397) + + * Removed ``HTTPConnection.tcp_nodelay`` in favor of + ``HTTPConnection.default_socket_options``. (Issue #397) + + * Fixed ``TypeError`` bug in Python 2.6.4. (Issue #411) + + 1.8.2 (2014-04-17) ++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 72e3351..fb93e5b 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -7,20 +7,10 @@ setup.cfg setup.py test-requirements.txt dummyserver/__init__.py -dummyserver/__init__.pyc dummyserver/handlers.py -dummyserver/handlers.pyc dummyserver/proxy.py -dummyserver/proxy.pyc dummyserver/server.py -dummyserver/server.pyc dummyserver/testcase.py -dummyserver/testcase.pyc -dummyserver/__pycache__/__init__.cpython-33.pyc -dummyserver/__pycache__/handlers.cpython-33.pyc -dummyserver/__pycache__/proxy.cpython-33.pyc -dummyserver/__pycache__/server.cpython-33.pyc -dummyserver/__pycache__/testcase.cpython-33.pyc dummyserver/certs/cacert.key dummyserver/certs/cacert.pem dummyserver/certs/client.csr diff --git a/urllib3/__init__.py b/urllib3/__init__.py index bd237a6..c80d5da 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. 
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8.2' +__version__ = '1.8.3' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 9cea3a4..ccf0d5f 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -116,7 +116,7 @@ class HTTPHeaderDict(MutableMapping): A ``dict`` like container for storing HTTP Headers. Field names are stored and compared case-insensitively in compliance with - RFC 2616. Iteration provides the first case-sensitive key seen for each + RFC 7230. Iteration provides the first case-sensitive key seen for each case-insensitive pair. Using ``__setitem__`` syntax overwrites fields that compare equal diff --git a/urllib3/connection.py b/urllib3/connection.py index de7b925..fbb63ed 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -8,32 +8,27 @@ import sys import socket from socket import timeout as SocketTimeout -try: # Python 3 +try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException except ImportError: from httplib import HTTPConnection as _HTTPConnection, HTTPException + class DummyConnection(object): "Used to detect a failed ConnectionCls import." pass -try: # Compiled with SSL? - ssl = None + +try: # Compiled with SSL? HTTPSConnection = DummyConnection + import ssl + BaseSSLError = ssl.SSLError +except (ImportError, AttributeError): # Platform-specific: No SSL. + ssl = None class BaseSSLError(BaseException): pass - try: # Python 3 - from http.client import HTTPSConnection as _HTTPSConnection - except ImportError: - from httplib import HTTPSConnection as _HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass from .exceptions import ( ConnectTimeoutError, @@ -58,12 +53,34 @@ class HTTPConnection(_HTTPConnection, object): """ Based on httplib.HTTPConnection but provides an extra constructor backwards-compatibility layer between older and newer Pythons. + + Additional keyword parameters are used to configure attributes of the connection. + Accepted parameters include: + + - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` + - ``source_address``: Set the source address for the current connection. + + .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x + + - ``socket_options``: Set specific options on the underlying socket. If not specified, then + defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling + Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. + + For example, if you wish to enable TCP Keep Alive in addition to the defaults, + you might pass:: + + HTTPConnection.default_socket_options + [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + ] + + Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). """ default_port = port_by_scheme['http'] - # By default, disable Nagle's Algorithm. - tcp_nodelay = 1 + #: Disable Nagle's algorithm by default. + #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` + default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] def __init__(self, *args, **kw): if six.PY3: # Python 3 @@ -74,30 +91,54 @@ class HTTPConnection(_HTTPConnection, object): # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + #: The socket options provided by the user. 
If no options are + #: provided, we use the default options. + self.socket_options = kw.pop('socket_options', self.default_socket_options) + # Superclass also sets self.source_address in Python 2.7+. - _HTTPConnection.__init__(self, *args, **kw) + _HTTPConnection.__init__(self, *args, **kw) def _new_conn(self): """ Establish a socket connection and set nodelay settings on it. - :return: a new socket connection + :return: New socket connection. """ extra_args = [] if self.source_address: # Python 2.7+ extra_args.append(self.source_address) - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) - conn.setsockopt( - socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + try: + conn = socket.create_connection( + (self.host, self.port), self.timeout, *extra_args) + + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + # Set options on the socket. + self._set_options_on(conn) return conn def _prepare_conn(self, conn): self.sock = conn - if self._tunnel_host: + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + def _set_options_on(self, conn): + # Disable all socket options if the user passes ``socket_options=None`` + if self.socket_options is None: + return + + for opt in self.socket_options: + conn.setsockopt(*opt) def connect(self): conn = self._new_conn() @@ -134,7 +175,6 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None - conn_kw = {} def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -149,37 +189,32 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification - - try: - sock = socket.create_connection( - address=(self.host, self.port), timeout=self.timeout, - **self.conn_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + conn = self._new_conn() resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. + hostname = self.host if getattr(self, '_tunnel_host', None): - self.sock = sock + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + + self.sock = conn # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Mark this connection as not reusable + self.auto_open = 0 + + # Override the host with the one we're requesting data from. 
+ hostname = self._tunnel_host # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) if resolved_cert_reqs != ssl.CERT_NONE: @@ -188,7 +223,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + self.assert_hostname or hostname) if ssl: diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 95a53a7..ab205fa 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -11,7 +11,7 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 +try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full @@ -21,7 +21,6 @@ except ImportError: from .exceptions import ( ClosedPoolError, ConnectionError, - ConnectTimeoutError, EmptyPoolError, HostChangedError, LocationParseError, @@ -54,8 +53,8 @@ log = logging.getLogger(__name__) _Default = object() -## Pool objects +## Pool objects class ConnectionPool(object): """ Base class for all connection pools, such as @@ -82,6 +81,7 @@ class ConnectionPool(object): # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) + class HTTPConnectionPool(ConnectionPool, RequestMethods): """ Thread-safe connection pool for one host. @@ -133,6 +133,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param _proxy_headers: A dictionary with proxy headers, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" + + :param \**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. """ scheme = 'http' @@ -166,11 +170,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # These are mostly for testing and debugging purposes. self.num_connections = 0 self.num_requests = 0 - - if sys.version_info < (2, 7): # Python 2.6 and older - conn_kw.pop('source_address', None) self.conn_kw = conn_kw + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault('socket_options', []) + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -182,10 +189,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, strict=self.strict, **self.conn_kw) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. 
- conn.tcp_nodelay = 0 return conn def _get_conn(self, timeout=None): @@ -204,7 +207,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: conn = self.pool.get(block=self.block, timeout=timeout) - except AttributeError: # self.pool is None + except AttributeError: # self.pool is None raise ClosedPoolError(self, "Pool is closed.") except Empty: @@ -218,6 +221,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn and is_connection_dropped(conn): log.info("Resetting dropped connection: %s" % self.host) conn.close() + if getattr(conn, 'auto_open', 1) == 0: + # This is a proxied connection that has been mutated by + # httplib._tunnel() and cannot be reused (since it would + # attempt to bypass the proxy) + conn = None return conn or self._new_conn() @@ -237,7 +245,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ try: self.pool.put(conn, block=False) - return # Everything is dandy, done. + return # Everything is dandy, done. except AttributeError: # self.pool is None. pass @@ -283,16 +291,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj = self._get_timeout(timeout) - try: - timeout_obj.start_connect() - conn.timeout = timeout_obj.connect_timeout - # conn.request() calls httplib.*.request, not the method in - # urllib3.request. It also calls makefile (recv) on the socket. - conn.request(method, url, **httplib_request_kw) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, timeout_obj.connect_timeout)) + timeout_obj.start_connect() + conn.timeout = timeout_obj.connect_timeout + # conn.request() calls httplib.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + conn.request(method, url, **httplib_request_kw) # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout @@ -310,14 +313,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) - else: # None or a value + else: # None or a value conn.sock.settimeout(read_timeout) # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7+, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) - except TypeError: # Python 2.6 and older + except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() except SocketTimeout: raise ReadTimeoutError( @@ -333,7 +336,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise - except SocketError as e: # Platform-specific: Python 2 + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: @@ -364,7 +367,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() except Empty: - pass # Done. + pass # Done. 
def is_same_host(self, url): """ @@ -605,11 +608,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=None, assert_fingerprint=None, **conn_kw): - if sys.version_info < (2, 7): # Python 2.6 or older - conn_kw.pop('source_address', None) - HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, **conn_kw) + block, headers, _proxy, _proxy_headers, + **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -617,7 +618,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -633,7 +633,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -641,7 +640,12 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - set_tunnel(self.host, self.port, self.proxy_headers) + + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) + # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. conn.connect() @@ -667,18 +671,9 @@ class HTTPSConnectionPool(HTTPConnectionPool): actual_host = self.proxy.host actual_port = self.proxy.port - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - extra_params.update(self.conn_kw) - conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, - **extra_params) - if self.proxy is not None: - # Enable Nagle's algorithm for proxies, to avoid packet - # fragmentation. - conn.tcp_nodelay = 0 + strict=self.strict, **self.conn_kw) return self._prepare_conn(conn) diff --git a/urllib3/fields.py b/urllib3/fields.py index ed01765..dceafb4 100644 --- a/urllib3/fields.py +++ b/urllib3/fields.py @@ -15,7 +15,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ @@ -78,9 +78,10 @@ class RequestField(object): """ A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - Supports constructing :class:`~urllib3.fields.RequestField` from parameter - of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) - tuple where the MIME type is optional. For example: :: + Supports constructing :class:`~urllib3.fields.RequestField` from + parameter of key/value strings AND key/filetuple. A filetuple is a + (filename, data, MIME type) tuple where the MIME type is optional. + For example: :: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), @@ -125,8 +126,8 @@ class RequestField(object): 'Content-Disposition' fields. :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as - `k1="v1"; k2="v2"; ...`. 
+ A sequence of (k, v) typles or a :class:`dict` of (k, v) to format + as `k1="v1"; k2="v2"; ...`. """ parts = [] iterable = header_parts @@ -158,7 +159,8 @@ class RequestField(object): lines.append('\r\n') return '\r\n'.join(lines) - def make_multipart(self, content_disposition=None, content_type=None, content_location=None): + def make_multipart(self, content_disposition=None, content_type=None, + content_location=None): """ Makes this request field into a multipart request field. @@ -172,6 +174,10 @@ class RequestField(object): """ self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))]) + self.headers['Content-Disposition'] += '; '.join([ + '', self._render_parts( + (('name', self._name), ('filename', self._filename)) + ) + ]) self.headers['Content-Type'] = content_type self.headers['Content-Location'] = content_location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e8b30bd..c3db30c 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -5,7 +5,6 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php import codecs -import mimetypes from uuid import uuid4 from io import BytesIO @@ -38,10 +37,10 @@ def iter_field_objects(fields): i = iter(fields) for field in i: - if isinstance(field, RequestField): - yield field - else: - yield RequestField.from_tuples(*field) + if isinstance(field, RequestField): + yield field + else: + yield RequestField.from_tuples(*field) def iter_fields(fields): diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index f18ff2b..3945f5d 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -161,7 +161,7 @@ class PoolManager(RequestMethods): # Support relative URLs for redirecting. redirect_location = urljoin(url, redirect_location) - # RFC 2616, Section 10.3.4 + # RFC 7231, Section 6.4.4 if response.status == 303: method = 'GET' diff --git a/urllib3/request.py b/urllib3/request.py index 2a92cc2..7a46f1b 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -26,8 +26,8 @@ class RequestMethods(object): Specifically, - :meth:`.request_encode_url` is for sending requests whose fields are encoded - in the URL (such as GET, HEAD, DELETE). + :meth:`.request_encode_url` is for sending requests whose fields are + encoded in the URL (such as GET, HEAD, DELETE). :meth:`.request_encode_body` is for sending requests whose fields are encoded in the *body* of the request using multipart or www-form-urlencoded @@ -51,7 +51,7 @@ class RequestMethods(object): def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, - **kw): # Abstract + **kw): # Abstract raise NotImplemented("Classes extending RequestMethods must implement " "their own ``urlopen`` method.") @@ -61,8 +61,8 @@ class RequestMethods(object): ``fields`` based on the ``method`` used. This is a convenience method that requires the least amount of manual - effort. It can be used in most situations, while still having the option - to drop down to more specific methods when necessary, such as + effort. It can be used in most situations, while still having the + option to drop down to more specific methods when necessary, such as :meth:`request_encode_url`, :meth:`request_encode_body`, or even the lowest level :meth:`urlopen`. 
""" @@ -70,12 +70,12 @@ class RequestMethods(object): if method in self._encode_url_methods: return self.request_encode_url(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) else: return self.request_encode_body(method, url, fields=fields, - headers=headers, - **urlopen_kw) + headers=headers, + **urlopen_kw) def request_encode_url(self, method, url, fields=None, **urlopen_kw): """ @@ -94,14 +94,14 @@ class RequestMethods(object): the body. This is useful for request methods like POST, PUT, PATCH, etc. When ``encode_multipart=True`` (default), then - :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the - payload with the appropriate content type. Otherwise + :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode + the payload with the appropriate content type. Otherwise :meth:`urllib.urlencode` is used with the 'application/x-www-form-urlencoded' content type. Multipart encoding must be used when posting files, and it's reasonably - safe to use it in other times too. However, it may break request signing, - such as with OAuth. + safe to use it in other times too. However, it may break request + signing, such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where @@ -119,17 +119,17 @@ class RequestMethods(object): When uploading a file, providing a filename (the first parameter of the tuple) is optional but recommended to best mimick behavior of browsers. - Note that if ``headers`` are supplied, the 'Content-Type' header will be - overwritten because it depends on the dynamic random boundary string + Note that if ``headers`` are supplied, the 'Content-Type' header will + be overwritten because it depends on the dynamic random boundary string which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ if encode_multipart: - body, content_type = encode_multipart_formdata(fields or {}, - boundary=multipart_boundary) + body, content_type = encode_multipart_formdata( + fields or {}, boundary=multipart_boundary) else: body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') + 'application/x-www-form-urlencoded') if headers is None: headers = self.headers diff --git a/urllib3/response.py b/urllib3/response.py index db44182..13ffba4 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -5,19 +5,16 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php -import logging import zlib import io +from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError +from .exceptions import DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed -log = logging.getLogger(__name__) - - class DeflateDecoder(object): def __init__(self): @@ -163,8 +160,8 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) 
""" - # Note: content-encoding value should be case-insensitive, per RFC 2616 - # Section 3.5 + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: @@ -178,23 +175,29 @@ class HTTPResponse(io.IOBase): flush_decoder = False try: - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do not - # properly close the connection in all cases. There is no harm - # in redundantly calling close. - self._fp.close() + try: + if amt is None: + # cStringIO doesn't like amt=None + data = self._fp.read() flush_decoder = True + else: + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do + # not properly close the connection in all cases. There is + # no harm in redundantly calling close. + self._fp.close() + flush_decoder = True + + except SocketTimeout: + # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but + # there is yet no clean way to get at it from this context. + raise ReadTimeoutError(self._pool, None, 'Read timed out.') self._fp_bytes_read += len(data) @@ -204,8 +207,7 @@ class HTTPResponse(io.IOBase): except (IOError, zlib.error) as e: raise DecodeError( "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, - e) + "failed to decode it." % content_encoding, e) if flush_decoder and decode_content and self._decoder: buf = self._decoder.decompress(binary_type()) @@ -242,7 +244,6 @@ class HTTPResponse(io.IOBase): if data: yield data - @classmethod def from_httplib(ResponseCls, r, **response_kw): """ @@ -297,7 +298,7 @@ class HTTPResponse(io.IOBase): elif hasattr(self._fp, "fileno"): return self._fp.fileno() else: - raise IOError("The file-like object this HTTPResponse is wrapped " + raise IOError("The file-like object this HTTPResponse is wrapped " "around has no file descriptor") def flush(self): diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 8deeab5..c67ef04 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -8,6 +8,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms except ImportError: # `select` doesn't exist on AppEngine. select = False + def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -22,7 +23,7 @@ def is_connection_dropped(conn): # Platform-specific if sock is False: # Platform-specific: AppEngine return False if sock is None: # Connection already closed (such as by httplib). 
- return False + return True if not poll: if not select: # Platform-specific: AppEngine @@ -40,6 +41,3 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True - - - diff --git a/urllib3/util/request.py b/urllib3/util/request.py index d48d651..bfd7a98 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -7,7 +7,7 @@ ACCEPT_ENCODING = 'gzip,deflate' def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): + basic_auth=None, proxy_basic_auth=None, disable_cache=None): """ Shortcuts for generating request headers. @@ -29,8 +29,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. + Colon-separated username:password string for + 'proxy-authorization: basic ...' auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. Example: :: @@ -63,6 +66,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['proxy-authorization'] = 'Basic ' + \ b64encode(six.b(proxy_basic_auth)).decode('utf-8') - return headers - + if disable_cache: + headers['cache-control'] = 'no-cache' + return headers diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index 4f947cb..aaadc12 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -94,17 +94,16 @@ class Timeout(object): return '%s(connect=%r, read=%r, total=%r)' % ( type(self).__name__, self._connect, self._read, self.total) - @classmethod def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid + """ Check that a timeout attribute is valid. :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. """ if value is _Default: return cls.DEFAULT_TIMEOUT @@ -123,7 +122,7 @@ class Timeout(object): raise ValueError("Attempted to set %s timeout to %s, but the " "timeout cannot be set to a value less " "than 0." % (name, value)) - except TypeError: # Python 3 + except TypeError: # Python 3 raise ValueError("Timeout value %s was %s, but it must be an " "int or float." % (name, value)) @@ -135,12 +134,12 @@ class Timeout(object): The timeout value used by httplib.py sets the same timeout on the connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. + object that sets the individual timeouts to the ``timeout`` value + passed to this function. - :param timeout: The legacy timeout value + :param timeout: The legacy timeout value. 
:type timeout: integer, float, sentinel default object, or None - :return: a Timeout object + :return: Timeout object :rtype: :class:`Timeout` """ return Timeout(read=timeout, connect=timeout) @@ -174,7 +173,7 @@ class Timeout(object): def get_connect_duration(self): """ Gets the time elapsed since the call to :meth:`start_connect`. - :return: the elapsed time + :return: Elapsed time. :rtype: float :raises urllib3.exceptions.TimeoutStateError: if you attempt to get duration for a timer that hasn't been started. @@ -191,7 +190,7 @@ class Timeout(object): This will be a positive float or integer, the value None (never timeout), or the default system timeout. - :return: the connect timeout + :return: Connect timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None """ if self.total is None: @@ -214,7 +213,7 @@ class Timeout(object): established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be raised. - :return: the value to use for the read timeout + :return: Value to use for the read timeout. :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` has not yet been called on this object. @@ -223,7 +222,7 @@ class Timeout(object): self.total is not self.DEFAULT_TIMEOUT and self._read is not None and self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. + # In case the connect timeout has not yet been established. if self._start_connect is None: return self._read return max(0, min(self.total - self.get_connect_duration(), diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 362d216..122108b 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,16 +2,20 @@ from collections import namedtuple from ..exceptions import LocationParseError +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + +class Url(namedtuple('Url', url_attrs)): """ Datastructure for representing an HTTP URL. Used as a return value for :func:`parse_url`. """ slots = () - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) @property def hostname(self): |
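
The utility modules at the end of the diff (``urllib3/util/timeout.py`` and ``urllib3/util/url.py``) receive mostly docstring and formatting changes; as a quick reference, a small sketch of how ``Timeout`` and ``parse_url``/``Url`` are typically used, with the URL and timeout values chosen purely for illustration::

    # Sketch of the Timeout and Url helpers touched above; values are examples.
    from urllib3 import PoolManager
    from urllib3.util import Timeout, parse_url

    # Separate connect and read timeouts, instead of the legacy single value
    # that Timeout.from_float() would apply to both phases.
    timeout = Timeout(connect=2.0, read=5.0)
    http = PoolManager(timeout=timeout)

    # parse_url() returns the Url namedtuple defined at the end of url.py.
    url = parse_url('http://user:pw@example.org:8080/path?q=1#frag')
    print(url.scheme, url.auth, url.hostname, url.port, url.path, url.query)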