From 5f949ee35667a6065ab02a3e7ab8c98c9fcdcaed Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:36 -0700 Subject: Imported Upstream version 1.8 --- urllib3/__init__.py | 2 +- urllib3/_collections.py | 117 ++++++++- urllib3/connection.py | 195 ++++++++++++++ urllib3/connectionpool.py | 290 +++++++++------------ urllib3/contrib/pyopenssl.py | 85 +++++- urllib3/exceptions.py | 5 + urllib3/filepost.py | 11 +- urllib3/packages/ssl_match_hostname/__init__.py | 111 +------- .../packages/ssl_match_hostname/_implementation.py | 105 ++++++++ urllib3/poolmanager.py | 11 +- urllib3/request.py | 1 - urllib3/response.py | 29 ++- urllib3/util.py | 54 ++-- 13 files changed, 696 insertions(+), 320 deletions(-) create mode 100644 urllib3/connection.py create mode 100644 urllib3/packages/ssl_match_hostname/_implementation.py (limited to 'urllib3') diff --git a/urllib3/__init__.py b/urllib3/__init__.py index eed7006..086387f 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.7.1' +__version__ = '1.8' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 282b8d5..9cea3a4 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -4,16 +4,26 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import MutableMapping -from threading import RLock +from collections import Mapping, MutableMapping +try: + from threading import RLock +except ImportError: # Platform-specific: No threads available + class RLock: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + pass + try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict +from .packages.six import itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] _Null = object() @@ -92,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: return self._container.keys() + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 2616. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + + If you want to access the raw headers with their original casing + for debugging purposes you can access the private ``._data`` attribute + which is a normal python ``dict`` that maps the case-insensitive key to a + list of tuples stored as (case-sensitive-original-name, value). Using the + structure from above as our example: + + >>> headers._data + {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], + 'content-length': [('content-length', '7')]} + """ + + def __init__(self, headers=None, **kwargs): + self._data = {} + if headers is None: + headers = {} + self.update(headers, **kwargs) + + def add(self, key, value): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + self._data.setdefault(key.lower(), []).append((key, value)) + + def getlist(self, key): + """Returns a list of all the values for the named field. Returns an + empty list if the key doesn't exist.""" + return self[key].split(', ') if key in self else [] + + def copy(self): + h = HTTPHeaderDict() + for key in self._data: + for rawkey, value in self._data[key]: + h.add(rawkey, value) + return h + + def __eq__(self, other): + if not isinstance(other, Mapping): + return False + other = HTTPHeaderDict(other) + return dict((k1, self[k1]) for k1 in self._data) == \ + dict((k2, other[k2]) for k2 in other._data) + + def __getitem__(self, key): + values = self._data[key.lower()] + return ', '.join(value[1] for value in values) + + def __setitem__(self, key, value): + self._data[key.lower()] = [(key, value)] + + def __delitem__(self, key): + del self._data[key.lower()] + + def __len__(self): + return len(self._data) + + def __iter__(self): + for headers in itervalues(self._data): + yield headers[0][0] + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/urllib3/connection.py b/urllib3/connection.py new file mode 100644 index 0000000..662bd2e --- /dev/null +++ b/urllib3/connection.py @@ -0,0 +1,195 @@ +# urllib3/connection.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import sys +import socket +from socket import timeout as SocketTimeout + +try: # Python 3 + from http.client import HTTPConnection as _HTTPConnection, HTTPException +except ImportError: + from httplib import HTTPConnection as _HTTPConnection, HTTPException + +class DummyConnection(object): + "Used to detect a failed ConnectionCls import." + pass + +try: # Compiled with SSL? + ssl = None + HTTPSConnection = DummyConnection + + class BaseSSLError(BaseException): + pass + + try: # Python 3 + from http.client import HTTPSConnection as _HTTPSConnection + except ImportError: + from httplib import HTTPSConnection as _HTTPSConnection + + import ssl + BaseSSLError = ssl.SSLError + +except (ImportError, AttributeError): # Platform-specific: No SSL. + pass + +from .exceptions import ( + ConnectTimeoutError, +) +from .packages.ssl_match_hostname import match_hostname +from .packages import six +from .util import ( + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) + + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + + +class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + """ + + default_port = port_by_scheme['http'] + + # By default, disable Nagle's Algorithm. + tcp_nodelay = 1 + + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + + if sys.version_info < (2, 7): # Python 2.6 and earlier + kw.pop('source_address', None) + self.source_address = None + + _HTTPConnection.__init__(self, *args, **kw) + + def _new_conn(self): + """ Establish a socket connection and set nodelay settings on it + + :return: a new socket connection + """ + extra_args = [] + if self.source_address: # Python 2.7+ + extra_args.append(self.source_address) + + conn = socket.create_connection( + (self.host, self.port), + self.timeout, + *extra_args + ) + conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + return conn + + def _prepare_conn(self, conn): + self.sock = conn + if self._tunnel_host: + # TODO: Fix tunnel so it doesn't depend on self.sock state. + self._tunnel() + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + + +class HTTPSConnection(HTTPConnection): + default_port = port_by_scheme['https'] + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + + HTTPConnection.__init__(self, host, port, + strict=strict, + timeout=timeout, + source_address=source_address) + + self.key_file = key_file + self.cert_file = cert_file + + def connect(self): + conn = self._new_conn() + self._prepare_conn(conn) + self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) + + +class VerifiedHTTPSConnection(HTTPSConnection): + """ + Based on httplib.HTTPSConnection but wraps the socket with + SSL certification. + """ + cert_reqs = None + ca_certs = None + ssl_version = None + + def set_cert(self, key_file=None, cert_file=None, + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def connect(self): + # Add certificate verification + try: + sock = socket.create_connection( + address=(self.host, self.port), + timeout=self.timeout, + ) + except SocketTimeout: + raise ConnectTimeoutError( + self, "Connection to %s timed out. (connect timeout=%s)" % + (self.host, self.timeout)) + + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, + self.tcp_nodelay) + + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): + self.sock = sock + # Calls self._set_hostport(), so self.host is + # self._tunnel_host below. + self._tunnel() + + # Wrap socket using verification with the root certs in + # trusted_root_certs + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + elif self.assert_hostname is not False: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) + + +if ssl: + # Make a copy for testing. + UnverifiedHTTPSConnection = HTTPSConnection + HTTPSConnection = VerifiedHTTPSConnection diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 691d4e2..6d0dbb1 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -10,13 +10,6 @@ import logging from socket import error as SocketError, timeout as SocketTimeout import socket -try: # Python 3 - from http.client import HTTPConnection, HTTPException - from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: - from httplib import HTTPConnection, HTTPException - from httplib import HTTP_PORT, HTTPS_PORT - try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: @@ -24,121 +17,42 @@ except ImportError: import Queue as _ # Platform-specific: Windows -try: # Compiled with SSL? - HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass - - from .exceptions import ( ClosedPoolError, + ConnectionError, ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: @@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param retries: Number of retries to allow before raising a MaxRetryError exception. + If `False`, then retries are disabled and any exception is raised + immediately. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: + if retries < 0 and retries is not False: raise MaxRetryError(self, url) if release_conn is None: @@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") - - except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) + if not retries: + if isinstance(e, TimeoutError): + # TimeoutError is exempt from MaxRetryError-wrapping. + # FIXME: ... Not sure why. Add a reason here. + raise - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e + # Wrap unexpected exceptions with the most appropriate + # module-level exception and re-raise. + if isinstance(e, SocketError) and self.proxy: + raise ProxyError('Cannot connect to proxy.', e) + + if retries is False: + raise ConnectionError('Connection failed.', e) - if retries == 0: raise MaxRetryError(self, url, e) + # Keep track of the error for the retry warning. + err = e + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) + log.warning("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location: + if redirect_location and retries is not False: if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. """ - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. - connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + + return self._prepare_conn(conn) def connection_from_url(url, **kw): diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index d43bcd6..7c513f3 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -1,4 +1,4 @@ -'''SSL with SNI-support for Python 2. +'''SSL with SNI_-support for Python 2. This needs the following packages installed: @@ -18,17 +18,36 @@ your application begins using ``urllib3``, like this:: Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. + +Activating this module also has the positive side effect of disabling SSL/TLS +encryption in Python 2 (see `CRIME attack`_). + +If you want to configure the default list of supported cipher suites, you can +set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. + +Module Variables +---------------- + +:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` + +.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication +.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) + ''' from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT -from ndg.httpsclient.subj_alt_name import SubjectAltName +from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder -from socket import _fileobject +from pyasn1.type import univ, constraint +from socket import _fileobject, timeout import ssl +import select from cStringIO import StringIO -from .. import connectionpool +from .. import connection from .. import util __all__ = ['inject_into_urllib3', 'extract_from_urllib3'] @@ -49,25 +68,53 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" + orig_util_HAS_SNI = util.HAS_SNI -orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket +orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket def inject_into_urllib3(): 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - connectionpool.ssl_wrap_socket = ssl_wrap_socket + connection.ssl_wrap_socket = ssl_wrap_socket util.HAS_SNI = HAS_SNI def extract_from_urllib3(): 'Undo monkey-patching by :func:`inject_into_urllib3`.' - connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket util.HAS_SNI = orig_util_HAS_SNI +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +class SubjectAltName(BaseSubjectAltName): + '''ASN.1 implementation for subjectAltNames support''' + + # There is no limit to how many SAN certificates a certificate may have, + # however this needs to have some limit so we'll set an arbitrarily high + # limit. + sizeSpec = univ.SequenceOf.sizeSpec + \ + constraint.ValueSizeConstraint(1, 1024) + + ### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. def get_subj_alt_name(peer_cert): # Search through extensions @@ -101,6 +148,13 @@ def get_subj_alt_name(peer_cert): class fileobject(_fileobject): + def _wait_for_sock(self): + rd, wd, ed = select.select([self._sock], [], [], + self._sock.gettimeout()) + if not rd: + raise timeout() + + def read(self, size=-1): # Use max, disallow tiny reads in a loop as they are very inefficient. # We never leave read() with any leftover data from a new recv() call @@ -118,6 +172,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -145,6 +200,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(left) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -196,6 +252,7 @@ class fileobject(_fileobject): break buffers.append(data) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue break return "".join(buffers) @@ -206,6 +263,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -233,7 +291,8 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: - continue + self._wait_for_sock() + continue if not data: break left = size - buf_len @@ -328,6 +387,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + ctx.set_options(OP_NO_COMPRESSION) + + # Set list of supported ciphersuites. + ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) cnx = OpenSSL.SSL.Connection(ctx, sock) cnx.set_tlsext_host_name(server_hostname) @@ -336,6 +404,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: + select.select([sock], [], []) continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 98ef9ab..b4df831 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -44,6 +44,11 @@ class ProxyError(HTTPError): pass +class ConnectionError(HTTPError): + "Raised when a normal connection fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass diff --git a/urllib3/filepost.py b/urllib3/filepost.py index 4575582..e8b30bd 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -46,16 +46,15 @@ def iter_field_objects(fields): def iter_fields(fields): """ - Iterate over fields. + .. deprecated:: 1.6 - .. deprecated :: + Iterate over fields. - The addition of `~urllib3.fields.RequestField` makes this function - obsolete. Instead, use :func:`iter_field_objects`, which returns - `~urllib3.fields.RequestField` objects, instead. + The addition of :class:`~urllib3.fields.RequestField` makes this function + obsolete. Instead, use :func:`iter_field_objects`, which returns + :class:`~urllib3.fields.RequestField` objects. Supports list of (k, v) tuples and dicts. - """ if isinstance(fields, dict): return ((k, v) for k, v in six.iteritems(fields)) diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py index 2d61ac2..dd59a75 100644 --- a/urllib3/packages/ssl_match_hostname/__init__.py +++ b/urllib3/packages/ssl_match_hostname/__init__.py @@ -1,98 +1,13 @@ -"""The match_hostname() function from Python 3.2, essential when using SSL.""" - -import re - -__version__ = '3.2.2' - -class CertificateError(ValueError): - pass - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - parts = dn.split(r'.') - leftmost = parts[0] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in parts[1:]: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") +try: + # Python 3.2+ + from ssl import CertificateError, match_hostname +except ImportError: + try: + # Backport of the function from a pypi module + from backports.ssl_match_hostname import CertificateError, match_hostname + except ImportError: + # Our vendored copy + from ._implementation import CertificateError, match_hostname + +# Not needed, but documenting what we provide. +__all__ = ('CertificateError', 'match_hostname') diff --git a/urllib3/packages/ssl_match_hostname/_implementation.py b/urllib3/packages/ssl_match_hostname/_implementation.py new file mode 100644 index 0000000..52f4287 --- /dev/null +++ b/urllib3/packages/ssl_match_hostname/_implementation.py @@ -0,0 +1,105 @@ +"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" + +# Note: This file is under the PSF license as the code comes from the python +# stdlib. http://docs.python.org/3/license.html + +import re + +__version__ = '3.4.0.2' + +class CertificateError(ValueError): + pass + + +def _dnsname_match(dn, hostname, max_wildcards=1): + """Matching according to RFC 6125, section 6.4.3 + + http://tools.ietf.org/html/rfc6125#section-6.4.3 + """ + pats = [] + if not dn: + return False + + # Ported from python3-syntax: + # leftmost, *remainder = dn.split(r'.') + parts = dn.split(r'.') + leftmost = parts[0] + remainder = parts[1:] + + wildcards = leftmost.count('*') + if wildcards > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise CertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) + + # speed up common case w/o wildcards + if not wildcards: + return dn.lower() == hostname.lower() + + # RFC 6125, section 6.4.3, subitem 1. + # The client SHOULD NOT attempt to match a presented identifier in which + # the wildcard character comprises a label other than the left-most label. + if leftmost == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. + pats.append('[^.]+') + elif leftmost.startswith('xn--') or hostname.startswith('xn--'): + # RFC 6125, section 6.4.3, subitem 3. + # The client SHOULD NOT attempt to match a presented identifier + # where the wildcard character is embedded within an A-label or + # U-label of an internationalized domain name. + pats.append(re.escape(leftmost)) + else: + # Otherwise, '*' matches any dotless string, e.g. www* + pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) + + # add the remaining fragments, ignore any wildcards + for frag in remainder: + pats.append(re.escape(frag)) + + pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + return pat.match(hostname) + + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index e7f8667..f18ff2b 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -176,7 +176,7 @@ class ProxyManager(PoolManager): Behaves just like :class:`PoolManager`, but sends all requests through the defined proxy, using the CONNECT method for HTTPS URLs. - :param poxy_url: + :param proxy_url: The URL of the proxy to be used. :param proxy_headers: @@ -245,12 +245,11 @@ class ProxyManager(PoolManager): u = parse_url(url) if u.scheme == "http": - # It's too late to set proxy headers on per-request basis for - # tunnelled HTTPS connections, should use - # constructor's proxy_headers instead. + # For proxied HTTPS requests, httplib sets the necessary headers + # on the CONNECT to the proxy. For HTTP, we'll definitely + # need to set 'Host' at the very least. kw['headers'] = self._set_proxy_headers(url, kw.get('headers', self.headers)) - kw['headers'].update(self.proxy_headers) return super(ProxyManager, self).urlopen(method, url, redirect, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 66a9a0e..2a92cc2 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -45,7 +45,6 @@ class RequestMethods(object): """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) def __init__(self, headers=None): self.headers = headers or {} diff --git a/urllib3/response.py b/urllib3/response.py index 4efff5a..db44182 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -9,6 +9,7 @@ import logging import zlib import io +from ._collections import HTTPHeaderDict from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed @@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -90,6 +94,7 @@ class HTTPResponse(io.IOBase): self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response + self._fp_bytes_read = 0 self._pool = pool self._connection = connection @@ -129,6 +134,14 @@ class HTTPResponse(io.IOBase): if self._fp: return self.read(cache_content=True) + def tell(self): + """ + Obtain the number of bytes pulled over the wire so far. May differ from + the amount of content returned by :meth:``HTTPResponse.read`` if bytes + are encoded on the wire (e.g, compressed). + """ + return self._fp_bytes_read + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -183,6 +196,8 @@ class HTTPResponse(io.IOBase): self._fp.close() flush_decoder = True + self._fp_bytes_read += len(data) + try: if decode_content and self._decoder: data = self._decoder.decompress(data) @@ -238,17 +253,9 @@ class HTTPResponse(io.IOBase): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) diff --git a/urllib3/util.py b/urllib3/util.py index 266c9ed..bd26631 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -80,14 +80,13 @@ class Timeout(object): :type read: integer, float, or None :param total: - The maximum amount of time to wait for an HTTP request to connect and - return. This combines the connect and read timeouts into one. In the + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the event that both a connect timeout and a total are specified, or a read timeout and a total are specified, the shorter timeout will be applied. Defaults to None. - :type total: integer, float, or None .. note:: @@ -101,18 +100,23 @@ class Timeout(object): `total`. In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, not - the total amount of time for the request to return a complete response. - As an example, you may want a request to return within 7 seconds or - fail, so you set the ``total`` timeout to 7 seconds. If the server - sends one byte to you every 5 seconds, the request will **not** trigger - time out. This case is admittedly rare. + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. """ #: A sentinel object representing the default timeout value DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - def __init__(self, connect=_Default, read=_Default, total=None): + def __init__(self, total=None, connect=_Default, read=_Default): self._connect = self._validate_timeout(connect, 'connect') self._read = self._validate_timeout(read, 'read') self.total = self._validate_timeout(total, 'total') @@ -372,7 +376,8 @@ def parse_url(url): # Auth if '@' in url: - auth, url = url.split('@', 1) + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) # IPv6 if url and url[0] == '[': @@ -386,10 +391,14 @@ def parse_url(url): if not host: host = _host - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - - port = int(port) + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError("Failed to parse: %s" % url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None elif not host and url: host = url @@ -417,7 +426,7 @@ def get_host(url): def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None): + basic_auth=None, proxy_basic_auth=None): """ Shortcuts for generating request headers. @@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, Colon-separated username:password string for 'authorization: basic ...' auth header. + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + Example: :: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") @@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, headers['authorization'] = 'Basic ' + \ b64encode(six.b(basic_auth)).decode('utf-8') + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + return headers @@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+ """ context = SSLContext(ssl_version) context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + if ca_certs: try: context.load_verify_locations(ca_certs) -- cgit v1.2.3