aboutsummaryrefslogtreecommitdiff
path: root/urllib3
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3')
-rw-r--r--urllib3/__init__.py2
-rw-r--r--urllib3/_collections.py117
-rw-r--r--urllib3/connection.py195
-rw-r--r--urllib3/connectionpool.py290
-rw-r--r--urllib3/contrib/pyopenssl.py85
-rw-r--r--urllib3/exceptions.py5
-rw-r--r--urllib3/filepost.py11
-rw-r--r--urllib3/packages/ssl_match_hostname/__init__.py111
-rw-r--r--urllib3/packages/ssl_match_hostname/_implementation.py105
-rw-r--r--urllib3/poolmanager.py11
-rw-r--r--urllib3/request.py1
-rw-r--r--urllib3/response.py29
-rw-r--r--urllib3/util.py54
13 files changed, 696 insertions, 320 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index eed7006..086387f 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = '1.7.1'
+__version__ = '1.8'
from .connectionpool import (
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 282b8d5..9cea3a4 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -4,16 +4,26 @@
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
-from collections import MutableMapping
-from threading import RLock
+from collections import Mapping, MutableMapping
+try:
+ from threading import RLock
+except ImportError: # Platform-specific: No threads available
+ class RLock:
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
try: # Python 2.7+
from collections import OrderedDict
except ImportError:
from .packages.ordered_dict import OrderedDict
+from .packages.six import itervalues
-__all__ = ['RecentlyUsedContainer']
+__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
_Null = object()
@@ -92,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping):
def keys(self):
with self.lock:
return self._container.keys()
+
+
+class HTTPHeaderDict(MutableMapping):
+ """
+ :param headers:
+ An iterable of field-value pairs. Must not contain multiple field names
+ when compared case-insensitively.
+
+ :param kwargs:
+ Additional field-value pairs to pass in to ``dict.update``.
+
+ A ``dict`` like container for storing HTTP Headers.
+
+ Field names are stored and compared case-insensitively in compliance with
+ RFC 2616. Iteration provides the first case-sensitive key seen for each
+ case-insensitive pair.
+
+ Using ``__setitem__`` syntax overwrites fields that compare equal
+ case-insensitively in order to maintain ``dict``'s api. For fields that
+ compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
+ in a loop.
+
+ If multiple fields that are equal case-insensitively are passed to the
+ constructor or ``.update``, the behavior is undefined and some will be
+ lost.
+
+ >>> headers = HTTPHeaderDict()
+ >>> headers.add('Set-Cookie', 'foo=bar')
+ >>> headers.add('set-cookie', 'baz=quxx')
+ >>> headers['content-length'] = '7'
+ >>> headers['SET-cookie']
+ 'foo=bar, baz=quxx'
+ >>> headers['Content-Length']
+ '7'
+
+ If you want to access the raw headers with their original casing
+ for debugging purposes you can access the private ``._data`` attribute
+ which is a normal python ``dict`` that maps the case-insensitive key to a
+ list of tuples stored as (case-sensitive-original-name, value). Using the
+ structure from above as our example:
+
+ >>> headers._data
+ {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
+ 'content-length': [('content-length', '7')]}
+ """
+
+ def __init__(self, headers=None, **kwargs):
+ self._data = {}
+ if headers is None:
+ headers = {}
+ self.update(headers, **kwargs)
+
+ def add(self, key, value):
+ """Adds a (name, value) pair, doesn't overwrite the value if it already
+ exists.
+
+ >>> headers = HTTPHeaderDict(foo='bar')
+ >>> headers.add('Foo', 'baz')
+ >>> headers['foo']
+ 'bar, baz'
+ """
+ self._data.setdefault(key.lower(), []).append((key, value))
+
+ def getlist(self, key):
+ """Returns a list of all the values for the named field. Returns an
+ empty list if the key doesn't exist."""
+ return self[key].split(', ') if key in self else []
+
+ def copy(self):
+ h = HTTPHeaderDict()
+ for key in self._data:
+ for rawkey, value in self._data[key]:
+ h.add(rawkey, value)
+ return h
+
+ def __eq__(self, other):
+ if not isinstance(other, Mapping):
+ return False
+ other = HTTPHeaderDict(other)
+ return dict((k1, self[k1]) for k1 in self._data) == \
+ dict((k2, other[k2]) for k2 in other._data)
+
+ def __getitem__(self, key):
+ values = self._data[key.lower()]
+ return ', '.join(value[1] for value in values)
+
+ def __setitem__(self, key, value):
+ self._data[key.lower()] = [(key, value)]
+
+ def __delitem__(self, key):
+ del self._data[key.lower()]
+
+ def __len__(self):
+ return len(self._data)
+
+ def __iter__(self):
+ for headers in itervalues(self._data):
+ yield headers[0][0]
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
diff --git a/urllib3/connection.py b/urllib3/connection.py
new file mode 100644
index 0000000..662bd2e
--- /dev/null
+++ b/urllib3/connection.py
@@ -0,0 +1,195 @@
+# urllib3/connection.py
+# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+#
+# This module is part of urllib3 and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+import sys
+import socket
+from socket import timeout as SocketTimeout
+
+try: # Python 3
+ from http.client import HTTPConnection as _HTTPConnection, HTTPException
+except ImportError:
+ from httplib import HTTPConnection as _HTTPConnection, HTTPException
+
+class DummyConnection(object):
+ "Used to detect a failed ConnectionCls import."
+ pass
+
+try: # Compiled with SSL?
+ ssl = None
+ HTTPSConnection = DummyConnection
+
+ class BaseSSLError(BaseException):
+ pass
+
+ try: # Python 3
+ from http.client import HTTPSConnection as _HTTPSConnection
+ except ImportError:
+ from httplib import HTTPSConnection as _HTTPSConnection
+
+ import ssl
+ BaseSSLError = ssl.SSLError
+
+except (ImportError, AttributeError): # Platform-specific: No SSL.
+ pass
+
+from .exceptions import (
+ ConnectTimeoutError,
+)
+from .packages.ssl_match_hostname import match_hostname
+from .packages import six
+from .util import (
+ assert_fingerprint,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+)
+
+
+port_by_scheme = {
+ 'http': 80,
+ 'https': 443,
+}
+
+
+class HTTPConnection(_HTTPConnection, object):
+ """
+ Based on httplib.HTTPConnection but provides an extra constructor
+ backwards-compatibility layer between older and newer Pythons.
+ """
+
+ default_port = port_by_scheme['http']
+
+ # By default, disable Nagle's Algorithm.
+ tcp_nodelay = 1
+
+ def __init__(self, *args, **kw):
+ if six.PY3: # Python 3
+ kw.pop('strict', None)
+
+ if sys.version_info < (2, 7): # Python 2.6 and earlier
+ kw.pop('source_address', None)
+ self.source_address = None
+
+ _HTTPConnection.__init__(self, *args, **kw)
+
+ def _new_conn(self):
+ """ Establish a socket connection and set nodelay settings on it
+
+ :return: a new socket connection
+ """
+ extra_args = []
+ if self.source_address: # Python 2.7+
+ extra_args.append(self.source_address)
+
+ conn = socket.create_connection(
+ (self.host, self.port),
+ self.timeout,
+ *extra_args
+ )
+ conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+ self.tcp_nodelay)
+ return conn
+
+ def _prepare_conn(self, conn):
+ self.sock = conn
+ if self._tunnel_host:
+ # TODO: Fix tunnel so it doesn't depend on self.sock state.
+ self._tunnel()
+
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+
+
+class HTTPSConnection(HTTPConnection):
+ default_port = port_by_scheme['https']
+
+ def __init__(self, host, port=None, key_file=None, cert_file=None,
+ strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
+
+ HTTPConnection.__init__(self, host, port,
+ strict=strict,
+ timeout=timeout,
+ source_address=source_address)
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+
+ def connect(self):
+ conn = self._new_conn()
+ self._prepare_conn(conn)
+ self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
+
+
+class VerifiedHTTPSConnection(HTTPSConnection):
+ """
+ Based on httplib.HTTPSConnection but wraps the socket with
+ SSL certification.
+ """
+ cert_reqs = None
+ ca_certs = None
+ ssl_version = None
+
+ def set_cert(self, key_file=None, cert_file=None,
+ cert_reqs=None, ca_certs=None,
+ assert_hostname=None, assert_fingerprint=None):
+
+ self.key_file = key_file
+ self.cert_file = cert_file
+ self.cert_reqs = cert_reqs
+ self.ca_certs = ca_certs
+ self.assert_hostname = assert_hostname
+ self.assert_fingerprint = assert_fingerprint
+
+ def connect(self):
+ # Add certificate verification
+ try:
+ sock = socket.create_connection(
+ address=(self.host, self.port),
+ timeout=self.timeout,
+ )
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
+
+ sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
+ self.tcp_nodelay)
+
+ resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
+ resolved_ssl_version = resolve_ssl_version(self.ssl_version)
+
+ # the _tunnel_host attribute was added in python 2.6.3 (via
+ # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do
+ # not have them.
+ if getattr(self, '_tunnel_host', None):
+ self.sock = sock
+ # Calls self._set_hostport(), so self.host is
+ # self._tunnel_host below.
+ self._tunnel()
+
+ # Wrap socket using verification with the root certs in
+ # trusted_root_certs
+ self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
+ cert_reqs=resolved_cert_reqs,
+ ca_certs=self.ca_certs,
+ server_hostname=self.host,
+ ssl_version=resolved_ssl_version)
+
+ if resolved_cert_reqs != ssl.CERT_NONE:
+ if self.assert_fingerprint:
+ assert_fingerprint(self.sock.getpeercert(binary_form=True),
+ self.assert_fingerprint)
+ elif self.assert_hostname is not False:
+ match_hostname(self.sock.getpeercert(),
+ self.assert_hostname or self.host)
+
+
+if ssl:
+ # Make a copy for testing.
+ UnverifiedHTTPSConnection = HTTPSConnection
+ HTTPSConnection = VerifiedHTTPSConnection
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 691d4e2..6d0dbb1 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -11,134 +11,48 @@ from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
- from http.client import HTTPConnection, HTTPException
- from http.client import HTTP_PORT, HTTPS_PORT
-except ImportError:
- from httplib import HTTPConnection, HTTPException
- from httplib import HTTP_PORT, HTTPS_PORT
-
-try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
import Queue as _ # Platform-specific: Windows
-try: # Compiled with SSL?
- HTTPSConnection = object
-
- class BaseSSLError(BaseException):
- pass
-
- ssl = None
-
- try: # Python 3
- from http.client import HTTPSConnection
- except ImportError:
- from httplib import HTTPSConnection
-
- import ssl
- BaseSSLError = ssl.SSLError
-
-except (ImportError, AttributeError): # Platform-specific: No SSL.
- pass
-
-
from .exceptions import (
ClosedPoolError,
+ ConnectionError,
ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
+ TimeoutError,
ReadTimeoutError,
ProxyError,
)
-from .packages.ssl_match_hostname import CertificateError, match_hostname
+from .packages.ssl_match_hostname import CertificateError
from .packages import six
+from .connection import (
+ port_by_scheme,
+ DummyConnection,
+ HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
+ HTTPException, BaseSSLError,
+)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
assert_fingerprint,
get_host,
is_connection_dropped,
- resolve_cert_reqs,
- resolve_ssl_version,
- ssl_wrap_socket,
Timeout,
)
+
xrange = six.moves.xrange
log = logging.getLogger(__name__)
_Default = object()
-port_by_scheme = {
- 'http': HTTP_PORT,
- 'https': HTTPS_PORT,
-}
-
-
-## Connection objects (extension of httplib)
-
-class VerifiedHTTPSConnection(HTTPSConnection):
- """
- Based on httplib.HTTPSConnection but wraps the socket with
- SSL certification.
- """
- cert_reqs = None
- ca_certs = None
- ssl_version = None
-
- def set_cert(self, key_file=None, cert_file=None,
- cert_reqs=None, ca_certs=None,
- assert_hostname=None, assert_fingerprint=None):
-
- self.key_file = key_file
- self.cert_file = cert_file
- self.cert_reqs = cert_reqs
- self.ca_certs = ca_certs
- self.assert_hostname = assert_hostname
- self.assert_fingerprint = assert_fingerprint
-
- def connect(self):
- # Add certificate verification
- try:
- sock = socket.create_connection(
- address=(self.host, self.port),
- timeout=self.timeout)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, self.timeout))
-
- resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
- resolved_ssl_version = resolve_ssl_version(self.ssl_version)
-
- if self._tunnel_host:
- self.sock = sock
- # Calls self._set_hostport(), so self.host is
- # self._tunnel_host below.
- self._tunnel()
-
- # Wrap socket using verification with the root certs in
- # trusted_root_certs
- self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
- cert_reqs=resolved_cert_reqs,
- ca_certs=self.ca_certs,
- server_hostname=self.host,
- ssl_version=resolved_ssl_version)
-
- if resolved_cert_reqs != ssl.CERT_NONE:
- if self.assert_fingerprint:
- assert_fingerprint(self.sock.getpeercert(binary_form=True),
- self.assert_fingerprint)
- elif self.assert_hostname is not False:
- match_hostname(self.sock.getpeercert(),
- self.assert_hostname or self.host)
-
-
## Pool objects
class ConnectionPool(object):
@@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
scheme = 'http'
+ ConnectionCls = HTTPConnection
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
@@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _new_conn(self):
"""
- Return a fresh :class:`httplib.HTTPConnection`.
+ Return a fresh :class:`HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
- return HTTPConnection(host=self.host, port=self.port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
+ conn = self.ConnectionCls(host=self.host, port=self.port,
+ timeout=self.timeout.connect_timeout,
+ strict=self.strict)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+ return conn
def _get_conn(self, timeout=None):
"""
@@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
pass
except Full:
# This should never happen if self.block == True
- log.warning("HttpConnectionPool is full, discarding connection: %s"
- % self.host)
+ log.warning(
+ "Connection pool is full, discarding connection: %s" %
+ self.host)
# Connection never got put back into the pool, close it.
if conn:
@@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
- Perform a request on a given httplib connection object taken from our
+ Perform a request on a given urllib connection object taken from our
pool.
:param conn:
@@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
- # request.py. It also calls makefile (recv) on the socket
+ # urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
@@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
- log.debug("Setting read timeout to %s" % read_timeout)
+
# App Engine doesn't have a sock attr
- if hasattr(conn, 'sock') and \
- read_timeout is not None and \
- read_timeout is not Timeout.DEFAULT_TIMEOUT:
+ if hasattr(conn, 'sock'):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- conn.sock.settimeout(read_timeout)
+ if read_timeout is Timeout.DEFAULT_TIMEOUT:
+ conn.sock.settimeout(socket.getdefaulttimeout())
+ else: # None or a value
+ conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
@@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
+ except BaseSSLError as e:
+ # Catch possible read timeouts thrown as SSL errors. If not the
+ # case, rethrow the original. We need to do this because of:
+ # http://bugs.python.org/issue10272
+ if 'timed out' in str(e) or \
+ 'did not complete (read)' in str(e): # Python 2.6
+ raise ReadTimeoutError(self, url, "Read timed out.")
+
+ raise
+
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
@@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- raise
+ raise
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# TODO: Add optional support for socket.gethostbyname checking.
scheme, host, port = get_host(url)
+ # Use explicit default port for comparison when none is given
if self.port and not port:
- # Use explicit default port for comparison when none is given.
port = port_by_scheme.get(scheme)
+ elif not self.port and port == port_by_scheme.get(scheme):
+ port = None
return (scheme, host, port) == (self.scheme, self.host, self.port)
@@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
+ If `False`, then retries are disabled and any exception is raised
+ immediately.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307, 308). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
+ will disable redirect, too.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if headers is None:
headers = self.headers
- if retries < 0:
+ if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if release_conn is None:
@@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = None
+ # Merge the proxy headers. Only do this in HTTP. We have to copy the
+ # headers dict so we can safely change it without those changes being
+ # reflected in anyone else's copy.
+ if self.scheme == 'http':
+ headers = headers.copy()
+ headers.update(self.proxy_headers)
+
+ # Must keep the exception bound to a separate variable or else Python 3
+ # complains about UnboundLocalError.
+ err = None
+
try:
# Request a connection from the queue
conn = self._get_conn(timeout=pool_timeout)
@@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.read()``)
except Empty:
- # Timed out by queue
- raise ReadTimeoutError(
- self, url, "Read timed out, no pool connections are available.")
+ # Timed out by queue.
+ raise EmptyPoolError(self, "No pool connections are available.")
- except SocketTimeout:
- # Timed out by socket
- raise ReadTimeoutError(self, url, "Read timed out.")
-
- except BaseSSLError as e:
- # SSL certificate error
- if 'timed out' in str(e) or \
- 'did not complete (read)' in str(e): # Platform-specific: Python 2.6
- raise ReadTimeoutError(self, url, "Read timed out.")
+ except (BaseSSLError, CertificateError) as e:
+ # Release connection unconditionally because there is no way to
+ # close it externally in case of exception.
+ release_conn = True
raise SSLError(e)
- except CertificateError as e:
- # Name mismatch
- raise SSLError(e)
+ except (TimeoutError, HTTPException, SocketError) as e:
+ if conn:
+ # Discard the connection for these exceptions. It will be
+ # be replaced during the next _get_conn() call.
+ conn.close()
+ conn = None
- except (HTTPException, SocketError) as e:
- if isinstance(e, SocketError) and self.proxy is not None:
- raise ProxyError('Cannot connect to proxy. '
- 'Socket error: %s.' % e)
+ if not retries:
+ if isinstance(e, TimeoutError):
+ # TimeoutError is exempt from MaxRetryError-wrapping.
+ # FIXME: ... Not sure why. Add a reason here.
+ raise
- # Connection broken, discard. It will be replaced next _get_conn().
- conn = None
- # This is necessary so we can access e below
- err = e
+ # Wrap unexpected exceptions with the most appropriate
+ # module-level exception and re-raise.
+ if isinstance(e, SocketError) and self.proxy:
+ raise ProxyError('Cannot connect to proxy.', e)
+
+ if retries is False:
+ raise ConnectionError('Connection failed.', e)
- if retries == 0:
raise MaxRetryError(self, url, e)
+ # Keep track of the error for the retry warning.
+ err = e
+
finally:
if release_conn:
# Put the connection back to be reused. If the connection is
@@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not conn:
# Try again
- log.warn("Retrying (%d attempts remain) after connection "
- "broken by '%r': %s" % (retries, err, url))
+ log.warning("Retrying (%d attempts remain) after connection "
+ "broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
@@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
- if redirect_location:
+ if redirect_location and retries is not False:
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
@@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
- instead of :class:`httplib.HTTPSConnection`.
+ instead of :class:`.HTTPSConnection`.
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
@@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
"""
scheme = 'https'
+ ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
@@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
- def _prepare_conn(self, connection):
+ def _prepare_conn(self, conn):
"""
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
and establish the tunnel if proxy is used.
"""
- if isinstance(connection, VerifiedHTTPSConnection):
- connection.set_cert(key_file=self.key_file,
- cert_file=self.cert_file,
- cert_reqs=self.cert_reqs,
- ca_certs=self.ca_certs,
- assert_hostname=self.assert_hostname,
- assert_fingerprint=self.assert_fingerprint)
- connection.ssl_version = self.ssl_version
+ if isinstance(conn, VerifiedHTTPSConnection):
+ conn.set_cert(key_file=self.key_file,
+ cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs,
+ assert_hostname=self.assert_hostname,
+ assert_fingerprint=self.assert_fingerprint)
+ conn.ssl_version = self.ssl_version
if self.proxy is not None:
# Python 2.7+
try:
- set_tunnel = connection.set_tunnel
+ set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
- set_tunnel = connection._set_tunnel
+ set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
- connection.connect()
+ conn.connect()
- return connection
+ return conn
def _new_conn(self):
"""
@@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
+ if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
+ # Platform-specific: Python without ssl
+ raise SSLError("Can't connect to HTTPS URL because the SSL "
+ "module is not available.")
+
actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port
- if not ssl: # Platform-specific: Python compiled without +ssl
- if not HTTPSConnection or HTTPSConnection is object:
- raise SSLError("Can't connect to HTTPS URL because the SSL "
- "module is not available.")
- connection_class = HTTPSConnection
- else:
- connection_class = VerifiedHTTPSConnection
-
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
- connection = connection_class(host=actual_host, port=actual_port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
- return self._prepare_conn(connection)
+ conn = self.ConnectionCls(host=actual_host, port=actual_port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+
+ return self._prepare_conn(conn)
def connection_from_url(url, **kw):
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index d43bcd6..7c513f3 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -1,4 +1,4 @@
-'''SSL with SNI-support for Python 2.
+'''SSL with SNI_-support for Python 2.
This needs the following packages installed:
@@ -18,17 +18,36 @@ your application begins using ``urllib3``, like this::
Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
+
+Activating this module also has the positive side effect of disabling SSL/TLS
+encryption in Python 2 (see `CRIME attack`_).
+
+If you want to configure the default list of supported cipher suites, you can
+set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
+
+Module Variables
+----------------
+
+:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
+ Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
+ ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
+
+.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
+.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
+
'''
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
-from ndg.httpsclient.subj_alt_name import SubjectAltName
+from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
-from socket import _fileobject
+from pyasn1.type import univ, constraint
+from socket import _fileobject, timeout
import ssl
+import select
from cStringIO import StringIO
-from .. import connectionpool
+from .. import connection
from .. import util
__all__ = ['inject_into_urllib3', 'extract_from_urllib3']
@@ -49,26 +68,54 @@ _openssl_verify = {
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
+ "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
+ "!aNULL:!MD5:!DSS"
+
orig_util_HAS_SNI = util.HAS_SNI
-orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket
+orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
def inject_into_urllib3():
'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.'
- connectionpool.ssl_wrap_socket = ssl_wrap_socket
+ connection.ssl_wrap_socket = ssl_wrap_socket
util.HAS_SNI = HAS_SNI
def extract_from_urllib3():
'Undo monkey-patching by :func:`inject_into_urllib3`.'
- connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket
+ connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket
util.HAS_SNI = orig_util_HAS_SNI
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
+class SubjectAltName(BaseSubjectAltName):
+ '''ASN.1 implementation for subjectAltNames support'''
+
+ # There is no limit to how many SAN certificates a certificate may have,
+ # however this needs to have some limit so we'll set an arbitrarily high
+ # limit.
+ sizeSpec = univ.SequenceOf.sizeSpec + \
+ constraint.ValueSizeConstraint(1, 1024)
+
+
+### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
def get_subj_alt_name(peer_cert):
# Search through extensions
dns_name = []
@@ -101,6 +148,13 @@ def get_subj_alt_name(peer_cert):
class fileobject(_fileobject):
+ def _wait_for_sock(self):
+ rd, wd, ed = select.select([self._sock], [], [],
+ self._sock.gettimeout())
+ if not rd:
+ raise timeout()
+
+
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
@@ -118,6 +172,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -145,6 +200,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -196,6 +252,7 @@ class fileobject(_fileobject):
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
break
return "".join(buffers)
@@ -206,6 +263,7 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
+ self._wait_for_sock()
continue
if not data:
break
@@ -233,7 +291,8 @@ class fileobject(_fileobject):
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
- continue
+ self._wait_for_sock()
+ continue
if not data:
break
left = size - buf_len
@@ -328,6 +387,15 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ctx.load_verify_locations(ca_certs, None)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
+ else:
+ ctx.set_default_verify_paths()
+
+ # Disable TLS compression to migitate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ ctx.set_options(OP_NO_COMPRESSION)
+
+ # Set list of supported ciphersuites.
+ ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST)
cnx = OpenSSL.SSL.Connection(ctx, sock)
cnx.set_tlsext_host_name(server_hostname)
@@ -336,6 +404,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
+ select.select([sock], [], [])
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 98ef9ab..b4df831 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -44,6 +44,11 @@ class ProxyError(HTTPError):
pass
+class ConnectionError(HTTPError):
+ "Raised when a normal connection fails."
+ pass
+
+
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
diff --git a/urllib3/filepost.py b/urllib3/filepost.py
index 4575582..e8b30bd 100644
--- a/urllib3/filepost.py
+++ b/urllib3/filepost.py
@@ -46,16 +46,15 @@ def iter_field_objects(fields):
def iter_fields(fields):
"""
- Iterate over fields.
+ .. deprecated:: 1.6
- .. deprecated ::
+ Iterate over fields.
- The addition of `~urllib3.fields.RequestField` makes this function
- obsolete. Instead, use :func:`iter_field_objects`, which returns
- `~urllib3.fields.RequestField` objects, instead.
+ The addition of :class:`~urllib3.fields.RequestField` makes this function
+ obsolete. Instead, use :func:`iter_field_objects`, which returns
+ :class:`~urllib3.fields.RequestField` objects.
Supports list of (k, v) tuples and dicts.
-
"""
if isinstance(fields, dict):
return ((k, v) for k, v in six.iteritems(fields))
diff --git a/urllib3/packages/ssl_match_hostname/__init__.py b/urllib3/packages/ssl_match_hostname/__init__.py
index 2d61ac2..dd59a75 100644
--- a/urllib3/packages/ssl_match_hostname/__init__.py
+++ b/urllib3/packages/ssl_match_hostname/__init__.py
@@ -1,98 +1,13 @@
-"""The match_hostname() function from Python 3.2, essential when using SSL."""
-
-import re
-
-__version__ = '3.2.2'
-
-class CertificateError(ValueError):
- pass
-
-def _dnsname_match(dn, hostname, max_wildcards=1):
- """Matching according to RFC 6125, section 6.4.3
-
- http://tools.ietf.org/html/rfc6125#section-6.4.3
- """
- pats = []
- if not dn:
- return False
-
- parts = dn.split(r'.')
- leftmost = parts[0]
-
- wildcards = leftmost.count('*')
- if wildcards > max_wildcards:
- # Issue #17980: avoid denials of service by refusing more
- # than one wildcard per fragment. A survery of established
- # policy among SSL implementations showed it to be a
- # reasonable choice.
- raise CertificateError(
- "too many wildcards in certificate DNS name: " + repr(dn))
-
- # speed up common case w/o wildcards
- if not wildcards:
- return dn.lower() == hostname.lower()
-
- # RFC 6125, section 6.4.3, subitem 1.
- # The client SHOULD NOT attempt to match a presented identifier in which
- # the wildcard character comprises a label other than the left-most label.
- if leftmost == '*':
- # When '*' is a fragment by itself, it matches a non-empty dotless
- # fragment.
- pats.append('[^.]+')
- elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
- # RFC 6125, section 6.4.3, subitem 3.
- # The client SHOULD NOT attempt to match a presented identifier
- # where the wildcard character is embedded within an A-label or
- # U-label of an internationalized domain name.
- pats.append(re.escape(leftmost))
- else:
- # Otherwise, '*' matches any dotless string, e.g. www*
- pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
-
- # add the remaining fragments, ignore any wildcards
- for frag in parts[1:]:
- pats.append(re.escape(frag))
-
- pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
- return pat.match(hostname)
-
-
-def match_hostname(cert, hostname):
- """Verify that *cert* (in decoded format as returned by
- SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
- rules are followed, but IP addresses are not accepted for *hostname*.
-
- CertificateError is raised on failure. On success, the function
- returns nothing.
- """
- if not cert:
- raise ValueError("empty or no certificate")
- dnsnames = []
- san = cert.get('subjectAltName', ())
- for key, value in san:
- if key == 'DNS':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if not dnsnames:
- # The subject is only checked when there is no dNSName entry
- # in subjectAltName
- for sub in cert.get('subject', ()):
- for key, value in sub:
- # XXX according to RFC 2818, the most specific Common Name
- # must be used.
- if key == 'commonName':
- if _dnsname_match(value, hostname):
- return
- dnsnames.append(value)
- if len(dnsnames) > 1:
- raise CertificateError("hostname %r "
- "doesn't match either of %s"
- % (hostname, ', '.join(map(repr, dnsnames))))
- elif len(dnsnames) == 1:
- raise CertificateError("hostname %r "
- "doesn't match %r"
- % (hostname, dnsnames[0]))
- else:
- raise CertificateError("no appropriate commonName or "
- "subjectAltName fields were found")
+try:
+ # Python 3.2+
+ from ssl import CertificateError, match_hostname
+except ImportError:
+ try:
+ # Backport of the function from a pypi module
+ from backports.ssl_match_hostname import CertificateError, match_hostname
+ except ImportError:
+ # Our vendored copy
+ from ._implementation import CertificateError, match_hostname
+
+# Not needed, but documenting what we provide.
+__all__ = ('CertificateError', 'match_hostname')
diff --git a/urllib3/packages/ssl_match_hostname/_implementation.py b/urllib3/packages/ssl_match_hostname/_implementation.py
new file mode 100644
index 0000000..52f4287
--- /dev/null
+++ b/urllib3/packages/ssl_match_hostname/_implementation.py
@@ -0,0 +1,105 @@
+"""The match_hostname() function from Python 3.3.3, essential when using SSL."""
+
+# Note: This file is under the PSF license as the code comes from the python
+# stdlib. http://docs.python.org/3/license.html
+
+import re
+
+__version__ = '3.4.0.2'
+
+class CertificateError(ValueError):
+ pass
+
+
+def _dnsname_match(dn, hostname, max_wildcards=1):
+ """Matching according to RFC 6125, section 6.4.3
+
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
+ """
+ pats = []
+ if not dn:
+ return False
+
+ # Ported from python3-syntax:
+ # leftmost, *remainder = dn.split(r'.')
+ parts = dn.split(r'.')
+ leftmost = parts[0]
+ remainder = parts[1:]
+
+ wildcards = leftmost.count('*')
+ if wildcards > max_wildcards:
+ # Issue #17980: avoid denials of service by refusing more
+ # than one wildcard per fragment. A survey of established
+ # policy among SSL implementations showed it to be a
+ # reasonable choice.
+ raise CertificateError(
+ "too many wildcards in certificate DNS name: " + repr(dn))
+
+ # speed up common case w/o wildcards
+ if not wildcards:
+ return dn.lower() == hostname.lower()
+
+ # RFC 6125, section 6.4.3, subitem 1.
+ # The client SHOULD NOT attempt to match a presented identifier in which
+ # the wildcard character comprises a label other than the left-most label.
+ if leftmost == '*':
+ # When '*' is a fragment by itself, it matches a non-empty dotless
+ # fragment.
+ pats.append('[^.]+')
+ elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+ # RFC 6125, section 6.4.3, subitem 3.
+ # The client SHOULD NOT attempt to match a presented identifier
+ # where the wildcard character is embedded within an A-label or
+ # U-label of an internationalized domain name.
+ pats.append(re.escape(leftmost))
+ else:
+ # Otherwise, '*' matches any dotless string, e.g. www*
+ pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+ # add the remaining fragments, ignore any wildcards
+ for frag in remainder:
+ pats.append(re.escape(frag))
+
+ pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+ return pat.match(hostname)
+
+
+def match_hostname(cert, hostname):
+ """Verify that *cert* (in decoded format as returned by
+ SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
+ rules are followed, but IP addresses are not accepted for *hostname*.
+
+ CertificateError is raised on failure. On success, the function
+ returns nothing.
+ """
+ if not cert:
+ raise ValueError("empty or no certificate")
+ dnsnames = []
+ san = cert.get('subjectAltName', ())
+ for key, value in san:
+ if key == 'DNS':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if not dnsnames:
+ # The subject is only checked when there is no dNSName entry
+ # in subjectAltName
+ for sub in cert.get('subject', ()):
+ for key, value in sub:
+ # XXX according to RFC 2818, the most specific Common Name
+ # must be used.
+ if key == 'commonName':
+ if _dnsname_match(value, hostname):
+ return
+ dnsnames.append(value)
+ if len(dnsnames) > 1:
+ raise CertificateError("hostname %r "
+ "doesn't match either of %s"
+ % (hostname, ', '.join(map(repr, dnsnames))))
+ elif len(dnsnames) == 1:
+ raise CertificateError("hostname %r "
+ "doesn't match %r"
+ % (hostname, dnsnames[0]))
+ else:
+ raise CertificateError("no appropriate commonName or "
+ "subjectAltName fields were found")
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index e7f8667..f18ff2b 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -1,5 +1,5 @@
# urllib3/poolmanager.py
-# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
+# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
@@ -176,7 +176,7 @@ class ProxyManager(PoolManager):
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
- :param poxy_url:
+ :param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:
@@ -245,12 +245,11 @@ class ProxyManager(PoolManager):
u = parse_url(url)
if u.scheme == "http":
- # It's too late to set proxy headers on per-request basis for
- # tunnelled HTTPS connections, should use
- # constructor's proxy_headers instead.
+ # For proxied HTTPS requests, httplib sets the necessary headers
+ # on the CONNECT to the proxy. For HTTP, we'll definitely
+ # need to set 'Host' at the very least.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
- kw['headers'].update(self.proxy_headers)
return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
diff --git a/urllib3/request.py b/urllib3/request.py
index 66a9a0e..2a92cc2 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -45,7 +45,6 @@ class RequestMethods(object):
"""
_encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
- _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])
def __init__(self, headers=None):
self.headers = headers or {}
diff --git a/urllib3/response.py b/urllib3/response.py
index 4efff5a..db44182 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -9,6 +9,7 @@ import logging
import zlib
import io
+from ._collections import HTTPHeaderDict
from .exceptions import DecodeError
from .packages.six import string_types as basestring, binary_type
from .util import is_fp_closed
@@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase):
def __init__(self, body='', headers=None, status=0, version=0, reason=None,
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
- self.headers = headers or {}
+
+ self.headers = HTTPHeaderDict()
+ if headers:
+ self.headers.update(headers)
self.status = status
self.version = version
self.reason = reason
@@ -90,6 +94,7 @@ class HTTPResponse(io.IOBase):
self._body = body if body and isinstance(body, basestring) else None
self._fp = None
self._original_response = original_response
+ self._fp_bytes_read = 0
self._pool = pool
self._connection = connection
@@ -129,6 +134,14 @@ class HTTPResponse(io.IOBase):
if self._fp:
return self.read(cache_content=True)
+ def tell(self):
+ """
+ Obtain the number of bytes pulled over the wire so far. May differ from
+ the amount of content returned by :meth:``HTTPResponse.read`` if bytes
+ are encoded on the wire (e.g, compressed).
+ """
+ return self._fp_bytes_read
+
def read(self, amt=None, decode_content=None, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@@ -183,6 +196,8 @@ class HTTPResponse(io.IOBase):
self._fp.close()
flush_decoder = True
+ self._fp_bytes_read += len(data)
+
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
@@ -238,17 +253,9 @@ class HTTPResponse(io.IOBase):
with ``original_response=r``.
"""
- # Normalize headers between different versions of Python
- headers = {}
+ headers = HTTPHeaderDict()
for k, v in r.getheaders():
- # Python 3: Header keys are returned capitalised
- k = k.lower()
-
- has_value = headers.get(k)
- if has_value: # Python 3: Repeating header keys are unmerged.
- v = ', '.join([has_value, v])
-
- headers[k] = v
+ headers.add(k, v)
# HTTPResponse objects in Python 3 don't have a .strict attribute
strict = getattr(r, 'strict', 0)
diff --git a/urllib3/util.py b/urllib3/util.py
index 266c9ed..bd26631 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -80,14 +80,13 @@ class Timeout(object):
:type read: integer, float, or None
:param total:
- The maximum amount of time to wait for an HTTP request to connect and
- return. This combines the connect and read timeouts into one. In the
+ This combines the connect and read timeouts into one; the read timeout
+ will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
-
:type total: integer, float, or None
.. note::
@@ -101,18 +100,23 @@ class Timeout(object):
`total`.
In addition, the read and total timeouts only measure the time between
- read operations on the socket connecting the client and the server, not
- the total amount of time for the request to return a complete response.
- As an example, you may want a request to return within 7 seconds or
- fail, so you set the ``total`` timeout to 7 seconds. If the server
- sends one byte to you every 5 seconds, the request will **not** trigger
- time out. This case is admittedly rare.
+ read operations on the socket connecting the client and the server,
+ not the total amount of time for the request to return a complete
+ response. For most requests, the timeout is raised because the server
+ has not sent the first byte in the specified time. This is not always
+ the case; if a server streams one byte every fifteen seconds, a timeout
+ of 20 seconds will not ever trigger, even though the request will
+ take several minutes to complete.
+
+ If your goal is to cut off any request after a set amount of wall clock
+ time, consider having a second "watcher" thread to cut off a slow
+ request.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
- def __init__(self, connect=_Default, read=_Default, total=None):
+ def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
@@ -372,7 +376,8 @@ def parse_url(url):
# Auth
if '@' in url:
- auth, url = url.split('@', 1)
+ # Last '@' denotes end of auth part
+ auth, url = url.rsplit('@', 1)
# IPv6
if url and url[0] == '[':
@@ -386,10 +391,14 @@ def parse_url(url):
if not host:
host = _host
- if not port.isdigit():
- raise LocationParseError("Failed to parse: %s" % url)
-
- port = int(port)
+ if port:
+ # If given, ports must be integers.
+ if not port.isdigit():
+ raise LocationParseError("Failed to parse: %s" % url)
+ port = int(port)
+ else:
+ # Blank ports are cool, too. (rfc3986#section-3.2.3)
+ port = None
elif not host and url:
host = url
@@ -417,7 +426,7 @@ def get_host(url):
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
- basic_auth=None):
+ basic_auth=None, proxy_basic_auth=None):
"""
Shortcuts for generating request headers.
@@ -438,6 +447,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
Colon-separated username:password string for 'authorization: basic ...'
auth header.
+ :param proxy_basic_auth:
+ Colon-separated username:password string for 'proxy-authorization: basic ...'
+ auth header.
+
Example: ::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
@@ -465,6 +478,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
headers['authorization'] = 'Basic ' + \
b64encode(six.b(basic_auth)).decode('utf-8')
+ if proxy_basic_auth:
+ headers['proxy-authorization'] = 'Basic ' + \
+ b64encode(six.b(proxy_basic_auth)).decode('utf-8')
+
return headers
@@ -603,6 +620,11 @@ if SSLContext is not None: # Python 3.2+
"""
context = SSLContext(ssl_version)
context.verify_mode = cert_reqs
+
+ # Disable TLS compression to migitate CRIME attack (issue #309)
+ OP_NO_COMPRESSION = 0x20000
+ context.options |= OP_NO_COMPRESSION
+
if ca_certs:
try:
context.load_verify_locations(ca_certs)