about summary refs log tree commit diff
path: root/urllib3/connectionpool.py
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/connectionpool.py')
-rw-r--r-- urllib3/connectionpool.py 286
1 files changed, 218 insertions, 68 deletions
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 73fa9ca..691d4e2 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -4,12 +4,11 @@
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
-import logging
-import socket
import errno
+import logging
from socket import error as SocketError, timeout as SocketTimeout
-from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint
+import socket
try: # Python 3
from http.client import HTTPConnection, HTTPException
@@ -22,11 +21,15 @@ try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
+ import Queue as _ # Platform-specific: Windows
try: # Compiled with SSL?
HTTPSConnection = object
- BaseSSLError = None
+
+ class BaseSSLError(BaseException):
+ pass
+
ssl = None
try: # Python 3
@@ -41,21 +44,29 @@ except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
-from .request import RequestMethods
-from .response import HTTPResponse
-from .util import get_host, is_connection_dropped, ssl_wrap_socket
from .exceptions import (
ClosedPoolError,
+ ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
- TimeoutError,
+ ReadTimeoutError,
+ ProxyError,
)
-
-from .packages.ssl_match_hostname import match_hostname, CertificateError
+from .packages.ssl_match_hostname import CertificateError, match_hostname
from .packages import six
-
+from .request import RequestMethods
+from .response import HTTPResponse
+from .util import (
+ assert_fingerprint,
+ get_host,
+ is_connection_dropped,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+ Timeout,
+)
xrange = six.moves.xrange
@@ -93,11 +104,24 @@ class VerifiedHTTPSConnection(HTTPSConnection):
def connect(self):
# Add certificate verification
- sock = socket.create_connection((self.host, self.port), self.timeout)
+ try:
+ sock = socket.create_connection(
+ address=(self.host, self.port),
+ timeout=self.timeout)
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, self.timeout))
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
+ if self._tunnel_host:
+ self.sock = sock
+ # Calls self._set_hostport(), so self.host is
+ # self._tunnel_host below.
+ self._tunnel()
+
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
@@ -110,10 +134,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
- else:
+ elif self.assert_hostname is not False:
match_hostname(self.sock.getpeercert(),
self.assert_hostname or self.host)
+
## Pool objects
class ConnectionPool(object):
@@ -126,6 +151,9 @@ class ConnectionPool(object):
QueueCls = LifoQueue
def __init__(self, host, port=None):
+ # httplib doesn't like it when we include brackets in ipv6 addresses
+ host = host.strip('[]')
+
self.host = host
self.port = port
@@ -133,6 +161,8 @@ class ConnectionPool(object):
return '%s(host=%r, port=%r)' % (type(self).__name__,
self.host, self.port)
+# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
+_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
@@ -151,9 +181,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
as a valid HTTP/1.0 or 1.1 status line, passed into
:class:`httplib.HTTPConnection`.
+ .. note::
+ Only works in Python 2. This parameter is ignored in Python 3.
+
:param timeout:
- Socket timeout for each individual connection, can be a float. None
- disables timeout.
+ Socket timeout in seconds for each individual connection. This can
+ be a float or integer, which sets the timeout for the HTTP request,
+ or an instance of :class:`urllib3.util.Timeout` which gives you more
+ fine-grained control over request timeouts. After the constructor has
+ been parsed, this is always a `urllib3.util.Timeout` object.
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
@@ -171,20 +207,39 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
+
+ :param _proxy:
+ Parsed proxy URL, should not be used directly, instead, see
+ :class:`urllib3.connectionpool.ProxyManager`
+
+ :param _proxy_headers:
+ A dictionary with proxy headers, should not be used directly,
+ instead, see :class:`urllib3.connectionpool.ProxyManager`
"""
scheme = 'http'
- def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
- block=False, headers=None):
+ def __init__(self, host, port=None, strict=False,
+ timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
+ headers=None, _proxy=None, _proxy_headers=None):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
+
+ # This is for backwards compatibility and can be removed once a timeout
+ # can only be set to a Timeout object
+ if not isinstance(timeout, Timeout):
+ timeout = Timeout.from_float(timeout)
+
self.timeout = timeout
+
self.pool = self.QueueCls(maxsize)
self.block = block
+ self.proxy = _proxy
+ self.proxy_headers = _proxy_headers or {}
+
# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
@@ -200,9 +255,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- return HTTPConnection(host=self.host,
- port=self.port,
- strict=self.strict)
+ extra_params = {}
+ if not six.PY3: # Python 2
+ extra_params['strict'] = self.strict
+
+ return HTTPConnection(host=self.host, port=self.port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+
def _get_conn(self, timeout=None):
"""
@@ -263,31 +323,89 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
% self.host)
# Connection never got put back into the pool, close it.
- conn.close()
+ if conn:
+ conn.close()
+
+ def _get_timeout(self, timeout):
+ """ Helper that always returns a :class:`urllib3.util.Timeout` """
+ if timeout is _Default:
+ return self.timeout.clone()
+
+ if isinstance(timeout, Timeout):
+ return timeout.clone()
+ else:
+ # User passed us an int/float. This is for backwards compatibility,
+ # can be removed later
+ return Timeout.from_float(timeout)
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
Perform a request on a given httplib connection object taken from our
pool.
+
+ :param conn:
+ a connection from one of our connection pools
+
+ :param timeout:
+ Socket timeout in seconds for the request. This can be a
+ float or integer, which will set the same timeout value for
+ the socket connect and the socket read, or an instance of
+ :class:`urllib3.util.Timeout`, which gives you more fine-grained
+ control over your timeouts.
"""
self.num_requests += 1
- if timeout is _Default:
- timeout = self.timeout
-
- conn.timeout = timeout # This only does anything in Py26+
- conn.request(method, url, **httplib_request_kw)
+ timeout_obj = self._get_timeout(timeout)
- # Set timeout
- sock = getattr(conn, 'sock', False) # AppEngine doesn't have sock attr.
- if sock:
- sock.settimeout(timeout)
+ try:
+ timeout_obj.start_connect()
+ conn.timeout = timeout_obj.connect_timeout
+ # conn.request() calls httplib.*.request, not the method in
+ # request.py. It also calls makefile (recv) on the socket
+ conn.request(method, url, **httplib_request_kw)
+ except SocketTimeout:
+ raise ConnectTimeoutError(
+ self, "Connection to %s timed out. (connect timeout=%s)" %
+ (self.host, timeout_obj.connect_timeout))
+
+ # Reset the timeout for the recv() on the socket
+ read_timeout = timeout_obj.read_timeout
+ log.debug("Setting read timeout to %s" % read_timeout)
+ # App Engine doesn't have a sock attr
+ if hasattr(conn, 'sock') and \
+ read_timeout is not None and \
+ read_timeout is not Timeout.DEFAULT_TIMEOUT:
+ # In Python 3 socket.py will catch EAGAIN and return None when you
+ # try and read into the file pointer created by http.client, which
+ # instead raises a BadStatusLine exception. Instead of catching
+ # the exception and assuming all BadStatusLine exceptions are read
+ # timeouts, check for a zero timeout before making the request.
+ if read_timeout == 0:
+ raise ReadTimeoutError(
+ self, url,
+ "Read timed out. (read timeout=%s)" % read_timeout)
+ conn.sock.settimeout(read_timeout)
+
+ # Receive the response from the server
+ try:
+ try: # Python 2.7+, use buffering of HTTP responses
+ httplib_response = conn.getresponse(buffering=True)
+ except TypeError: # Python 2.6 and older
+ httplib_response = conn.getresponse()
+ except SocketTimeout:
+ raise ReadTimeoutError(
+ self, url, "Read timed out. (read timeout=%s)" % read_timeout)
+
+ except SocketError as e: # Platform-specific: Python 2
+ # See the above comment about EAGAIN in Python 3. In Python 2 we
+ # have to specifically catch it and throw the timeout error
+ if e.errno in _blocking_errnos:
+ raise ReadTimeoutError(
+ self, url,
+ "Read timed out. (read timeout=%s)" % read_timeout)
+ raise
- try: # Python 2.7+, use buffering of HTTP responses
- httplib_response = conn.getresponse(buffering=True)
- except TypeError: # Python 2.6 and older
- httplib_response = conn.getresponse()
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -367,7 +485,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -375,7 +493,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
use the pool on an HTTP proxy and request foreign hosts.
:param timeout:
- If specified, overrides the default timeout for this one request.
+ If specified, overrides the default timeout for this one
+ request. It may be a float (in seconds) or an instance of
+ :class:`urllib3.util.Timeout`.
:param pool_timeout:
If set and the pool is set to block=True, then this method will
@@ -402,18 +522,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if retries < 0:
raise MaxRetryError(self, url)
- if timeout is _Default:
- timeout = self.timeout
-
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
- host = "%s://%s" % (self.scheme, self.host)
- if self.port:
- host = "%s:%d" % (host, self.port)
-
raise HostChangedError(self, url, retries - 1)
conn = None
@@ -444,18 +557,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.release_conn()`` is called (implicitly by
# ``response.read()``)
- except Empty as e:
+ except Empty:
# Timed out by queue
- raise TimeoutError(self, "Request timed out. (pool_timeout=%s)" %
- pool_timeout)
+ raise ReadTimeoutError(
+ self, url, "Read timed out, no pool connections are available.")
- except SocketTimeout as e:
+ except SocketTimeout:
# Timed out by socket
- raise TimeoutError(self, "Request timed out. (timeout=%s)" %
- timeout)
+ raise ReadTimeoutError(self, url, "Read timed out.")
except BaseSSLError as e:
# SSL certificate error
+ if 'timed out' in str(e) or \
+ 'did not complete (read)' in str(e): # Platform-specific: Python 2.6
+ raise ReadTimeoutError(self, url, "Read timed out.")
raise SSLError(e)
except CertificateError as e:
@@ -463,6 +578,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise SSLError(e)
except (HTTPException, SocketError) as e:
+ if isinstance(e, SocketError) and self.proxy is not None:
+ raise ProxyError('Cannot connect to proxy. '
+ 'Socket error: %s.' % e)
+
# Connection broken, discard. It will be replaced next _get_conn().
conn = None
# This is necessary so we can access e below
@@ -511,6 +630,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
+ If ``assert_hostname`` is False, no verification is done.
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and
``ssl_version`` are only used if :mod:`ssl` is available and are fed into
@@ -523,13 +643,13 @@ class HTTPSConnectionPool(HTTPConnectionPool):
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
+ _proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None):
- HTTPConnectionPool.__init__(self, host, port,
- strict, timeout, maxsize,
- block, headers)
+ HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
+ block, headers, _proxy, _proxy_headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
@@ -538,6 +658,34 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
+ def _prepare_conn(self, connection):
+ """
+ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
+ and establish the tunnel if proxy is used.
+ """
+
+ if isinstance(connection, VerifiedHTTPSConnection):
+ connection.set_cert(key_file=self.key_file,
+ cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs,
+ assert_hostname=self.assert_hostname,
+ assert_fingerprint=self.assert_fingerprint)
+ connection.ssl_version = self.ssl_version
+
+ if self.proxy is not None:
+ # Python 2.7+
+ try:
+ set_tunnel = connection.set_tunnel
+ except AttributeError: # Platform-specific: Python 2.6
+ set_tunnel = connection._set_tunnel
+ set_tunnel(self.host, self.port, self.proxy_headers)
+ # Establish tunnel connection early, because otherwise httplib
+ # would improperly set Host: header to proxy's IP:port.
+ connection.connect()
+
+ return connection
+
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
@@ -546,26 +694,28 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
+ actual_host = self.host
+ actual_port = self.port
+ if self.proxy is not None:
+ actual_host = self.proxy.host
+ actual_port = self.proxy.port
+
if not ssl: # Platform-specific: Python compiled without +ssl
if not HTTPSConnection or HTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
-
- return HTTPSConnection(host=self.host,
- port=self.port,
- strict=self.strict)
-
- connection = VerifiedHTTPSConnection(host=self.host,
- port=self.port,
- strict=self.strict)
- connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
- cert_reqs=self.cert_reqs, ca_certs=self.ca_certs,
- assert_hostname=self.assert_hostname,
- assert_fingerprint=self.assert_fingerprint)
-
- connection.ssl_version = self.ssl_version
-
- return connection
+ connection_class = HTTPSConnection
+ else:
+ connection_class = VerifiedHTTPSConnection
+
+ extra_params = {}
+ if not six.PY3: # Python 2
+ extra_params['strict'] = self.strict
+ connection = connection_class(host=actual_host, port=actual_port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+
+ return self._prepare_conn(connection)
def connection_from_url(url, **kw):