aboutsummaryrefslogtreecommitdiff
path: root/urllib3/connectionpool.py
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/connectionpool.py')
-rw-r--r--urllib3/connectionpool.py290
1 files changed, 120 insertions, 170 deletions
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 691d4e2..6d0dbb1 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -11,134 +11,48 @@ from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
- from http.client import HTTPConnection, HTTPException
- from http.client import HTTP_PORT, HTTPS_PORT
-except ImportError:
- from httplib import HTTPConnection, HTTPException
- from httplib import HTTP_PORT, HTTPS_PORT
-
-try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
import Queue as _ # Platform-specific: Windows
-try: # Compiled with SSL?
- HTTPSConnection = object
-
- class BaseSSLError(BaseException):
- pass
-
- ssl = None
-
- try: # Python 3
- from http.client import HTTPSConnection
- except ImportError:
- from httplib import HTTPSConnection
-
- import ssl
- BaseSSLError = ssl.SSLError
-
-except (ImportError, AttributeError): # Platform-specific: No SSL.
- pass
-
-
from .exceptions import (
ClosedPoolError,
+ ConnectionError,
ConnectTimeoutError,
EmptyPoolError,
HostChangedError,
MaxRetryError,
SSLError,
+ TimeoutError,
ReadTimeoutError,
ProxyError,
)
-from .packages.ssl_match_hostname import CertificateError, match_hostname
+from .packages.ssl_match_hostname import CertificateError
from .packages import six
+from .connection import (
+ port_by_scheme,
+ DummyConnection,
+ HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
+ HTTPException, BaseSSLError,
+)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
assert_fingerprint,
get_host,
is_connection_dropped,
- resolve_cert_reqs,
- resolve_ssl_version,
- ssl_wrap_socket,
Timeout,
)
+
xrange = six.moves.xrange
log = logging.getLogger(__name__)
_Default = object()
-port_by_scheme = {
- 'http': HTTP_PORT,
- 'https': HTTPS_PORT,
-}
-
-
-## Connection objects (extension of httplib)
-
-class VerifiedHTTPSConnection(HTTPSConnection):
- """
- Based on httplib.HTTPSConnection but wraps the socket with
- SSL certification.
- """
- cert_reqs = None
- ca_certs = None
- ssl_version = None
-
- def set_cert(self, key_file=None, cert_file=None,
- cert_reqs=None, ca_certs=None,
- assert_hostname=None, assert_fingerprint=None):
-
- self.key_file = key_file
- self.cert_file = cert_file
- self.cert_reqs = cert_reqs
- self.ca_certs = ca_certs
- self.assert_hostname = assert_hostname
- self.assert_fingerprint = assert_fingerprint
-
- def connect(self):
- # Add certificate verification
- try:
- sock = socket.create_connection(
- address=(self.host, self.port),
- timeout=self.timeout)
- except SocketTimeout:
- raise ConnectTimeoutError(
- self, "Connection to %s timed out. (connect timeout=%s)" %
- (self.host, self.timeout))
-
- resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
- resolved_ssl_version = resolve_ssl_version(self.ssl_version)
-
- if self._tunnel_host:
- self.sock = sock
- # Calls self._set_hostport(), so self.host is
- # self._tunnel_host below.
- self._tunnel()
-
- # Wrap socket using verification with the root certs in
- # trusted_root_certs
- self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
- cert_reqs=resolved_cert_reqs,
- ca_certs=self.ca_certs,
- server_hostname=self.host,
- ssl_version=resolved_ssl_version)
-
- if resolved_cert_reqs != ssl.CERT_NONE:
- if self.assert_fingerprint:
- assert_fingerprint(self.sock.getpeercert(binary_form=True),
- self.assert_fingerprint)
- elif self.assert_hostname is not False:
- match_hostname(self.sock.getpeercert(),
- self.assert_hostname or self.host)
-
-
## Pool objects
class ConnectionPool(object):
@@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
scheme = 'http'
+ ConnectionCls = HTTPConnection
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
@@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _new_conn(self):
"""
- Return a fresh :class:`httplib.HTTPConnection`.
+ Return a fresh :class:`HTTPConnection`.
"""
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
- extra_params = {}
- if not six.PY3: # Python 2
- extra_params['strict'] = self.strict
-
- return HTTPConnection(host=self.host, port=self.port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
+ conn = self.ConnectionCls(host=self.host, port=self.port,
+ timeout=self.timeout.connect_timeout,
+ strict=self.strict)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+ return conn
def _get_conn(self, timeout=None):
"""
@@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
pass
except Full:
# This should never happen if self.block == True
- log.warning("HttpConnectionPool is full, discarding connection: %s"
- % self.host)
+ log.warning(
+ "Connection pool is full, discarding connection: %s" %
+ self.host)
# Connection never got put back into the pool, close it.
if conn:
@@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
- Perform a request on a given httplib connection object taken from our
+ Perform a request on a given urllib connection object taken from our
pool.
:param conn:
@@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
- # request.py. It also calls makefile (recv) on the socket
+ # urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
@@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
- log.debug("Setting read timeout to %s" % read_timeout)
+
# App Engine doesn't have a sock attr
- if hasattr(conn, 'sock') and \
- read_timeout is not None and \
- read_timeout is not Timeout.DEFAULT_TIMEOUT:
+ if hasattr(conn, 'sock'):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- conn.sock.settimeout(read_timeout)
+ if read_timeout is Timeout.DEFAULT_TIMEOUT:
+ conn.sock.settimeout(socket.getdefaulttimeout())
+ else: # None or a value
+ conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
@@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
+ except BaseSSLError as e:
+ # Catch possible read timeouts thrown as SSL errors. If not the
+ # case, rethrow the original. We need to do this because of:
+ # http://bugs.python.org/issue10272
+ if 'timed out' in str(e) or \
+ 'did not complete (read)' in str(e): # Python 2.6
+ raise ReadTimeoutError(self, url, "Read timed out.")
+
+ raise
+
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
@@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
- raise
+ raise
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# TODO: Add optional support for socket.gethostbyname checking.
scheme, host, port = get_host(url)
+ # Use explicit default port for comparison when none is given
if self.port and not port:
- # Use explicit default port for comparison when none is given.
port = port_by_scheme.get(scheme)
+ elif not self.port and port == port_by_scheme.get(scheme):
+ port = None
return (scheme, host, port) == (self.scheme, self.host, self.port)
@@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
+ If `False`, then retries are disabled and any exception is raised
+ immediately.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
- 303, 307, 308). Each redirect counts as a retry.
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
+ will disable redirect, too.
:param assert_same_host:
If ``True``, will make sure that the host of the pool requests is
@@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if headers is None:
headers = self.headers
- if retries < 0:
+ if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if release_conn is None:
@@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = None
+ # Merge the proxy headers. Only do this in HTTP. We have to copy the
+ # headers dict so we can safely change it without those changes being
+ # reflected in anyone else's copy.
+ if self.scheme == 'http':
+ headers = headers.copy()
+ headers.update(self.proxy_headers)
+
+ # Must keep the exception bound to a separate variable or else Python 3
+ # complains about UnboundLocalError.
+ err = None
+
try:
# Request a connection from the queue
conn = self._get_conn(timeout=pool_timeout)
@@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# ``response.read()``)
except Empty:
- # Timed out by queue
- raise ReadTimeoutError(
- self, url, "Read timed out, no pool connections are available.")
+ # Timed out by queue.
+ raise EmptyPoolError(self, "No pool connections are available.")
- except SocketTimeout:
- # Timed out by socket
- raise ReadTimeoutError(self, url, "Read timed out.")
-
- except BaseSSLError as e:
- # SSL certificate error
- if 'timed out' in str(e) or \
- 'did not complete (read)' in str(e): # Platform-specific: Python 2.6
- raise ReadTimeoutError(self, url, "Read timed out.")
+ except (BaseSSLError, CertificateError) as e:
+ # Release connection unconditionally because there is no way to
+ # close it externally in case of exception.
+ release_conn = True
raise SSLError(e)
- except CertificateError as e:
- # Name mismatch
- raise SSLError(e)
+ except (TimeoutError, HTTPException, SocketError) as e:
+ if conn:
+ # Discard the connection for these exceptions. It will be
+ # be replaced during the next _get_conn() call.
+ conn.close()
+ conn = None
- except (HTTPException, SocketError) as e:
- if isinstance(e, SocketError) and self.proxy is not None:
- raise ProxyError('Cannot connect to proxy. '
- 'Socket error: %s.' % e)
+ if not retries:
+ if isinstance(e, TimeoutError):
+ # TimeoutError is exempt from MaxRetryError-wrapping.
+ # FIXME: ... Not sure why. Add a reason here.
+ raise
- # Connection broken, discard. It will be replaced next _get_conn().
- conn = None
- # This is necessary so we can access e below
- err = e
+ # Wrap unexpected exceptions with the most appropriate
+ # module-level exception and re-raise.
+ if isinstance(e, SocketError) and self.proxy:
+ raise ProxyError('Cannot connect to proxy.', e)
+
+ if retries is False:
+ raise ConnectionError('Connection failed.', e)
- if retries == 0:
raise MaxRetryError(self, url, e)
+ # Keep track of the error for the retry warning.
+ err = e
+
finally:
if release_conn:
# Put the connection back to be reused. If the connection is
@@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not conn:
# Try again
- log.warn("Retrying (%d attempts remain) after connection "
- "broken by '%r': %s" % (retries, err, url))
+ log.warning("Retrying (%d attempts remain) after connection "
+ "broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
@@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
- if redirect_location:
+ if redirect_location and retries is not False:
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
@@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
When Python is compiled with the :mod:`ssl` module, then
:class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates,
- instead of :class:`httplib.HTTPSConnection`.
+ instead of :class:`.HTTPSConnection`.
:class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``,
``assert_hostname`` and ``host`` in this order to verify connections.
@@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
"""
scheme = 'https'
+ ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
@@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
- def _prepare_conn(self, connection):
+ def _prepare_conn(self, conn):
"""
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
and establish the tunnel if proxy is used.
"""
- if isinstance(connection, VerifiedHTTPSConnection):
- connection.set_cert(key_file=self.key_file,
- cert_file=self.cert_file,
- cert_reqs=self.cert_reqs,
- ca_certs=self.ca_certs,
- assert_hostname=self.assert_hostname,
- assert_fingerprint=self.assert_fingerprint)
- connection.ssl_version = self.ssl_version
+ if isinstance(conn, VerifiedHTTPSConnection):
+ conn.set_cert(key_file=self.key_file,
+ cert_file=self.cert_file,
+ cert_reqs=self.cert_reqs,
+ ca_certs=self.ca_certs,
+ assert_hostname=self.assert_hostname,
+ assert_fingerprint=self.assert_fingerprint)
+ conn.ssl_version = self.ssl_version
if self.proxy is not None:
# Python 2.7+
try:
- set_tunnel = connection.set_tunnel
+ set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
- set_tunnel = connection._set_tunnel
+ set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
- connection.connect()
+ conn.connect()
- return connection
+ return conn
def _new_conn(self):
"""
@@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
+ if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
+ # Platform-specific: Python without ssl
+ raise SSLError("Can't connect to HTTPS URL because the SSL "
+ "module is not available.")
+
actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port
- if not ssl: # Platform-specific: Python compiled without +ssl
- if not HTTPSConnection or HTTPSConnection is object:
- raise SSLError("Can't connect to HTTPS URL because the SSL "
- "module is not available.")
- connection_class = HTTPSConnection
- else:
- connection_class = VerifiedHTTPSConnection
-
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
- connection = connection_class(host=actual_host, port=actual_port,
- timeout=self.timeout.connect_timeout,
- **extra_params)
- return self._prepare_conn(connection)
+ conn = self.ConnectionCls(host=actual_host, port=actual_port,
+ timeout=self.timeout.connect_timeout,
+ **extra_params)
+ if self.proxy is not None:
+ # Enable Nagle's algorithm for proxies, to avoid packet
+ # fragmentation.
+ conn.tcp_nodelay = 0
+
+ return self._prepare_conn(conn)
def connection_from_url(url, **kw):