diff options
Diffstat (limited to 'urllib3/connectionpool.py')
-rw-r--r-- | urllib3/connectionpool.py | 290 |
1 files changed, 120 insertions, 170 deletions
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 691d4e2..6d0dbb1 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -11,134 +11,48 @@ from socket import error as SocketError, timeout as SocketTimeout import socket try: # Python 3 - from http.client import HTTPConnection, HTTPException - from http.client import HTTP_PORT, HTTPS_PORT -except ImportError: - from httplib import HTTPConnection, HTTPException - from httplib import HTTP_PORT, HTTPS_PORT - -try: # Python 3 from queue import LifoQueue, Empty, Full except ImportError: from Queue import LifoQueue, Empty, Full import Queue as _ # Platform-specific: Windows -try: # Compiled with SSL? - HTTPSConnection = object - - class BaseSSLError(BaseException): - pass - - ssl = None - - try: # Python 3 - from http.client import HTTPSConnection - except ImportError: - from httplib import HTTPSConnection - - import ssl - BaseSSLError = ssl.SSLError - -except (ImportError, AttributeError): # Platform-specific: No SSL. - pass - - from .exceptions import ( ClosedPoolError, + ConnectionError, ConnectTimeoutError, EmptyPoolError, HostChangedError, MaxRetryError, SSLError, + TimeoutError, ReadTimeoutError, ProxyError, ) -from .packages.ssl_match_hostname import CertificateError, match_hostname +from .packages.ssl_match_hostname import CertificateError from .packages import six +from .connection import ( + port_by_scheme, + DummyConnection, + HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, + HTTPException, BaseSSLError, +) from .request import RequestMethods from .response import HTTPResponse from .util import ( assert_fingerprint, get_host, is_connection_dropped, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, Timeout, ) + xrange = six.moves.xrange log = logging.getLogger(__name__) _Default = object() -port_by_scheme = { - 'http': HTTP_PORT, - 'https': HTTPS_PORT, -} - - -## Connection objects (extension of httplib) - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ssl_version = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None): - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - try: - sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout) - except SocketTimeout: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - if self._tunnel_host: - self.sock = sock - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - server_hostname=self.host, - ssl_version=resolved_ssl_version) - - if resolved_cert_reqs != ssl.CERT_NONE: - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) - - ## Pool objects class ConnectionPool(object): @@ -218,6 +132,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ scheme = 'http' + ConnectionCls = HTTPConnection def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, @@ -250,19 +165,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _new_conn(self): """ - Return a fresh :class:`httplib.HTTPConnection`. + Return a fresh :class:`HTTPConnection`. """ self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - - return HTTPConnection(host=self.host, port=self.port, - timeout=self.timeout.connect_timeout, - **extra_params) + conn = self.ConnectionCls(host=self.host, port=self.port, + timeout=self.timeout.connect_timeout, + strict=self.strict) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + return conn def _get_conn(self, timeout=None): """ @@ -319,8 +235,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: @@ -341,7 +258,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ - Perform a request on a given httplib connection object taken from our + Perform a request on a given urllib connection object taken from our pool. :param conn: @@ -362,7 +279,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout # conn.request() calls httplib.*.request, not the method in - # request.py. It also calls makefile (recv) on the socket + # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) except SocketTimeout: raise ConnectTimeoutError( @@ -371,11 +288,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Reset the timeout for the recv() on the socket read_timeout = timeout_obj.read_timeout - log.debug("Setting read timeout to %s" % read_timeout) + # App Engine doesn't have a sock attr - if hasattr(conn, 'sock') and \ - read_timeout is not None and \ - read_timeout is not Timeout.DEFAULT_TIMEOUT: + if hasattr(conn, 'sock'): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -385,7 +300,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - conn.sock.settimeout(read_timeout) + if read_timeout is Timeout.DEFAULT_TIMEOUT: + conn.sock.settimeout(socket.getdefaulttimeout()) + else: # None or a value + conn.sock.settimeout(read_timeout) # Receive the response from the server try: @@ -397,6 +315,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) + except BaseSSLError as e: + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(e) or \ + 'did not complete (read)' in str(e): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out.") + + raise + except SocketError as e: # Platform-specific: Python 2 # See the above comment about EAGAIN in Python 3. In Python 2 we # have to specifically catch it and throw the timeout error @@ -404,8 +332,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise ReadTimeoutError( self, url, "Read timed out. (read timeout=%s)" % read_timeout) - raise + raise # AppEngine doesn't have a version attr. http_version = getattr(conn, '_http_vsn_str', 'HTTP/?') @@ -441,9 +369,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) + # Use explicit default port for comparison when none is given if self.port and not port: - # Use explicit default port for comparison when none is given. port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None return (scheme, host, port) == (self.scheme, self.host, self.port) @@ -482,10 +412,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param retries: Number of retries to allow before raising a MaxRetryError exception. + If `False`, then retries are disabled and any exception is raised + immediately. :param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -519,7 +452,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: + if retries < 0 and retries is not False: raise MaxRetryError(self, url) if release_conn is None: @@ -531,6 +464,17 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn = None + # Merge the proxy headers. Only do this in HTTP. We have to copy the + # headers dict so we can safely change it without those changes being + # reflected in anyone else's copy. + if self.scheme == 'http': + headers = headers.copy() + headers.update(self.proxy_headers) + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -558,38 +502,41 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue - raise ReadTimeoutError( - self, url, "Read timed out, no pool connections are available.") + # Timed out by queue. + raise EmptyPoolError(self, "No pool connections are available.") - except SocketTimeout: - # Timed out by socket - raise ReadTimeoutError(self, url, "Read timed out.") - - except BaseSSLError as e: - # SSL certificate error - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Platform-specific: Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except (HTTPException, SocketError) as e: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) + if not retries: + if isinstance(e, TimeoutError): + # TimeoutError is exempt from MaxRetryError-wrapping. + # FIXME: ... Not sure why. Add a reason here. + raise - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e + # Wrap unexpected exceptions with the most appropriate + # module-level exception and re-raise. + if isinstance(e, SocketError) and self.proxy: + raise ProxyError('Cannot connect to proxy.', e) + + if retries is False: + raise ConnectionError('Connection failed.', e) - if retries == 0: raise MaxRetryError(self, url, e) + # Keep track of the error for the retry warning. + err = e + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -599,8 +546,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) + log.warning("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -608,7 +555,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location: + if redirect_location and retries is not False: if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -626,7 +573,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:`httplib.HTTPSConnection`. + instead of :class:`.HTTPSConnection`. :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, ``assert_hostname`` and ``host`` in this order to verify connections. @@ -639,6 +586,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): """ scheme = 'https' + ConnectionCls = HTTPSConnection def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, @@ -658,33 +606,33 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint - def _prepare_conn(self, connection): + def _prepare_conn(self, conn): """ Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket` and establish the tunnel if proxy is used. """ - if isinstance(connection, VerifiedHTTPSConnection): - connection.set_cert(key_file=self.key_file, - cert_file=self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs, - assert_hostname=self.assert_hostname, - assert_fingerprint=self.assert_fingerprint) - connection.ssl_version = self.ssl_version + if isinstance(conn, VerifiedHTTPSConnection): + conn.set_cert(key_file=self.key_file, + cert_file=self.cert_file, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + conn.ssl_version = self.ssl_version if self.proxy is not None: # Python 2.7+ try: - set_tunnel = connection.set_tunnel + set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = connection._set_tunnel + set_tunnel = conn._set_tunnel set_tunnel(self.host, self.port, self.proxy_headers) # Establish tunnel connection early, because otherwise httplib # would improperly set Host: header to proxy's IP:port. - connection.connect() + conn.connect() - return connection + return conn def _new_conn(self): """ @@ -694,28 +642,30 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: + # Platform-specific: Python without ssl + raise SSLError("Can't connect to HTTPS URL because the SSL " + "module is not available.") + actual_host = self.host actual_port = self.port if self.proxy is not None: actual_host = self.proxy.host actual_port = self.proxy.port - if not ssl: # Platform-specific: Python compiled without +ssl - if not HTTPSConnection or HTTPSConnection is object: - raise SSLError("Can't connect to HTTPS URL because the SSL " - "module is not available.") - connection_class = HTTPSConnection - else: - connection_class = VerifiedHTTPSConnection - extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict - connection = connection_class(host=actual_host, port=actual_port, - timeout=self.timeout.connect_timeout, - **extra_params) - return self._prepare_conn(connection) + conn = self.ConnectionCls(host=actual_host, port=actual_port, + timeout=self.timeout.connect_timeout, + **extra_params) + if self.proxy is not None: + # Enable Nagle's algorithm for proxies, to avoid packet + # fragmentation. + conn.tcp_nodelay = 0 + + return self._prepare_conn(conn) def connection_from_url(url, **kw): |