commit    0f393d00b51bc54c5075447e4a8b21f0bed6acd8
tree      401c9f6c345c8ec7818e2d3341086a1b889b3bc4 /urllib3
parent    73be7d6cc85a90ab4f67ffc27dc7eae672f7741f
author    SVN-Git Migration <python-modules-team@lists.alioth.debian.org>  2015-10-08 13:19:39 -0700
committer SVN-Git Migration <python-modules-team@lists.alioth.debian.org>  2015-10-08 13:19:39 -0700
Imported Upstream version 1.9
Diffstat (limited to 'urllib3')
 urllib3/__init__.py              |  26
 urllib3/_collections.py          |   6
 urllib3/connection.py            |  51
 urllib3/connectionpool.py        | 182
 urllib3/contrib/ntlmpool.py      |   6
 urllib3/contrib/pyopenssl.py     | 202
 urllib3/exceptions.py            |  37
 urllib3/fields.py                |   8
 urllib3/filepost.py              |   6
 urllib3/packages/ordered_dict.py |   1
 urllib3/poolmanager.py           |  41
 urllib3/request.py               |   8
 urllib3/response.py              |  31
 urllib3/util/__init__.py         |   9
 urllib3/util/connection.py       |  56
 urllib3/util/request.py          |  13
 urllib3/util/retry.py            | 279
 urllib3/util/ssl_.py             |   5
 urllib3/util/timeout.py          |  67
 urllib3/util/url.py              |   9
 20 files changed, 640 insertions(+), 403 deletions(-)
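The headline change in 1.9 is the new Retry class (urllib3/util/retry.py, added in full below), which replaces the old integer retries countdown. As a quick orientation before the raw diff, a minimal usage sketch based on the docstrings in the patch; example.com is a placeholder host:

    import urllib3
    from urllib3.util.retry import Retry

    # Pool-wide default: a budget per error class, with exponential backoff of
    # backoff_factor * 2**(n - 1) seconds between attempts (the docstring's
    # formula), capped at Retry.BACKOFF_MAX.
    retries = Retry(total=5, connect=3, read=2, redirect=5, backoff_factor=0.1)
    http = urllib3.PoolManager(retries=retries)
    r = http.request('GET', 'http://example.com/')

    # Per-request override: an int retains the old behaviour via
    # Retry.from_int(); False disables retries and returns redirect
    # responses as-is instead of raising MaxRetryError.
    r = http.request('GET', 'http://example.com/', retries=10)
    r = http.request('GET', 'http://example.com/', retries=False)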
diff --git a/urllib3/__init__.py b/urllib3/__init__.py index c80d5da..56f5bf4 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -1,16 +1,10 @@ -# urllib3/__init__.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ urllib3 - Thread-safe connection pooling and re-using. """ __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.8.3' +__version__ = '1.9' from .connectionpool import ( @@ -23,7 +17,10 @@ from . import exceptions from .filepost import encode_multipart_formdata from .poolmanager import PoolManager, ProxyManager, proxy_from_url from .response import HTTPResponse -from .util import make_headers, get_host, Timeout +from .util.request import make_headers +from .util.url import get_host +from .util.timeout import Timeout +from .util.retry import Retry # Set default logging handler to avoid "No handler found" warnings. @@ -51,8 +48,19 @@ def add_stderr_logger(level=logging.DEBUG): handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) logger.addHandler(handler) logger.setLevel(level) - logger.debug('Added an stderr logging handler to logger: %s' % __name__) + logger.debug('Added a stderr logging handler to logger: %s' % __name__) return handler # ... Clean up. del NullHandler + + +# Set security warning to only go off once by default. +import warnings +warnings.simplefilter('module', exceptions.InsecureRequestWarning) + +def disable_warnings(category=exceptions.HTTPWarning): + """ + Helper for quickly disabling all urllib3 warnings. + """ + warnings.simplefilter('ignore', category) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index ccf0d5f..d77ebb8 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -1,9 +1,3 @@ -# urllib3/_collections.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - from collections import Mapping, MutableMapping try: from threading import RLock diff --git a/urllib3/connection.py b/urllib3/connection.py index fbb63ed..0d578d7 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -1,9 +1,3 @@ -# urllib3/connection.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import sys import socket from socket import timeout as SocketTimeout @@ -35,13 +29,16 @@ from .exceptions import ( ) from .packages.ssl_match_hostname import match_hostname from .packages import six -from .util import ( - assert_fingerprint, + +from .util.ssl_ import ( resolve_cert_reqs, resolve_ssl_version, ssl_wrap_socket, + assert_fingerprint, ) +from .util import connection + port_by_scheme = { 'http': 80, @@ -82,15 +79,23 @@ class HTTPConnection(_HTTPConnection, object): #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] + #: Whether this connection verifies the host's certificate. 
+ is_verified = False + def __init__(self, *args, **kw): if six.PY3: # Python 3 kw.pop('strict', None) - if sys.version_info < (2, 7): # Python 2.6 and older - kw.pop('source_address', None) # Pre-set source_address in case we have an older Python like 2.6. self.source_address = kw.get('source_address') + if sys.version_info < (2, 7): # Python 2.6 + # _HTTPConnection on Python 2.6 will balk at this keyword arg, but + # not newer versions. We can still use it when creating a + # connection though, so we pop it *after* we have saved it as + # self.source_address. + kw.pop('source_address', None) + #: The socket options provided by the user. If no options are #: provided, we use the default options. self.socket_options = kw.pop('socket_options', self.default_socket_options) @@ -103,22 +108,22 @@ class HTTPConnection(_HTTPConnection, object): :return: New socket connection. """ - extra_args = [] - if self.source_address: # Python 2.7+ - extra_args.append(self.source_address) + extra_kw = {} + if self.source_address: + extra_kw['source_address'] = self.source_address + + if self.socket_options: + extra_kw['socket_options'] = self.socket_options try: - conn = socket.create_connection( - (self.host, self.port), self.timeout, *extra_args) + conn = connection.create_connection( + (self.host, self.port), self.timeout, **extra_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % (self.host, self.timeout)) - # Set options on the socket. - self._set_options_on(conn) - return conn def _prepare_conn(self, conn): @@ -132,14 +137,6 @@ class HTTPConnection(_HTTPConnection, object): # Mark this connection as not reusable self.auto_open = 0 - def _set_options_on(self, conn): - # Disable all socket options if the user passes ``socket_options=None`` - if self.socket_options is None: - return - - for opt in self.socket_options: - conn.setsockopt(*opt) - def connect(self): conn = self._new_conn() self._prepare_conn(conn) @@ -225,6 +222,8 @@ class VerifiedHTTPSConnection(HTTPSConnection): match_hostname(self.sock.getpeercert(), self.assert_hostname or hostname) + self.is_verified = resolved_cert_reqs == ssl.CERT_REQUIRED + if ssl: # Make a copy for testing. 
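The connection.py hunks above route socket creation through the new util.connection.create_connection() and expose a socket_options knob alongside HTTPConnection.default_socket_options. A sketch of overriding it on a pool; the keepalive option is an illustrative assumption, not something the patch itself sets:

    import socket
    from urllib3 import HTTPConnectionPool
    from urllib3.connection import HTTPConnection

    # Keep the TCP_NODELAY default and additionally enable TCP keepalive.
    opts = HTTPConnection.default_socket_options + [
        (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
    ]
    # Extra keyword args (**conn_kw) are forwarded to each HTTPConnection,
    # which applies the options before connecting; socket_options=None
    # disables them all.
    pool = HTTPConnectionPool('example.com', 80, socket_options=opts)
    r = pool.request('GET', '/')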
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index ab205fa..9317fdc 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -1,12 +1,7 @@ -# urllib3/connectionpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - -import sys import errno import logging +import sys +import warnings from socket import error as SocketError, timeout as SocketTimeout import socket @@ -20,15 +15,16 @@ except ImportError: from .exceptions import ( ClosedPoolError, - ConnectionError, + ProtocolError, EmptyPoolError, HostChangedError, - LocationParseError, + LocationValueError, MaxRetryError, + ProxyError, + ReadTimeoutError, SSLError, TimeoutError, - ReadTimeoutError, - ProxyError, + InsecureRequestWarning, ) from .packages.ssl_match_hostname import CertificateError from .packages import six @@ -40,11 +36,11 @@ from .connection import ( ) from .request import RequestMethods from .response import HTTPResponse -from .util import ( - get_host, - is_connection_dropped, - Timeout, -) + +from .util.connection import is_connection_dropped +from .util.retry import Retry +from .util.timeout import Timeout +from .util.url import get_host xrange = six.moves.xrange @@ -65,13 +61,11 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): - if host is None: - raise LocationParseError(host) + if not host: + raise LocationValueError("No host specified.") # httplib doesn't like it when we include brackets in ipv6 addresses - host = host.strip('[]') - - self.host = host + self.host = host.strip('[]') self.port = port def __str__(self): @@ -126,6 +120,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): Headers to include with all requests, unless other headers are given explicitly. + :param retries: + Retry configuration to use by default with requests in this pool. + :param _proxy: Parsed proxy URL, should not be used directly, instead, see :class:`urllib3.connectionpool.ProxyManager`" @@ -144,18 +141,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None, **conn_kw): + headers=None, retries=None, + _proxy=None, _proxy_headers=None, + **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) self.strict = strict - # This is for backwards compatibility and can be removed once a timeout - # can only be set to a Timeout object if not isinstance(timeout, Timeout): timeout = Timeout.from_float(timeout) + if retries is None: + retries = Retry.DEFAULT + self.timeout = timeout + self.retries = retries self.pool = self.QueueCls(maxsize) self.block = block @@ -259,6 +260,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if conn: conn.close() + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -290,9 +297,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_requests += 1 timeout_obj = self._get_timeout(timeout) - timeout_obj.start_connect() conn.timeout = timeout_obj.connect_timeout + + # Trigger any extra validation we need to do. 
+ self._validate_conn(conn) + # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. conn.request(method, url, **httplib_request_kw) @@ -301,7 +311,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): read_timeout = timeout_obj.read_timeout # App Engine doesn't have a sock attr - if hasattr(conn, 'sock'): + if getattr(conn, 'sock', None): # In Python 3 socket.py will catch EAGAIN and return None when you # try and read into the file pointer created by http.client, which # instead raises a BadStatusLine exception. Instead of catching @@ -309,8 +319,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # timeouts, check for a zero timeout before making the request. if read_timeout == 0: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) if read_timeout is Timeout.DEFAULT_TIMEOUT: conn.sock.settimeout(socket.getdefaulttimeout()) else: # None or a value @@ -332,7 +341,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # http://bugs.python.org/issue10272 if 'timed out' in str(e) or \ 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError(self, url, "Read timed out.") + raise ReadTimeoutError( + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise @@ -341,8 +351,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # have to specifically catch it and throw the timeout error if e.errno in _blocking_errnos: raise ReadTimeoutError( - self, url, - "Read timed out. (read timeout=%s)" % read_timeout) + self, url, "Read timed out. (read timeout=%s)" % read_timeout) raise @@ -388,7 +397,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): return (scheme, host, port) == (self.scheme, self.host, self.port) - def urlopen(self, method, url, body=None, headers=None, retries=3, + def urlopen(self, method, url, body=None, headers=None, retries=None, redirect=True, assert_same_host=True, timeout=_Default, pool_timeout=None, release_conn=None, **response_kw): """ @@ -422,9 +431,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): these headers completely replace any pool-specific headers. :param retries: - Number of retries to allow before raising a MaxRetryError exception. - If `False`, then retries are disabled and any exception is raised - immediately. + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. 
:param redirect: If True, automatically handle redirects (status codes 301, 302, @@ -463,15 +483,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0 and retries is not False: - raise MaxRetryError(self, url) + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) if release_conn is None: release_conn = response_kw.get('preload_content', True) # Check host if assert_same_host and not self.is_same_host(url): - raise HostChangedError(self, url, retries - 1) + raise HostChangedError(self, url, retries) conn = None @@ -487,10 +507,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): err = None try: - # Request a connection from the queue + # Request a connection from the queue. conn = self._get_conn(timeout=pool_timeout) - # Make the request on the httplib connection object + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, timeout=timeout, body=body, headers=headers) @@ -529,21 +549,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.close() conn = None - if not retries: - if isinstance(e, TimeoutError): - # TimeoutError is exempt from MaxRetryError-wrapping. - # FIXME: ... Not sure why. Add a reason here. - raise - - # Wrap unexpected exceptions with the most appropriate - # module-level exception and re-raise. - if isinstance(e, SocketError) and self.proxy: - raise ProxyError('Cannot connect to proxy.', e) - - if retries is False: - raise ConnectionError('Connection failed.', e) + stacktrace = sys.exc_info()[2] + if isinstance(e, SocketError) and self.proxy: + e = ProxyError('Cannot connect to proxy.', e) + elif isinstance(e, (SocketError, HTTPException)): + e = ProtocolError('Connection aborted.', e) - raise MaxRetryError(self, url, e) + retries = retries.increment(method, url, error=e, + _pool=self, _stacktrace=stacktrace) + retries.sleep() # Keep track of the error for the retry warning. err = e @@ -557,23 +571,43 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warning("Retrying (%d attempts remain) after connection " + log.warning("Retrying (%r) after connection " "broken by '%r': %s" % (retries, err, url)) - return self.urlopen(method, url, body, headers, retries - 1, + return self.urlopen(method, url, body, headers, retries, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, release_conn=release_conn, **response_kw) # Handle redirect? redirect_location = redirect and response.get_redirect_location() - if redirect_location and retries is not False: + if redirect_location: if response.status == 303: method = 'GET' + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, body, headers, - retries - 1, redirect, assert_same_host, - timeout=timeout, pool_timeout=pool_timeout, - release_conn=release_conn, **response_kw) + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) + + # Check if we should retry the HTTP response. 
+ if retries.is_forced_retry(method, status_code=response.status): + retries = retries.increment(method, url, response=response, _pool=self) + retries.sleep() + log.info("Forced retry: %s" % url) + return self.urlopen(method, url, body, headers, + retries=retries, redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, pool_timeout=pool_timeout, + release_conn=release_conn, **response_kw) return response @@ -600,8 +634,8 @@ class HTTPSConnectionPool(HTTPConnectionPool): ConnectionCls = HTTPSConnection def __init__(self, host, port=None, - strict=False, timeout=None, maxsize=1, - block=False, headers=None, + strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, + block=False, headers=None, retries=None, _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, @@ -609,7 +643,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): **conn_kw): HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers, + block, headers, retries, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file self.cert_file = cert_file @@ -640,7 +674,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): set_tunnel = conn.set_tunnel except AttributeError: # Platform-specific: Python 2.6 set_tunnel = conn._set_tunnel - + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older set_tunnel(self.host, self.port) else: @@ -677,6 +711,24 @@ class HTTPSConnectionPool(HTTPConnectionPool): return self._prepare_conn(conn) + def _validate_conn(self, conn): + """ + Called right before a request is made, after the socket is created. + """ + super(HTTPSConnectionPool, self)._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not conn.sock: + conn.connect() + + if not conn.is_verified: + warnings.warn(( + 'Unverified HTTPS request is being made. ' + 'Adding certificate verification is strongly advised. See: ' + 'https://urllib3.readthedocs.org/en/latest/security.html ' + '(This warning will only appear once by default.)'), + InsecureRequestWarning) + def connection_from_url(url, **kw): """ @@ -693,7 +745,7 @@ def connection_from_url(url, **kw): :class:`.ConnectionPool`. Useful for specifying things like timeout, maxsize, headers, etc. - Example: :: + Example:: >>> conn = connection_from_url('http://google.com/') >>> r = conn.request('GET', '/') diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py index b8cd933..c6b266f 100644 --- a/urllib3/contrib/ntlmpool.py +++ b/urllib3/contrib/ntlmpool.py @@ -1,9 +1,3 @@ -# urllib3/contrib/ntlmpool.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - """ NTLM authenticating pool, contributed by erikcederstran diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py index 21a12c6..7a9ea2e 100644 --- a/urllib3/contrib/pyopenssl.py +++ b/urllib3/contrib/pyopenssl.py @@ -54,7 +54,6 @@ from pyasn1.type import univ, constraint from socket import _fileobject, timeout import ssl import select -from cStringIO import StringIO from .. import connection from .. 
import util @@ -155,196 +154,37 @@ def get_subj_alt_name(peer_cert): return dns_name -class fileobject(_fileobject): - - def _wait_for_sock(self): - rd, wd, ed = select.select([self._sock], [], [], - self._sock.gettimeout()) - if not rd: - raise timeout() - - - def read(self, size=-1): - # Use max, disallow tiny reads in a loop as they are very inefficient. - # We never leave read() with any leftover data from a new recv() call - # in our internal buffer. - rbufsize = max(self._rbufsize, self.default_bufsize) - # Our use of StringIO rather than lists of string objects returned by - # recv() minimizes memory usage and fragmentation that occurs when - # rbufsize is large compared to the typical return value of recv(). - buf = self._rbuf - buf.seek(0, 2) # seek end - if size < 0: - # Read until EOF - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or EOF seen, whichever comes first - buf_len = buf.tell() - if buf_len >= size: - # Already have size bytes in our buffer? Extract and return. - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - left = size - buf_len - # recv() will malloc the amount of memory given as its - # parameter even though it often returns much less data - # than that. The returned data string is short lived - # as we copy it into a StringIO and free it. This avoids - # fragmentation issues on many platforms. - try: - data = self._sock.recv(left) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid buffer data copies when: - # - We have no data in our buffer. - # AND - # - Our call to recv returned exactly the - # number of bytes we were asked to read. - return data - if n == left: - buf.write(data) - del data # explicit free - break - assert n <= left, "recv(%d) returned %d bytes" % (left, n) - buf.write(data) - buf_len += n - del data # explicit free - #assert buf_len == buf.tell() - return buf.getvalue() - - def readline(self, size=-1): - buf = self._rbuf - buf.seek(0, 2) # seek end - if buf.tell() > 0: - # check if we already have it in our buffer - buf.seek(0) - bline = buf.readline(size) - if bline.endswith('\n') or len(bline) == size: - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return bline - del bline - if size < 0: - # Read until \n or EOF, whichever comes first - if self._rbufsize <= 1: - # Speed up unbuffered case - buf.seek(0) - buffers = [buf.read()] - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - data = None - recv = self._sock.recv - while True: - try: - while data != "\n": - data = recv(1) - if not data: - break - buffers.append(data) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - break - return "".join(buffers) - - buf.seek(0, 2) # seek end - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. 
- while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - nl = data.find('\n') - if nl >= 0: - nl += 1 - buf.write(data[:nl]) - self._rbuf.write(data[nl:]) - del data - break - buf.write(data) - return buf.getvalue() - else: - # Read until size bytes or \n or EOF seen, whichever comes first - buf.seek(0, 2) # seek end - buf_len = buf.tell() - if buf_len >= size: - buf.seek(0) - rv = buf.read(size) - self._rbuf = StringIO() - self._rbuf.write(buf.read()) - return rv - self._rbuf = StringIO() # reset _rbuf. we consume it via buf. - while True: - try: - data = self._sock.recv(self._rbufsize) - except OpenSSL.SSL.WantReadError: - self._wait_for_sock() - continue - if not data: - break - left = size - buf_len - # did we just receive a newline? - nl = data.find('\n', 0, left) - if nl >= 0: - nl += 1 - # save the excess data to _rbuf - self._rbuf.write(data[nl:]) - if buf_len: - buf.write(data[:nl]) - break - else: - # Shortcut. Avoid data copy through buf when returning - # a substring of our first recv(). - return data[:nl] - n = len(data) - if n == size and not buf_len: - # Shortcut. Avoid data copy through buf when - # returning exactly all of our first recv(). - return data - if n >= left: - buf.write(data[:left]) - self._rbuf.write(data[left:]) - break - buf.write(data) - buf_len += n - #assert buf_len == buf.tell() - return buf.getvalue() - - class WrappedSocket(object): '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' - def __init__(self, connection, socket): + def __init__(self, connection, socket, suppress_ragged_eofs=True): self.connection = connection self.socket = socket + self.suppress_ragged_eofs = suppress_ragged_eofs def fileno(self): return self.socket.fileno() def makefile(self, mode, bufsize=-1): - return fileobject(self.connection, mode, bufsize) + return _fileobject(self, mode, bufsize) + + def recv(self, *args, **kwargs): + try: + data = self.connection.recv(*args, **kwargs) + except OpenSSL.SSL.SysCallError as e: + if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): + return b'' + else: + raise + except OpenSSL.SSL.WantReadError: + rd, wd, ed = select.select( + [self.socket], [], [], self.socket.gettimeout()) + if not rd: + raise timeout() + else: + return self.recv(*args, **kwargs) + else: + return data def settimeout(self, timeout): return self.socket.settimeout(timeout) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index b4df831..fff8bfa 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -1,9 +1,3 @@ -# urllib3/exceptions.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - ## Base Exceptions @@ -11,6 +5,11 @@ class HTTPError(Exception): "Base exception used by this module." pass +class HTTPWarning(Warning): + "Base warning used by this module." + pass + + class PoolError(HTTPError): "Base exception for errors caused within a pool." @@ -44,16 +43,20 @@ class ProxyError(HTTPError): pass -class ConnectionError(HTTPError): - "Raised when a normal connection fails." +class DecodeError(HTTPError): + "Raised when automatic decoding based on Content-Type fails." pass -class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." +class ProtocolError(HTTPError): + "Raised when something unexpected happens mid-request/response." 
pass +#: Renamed to ProtocolError but aliased for backwards compatibility. +ConnectionError = ProtocolError + + ## Leaf Exceptions class MaxRetryError(RequestError): @@ -64,7 +67,7 @@ class MaxRetryError(RequestError): message = "Max retries exceeded with url: %s" % url if reason: - message += " (Caused by %s: %s)" % (type(reason), reason) + message += " (Caused by %r)" % reason else: message += " (Caused by redirect)" @@ -116,7 +119,12 @@ class ClosedPoolError(PoolError): pass -class LocationParseError(ValueError, HTTPError): +class LocationValueError(ValueError, HTTPError): + "Raised when there is something wrong with a given URL input." + pass + + +class LocationParseError(LocationValueError): "Raised when get_host or similar fails to parse the URL input." def __init__(self, location): @@ -124,3 +132,8 @@ class LocationParseError(ValueError, HTTPError): HTTPError.__init__(self, message) self.location = location + + +class InsecureRequestWarning(HTTPWarning): + "Warned when making an unverified HTTPS request." + pass diff --git a/urllib3/fields.py b/urllib3/fields.py index dceafb4..c853f8d 100644 --- a/urllib3/fields.py +++ b/urllib3/fields.py @@ -1,9 +1,3 @@ -# urllib3/fields.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import email.utils import mimetypes @@ -81,7 +75,7 @@ class RequestField(object): Supports constructing :class:`~urllib3.fields.RequestField` from parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where the MIME type is optional. - For example: :: + For example:: 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), diff --git a/urllib3/filepost.py b/urllib3/filepost.py index c3db30c..0fbf488 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -1,9 +1,3 @@ -# urllib3/filepost.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import codecs from uuid import uuid4 diff --git a/urllib3/packages/ordered_dict.py b/urllib3/packages/ordered_dict.py index 7f8ee15..4479363 100644 --- a/urllib3/packages/ordered_dict.py +++ b/urllib3/packages/ordered_dict.py @@ -2,7 +2,6 @@ # Passes Python2.7's test suite and incorporates all the latest updates. # Copyright 2009 Raymond Hettinger, released under the MIT License. 
# http://code.activestate.com/recipes/576693/ - try: from thread import get_ident as _get_ident except ImportError: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 3945f5d..515dc96 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,9 +1,3 @@ -# urllib3/poolmanager.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - import logging try: # Python 3 @@ -14,8 +8,10 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme +from .exceptions import LocationValueError from .request import RequestMethods -from .util import parse_url +from .util.url import parse_url +from .util.retry import Retry __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] @@ -49,7 +45,7 @@ class PoolManager(RequestMethods): Additional parameters are used to create fresh :class:`urllib3.connectionpool.ConnectionPool` instances. - Example: :: + Example:: >>> manager = PoolManager(num_pools=2) >>> r = manager.request('GET', 'http://google.com/') @@ -102,10 +98,11 @@ class PoolManager(RequestMethods): ``urllib3.connectionpool.port_by_scheme``. """ - scheme = scheme or 'http' + if not host: + raise LocationValueError("No host specified.") + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) with self.pools.lock: @@ -118,6 +115,7 @@ class PoolManager(RequestMethods): # Make a fresh ConnectionPool of the desired type pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool + return pool def connection_from_url(self, url): @@ -165,9 +163,14 @@ class PoolManager(RequestMethods): if response.status == 303: method = 'GET' - log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + retries = kw.get('retries') + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect) + + kw['retries'] = retries.increment(method, redirect_location) kw['redirect'] = redirect + + log.info("Redirecting %s -> %s" % (url, redirect_location)) return self.urlopen(method, redirect_location, **kw) @@ -208,12 +211,16 @@ class ProxyManager(PoolManager): if not proxy.port: port = port_by_scheme.get(proxy.scheme, 80) proxy = proxy._replace(port=port) + + assert proxy.scheme in ("http", "https"), \ + 'Not supported proxy scheme %s' % proxy.scheme + self.proxy = proxy self.proxy_headers = proxy_headers or {} - assert self.proxy.scheme in ("http", "https"), \ - 'Not supported proxy scheme %s' % self.proxy.scheme + connection_pool_kw['_proxy'] = self.proxy connection_pool_kw['_proxy_headers'] = self.proxy_headers + super(ProxyManager, self).__init__( num_pools, headers, **connection_pool_kw) @@ -248,10 +255,10 @@ class ProxyManager(PoolManager): # For proxied HTTPS requests, httplib sets the necessary headers # on the CONNECT to the proxy. For HTTP, we'll definitely # need to set 'Host' at the very least. 
- kw['headers'] = self._set_proxy_headers(url, kw.get('headers', - self.headers)) + headers = kw.get('headers', self.headers) + kw['headers'] = self._set_proxy_headers(url, headers) - return super(ProxyManager, self).urlopen(method, url, redirect, **kw) + return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) def proxy_from_url(url, **kw): diff --git a/urllib3/request.py b/urllib3/request.py index 7a46f1b..51fe238 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,9 +1,3 @@ -# urllib3/request.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - try: from urllib.parse import urlencode except ImportError: @@ -105,7 +99,7 @@ class RequestMethods(object): Supports an optional ``fields`` parameter of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type) tuple where - the MIME type is optional. For example: :: + the MIME type is optional. For example:: fields = { 'foo': 'bar', diff --git a/urllib3/response.py b/urllib3/response.py index 13ffba4..7e0d47f 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -1,18 +1,13 @@ -# urllib3/response.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - import zlib import io from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import DecodeError, ReadTimeoutError +from .exceptions import ProtocolError, DecodeError, ReadTimeoutError from .packages.six import string_types as basestring, binary_type -from .util import is_fp_closed +from .connection import HTTPException, BaseSSLError +from .util.response import is_fp_closed + class DeflateDecoder(object): @@ -88,11 +83,14 @@ class HTTPResponse(io.IOBase): self.decode_content = decode_content self._decoder = None - self._body = body if body and isinstance(body, basestring) else None + self._body = None self._fp = None self._original_response = original_response self._fp_bytes_read = 0 + if body and isinstance(body, (basestring, binary_type)): + self._body = body + self._pool = pool self._connection = connection @@ -199,6 +197,19 @@ class HTTPResponse(io.IOBase): # there is yet no clean way to get at it from this context. raise ReadTimeoutError(self._pool, None, 'Read timed out.') + except BaseSSLError as e: + # FIXME: Is there a better way to differentiate between SSLErrors? + if not 'read operation timed out' in str(e): # Defensive: + # This shouldn't happen but just in case we're missing an edge + # case, let's avoid swallowing SSL errors. + raise + + raise ReadTimeoutError(self._pool, None, 'Read timed out.') + + except HTTPException as e: + # This includes IncompleteRead. + raise ProtocolError('Connection broken: %r' % e, e) + self._fp_bytes_read += len(data) try: diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py index a40185e..8becc81 100644 --- a/urllib3/util/__init__.py +++ b/urllib3/util/__init__.py @@ -1,9 +1,4 @@ -# urllib3/util/__init__.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +# For backwards compatibility, provide imports that used to be here. 
from .connection import is_connection_dropped from .request import make_headers from .response import is_fp_closed @@ -19,6 +14,8 @@ from .timeout import ( current_time, Timeout, ) + +from .retry import Retry from .url import ( get_host, parse_url, diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index c67ef04..062ee9d 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -1,4 +1,4 @@ -from socket import error as SocketError +import socket try: from select import poll, POLLIN except ImportError: # `poll` doesn't exist on OSX and other platforms @@ -31,7 +31,7 @@ def is_connection_dropped(conn): # Platform-specific try: return select([sock], [], [], 0.0)[0] - except SocketError: + except socket.error: return True # This version is better on platforms that support it. @@ -41,3 +41,55 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/urllib3/util/request.py b/urllib3/util/request.py index bfd7a98..bc64f6b 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -1,7 +1,6 @@ from base64 import b64encode -from ..packages import six - +from ..packages.six import b ACCEPT_ENCODING = 'gzip,deflate' @@ -29,13 +28,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for - 'proxy-authorization: basic ...' auth header. + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. :param disable_cache: If ``True``, adds 'cache-control: no-cache' header. 
- Example: :: + Example:: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} @@ -60,11 +59,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, if basic_auth: headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') + b64encode(b(basic_auth)).decode('utf-8') if proxy_basic_auth: headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') + b64encode(b(proxy_basic_auth)).decode('utf-8') if disable_cache: headers['cache-control'] = 'no-cache' diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py new file mode 100644 index 0000000..9013197 --- /dev/null +++ b/urllib3/util/retry.py @@ -0,0 +1,279 @@ +import time +import logging + +from ..exceptions import ( + ProtocolError, + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. 
urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we can't + assume that the server did not process any of it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? 
+ """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + + else: + # FIXME: Nothing changed, scenario doesn't make sense. + _observed_errors += 1 + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py index dee4b87..9cfe2d2 100644 --- a/urllib3/util/ssl_.py +++ b/urllib3/util/ssl_.py @@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint): } fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: + if odd or digest_length not in hashfunc_map: raise SSLError('Fingerprint is of invalid length.') # We need encode() here for py32; works on py2 and p33. diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index aaadc12..ea7027f 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -1,32 +1,49 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user from socket import _GLOBAL_DEFAULT_TIMEOUT import time from ..exceptions import TimeoutStateError +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() def current_time(): """ - Retrieve the current time, this function is mocked out in unit testing. + Retrieve the current time. This function is mocked out in unit testing. """ return time.time() -_Default = object() -# The default timeout to use for socket connections. 
This is the attribute used -# by httplib to define the default timeout +class Timeout(object): + """ Timeout configuration. + Timeouts can be defined as a default for a pool:: -class Timeout(object): - """ - Utility object for storing timeout values. + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) - Example usage: + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. - .. code-block:: python + Defaults to None. - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc + :type total: integer, float, or None :param connect: The maximum amount of time to wait for a connection attempt to a server @@ -47,25 +64,15 @@ class Timeout(object): :type read: integer, float, or None - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - .. note:: Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. In addition, the read and total timeouts only measure the time between read operations on the socket connecting the client and the server, @@ -73,8 +80,8 @@ class Timeout(object): response. For most requests, the timeout is raised because the server has not sent the first byte in the specified time. This is not always the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. + of 20 seconds will not trigger, even though the request will take + several minutes to complete. 
If your goal is to cut off any request after a set amount of wall clock time, consider having a second "watcher" thread to cut off a slow diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 122108b..487d456 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,6 +2,7 @@ from collections import namedtuple from ..exceptions import LocationParseError + url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -47,7 +48,7 @@ def split_first(s, delims): If not found, then the first part is the full input string. - Example: :: + Example:: >>> split_first('foo/bar?baz', '?/=') ('foo', 'bar?baz', '/') @@ -80,7 +81,7 @@ def parse_url(url): Partly backwards-compatible with :mod:`urlparse`. - Example: :: + Example:: >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) @@ -95,6 +96,10 @@ # Additionally, this implementations does silly things to be optimal # on CPython. + if not url: + # Empty + return Url() + scheme = None auth = None host = None
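Rounding out the user-visible 1.9 surface, a sketch combining the reworked Timeout usage documented in util/timeout.py with the new warning controls from __init__.py; example.com is again a placeholder:

    import urllib3
    from urllib3.util.timeout import Timeout

    # Separate connect/read budgets; Timeout(total=10.0) would cap both.
    timeout = Timeout(connect=2.0, read=7.0)
    http = urllib3.PoolManager(timeout=timeout)
    r = http.request('GET', 'http://example.com/')

    # Unverified HTTPS requests now emit InsecureRequestWarning (once per
    # module by default); disable_warnings() silences urllib3's HTTPWarning
    # category wholesale. Prefer adding certificate verification instead.
    urllib3.disable_warnings()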