Imported Upstream version 1.9

author: SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:39 -0700
committer: SVN-Git Migration <python-modules-team@lists.alioth.debian.org> 2015-10-08 13:19:39 -0700
commit: 0f393d00b51bc54c5075447e4a8b21f0bed6acd8 (patch)
tree: 401c9f6c345c8ec7818e2d3341086a1b889b3bc4 /urllib3/util
parent: 73be7d6cc85a90ab4f67ffc27dc7eae672f7741f (diff)
download: python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar
python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar.gz
7 files changed, 388 insertions, 50 deletions
diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py
index a40185e..8becc81 100644
--- a/urllib3/util/__init__.py
+++ b/urllib3/util/__init__.py
@@ -1,9 +1,4 @@
-# urllib3/util/__init__.py
-# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
-#
-# This module is part of urllib3 and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
+# For backwards compatibility, provide imports that used to be here.
 from .connection import is_connection_dropped
 from .request import make_headers
 from .response import is_fp_closed
@@ -19,6 +14,8 @@ from .timeout import (
     current_time,
     Timeout,
 )
+
+from .retry import Retry
 from .url import (
     get_host,
     parse_url,
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
index c67ef04..062ee9d 100644
--- a/urllib3/util/connection.py
+++ b/urllib3/util/connection.py
@@ -1,4 +1,4 @@
-from socket import error as SocketError
+import socket
 try:
     from select import poll, POLLIN
 except ImportError:  # `poll` doesn't exist on OSX and other platforms
@@ -31,7 +31,7 @@ def is_connection_dropped(conn):  # Platform-specific
 
         try:
             return select([sock], [], [], 0.0)[0]
-        except SocketError:
+        except socket.error:
             return True
 
     # This version is better on platforms that support it.
@@ -41,3 +41,55 @@ def is_connection_dropped(conn):  # Platform-specific
         if fno == sock.fileno():
             # Either data is buffered (bad), or the connection is dropped.
             return True
+
+
+# This function is copied from socket.py in the Python 2.7 standard
+# library test suite. The only addition to its signature is `socket_options`.
+def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                      source_address=None, socket_options=None):
+    """Connect to *address* and return the socket object.
+
+    Convenience function.  Connect to *address* (a 2-tuple ``(host,
+    port)``) and return the socket object.  Passing the optional
+    *timeout* parameter will set the timeout on the socket instance
+    before attempting to connect.  If no *timeout* is supplied, the
+    global default timeout setting returned by :func:`getdefaulttimeout`
+    is used.  If *source_address* is set it must be a tuple of (host, port)
+    for the socket to bind as a source address before making the connection.
+    A host of '' or port 0 tells the OS to use the default.
+    """
+
+    host, port = address
+    err = None
+    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+        af, socktype, proto, canonname, sa = res
+        sock = None
+        try:
+            sock = socket.socket(af, socktype, proto)
+            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+                sock.settimeout(timeout)
+            if source_address:
+                sock.bind(source_address)
+            # If provided, set socket level options before connecting.
+            # This is the only addition urllib3 makes to this function.
+            _set_socket_options(sock, socket_options)
+            sock.connect(sa)
+            return sock
+
+        except socket.error as _:
+            err = _
+            if sock is not None:
+                sock.close()
+
+    if err is not None:
+        raise err
+    else:
+        raise socket.error("getaddrinfo returns an empty list")
+
+
+def _set_socket_options(sock, options):
+    if options is None:
+        return
+
+    for opt in options:
+        sock.setsockopt(*opt)
diff --git a/urllib3/util/request.py b/urllib3/util/request.py
index bfd7a98..bc64f6b 100644
--- a/urllib3/util/request.py
+++ b/urllib3/util/request.py
@@ -1,7 +1,6 @@
 from base64 import b64encode
 
-from ..packages import six
-
+from ..packages.six import b
 
 ACCEPT_ENCODING = 'gzip,deflate'
 
@@ -29,13 +28,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
         auth header.
 
     :param proxy_basic_auth:
-        Colon-separated username:password string for
-        'proxy-authorization: basic ...' auth header.
+        Colon-separated username:password string for 'proxy-authorization: basic ...'
+        auth header.
 
     :param disable_cache:
         If ``True``, adds 'cache-control: no-cache' header.
 
-    Example: ::
+    Example::
 
         >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
         {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
@@ -60,11 +59,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
 
     if basic_auth:
         headers['authorization'] = 'Basic ' + \
-            b64encode(six.b(basic_auth)).decode('utf-8')
+            b64encode(b(basic_auth)).decode('utf-8')
 
     if proxy_basic_auth:
         headers['proxy-authorization'] = 'Basic ' + \
-            b64encode(six.b(proxy_basic_auth)).decode('utf-8')
+            b64encode(b(proxy_basic_auth)).decode('utf-8')
 
     if disable_cache:
         headers['cache-control'] = 'no-cache'
diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py
new file mode 100644
index 0000000..9013197
--- /dev/null
+++ b/urllib3/util/retry.py
@@ -0,0 +1,279 @@
+import time
+import logging
+
+from ..exceptions import (
+    ProtocolError,
+    ConnectTimeoutError,
+    ReadTimeoutError,
+    MaxRetryError,
+)
+from ..packages import six
+
+
+log = logging.getLogger(__name__)
+
+
+class Retry(object):
+    """ Retry configuration.
+
+    Each retry attempt will create a new Retry object with updated values, so
+    they can be safely reused.
+
+    Retries can be defined as a default for a pool::
+
+        retries = Retry(connect=5, read=2, redirect=5)
+        http = PoolManager(retries=retries)
+        response = http.request('GET', 'http://example.com/')
+
+    Or per-request (which overrides the default for the pool)::
+
+        response = http.request('GET', 'http://example.com/', retries=Retry(10))
+
+    Retries can be disabled by passing ``False``::
+
+        response = http.request('GET', 'http://example.com/', retries=False)
+
+    Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
+    retries are disabled, in which case the causing exception will be raised.
+
+
+    :param int total:
+        Total number of retries to allow. Takes precedence over other counts.
+
+        Set to ``None`` to remove this constraint and fall back on other
+        counts. It's a good idea to set this to some sensibly-high value to
+        account for unexpected edge cases and avoid infinite retry loops.
+
+        Set to ``0`` to fail on the first retry.
+
+        Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+    :param int connect:
+        How many connection-related errors to retry on.
+
+        These are errors raised before the request is sent to the remote server,
+        which we assume has not triggered the server to process the request.
+
+        Set to ``0`` to fail on the first retry of this type.
+
+    :param int read:
+        How many times to retry on read errors.
+
+        These errors are raised after the request was sent to the server, so the
+        request may have side-effects.
+
+        Set to ``0`` to fail on the first retry of this type.
+
+    :param int redirect:
+        How many redirects to perform. Limit this to avoid infinite redirect
+        loops.
+
+        A redirect is an HTTP response with a status code 301, 302, 303, 307 or
+        308.
+
+        Set to ``0`` to fail on the first retry of this type.
+
+        Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+    :param iterable method_whitelist:
+        Set of uppercased HTTP method verbs that we should retry on.
+
+        By default, we only retry on methods which are considered to be
+        idempotent (multiple requests with the same parameters end with the
+        same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+
+    :param iterable status_forcelist:
+        A set of HTTP status codes that we should force a retry on. 
+
+        By default, this is disabled with ``None``.
+
+    :param float backoff_factor:
+        A backoff factor to apply between attempts. urllib3 will sleep for::
+
+            {backoff factor} * (2 ^ ({number of total retries} - 1))
+
+        seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
+        for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
+        than :attr:`Retry.BACKOFF_MAX`.
+
+        By default, backoff is disabled (set to 0).
+
+    :param bool raise_on_redirect: Whether, if the number of redirects is
+        exhausted, to raise a MaxRetryError, or to return a response with a
+        response code in the 3xx range.
+    """
+
+    DEFAULT_METHOD_WHITELIST = frozenset([
+        'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
+
+    #: Maximum backoff time.
+    BACKOFF_MAX = 120
+
+    def __init__(self, total=10, connect=None, read=None, redirect=None,
+                 method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
+                 backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
+
+        self.total = total
+        self.connect = connect
+        self.read = read
+
+        if redirect is False or total is False:
+            redirect = 0
+            raise_on_redirect = False
+
+        self.redirect = redirect
+        self.status_forcelist = status_forcelist or set()
+        self.method_whitelist = method_whitelist
+        self.backoff_factor = backoff_factor
+        self.raise_on_redirect = raise_on_redirect
+        self._observed_errors = _observed_errors # TODO: use .history instead?
+
+    def new(self, **kw):
+        params = dict(
+            total=self.total,
+            connect=self.connect, read=self.read, redirect=self.redirect,
+            method_whitelist=self.method_whitelist,
+            status_forcelist=self.status_forcelist,
+            backoff_factor=self.backoff_factor,
+            raise_on_redirect=self.raise_on_redirect,
+            _observed_errors=self._observed_errors,
+        )
+        params.update(kw)
+        return type(self)(**params)
+
+    @classmethod
+    def from_int(cls, retries, redirect=True, default=None):
+        """ Backwards-compatibility for the old retries format."""
+        if retries is None:
+            retries = default if default is not None else cls.DEFAULT
+
+        if isinstance(retries, Retry):
+            return retries
+
+        redirect = bool(redirect) and None
+        new_retries = cls(retries, redirect=redirect)
+        log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
+        return new_retries
+
+    def get_backoff_time(self):
+        """ Formula for computing the current backoff
+
+        :rtype: float
+        """
+        if self._observed_errors <= 1:
+            return 0
+
+        backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
+        return min(self.BACKOFF_MAX, backoff_value)
+
+    def sleep(self):
+        """ Sleep between retry attempts using an exponential backoff.
+
+        By default, the backoff factor is 0 and this method will return
+        immediately.
+        """
+        backoff = self.get_backoff_time()
+        if backoff <= 0:
+            return
+        time.sleep(backoff)
+
+    def _is_connection_error(self, err):
+        """ Errors when we're fairly sure that the server did not receive the
+        request, so it should be safe to retry.
+        """
+        return isinstance(err, ConnectTimeoutError)
+
+    def _is_read_error(self, err):
+        """ Errors that occur after the request has been started, so we can't
+        assume that the server did not process any of it.
+        """
+        return isinstance(err, (ReadTimeoutError, ProtocolError))
+
+    def is_forced_retry(self, method, status_code):
+        """ Is this method/response retryable? (Based on method/codes whitelists)
+        """
+        if self.method_whitelist and method.upper() not in self.method_whitelist:
+            return False
+
+        return self.status_forcelist and status_code in self.status_forcelist
+
+    def is_exhausted(self):
+        """ Are we out of retries?
+        """
+        retry_counts = (self.total, self.connect, self.read, self.redirect)
+        retry_counts = list(filter(None, retry_counts))
+        if not retry_counts:
+            return False
+
+        return min(retry_counts) < 0
+
+    def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None):
+        """ Return a new Retry object with incremented retry counters.
+
+        :param response: A response object, or None, if the server did not
+            return a response.
+        :type response: :class:`~urllib3.response.HTTPResponse`
+        :param Exception error: An error encountered during the request, or
+            None if the response was received successfully.
+
+        :return: A new ``Retry`` object.
+        """
+        if self.total is False and error:
+            # Disabled, indicate to re-raise the error.
+            raise six.reraise(type(error), error, _stacktrace)
+
+        total = self.total
+        if total is not None:
+            total -= 1
+
+        _observed_errors = self._observed_errors
+        connect = self.connect
+        read = self.read
+        redirect = self.redirect
+
+        if error and self._is_connection_error(error):
+            # Connect retry?
+            if connect is False:
+                raise six.reraise(type(error), error, _stacktrace)
+            elif connect is not None:
+                connect -= 1
+            _observed_errors += 1
+
+        elif error and self._is_read_error(error):
+            # Read retry?
+            if read is False:
+                raise six.reraise(type(error), error, _stacktrace)
+            elif read is not None:
+                read -= 1
+            _observed_errors += 1
+
+        elif response and response.get_redirect_location():
+            # Redirect retry?
+            if redirect is not None:
+                redirect -= 1
+
+        else:
+            # FIXME: Nothing changed, scenario doesn't make sense.
+            _observed_errors += 1
+
+        new_retry = self.new(
+            total=total,
+            connect=connect, read=read, redirect=redirect,
+            _observed_errors=_observed_errors)
+
+        if new_retry.is_exhausted():
+            raise MaxRetryError(_pool, url, error)
+
+        log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry))
+
+        return new_retry
+
+
+    def __repr__(self):
+        return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
+                'read={self.read}, redirect={self.redirect})').format(
+                    cls=type(self), self=self)
+
+
+# For backwards compatibility (equivalent to pre-v1.9):
+Retry.DEFAULT = Retry(3)
diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py
index dee4b87..9cfe2d2 100644
--- a/urllib3/util/ssl_.py
+++ b/urllib3/util/ssl_.py
@@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint):
     }
 
     fingerprint = fingerprint.replace(':', '').lower()
+    digest_length, odd = divmod(len(fingerprint), 2)
 
-    digest_length, rest = divmod(len(fingerprint), 2)
-
-    if rest or digest_length not in hashfunc_map:
+    if odd or digest_length not in hashfunc_map:
         raise SSLError('Fingerprint is of invalid length.')
 
     # We need encode() here for py32; works on py2 and p33.
diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py
index aaadc12..ea7027f 100644
--- a/urllib3/util/timeout.py
+++ b/urllib3/util/timeout.py
@@ -1,32 +1,49 @@
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
 from socket import _GLOBAL_DEFAULT_TIMEOUT
 import time
 
 from ..exceptions import TimeoutStateError
 
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
 
 def current_time():
     """
-    Retrieve the current time, this function is mocked out in unit testing.
+    Retrieve the current time. This function is mocked out in unit testing.
     """
     return time.time()
 
 
-_Default = object()
-# The default timeout to use for socket connections. This is the attribute used
-# by httplib to define the default timeout
+class Timeout(object):
+    """ Timeout configuration.
 
+    Timeouts can be defined as a default for a pool::
 
-class Timeout(object):
-    """
-    Utility object for storing timeout values.
+        timeout = Timeout(connect=2.0, read=7.0)
+        http = PoolManager(timeout=timeout)
+        response = http.request('GET', 'http://example.com/')
+
+    Or per-request (which overrides the default for the pool)::
+
+        response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
 
-    Example usage:
+    Timeouts can be disabled by setting all the parameters to ``None``::
+
+        no_timeout = Timeout(connect=None, read=None)
+        response = http.request('GET', 'http://example.com/', timeout=no_timeout)
+
+
+    :param total:
+        This combines the connect and read timeouts into one; the read timeout
+        will be set to the time leftover from the connect attempt. In the
+        event that both a connect timeout and a total are specified, or a read
+        timeout and a total are specified, the shorter timeout will be applied.
 
-    .. code-block:: python
+        Defaults to None.
 
-        timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
-        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
-        pool.request(...) # Etc, etc
+    :type total: integer, float, or None
 
     :param connect:
         The maximum amount of time to wait for a connection attempt to a server
@@ -47,25 +64,15 @@ class Timeout(object):
 
     :type read: integer, float, or None
 
-    :param total:
-        This combines the connect and read timeouts into one; the read timeout
-        will be set to the time leftover from the connect attempt. In the
-        event that both a connect timeout and a total are specified, or a read
-        timeout and a total are specified, the shorter timeout will be applied.
-
-        Defaults to None.
-
-    :type total: integer, float, or None
-
     .. note::
 
         Many factors can affect the total amount of time for urllib3 to return
-        an HTTP response. Specifically, Python's DNS resolver does not obey the
-        timeout specified on the socket. Other factors that can affect total
-        request time include high CPU load, high swap, the program running at a
-        low priority level, or other behaviors. The observed running time for
-        urllib3 to return a response may be greater than the value passed to
-        `total`.
+        an HTTP response.
+
+        For example, Python's DNS resolver does not obey the timeout specified
+        on the socket. Other factors that can affect total request time include
+        high CPU load, high swap, the program running at a low priority level,
+        or other behaviors.
 
         In addition, the read and total timeouts only measure the time between
         read operations on the socket connecting the client and the server,
@@ -73,8 +80,8 @@ class Timeout(object):
         response. For most requests, the timeout is raised because the server
         has not sent the first byte in the specified time. This is not always
         the case; if a server streams one byte every fifteen seconds, a timeout
-        of 20 seconds will not ever trigger, even though the request will
-        take several minutes to complete.
+        of 20 seconds will not trigger, even though the request will take
+        several minutes to complete.
 
         If your goal is to cut off any request after a set amount of wall clock
         time, consider having a second "watcher" thread to cut off a slow
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
index 122108b..487d456 100644
--- a/urllib3/util/url.py
+++ b/urllib3/util/url.py
@@ -2,6 +2,7 @@ from collections import namedtuple
 
 from ..exceptions import LocationParseError
 
+
 url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
 
 
@@ -47,7 +48,7 @@ def split_first(s, delims):
 
     If not found, then the first part is the full input string.
 
-    Example: ::
+    Example::
 
         >>> split_first('foo/bar?baz', '?/=')
         ('foo', 'bar?baz', '/')
@@ -80,7 +81,7 @@ def parse_url(url):
 
     Partly backwards-compatible with :mod:`urlparse`.
 
-    Example: ::
+    Example::
 
         >>> parse_url('http://google.com/mail/')
         Url(scheme='http', host='google.com', port=None, path='/', ...)
@@ -95,6 +96,10 @@ def parse_url(url):
     # Additionally, this implementations does silly things to be optimal
     # on CPython.
 
+    if not url:
+        # Empty
+        return Url()
+
     scheme = None
     auth = None
     host = None
author	SVN-Git Migration <python-modules-team@lists.alioth.debian.org>	2015-10-08 13:19:39 -0700
committer	SVN-Git Migration <python-modules-team@lists.alioth.debian.org>	2015-10-08 13:19:39 -0700
commit	0f393d00b51bc54c5075447e4a8b21f0bed6acd8 (patch)
tree	401c9f6c345c8ec7818e2d3341086a1b889b3bc4 /urllib3/util
parent	73be7d6cc85a90ab4f67ffc27dc7eae672f7741f (diff)
download	python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar.gz