diff options
author | SVN-Git Migration <python-modules-team@lists.alioth.debian.org> | 2015-10-08 13:19:39 -0700 |
---|---|---|
committer | SVN-Git Migration <python-modules-team@lists.alioth.debian.org> | 2015-10-08 13:19:39 -0700 |
commit | 0f393d00b51bc54c5075447e4a8b21f0bed6acd8 (patch) | |
tree | 401c9f6c345c8ec7818e2d3341086a1b889b3bc4 /urllib3/util | |
parent | 73be7d6cc85a90ab4f67ffc27dc7eae672f7741f (diff) | |
download | python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar python-urllib3-0f393d00b51bc54c5075447e4a8b21f0bed6acd8.tar.gz |
Imported Upstream version 1.9
Diffstat (limited to 'urllib3/util')
-rw-r--r-- | urllib3/util/__init__.py | 9 | ||||
-rw-r--r-- | urllib3/util/connection.py | 56 | ||||
-rw-r--r-- | urllib3/util/request.py | 13 | ||||
-rw-r--r-- | urllib3/util/retry.py | 279 | ||||
-rw-r--r-- | urllib3/util/ssl_.py | 5 | ||||
-rw-r--r-- | urllib3/util/timeout.py | 67 | ||||
-rw-r--r-- | urllib3/util/url.py | 9 |
7 files changed, 388 insertions, 50 deletions
diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py index a40185e..8becc81 100644 --- a/urllib3/util/__init__.py +++ b/urllib3/util/__init__.py @@ -1,9 +1,4 @@ -# urllib3/util/__init__.py -# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - +# For backwards compatibility, provide imports that used to be here. from .connection import is_connection_dropped from .request import make_headers from .response import is_fp_closed @@ -19,6 +14,8 @@ from .timeout import ( current_time, Timeout, ) + +from .retry import Retry from .url import ( get_host, parse_url, diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index c67ef04..062ee9d 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -1,4 +1,4 @@ -from socket import error as SocketError +import socket try: from select import poll, POLLIN except ImportError: # `poll` doesn't exist on OSX and other platforms @@ -31,7 +31,7 @@ def is_connection_dropped(conn): # Platform-specific try: return select([sock], [], [], 0.0)[0] - except SocketError: + except socket.error: return True # This version is better on platforms that support it. @@ -41,3 +41,55 @@ def is_connection_dropped(conn): # Platform-specific if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/urllib3/util/request.py b/urllib3/util/request.py index bfd7a98..bc64f6b 100644 --- a/urllib3/util/request.py +++ b/urllib3/util/request.py @@ -1,7 +1,6 @@ from base64 import b64encode -from ..packages import six - +from ..packages.six import b ACCEPT_ENCODING = 'gzip,deflate' @@ -29,13 +28,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, auth header. :param proxy_basic_auth: - Colon-separated username:password string for - 'proxy-authorization: basic ...' auth header. + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. :param disable_cache: If ``True``, adds 'cache-control: no-cache' header. - Example: :: + Example:: >>> make_headers(keep_alive=True, user_agent="Batman/1.0") {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} @@ -60,11 +59,11 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, if basic_auth: headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') + b64encode(b(basic_auth)).decode('utf-8') if proxy_basic_auth: headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') + b64encode(b(proxy_basic_auth)).decode('utf-8') if disable_cache: headers['cache-control'] = 'no-cache' diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py new file mode 100644 index 0000000..9013197 --- /dev/null +++ b/urllib3/util/retry.py @@ -0,0 +1,279 @@ +import time +import logging + +from ..exceptions import ( + ProtocolError, + ConnectTimeoutError, + ReadTimeoutError, + MaxRetryError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we can't + assume that the server did not process any of it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/response retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? + """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + + else: + # FIXME: Nothing changed, scenario doesn't make sense. + _observed_errors += 1 + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py index dee4b87..9cfe2d2 100644 --- a/urllib3/util/ssl_.py +++ b/urllib3/util/ssl_.py @@ -34,10 +34,9 @@ def assert_fingerprint(cert, fingerprint): } fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: + if odd or digest_length not in hashfunc_map: raise SSLError('Fingerprint is of invalid length.') # We need encode() here for py32; works on py2 and p33. diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py index aaadc12..ea7027f 100644 --- a/urllib3/util/timeout.py +++ b/urllib3/util/timeout.py @@ -1,32 +1,49 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user from socket import _GLOBAL_DEFAULT_TIMEOUT import time from ..exceptions import TimeoutStateError +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() def current_time(): """ - Retrieve the current time, this function is mocked out in unit testing. + Retrieve the current time. This function is mocked out in unit testing. """ return time.time() -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout +class Timeout(object): + """ Timeout configuration. + Timeouts can be defined as a default for a pool:: -class Timeout(object): - """ - Utility object for storing timeout values. + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) - Example usage: + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. - .. code-block:: python + Defaults to None. - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc + :type total: integer, float, or None :param connect: The maximum amount of time to wait for a connection attempt to a server @@ -47,25 +64,15 @@ class Timeout(object): :type read: integer, float, or None - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - .. note:: Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. In addition, the read and total timeouts only measure the time between read operations on the socket connecting the client and the server, @@ -73,8 +80,8 @@ class Timeout(object): response. For most requests, the timeout is raised because the server has not sent the first byte in the specified time. This is not always the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. + of 20 seconds will not trigger, even though the request will take + several minutes to complete. If your goal is to cut off any request after a set amount of wall clock time, consider having a second "watcher" thread to cut off a slow diff --git a/urllib3/util/url.py b/urllib3/util/url.py index 122108b..487d456 100644 --- a/urllib3/util/url.py +++ b/urllib3/util/url.py @@ -2,6 +2,7 @@ from collections import namedtuple from ..exceptions import LocationParseError + url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] @@ -47,7 +48,7 @@ def split_first(s, delims): If not found, then the first part is the full input string. - Example: :: + Example:: >>> split_first('foo/bar?baz', '?/=') ('foo', 'bar?baz', '/') @@ -80,7 +81,7 @@ def parse_url(url): Partly backwards-compatible with :mod:`urlparse`. - Example: :: + Example:: >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) @@ -95,6 +96,10 @@ def parse_url(url): # Additionally, this implementations does silly things to be optimal # on CPython. + if not url: + # Empty + return Url() + scheme = None auth = None host = None |