diff options
Diffstat (limited to 'urllib3/util')
-rw-r--r-- | urllib3/util/__init__.py | 24 | ||||
-rw-r--r-- | urllib3/util/connection.py | 98 | ||||
-rw-r--r-- | urllib3/util/request.py | 71 | ||||
-rw-r--r-- | urllib3/util/response.py | 22 | ||||
-rw-r--r-- | urllib3/util/retry.py | 285 | ||||
-rw-r--r-- | urllib3/util/ssl_.py | 280 | ||||
-rw-r--r-- | urllib3/util/timeout.py | 240 | ||||
-rw-r--r-- | urllib3/util/url.py | 214 |
8 files changed, 1234 insertions, 0 deletions
diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py new file mode 100644 index 0000000..8becc81 --- /dev/null +++ b/urllib3/util/__init__.py @@ -0,0 +1,24 @@ +# For backwards compatibility, provide imports that used to be here. +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) + +from .retry import Retry +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py new file mode 100644 index 0000000..859aec6 --- /dev/null +++ b/urllib3/util/connection.py @@ -0,0 +1,98 @@ +import socket +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return True + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except socket.error: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True + + +# This function is copied from socket.py in the Python 2.7 standard +# library test suite. Added to its signature is only `socket_options`. +def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, socket_options=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket.socket(af, socktype, proto) + + # If provided, set socket level options before connecting. + # This is the only addition urllib3 makes to this function. + _set_socket_options(sock, socket_options) + + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except socket.error as _: + err = _ + if sock is not None: + sock.close() + sock = None + + if err is not None: + raise err + else: + raise socket.error("getaddrinfo returns an empty list") + + +def _set_socket_options(sock, options): + if options is None: + return + + for opt in options: + sock.setsockopt(*opt) diff --git a/urllib3/util/request.py b/urllib3/util/request.py new file mode 100644 index 0000000..bc64f6b --- /dev/null +++ b/urllib3/util/request.py @@ -0,0 +1,71 @@ +from base64 import b64encode + +from ..packages.six import b + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None, disable_cache=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. + + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + :param disable_cache: + If ``True``, adds 'cache-control: no-cache' header. + + Example:: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(b(proxy_basic_auth)).decode('utf-8') + + if disable_cache: + headers['cache-control'] = 'no-cache' + + return headers diff --git a/urllib3/util/response.py b/urllib3/util/response.py new file mode 100644 index 0000000..45fff55 --- /dev/null +++ b/urllib3/util/response.py @@ -0,0 +1,22 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + + try: + # Check via the official file-like-object way. + return obj.closed + except AttributeError: + pass + + try: + # Check if the object is a container for another file-like object that + # gets released on exhaustion (e.g. HTTPResponse). + return obj.fp is None + except AttributeError: + pass + + raise ValueError("Unable to determine whether fp is closed.") diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py new file mode 100644 index 0000000..7e0959d --- /dev/null +++ b/urllib3/util/retry.py @@ -0,0 +1,285 @@ +import time +import logging + +from ..exceptions import ( + ConnectTimeoutError, + MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, +) +from ..packages import six + + +log = logging.getLogger(__name__) + + +class Retry(object): + """ Retry configuration. + + Each retry attempt will create a new Retry object with updated values, so + they can be safely reused. + + Retries can be defined as a default for a pool:: + + retries = Retry(connect=5, read=2, redirect=5) + http = PoolManager(retries=retries) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', retries=Retry(10)) + + Retries can be disabled by passing ``False``:: + + response = http.request('GET', 'http://example.com/', retries=False) + + Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + retries are disabled, in which case the causing exception will be raised. + + :param int total: + Total number of retries to allow. Takes precedence over other counts. + + Set to ``None`` to remove this constraint and fall back on other + counts. It's a good idea to set this to some sensibly-high value to + account for unexpected edge cases and avoid infinite retry loops. + + Set to ``0`` to fail on the first retry. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param int connect: + How many connection-related errors to retry on. + + These are errors raised before the request is sent to the remote server, + which we assume has not triggered the server to process the request. + + Set to ``0`` to fail on the first retry of this type. + + :param int read: + How many times to retry on read errors. + + These errors are raised after the request was sent to the server, so the + request may have side-effects. + + Set to ``0`` to fail on the first retry of this type. + + :param int redirect: + How many redirects to perform. Limit this to avoid infinite redirect + loops. + + A redirect is a HTTP response with a status code 301, 302, 303, 307 or + 308. + + Set to ``0`` to fail on the first retry of this type. + + Set to ``False`` to disable and imply ``raise_on_redirect=False``. + + :param iterable method_whitelist: + Set of uppercased HTTP method verbs that we should retry on. + + By default, we only retry on methods which are considered to be + indempotent (multiple requests with the same parameters end with the + same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. + + :param iterable status_forcelist: + A set of HTTP status codes that we should force a retry on. + + By default, this is disabled with ``None``. + + :param float backoff_factor: + A backoff factor to apply between attempts. urllib3 will sleep for:: + + {backoff factor} * (2 ^ ({number of total retries} - 1)) + + seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep + for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer + than :attr:`Retry.MAX_BACKOFF`. + + By default, backoff is disabled (set to 0). + + :param bool raise_on_redirect: Whether, if the number of redirects is + exhausted, to raise a MaxRetryError, or to return a response with a + response code in the 3xx range. + """ + + DEFAULT_METHOD_WHITELIST = frozenset([ + 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) + + #: Maximum backoff time. + BACKOFF_MAX = 120 + + def __init__(self, total=10, connect=None, read=None, redirect=None, + method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, + backoff_factor=0, raise_on_redirect=True, _observed_errors=0): + + self.total = total + self.connect = connect + self.read = read + + if redirect is False or total is False: + redirect = 0 + raise_on_redirect = False + + self.redirect = redirect + self.status_forcelist = status_forcelist or set() + self.method_whitelist = method_whitelist + self.backoff_factor = backoff_factor + self.raise_on_redirect = raise_on_redirect + self._observed_errors = _observed_errors # TODO: use .history instead? + + def new(self, **kw): + params = dict( + total=self.total, + connect=self.connect, read=self.read, redirect=self.redirect, + method_whitelist=self.method_whitelist, + status_forcelist=self.status_forcelist, + backoff_factor=self.backoff_factor, + raise_on_redirect=self.raise_on_redirect, + _observed_errors=self._observed_errors, + ) + params.update(kw) + return type(self)(**params) + + @classmethod + def from_int(cls, retries, redirect=True, default=None): + """ Backwards-compatibility for the old retries format.""" + if retries is None: + retries = default if default is not None else cls.DEFAULT + + if isinstance(retries, Retry): + return retries + + redirect = bool(redirect) and None + new_retries = cls(retries, redirect=redirect) + log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) + return new_retries + + def get_backoff_time(self): + """ Formula for computing the current backoff + + :rtype: float + """ + if self._observed_errors <= 1: + return 0 + + backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) + return min(self.BACKOFF_MAX, backoff_value) + + def sleep(self): + """ Sleep between retry attempts using an exponential backoff. + + By default, the backoff factor is 0 and this method will return + immediately. + """ + backoff = self.get_backoff_time() + if backoff <= 0: + return + time.sleep(backoff) + + def _is_connection_error(self, err): + """ Errors when we're fairly sure that the server did not receive the + request, so it should be safe to retry. + """ + return isinstance(err, ConnectTimeoutError) + + def _is_read_error(self, err): + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. + """ + return isinstance(err, (ReadTimeoutError, ProtocolError)) + + def is_forced_retry(self, method, status_code): + """ Is this method/status code retryable? (Based on method/codes whitelists) + """ + if self.method_whitelist and method.upper() not in self.method_whitelist: + return False + + return self.status_forcelist and status_code in self.status_forcelist + + def is_exhausted(self): + """ Are we out of retries? """ + retry_counts = (self.total, self.connect, self.read, self.redirect) + retry_counts = list(filter(None, retry_counts)) + if not retry_counts: + return False + + return min(retry_counts) < 0 + + def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): + """ Return a new Retry object with incremented retry counters. + + :param response: A response object, or None, if the server did not + return a response. + :type response: :class:`~urllib3.response.HTTPResponse` + :param Exception error: An error encountered during the request, or + None if the response was received successfully. + + :return: A new ``Retry`` object. + """ + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + _observed_errors = self._observed_errors + connect = self.connect + read = self.read + redirect = self.redirect + cause = 'unknown' + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + _observed_errors += 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False: + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + _observed_errors += 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = 'too many redirects' + + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) + + new_retry = self.new( + total=total, + connect=connect, read=read, redirect=redirect, + _observed_errors=_observed_errors) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) + + return new_retry + + + def __repr__(self): + return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' + 'read={self.read}, redirect={self.redirect})').format( + cls=type(self), self=self) + + +# For backwards compatibility (equivalent to pre-v1.9): +Retry.DEFAULT = Retry(3) diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py new file mode 100644 index 0000000..b846d42 --- /dev/null +++ b/urllib3/util/ssl_.py @@ -0,0 +1,280 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1, sha256 + +from ..exceptions import SSLError, InsecurePlatformWarning + + +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import warnings + +try: # Test for SSL features + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' + '!eNULL:!MD5' +) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/security.html' + '#insecureplatformwarning.', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1, + 32: sha256, + } + + fingerprint = fingerprint.replace(':', '').lower() + digest_length, odd = divmod(len(fingerprint), 2) + + if odd or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def create_urllib3_context(ssl_version=None, cert_reqs=None, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: + raise SSLError(e) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py new file mode 100644 index 0000000..ea7027f --- /dev/null +++ b/urllib3/util/timeout.py @@ -0,0 +1,240 @@ +# The default socket timeout, used by httplib to indicate that no timeout was +# specified by the user +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + +# A sentinel value to indicate that no timeout was specified by the user in +# urllib3 +_Default = object() + +def current_time(): + """ + Retrieve the current time. This function is mocked out in unit testing. + """ + return time.time() + + +class Timeout(object): + """ Timeout configuration. + + Timeouts can be defined as a default for a pool:: + + timeout = Timeout(connect=2.0, read=7.0) + http = PoolManager(timeout=timeout) + response = http.request('GET', 'http://example.com/') + + Or per-request (which overrides the default for the pool):: + + response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) + + Timeouts can be disabled by setting all the parameters to ``None``:: + + no_timeout = Timeout(connect=None, read=None) + response = http.request('GET', 'http://example.com/, timeout=no_timeout) + + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. + None will set an infinite timeout. + + :type read: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. + + For example, Python's DNS resolver does not obey the timeout specified + on the socket. Other factors that can affect total request time include + high CPU load, high swap, the program running at a low priority level, + or other behaviors. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not trigger, even though the request will take + several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid. + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is + used to specify in error messages. + :return: The validated and casted version of the given value. + :raises ValueError: If the type is not an integer or a float, or if it + is a numeric value less than zero. + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value + passed to this function. + + :param timeout: The legacy timeout value. + :type timeout: integer, float, sentinel default object, or None + :return: Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: Elapsed time. + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: Connect timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: Value to use for the read timeout. + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # In case the connect timeout has not yet been established. + if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/urllib3/util/url.py b/urllib3/util/url.py new file mode 100644 index 0000000..e58050c --- /dev/null +++ b/urllib3/util/url.py @@ -0,0 +1,214 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] + + +class Url(namedtuple('Url', url_attrs)): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, + query=None, fragment=None): + if path and not path.startswith('/'): + path = '/' + path + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, + query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example:: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example:: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. + + if not url: + # Empty + return Url() + + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError(url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port |