aboutsummaryrefslogtreecommitdiff
path: root/urllib3/util
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/util')
-rw-r--r--urllib3/util/__init__.py24
-rw-r--r--urllib3/util/connection.py98
-rw-r--r--urllib3/util/request.py71
-rw-r--r--urllib3/util/response.py22
-rw-r--r--urllib3/util/retry.py285
-rw-r--r--urllib3/util/ssl_.py280
-rw-r--r--urllib3/util/timeout.py240
-rw-r--r--urllib3/util/url.py214
8 files changed, 1234 insertions, 0 deletions
diff --git a/urllib3/util/__init__.py b/urllib3/util/__init__.py
new file mode 100644
index 0000000..8becc81
--- /dev/null
+++ b/urllib3/util/__init__.py
@@ -0,0 +1,24 @@
+# For backwards compatibility, provide imports that used to be here.
+from .connection import is_connection_dropped
+from .request import make_headers
+from .response import is_fp_closed
+from .ssl_ import (
+ SSLContext,
+ HAS_SNI,
+ assert_fingerprint,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+)
+from .timeout import (
+ current_time,
+ Timeout,
+)
+
+from .retry import Retry
+from .url import (
+ get_host,
+ parse_url,
+ split_first,
+ Url,
+)
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
new file mode 100644
index 0000000..859aec6
--- /dev/null
+++ b/urllib3/util/connection.py
@@ -0,0 +1,98 @@
+import socket
+try:
+ from select import poll, POLLIN
+except ImportError: # `poll` doesn't exist on OSX and other platforms
+ poll = False
+ try:
+ from select import select
+ except ImportError: # `select` doesn't exist on AppEngine.
+ select = False
+
+
+def is_connection_dropped(conn): # Platform-specific
+ """
+ Returns True if the connection is dropped and should be closed.
+
+ :param conn:
+ :class:`httplib.HTTPConnection` object.
+
+ Note: For platforms like AppEngine, this will always return ``False`` to
+ let the platform handle connection recycling transparently for us.
+ """
+ sock = getattr(conn, 'sock', False)
+ if sock is False: # Platform-specific: AppEngine
+ return False
+ if sock is None: # Connection already closed (such as by httplib).
+ return True
+
+ if not poll:
+ if not select: # Platform-specific: AppEngine
+ return False
+
+ try:
+ return select([sock], [], [], 0.0)[0]
+ except socket.error:
+ return True
+
+ # This version is better on platforms that support it.
+ p = poll()
+ p.register(sock, POLLIN)
+ for (fno, ev) in p.poll(0.0):
+ if fno == sock.fileno():
+ # Either data is buffered (bad), or the connection is dropped.
+ return True
+
+
+# This function is copied from socket.py in the Python 2.7 standard
+# library test suite. Added to its signature is only `socket_options`.
+def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, socket_options=None):
+ """Connect to *address* and return the socket object.
+
+ Convenience function. Connect to *address* (a 2-tuple ``(host,
+ port)``) and return the socket object. Passing the optional
+ *timeout* parameter will set the timeout on the socket instance
+ before attempting to connect. If no *timeout* is supplied, the
+ global default timeout setting returned by :func:`getdefaulttimeout`
+ is used. If *source_address* is set it must be a tuple of (host, port)
+ for the socket to bind as a source address before making the connection.
+ An host of '' or port 0 tells the OS to use the default.
+ """
+
+ host, port = address
+ err = None
+ for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+
+ # If provided, set socket level options before connecting.
+ # This is the only addition urllib3 makes to this function.
+ _set_socket_options(sock, socket_options)
+
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ sock = None
+
+ if err is not None:
+ raise err
+ else:
+ raise socket.error("getaddrinfo returns an empty list")
+
+
+def _set_socket_options(sock, options):
+ if options is None:
+ return
+
+ for opt in options:
+ sock.setsockopt(*opt)
diff --git a/urllib3/util/request.py b/urllib3/util/request.py
new file mode 100644
index 0000000..bc64f6b
--- /dev/null
+++ b/urllib3/util/request.py
@@ -0,0 +1,71 @@
+from base64 import b64encode
+
+from ..packages.six import b
+
+ACCEPT_ENCODING = 'gzip,deflate'
+
+
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+ basic_auth=None, proxy_basic_auth=None, disable_cache=None):
+ """
+ Shortcuts for generating request headers.
+
+ :param keep_alive:
+ If ``True``, adds 'connection: keep-alive' header.
+
+ :param accept_encoding:
+ Can be a boolean, list, or string.
+ ``True`` translates to 'gzip,deflate'.
+ List will get joined by comma.
+ String will be used as provided.
+
+ :param user_agent:
+ String representing the user-agent you want, such as
+ "python-urllib3/0.6"
+
+ :param basic_auth:
+ Colon-separated username:password string for 'authorization: basic ...'
+ auth header.
+
+ :param proxy_basic_auth:
+ Colon-separated username:password string for 'proxy-authorization: basic ...'
+ auth header.
+
+ :param disable_cache:
+ If ``True``, adds 'cache-control: no-cache' header.
+
+ Example::
+
+ >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+ {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+ >>> make_headers(accept_encoding=True)
+ {'accept-encoding': 'gzip,deflate'}
+ """
+ headers = {}
+ if accept_encoding:
+ if isinstance(accept_encoding, str):
+ pass
+ elif isinstance(accept_encoding, list):
+ accept_encoding = ','.join(accept_encoding)
+ else:
+ accept_encoding = ACCEPT_ENCODING
+ headers['accept-encoding'] = accept_encoding
+
+ if user_agent:
+ headers['user-agent'] = user_agent
+
+ if keep_alive:
+ headers['connection'] = 'keep-alive'
+
+ if basic_auth:
+ headers['authorization'] = 'Basic ' + \
+ b64encode(b(basic_auth)).decode('utf-8')
+
+ if proxy_basic_auth:
+ headers['proxy-authorization'] = 'Basic ' + \
+ b64encode(b(proxy_basic_auth)).decode('utf-8')
+
+ if disable_cache:
+ headers['cache-control'] = 'no-cache'
+
+ return headers
diff --git a/urllib3/util/response.py b/urllib3/util/response.py
new file mode 100644
index 0000000..45fff55
--- /dev/null
+++ b/urllib3/util/response.py
@@ -0,0 +1,22 @@
+def is_fp_closed(obj):
+ """
+ Checks whether a given file-like object is closed.
+
+ :param obj:
+ The file-like object to check.
+ """
+
+ try:
+ # Check via the official file-like-object way.
+ return obj.closed
+ except AttributeError:
+ pass
+
+ try:
+ # Check if the object is a container for another file-like object that
+ # gets released on exhaustion (e.g. HTTPResponse).
+ return obj.fp is None
+ except AttributeError:
+ pass
+
+ raise ValueError("Unable to determine whether fp is closed.")
diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py
new file mode 100644
index 0000000..7e0959d
--- /dev/null
+++ b/urllib3/util/retry.py
@@ -0,0 +1,285 @@
+import time
+import logging
+
+from ..exceptions import (
+ ConnectTimeoutError,
+ MaxRetryError,
+ ProtocolError,
+ ReadTimeoutError,
+ ResponseError,
+)
+from ..packages import six
+
+
+log = logging.getLogger(__name__)
+
+
+class Retry(object):
+ """ Retry configuration.
+
+ Each retry attempt will create a new Retry object with updated values, so
+ they can be safely reused.
+
+ Retries can be defined as a default for a pool::
+
+ retries = Retry(connect=5, read=2, redirect=5)
+ http = PoolManager(retries=retries)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', retries=Retry(10))
+
+ Retries can be disabled by passing ``False``::
+
+ response = http.request('GET', 'http://example.com/', retries=False)
+
+ Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
+ retries are disabled, in which case the causing exception will be raised.
+
+ :param int total:
+ Total number of retries to allow. Takes precedence over other counts.
+
+ Set to ``None`` to remove this constraint and fall back on other
+ counts. It's a good idea to set this to some sensibly-high value to
+ account for unexpected edge cases and avoid infinite retry loops.
+
+ Set to ``0`` to fail on the first retry.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param int connect:
+ How many connection-related errors to retry on.
+
+ These are errors raised before the request is sent to the remote server,
+ which we assume has not triggered the server to process the request.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int read:
+ How many times to retry on read errors.
+
+ These errors are raised after the request was sent to the server, so the
+ request may have side-effects.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int redirect:
+ How many redirects to perform. Limit this to avoid infinite redirect
+ loops.
+
+ A redirect is a HTTP response with a status code 301, 302, 303, 307 or
+ 308.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param iterable method_whitelist:
+ Set of uppercased HTTP method verbs that we should retry on.
+
+ By default, we only retry on methods which are considered to be
+ indempotent (multiple requests with the same parameters end with the
+ same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+
+ :param iterable status_forcelist:
+ A set of HTTP status codes that we should force a retry on.
+
+ By default, this is disabled with ``None``.
+
+ :param float backoff_factor:
+ A backoff factor to apply between attempts. urllib3 will sleep for::
+
+ {backoff factor} * (2 ^ ({number of total retries} - 1))
+
+ seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
+ for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
+ than :attr:`Retry.MAX_BACKOFF`.
+
+ By default, backoff is disabled (set to 0).
+
+ :param bool raise_on_redirect: Whether, if the number of redirects is
+ exhausted, to raise a MaxRetryError, or to return a response with a
+ response code in the 3xx range.
+ """
+
+ DEFAULT_METHOD_WHITELIST = frozenset([
+ 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
+
+ #: Maximum backoff time.
+ BACKOFF_MAX = 120
+
+ def __init__(self, total=10, connect=None, read=None, redirect=None,
+ method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
+ backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
+
+ self.total = total
+ self.connect = connect
+ self.read = read
+
+ if redirect is False or total is False:
+ redirect = 0
+ raise_on_redirect = False
+
+ self.redirect = redirect
+ self.status_forcelist = status_forcelist or set()
+ self.method_whitelist = method_whitelist
+ self.backoff_factor = backoff_factor
+ self.raise_on_redirect = raise_on_redirect
+ self._observed_errors = _observed_errors # TODO: use .history instead?
+
+ def new(self, **kw):
+ params = dict(
+ total=self.total,
+ connect=self.connect, read=self.read, redirect=self.redirect,
+ method_whitelist=self.method_whitelist,
+ status_forcelist=self.status_forcelist,
+ backoff_factor=self.backoff_factor,
+ raise_on_redirect=self.raise_on_redirect,
+ _observed_errors=self._observed_errors,
+ )
+ params.update(kw)
+ return type(self)(**params)
+
+ @classmethod
+ def from_int(cls, retries, redirect=True, default=None):
+ """ Backwards-compatibility for the old retries format."""
+ if retries is None:
+ retries = default if default is not None else cls.DEFAULT
+
+ if isinstance(retries, Retry):
+ return retries
+
+ redirect = bool(redirect) and None
+ new_retries = cls(retries, redirect=redirect)
+ log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
+ return new_retries
+
+ def get_backoff_time(self):
+ """ Formula for computing the current backoff
+
+ :rtype: float
+ """
+ if self._observed_errors <= 1:
+ return 0
+
+ backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
+ return min(self.BACKOFF_MAX, backoff_value)
+
+ def sleep(self):
+ """ Sleep between retry attempts using an exponential backoff.
+
+ By default, the backoff factor is 0 and this method will return
+ immediately.
+ """
+ backoff = self.get_backoff_time()
+ if backoff <= 0:
+ return
+ time.sleep(backoff)
+
+ def _is_connection_error(self, err):
+ """ Errors when we're fairly sure that the server did not receive the
+ request, so it should be safe to retry.
+ """
+ return isinstance(err, ConnectTimeoutError)
+
+ def _is_read_error(self, err):
+ """ Errors that occur after the request has been started, so we should
+ assume that the server began processing it.
+ """
+ return isinstance(err, (ReadTimeoutError, ProtocolError))
+
+ def is_forced_retry(self, method, status_code):
+ """ Is this method/status code retryable? (Based on method/codes whitelists)
+ """
+ if self.method_whitelist and method.upper() not in self.method_whitelist:
+ return False
+
+ return self.status_forcelist and status_code in self.status_forcelist
+
+ def is_exhausted(self):
+ """ Are we out of retries? """
+ retry_counts = (self.total, self.connect, self.read, self.redirect)
+ retry_counts = list(filter(None, retry_counts))
+ if not retry_counts:
+ return False
+
+ return min(retry_counts) < 0
+
+ def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None):
+ """ Return a new Retry object with incremented retry counters.
+
+ :param response: A response object, or None, if the server did not
+ return a response.
+ :type response: :class:`~urllib3.response.HTTPResponse`
+ :param Exception error: An error encountered during the request, or
+ None if the response was received successfully.
+
+ :return: A new ``Retry`` object.
+ """
+ if self.total is False and error:
+ # Disabled, indicate to re-raise the error.
+ raise six.reraise(type(error), error, _stacktrace)
+
+ total = self.total
+ if total is not None:
+ total -= 1
+
+ _observed_errors = self._observed_errors
+ connect = self.connect
+ read = self.read
+ redirect = self.redirect
+ cause = 'unknown'
+
+ if error and self._is_connection_error(error):
+ # Connect retry?
+ if connect is False:
+ raise six.reraise(type(error), error, _stacktrace)
+ elif connect is not None:
+ connect -= 1
+ _observed_errors += 1
+
+ elif error and self._is_read_error(error):
+ # Read retry?
+ if read is False:
+ raise six.reraise(type(error), error, _stacktrace)
+ elif read is not None:
+ read -= 1
+ _observed_errors += 1
+
+ elif response and response.get_redirect_location():
+ # Redirect retry?
+ if redirect is not None:
+ redirect -= 1
+ cause = 'too many redirects'
+
+ else:
+ # Incrementing because of a server error like a 500 in
+ # status_forcelist and a the given method is in the whitelist
+ _observed_errors += 1
+ cause = ResponseError.GENERIC_ERROR
+ if response and response.status:
+ cause = ResponseError.SPECIFIC_ERROR.format(
+ status_code=response.status)
+
+ new_retry = self.new(
+ total=total,
+ connect=connect, read=read, redirect=redirect,
+ _observed_errors=_observed_errors)
+
+ if new_retry.is_exhausted():
+ raise MaxRetryError(_pool, url, error or ResponseError(cause))
+
+ log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry))
+
+ return new_retry
+
+
+ def __repr__(self):
+ return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
+ 'read={self.read}, redirect={self.redirect})').format(
+ cls=type(self), self=self)
+
+
+# For backwards compatibility (equivalent to pre-v1.9):
+Retry.DEFAULT = Retry(3)
diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py
new file mode 100644
index 0000000..b846d42
--- /dev/null
+++ b/urllib3/util/ssl_.py
@@ -0,0 +1,280 @@
+from binascii import hexlify, unhexlify
+from hashlib import md5, sha1, sha256
+
+from ..exceptions import SSLError, InsecurePlatformWarning
+
+
+SSLContext = None
+HAS_SNI = False
+create_default_context = None
+
+import errno
+import warnings
+
+try: # Test for SSL features
+ import ssl
+ from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
+ from ssl import HAS_SNI # Has SNI?
+except ImportError:
+ pass
+
+
+try:
+ from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
+except ImportError:
+ OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
+ OP_NO_COMPRESSION = 0x20000
+
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM over any AES-CBC for better performance and security,
+# - use 3DES as fallback which is secure but slow,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_CIPHERS = (
+ 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
+ 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
+ '!eNULL:!MD5'
+)
+
+try:
+ from ssl import SSLContext # Modern SSL?
+except ImportError:
+ import sys
+
+ class SSLContext(object): # Platform-specific: Python 2 & 3.1
+ supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or
+ (3, 2) <= sys.version_info)
+
+ def __init__(self, protocol_version):
+ self.protocol = protocol_version
+ # Use default values from a real SSLContext
+ self.check_hostname = False
+ self.verify_mode = ssl.CERT_NONE
+ self.ca_certs = None
+ self.options = 0
+ self.certfile = None
+ self.keyfile = None
+ self.ciphers = None
+
+ def load_cert_chain(self, certfile, keyfile):
+ self.certfile = certfile
+ self.keyfile = keyfile
+
+ def load_verify_locations(self, location):
+ self.ca_certs = location
+
+ def set_ciphers(self, cipher_suite):
+ if not self.supports_set_ciphers:
+ raise TypeError(
+ 'Your version of Python does not support setting '
+ 'a custom cipher suite. Please upgrade to Python '
+ '2.7, 3.2, or later if you need this functionality.'
+ )
+ self.ciphers = cipher_suite
+
+ def wrap_socket(self, socket, server_hostname=None):
+ warnings.warn(
+ 'A true SSLContext object is not available. This prevents '
+ 'urllib3 from configuring SSL appropriately and may cause '
+ 'certain SSL connections to fail. For more information, see '
+ 'https://urllib3.readthedocs.org/en/latest/security.html'
+ '#insecureplatformwarning.',
+ InsecurePlatformWarning
+ )
+ kwargs = {
+ 'keyfile': self.keyfile,
+ 'certfile': self.certfile,
+ 'ca_certs': self.ca_certs,
+ 'cert_reqs': self.verify_mode,
+ 'ssl_version': self.protocol,
+ }
+ if self.supports_set_ciphers: # Platform-specific: Python 2.7+
+ return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
+ else: # Platform-specific: Python 2.6
+ return wrap_socket(socket, **kwargs)
+
+
+def assert_fingerprint(cert, fingerprint):
+ """
+ Checks if given fingerprint matches the supplied certificate.
+
+ :param cert:
+ Certificate as bytes object.
+ :param fingerprint:
+ Fingerprint as string of hexdigits, can be interspersed by colons.
+ """
+
+ # Maps the length of a digest to a possible hash function producing
+ # this digest.
+ hashfunc_map = {
+ 16: md5,
+ 20: sha1,
+ 32: sha256,
+ }
+
+ fingerprint = fingerprint.replace(':', '').lower()
+ digest_length, odd = divmod(len(fingerprint), 2)
+
+ if odd or digest_length not in hashfunc_map:
+ raise SSLError('Fingerprint is of invalid length.')
+
+ # We need encode() here for py32; works on py2 and p33.
+ fingerprint_bytes = unhexlify(fingerprint.encode())
+
+ hashfunc = hashfunc_map[digest_length]
+
+ cert_digest = hashfunc(cert).digest()
+
+ if not cert_digest == fingerprint_bytes:
+ raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
+ .format(hexlify(fingerprint_bytes),
+ hexlify(cert_digest)))
+
+
+def resolve_cert_reqs(candidate):
+ """
+ Resolves the argument to a numeric constant, which can be passed to
+ the wrap_socket function/method from the ssl module.
+ Defaults to :data:`ssl.CERT_NONE`.
+ If given a string it is assumed to be the name of the constant in the
+ :mod:`ssl` module or its abbrevation.
+ (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
+ If it's neither `None` nor a string we assume it is already the numeric
+ constant which can directly be passed to wrap_socket.
+ """
+ if candidate is None:
+ return CERT_NONE
+
+ if isinstance(candidate, str):
+ res = getattr(ssl, candidate, None)
+ if res is None:
+ res = getattr(ssl, 'CERT_' + candidate)
+ return res
+
+ return candidate
+
+
+def resolve_ssl_version(candidate):
+ """
+ like resolve_cert_reqs
+ """
+ if candidate is None:
+ return PROTOCOL_SSLv23
+
+ if isinstance(candidate, str):
+ res = getattr(ssl, candidate, None)
+ if res is None:
+ res = getattr(ssl, 'PROTOCOL_' + candidate)
+ return res
+
+ return candidate
+
+
+def create_urllib3_context(ssl_version=None, cert_reqs=None,
+ options=None, ciphers=None):
+ """All arguments have the same meaning as ``ssl_wrap_socket``.
+
+ By default, this function does a lot of the same work that
+ ``ssl.create_default_context`` does on Python 3.4+. It:
+
+ - Disables SSLv2, SSLv3, and compression
+ - Sets a restricted set of server ciphers
+
+ If you wish to enable SSLv3, you can do::
+
+ from urllib3.util import ssl_
+ context = ssl_.create_urllib3_context()
+ context.options &= ~ssl_.OP_NO_SSLv3
+
+ You can do the same to enable compression (substituting ``COMPRESSION``
+ for ``SSLv3`` in the last line above).
+
+ :param ssl_version:
+ The desired protocol version to use. This will default to
+ PROTOCOL_SSLv23 which will negotiate the highest protocol that both
+ the server and your installation of OpenSSL support.
+ :param cert_reqs:
+ Whether to require the certificate verification. This defaults to
+ ``ssl.CERT_REQUIRED``.
+ :param options:
+ Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
+ ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
+ :param ciphers:
+ Which cipher suites to allow the server to select.
+ :returns:
+ Constructed SSLContext object with specified options
+ :rtype: SSLContext
+ """
+ context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
+
+ # Setting the default here, as we may have no ssl module on import
+ cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
+
+ if options is None:
+ options = 0
+ # SSLv2 is easily broken and is considered harmful and dangerous
+ options |= OP_NO_SSLv2
+ # SSLv3 has several problems and is now dangerous
+ options |= OP_NO_SSLv3
+ # Disable compression to prevent CRIME attacks for OpenSSL 1.0+
+ # (issue #309)
+ options |= OP_NO_COMPRESSION
+
+ context.options |= options
+
+ if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6
+ context.set_ciphers(ciphers or DEFAULT_CIPHERS)
+
+ context.verify_mode = cert_reqs
+ if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2
+ # We do our own verification, including fingerprints and alternative
+ # hostnames. So disable it here
+ context.check_hostname = False
+ return context
+
+
+def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+ ca_certs=None, server_hostname=None,
+ ssl_version=None, ciphers=None, ssl_context=None):
+ """
+ All arguments except for server_hostname and ssl_context have the same
+ meaning as they do when using :func:`ssl.wrap_socket`.
+
+ :param server_hostname:
+ When SNI is supported, the expected hostname of the certificate
+ :param ssl_context:
+ A pre-made :class:`SSLContext` object. If none is provided, one will
+ be created using :func:`create_urllib3_context`.
+ :param ciphers:
+ A string of ciphers we wish the client to support. This is not
+ supported on Python 2.6 as the ssl module does not support it.
+ """
+ context = ssl_context
+ if context is None:
+ context = create_urllib3_context(ssl_version, cert_reqs,
+ ciphers=ciphers)
+
+ if ca_certs:
+ try:
+ context.load_verify_locations(ca_certs)
+ except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2
+ raise SSLError(e)
+ # Py33 raises FileNotFoundError which subclasses OSError
+ # These are not equivalent unless we check the errno attribute
+ except OSError as e: # Platform-specific: Python 3.3 and beyond
+ if e.errno == errno.ENOENT:
+ raise SSLError(e)
+ raise
+ if certfile:
+ context.load_cert_chain(certfile, keyfile)
+ if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
+ return context.wrap_socket(sock, server_hostname=server_hostname)
+ return context.wrap_socket(sock)
diff --git a/urllib3/util/timeout.py b/urllib3/util/timeout.py
new file mode 100644
index 0000000..ea7027f
--- /dev/null
+++ b/urllib3/util/timeout.py
@@ -0,0 +1,240 @@
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+import time
+
+from ..exceptions import TimeoutStateError
+
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
+
+def current_time():
+ """
+ Retrieve the current time. This function is mocked out in unit testing.
+ """
+ return time.time()
+
+
+class Timeout(object):
+ """ Timeout configuration.
+
+ Timeouts can be defined as a default for a pool::
+
+ timeout = Timeout(connect=2.0, read=7.0)
+ http = PoolManager(timeout=timeout)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+
+ Timeouts can be disabled by setting all the parameters to ``None``::
+
+ no_timeout = Timeout(connect=None, read=None)
+ response = http.request('GET', 'http://example.com/, timeout=no_timeout)
+
+
+ :param total:
+ This combines the connect and read timeouts into one; the read timeout
+ will be set to the time leftover from the connect attempt. In the
+ event that both a connect timeout and a total are specified, or a read
+ timeout and a total are specified, the shorter timeout will be applied.
+
+ Defaults to None.
+
+ :type total: integer, float, or None
+
+ :param connect:
+ The maximum amount of time to wait for a connection attempt to a server
+ to succeed. Omitting the parameter will default the connect timeout to
+ the system default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout for connection attempts.
+
+ :type connect: integer, float, or None
+
+ :param read:
+ The maximum amount of time to wait between consecutive
+ read operations for a response from the server. Omitting
+ the parameter will default the read timeout to the system
+ default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout.
+
+ :type read: integer, float, or None
+
+ .. note::
+
+ Many factors can affect the total amount of time for urllib3 to return
+ an HTTP response.
+
+ For example, Python's DNS resolver does not obey the timeout specified
+ on the socket. Other factors that can affect total request time include
+ high CPU load, high swap, the program running at a low priority level,
+ or other behaviors.
+
+ In addition, the read and total timeouts only measure the time between
+ read operations on the socket connecting the client and the server,
+ not the total amount of time for the request to return a complete
+ response. For most requests, the timeout is raised because the server
+ has not sent the first byte in the specified time. This is not always
+ the case; if a server streams one byte every fifteen seconds, a timeout
+ of 20 seconds will not trigger, even though the request will take
+ several minutes to complete.
+
+ If your goal is to cut off any request after a set amount of wall clock
+ time, consider having a second "watcher" thread to cut off a slow
+ request.
+ """
+
+ #: A sentinel object representing the default timeout value
+ DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
+
+ def __init__(self, total=None, connect=_Default, read=_Default):
+ self._connect = self._validate_timeout(connect, 'connect')
+ self._read = self._validate_timeout(read, 'read')
+ self.total = self._validate_timeout(total, 'total')
+ self._start_connect = None
+
+ def __str__(self):
+ return '%s(connect=%r, read=%r, total=%r)' % (
+ type(self).__name__, self._connect, self._read, self.total)
+
+ @classmethod
+ def _validate_timeout(cls, value, name):
+ """ Check that a timeout attribute is valid.
+
+ :param value: The timeout value to validate
+ :param name: The name of the timeout attribute to validate. This is
+ used to specify in error messages.
+ :return: The validated and casted version of the given value.
+ :raises ValueError: If the type is not an integer or a float, or if it
+ is a numeric value less than zero.
+ """
+ if value is _Default:
+ return cls.DEFAULT_TIMEOUT
+
+ if value is None or value is cls.DEFAULT_TIMEOUT:
+ return value
+
+ try:
+ float(value)
+ except (TypeError, ValueError):
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ try:
+ if value < 0:
+ raise ValueError("Attempted to set %s timeout to %s, but the "
+ "timeout cannot be set to a value less "
+ "than 0." % (name, value))
+ except TypeError: # Python 3
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ return value
+
+ @classmethod
+ def from_float(cls, timeout):
+ """ Create a new Timeout from a legacy timeout value.
+
+ The timeout value used by httplib.py sets the same timeout on the
+ connect(), and recv() socket requests. This creates a :class:`Timeout`
+ object that sets the individual timeouts to the ``timeout`` value
+ passed to this function.
+
+ :param timeout: The legacy timeout value.
+ :type timeout: integer, float, sentinel default object, or None
+ :return: Timeout object
+ :rtype: :class:`Timeout`
+ """
+ return Timeout(read=timeout, connect=timeout)
+
+ def clone(self):
+ """ Create a copy of the timeout object
+
+ Timeout properties are stored per-pool but each request needs a fresh
+ Timeout object to ensure each one has its own start/stop configured.
+
+ :return: a copy of the timeout object
+ :rtype: :class:`Timeout`
+ """
+ # We can't use copy.deepcopy because that will also create a new object
+ # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
+ # detect the user default.
+ return Timeout(connect=self._connect, read=self._read,
+ total=self.total)
+
+ def start_connect(self):
+ """ Start the timeout clock, used during a connect() attempt
+
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to start a timer that has been started already.
+ """
+ if self._start_connect is not None:
+ raise TimeoutStateError("Timeout timer has already been started.")
+ self._start_connect = current_time()
+ return self._start_connect
+
+ def get_connect_duration(self):
+ """ Gets the time elapsed since the call to :meth:`start_connect`.
+
+ :return: Elapsed time.
+ :rtype: float
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to get duration for a timer that hasn't been started.
+ """
+ if self._start_connect is None:
+ raise TimeoutStateError("Can't get connect duration for timer "
+ "that has not started.")
+ return current_time() - self._start_connect
+
+ @property
+ def connect_timeout(self):
+ """ Get the value to use when setting a connection timeout.
+
+ This will be a positive float or integer, the value None
+ (never timeout), or the default system timeout.
+
+ :return: Connect timeout.
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ """
+ if self.total is None:
+ return self._connect
+
+ if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
+ return self.total
+
+ return min(self._connect, self.total)
+
+ @property
+ def read_timeout(self):
+ """ Get the value for the read timeout.
+
+ This assumes some time has elapsed in the connection timeout and
+ computes the read timeout appropriately.
+
+ If self.total is set, the read timeout is dependent on the amount of
+ time taken by the connect timeout. If the connection time has not been
+ established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
+ raised.
+
+ :return: Value to use for the read timeout.
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
+ has not yet been called on this object.
+ """
+ if (self.total is not None and
+ self.total is not self.DEFAULT_TIMEOUT and
+ self._read is not None and
+ self._read is not self.DEFAULT_TIMEOUT):
+ # In case the connect timeout has not yet been established.
+ if self._start_connect is None:
+ return self._read
+ return max(0, min(self.total - self.get_connect_duration(),
+ self._read))
+ elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
+ return max(0, self.total - self.get_connect_duration())
+ else:
+ return self._read
diff --git a/urllib3/util/url.py b/urllib3/util/url.py
new file mode 100644
index 0000000..e58050c
--- /dev/null
+++ b/urllib3/util/url.py
@@ -0,0 +1,214 @@
+from collections import namedtuple
+
+from ..exceptions import LocationParseError
+
+
+url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+
+
+class Url(namedtuple('Url', url_attrs)):
+ """
+ Datastructure for representing an HTTP URL. Used as a return value for
+ :func:`parse_url`.
+ """
+ slots = ()
+
+ def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+ query=None, fragment=None):
+ if path and not path.startswith('/'):
+ path = '/' + path
+ return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
+ query, fragment)
+
+ @property
+ def hostname(self):
+ """For backwards-compatibility with urlparse. We're nice like that."""
+ return self.host
+
+ @property
+ def request_uri(self):
+ """Absolute path including the query string."""
+ uri = self.path or '/'
+
+ if self.query is not None:
+ uri += '?' + self.query
+
+ return uri
+
+ @property
+ def netloc(self):
+ """Network location including host and port"""
+ if self.port:
+ return '%s:%d' % (self.host, self.port)
+ return self.host
+
+ @property
+ def url(self):
+ """
+ Convert self into a url
+
+ This function should more or less round-trip with :func:`.parse_url`. The
+ returned url may not be exactly the same as the url inputted to
+ :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
+ with a blank port will have : removed).
+
+ Example: ::
+
+ >>> U = parse_url('http://google.com/mail/')
+ >>> U.url
+ 'http://google.com/mail/'
+ >>> Url('http', 'username:password', 'host.com', 80,
+ ... '/path', 'query', 'fragment').url
+ 'http://username:password@host.com:80/path?query#fragment'
+ """
+ scheme, auth, host, port, path, query, fragment = self
+ url = ''
+
+ # We use "is not None" we want things to happen with empty strings (or 0 port)
+ if scheme is not None:
+ url += scheme + '://'
+ if auth is not None:
+ url += auth + '@'
+ if host is not None:
+ url += host
+ if port is not None:
+ url += ':' + str(port)
+ if path is not None:
+ url += path
+ if query is not None:
+ url += '?' + query
+ if fragment is not None:
+ url += '#' + fragment
+
+ return url
+
+ def __str__(self):
+ return self.url
+
+def split_first(s, delims):
+ """
+ Given a string and an iterable of delimiters, split on the first found
+ delimiter. Return two split parts and the matched delimiter.
+
+ If not found, then the first part is the full input string.
+
+ Example::
+
+ >>> split_first('foo/bar?baz', '?/=')
+ ('foo', 'bar?baz', '/')
+ >>> split_first('foo/bar?baz', '123')
+ ('foo/bar?baz', '', None)
+
+ Scales linearly with number of delims. Not ideal for large number of delims.
+ """
+ min_idx = None
+ min_delim = None
+ for d in delims:
+ idx = s.find(d)
+ if idx < 0:
+ continue
+
+ if min_idx is None or idx < min_idx:
+ min_idx = idx
+ min_delim = d
+
+ if min_idx is None or min_idx < 0:
+ return s, '', None
+
+ return s[:min_idx], s[min_idx+1:], min_delim
+
+
+def parse_url(url):
+ """
+ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+ performed to parse incomplete urls. Fields not provided will be None.
+
+ Partly backwards-compatible with :mod:`urlparse`.
+
+ Example::
+
+ >>> parse_url('http://google.com/mail/')
+ Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+ >>> parse_url('google.com:80')
+ Url(scheme=None, host='google.com', port=80, path=None, ...)
+ >>> parse_url('/foo?bar')
+ Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+ """
+
+ # While this code has overlap with stdlib's urlparse, it is much
+ # simplified for our needs and less annoying.
+ # Additionally, this implementations does silly things to be optimal
+ # on CPython.
+
+ if not url:
+ # Empty
+ return Url()
+
+ scheme = None
+ auth = None
+ host = None
+ port = None
+ path = None
+ fragment = None
+ query = None
+
+ # Scheme
+ if '://' in url:
+ scheme, url = url.split('://', 1)
+
+ # Find the earliest Authority Terminator
+ # (http://tools.ietf.org/html/rfc3986#section-3.2)
+ url, path_, delim = split_first(url, ['/', '?', '#'])
+
+ if delim:
+ # Reassemble the path
+ path = delim + path_
+
+ # Auth
+ if '@' in url:
+ # Last '@' denotes end of auth part
+ auth, url = url.rsplit('@', 1)
+
+ # IPv6
+ if url and url[0] == '[':
+ host, url = url.split(']', 1)
+ host += ']'
+
+ # Port
+ if ':' in url:
+ _host, port = url.split(':', 1)
+
+ if not host:
+ host = _host
+
+ if port:
+ # If given, ports must be integers.
+ if not port.isdigit():
+ raise LocationParseError(url)
+ port = int(port)
+ else:
+ # Blank ports are cool, too. (rfc3986#section-3.2.3)
+ port = None
+
+ elif not host and url:
+ host = url
+
+ if not path:
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+ # Fragment
+ if '#' in path:
+ path, fragment = path.split('#', 1)
+
+ # Query
+ if '?' in path:
+ path, query = path.split('?', 1)
+
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+def get_host(url):
+ """
+ Deprecated. Use :func:`.parse_url` instead.
+ """
+ p = parse_url(url)
+ return p.scheme or 'http', p.hostname, p.port