aboutsummaryrefslogtreecommitdiff
path: root/urllib3/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'urllib3/util.py')
-rw-r--r--urllib3/util.py260
1 files changed, 254 insertions, 6 deletions
diff --git a/urllib3/util.py b/urllib3/util.py
index 544f9ed..266c9ed 100644
--- a/urllib3/util.py
+++ b/urllib3/util.py
@@ -6,10 +6,11 @@
from base64 import b64encode
+from binascii import hexlify, unhexlify
from collections import namedtuple
-from socket import error as SocketError
from hashlib import md5, sha1
-from binascii import hexlify, unhexlify
+from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT
+import time
try:
from select import poll, POLLIN
@@ -31,9 +32,234 @@ try: # Test for SSL features
except ImportError:
pass
-
from .packages import six
-from .exceptions import LocationParseError, SSLError
+from .exceptions import LocationParseError, SSLError, TimeoutStateError
+
+
+_Default = object()
+# The default timeout to use for socket connections. This is the attribute used
+# by httplib to define the default timeout
+
+
+def current_time():
+ """
+ Retrieve the current time, this function is mocked out in unit testing.
+ """
+ return time.time()
+
+
+class Timeout(object):
+ """
+ Utility object for storing timeout values.
+
+ Example usage:
+
+ .. code-block:: python
+
+ timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
+ pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
+ pool.request(...) # Etc, etc
+
+ :param connect:
+ The maximum amount of time to wait for a connection attempt to a server
+ to succeed. Omitting the parameter will default the connect timeout to
+ the system default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout for connection attempts.
+
+ :type connect: integer, float, or None
+
+ :param read:
+ The maximum amount of time to wait between consecutive
+ read operations for a response from the server. Omitting
+ the parameter will default the read timeout to the system
+ default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout.
+
+ :type read: integer, float, or None
+
+ :param total:
+ The maximum amount of time to wait for an HTTP request to connect and
+ return. This combines the connect and read timeouts into one. In the
+ event that both a connect timeout and a total are specified, or a read
+ timeout and a total are specified, the shorter timeout will be applied.
+
+ Defaults to None.
+
+
+ :type total: integer, float, or None
+
+ .. note::
+
+ Many factors can affect the total amount of time for urllib3 to return
+ an HTTP response. Specifically, Python's DNS resolver does not obey the
+ timeout specified on the socket. Other factors that can affect total
+ request time include high CPU load, high swap, the program running at a
+ low priority level, or other behaviors. The observed running time for
+ urllib3 to return a response may be greater than the value passed to
+ `total`.
+
+ In addition, the read and total timeouts only measure the time between
+ read operations on the socket connecting the client and the server, not
+ the total amount of time for the request to return a complete response.
+ As an example, you may want a request to return within 7 seconds or
+ fail, so you set the ``total`` timeout to 7 seconds. If the server
+ sends one byte to you every 5 seconds, the request will **not** trigger
+ time out. This case is admittedly rare.
+ """
+
+ #: A sentinel object representing the default timeout value
+ DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
+
+ def __init__(self, connect=_Default, read=_Default, total=None):
+ self._connect = self._validate_timeout(connect, 'connect')
+ self._read = self._validate_timeout(read, 'read')
+ self.total = self._validate_timeout(total, 'total')
+ self._start_connect = None
+
+ def __str__(self):
+ return '%s(connect=%r, read=%r, total=%r)' % (
+ type(self).__name__, self._connect, self._read, self.total)
+
+
+ @classmethod
+ def _validate_timeout(cls, value, name):
+ """ Check that a timeout attribute is valid
+
+ :param value: The timeout value to validate
+ :param name: The name of the timeout attribute to validate. This is used
+ for clear error messages
+ :return: the value
+ :raises ValueError: if the type is not an integer or a float, or if it
+ is a numeric value less than zero
+ """
+ if value is _Default:
+ return cls.DEFAULT_TIMEOUT
+
+ if value is None or value is cls.DEFAULT_TIMEOUT:
+ return value
+
+ try:
+ float(value)
+ except (TypeError, ValueError):
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ try:
+ if value < 0:
+ raise ValueError("Attempted to set %s timeout to %s, but the "
+ "timeout cannot be set to a value less "
+ "than 0." % (name, value))
+ except TypeError: # Python 3
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int or float." % (name, value))
+
+ return value
+
+ @classmethod
+ def from_float(cls, timeout):
+ """ Create a new Timeout from a legacy timeout value.
+
+ The timeout value used by httplib.py sets the same timeout on the
+ connect(), and recv() socket requests. This creates a :class:`Timeout`
+ object that sets the individual timeouts to the ``timeout`` value passed
+ to this function.
+
+ :param timeout: The legacy timeout value
+ :type timeout: integer, float, sentinel default object, or None
+ :return: a Timeout object
+ :rtype: :class:`Timeout`
+ """
+ return Timeout(read=timeout, connect=timeout)
+
+ def clone(self):
+ """ Create a copy of the timeout object
+
+ Timeout properties are stored per-pool but each request needs a fresh
+ Timeout object to ensure each one has its own start/stop configured.
+
+ :return: a copy of the timeout object
+ :rtype: :class:`Timeout`
+ """
+ # We can't use copy.deepcopy because that will also create a new object
+ # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
+ # detect the user default.
+ return Timeout(connect=self._connect, read=self._read,
+ total=self.total)
+
+ def start_connect(self):
+ """ Start the timeout clock, used during a connect() attempt
+
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to start a timer that has been started already.
+ """
+ if self._start_connect is not None:
+ raise TimeoutStateError("Timeout timer has already been started.")
+ self._start_connect = current_time()
+ return self._start_connect
+
+ def get_connect_duration(self):
+ """ Gets the time elapsed since the call to :meth:`start_connect`.
+
+ :return: the elapsed time
+ :rtype: float
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to get duration for a timer that hasn't been started.
+ """
+ if self._start_connect is None:
+ raise TimeoutStateError("Can't get connect duration for timer "
+ "that has not started.")
+ return current_time() - self._start_connect
+
+ @property
+ def connect_timeout(self):
+ """ Get the value to use when setting a connection timeout.
+
+ This will be a positive float or integer, the value None
+ (never timeout), or the default system timeout.
+
+ :return: the connect timeout
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ """
+ if self.total is None:
+ return self._connect
+
+ if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
+ return self.total
+
+ return min(self._connect, self.total)
+
+ @property
+ def read_timeout(self):
+ """ Get the value for the read timeout.
+
+ This assumes some time has elapsed in the connection timeout and
+ computes the read timeout appropriately.
+
+ If self.total is set, the read timeout is dependent on the amount of
+ time taken by the connect timeout. If the connection time has not been
+ established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
+ raised.
+
+ :return: the value to use for the read timeout
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
+ has not yet been called on this object.
+ """
+ if (self.total is not None and
+ self.total is not self.DEFAULT_TIMEOUT and
+ self._read is not None and
+ self._read is not self.DEFAULT_TIMEOUT):
+ # in case the connect timeout has not yet been established.
+ if self._start_connect is None:
+ return self._read
+ return max(0, min(self.total - self.get_connect_duration(),
+ self._read))
+ elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
+ return max(0, self.total - self.get_connect_duration())
+ else:
+ return self._read
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
@@ -61,6 +287,13 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
return uri
+ @property
+ def netloc(self):
+ """Network location including host and port"""
+ if self.port:
+ return '%s:%d' % (self.host, self.port)
+ return self.host
+
def split_first(s, delims):
"""
@@ -114,7 +347,7 @@ def parse_url(url):
# While this code has overlap with stdlib's urlparse, it is much
# simplified for our needs and less annoying.
- # Additionally, this imeplementations does silly things to be optimal
+ # Additionally, this implementations does silly things to be optimal
# on CPython.
scheme = None
@@ -143,7 +376,8 @@ def parse_url(url):
# IPv6
if url and url[0] == '[':
- host, url = url[1:].split(']', 1)
+ host, url = url.split(']', 1)
+ host += ']'
# Port
if ':' in url:
@@ -341,6 +575,20 @@ def assert_fingerprint(cert, fingerprint):
.format(hexlify(fingerprint_bytes),
hexlify(cert_digest)))
+def is_fp_closed(obj):
+ """
+ Checks whether a given file-like object is closed.
+
+ :param obj:
+ The file-like object to check.
+ """
+ if hasattr(obj, 'fp'):
+ # Object is a container for another file-like object that gets released
+ # on exhaustion (e.g. HTTPResponse)
+ return obj.fp is None
+
+ return obj.closed
+
if SSLContext is not None: # Python 3.2+
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,