diff options
Diffstat (limited to 'requests')
25 files changed, 1173 insertions, 844 deletions
| diff --git a/requests/__init__.py b/requests/__init__.py index 2e9f3a0..bba1900 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -42,8 +42,8 @@ is at <http://python-requests.org>.  """  __title__ = 'requests' -__version__ = '2.2.1' -__build__ = 0x020201 +__version__ = '2.3.0' +__build__ = 0x020300  __author__ = 'Kenneth Reitz'  __license__ = 'Apache 2.0'  __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/requests/adapters.py b/requests/adapters.py index dd10e95..eb7a2d2 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -16,7 +16,7 @@ from .packages.urllib3.response import HTTPResponse  from .packages.urllib3.util import Timeout as TimeoutSauce  from .compat import urlparse, basestring, urldefrag, unquote  from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, -                    except_on_missing_scheme, get_auth_from_url) +                    prepend_scheme_if_needed, get_auth_from_url)  from .structures import CaseInsensitiveDict  from .packages.urllib3.exceptions import MaxRetryError  from .packages.urllib3.exceptions import TimeoutError @@ -203,7 +203,7 @@ class HTTPAdapter(BaseAdapter):          proxy = proxies.get(urlparse(url.lower()).scheme)          if proxy: -            except_on_missing_scheme(proxy) +            proxy = prepend_scheme_if_needed(proxy, 'http')              proxy_headers = self.proxy_headers(proxy)              if not proxy in self.proxy_manager: @@ -310,10 +310,7 @@ class HTTPAdapter(BaseAdapter):          chunked = not (request.body is None or 'Content-Length' in request.headers) -        if stream: -            timeout = TimeoutSauce(connect=timeout) -        else: -            timeout = TimeoutSauce(connect=timeout, read=timeout) +        timeout = TimeoutSauce(connect=timeout, read=timeout)          try:              if not chunked: @@ -372,25 +369,20 @@ class HTTPAdapter(BaseAdapter):                      conn._put_conn(low_conn)          except socket.error as sockerr: -            raise ConnectionError(sockerr) +            raise ConnectionError(sockerr, request=request)          except MaxRetryError as e: -            raise ConnectionError(e) +            raise ConnectionError(e, request=request)          except _ProxyError as e:              raise ProxyError(e)          except (_SSLError, _HTTPError) as e:              if isinstance(e, _SSLError): -                raise SSLError(e) +                raise SSLError(e, request=request)              elif isinstance(e, TimeoutError): -                raise Timeout(e) +                raise Timeout(e, request=request)              else:                  raise -        r = self.build_response(request, resp) - -        if not stream: -            r.content - -        return r +        return self.build_response(request, resp) diff --git a/requests/api.py b/requests/api.py index baf43dd..01d853d 100644 --- a/requests/api.py +++ b/requests/api.py @@ -26,7 +26,7 @@ def request(method, url, **kwargs):      :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.      :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload.      :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. -    :param timeout: (optional) Float describing the timeout of the request. +    :param timeout: (optional) Float describing the timeout of the request in seconds.      :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.      :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.      :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. diff --git a/requests/auth.py b/requests/auth.py index 6664cd8..9f831b7 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -11,7 +11,6 @@ import os  import re  import time  import hashlib -import logging  from base64 import b64encode @@ -19,8 +18,6 @@ from .compat import urlparse, str  from .cookies import extract_cookies_to_jar  from .utils import parse_dict_header -log = logging.getLogger(__name__) -  CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'  CONTENT_TYPE_MULTI_PART = 'multipart/form-data' diff --git a/requests/exceptions.py b/requests/exceptions.py index cd3c760..a4ee9d6 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -14,15 +14,22 @@ class RequestException(IOError):      """There was an ambiguous exception that occurred while handling your      request.""" +    def __init__(self, *args, **kwargs): +        """ +        Initialize RequestException with `request` and `response` objects. +        """ +        response = kwargs.pop('response', None) +        self.response = response +        self.request = kwargs.pop('request', None) +        if (response is not None and not self.request and +                hasattr(response, 'request')): +            self.request = self.response.request +        super(RequestException, self).__init__(*args, **kwargs) +  class HTTPError(RequestException):      """An HTTP error occurred.""" -    def __init__(self, *args, **kwargs): -        """ Initializes HTTPError with optional `response` object. """ -        self.response = kwargs.pop('response', None) -        super(HTTPError, self).__init__(*args, **kwargs) -  class ConnectionError(RequestException):      """A Connection error occurred.""" diff --git a/requests/models.py b/requests/models.py index ae46a83..120968f 100644 --- a/requests/models.py +++ b/requests/models.py @@ -8,7 +8,6 @@ This module contains the primary objects that power Requests.  """  import collections -import logging  import datetime  from io import BytesIO, UnsupportedOperation @@ -31,12 +30,20 @@ from .utils import (  from .compat import (      cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO,      is_py2, chardet, json, builtin_str, basestring, IncompleteRead) +from .status_codes import codes +#: The set of HTTP status codes that indicate an automatically +#: processable redirect. +REDIRECT_STATI = ( +    codes.moved,  # 301 +    codes.found,  # 302 +    codes.other,  # 303 +    codes.temporary_moved,  # 307 +) +DEFAULT_REDIRECT_LIMIT = 30  CONTENT_CHUNK_SIZE = 10 * 1024  ITER_CHUNK_SIZE = 512 -log = logging.getLogger(__name__) -  class RequestEncodingMixin(object):      @property @@ -401,9 +408,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):          is_stream = all([              hasattr(data, '__iter__'), -            not isinstance(data, basestring), -            not isinstance(data, list), -            not isinstance(data, dict) +            not isinstance(data, (basestring, list, tuple, dict))          ])          try: @@ -517,7 +522,7 @@ class Response(object):          self._content = False          self._content_consumed = False -        #: Integer Code of responded HTTP Status. +        #: Integer Code of responded HTTP Status, e.g. 404 or 200.          self.status_code = None          #: Case-insensitive Dictionary of Response Headers. @@ -541,6 +546,7 @@ class Response(object):          #: up here. The list is sorted from the oldest to the most recent request.          self.history = [] +        #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK".          self.reason = None          #: A CookieJar of Cookies the server sent back. @@ -567,6 +573,7 @@ class Response(object):          # pickled objects do not have .raw          setattr(self, '_content_consumed', True) +        setattr(self, 'raw', None)      def __repr__(self):          return '<Response [%s]>' % (self.status_code) @@ -592,9 +599,15 @@ class Response(object):          return True      @property +    def is_redirect(self): +        """True if this Response is a well-formed HTTP redirect that could have +        been processed automatically (by :meth:`Session.resolve_redirects`). +        """ +        return ('location' in self.headers and self.status_code in REDIRECT_STATI) + +    @property      def apparent_encoding(self): -        """The apparent encoding, provided by the lovely Charade library -        (Thanks, Ian!).""" +        """The apparent encoding, provided by the chardet library"""          return chardet.detect(self.content)['encoding']      def iter_content(self, chunk_size=1, decode_unicode=False): @@ -603,17 +616,15 @@ class Response(object):          large responses.  The chunk size is the number of bytes it should          read into memory.  This is not necessarily the length of each item          returned as decoding can take place. -        """ -        if self._content_consumed: -            # simulate reading small chunks of the content -            return iter_slices(self._content, chunk_size) +        If decode_unicode is True, content will be decoded using the best +        available encoding based on the response. +        """          def generate():              try:                  # Special case for urllib3.                  try: -                    for chunk in self.raw.stream(chunk_size, -                                                 decode_content=True): +                    for chunk in self.raw.stream(chunk_size, decode_content=True):                          yield chunk                  except IncompleteRead as e:                      raise ChunkedEncodingError(e) @@ -629,12 +640,17 @@ class Response(object):              self._content_consumed = True -        gen = generate() +        # simulate reading small chunks of the content +        reused_chunks = iter_slices(self._content, chunk_size) + +        stream_chunks = generate() + +        chunks = reused_chunks if self._content_consumed else stream_chunks          if decode_unicode: -            gen = stream_decode_response_unicode(gen, self) +            chunks = stream_decode_response_unicode(chunks, self) -        return gen +        return chunks      def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None):          """Iterates over the response data, one line at a time.  When @@ -644,8 +660,7 @@ class Response(object):          pending = None -        for chunk in self.iter_content(chunk_size=chunk_size, -                                       decode_unicode=decode_unicode): +        for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode):              if pending is not None:                  chunk = pending + chunk @@ -693,7 +708,7 @@ class Response(object):          If Response.encoding is None, encoding will be guessed using          ``chardet``. -        The encoding of the response content is determined based soley on HTTP +        The encoding of the response content is determined based solely on HTTP          headers, following RFC 2616 to the letter. If you can take advantage of          non-HTTP knowledge to make a better guess at the encoding, you should          set ``r.encoding`` appropriately before accessing this property. @@ -737,7 +752,14 @@ class Response(object):              # a best guess).              encoding = guess_json_utf(self.content)              if encoding is not None: -                return json.loads(self.content.decode(encoding), **kwargs) +                try: +                    return json.loads(self.content.decode(encoding), **kwargs) +                except UnicodeDecodeError: +                    # Wrong UTF codec detected; usually because it's not UTF-8 +                    # but some other 8-bit codec.  This is an RFC violation, +                    # and the server didn't bother to tell us what codec *was* +                    # used. +                    pass          return json.loads(self.text, **kwargs)      @property @@ -773,8 +795,8 @@ class Response(object):              raise HTTPError(http_error_msg, response=self)      def close(self): -        """Closes the underlying file descriptor and releases the connection -        back to the pool. +        """Releases the connection back to the pool. Once this method has been +        called the underlying ``raw`` object must not be accessed again.          *Note: Should not normally need to be called explicitly.*          """ diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 5907b0d..9cea3a4 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -4,7 +4,7 @@  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import MutableMapping +from collections import Mapping, MutableMapping  try:      from threading import RLock  except ImportError: # Platform-specific: No threads available @@ -20,9 +20,10 @@ try: # Python 2.7+      from collections import OrderedDict  except ImportError:      from .packages.ordered_dict import OrderedDict +from .packages.six import itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']  _Null = object() @@ -101,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping):      def keys(self):          with self.lock:              return self._container.keys() + + +class HTTPHeaderDict(MutableMapping): +    """ +    :param headers: +        An iterable of field-value pairs. Must not contain multiple field names +        when compared case-insensitively. + +    :param kwargs: +        Additional field-value pairs to pass in to ``dict.update``. + +    A ``dict`` like container for storing HTTP Headers. + +    Field names are stored and compared case-insensitively in compliance with +    RFC 2616. Iteration provides the first case-sensitive key seen for each +    case-insensitive pair. + +    Using ``__setitem__`` syntax overwrites fields that compare equal +    case-insensitively in order to maintain ``dict``'s api. For fields that +    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` +    in a loop. + +    If multiple fields that are equal case-insensitively are passed to the +    constructor or ``.update``, the behavior is undefined and some will be +    lost. + +    >>> headers = HTTPHeaderDict() +    >>> headers.add('Set-Cookie', 'foo=bar') +    >>> headers.add('set-cookie', 'baz=quxx') +    >>> headers['content-length'] = '7' +    >>> headers['SET-cookie'] +    'foo=bar, baz=quxx' +    >>> headers['Content-Length'] +    '7' + +    If you want to access the raw headers with their original casing +    for debugging purposes you can access the private ``._data`` attribute +    which is a normal python ``dict`` that maps the case-insensitive key to a +    list of tuples stored as (case-sensitive-original-name, value). Using the +    structure from above as our example: + +    >>> headers._data +    {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], +    'content-length': [('content-length', '7')]} +    """ + +    def __init__(self, headers=None, **kwargs): +        self._data = {} +        if headers is None: +            headers = {} +        self.update(headers, **kwargs) + +    def add(self, key, value): +        """Adds a (name, value) pair, doesn't overwrite the value if it already +        exists. + +        >>> headers = HTTPHeaderDict(foo='bar') +        >>> headers.add('Foo', 'baz') +        >>> headers['foo'] +        'bar, baz' +        """ +        self._data.setdefault(key.lower(), []).append((key, value)) + +    def getlist(self, key): +        """Returns a list of all the values for the named field. Returns an +        empty list if the key doesn't exist.""" +        return self[key].split(', ') if key in self else [] + +    def copy(self): +        h = HTTPHeaderDict() +        for key in self._data: +            for rawkey, value in self._data[key]: +                h.add(rawkey, value) +        return h + +    def __eq__(self, other): +        if not isinstance(other, Mapping): +            return False +        other = HTTPHeaderDict(other) +        return dict((k1, self[k1]) for k1 in self._data) == \ +                dict((k2, other[k2]) for k2 in other._data) + +    def __getitem__(self, key): +        values = self._data[key.lower()] +        return ', '.join(value[1] for value in values) + +    def __setitem__(self, key, value): +        self._data[key.lower()] = [(key, value)] + +    def __delitem__(self, key): +        del self._data[key.lower()] + +    def __len__(self): +        return len(self._data) + +    def __iter__(self): +        for headers in itervalues(self._data): +            yield headers[0][0] + +    def __repr__(self): +        return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py index 2124774..5feb332 100644 --- a/requests/packages/urllib3/connection.py +++ b/requests/packages/urllib3/connection.py @@ -4,6 +4,7 @@  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys  import socket  from socket import timeout as SocketTimeout @@ -38,6 +39,7 @@ from .exceptions import (      ConnectTimeoutError,  )  from .packages.ssl_match_hostname import match_hostname +from .packages import six  from .util import (      assert_fingerprint,      resolve_cert_reqs, @@ -53,34 +55,50 @@ port_by_scheme = {  class HTTPConnection(_HTTPConnection, object): +    """ +    Based on httplib.HTTPConnection but provides an extra constructor +    backwards-compatibility layer between older and newer Pythons. +    """ +      default_port = port_by_scheme['http']      # By default, disable Nagle's Algorithm.      tcp_nodelay = 1 +    def __init__(self, *args, **kw): +        if six.PY3:  # Python 3 +            kw.pop('strict', None) +        if sys.version_info < (2, 7):  # Python 2.6 and older +            kw.pop('source_address', None) + +        # Pre-set source_address in case we have an older Python like 2.6. +        self.source_address = kw.get('source_address') + +        # Superclass also sets self.source_address in Python 2.7+. +        _HTTPConnection.__init__(self, *args, **kw)   +      def _new_conn(self): -        """ Establish a socket connection and set nodelay settings on it +        """ Establish a socket connection and set nodelay settings on it.          :return: a new socket connection          """ -        try: -            conn = socket.create_connection( -                (self.host, self.port), -                self.timeout, -                self.source_address, -            ) -        except AttributeError: # Python 2.6 -            conn = socket.create_connection( -                (self.host, self.port), -                self.timeout, -            ) -        conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, -                        self.tcp_nodelay) +        extra_args = [] +        if self.source_address:  # Python 2.7+ +            extra_args.append(self.source_address) + +        conn = socket.create_connection( +            (self.host, self.port), self.timeout, *extra_args) +        conn.setsockopt( +            socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) +          return conn      def _prepare_conn(self, conn):          self.sock = conn -        if self._tunnel_host: +        # the _tunnel_host attribute was added in python 2.6.3 (via +        # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do +        # not have them. +        if getattr(self, '_tunnel_host', None):              # TODO: Fix tunnel so it doesn't depend on self.sock state.              self._tunnel() @@ -93,15 +111,18 @@ class HTTPSConnection(HTTPConnection):      default_port = port_by_scheme['https']      def __init__(self, host, port=None, key_file=None, cert_file=None, -                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, -                 source_address=None): -        try: -            HTTPConnection.__init__(self, host, port, strict, timeout, source_address) -        except TypeError: # Python 2.6 -            HTTPConnection.__init__(self, host, port, strict, timeout) +                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): + +        HTTPConnection.__init__(self, host, port, strict=strict, +                                timeout=timeout, **kw) +          self.key_file = key_file          self.cert_file = cert_file +        # Required property for Google AppEngine 1.9.0 which otherwise causes +        # HTTPS requests to go out as HTTP. (See Issue #356) +        self._protocol = 'https' +      def connect(self):          conn = self._new_conn()          self._prepare_conn(conn) @@ -116,6 +137,7 @@ class VerifiedHTTPSConnection(HTTPSConnection):      cert_reqs = None      ca_certs = None      ssl_version = None +    conn_kw = {}      def set_cert(self, key_file=None, cert_file=None,                   cert_reqs=None, ca_certs=None, @@ -130,11 +152,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):      def connect(self):          # Add certificate verification +          try:              sock = socket.create_connection( -                address=(self.host, self.port), -                timeout=self.timeout, -            ) +                address=(self.host, self.port), timeout=self.timeout, +                **self.conn_kw)          except SocketTimeout:              raise ConnectTimeoutError(                  self, "Connection to %s timed out. (connect timeout=%s)" % @@ -146,21 +168,25 @@ class VerifiedHTTPSConnection(HTTPSConnection):          resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)          resolved_ssl_version = resolve_ssl_version(self.ssl_version) -        # the _tunnel_host attribute was added in python 2.6.3 (via -        # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do -        # not have them. +        hostname = self.host          if getattr(self, '_tunnel_host', None): +            # _tunnel_host was added in Python 2.6.3 +            # (See: http://hg.python.org/cpython/rev/0f57b30a152f) +              self.sock = sock              # Calls self._set_hostport(), so self.host is              # self._tunnel_host below.              self._tunnel() +            # Override the host with the one we're requesting data from. +            hostname = self._tunnel_host +          # Wrap socket using verification with the root certs in          # trusted_root_certs          self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,                                      cert_reqs=resolved_cert_reqs,                                      ca_certs=self.ca_certs, -                                    server_hostname=self.host, +                                    server_hostname=hostname,                                      ssl_version=resolved_ssl_version)          if resolved_cert_reqs != ssl.CERT_NONE: @@ -169,7 +195,7 @@ class VerifiedHTTPSConnection(HTTPSConnection):                                     self.assert_fingerprint)              elif self.assert_hostname is not False:                  match_hostname(self.sock.getpeercert(), -                               self.assert_hostname or self.host) +                               self.assert_hostname or hostname)  if ssl: diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 243d700..95a53a7 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -4,6 +4,7 @@  # This module is part of urllib3 and is released under  # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys  import errno  import logging @@ -19,9 +20,11 @@ except ImportError:  from .exceptions import (      ClosedPoolError, +    ConnectionError,      ConnectTimeoutError,      EmptyPoolError,      HostChangedError, +    LocationParseError,      MaxRetryError,      SSLError,      TimeoutError, @@ -39,7 +42,6 @@ from .connection import (  from .request import RequestMethods  from .response import HTTPResponse  from .util import ( -    assert_fingerprint,      get_host,      is_connection_dropped,      Timeout, @@ -64,6 +66,9 @@ class ConnectionPool(object):      QueueCls = LifoQueue      def __init__(self, host, port=None): +        if host is None: +            raise LocationParseError(host) +          # httplib doesn't like it when we include brackets in ipv6 addresses          host = host.strip('[]') @@ -135,7 +140,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):      def __init__(self, host, port=None, strict=False,                   timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, -                 headers=None, _proxy=None, _proxy_headers=None): +                 headers=None, _proxy=None, _proxy_headers=None, **conn_kw):          ConnectionPool.__init__(self, host, port)          RequestMethods.__init__(self, headers) @@ -162,6 +167,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          self.num_connections = 0          self.num_requests = 0 +        if sys.version_info < (2, 7):  # Python 2.6 and older +            conn_kw.pop('source_address', None) +        self.conn_kw = conn_kw +      def _new_conn(self):          """          Return a fresh :class:`HTTPConnection`. @@ -170,13 +179,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          log.info("Starting new HTTP connection (%d): %s" %                   (self.num_connections, self.host)) -        extra_params = {} -        if not six.PY3:  # Python 2 -            extra_params['strict'] = self.strict -          conn = self.ConnectionCls(host=self.host, port=self.port,                                    timeout=self.timeout.connect_timeout, -                                  **extra_params) +                                  strict=self.strict, **self.conn_kw)          if self.proxy is not None:              # Enable Nagle's algorithm for proxies, to avoid packet              # fragmentation. @@ -238,8 +243,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              pass          except Full:              # This should never happen if self.block == True -            log.warning("HttpConnectionPool is full, discarding connection: %s" -                        % self.host) +            log.warning( +                "Connection pool is full, discarding connection: %s" % +                self.host)          # Connection never got put back into the pool, close it.          if conn: @@ -414,10 +420,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          :param retries:              Number of retries to allow before raising a MaxRetryError exception. +            If `False`, then retries are disabled and any exception is raised +            immediately.          :param redirect:              If True, automatically handle redirects (status codes 301, 302, -            303, 307, 308). Each redirect counts as a retry. +            303, 307, 308). Each redirect counts as a retry. Disabling retries +            will disable redirect, too.          :param assert_same_host:              If ``True``, will make sure that the host of the pool requests is @@ -451,7 +460,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          if headers is None:              headers = self.headers -        if retries < 0: +        if retries < 0 and retries is not False:              raise MaxRetryError(self, url)          if release_conn is None: @@ -470,6 +479,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              headers = headers.copy()              headers.update(self.proxy_headers) +        # Must keep the exception bound to a separate variable or else Python 3 +        # complains about UnboundLocalError. +        err = None +          try:              # Request a connection from the queue              conn = self._get_conn(timeout=pool_timeout) @@ -497,37 +510,40 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):              #     ``response.read()``)          except Empty: -            # Timed out by queue +            # Timed out by queue.              raise EmptyPoolError(self, "No pool connections are available.") -        except BaseSSLError as e: +        except (BaseSSLError, CertificateError) as e: +            # Release connection unconditionally because there is no way to +            # close it externally in case of exception. +            release_conn = True              raise SSLError(e) -        except CertificateError as e: -            # Name mismatch -            raise SSLError(e) +        except (TimeoutError, HTTPException, SocketError) as e: +            if conn: +                # Discard the connection for these exceptions. It will be +                # be replaced during the next _get_conn() call. +                conn.close() +                conn = None -        except TimeoutError as e: -            # Connection broken, discard. -            conn = None -            # Save the error off for retry logic. -            err = e +            if not retries: +                if isinstance(e, TimeoutError): +                    # TimeoutError is exempt from MaxRetryError-wrapping. +                    # FIXME: ... Not sure why. Add a reason here. +                    raise -            if retries == 0: -                raise +                # Wrap unexpected exceptions with the most appropriate +                # module-level exception and re-raise. +                if isinstance(e, SocketError) and self.proxy: +                    raise ProxyError('Cannot connect to proxy.', e) -        except (HTTPException, SocketError) as e: -            # Connection broken, discard. It will be replaced next _get_conn(). -            conn = None -            # This is necessary so we can access e below -            err = e +                if retries is False: +                    raise ConnectionError('Connection failed.', e) -            if retries == 0: -                if isinstance(e, SocketError) and self.proxy is not None: -                    raise ProxyError('Cannot connect to proxy. ' -                                     'Socket error: %s.' % e) -                else: -                    raise MaxRetryError(self, url, e) +                raise MaxRetryError(self, url, e) + +            # Keep track of the error for the retry warning. +            err = e          finally:              if release_conn: @@ -538,8 +554,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          if not conn:              # Try again -            log.warn("Retrying (%d attempts remain) after connection " -                     "broken by '%r': %s" % (retries, err, url)) +            log.warning("Retrying (%d attempts remain) after connection " +                        "broken by '%r': %s" % (retries, err, url))              return self.urlopen(method, url, body, headers, retries - 1,                                  redirect, assert_same_host,                                  timeout=timeout, pool_timeout=pool_timeout, @@ -547,7 +563,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):          # Handle redirect?          redirect_location = redirect and response.get_redirect_location() -        if redirect_location: +        if redirect_location and retries is not False:              if response.status == 303:                  method = 'GET'              log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -586,10 +602,14 @@ class HTTPSConnectionPool(HTTPConnectionPool):                   _proxy=None, _proxy_headers=None,                   key_file=None, cert_file=None, cert_reqs=None,                   ca_certs=None, ssl_version=None, -                 assert_hostname=None, assert_fingerprint=None): +                 assert_hostname=None, assert_fingerprint=None, +                 **conn_kw): + +        if sys.version_info < (2, 7):  # Python 2.6 or older +            conn_kw.pop('source_address', None)          HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, -                                    block, headers, _proxy, _proxy_headers) +                                    block, headers, _proxy, _proxy_headers, **conn_kw)          self.key_file = key_file          self.cert_file = cert_file          self.cert_reqs = cert_reqs @@ -597,6 +617,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):          self.ssl_version = ssl_version          self.assert_hostname = assert_hostname          self.assert_fingerprint = assert_fingerprint +        self.conn_kw = conn_kw      def _prepare_conn(self, conn):          """ @@ -612,6 +633,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):                            assert_hostname=self.assert_hostname,                            assert_fingerprint=self.assert_fingerprint)              conn.ssl_version = self.ssl_version +            conn.conn_kw = self.conn_kw          if self.proxy is not None:              # Python 2.7+ @@ -648,6 +670,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):          extra_params = {}          if not six.PY3:  # Python 2              extra_params['strict'] = self.strict +        extra_params.update(self.conn_kw)          conn = self.ConnectionCls(host=actual_host, port=actual_port,                                    timeout=self.timeout.connect_timeout, diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index d9bda15..21a12c6 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI_-support for Python 2. +'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself.  This needs the following packages installed: @@ -6,9 +9,15 @@ This needs the following packages installed:  * ndg-httpsclient (tested with 0.3.2)  * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + +    pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this::      try:          import urllib3.contrib.pyopenssl @@ -29,9 +38,8 @@ Module Variables  ----------------  :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. -    Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 -    EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES -    !MD5 !EXP !PSK !SRP !DSS'`` +    Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: +    ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``  .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication  .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) @@ -43,7 +51,7 @@ from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName  import OpenSSL.SSL  from pyasn1.codec.der import decoder as der_decoder  from pyasn1.type import univ, constraint -from socket import _fileobject +from socket import _fileobject, timeout  import ssl  import select  from cStringIO import StringIO @@ -69,12 +77,22 @@ _openssl_verify = {                         + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,  } -# Default SSL/TLS cipher list. -# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ -# configuring-apache-nginx-and-openssl-for-forward-secrecy -DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ -        'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ -        'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ +    "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ +    "!aNULL:!MD5:!DSS"  orig_util_HAS_SNI = util.HAS_SNI @@ -139,6 +157,13 @@ def get_subj_alt_name(peer_cert):  class fileobject(_fileobject): +    def _wait_for_sock(self): +        rd, wd, ed = select.select([self._sock], [], [], +                                   self._sock.gettimeout()) +        if not rd: +            raise timeout() + +      def read(self, size=-1):          # Use max, disallow tiny reads in a loop as they are very inefficient.          # We never leave read() with any leftover data from a new recv() call @@ -156,6 +181,7 @@ class fileobject(_fileobject):                  try:                      data = self._sock.recv(rbufsize)                  except OpenSSL.SSL.WantReadError: +                    self._wait_for_sock()                      continue                  if not data:                      break @@ -183,6 +209,7 @@ class fileobject(_fileobject):                  try:                      data = self._sock.recv(left)                  except OpenSSL.SSL.WantReadError: +                    self._wait_for_sock()                      continue                  if not data:                      break @@ -234,6 +261,7 @@ class fileobject(_fileobject):                                  break                              buffers.append(data)                      except OpenSSL.SSL.WantReadError: +                        self._wait_for_sock()                          continue                      break                  return "".join(buffers) @@ -244,6 +272,7 @@ class fileobject(_fileobject):                  try:                      data = self._sock.recv(self._rbufsize)                  except OpenSSL.SSL.WantReadError: +                    self._wait_for_sock()                      continue                  if not data:                      break @@ -271,7 +300,8 @@ class fileobject(_fileobject):                  try:                      data = self._sock.recv(self._rbufsize)                  except OpenSSL.SSL.WantReadError: -                        continue +                    self._wait_for_sock() +                    continue                  if not data:                      break                  left = size - buf_len @@ -366,6 +396,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,              ctx.load_verify_locations(ca_certs, None)          except OpenSSL.SSL.Error as e:              raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) +    else: +        ctx.set_default_verify_paths()      # Disable TLS compression to migitate CRIME attack (issue #309)      OP_NO_COMPRESSION = 0x20000 diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 98ef9ab..b4df831 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -44,6 +44,11 @@ class ProxyError(HTTPError):      pass +class ConnectionError(HTTPError): +    "Raised when a normal connection fails." +    pass + +  class DecodeError(HTTPError):      "Raised when automatic decoding based on Content-Type fails."      pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py index ed01765..da79e92 100644 --- a/requests/packages/urllib3/fields.py +++ b/requests/packages/urllib3/fields.py @@ -15,7 +15,7 @@ def guess_content_type(filename, default='application/octet-stream'):      Guess the "Content-Type" of a file.      :param filename: -        The filename to guess the "Content-Type" of using :mod:`mimetimes`. +        The filename to guess the "Content-Type" of using :mod:`mimetypes`.      :param default:          If no "Content-Type" can be guessed, default to `default`.      """ diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 3aa5b2e..dd59a75 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,7 +7,7 @@ except ImportError:          from backports.ssl_match_hostname import CertificateError, match_hostname      except ImportError:          # Our vendored copy -        from _implementation import CertificateError, match_hostname +        from ._implementation import CertificateError, match_hostname  # Not needed, but documenting what we provide.  __all__ = ('CertificateError', 'match_hostname') diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 6a1fe1a..db44182 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -9,6 +9,7 @@ import logging  import zlib  import io +from ._collections import HTTPHeaderDict  from .exceptions import DecodeError  from .packages.six import string_types as basestring, binary_type  from .util import is_fp_closed @@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase):      def __init__(self, body='', headers=None, status=0, version=0, reason=None,                   strict=0, preload_content=True, decode_content=True,                   original_response=None, pool=None, connection=None): -        self.headers = headers or {} + +        self.headers = HTTPHeaderDict() +        if headers: +            self.headers.update(headers)          self.status = status          self.version = version          self.reason = reason @@ -249,17 +253,9 @@ class HTTPResponse(io.IOBase):          with ``original_response=r``.          """ -        # Normalize headers between different versions of Python -        headers = {} +        headers = HTTPHeaderDict()          for k, v in r.getheaders(): -            # Python 3: Header keys are returned capitalised -            k = k.lower() - -            has_value = headers.get(k) -            if has_value: # Python 3: Repeating header keys are unmerged. -                v = ', '.join([has_value, v]) - -            headers[k] = v +            headers.add(k, v)          # HTTPResponse objects in Python 3 don't have a .strict attribute          strict = getattr(r, 'strict', 0) diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py deleted file mode 100644 index bd26631..0000000 --- a/requests/packages/urllib3/util.py +++ /dev/null @@ -1,648 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: -    from select import poll, POLLIN -except ImportError:  # `poll` doesn't exist on OSX and other platforms -    poll = False -    try: -        from select import select -    except ImportError:  # `select` doesn't exist on AppEngine. -        select = False - -try:  # Test for SSL features -    SSLContext = None -    HAS_SNI = False - -    import ssl -    from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 -    from ssl import SSLContext  # Modern SSL? -    from ssl import HAS_SNI  # Has SNI? -except ImportError: -    pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): -    """ -    Retrieve the current time, this function is mocked out in unit testing. -    """ -    return time.time() - - -class Timeout(object): -    """ -    Utility object for storing timeout values. - -    Example usage: - -    .. code-block:: python - -        timeout = urllib3.util.Timeout(connect=2.0, read=7.0) -        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) -        pool.request(...) # Etc, etc - -    :param connect: -        The maximum amount of time to wait for a connection attempt to a server -        to succeed. Omitting the parameter will default the connect timeout to -        the system default, probably `the global default timeout in socket.py -        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. -        None will set an infinite timeout for connection attempts. - -    :type connect: integer, float, or None - -    :param read: -        The maximum amount of time to wait between consecutive -        read operations for a response from the server. Omitting -        the parameter will default the read timeout to the system -        default, probably `the global default timeout in socket.py -        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. -        None will set an infinite timeout. - -    :type read: integer, float, or None - -    :param total: -        This combines the connect and read timeouts into one; the read timeout -        will be set to the time leftover from the connect attempt. In the -        event that both a connect timeout and a total are specified, or a read -        timeout and a total are specified, the shorter timeout will be applied. - -        Defaults to None. - -    :type total: integer, float, or None - -    .. note:: - -        Many factors can affect the total amount of time for urllib3 to return -        an HTTP response. Specifically, Python's DNS resolver does not obey the -        timeout specified on the socket. Other factors that can affect total -        request time include high CPU load, high swap, the program running at a -        low priority level, or other behaviors. The observed running time for -        urllib3 to return a response may be greater than the value passed to -        `total`. - -        In addition, the read and total timeouts only measure the time between -        read operations on the socket connecting the client and the server, -        not the total amount of time for the request to return a complete -        response. For most requests, the timeout is raised because the server -        has not sent the first byte in the specified time. This is not always -        the case; if a server streams one byte every fifteen seconds, a timeout -        of 20 seconds will not ever trigger, even though the request will -        take several minutes to complete. - -        If your goal is to cut off any request after a set amount of wall clock -        time, consider having a second "watcher" thread to cut off a slow -        request. -    """ - -    #: A sentinel object representing the default timeout value -    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - -    def __init__(self, total=None, connect=_Default, read=_Default): -        self._connect = self._validate_timeout(connect, 'connect') -        self._read = self._validate_timeout(read, 'read') -        self.total = self._validate_timeout(total, 'total') -        self._start_connect = None - -    def __str__(self): -        return '%s(connect=%r, read=%r, total=%r)' % ( -            type(self).__name__, self._connect, self._read, self.total) - - -    @classmethod -    def _validate_timeout(cls, value, name): -        """ Check that a timeout attribute is valid - -        :param value: The timeout value to validate -        :param name: The name of the timeout attribute to validate. This is used -            for clear error messages -        :return: the value -        :raises ValueError: if the type is not an integer or a float, or if it -            is a numeric value less than zero -        """ -        if value is _Default: -            return cls.DEFAULT_TIMEOUT - -        if value is None or value is cls.DEFAULT_TIMEOUT: -            return value - -        try: -            float(value) -        except (TypeError, ValueError): -            raise ValueError("Timeout value %s was %s, but it must be an " -                             "int or float." % (name, value)) - -        try: -            if value < 0: -                raise ValueError("Attempted to set %s timeout to %s, but the " -                                 "timeout cannot be set to a value less " -                                 "than 0." % (name, value)) -        except TypeError: # Python 3 -            raise ValueError("Timeout value %s was %s, but it must be an " -                             "int or float." % (name, value)) - -        return value - -    @classmethod -    def from_float(cls, timeout): -        """ Create a new Timeout from a legacy timeout value. - -        The timeout value used by httplib.py sets the same timeout on the -        connect(), and recv() socket requests. This creates a :class:`Timeout` -        object that sets the individual timeouts to the ``timeout`` value passed -        to this function. - -        :param timeout: The legacy timeout value -        :type timeout: integer, float, sentinel default object, or None -        :return: a Timeout object -        :rtype: :class:`Timeout` -        """ -        return Timeout(read=timeout, connect=timeout) - -    def clone(self): -        """ Create a copy of the timeout object - -        Timeout properties are stored per-pool but each request needs a fresh -        Timeout object to ensure each one has its own start/stop configured. - -        :return: a copy of the timeout object -        :rtype: :class:`Timeout` -        """ -        # We can't use copy.deepcopy because that will also create a new object -        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to -        # detect the user default. -        return Timeout(connect=self._connect, read=self._read, -                       total=self.total) - -    def start_connect(self): -        """ Start the timeout clock, used during a connect() attempt - -        :raises urllib3.exceptions.TimeoutStateError: if you attempt -            to start a timer that has been started already. -        """ -        if self._start_connect is not None: -            raise TimeoutStateError("Timeout timer has already been started.") -        self._start_connect = current_time() -        return self._start_connect - -    def get_connect_duration(self): -        """ Gets the time elapsed since the call to :meth:`start_connect`. - -        :return: the elapsed time -        :rtype: float -        :raises urllib3.exceptions.TimeoutStateError: if you attempt -            to get duration for a timer that hasn't been started. -        """ -        if self._start_connect is None: -            raise TimeoutStateError("Can't get connect duration for timer " -                                    "that has not started.") -        return current_time() - self._start_connect - -    @property -    def connect_timeout(self): -        """ Get the value to use when setting a connection timeout. - -        This will be a positive float or integer, the value None -        (never timeout), or the default system timeout. - -        :return: the connect timeout -        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None -        """ -        if self.total is None: -            return self._connect - -        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: -            return self.total - -        return min(self._connect, self.total) - -    @property -    def read_timeout(self): -        """ Get the value for the read timeout. - -        This assumes some time has elapsed in the connection timeout and -        computes the read timeout appropriately. - -        If self.total is set, the read timeout is dependent on the amount of -        time taken by the connect timeout. If the connection time has not been -        established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be -        raised. - -        :return: the value to use for the read timeout -        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None -        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` -            has not yet been called on this object. -        """ -        if (self.total is not None and -            self.total is not self.DEFAULT_TIMEOUT and -            self._read is not None and -            self._read is not self.DEFAULT_TIMEOUT): -            # in case the connect timeout has not yet been established. -            if self._start_connect is None: -                return self._read -            return max(0, min(self.total - self.get_connect_duration(), -                              self._read)) -        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: -            return max(0, self.total - self.get_connect_duration()) -        else: -            return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): -    """ -    Datastructure for representing an HTTP URL. Used as a return value for -    :func:`parse_url`. -    """ -    slots = () - -    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): -        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - -    @property -    def hostname(self): -        """For backwards-compatibility with urlparse. We're nice like that.""" -        return self.host - -    @property -    def request_uri(self): -        """Absolute path including the query string.""" -        uri = self.path or '/' - -        if self.query is not None: -            uri += '?' + self.query - -        return uri - -    @property -    def netloc(self): -        """Network location including host and port""" -        if self.port: -            return '%s:%d' % (self.host, self.port) -        return self.host - - -def split_first(s, delims): -    """ -    Given a string and an iterable of delimiters, split on the first found -    delimiter. Return two split parts and the matched delimiter. - -    If not found, then the first part is the full input string. - -    Example: :: - -        >>> split_first('foo/bar?baz', '?/=') -        ('foo', 'bar?baz', '/') -        >>> split_first('foo/bar?baz', '123') -        ('foo/bar?baz', '', None) - -    Scales linearly with number of delims. Not ideal for large number of delims. -    """ -    min_idx = None -    min_delim = None -    for d in delims: -        idx = s.find(d) -        if idx < 0: -            continue - -        if min_idx is None or idx < min_idx: -            min_idx = idx -            min_delim = d - -    if min_idx is None or min_idx < 0: -        return s, '', None - -    return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): -    """ -    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is -    performed to parse incomplete urls. Fields not provided will be None. - -    Partly backwards-compatible with :mod:`urlparse`. - -    Example: :: - -        >>> parse_url('http://google.com/mail/') -        Url(scheme='http', host='google.com', port=None, path='/', ...) -        >>> parse_url('google.com:80') -        Url(scheme=None, host='google.com', port=80, path=None, ...) -        >>> parse_url('/foo?bar') -        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) -    """ - -    # While this code has overlap with stdlib's urlparse, it is much -    # simplified for our needs and less annoying. -    # Additionally, this implementations does silly things to be optimal -    # on CPython. - -    scheme = None -    auth = None -    host = None -    port = None -    path = None -    fragment = None -    query = None - -    # Scheme -    if '://' in url: -        scheme, url = url.split('://', 1) - -    # Find the earliest Authority Terminator -    # (http://tools.ietf.org/html/rfc3986#section-3.2) -    url, path_, delim = split_first(url, ['/', '?', '#']) - -    if delim: -        # Reassemble the path -        path = delim + path_ - -    # Auth -    if '@' in url: -        # Last '@' denotes end of auth part -        auth, url = url.rsplit('@', 1) - -    # IPv6 -    if url and url[0] == '[': -        host, url = url.split(']', 1) -        host += ']' - -    # Port -    if ':' in url: -        _host, port = url.split(':', 1) - -        if not host: -            host = _host - -        if port: -            # If given, ports must be integers. -            if not port.isdigit(): -                raise LocationParseError("Failed to parse: %s" % url) -            port = int(port) -        else: -            # Blank ports are cool, too. (rfc3986#section-3.2.3) -            port = None - -    elif not host and url: -        host = url - -    if not path: -        return Url(scheme, auth, host, port, path, query, fragment) - -    # Fragment -    if '#' in path: -        path, fragment = path.split('#', 1) - -    # Query -    if '?' in path: -        path, query = path.split('?', 1) - -    return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): -    """ -    Deprecated. Use :func:`.parse_url` instead. -    """ -    p = parse_url(url) -    return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, -                 basic_auth=None, proxy_basic_auth=None): -    """ -    Shortcuts for generating request headers. - -    :param keep_alive: -        If ``True``, adds 'connection: keep-alive' header. - -    :param accept_encoding: -        Can be a boolean, list, or string. -        ``True`` translates to 'gzip,deflate'. -        List will get joined by comma. -        String will be used as provided. - -    :param user_agent: -        String representing the user-agent you want, such as -        "python-urllib3/0.6" - -    :param basic_auth: -        Colon-separated username:password string for 'authorization: basic ...' -        auth header. - -    :param proxy_basic_auth: -        Colon-separated username:password string for 'proxy-authorization: basic ...' -        auth header. - -    Example: :: - -        >>> make_headers(keep_alive=True, user_agent="Batman/1.0") -        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} -        >>> make_headers(accept_encoding=True) -        {'accept-encoding': 'gzip,deflate'} -    """ -    headers = {} -    if accept_encoding: -        if isinstance(accept_encoding, str): -            pass -        elif isinstance(accept_encoding, list): -            accept_encoding = ','.join(accept_encoding) -        else: -            accept_encoding = 'gzip,deflate' -        headers['accept-encoding'] = accept_encoding - -    if user_agent: -        headers['user-agent'] = user_agent - -    if keep_alive: -        headers['connection'] = 'keep-alive' - -    if basic_auth: -        headers['authorization'] = 'Basic ' + \ -            b64encode(six.b(basic_auth)).decode('utf-8') - -    if proxy_basic_auth: -        headers['proxy-authorization'] = 'Basic ' + \ -            b64encode(six.b(proxy_basic_auth)).decode('utf-8') - -    return headers - - -def is_connection_dropped(conn):  # Platform-specific -    """ -    Returns True if the connection is dropped and should be closed. - -    :param conn: -        :class:`httplib.HTTPConnection` object. - -    Note: For platforms like AppEngine, this will always return ``False`` to -    let the platform handle connection recycling transparently for us. -    """ -    sock = getattr(conn, 'sock', False) -    if not sock: # Platform-specific: AppEngine -        return False - -    if not poll: -        if not select: # Platform-specific: AppEngine -            return False - -        try: -            return select([sock], [], [], 0.0)[0] -        except SocketError: -            return True - -    # This version is better on platforms that support it. -    p = poll() -    p.register(sock, POLLIN) -    for (fno, ev) in p.poll(0.0): -        if fno == sock.fileno(): -            # Either data is buffered (bad), or the connection is dropped. -            return True - - -def resolve_cert_reqs(candidate): -    """ -    Resolves the argument to a numeric constant, which can be passed to -    the wrap_socket function/method from the ssl module. -    Defaults to :data:`ssl.CERT_NONE`. -    If given a string it is assumed to be the name of the constant in the -    :mod:`ssl` module or its abbrevation. -    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. -    If it's neither `None` nor a string we assume it is already the numeric -    constant which can directly be passed to wrap_socket. -    """ -    if candidate is None: -        return CERT_NONE - -    if isinstance(candidate, str): -        res = getattr(ssl, candidate, None) -        if res is None: -            res = getattr(ssl, 'CERT_' + candidate) -        return res - -    return candidate - - -def resolve_ssl_version(candidate): -    """ -    like resolve_cert_reqs -    """ -    if candidate is None: -        return PROTOCOL_SSLv23 - -    if isinstance(candidate, str): -        res = getattr(ssl, candidate, None) -        if res is None: -            res = getattr(ssl, 'PROTOCOL_' + candidate) -        return res - -    return candidate - - -def assert_fingerprint(cert, fingerprint): -    """ -    Checks if given fingerprint matches the supplied certificate. - -    :param cert: -        Certificate as bytes object. -    :param fingerprint: -        Fingerprint as string of hexdigits, can be interspersed by colons. -    """ - -    # Maps the length of a digest to a possible hash function producing -    # this digest. -    hashfunc_map = { -        16: md5, -        20: sha1 -    } - -    fingerprint = fingerprint.replace(':', '').lower() - -    digest_length, rest = divmod(len(fingerprint), 2) - -    if rest or digest_length not in hashfunc_map: -        raise SSLError('Fingerprint is of invalid length.') - -    # We need encode() here for py32; works on py2 and p33. -    fingerprint_bytes = unhexlify(fingerprint.encode()) - -    hashfunc = hashfunc_map[digest_length] - -    cert_digest = hashfunc(cert).digest() - -    if not cert_digest == fingerprint_bytes: -        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' -                       .format(hexlify(fingerprint_bytes), -                               hexlify(cert_digest))) - -def is_fp_closed(obj): -    """ -    Checks whether a given file-like object is closed. - -    :param obj: -        The file-like object to check. -    """ -    if hasattr(obj, 'fp'): -        # Object is a container for another file-like object that gets released -        # on exhaustion (e.g. HTTPResponse) -        return obj.fp is None - -    return obj.closed - - -if SSLContext is not None:  # Python 3.2+ -    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, -                        ca_certs=None, server_hostname=None, -                        ssl_version=None): -        """ -        All arguments except `server_hostname` have the same meaning as for -        :func:`ssl.wrap_socket` - -        :param server_hostname: -            Hostname of the expected certificate -        """ -        context = SSLContext(ssl_version) -        context.verify_mode = cert_reqs - -        # Disable TLS compression to migitate CRIME attack (issue #309) -        OP_NO_COMPRESSION = 0x20000 -        context.options |= OP_NO_COMPRESSION - -        if ca_certs: -            try: -                context.load_verify_locations(ca_certs) -            # Py32 raises IOError -            # Py33 raises FileNotFoundError -            except Exception as e:  # Reraise as SSLError -                raise SSLError(e) -        if certfile: -            # FIXME: This block needs a test. -            context.load_cert_chain(certfile, keyfile) -        if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI -            return context.wrap_socket(sock, server_hostname=server_hostname) -        return context.wrap_socket(sock) - -else:  # Python 3.1 and earlier -    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, -                        ca_certs=None, server_hostname=None, -                        ssl_version=None): -        return wrap_socket(sock, keyfile=keyfile, certfile=certfile, -                           ca_certs=ca_certs, cert_reqs=cert_reqs, -                           ssl_version=ssl_version) diff --git a/requests/packages/urllib3/util/__init__.py b/requests/packages/urllib3/util/__init__.py new file mode 100644 index 0000000..a40185e --- /dev/null +++ b/requests/packages/urllib3/util/__init__.py @@ -0,0 +1,27 @@ +# urllib3/util/__init__.py +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( +    SSLContext, +    HAS_SNI, +    assert_fingerprint, +    resolve_cert_reqs, +    resolve_ssl_version, +    ssl_wrap_socket, +) +from .timeout import ( +    current_time, +    Timeout, +) +from .url import ( +    get_host, +    parse_url, +    split_first, +    Url, +) diff --git a/requests/packages/urllib3/util/connection.py b/requests/packages/urllib3/util/connection.py new file mode 100644 index 0000000..8deeab5 --- /dev/null +++ b/requests/packages/urllib3/util/connection.py @@ -0,0 +1,45 @@ +from socket import error as SocketError +try: +    from select import poll, POLLIN +except ImportError:  # `poll` doesn't exist on OSX and other platforms +    poll = False +    try: +        from select import select +    except ImportError:  # `select` doesn't exist on AppEngine. +        select = False + +def is_connection_dropped(conn):  # Platform-specific +    """ +    Returns True if the connection is dropped and should be closed. + +    :param conn: +        :class:`httplib.HTTPConnection` object. + +    Note: For platforms like AppEngine, this will always return ``False`` to +    let the platform handle connection recycling transparently for us. +    """ +    sock = getattr(conn, 'sock', False) +    if sock is False:  # Platform-specific: AppEngine +        return False +    if sock is None:  # Connection already closed (such as by httplib). +        return False + +    if not poll: +        if not select:  # Platform-specific: AppEngine +            return False + +        try: +            return select([sock], [], [], 0.0)[0] +        except SocketError: +            return True + +    # This version is better on platforms that support it. +    p = poll() +    p.register(sock, POLLIN) +    for (fno, ev) in p.poll(0.0): +        if fno == sock.fileno(): +            # Either data is buffered (bad), or the connection is dropped. +            return True + + + diff --git a/requests/packages/urllib3/util/request.py b/requests/packages/urllib3/util/request.py new file mode 100644 index 0000000..d48d651 --- /dev/null +++ b/requests/packages/urllib3/util/request.py @@ -0,0 +1,68 @@ +from base64 import b64encode + +from ..packages import six + + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, +                 basic_auth=None, proxy_basic_auth=None): +    """ +    Shortcuts for generating request headers. + +    :param keep_alive: +        If ``True``, adds 'connection: keep-alive' header. + +    :param accept_encoding: +        Can be a boolean, list, or string. +        ``True`` translates to 'gzip,deflate'. +        List will get joined by comma. +        String will be used as provided. + +    :param user_agent: +        String representing the user-agent you want, such as +        "python-urllib3/0.6" + +    :param basic_auth: +        Colon-separated username:password string for 'authorization: basic ...' +        auth header. + +    :param proxy_basic_auth: +        Colon-separated username:password string for 'proxy-authorization: basic ...' +        auth header. + +    Example: :: + +        >>> make_headers(keep_alive=True, user_agent="Batman/1.0") +        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} +        >>> make_headers(accept_encoding=True) +        {'accept-encoding': 'gzip,deflate'} +    """ +    headers = {} +    if accept_encoding: +        if isinstance(accept_encoding, str): +            pass +        elif isinstance(accept_encoding, list): +            accept_encoding = ','.join(accept_encoding) +        else: +            accept_encoding = ACCEPT_ENCODING +        headers['accept-encoding'] = accept_encoding + +    if user_agent: +        headers['user-agent'] = user_agent + +    if keep_alive: +        headers['connection'] = 'keep-alive' + +    if basic_auth: +        headers['authorization'] = 'Basic ' + \ +            b64encode(six.b(basic_auth)).decode('utf-8') + +    if proxy_basic_auth: +        headers['proxy-authorization'] = 'Basic ' + \ +            b64encode(six.b(proxy_basic_auth)).decode('utf-8') + +    return headers + + diff --git a/requests/packages/urllib3/util/response.py b/requests/packages/urllib3/util/response.py new file mode 100644 index 0000000..d0325bc --- /dev/null +++ b/requests/packages/urllib3/util/response.py @@ -0,0 +1,13 @@ +def is_fp_closed(obj): +    """ +    Checks whether a given file-like object is closed. + +    :param obj: +        The file-like object to check. +    """ +    if hasattr(obj, 'fp'): +        # Object is a container for another file-like object that gets released +        # on exhaustion (e.g. HTTPResponse) +        return obj.fp is None + +    return obj.closed diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py new file mode 100644 index 0000000..dee4b87 --- /dev/null +++ b/requests/packages/urllib3/util/ssl_.py @@ -0,0 +1,133 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +try:  # Test for SSL features +    SSLContext = None +    HAS_SNI = False + +    import ssl +    from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 +    from ssl import SSLContext  # Modern SSL? +    from ssl import HAS_SNI  # Has SNI? +except ImportError: +    pass + + +def assert_fingerprint(cert, fingerprint): +    """ +    Checks if given fingerprint matches the supplied certificate. + +    :param cert: +        Certificate as bytes object. +    :param fingerprint: +        Fingerprint as string of hexdigits, can be interspersed by colons. +    """ + +    # Maps the length of a digest to a possible hash function producing +    # this digest. +    hashfunc_map = { +        16: md5, +        20: sha1 +    } + +    fingerprint = fingerprint.replace(':', '').lower() + +    digest_length, rest = divmod(len(fingerprint), 2) + +    if rest or digest_length not in hashfunc_map: +        raise SSLError('Fingerprint is of invalid length.') + +    # We need encode() here for py32; works on py2 and p33. +    fingerprint_bytes = unhexlify(fingerprint.encode()) + +    hashfunc = hashfunc_map[digest_length] + +    cert_digest = hashfunc(cert).digest() + +    if not cert_digest == fingerprint_bytes: +        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' +                       .format(hexlify(fingerprint_bytes), +                               hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): +    """ +    Resolves the argument to a numeric constant, which can be passed to +    the wrap_socket function/method from the ssl module. +    Defaults to :data:`ssl.CERT_NONE`. +    If given a string it is assumed to be the name of the constant in the +    :mod:`ssl` module or its abbrevation. +    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. +    If it's neither `None` nor a string we assume it is already the numeric +    constant which can directly be passed to wrap_socket. +    """ +    if candidate is None: +        return CERT_NONE + +    if isinstance(candidate, str): +        res = getattr(ssl, candidate, None) +        if res is None: +            res = getattr(ssl, 'CERT_' + candidate) +        return res + +    return candidate + + +def resolve_ssl_version(candidate): +    """ +    like resolve_cert_reqs +    """ +    if candidate is None: +        return PROTOCOL_SSLv23 + +    if isinstance(candidate, str): +        res = getattr(ssl, candidate, None) +        if res is None: +            res = getattr(ssl, 'PROTOCOL_' + candidate) +        return res + +    return candidate + + +if SSLContext is not None:  # Python 3.2+ +    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, +                        ca_certs=None, server_hostname=None, +                        ssl_version=None): +        """ +        All arguments except `server_hostname` have the same meaning as for +        :func:`ssl.wrap_socket` + +        :param server_hostname: +            Hostname of the expected certificate +        """ +        context = SSLContext(ssl_version) +        context.verify_mode = cert_reqs + +        # Disable TLS compression to migitate CRIME attack (issue #309) +        OP_NO_COMPRESSION = 0x20000 +        context.options |= OP_NO_COMPRESSION + +        if ca_certs: +            try: +                context.load_verify_locations(ca_certs) +            # Py32 raises IOError +            # Py33 raises FileNotFoundError +            except Exception as e:  # Reraise as SSLError +                raise SSLError(e) +        if certfile: +            # FIXME: This block needs a test. +            context.load_cert_chain(certfile, keyfile) +        if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI +            return context.wrap_socket(sock, server_hostname=server_hostname) +        return context.wrap_socket(sock) + +else:  # Python 3.1 and earlier +    def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, +                        ca_certs=None, server_hostname=None, +                        ssl_version=None): +        return wrap_socket(sock, keyfile=keyfile, certfile=certfile, +                           ca_certs=ca_certs, cert_reqs=cert_reqs, +                           ssl_version=ssl_version) diff --git a/requests/packages/urllib3/util/timeout.py b/requests/packages/urllib3/util/timeout.py new file mode 100644 index 0000000..4f947cb --- /dev/null +++ b/requests/packages/urllib3/util/timeout.py @@ -0,0 +1,234 @@ +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + + +def current_time(): +    """ +    Retrieve the current time, this function is mocked out in unit testing. +    """ +    return time.time() + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +class Timeout(object): +    """ +    Utility object for storing timeout values. + +    Example usage: + +    .. code-block:: python + +        timeout = urllib3.util.Timeout(connect=2.0, read=7.0) +        pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) +        pool.request(...) # Etc, etc + +    :param connect: +        The maximum amount of time to wait for a connection attempt to a server +        to succeed. Omitting the parameter will default the connect timeout to +        the system default, probably `the global default timeout in socket.py +        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. +        None will set an infinite timeout for connection attempts. + +    :type connect: integer, float, or None + +    :param read: +        The maximum amount of time to wait between consecutive +        read operations for a response from the server. Omitting +        the parameter will default the read timeout to the system +        default, probably `the global default timeout in socket.py +        <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_. +        None will set an infinite timeout. + +    :type read: integer, float, or None + +    :param total: +        This combines the connect and read timeouts into one; the read timeout +        will be set to the time leftover from the connect attempt. In the +        event that both a connect timeout and a total are specified, or a read +        timeout and a total are specified, the shorter timeout will be applied. + +        Defaults to None. + +    :type total: integer, float, or None + +    .. note:: + +        Many factors can affect the total amount of time for urllib3 to return +        an HTTP response. Specifically, Python's DNS resolver does not obey the +        timeout specified on the socket. Other factors that can affect total +        request time include high CPU load, high swap, the program running at a +        low priority level, or other behaviors. The observed running time for +        urllib3 to return a response may be greater than the value passed to +        `total`. + +        In addition, the read and total timeouts only measure the time between +        read operations on the socket connecting the client and the server, +        not the total amount of time for the request to return a complete +        response. For most requests, the timeout is raised because the server +        has not sent the first byte in the specified time. This is not always +        the case; if a server streams one byte every fifteen seconds, a timeout +        of 20 seconds will not ever trigger, even though the request will +        take several minutes to complete. + +        If your goal is to cut off any request after a set amount of wall clock +        time, consider having a second "watcher" thread to cut off a slow +        request. +    """ + +    #: A sentinel object representing the default timeout value +    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + +    def __init__(self, total=None, connect=_Default, read=_Default): +        self._connect = self._validate_timeout(connect, 'connect') +        self._read = self._validate_timeout(read, 'read') +        self.total = self._validate_timeout(total, 'total') +        self._start_connect = None + +    def __str__(self): +        return '%s(connect=%r, read=%r, total=%r)' % ( +            type(self).__name__, self._connect, self._read, self.total) + + +    @classmethod +    def _validate_timeout(cls, value, name): +        """ Check that a timeout attribute is valid + +        :param value: The timeout value to validate +        :param name: The name of the timeout attribute to validate. This is used +            for clear error messages +        :return: the value +        :raises ValueError: if the type is not an integer or a float, or if it +            is a numeric value less than zero +        """ +        if value is _Default: +            return cls.DEFAULT_TIMEOUT + +        if value is None or value is cls.DEFAULT_TIMEOUT: +            return value + +        try: +            float(value) +        except (TypeError, ValueError): +            raise ValueError("Timeout value %s was %s, but it must be an " +                             "int or float." % (name, value)) + +        try: +            if value < 0: +                raise ValueError("Attempted to set %s timeout to %s, but the " +                                 "timeout cannot be set to a value less " +                                 "than 0." % (name, value)) +        except TypeError: # Python 3 +            raise ValueError("Timeout value %s was %s, but it must be an " +                             "int or float." % (name, value)) + +        return value + +    @classmethod +    def from_float(cls, timeout): +        """ Create a new Timeout from a legacy timeout value. + +        The timeout value used by httplib.py sets the same timeout on the +        connect(), and recv() socket requests. This creates a :class:`Timeout` +        object that sets the individual timeouts to the ``timeout`` value passed +        to this function. + +        :param timeout: The legacy timeout value +        :type timeout: integer, float, sentinel default object, or None +        :return: a Timeout object +        :rtype: :class:`Timeout` +        """ +        return Timeout(read=timeout, connect=timeout) + +    def clone(self): +        """ Create a copy of the timeout object + +        Timeout properties are stored per-pool but each request needs a fresh +        Timeout object to ensure each one has its own start/stop configured. + +        :return: a copy of the timeout object +        :rtype: :class:`Timeout` +        """ +        # We can't use copy.deepcopy because that will also create a new object +        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to +        # detect the user default. +        return Timeout(connect=self._connect, read=self._read, +                       total=self.total) + +    def start_connect(self): +        """ Start the timeout clock, used during a connect() attempt + +        :raises urllib3.exceptions.TimeoutStateError: if you attempt +            to start a timer that has been started already. +        """ +        if self._start_connect is not None: +            raise TimeoutStateError("Timeout timer has already been started.") +        self._start_connect = current_time() +        return self._start_connect + +    def get_connect_duration(self): +        """ Gets the time elapsed since the call to :meth:`start_connect`. + +        :return: the elapsed time +        :rtype: float +        :raises urllib3.exceptions.TimeoutStateError: if you attempt +            to get duration for a timer that hasn't been started. +        """ +        if self._start_connect is None: +            raise TimeoutStateError("Can't get connect duration for timer " +                                    "that has not started.") +        return current_time() - self._start_connect + +    @property +    def connect_timeout(self): +        """ Get the value to use when setting a connection timeout. + +        This will be a positive float or integer, the value None +        (never timeout), or the default system timeout. + +        :return: the connect timeout +        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None +        """ +        if self.total is None: +            return self._connect + +        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: +            return self.total + +        return min(self._connect, self.total) + +    @property +    def read_timeout(self): +        """ Get the value for the read timeout. + +        This assumes some time has elapsed in the connection timeout and +        computes the read timeout appropriately. + +        If self.total is set, the read timeout is dependent on the amount of +        time taken by the connect timeout. If the connection time has not been +        established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be +        raised. + +        :return: the value to use for the read timeout +        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None +        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` +            has not yet been called on this object. +        """ +        if (self.total is not None and +            self.total is not self.DEFAULT_TIMEOUT and +            self._read is not None and +            self._read is not self.DEFAULT_TIMEOUT): +            # in case the connect timeout has not yet been established. +            if self._start_connect is None: +                return self._read +            return max(0, min(self.total - self.get_connect_duration(), +                              self._read)) +        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: +            return max(0, self.total - self.get_connect_duration()) +        else: +            return self._read diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py new file mode 100644 index 0000000..362d216 --- /dev/null +++ b/requests/packages/urllib3/util/url.py @@ -0,0 +1,162 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): +    """ +    Datastructure for representing an HTTP URL. Used as a return value for +    :func:`parse_url`. +    """ +    slots = () + +    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): +        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + +    @property +    def hostname(self): +        """For backwards-compatibility with urlparse. We're nice like that.""" +        return self.host + +    @property +    def request_uri(self): +        """Absolute path including the query string.""" +        uri = self.path or '/' + +        if self.query is not None: +            uri += '?' + self.query + +        return uri + +    @property +    def netloc(self): +        """Network location including host and port""" +        if self.port: +            return '%s:%d' % (self.host, self.port) +        return self.host + + +def split_first(s, delims): +    """ +    Given a string and an iterable of delimiters, split on the first found +    delimiter. Return two split parts and the matched delimiter. + +    If not found, then the first part is the full input string. + +    Example: :: + +        >>> split_first('foo/bar?baz', '?/=') +        ('foo', 'bar?baz', '/') +        >>> split_first('foo/bar?baz', '123') +        ('foo/bar?baz', '', None) + +    Scales linearly with number of delims. Not ideal for large number of delims. +    """ +    min_idx = None +    min_delim = None +    for d in delims: +        idx = s.find(d) +        if idx < 0: +            continue + +        if min_idx is None or idx < min_idx: +            min_idx = idx +            min_delim = d + +    if min_idx is None or min_idx < 0: +        return s, '', None + +    return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): +    """ +    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is +    performed to parse incomplete urls. Fields not provided will be None. + +    Partly backwards-compatible with :mod:`urlparse`. + +    Example: :: + +        >>> parse_url('http://google.com/mail/') +        Url(scheme='http', host='google.com', port=None, path='/', ...) +        >>> parse_url('google.com:80') +        Url(scheme=None, host='google.com', port=80, path=None, ...) +        >>> parse_url('/foo?bar') +        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) +    """ + +    # While this code has overlap with stdlib's urlparse, it is much +    # simplified for our needs and less annoying. +    # Additionally, this implementations does silly things to be optimal +    # on CPython. + +    scheme = None +    auth = None +    host = None +    port = None +    path = None +    fragment = None +    query = None + +    # Scheme +    if '://' in url: +        scheme, url = url.split('://', 1) + +    # Find the earliest Authority Terminator +    # (http://tools.ietf.org/html/rfc3986#section-3.2) +    url, path_, delim = split_first(url, ['/', '?', '#']) + +    if delim: +        # Reassemble the path +        path = delim + path_ + +    # Auth +    if '@' in url: +        # Last '@' denotes end of auth part +        auth, url = url.rsplit('@', 1) + +    # IPv6 +    if url and url[0] == '[': +        host, url = url.split(']', 1) +        host += ']' + +    # Port +    if ':' in url: +        _host, port = url.split(':', 1) + +        if not host: +            host = _host + +        if port: +            # If given, ports must be integers. +            if not port.isdigit(): +                raise LocationParseError(url) +            port = int(port) +        else: +            # Blank ports are cool, too. (rfc3986#section-3.2.3) +            port = None + +    elif not host and url: +        host = url + +    if not path: +        return Url(scheme, auth, host, port, path, query, fragment) + +    # Fragment +    if '#' in path: +        path, fragment = path.split('#', 1) + +    # Query +    if '?' in path: +        path, query = path.split('?', 1) + +    return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): +    """ +    Deprecated. Use :func:`.parse_url` instead. +    """ +    p = parse_url(url) +    return p.scheme or 'http', p.hostname, p.port diff --git a/requests/sessions.py b/requests/sessions.py index db227ca..df85a25 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -12,27 +12,28 @@ import os  from collections import Mapping  from datetime import datetime +from .auth import _basic_auth_str  from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str  from .cookies import (      cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest +from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT  from .hooks import default_hooks, dispatch_hook -from .utils import to_key_val_list, default_headers -from .exceptions import TooManyRedirects, InvalidSchema +from .utils import to_key_val_list, default_headers, to_native_string +from .exceptions import ( +    TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError)  from .structures import CaseInsensitiveDict  from .adapters import HTTPAdapter -from .utils import requote_uri, get_environ_proxies, get_netrc_auth +from .utils import ( +    requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, +    get_auth_from_url +)  from .status_codes import codes -REDIRECT_STATI = ( -    codes.moved, # 301 -    codes.found, # 302 -    codes.other, # 303 -    codes.temporary_moved, # 307 -) -DEFAULT_REDIRECT_LIMIT = 30 + +# formerly defined here, reexposed here for backward compatibility +from .models import REDIRECT_STATI  def merge_setting(request_setting, session_setting, dict_class=OrderedDict): @@ -63,6 +64,8 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict):          if v is None:              del merged_setting[k] +    merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) +      return merged_setting @@ -89,11 +92,13 @@ class SessionRedirectMixin(object):          i = 0 -        # ((resp.status_code is codes.see_other)) -        while ('location' in resp.headers and resp.status_code in REDIRECT_STATI): +        while resp.is_redirect:              prepared_request = req.copy() -            resp.content  # Consume socket so it can be released +            try: +                resp.content  # Consume socket so it can be released +            except (ChunkedEncodingError, ContentDecodingError, RuntimeError): +                resp.raw.read(decode_content=False)              if i >= self.max_redirects:                  raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects) @@ -121,7 +126,7 @@ class SessionRedirectMixin(object):              else:                  url = requote_uri(url) -            prepared_request.url = url +            prepared_request.url = to_native_string(url)              # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4              if (resp.status_code == codes.see_other and @@ -153,13 +158,19 @@ class SessionRedirectMixin(object):              except KeyError:                  pass -            extract_cookies_to_jar(prepared_request._cookies, -                                   prepared_request, resp.raw) +            extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw)              prepared_request._cookies.update(self.cookies)              prepared_request.prepare_cookies(prepared_request._cookies) +            # Rebuild auth and proxy information. +            proxies = self.rebuild_proxies(prepared_request, proxies) +            self.rebuild_auth(prepared_request, resp) + +            # Override the original request. +            req = prepared_request +              resp = self.send( -                prepared_request, +                req,                  stream=stream,                  timeout=timeout,                  verify=verify, @@ -173,6 +184,68 @@ class SessionRedirectMixin(object):              i += 1              yield resp +    def rebuild_auth(self, prepared_request, response): +        """ +        When being redirected we may want to strip authentication from the +        request to avoid leaking credentials. This method intelligently removes +        and reapplies authentication where possible to avoid credential loss. +        """ +        headers = prepared_request.headers +        url = prepared_request.url + +        if 'Authorization' in headers: +            # If we get redirected to a new host, we should strip out any +            # authentication headers. +            original_parsed = urlparse(response.request.url) +            redirect_parsed = urlparse(url) + +            if (original_parsed.hostname != redirect_parsed.hostname): +                del headers['Authorization'] + +        # .netrc might have more auth for us on our new host. +        new_auth = get_netrc_auth(url) if self.trust_env else None +        if new_auth is not None: +            prepared_request.prepare_auth(new_auth) + +        return + +    def rebuild_proxies(self, prepared_request, proxies): +        """ +        This method re-evaluates the proxy configuration by considering the +        environment variables. If we are redirected to a URL covered by +        NO_PROXY, we strip the proxy configuration. Otherwise, we set missing +        proxy keys for this URL (in case they were stripped by a previous +        redirect). + +        This method also replaces the Proxy-Authorization header where +        necessary. +        """ +        headers = prepared_request.headers +        url = prepared_request.url +        scheme = urlparse(url).scheme +        new_proxies = proxies.copy() if proxies is not None else {} + +        if self.trust_env and not should_bypass_proxies(url): +            environ_proxies = get_environ_proxies(url) + +            proxy = environ_proxies.get(scheme) + +            if proxy: +                new_proxies.setdefault(scheme, environ_proxies[scheme]) + +        if 'Proxy-Authorization' in headers: +            del headers['Proxy-Authorization'] + +        try: +            username, password = get_auth_from_url(new_proxies[scheme]) +        except KeyError: +            username, password = None, None + +        if username and password: +            headers['Proxy-Authorization'] = _basic_auth_str(username, password) + +        return new_proxies +  class Session(SessionRedirectMixin):      """A Requests session. @@ -320,7 +393,7 @@ class Session(SessionRedirectMixin):          :param auth: (optional) Auth tuple or callable to enable              Basic/Digest/Custom HTTP Auth.          :param timeout: (optional) Float describing the timeout of the -            request. +            request in seconds.          :param allow_redirects: (optional) Boolean. Set to True by default.          :param proxies: (optional) Dictionary mapping protocol to the URL of              the proxy. @@ -467,8 +540,7 @@ class Session(SessionRedirectMixin):          if not isinstance(request, PreparedRequest):              raise ValueError('You can only send PreparedRequests.') -        # Set up variables needed for resolve_redirects and dispatching of -        # hooks +        # Set up variables needed for resolve_redirects and dispatching of hooks          allow_redirects = kwargs.pop('allow_redirects', True)          stream = kwargs.get('stream')          timeout = kwargs.get('timeout') @@ -482,8 +554,10 @@ class Session(SessionRedirectMixin):          # Start time (approximately) of the request          start = datetime.utcnow() +          # Send the request          r = adapter.send(request, **kwargs) +          # Total elapsed time of the request (approximately)          r.elapsed = datetime.utcnow() - start @@ -492,15 +566,20 @@ class Session(SessionRedirectMixin):          # Persist cookies          if r.history: +              # If the hooks create history then we want those cookies too              for resp in r.history:                  extract_cookies_to_jar(self.cookies, resp.request, resp.raw) +          extract_cookies_to_jar(self.cookies, request, r.raw)          # Redirect resolving generator. -        gen = self.resolve_redirects(r, request, stream=stream, -                                     timeout=timeout, verify=verify, cert=cert, -                                     proxies=proxies) +        gen = self.resolve_redirects(r, request, +            stream=stream, +            timeout=timeout, +            verify=verify, +            cert=cert, +            proxies=proxies)          # Resolve redirects if allowed.          history = [resp for resp in gen] if allow_redirects else [] @@ -511,7 +590,10 @@ class Session(SessionRedirectMixin):              history.insert(0, r)              # Get the last request made              r = history.pop() -            r.history = tuple(history) +            r.history = history + +        if not stream: +            r.content          return r @@ -534,8 +616,10 @@ class Session(SessionRedirectMixin):          """Registers a connection adapter to a prefix.          Adapters are sorted in descending order by key length.""" +          self.adapters[prefix] = adapter          keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] +          for key in keys_to_move:              self.adapters[key] = self.adapters.pop(key) diff --git a/requests/structures.py b/requests/structures.py index a175913..9fd7818 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -106,8 +106,7 @@ class CaseInsensitiveDict(collections.MutableMapping):          return CaseInsensitiveDict(self._store.values())      def __repr__(self): -        return '%s(%r)' % (self.__class__.__name__, dict(self.items())) - +        return str(dict(self.items()))  class LookupDict(dict):      """Dictionary lookup object.""" diff --git a/requests/utils.py b/requests/utils.py index 7b7ff0a..68e50cf 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -24,10 +24,10 @@ from . import __version__  from . import certs  from .compat import parse_http_list as _parse_list_header  from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, -                     builtin_str, getproxies, proxy_bypass) +                     builtin_str, getproxies, proxy_bypass, urlunparse)  from .cookies import RequestsCookieJar, cookiejar_from_dict  from .structures import CaseInsensitiveDict -from .exceptions import MissingSchema, InvalidURL +from .exceptions import InvalidURL  _hush_pyflakes = (RequestsCookieJar,) @@ -61,7 +61,7 @@ def super_len(o):              return os.fstat(fileno).st_size      if hasattr(o, 'getvalue'): -        # e.g. BytesIO, cStringIO.StringI +        # e.g. BytesIO, cStringIO.StringIO          return len(o.getvalue()) @@ -466,9 +466,10 @@ def is_valid_cidr(string_network):      return True -def get_environ_proxies(url): -    """Return a dict of environment proxies.""" - +def should_bypass_proxies(url): +    """ +    Returns whether we should bypass proxies or not. +    """      get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())      # First check whether no_proxy is defined. If it is, check that the URL @@ -486,13 +487,13 @@ def get_environ_proxies(url):              for proxy_ip in no_proxy:                  if is_valid_cidr(proxy_ip):                      if address_in_network(ip, proxy_ip): -                        return {} +                        return True          else:              for host in no_proxy:                  if netloc.endswith(host) or netloc.split(':')[0].endswith(host):                      # The URL does match something in no_proxy, so we don't want                      # to apply the proxies on this URL. -                    return {} +                    return True      # If the system proxy settings indicate that this URL should be bypassed,      # don't proxy. @@ -506,12 +507,16 @@ def get_environ_proxies(url):          bypass = False      if bypass: -        return {} +        return True -    # If we get here, we either didn't have no_proxy set or we're not going -    # anywhere that no_proxy applies to, and the system settings don't require -    # bypassing the proxy for the current URL. -    return getproxies() +    return False + +def get_environ_proxies(url): +    """Return a dict of environment proxies.""" +    if should_bypass_proxies(url): +        return {} +    else: +        return getproxies()  def default_user_agent(name="python-requests"): @@ -548,7 +553,7 @@ def default_user_agent(name="python-requests"):  def default_headers():      return CaseInsensitiveDict({          'User-Agent': default_user_agent(), -        'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), +        'Accept-Encoding': ', '.join(('gzip', 'deflate')),          'Accept': '*/*'      }) @@ -622,13 +627,18 @@ def guess_json_utf(data):      return None -def except_on_missing_scheme(url): -    """Given a URL, raise a MissingSchema exception if the scheme is missing. -    """ -    scheme, netloc, path, params, query, fragment = urlparse(url) +def prepend_scheme_if_needed(url, new_scheme): +    '''Given a URL that may or may not have a scheme, prepend the given scheme. +    Does not replace a present scheme with the one provided as an argument.''' +    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) + +    # urlparse is a finicky beast, and sometimes decides that there isn't a +    # netloc present. Assume that it's being over-cautious, and switch netloc +    # and path if urlparse decided there was no netloc. +    if not netloc: +        netloc, path = path, netloc -    if not scheme: -        raise MissingSchema('Proxy URLs must have explicit schemes.') +    return urlunparse((scheme, netloc, path, params, query, fragment))  def get_auth_from_url(url): |