From d075cc8a1294c77c994dc5fd220ecb163ed31b93 Mon Sep 17 00:00:00 2001
From: SVN-Git Migration
Date: Thu, 8 Oct 2015 13:41:33 -0700
Subject: Imported Upstream version 2.3.0

---
 requests/__init__.py                           |   4 +-
 requests/adapters.py                           |  24 +-
 requests/api.py                                |   2 +-
 requests/auth.py                               |   3 -
 requests/exceptions.py                         |  17 +-
 requests/models.py                             |  72 ++-
 requests/packages/urllib3/_collections.py      | 106 +++-
 requests/packages/urllib3/connection.py        |  84 ++-
 requests/packages/urllib3/connectionpool.py    | 101 ++--
 requests/packages/urllib3/contrib/pyopenssl.py |  62 +-
 requests/packages/urllib3/exceptions.py        |   5 +
 requests/packages/urllib3/fields.py            |   2 +-
 .../packages/ssl_match_hostname/__init__.py    |   2 +-
 requests/packages/urllib3/response.py          |  18 +-
 requests/packages/urllib3/util.py              | 648 ---------------------
 requests/packages/urllib3/util/__init__.py     |  27 +
 requests/packages/urllib3/util/connection.py   |  45 ++
 requests/packages/urllib3/util/request.py      |  68 +++
 requests/packages/urllib3/util/response.py     |  13 +
 requests/packages/urllib3/util/ssl_.py         | 133 +++++
 requests/packages/urllib3/util/timeout.py      | 234 ++++++++
 requests/packages/urllib3/util/url.py          | 162 ++++++
 requests/sessions.py                           | 134 ++++-
 requests/structures.py                         |   3 +-
 requests/utils.py                              |  50 +-
 25 files changed, 1174 insertions(+), 845 deletions(-)
 delete mode 100644 requests/packages/urllib3/util.py
 create mode 100644 requests/packages/urllib3/util/__init__.py
 create mode 100644 requests/packages/urllib3/util/connection.py
 create mode 100644 requests/packages/urllib3/util/request.py
 create mode 100644 requests/packages/urllib3/util/response.py
 create mode 100644 requests/packages/urllib3/util/ssl_.py
 create mode 100644 requests/packages/urllib3/util/timeout.py
 create mode 100644 requests/packages/urllib3/util/url.py

diff --git a/requests/__init__.py b/requests/__init__.py
index 2e9f3a0..bba1900 100644
--- a/requests/__init__.py
+++ b/requests/__init__.py
@@ -42,8 +42,8 @@ is at .
""" __title__ = 'requests' -__version__ = '2.2.1' -__build__ = 0x020201 +__version__ = '2.3.0' +__build__ = 0x020300 __author__ = 'Kenneth Reitz' __license__ = 'Apache 2.0' __copyright__ = 'Copyright 2014 Kenneth Reitz' diff --git a/requests/adapters.py b/requests/adapters.py index dd10e95..eb7a2d2 100644 --- a/requests/adapters.py +++ b/requests/adapters.py @@ -16,7 +16,7 @@ from .packages.urllib3.response import HTTPResponse from .packages.urllib3.util import Timeout as TimeoutSauce from .compat import urlparse, basestring, urldefrag, unquote from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - except_on_missing_scheme, get_auth_from_url) + prepend_scheme_if_needed, get_auth_from_url) from .structures import CaseInsensitiveDict from .packages.urllib3.exceptions import MaxRetryError from .packages.urllib3.exceptions import TimeoutError @@ -203,7 +203,7 @@ class HTTPAdapter(BaseAdapter): proxy = proxies.get(urlparse(url.lower()).scheme) if proxy: - except_on_missing_scheme(proxy) + proxy = prepend_scheme_if_needed(proxy, 'http') proxy_headers = self.proxy_headers(proxy) if not proxy in self.proxy_manager: @@ -310,10 +310,7 @@ class HTTPAdapter(BaseAdapter): chunked = not (request.body is None or 'Content-Length' in request.headers) - if stream: - timeout = TimeoutSauce(connect=timeout) - else: - timeout = TimeoutSauce(connect=timeout, read=timeout) + timeout = TimeoutSauce(connect=timeout, read=timeout) try: if not chunked: @@ -372,25 +369,20 @@ class HTTPAdapter(BaseAdapter): conn._put_conn(low_conn) except socket.error as sockerr: - raise ConnectionError(sockerr) + raise ConnectionError(sockerr, request=request) except MaxRetryError as e: - raise ConnectionError(e) + raise ConnectionError(e, request=request) except _ProxyError as e: raise ProxyError(e) except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): - raise SSLError(e) + raise SSLError(e, request=request) elif isinstance(e, TimeoutError): - raise Timeout(e) + raise Timeout(e, request=request) else: raise - r = self.build_response(request, resp) - - if not stream: - r.content - - return r + return self.build_response(request, resp) diff --git a/requests/api.py b/requests/api.py index baf43dd..01d853d 100644 --- a/requests/api.py +++ b/requests/api.py @@ -26,7 +26,7 @@ def request(method, url, **kwargs): :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) Float describing the timeout of the request. + :param timeout: (optional) Float describing the timeout of the request in seconds. :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. 
diff --git a/requests/auth.py b/requests/auth.py index 6664cd8..9f831b7 100644 --- a/requests/auth.py +++ b/requests/auth.py @@ -11,7 +11,6 @@ import os import re import time import hashlib -import logging from base64 import b64encode @@ -19,8 +18,6 @@ from .compat import urlparse, str from .cookies import extract_cookies_to_jar from .utils import parse_dict_header -log = logging.getLogger(__name__) - CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' diff --git a/requests/exceptions.py b/requests/exceptions.py index cd3c760..a4ee9d6 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -14,15 +14,22 @@ class RequestException(IOError): """There was an ambiguous exception that occurred while handling your request.""" + def __init__(self, *args, **kwargs): + """ + Initialize RequestException with `request` and `response` objects. + """ + response = kwargs.pop('response', None) + self.response = response + self.request = kwargs.pop('request', None) + if (response is not None and not self.request and + hasattr(response, 'request')): + self.request = self.response.request + super(RequestException, self).__init__(*args, **kwargs) + class HTTPError(RequestException): """An HTTP error occurred.""" - def __init__(self, *args, **kwargs): - """ Initializes HTTPError with optional `response` object. """ - self.response = kwargs.pop('response', None) - super(HTTPError, self).__init__(*args, **kwargs) - class ConnectionError(RequestException): """A Connection error occurred.""" diff --git a/requests/models.py b/requests/models.py index ae46a83..120968f 100644 --- a/requests/models.py +++ b/requests/models.py @@ -8,7 +8,6 @@ This module contains the primary objects that power Requests. """ import collections -import logging import datetime from io import BytesIO, UnsupportedOperation @@ -31,12 +30,20 @@ from .utils import ( from .compat import ( cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, is_py2, chardet, json, builtin_str, basestring, IncompleteRead) - +from .status_codes import codes + +#: The set of HTTP status codes that indicate an automatically +#: processable redirect. +REDIRECT_STATI = ( + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_moved, # 307 +) +DEFAULT_REDIRECT_LIMIT = 30 CONTENT_CHUNK_SIZE = 10 * 1024 ITER_CHUNK_SIZE = 512 -log = logging.getLogger(__name__) - class RequestEncodingMixin(object): @property @@ -401,9 +408,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): is_stream = all([ hasattr(data, '__iter__'), - not isinstance(data, basestring), - not isinstance(data, list), - not isinstance(data, dict) + not isinstance(data, (basestring, list, tuple, dict)) ]) try: @@ -517,7 +522,7 @@ class Response(object): self._content = False self._content_consumed = False - #: Integer Code of responded HTTP Status. + #: Integer Code of responded HTTP Status, e.g. 404 or 200. self.status_code = None #: Case-insensitive Dictionary of Response Headers. @@ -541,6 +546,7 @@ class Response(object): #: up here. The list is sorted from the oldest to the most recent request. self.history = [] + #: Textual reason of responded HTTP Status, e.g. "Not Found" or "OK". self.reason = None #: A CookieJar of Cookies the server sent back. 
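
With the reworked ``RequestException.__init__`` above, every requests exception can now carry the ``PreparedRequest`` (and, when one was received, the ``Response``) that triggered it. A hedged sketch, assuming an unreachable local port::

    import requests
    from requests.exceptions import RequestException

    try:
        requests.get('http://localhost:9999/', timeout=1)
    except RequestException as e:
        if e.request is not None:
            print('failed request:', e.request.method, e.request.url)
        if e.response is not None:
            print('server answered with', e.response.status_code)
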
@@ -567,6 +573,7 @@ class Response(object): # pickled objects do not have .raw setattr(self, '_content_consumed', True) + setattr(self, 'raw', None) def __repr__(self): return '' % (self.status_code) @@ -591,10 +598,16 @@ class Response(object): return False return True + @property + def is_redirect(self): + """True if this Response is a well-formed HTTP redirect that could have + been processed automatically (by :meth:`Session.resolve_redirects`). + """ + return ('location' in self.headers and self.status_code in REDIRECT_STATI) + @property def apparent_encoding(self): - """The apparent encoding, provided by the lovely Charade library - (Thanks, Ian!).""" + """The apparent encoding, provided by the chardet library""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -603,17 +616,15 @@ class Response(object): large responses. The chunk size is the number of bytes it should read into memory. This is not necessarily the length of each item returned as decoding can take place. - """ - if self._content_consumed: - # simulate reading small chunks of the content - return iter_slices(self._content, chunk_size) + If decode_unicode is True, content will be decoded using the best + available encoding based on the response. + """ def generate(): try: # Special case for urllib3. try: - for chunk in self.raw.stream(chunk_size, - decode_content=True): + for chunk in self.raw.stream(chunk_size, decode_content=True): yield chunk except IncompleteRead as e: raise ChunkedEncodingError(e) @@ -629,12 +640,17 @@ class Response(object): self._content_consumed = True - gen = generate() + # simulate reading small chunks of the content + reused_chunks = iter_slices(self._content, chunk_size) + + stream_chunks = generate() + + chunks = reused_chunks if self._content_consumed else stream_chunks if decode_unicode: - gen = stream_decode_response_unicode(gen, self) + chunks = stream_decode_response_unicode(chunks, self) - return gen + return chunks def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None): """Iterates over the response data, one line at a time. When @@ -644,8 +660,7 @@ class Response(object): pending = None - for chunk in self.iter_content(chunk_size=chunk_size, - decode_unicode=decode_unicode): + for chunk in self.iter_content(chunk_size=chunk_size, decode_unicode=decode_unicode): if pending is not None: chunk = pending + chunk @@ -693,7 +708,7 @@ class Response(object): If Response.encoding is None, encoding will be guessed using ``chardet``. - The encoding of the response content is determined based soley on HTTP + The encoding of the response content is determined based solely on HTTP headers, following RFC 2616 to the letter. If you can take advantage of non-HTTP knowledge to make a better guess at the encoding, you should set ``r.encoding`` appropriately before accessing this property. @@ -737,7 +752,14 @@ class Response(object): # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: - return json.loads(self.content.decode(encoding), **kwargs) + try: + return json.loads(self.content.decode(encoding), **kwargs) + except UnicodeDecodeError: + # Wrong UTF codec detected; usually because it's not UTF-8 + # but some other 8-bit codec. This is an RFC violation, + # and the server didn't bother to tell us what codec *was* + # used. 
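
The new ``Response.is_redirect`` property above makes it easy to detect a redirect that was *not* followed automatically. A small illustration (httpbin.org is used here only as an example endpoint)::

    import requests

    r = requests.get('http://httpbin.org/redirect/1', allow_redirects=False)
    print(r.status_code)          # e.g. 302
    print(r.is_redirect)          # True: redirect status code plus a Location header
    print(r.headers['location'])  # where the redirect points
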
+ pass return json.loads(self.text, **kwargs) @property @@ -773,8 +795,8 @@ class Response(object): raise HTTPError(http_error_msg, response=self) def close(self): - """Closes the underlying file descriptor and releases the connection - back to the pool. + """Releases the connection back to the pool. Once this method has been + called the underlying ``raw`` object must not be accessed again. *Note: Should not normally need to be called explicitly.* """ diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index 5907b0d..9cea3a4 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -4,7 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from collections import MutableMapping +from collections import Mapping, MutableMapping try: from threading import RLock except ImportError: # Platform-specific: No threads available @@ -20,9 +20,10 @@ try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict +from .packages.six import itervalues -__all__ = ['RecentlyUsedContainer'] +__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] _Null = object() @@ -101,3 +102,104 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: return self._container.keys() + + +class HTTPHeaderDict(MutableMapping): + """ + :param headers: + An iterable of field-value pairs. Must not contain multiple field names + when compared case-insensitively. + + :param kwargs: + Additional field-value pairs to pass in to ``dict.update``. + + A ``dict`` like container for storing HTTP Headers. + + Field names are stored and compared case-insensitively in compliance with + RFC 2616. Iteration provides the first case-sensitive key seen for each + case-insensitive pair. + + Using ``__setitem__`` syntax overwrites fields that compare equal + case-insensitively in order to maintain ``dict``'s api. For fields that + compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add`` + in a loop. + + If multiple fields that are equal case-insensitively are passed to the + constructor or ``.update``, the behavior is undefined and some will be + lost. + + >>> headers = HTTPHeaderDict() + >>> headers.add('Set-Cookie', 'foo=bar') + >>> headers.add('set-cookie', 'baz=quxx') + >>> headers['content-length'] = '7' + >>> headers['SET-cookie'] + 'foo=bar, baz=quxx' + >>> headers['Content-Length'] + '7' + + If you want to access the raw headers with their original casing + for debugging purposes you can access the private ``._data`` attribute + which is a normal python ``dict`` that maps the case-insensitive key to a + list of tuples stored as (case-sensitive-original-name, value). Using the + structure from above as our example: + + >>> headers._data + {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], + 'content-length': [('content-length', '7')]} + """ + + def __init__(self, headers=None, **kwargs): + self._data = {} + if headers is None: + headers = {} + self.update(headers, **kwargs) + + def add(self, key, value): + """Adds a (name, value) pair, doesn't overwrite the value if it already + exists. + + >>> headers = HTTPHeaderDict(foo='bar') + >>> headers.add('Foo', 'baz') + >>> headers['foo'] + 'bar, baz' + """ + self._data.setdefault(key.lower(), []).append((key, value)) + + def getlist(self, key): + """Returns a list of all the values for the named field. 
Returns an + empty list if the key doesn't exist.""" + return self[key].split(', ') if key in self else [] + + def copy(self): + h = HTTPHeaderDict() + for key in self._data: + for rawkey, value in self._data[key]: + h.add(rawkey, value) + return h + + def __eq__(self, other): + if not isinstance(other, Mapping): + return False + other = HTTPHeaderDict(other) + return dict((k1, self[k1]) for k1 in self._data) == \ + dict((k2, other[k2]) for k2 in other._data) + + def __getitem__(self, key): + values = self._data[key.lower()] + return ', '.join(value[1] for value in values) + + def __setitem__(self, key, value): + self._data[key.lower()] = [(key, value)] + + def __delitem__(self, key): + del self._data[key.lower()] + + def __len__(self): + return len(self._data) + + def __iter__(self): + for headers in itervalues(self._data): + yield headers[0][0] + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, dict(self.items())) diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py index 2124774..5feb332 100644 --- a/requests/packages/urllib3/connection.py +++ b/requests/packages/urllib3/connection.py @@ -4,6 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys import socket from socket import timeout as SocketTimeout @@ -38,6 +39,7 @@ from .exceptions import ( ConnectTimeoutError, ) from .packages.ssl_match_hostname import match_hostname +from .packages import six from .util import ( assert_fingerprint, resolve_cert_reqs, @@ -53,34 +55,50 @@ port_by_scheme = { class HTTPConnection(_HTTPConnection, object): + """ + Based on httplib.HTTPConnection but provides an extra constructor + backwards-compatibility layer between older and newer Pythons. + """ + default_port = port_by_scheme['http'] # By default, disable Nagle's Algorithm. tcp_nodelay = 1 + def __init__(self, *args, **kw): + if six.PY3: # Python 3 + kw.pop('strict', None) + if sys.version_info < (2, 7): # Python 2.6 and older + kw.pop('source_address', None) + + # Pre-set source_address in case we have an older Python like 2.6. + self.source_address = kw.get('source_address') + + # Superclass also sets self.source_address in Python 2.7+. + _HTTPConnection.__init__(self, *args, **kw) + def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it + """ Establish a socket connection and set nodelay settings on it. :return: a new socket connection """ - try: - conn = socket.create_connection( - (self.host, self.port), - self.timeout, - self.source_address, - ) - except AttributeError: # Python 2.6 - conn = socket.create_connection( - (self.host, self.port), - self.timeout, - ) - conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, - self.tcp_nodelay) + extra_args = [] + if self.source_address: # Python 2.7+ + extra_args.append(self.source_address) + + conn = socket.create_connection( + (self.host, self.port), self.timeout, *extra_args) + conn.setsockopt( + socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay) + return conn def _prepare_conn(self, conn): self.sock = conn - if self._tunnel_host: + # the _tunnel_host attribute was added in python 2.6.3 (via + # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do + # not have them. + if getattr(self, '_tunnel_host', None): # TODO: Fix tunnel so it doesn't depend on self.sock state. 
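
Usage of the ``HTTPHeaderDict`` introduced in ``_collections.py`` above, via the vendored import path; keys compare case-insensitively and ``add()`` accumulates repeated fields instead of overwriting them::

    from requests.packages.urllib3._collections import HTTPHeaderDict

    h = HTTPHeaderDict()
    h.add('Set-Cookie', 'foo=bar')
    h.add('set-cookie', 'baz=quux')
    h['Content-Length'] = '7'

    print(h['SET-COOKIE'])          # 'foo=bar, baz=quux'
    print(h.getlist('set-cookie'))  # ['foo=bar', 'baz=quux']
    print(len(h))                   # 2 distinct (case-insensitive) field names
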
self._tunnel() @@ -93,15 +111,18 @@ class HTTPSConnection(HTTPConnection): default_port = port_by_scheme['https'] def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None): - try: - HTTPConnection.__init__(self, host, port, strict, timeout, source_address) - except TypeError: # Python 2.6 - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): + + HTTPConnection.__init__(self, host, port, strict=strict, + timeout=timeout, **kw) + self.key_file = key_file self.cert_file = cert_file + # Required property for Google AppEngine 1.9.0 which otherwise causes + # HTTPS requests to go out as HTTP. (See Issue #356) + self._protocol = 'https' + def connect(self): conn = self._new_conn() self._prepare_conn(conn) @@ -116,6 +137,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs = None ca_certs = None ssl_version = None + conn_kw = {} def set_cert(self, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, @@ -130,11 +152,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): def connect(self): # Add certificate verification + try: sock = socket.create_connection( - address=(self.host, self.port), - timeout=self.timeout, - ) + address=(self.host, self.port), timeout=self.timeout, + **self.conn_kw) except SocketTimeout: raise ConnectTimeoutError( self, "Connection to %s timed out. (connect timeout=%s)" % @@ -146,21 +168,25 @@ class VerifiedHTTPSConnection(HTTPSConnection): resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) resolved_ssl_version = resolve_ssl_version(self.ssl_version) - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. + hostname = self.host if getattr(self, '_tunnel_host', None): + # _tunnel_host was added in Python 2.6.3 + # (See: http://hg.python.org/cpython/rev/0f57b30a152f) + self.sock = sock # Calls self._set_hostport(), so self.host is # self._tunnel_host below. self._tunnel() + # Override the host with the one we're requesting data from. 
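
The tunnelling change above means that, for HTTPS requests sent through a CONNECT proxy, SNI and certificate matching are performed against the origin host rather than the proxy. A sketch at the requests level, assuming a local forward proxy listening on port 8888::

    import requests

    proxies = {'https': 'http://127.0.0.1:8888'}  # assumed local forward proxy
    # Certificate verification now checks example.com, not the proxy's hostname.
    r = requests.get('https://example.com/', proxies=proxies, verify=True)
    print(r.status_code)
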
+ hostname = self._tunnel_host + # Wrap socket using verification with the root certs in # trusted_root_certs self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, cert_reqs=resolved_cert_reqs, ca_certs=self.ca_certs, - server_hostname=self.host, + server_hostname=hostname, ssl_version=resolved_ssl_version) if resolved_cert_reqs != ssl.CERT_NONE: @@ -169,7 +195,7 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif self.assert_hostname is not False: match_hostname(self.sock.getpeercert(), - self.assert_hostname or self.host) + self.assert_hostname or hostname) if ssl: diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 243d700..95a53a7 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -4,6 +4,7 @@ # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +import sys import errno import logging @@ -19,9 +20,11 @@ except ImportError: from .exceptions import ( ClosedPoolError, + ConnectionError, ConnectTimeoutError, EmptyPoolError, HostChangedError, + LocationParseError, MaxRetryError, SSLError, TimeoutError, @@ -39,7 +42,6 @@ from .connection import ( from .request import RequestMethods from .response import HTTPResponse from .util import ( - assert_fingerprint, get_host, is_connection_dropped, Timeout, @@ -64,6 +66,9 @@ class ConnectionPool(object): QueueCls = LifoQueue def __init__(self, host, port=None): + if host is None: + raise LocationParseError(host) + # httplib doesn't like it when we include brackets in ipv6 addresses host = host.strip('[]') @@ -135,7 +140,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False, - headers=None, _proxy=None, _proxy_headers=None): + headers=None, _proxy=None, _proxy_headers=None, **conn_kw): ConnectionPool.__init__(self, host, port) RequestMethods.__init__(self, headers) @@ -162,6 +167,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections = 0 self.num_requests = 0 + if sys.version_info < (2, 7): # Python 2.6 and older + conn_kw.pop('source_address', None) + self.conn_kw = conn_kw + def _new_conn(self): """ Return a fresh :class:`HTTPConnection`. @@ -170,13 +179,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - extra_params = {} - if not six.PY3: # Python 2 - extra_params['strict'] = self.strict - conn = self.ConnectionCls(host=self.host, port=self.port, timeout=self.timeout.connect_timeout, - **extra_params) + strict=self.strict, **self.conn_kw) if self.proxy is not None: # Enable Nagle's algorithm for proxies, to avoid packet # fragmentation. @@ -238,8 +243,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): pass except Full: # This should never happen if self.block == True - log.warning("HttpConnectionPool is full, discarding connection: %s" - % self.host) + log.warning( + "Connection pool is full, discarding connection: %s" % + self.host) # Connection never got put back into the pool, close it. if conn: @@ -414,10 +420,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param retries: Number of retries to allow before raising a MaxRetryError exception. + If `False`, then retries are disabled and any exception is raised + immediately. 
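
Passing ``retries=False`` to ``urlopen`` now disables retries entirely and surfaces connection failures as the new urllib3-level ``ConnectionError`` instead of wrapping them in ``MaxRetryError``. A sketch against the vendored copy, assuming local port 9999 is unused::

    from requests.packages.urllib3 import HTTPConnectionPool
    from requests.packages.urllib3.exceptions import ConnectionError

    pool = HTTPConnectionPool('localhost', port=9999)
    try:
        pool.urlopen('GET', '/', retries=False)
    except ConnectionError as e:
        print('gave up immediately:', e)
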
:param redirect: If True, automatically handle redirects (status codes 301, 302, - 303, 307, 308). Each redirect counts as a retry. + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. :param assert_same_host: If ``True``, will make sure that the host of the pool requests is @@ -451,7 +460,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if headers is None: headers = self.headers - if retries < 0: + if retries < 0 and retries is not False: raise MaxRetryError(self, url) if release_conn is None: @@ -470,6 +479,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): headers = headers.copy() headers.update(self.proxy_headers) + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + try: # Request a connection from the queue conn = self._get_conn(timeout=pool_timeout) @@ -497,37 +510,40 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # ``response.read()``) except Empty: - # Timed out by queue + # Timed out by queue. raise EmptyPoolError(self, "No pool connections are available.") - except BaseSSLError as e: + except (BaseSSLError, CertificateError) as e: + # Release connection unconditionally because there is no way to + # close it externally in case of exception. + release_conn = True raise SSLError(e) - except CertificateError as e: - # Name mismatch - raise SSLError(e) + except (TimeoutError, HTTPException, SocketError) as e: + if conn: + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn.close() + conn = None - except TimeoutError as e: - # Connection broken, discard. - conn = None - # Save the error off for retry logic. - err = e + if not retries: + if isinstance(e, TimeoutError): + # TimeoutError is exempt from MaxRetryError-wrapping. + # FIXME: ... Not sure why. Add a reason here. + raise - if retries == 0: - raise + # Wrap unexpected exceptions with the most appropriate + # module-level exception and re-raise. + if isinstance(e, SocketError) and self.proxy: + raise ProxyError('Cannot connect to proxy.', e) - except (HTTPException, SocketError) as e: - # Connection broken, discard. It will be replaced next _get_conn(). - conn = None - # This is necessary so we can access e below - err = e + if retries is False: + raise ConnectionError('Connection failed.', e) - if retries == 0: - if isinstance(e, SocketError) and self.proxy is not None: - raise ProxyError('Cannot connect to proxy. ' - 'Socket error: %s.' % e) - else: - raise MaxRetryError(self, url, e) + raise MaxRetryError(self, url, e) + + # Keep track of the error for the retry warning. + err = e finally: if release_conn: @@ -538,8 +554,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): if not conn: # Try again - log.warn("Retrying (%d attempts remain) after connection " - "broken by '%r': %s" % (retries, err, url)) + log.warning("Retrying (%d attempts remain) after connection " + "broken by '%r': %s" % (retries, err, url)) return self.urlopen(method, url, body, headers, retries - 1, redirect, assert_same_host, timeout=timeout, pool_timeout=pool_timeout, @@ -547,7 +563,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Handle redirect? 
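
With the default integer retry counter, the reworked error handling above still ends in ``MaxRetryError`` once the budget is exhausted; each broken connection is discarded and logged via the new retry warning. For comparison with the ``retries=False`` sketch earlier (same assumed-unused port)::

    from requests.packages.urllib3 import HTTPConnectionPool
    from requests.packages.urllib3.exceptions import MaxRetryError

    pool = HTTPConnectionPool('localhost', port=9999)
    try:
        pool.urlopen('GET', '/', retries=2)  # each failed attempt decrements the counter
    except MaxRetryError as e:
        print('all retries exhausted for', e.url)
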
redirect_location = redirect and response.get_redirect_location() - if redirect_location: + if redirect_location and retries is not False: if response.status == 303: method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) @@ -586,10 +602,14 @@ class HTTPSConnectionPool(HTTPConnectionPool): _proxy=None, _proxy_headers=None, key_file=None, cert_file=None, cert_reqs=None, ca_certs=None, ssl_version=None, - assert_hostname=None, assert_fingerprint=None): + assert_hostname=None, assert_fingerprint=None, + **conn_kw): + + if sys.version_info < (2, 7): # Python 2.6 or older + conn_kw.pop('source_address', None) HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize, - block, headers, _proxy, _proxy_headers) + block, headers, _proxy, _proxy_headers, **conn_kw) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -597,6 +617,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): self.ssl_version = ssl_version self.assert_hostname = assert_hostname self.assert_fingerprint = assert_fingerprint + self.conn_kw = conn_kw def _prepare_conn(self, conn): """ @@ -612,6 +633,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_hostname=self.assert_hostname, assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version + conn.conn_kw = self.conn_kw if self.proxy is not None: # Python 2.7+ @@ -648,6 +670,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): extra_params = {} if not six.PY3: # Python 2 extra_params['strict'] = self.strict + extra_params.update(self.conn_kw) conn = self.ConnectionCls(host=actual_host, port=actual_port, timeout=self.timeout.connect_timeout, diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index d9bda15..21a12c6 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -1,4 +1,7 @@ -'''SSL with SNI_-support for Python 2. +'''SSL with SNI_-support for Python 2. Follow these instructions if you would +like to verify SSL certificates in Python 2. Note, the default libraries do +*not* do certificate checking; you need to do additional work to validate +certificates yourself. This needs the following packages installed: @@ -6,9 +9,15 @@ This needs the following packages installed: * ndg-httpsclient (tested with 0.3.2) * pyasn1 (tested with 0.1.6) -To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. -This can be done in a ``sitecustomize`` module, or at any other time before -your application begins using ``urllib3``, like this:: +You can install them with the following command: + + pip install pyopenssl ndg-httpsclient pyasn1 + +To activate certificate checking, call +:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code +before you begin making HTTP requests. This can be done in a ``sitecustomize`` +module, or at any other time before your application begins using ``urllib3``, +like this:: try: import urllib3.contrib.pyopenssl @@ -29,9 +38,8 @@ Module Variables ---------------- :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - Default: ``EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA256 - EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA EECDH RC4 !aNULL !eNULL !LOW !3DES - !MD5 !EXP !PSK !SRP !DSS'`` + Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: + ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. 
_crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) @@ -43,7 +51,7 @@ from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName import OpenSSL.SSL from pyasn1.codec.der import decoder as der_decoder from pyasn1.type import univ, constraint -from socket import _fileobject +from socket import _fileobject, timeout import ssl import select from cStringIO import StringIO @@ -69,12 +77,22 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -# Default SSL/TLS cipher list. -# Recommendation by https://community.qualys.com/blogs/securitylabs/2013/08/05/ -# configuring-apache-nginx-and-openssl-for-forward-secrecy -DEFAULT_SSL_CIPHER_LIST = 'EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM ' + \ - 'EECDH+ECDSA+SHA256 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EDH+aRSA ' + \ - 'EECDH RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS' +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ + "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ + "!aNULL:!MD5:!DSS" orig_util_HAS_SNI = util.HAS_SNI @@ -139,6 +157,13 @@ def get_subj_alt_name(peer_cert): class fileobject(_fileobject): + def _wait_for_sock(self): + rd, wd, ed = select.select([self._sock], [], [], + self._sock.gettimeout()) + if not rd: + raise timeout() + + def read(self, size=-1): # Use max, disallow tiny reads in a loop as they are very inefficient. 
# We never leave read() with any leftover data from a new recv() call @@ -156,6 +181,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -183,6 +209,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(left) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -234,6 +261,7 @@ class fileobject(_fileobject): break buffers.append(data) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue break return "".join(buffers) @@ -244,6 +272,7 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: + self._wait_for_sock() continue if not data: break @@ -271,7 +300,8 @@ class fileobject(_fileobject): try: data = self._sock.recv(self._rbufsize) except OpenSSL.SSL.WantReadError: - continue + self._wait_for_sock() + continue if not data: break left = size - buf_len @@ -366,6 +396,8 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ctx.load_verify_locations(ca_certs, None) except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + else: + ctx.set_default_verify_paths() # Disable TLS compression to migitate CRIME attack (issue #309) OP_NO_COMPRESSION = 0x20000 diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 98ef9ab..b4df831 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -44,6 +44,11 @@ class ProxyError(HTTPError): pass +class ConnectionError(HTTPError): + "Raised when a normal connection fails." + pass + + class DecodeError(HTTPError): "Raised when automatic decoding based on Content-Type fails." pass diff --git a/requests/packages/urllib3/fields.py b/requests/packages/urllib3/fields.py index ed01765..da79e92 100644 --- a/requests/packages/urllib3/fields.py +++ b/requests/packages/urllib3/fields.py @@ -15,7 +15,7 @@ def guess_content_type(filename, default='application/octet-stream'): Guess the "Content-Type" of a file. :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetimes`. + The filename to guess the "Content-Type" of using :mod:`mimetypes`. :param default: If no "Content-Type" can be guessed, default to `default`. """ diff --git a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py index 3aa5b2e..dd59a75 100644 --- a/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ b/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py @@ -7,7 +7,7 @@ except ImportError: from backports.ssl_match_hostname import CertificateError, match_hostname except ImportError: # Our vendored copy - from _implementation import CertificateError, match_hostname + from ._implementation import CertificateError, match_hostname # Not needed, but documenting what we provide. 
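
To actually enable the pyOpenSSL-backed SNI and certificate checking described in ``contrib/pyopenssl.py`` above, the injection call has to run before any requests are made. A hedged sketch using the vendored module path::

    # Requires the optional pyopenssl, ndg-httpsclient and pyasn1 packages.
    try:
        from requests.packages.urllib3.contrib import pyopenssl
        pyopenssl.inject_into_urllib3()
    except ImportError:
        pass  # fall back to the standard library ssl module
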
__all__ = ('CertificateError', 'match_hostname') diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index 6a1fe1a..db44182 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -9,6 +9,7 @@ import logging import zlib import io +from ._collections import HTTPHeaderDict from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type from .util import is_fp_closed @@ -79,7 +80,10 @@ class HTTPResponse(io.IOBase): def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = headers or {} + + self.headers = HTTPHeaderDict() + if headers: + self.headers.update(headers) self.status = status self.version = version self.reason = reason @@ -249,17 +253,9 @@ class HTTPResponse(io.IOBase): with ``original_response=r``. """ - # Normalize headers between different versions of Python - headers = {} + headers = HTTPHeaderDict() for k, v in r.getheaders(): - # Python 3: Header keys are returned capitalised - k = k.lower() - - has_value = headers.get(k) - if has_value: # Python 3: Repeating header keys are unmerged. - v = ', '.join([has_value, v]) - - headers[k] = v + headers.add(k, v) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) diff --git a/requests/packages/urllib3/util.py b/requests/packages/urllib3/util.py deleted file mode 100644 index bd26631..0000000 --- a/requests/packages/urllib3/util.py +++ /dev/null @@ -1,648 +0,0 @@ -# urllib3/util.py -# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) -# -# This module is part of urllib3 and is released under -# the MIT License: http://www.opensource.org/licenses/mit-license.php - - -from base64 import b64encode -from binascii import hexlify, unhexlify -from collections import namedtuple -from hashlib import md5, sha1 -from socket import error as SocketError, _GLOBAL_DEFAULT_TIMEOUT -import time - -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. - select = False - -try: # Test for SSL features - SSLContext = None - HAS_SNI = False - - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - -from .packages import six -from .exceptions import LocationParseError, SSLError, TimeoutStateError - - -_Default = object() -# The default timeout to use for socket connections. This is the attribute used -# by httplib to define the default timeout - - -def current_time(): - """ - Retrieve the current time, this function is mocked out in unit testing. - """ - return time.time() - - -class Timeout(object): - """ - Utility object for storing timeout values. - - Example usage: - - .. code-block:: python - - timeout = urllib3.util.Timeout(connect=2.0, read=7.0) - pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) - pool.request(...) # Etc, etc - - :param connect: - The maximum amount of time to wait for a connection attempt to a server - to succeed. Omitting the parameter will default the connect timeout to - the system default, probably `the global default timeout in socket.py - `_. 
- None will set an infinite timeout for connection attempts. - - :type connect: integer, float, or None - - :param read: - The maximum amount of time to wait between consecutive - read operations for a response from the server. Omitting - the parameter will default the read timeout to the system - default, probably `the global default timeout in socket.py - `_. - None will set an infinite timeout. - - :type read: integer, float, or None - - :param total: - This combines the connect and read timeouts into one; the read timeout - will be set to the time leftover from the connect attempt. In the - event that both a connect timeout and a total are specified, or a read - timeout and a total are specified, the shorter timeout will be applied. - - Defaults to None. - - :type total: integer, float, or None - - .. note:: - - Many factors can affect the total amount of time for urllib3 to return - an HTTP response. Specifically, Python's DNS resolver does not obey the - timeout specified on the socket. Other factors that can affect total - request time include high CPU load, high swap, the program running at a - low priority level, or other behaviors. The observed running time for - urllib3 to return a response may be greater than the value passed to - `total`. - - In addition, the read and total timeouts only measure the time between - read operations on the socket connecting the client and the server, - not the total amount of time for the request to return a complete - response. For most requests, the timeout is raised because the server - has not sent the first byte in the specified time. This is not always - the case; if a server streams one byte every fifteen seconds, a timeout - of 20 seconds will not ever trigger, even though the request will - take several minutes to complete. - - If your goal is to cut off any request after a set amount of wall clock - time, consider having a second "watcher" thread to cut off a slow - request. - """ - - #: A sentinel object representing the default timeout value - DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT - - def __init__(self, total=None, connect=_Default, read=_Default): - self._connect = self._validate_timeout(connect, 'connect') - self._read = self._validate_timeout(read, 'read') - self.total = self._validate_timeout(total, 'total') - self._start_connect = None - - def __str__(self): - return '%s(connect=%r, read=%r, total=%r)' % ( - type(self).__name__, self._connect, self._read, self.total) - - - @classmethod - def _validate_timeout(cls, value, name): - """ Check that a timeout attribute is valid - - :param value: The timeout value to validate - :param name: The name of the timeout attribute to validate. This is used - for clear error messages - :return: the value - :raises ValueError: if the type is not an integer or a float, or if it - is a numeric value less than zero - """ - if value is _Default: - return cls.DEFAULT_TIMEOUT - - if value is None or value is cls.DEFAULT_TIMEOUT: - return value - - try: - float(value) - except (TypeError, ValueError): - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." % (name, value)) - - try: - if value < 0: - raise ValueError("Attempted to set %s timeout to %s, but the " - "timeout cannot be set to a value less " - "than 0." % (name, value)) - except TypeError: # Python 3 - raise ValueError("Timeout value %s was %s, but it must be an " - "int or float." 
% (name, value)) - - return value - - @classmethod - def from_float(cls, timeout): - """ Create a new Timeout from a legacy timeout value. - - The timeout value used by httplib.py sets the same timeout on the - connect(), and recv() socket requests. This creates a :class:`Timeout` - object that sets the individual timeouts to the ``timeout`` value passed - to this function. - - :param timeout: The legacy timeout value - :type timeout: integer, float, sentinel default object, or None - :return: a Timeout object - :rtype: :class:`Timeout` - """ - return Timeout(read=timeout, connect=timeout) - - def clone(self): - """ Create a copy of the timeout object - - Timeout properties are stored per-pool but each request needs a fresh - Timeout object to ensure each one has its own start/stop configured. - - :return: a copy of the timeout object - :rtype: :class:`Timeout` - """ - # We can't use copy.deepcopy because that will also create a new object - # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to - # detect the user default. - return Timeout(connect=self._connect, read=self._read, - total=self.total) - - def start_connect(self): - """ Start the timeout clock, used during a connect() attempt - - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to start a timer that has been started already. - """ - if self._start_connect is not None: - raise TimeoutStateError("Timeout timer has already been started.") - self._start_connect = current_time() - return self._start_connect - - def get_connect_duration(self): - """ Gets the time elapsed since the call to :meth:`start_connect`. - - :return: the elapsed time - :rtype: float - :raises urllib3.exceptions.TimeoutStateError: if you attempt - to get duration for a timer that hasn't been started. - """ - if self._start_connect is None: - raise TimeoutStateError("Can't get connect duration for timer " - "that has not started.") - return current_time() - self._start_connect - - @property - def connect_timeout(self): - """ Get the value to use when setting a connection timeout. - - This will be a positive float or integer, the value None - (never timeout), or the default system timeout. - - :return: the connect timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - """ - if self.total is None: - return self._connect - - if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: - return self.total - - return min(self._connect, self.total) - - @property - def read_timeout(self): - """ Get the value for the read timeout. - - This assumes some time has elapsed in the connection timeout and - computes the read timeout appropriately. - - If self.total is set, the read timeout is dependent on the amount of - time taken by the connect timeout. If the connection time has not been - established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be - raised. - - :return: the value to use for the read timeout - :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None - :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` - has not yet been called on this object. - """ - if (self.total is not None and - self.total is not self.DEFAULT_TIMEOUT and - self._read is not None and - self._read is not self.DEFAULT_TIMEOUT): - # in case the connect timeout has not yet been established. 
- if self._start_connect is None: - return self._read - return max(0, min(self.total - self.get_connect_duration(), - self._read)) - elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: - return max(0, self.total - self.get_connect_duration()) - else: - return self._read - - -class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example: :: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example: :: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. - - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. - if not port.isdigit(): - raise LocationParseError("Failed to parse: %s" % url) - port = int(port) - else: - # Blank ports are cool, too. 
(rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. - - Example: :: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = 'gzip,deflate' - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(six.b(basic_auth)).decode('utf-8') - - if proxy_basic_auth: - headers['proxy-authorization'] = 'Basic ' + \ - b64encode(six.b(proxy_basic_auth)).decode('utf-8') - - return headers - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if not sock: # Platform-specific: AppEngine - return False - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except SocketError: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. - Defaults to :data:`ssl.CERT_NONE`. - If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. 
- """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1 - } - - fingerprint = fingerprint.replace(':', '').lower() - - digest_length, rest = divmod(len(fingerprint), 2) - - if rest or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - hashfunc = hashfunc_map[digest_length] - - cert_digest = hashfunc(cert).digest() - - if not cert_digest == fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. - """ - if hasattr(obj, 'fp'): - # Object is a container for another file-like object that gets released - # on exhaustion (e.g. HTTPResponse) - return obj.fp is None - - return obj.closed - - -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError - raise SSLError(e) - if certfile: - # FIXME: This block needs a test. 
- context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) diff --git a/requests/packages/urllib3/util/__init__.py b/requests/packages/urllib3/util/__init__.py new file mode 100644 index 0000000..a40185e --- /dev/null +++ b/requests/packages/urllib3/util/__init__.py @@ -0,0 +1,27 @@ +# urllib3/util/__init__.py +# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from .connection import is_connection_dropped +from .request import make_headers +from .response import is_fp_closed +from .ssl_ import ( + SSLContext, + HAS_SNI, + assert_fingerprint, + resolve_cert_reqs, + resolve_ssl_version, + ssl_wrap_socket, +) +from .timeout import ( + current_time, + Timeout, +) +from .url import ( + get_host, + parse_url, + split_first, + Url, +) diff --git a/requests/packages/urllib3/util/connection.py b/requests/packages/urllib3/util/connection.py new file mode 100644 index 0000000..8deeab5 --- /dev/null +++ b/requests/packages/urllib3/util/connection.py @@ -0,0 +1,45 @@ +from socket import error as SocketError +try: + from select import poll, POLLIN +except ImportError: # `poll` doesn't exist on OSX and other platforms + poll = False + try: + from select import select + except ImportError: # `select` doesn't exist on AppEngine. + select = False + +def is_connection_dropped(conn): # Platform-specific + """ + Returns True if the connection is dropped and should be closed. + + :param conn: + :class:`httplib.HTTPConnection` object. + + Note: For platforms like AppEngine, this will always return ``False`` to + let the platform handle connection recycling transparently for us. + """ + sock = getattr(conn, 'sock', False) + if sock is False: # Platform-specific: AppEngine + return False + if sock is None: # Connection already closed (such as by httplib). + return False + + if not poll: + if not select: # Platform-specific: AppEngine + return False + + try: + return select([sock], [], [], 0.0)[0] + except SocketError: + return True + + # This version is better on platforms that support it. + p = poll() + p.register(sock, POLLIN) + for (fno, ev) in p.poll(0.0): + if fno == sock.fileno(): + # Either data is buffered (bad), or the connection is dropped. + return True + + + diff --git a/requests/packages/urllib3/util/request.py b/requests/packages/urllib3/util/request.py new file mode 100644 index 0000000..d48d651 --- /dev/null +++ b/requests/packages/urllib3/util/request.py @@ -0,0 +1,68 @@ +from base64 import b64encode + +from ..packages import six + + +ACCEPT_ENCODING = 'gzip,deflate' + + +def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, + basic_auth=None, proxy_basic_auth=None): + """ + Shortcuts for generating request headers. + + :param keep_alive: + If ``True``, adds 'connection: keep-alive' header. + + :param accept_encoding: + Can be a boolean, list, or string. + ``True`` translates to 'gzip,deflate'. + List will get joined by comma. + String will be used as provided. 
+ + :param user_agent: + String representing the user-agent you want, such as + "python-urllib3/0.6" + + :param basic_auth: + Colon-separated username:password string for 'authorization: basic ...' + auth header. + + :param proxy_basic_auth: + Colon-separated username:password string for 'proxy-authorization: basic ...' + auth header. + + Example: :: + + >>> make_headers(keep_alive=True, user_agent="Batman/1.0") + {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} + >>> make_headers(accept_encoding=True) + {'accept-encoding': 'gzip,deflate'} + """ + headers = {} + if accept_encoding: + if isinstance(accept_encoding, str): + pass + elif isinstance(accept_encoding, list): + accept_encoding = ','.join(accept_encoding) + else: + accept_encoding = ACCEPT_ENCODING + headers['accept-encoding'] = accept_encoding + + if user_agent: + headers['user-agent'] = user_agent + + if keep_alive: + headers['connection'] = 'keep-alive' + + if basic_auth: + headers['authorization'] = 'Basic ' + \ + b64encode(six.b(basic_auth)).decode('utf-8') + + if proxy_basic_auth: + headers['proxy-authorization'] = 'Basic ' + \ + b64encode(six.b(proxy_basic_auth)).decode('utf-8') + + return headers + + diff --git a/requests/packages/urllib3/util/response.py b/requests/packages/urllib3/util/response.py new file mode 100644 index 0000000..d0325bc --- /dev/null +++ b/requests/packages/urllib3/util/response.py @@ -0,0 +1,13 @@ +def is_fp_closed(obj): + """ + Checks whether a given file-like object is closed. + + :param obj: + The file-like object to check. + """ + if hasattr(obj, 'fp'): + # Object is a container for another file-like object that gets released + # on exhaustion (e.g. HTTPResponse) + return obj.fp is None + + return obj.closed diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py new file mode 100644 index 0000000..dee4b87 --- /dev/null +++ b/requests/packages/urllib3/util/ssl_.py @@ -0,0 +1,133 @@ +from binascii import hexlify, unhexlify +from hashlib import md5, sha1 + +from ..exceptions import SSLError + + +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. 
+ If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + + # Disable TLS compression to migitate CRIME attack (issue #309) + OP_NO_COMPRESSION = 0x20000 + context.options |= OP_NO_COMPRESSION + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) diff --git a/requests/packages/urllib3/util/timeout.py b/requests/packages/urllib3/util/timeout.py new file mode 100644 index 0000000..4f947cb --- /dev/null +++ b/requests/packages/urllib3/util/timeout.py @@ -0,0 +1,234 @@ +from socket import _GLOBAL_DEFAULT_TIMEOUT +import time + +from ..exceptions import TimeoutStateError + + +def current_time(): + """ + Retrieve the current time, this function is mocked out in unit testing. + """ + return time.time() + + +_Default = object() +# The default timeout to use for socket connections. This is the attribute used +# by httplib to define the default timeout + + +class Timeout(object): + """ + Utility object for storing timeout values. + + Example usage: + + .. code-block:: python + + timeout = urllib3.util.Timeout(connect=2.0, read=7.0) + pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout) + pool.request(...) # Etc, etc + + :param connect: + The maximum amount of time to wait for a connection attempt to a server + to succeed. Omitting the parameter will default the connect timeout to + the system default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout for connection attempts. + + :type connect: integer, float, or None + + :param read: + The maximum amount of time to wait between consecutive + read operations for a response from the server. 
Omitting + the parameter will default the read timeout to the system + default, probably `the global default timeout in socket.py + `_. + None will set an infinite timeout. + + :type read: integer, float, or None + + :param total: + This combines the connect and read timeouts into one; the read timeout + will be set to the time leftover from the connect attempt. In the + event that both a connect timeout and a total are specified, or a read + timeout and a total are specified, the shorter timeout will be applied. + + Defaults to None. + + :type total: integer, float, or None + + .. note:: + + Many factors can affect the total amount of time for urllib3 to return + an HTTP response. Specifically, Python's DNS resolver does not obey the + timeout specified on the socket. Other factors that can affect total + request time include high CPU load, high swap, the program running at a + low priority level, or other behaviors. The observed running time for + urllib3 to return a response may be greater than the value passed to + `total`. + + In addition, the read and total timeouts only measure the time between + read operations on the socket connecting the client and the server, + not the total amount of time for the request to return a complete + response. For most requests, the timeout is raised because the server + has not sent the first byte in the specified time. This is not always + the case; if a server streams one byte every fifteen seconds, a timeout + of 20 seconds will not ever trigger, even though the request will + take several minutes to complete. + + If your goal is to cut off any request after a set amount of wall clock + time, consider having a second "watcher" thread to cut off a slow + request. + """ + + #: A sentinel object representing the default timeout value + DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT + + def __init__(self, total=None, connect=_Default, read=_Default): + self._connect = self._validate_timeout(connect, 'connect') + self._read = self._validate_timeout(read, 'read') + self.total = self._validate_timeout(total, 'total') + self._start_connect = None + + def __str__(self): + return '%s(connect=%r, read=%r, total=%r)' % ( + type(self).__name__, self._connect, self._read, self.total) + + + @classmethod + def _validate_timeout(cls, value, name): + """ Check that a timeout attribute is valid + + :param value: The timeout value to validate + :param name: The name of the timeout attribute to validate. This is used + for clear error messages + :return: the value + :raises ValueError: if the type is not an integer or a float, or if it + is a numeric value less than zero + """ + if value is _Default: + return cls.DEFAULT_TIMEOUT + + if value is None or value is cls.DEFAULT_TIMEOUT: + return value + + try: + float(value) + except (TypeError, ValueError): + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + try: + if value < 0: + raise ValueError("Attempted to set %s timeout to %s, but the " + "timeout cannot be set to a value less " + "than 0." % (name, value)) + except TypeError: # Python 3 + raise ValueError("Timeout value %s was %s, but it must be an " + "int or float." % (name, value)) + + return value + + @classmethod + def from_float(cls, timeout): + """ Create a new Timeout from a legacy timeout value. + + The timeout value used by httplib.py sets the same timeout on the + connect(), and recv() socket requests. 
This creates a :class:`Timeout` + object that sets the individual timeouts to the ``timeout`` value passed + to this function. + + :param timeout: The legacy timeout value + :type timeout: integer, float, sentinel default object, or None + :return: a Timeout object + :rtype: :class:`Timeout` + """ + return Timeout(read=timeout, connect=timeout) + + def clone(self): + """ Create a copy of the timeout object + + Timeout properties are stored per-pool but each request needs a fresh + Timeout object to ensure each one has its own start/stop configured. + + :return: a copy of the timeout object + :rtype: :class:`Timeout` + """ + # We can't use copy.deepcopy because that will also create a new object + # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to + # detect the user default. + return Timeout(connect=self._connect, read=self._read, + total=self.total) + + def start_connect(self): + """ Start the timeout clock, used during a connect() attempt + + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to start a timer that has been started already. + """ + if self._start_connect is not None: + raise TimeoutStateError("Timeout timer has already been started.") + self._start_connect = current_time() + return self._start_connect + + def get_connect_duration(self): + """ Gets the time elapsed since the call to :meth:`start_connect`. + + :return: the elapsed time + :rtype: float + :raises urllib3.exceptions.TimeoutStateError: if you attempt + to get duration for a timer that hasn't been started. + """ + if self._start_connect is None: + raise TimeoutStateError("Can't get connect duration for timer " + "that has not started.") + return current_time() - self._start_connect + + @property + def connect_timeout(self): + """ Get the value to use when setting a connection timeout. + + This will be a positive float or integer, the value None + (never timeout), or the default system timeout. + + :return: the connect timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + """ + if self.total is None: + return self._connect + + if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: + return self.total + + return min(self._connect, self.total) + + @property + def read_timeout(self): + """ Get the value for the read timeout. + + This assumes some time has elapsed in the connection timeout and + computes the read timeout appropriately. + + If self.total is set, the read timeout is dependent on the amount of + time taken by the connect timeout. If the connection time has not been + established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be + raised. + + :return: the value to use for the read timeout + :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None + :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` + has not yet been called on this object. + """ + if (self.total is not None and + self.total is not self.DEFAULT_TIMEOUT and + self._read is not None and + self._read is not self.DEFAULT_TIMEOUT): + # in case the connect timeout has not yet been established. 
+ if self._start_connect is None: + return self._read + return max(0, min(self.total - self.get_connect_duration(), + self._read)) + elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: + return max(0, self.total - self.get_connect_duration()) + else: + return self._read diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py new file mode 100644 index 0000000..362d216 --- /dev/null +++ b/requests/packages/urllib3/util/url.py @@ -0,0 +1,162 @@ +from collections import namedtuple + +from ..exceptions import LocationParseError + + +class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): + """ + Datastructure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. + """ + slots = () + + def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self): + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self): + """Absolute path including the query string.""" + uri = self.path or '/' + + if self.query is not None: + uri += '?' + self.query + + return uri + + @property + def netloc(self): + """Network location including host and port""" + if self.port: + return '%s:%d' % (self.host, self.port) + return self.host + + +def split_first(s, delims): + """ + Given a string and an iterable of delimiters, split on the first found + delimiter. Return two split parts and the matched delimiter. + + If not found, then the first part is the full input string. + + Example: :: + + >>> split_first('foo/bar?baz', '?/=') + ('foo', 'bar?baz', '/') + >>> split_first('foo/bar?baz', '123') + ('foo/bar?baz', '', None) + + Scales linearly with number of delims. Not ideal for large number of delims. + """ + min_idx = None + min_delim = None + for d in delims: + idx = s.find(d) + if idx < 0: + continue + + if min_idx is None or idx < min_idx: + min_idx = idx + min_delim = d + + if min_idx is None or min_idx < 0: + return s, '', None + + return s[:min_idx], s[min_idx+1:], min_delim + + +def parse_url(url): + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + + Partly backwards-compatible with :mod:`urlparse`. + + Example: :: + + >>> parse_url('http://google.com/mail/') + Url(scheme='http', host='google.com', port=None, path='/', ...) + >>> parse_url('google.com:80') + Url(scheme=None, host='google.com', port=80, path=None, ...) + >>> parse_url('/foo?bar') + Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + + # While this code has overlap with stdlib's urlparse, it is much + # simplified for our needs and less annoying. + # Additionally, this implementations does silly things to be optimal + # on CPython. 
+ + scheme = None + auth = None + host = None + port = None + path = None + fragment = None + query = None + + # Scheme + if '://' in url: + scheme, url = url.split('://', 1) + + # Find the earliest Authority Terminator + # (http://tools.ietf.org/html/rfc3986#section-3.2) + url, path_, delim = split_first(url, ['/', '?', '#']) + + if delim: + # Reassemble the path + path = delim + path_ + + # Auth + if '@' in url: + # Last '@' denotes end of auth part + auth, url = url.rsplit('@', 1) + + # IPv6 + if url and url[0] == '[': + host, url = url.split(']', 1) + host += ']' + + # Port + if ':' in url: + _host, port = url.split(':', 1) + + if not host: + host = _host + + if port: + # If given, ports must be integers. + if not port.isdigit(): + raise LocationParseError(url) + port = int(port) + else: + # Blank ports are cool, too. (rfc3986#section-3.2.3) + port = None + + elif not host and url: + host = url + + if not path: + return Url(scheme, auth, host, port, path, query, fragment) + + # Fragment + if '#' in path: + path, fragment = path.split('#', 1) + + # Query + if '?' in path: + path, query = path.split('?', 1) + + return Url(scheme, auth, host, port, path, query, fragment) + + +def get_host(url): + """ + Deprecated. Use :func:`.parse_url` instead. + """ + p = parse_url(url) + return p.scheme or 'http', p.hostname, p.port diff --git a/requests/sessions.py b/requests/sessions.py index db227ca..df85a25 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -12,27 +12,28 @@ import os from collections import Mapping from datetime import datetime +from .auth import _basic_auth_str from .compat import cookielib, OrderedDict, urljoin, urlparse, builtin_str from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) -from .models import Request, PreparedRequest +from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook -from .utils import to_key_val_list, default_headers -from .exceptions import TooManyRedirects, InvalidSchema +from .utils import to_key_val_list, default_headers, to_native_string +from .exceptions import ( + TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter -from .utils import requote_uri, get_environ_proxies, get_netrc_auth +from .utils import ( + requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, + get_auth_from_url +) from .status_codes import codes -REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_moved, # 307 -) -DEFAULT_REDIRECT_LIMIT = 30 + +# formerly defined here, reexposed here for backward compatibility +from .models import REDIRECT_STATI def merge_setting(request_setting, session_setting, dict_class=OrderedDict): @@ -63,6 +64,8 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): if v is None: del merged_setting[k] + merged_setting = dict((k, v) for (k, v) in merged_setting.items() if v is not None) + return merged_setting @@ -89,11 +92,13 @@ class SessionRedirectMixin(object): i = 0 - # ((resp.status_code is codes.see_other)) - while ('location' in resp.headers and resp.status_code in REDIRECT_STATI): + while resp.is_redirect: prepared_request = req.copy() - resp.content # Consume socket so it can be released + try: + resp.content # Consume socket so it can be released + except (ChunkedEncodingError, ContentDecodingError, RuntimeError): + 
resp.raw.read(decode_content=False) if i >= self.max_redirects: raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects) @@ -121,7 +126,7 @@ class SessionRedirectMixin(object): else: url = requote_uri(url) - prepared_request.url = url + prepared_request.url = to_native_string(url) # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4 if (resp.status_code == codes.see_other and @@ -153,13 +158,19 @@ class SessionRedirectMixin(object): except KeyError: pass - extract_cookies_to_jar(prepared_request._cookies, - prepared_request, resp.raw) + extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw) prepared_request._cookies.update(self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) + # Rebuild auth and proxy information. + proxies = self.rebuild_proxies(prepared_request, proxies) + self.rebuild_auth(prepared_request, resp) + + # Override the original request. + req = prepared_request + resp = self.send( - prepared_request, + req, stream=stream, timeout=timeout, verify=verify, @@ -173,6 +184,68 @@ class SessionRedirectMixin(object): i += 1 yield resp + def rebuild_auth(self, prepared_request, response): + """ + When being redirected we may want to strip authentication from the + request to avoid leaking credentials. This method intelligently removes + and reapplies authentication where possible to avoid credential loss. + """ + headers = prepared_request.headers + url = prepared_request.url + + if 'Authorization' in headers: + # If we get redirected to a new host, we should strip out any + # authentication headers. + original_parsed = urlparse(response.request.url) + redirect_parsed = urlparse(url) + + if (original_parsed.hostname != redirect_parsed.hostname): + del headers['Authorization'] + + # .netrc might have more auth for us on our new host. + new_auth = get_netrc_auth(url) if self.trust_env else None + if new_auth is not None: + prepared_request.prepare_auth(new_auth) + + return + + def rebuild_proxies(self, prepared_request, proxies): + """ + This method re-evaluates the proxy configuration by considering the + environment variables. If we are redirected to a URL covered by + NO_PROXY, we strip the proxy configuration. Otherwise, we set missing + proxy keys for this URL (in case they were stripped by a previous + redirect). + + This method also replaces the Proxy-Authorization header where + necessary. + """ + headers = prepared_request.headers + url = prepared_request.url + scheme = urlparse(url).scheme + new_proxies = proxies.copy() if proxies is not None else {} + + if self.trust_env and not should_bypass_proxies(url): + environ_proxies = get_environ_proxies(url) + + proxy = environ_proxies.get(scheme) + + if proxy: + new_proxies.setdefault(scheme, environ_proxies[scheme]) + + if 'Proxy-Authorization' in headers: + del headers['Proxy-Authorization'] + + try: + username, password = get_auth_from_url(new_proxies[scheme]) + except KeyError: + username, password = None, None + + if username and password: + headers['Proxy-Authorization'] = _basic_auth_str(username, password) + + return new_proxies + class Session(SessionRedirectMixin): """A Requests session. @@ -320,7 +393,7 @@ class Session(SessionRedirectMixin): :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth. :param timeout: (optional) Float describing the timeout of the - request. + request in seconds. :param allow_redirects: (optional) Boolean. Set to True by default. 
:param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. @@ -467,8 +540,7 @@ class Session(SessionRedirectMixin): if not isinstance(request, PreparedRequest): raise ValueError('You can only send PreparedRequests.') - # Set up variables needed for resolve_redirects and dispatching of - # hooks + # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') timeout = kwargs.get('timeout') @@ -482,8 +554,10 @@ class Session(SessionRedirectMixin): # Start time (approximately) of the request start = datetime.utcnow() + # Send the request r = adapter.send(request, **kwargs) + # Total elapsed time of the request (approximately) r.elapsed = datetime.utcnow() - start @@ -492,15 +566,20 @@ class Session(SessionRedirectMixin): # Persist cookies if r.history: + # If the hooks create history then we want those cookies too for resp in r.history: extract_cookies_to_jar(self.cookies, resp.request, resp.raw) + extract_cookies_to_jar(self.cookies, request, r.raw) # Redirect resolving generator. - gen = self.resolve_redirects(r, request, stream=stream, - timeout=timeout, verify=verify, cert=cert, - proxies=proxies) + gen = self.resolve_redirects(r, request, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies) # Resolve redirects if allowed. history = [resp for resp in gen] if allow_redirects else [] @@ -511,7 +590,10 @@ class Session(SessionRedirectMixin): history.insert(0, r) # Get the last request made r = history.pop() - r.history = tuple(history) + r.history = history + + if not stream: + r.content return r @@ -534,8 +616,10 @@ class Session(SessionRedirectMixin): """Registers a connection adapter to a prefix. Adapters are sorted in descending order by key length.""" + self.adapters[prefix] = adapter keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] + for key in keys_to_move: self.adapters[key] = self.adapters.pop(key) diff --git a/requests/structures.py b/requests/structures.py index a175913..9fd7818 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -106,8 +106,7 @@ class CaseInsensitiveDict(collections.MutableMapping): return CaseInsensitiveDict(self._store.values()) def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) - + return str(dict(self.items())) class LookupDict(dict): """Dictionary lookup object.""" diff --git a/requests/utils.py b/requests/utils.py index 7b7ff0a..68e50cf 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -24,10 +24,10 @@ from . import __version__ from . import certs from .compat import parse_http_list as _parse_list_header from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, - builtin_str, getproxies, proxy_bypass) + builtin_str, getproxies, proxy_bypass, urlunparse) from .cookies import RequestsCookieJar, cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import MissingSchema, InvalidURL +from .exceptions import InvalidURL _hush_pyflakes = (RequestsCookieJar,) @@ -61,7 +61,7 @@ def super_len(o): return os.fstat(fileno).st_size if hasattr(o, 'getvalue'): - # e.g. BytesIO, cStringIO.StringI + # e.g. BytesIO, cStringIO.StringIO return len(o.getvalue()) @@ -466,9 +466,10 @@ def is_valid_cidr(string_network): return True -def get_environ_proxies(url): - """Return a dict of environment proxies.""" - +def should_bypass_proxies(url): + """ + Returns whether we should bypass proxies or not. 
+ """ get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL @@ -486,13 +487,13 @@ def get_environ_proxies(url): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): if address_in_network(ip, proxy_ip): - return {} + return True else: for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. - return {} + return True # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. @@ -506,12 +507,16 @@ def get_environ_proxies(url): bypass = False if bypass: - return {} + return True - # If we get here, we either didn't have no_proxy set or we're not going - # anywhere that no_proxy applies to, and the system settings don't require - # bypassing the proxy for the current URL. - return getproxies() + return False + +def get_environ_proxies(url): + """Return a dict of environment proxies.""" + if should_bypass_proxies(url): + return {} + else: + return getproxies() def default_user_agent(name="python-requests"): @@ -548,7 +553,7 @@ def default_user_agent(name="python-requests"): def default_headers(): return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate', 'compress')), + 'Accept-Encoding': ', '.join(('gzip', 'deflate')), 'Accept': '*/*' }) @@ -622,13 +627,18 @@ def guess_json_utf(data): return None -def except_on_missing_scheme(url): - """Given a URL, raise a MissingSchema exception if the scheme is missing. - """ - scheme, netloc, path, params, query, fragment = urlparse(url) +def prepend_scheme_if_needed(url, new_scheme): + '''Given a URL that may or may not have a scheme, prepend the given scheme. + Does not replace a present scheme with the one provided as an argument.''' + scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) + + # urlparse is a finicky beast, and sometimes decides that there isn't a + # netloc present. Assume that it's being over-cautious, and switch netloc + # and path if urlparse decided there was no netloc. + if not netloc: + netloc, path = path, netloc - if not scheme: - raise MissingSchema('Proxy URLs must have explicit schemes.') + return urlunparse((scheme, netloc, path, params, query, fragment)) def get_auth_from_url(url): -- cgit v1.2.3