diff options
Diffstat (limited to 'requests/packages')
20 files changed, 868 insertions, 272 deletions
diff --git a/requests/packages/chardet/__init__.py b/requests/packages/chardet/__init__.py index e4f0799..82c2a48 100644 --- a/requests/packages/chardet/__init__.py +++ b/requests/packages/chardet/__init__.py @@ -15,7 +15,7 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -__version__ = "2.2.1" +__version__ = "2.3.0" from sys import version_info diff --git a/requests/packages/chardet/chardetect.py b/requests/packages/chardet/chardetect.py index ecd0163..ffe892f 100755 --- a/requests/packages/chardet/chardetect.py +++ b/requests/packages/chardet/chardetect.py @@ -12,34 +12,68 @@ Example:: If no paths are provided, it takes its input from stdin. """ + +from __future__ import absolute_import, print_function, unicode_literals + +import argparse +import sys from io import open -from sys import argv, stdin +from chardet import __version__ from chardet.universaldetector import UniversalDetector -def description_of(file, name='stdin'): - """Return a string describing the probable encoding of a file.""" +def description_of(lines, name='stdin'): + """ + Return a string describing the probable encoding of a file or + list of strings. + + :param lines: The lines to get the encoding of. + :type lines: Iterable of bytes + :param name: Name of file or collection of lines + :type name: str + """ u = UniversalDetector() - for line in file: + for line in lines: u.feed(line) u.close() result = u.result if result['encoding']: - return '%s: %s with confidence %s' % (name, - result['encoding'], - result['confidence']) + return '{0}: {1} with confidence {2}'.format(name, result['encoding'], + result['confidence']) else: - return '%s: no result' % name + return '{0}: no result'.format(name) -def main(): - if len(argv) <= 1: - print(description_of(stdin)) - else: - for path in argv[1:]: - with open(path, 'rb') as f: - print(description_of(f, path)) +def main(argv=None): + ''' + Handles command line arguments and gets things started. + + :param argv: List of arguments, as if specified on the command-line. + If None, ``sys.argv[1:]`` is used instead. + :type argv: list of str + ''' + # Get command line arguments + parser = argparse.ArgumentParser( + description="Takes one or more file paths and reports their detected \ + encodings", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + conflict_handler='resolve') + parser.add_argument('input', + help='File whose encoding we would like to determine.', + type=argparse.FileType('rb'), nargs='*', + default=[sys.stdin]) + parser.add_argument('--version', action='version', + version='%(prog)s {0}'.format(__version__)) + args = parser.parse_args(argv) + + for f in args.input: + if f.isatty(): + print("You are running chardetect interactively. Press " + + "CTRL-D twice at the start of a blank line to signal the " + + "end of your input. If you want help, run chardetect " + + "--help\n", file=sys.stderr) + print(description_of(f, f.name)) if __name__ == '__main__': diff --git a/requests/packages/chardet/jpcntx.py b/requests/packages/chardet/jpcntx.py index f7f69ba..59aeb6a 100644 --- a/requests/packages/chardet/jpcntx.py +++ b/requests/packages/chardet/jpcntx.py @@ -177,6 +177,12 @@ class JapaneseContextAnalysis: return -1, 1 class SJISContextAnalysis(JapaneseContextAnalysis): + def __init__(self): + self.charset_name = "SHIFT_JIS" + + def get_charset_name(self): + return self.charset_name + def get_order(self, aBuf): if not aBuf: return -1, 1 @@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis): first_char = wrap_ord(aBuf[0]) if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): charLen = 2 + if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): + self.charset_name = "CP932" else: charLen = 1 diff --git a/requests/packages/chardet/latin1prober.py b/requests/packages/chardet/latin1prober.py index ad695f5..eef3573 100644 --- a/requests/packages/chardet/latin1prober.py +++ b/requests/packages/chardet/latin1prober.py @@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber): if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] / total) - - (self._mFreqCounter[1] * 20.0 / total)) + confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) + / total) if confidence < 0.0: confidence = 0.0 # lower the confidence of latin1 so that other more accurate # detector can take priority. - confidence = confidence * 0.5 + confidence = confidence * 0.73 return confidence diff --git a/requests/packages/chardet/mbcssm.py b/requests/packages/chardet/mbcssm.py index 3f93cfb..efe678c 100644 --- a/requests/packages/chardet/mbcssm.py +++ b/requests/packages/chardet/mbcssm.py @@ -353,7 +353,7 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # 68 - 6f 2,2,2,2,2,2,2,2, # 70 - 77 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,3,3,3, # 80 - 87 + 3,3,3,3,3,2,2,3, # 80 - 87 3,3,3,3,3,3,3,3, # 88 - 8f 3,3,3,3,3,3,3,3, # 90 - 97 3,3,3,3,3,3,3,3, # 98 - 9f @@ -369,9 +369,8 @@ SJIS_cls = ( 2,2,2,2,2,2,2,2, # d8 - df 3,3,3,3,3,3,3,3, # e0 - e7 3,3,3,3,3,4,4,4, # e8 - ef - 4,4,4,4,4,4,4,4, # f0 - f7 - 4,4,4,4,4,0,0,0 # f8 - ff -) + 3,3,3,3,3,3,3,3, # f0 - f7 + 3,3,3,3,3,0,0,0) # f8 - ff SJIS_st = ( @@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls, 'stateTable': UTF8_st, 'charLenTable': UTF8CharLenTable, 'name': 'UTF-8'} - -# flake8: noqa diff --git a/requests/packages/chardet/sjisprober.py b/requests/packages/chardet/sjisprober.py index b173614..cd0e9e7 100644 --- a/requests/packages/chardet/sjisprober.py +++ b/requests/packages/chardet/sjisprober.py @@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber): self._mContextAnalyzer.reset() def get_charset_name(self): - return "SHIFT_JIS" + return self._mContextAnalyzer.get_charset_name() def feed(self, aBuf): aLen = len(aBuf) diff --git a/requests/packages/chardet/universaldetector.py b/requests/packages/chardet/universaldetector.py index 9a03ad3..476522b 100644 --- a/requests/packages/chardet/universaldetector.py +++ b/requests/packages/chardet/universaldetector.py @@ -71,9 +71,9 @@ class UniversalDetector: if not self._mGotData: # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM: + if aBuf[:3] == codecs.BOM_UTF8: # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8", 'confidence': 1.0} + self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} elif aBuf[:4] == codecs.BOM_UTF32_LE: # FF FE 00 00 UTF-32, little-endian BOM self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py index 4b36b5a..f48ac4a 100644 --- a/requests/packages/urllib3/__init__.py +++ b/requests/packages/urllib3/__init__.py @@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = 'dev' +__version__ = '1.10.4' from .connectionpool import ( @@ -55,9 +55,12 @@ def add_stderr_logger(level=logging.DEBUG): del NullHandler -# Set security warning to only go off once by default. import warnings -warnings.simplefilter('module', exceptions.SecurityWarning) +# SecurityWarning's always go off by default. +warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# InsecurePlatformWarning's don't vary between requests, so we keep it default. +warnings.simplefilter('default', exceptions.InsecurePlatformWarning, + append=True) def disable_warnings(category=exceptions.HTTPWarning): """ diff --git a/requests/packages/urllib3/_collections.py b/requests/packages/urllib3/_collections.py index d77ebb8..279416c 100644 --- a/requests/packages/urllib3/_collections.py +++ b/requests/packages/urllib3/_collections.py @@ -1,7 +1,7 @@ from collections import Mapping, MutableMapping try: from threading import RLock -except ImportError: # Platform-specific: No threads available +except ImportError: # Platform-specific: No threads available class RLock: def __enter__(self): pass @@ -10,11 +10,11 @@ except ImportError: # Platform-specific: No threads available pass -try: # Python 2.7+ +try: # Python 2.7+ from collections import OrderedDict except ImportError: from .packages.ordered_dict import OrderedDict -from .packages.six import itervalues +from .packages.six import iterkeys, itervalues, PY3 __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict'] @@ -85,8 +85,7 @@ class RecentlyUsedContainer(MutableMapping): def clear(self): with self.lock: # Copy pointers to all values, then wipe the mapping - # under Python 2, this copies the list of values twice :-| - values = list(self._container.values()) + values = list(itervalues(self._container)) self._container.clear() if self.dispose_func: @@ -95,10 +94,17 @@ class RecentlyUsedContainer(MutableMapping): def keys(self): with self.lock: - return self._container.keys() + return list(iterkeys(self._container)) -class HTTPHeaderDict(MutableMapping): +_dict_setitem = dict.__setitem__ +_dict_getitem = dict.__getitem__ +_dict_delitem = dict.__delitem__ +_dict_contains = dict.__contains__ +_dict_setdefault = dict.setdefault + + +class HTTPHeaderDict(dict): """ :param headers: An iterable of field-value pairs. Must not contain multiple field names @@ -130,25 +136,75 @@ class HTTPHeaderDict(MutableMapping): 'foo=bar, baz=quxx' >>> headers['Content-Length'] '7' - - If you want to access the raw headers with their original casing - for debugging purposes you can access the private ``._data`` attribute - which is a normal python ``dict`` that maps the case-insensitive key to a - list of tuples stored as (case-sensitive-original-name, value). Using the - structure from above as our example: - - >>> headers._data - {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')], - 'content-length': [('content-length', '7')]} """ def __init__(self, headers=None, **kwargs): - self._data = {} - if headers is None: - headers = {} - self.update(headers, **kwargs) + dict.__init__(self) + if headers is not None: + if isinstance(headers, HTTPHeaderDict): + self._copy_from(headers) + else: + self.extend(headers) + if kwargs: + self.extend(kwargs) + + def __setitem__(self, key, val): + return _dict_setitem(self, key.lower(), (key, val)) + + def __getitem__(self, key): + val = _dict_getitem(self, key.lower()) + return ', '.join(val[1:]) + + def __delitem__(self, key): + return _dict_delitem(self, key.lower()) - def add(self, key, value): + def __contains__(self, key): + return _dict_contains(self, key.lower()) + + def __eq__(self, other): + if not isinstance(other, Mapping) and not hasattr(other, 'keys'): + return False + if not isinstance(other, type(self)): + other = type(self)(other) + return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other) + + def __ne__(self, other): + return not self.__eq__(other) + + values = MutableMapping.values + get = MutableMapping.get + update = MutableMapping.update + + if not PY3: # Python 2 + iterkeys = MutableMapping.iterkeys + itervalues = MutableMapping.itervalues + + __marker = object() + + def pop(self, key, default=__marker): + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + ''' + # Using the MutableMapping function directly fails due to the private marker. + # Using ordinary dict.pop would expose the internal structures. + # So let's reinvent the wheel. + try: + value = self[key] + except KeyError: + if default is self.__marker: + raise + return default + else: + del self[key] + return value + + def discard(self, key): + try: + del self[key] + except KeyError: + pass + + def add(self, key, val): """Adds a (name, value) pair, doesn't overwrite the value if it already exists. @@ -157,43 +213,111 @@ class HTTPHeaderDict(MutableMapping): >>> headers['foo'] 'bar, baz' """ - self._data.setdefault(key.lower(), []).append((key, value)) + key_lower = key.lower() + new_vals = key, val + # Keep the common case aka no item present as fast as possible + vals = _dict_setdefault(self, key_lower, new_vals) + if new_vals is not vals: + # new_vals was not inserted, as there was a previous one + if isinstance(vals, list): + # If already several items got inserted, we have a list + vals.append(val) + else: + # vals should be a tuple then, i.e. only one item so far + # Need to convert the tuple to list for further extension + _dict_setitem(self, key_lower, [vals[0], vals[1], val]) + + def extend(self, *args, **kwargs): + """Generic import function for any type of header-like object. + Adapted version of MutableMapping.update in order to insert items + with self.add instead of self.__setitem__ + """ + if len(args) > 1: + raise TypeError("extend() takes at most 1 positional " + "arguments ({} given)".format(len(args))) + other = args[0] if len(args) >= 1 else () + + if isinstance(other, HTTPHeaderDict): + for key, val in other.iteritems(): + self.add(key, val) + elif isinstance(other, Mapping): + for key in other: + self.add(key, other[key]) + elif hasattr(other, "keys"): + for key in other.keys(): + self.add(key, other[key]) + else: + for key, value in other: + self.add(key, value) + + for key, value in kwargs.items(): + self.add(key, value) def getlist(self, key): """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" - return self[key].split(', ') if key in self else [] - - def copy(self): - h = HTTPHeaderDict() - for key in self._data: - for rawkey, value in self._data[key]: - h.add(rawkey, value) - return h - - def __eq__(self, other): - if not isinstance(other, Mapping): - return False - other = HTTPHeaderDict(other) - return dict((k1, self[k1]) for k1 in self._data) == \ - dict((k2, other[k2]) for k2 in other._data) - - def __getitem__(self, key): - values = self._data[key.lower()] - return ', '.join(value[1] for value in values) - - def __setitem__(self, key, value): - self._data[key.lower()] = [(key, value)] + try: + vals = _dict_getitem(self, key.lower()) + except KeyError: + return [] + else: + if isinstance(vals, tuple): + return [vals[1]] + else: + return vals[1:] + + # Backwards compatibility for httplib + getheaders = getlist + getallmatchingheaders = getlist + iget = getlist - def __delitem__(self, key): - del self._data[key.lower()] + def __repr__(self): + return "%s(%s)" % (type(self).__name__, dict(self.itermerged())) - def __len__(self): - return len(self._data) + def _copy_from(self, other): + for key in other: + val = _dict_getitem(other, key) + if isinstance(val, list): + # Don't need to convert tuples + val = list(val) + _dict_setitem(self, key, val) - def __iter__(self): - for headers in itervalues(self._data): - yield headers[0][0] - - def __repr__(self): - return '%s(%r)' % (self.__class__.__name__, dict(self.items())) + def copy(self): + clone = type(self)() + clone._copy_from(self) + return clone + + def iteritems(self): + """Iterate over all header lines, including duplicate ones.""" + for key in self: + vals = _dict_getitem(self, key) + for val in vals[1:]: + yield vals[0], val + + def itermerged(self): + """Iterate over all headers, merging duplicate ones together.""" + for key in self: + val = _dict_getitem(self, key) + yield val[0], ', '.join(val[1:]) + + def items(self): + return list(self.iteritems()) + + @classmethod + def from_httplib(cls, message): # Python 2 + """Read headers from a Python 2 httplib message object.""" + # python2.7 does not expose a proper API for exporting multiheaders + # efficiently. This function re-reads raw lines from the message + # object and extracts the multiheaders properly. + headers = [] + + for line in message.headers: + if line.startswith((' ', '\t')): + key, value = headers[-1] + headers[-1] = (key, value + '\r\n' + line.rstrip()) + continue + + key, value = line.split(':', 1) + headers.append((key, value.strip())) + + return cls(headers) diff --git a/requests/packages/urllib3/connection.py b/requests/packages/urllib3/connection.py index c6e1959..2a8c359 100644 --- a/requests/packages/urllib3/connection.py +++ b/requests/packages/urllib3/connection.py @@ -3,6 +3,7 @@ import sys import socket from socket import timeout as SocketTimeout import warnings +from .packages import six try: # Python 3 from http.client import HTTPConnection as _HTTPConnection, HTTPException @@ -26,12 +27,20 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. pass +try: # Python 3: + # Not a no-op, we're adding this to the namespace so it can be imported. + ConnectionError = ConnectionError +except NameError: # Python 2: + class ConnectionError(Exception): + pass + + from .exceptions import ( ConnectTimeoutError, SystemTimeWarning, + SecurityWarning, ) from .packages.ssl_match_hostname import match_hostname -from .packages import six from .util.ssl_ import ( resolve_cert_reqs, @@ -40,8 +49,8 @@ from .util.ssl_ import ( assert_fingerprint, ) -from .util import connection +from .util import connection port_by_scheme = { 'http': 80, @@ -233,8 +242,15 @@ class VerifiedHTTPSConnection(HTTPSConnection): self.assert_fingerprint) elif resolved_cert_reqs != ssl.CERT_NONE \ and self.assert_hostname is not False: - match_hostname(self.sock.getpeercert(), - self.assert_hostname or hostname) + cert = self.sock.getpeercert() + if not cert.get('subjectAltName', ()): + warnings.warn(( + 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' + 'This feature is being removed by major browsers and deprecated by RFC 2818. ' + '(See https://github.com/shazow/urllib3/issues/497 for details.)'), + SecurityWarning + ) + match_hostname(cert, self.assert_hostname or hostname) self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED or self.assert_fingerprint is not None) @@ -244,3 +260,5 @@ if ssl: # Make a copy for testing. UnverifiedHTTPSConnection = HTTPSConnection HTTPSConnection = VerifiedHTTPSConnection +else: + HTTPSConnection = DummyConnection diff --git a/requests/packages/urllib3/connectionpool.py b/requests/packages/urllib3/connectionpool.py index 9cc2a95..117269a 100644 --- a/requests/packages/urllib3/connectionpool.py +++ b/requests/packages/urllib3/connectionpool.py @@ -32,7 +32,7 @@ from .connection import ( port_by_scheme, DummyConnection, HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection, - HTTPException, BaseSSLError, + HTTPException, BaseSSLError, ConnectionError ) from .request import RequestMethods from .response import HTTPResponse @@ -72,6 +72,21 @@ class ConnectionPool(object): return '%s(host=%r, port=%r)' % (type(self).__name__, self.host, self.port) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(): + """ + Close all pooled connections and disable the pool. + """ + pass + + # This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 _blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK]) @@ -266,6 +281,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): """ pass + def _prepare_proxy(self, conn): + # Nothing to do for HTTP connections. + pass + def _get_timeout(self, timeout): """ Helper that always returns a :class:`urllib3.util.Timeout` """ if timeout is _Default: @@ -278,6 +297,23 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # can be removed later return Timeout.from_float(timeout) + def _raise_timeout(self, err, url, timeout_value): + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # See the above comment about EAGAIN in Python 3. In Python 2 we have + # to specifically catch it and throw the timeout error + if hasattr(err, 'errno') and err.errno in _blocking_errnos: + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + + # Catch possible read timeouts thrown as SSL errors. If not the + # case, rethrow the original. We need to do this because of: + # http://bugs.python.org/issue10272 + if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6 + raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value) + def _make_request(self, conn, method, url, timeout=_Default, **httplib_request_kw): """ @@ -301,7 +337,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): conn.timeout = timeout_obj.connect_timeout # Trigger any extra validation we need to do. - self._validate_conn(conn) + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout. + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise # conn.request() calls httplib.*.request, not the method in # urllib3.request. It also calls makefile (recv) on the socket. @@ -327,32 +368,12 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Receive the response from the server try: - try: # Python 2.7+, use buffering of HTTP responses + try: # Python 2.7, use buffering of HTTP responses httplib_response = conn.getresponse(buffering=True) except TypeError: # Python 2.6 and older httplib_response = conn.getresponse() - except SocketTimeout: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - except BaseSSLError as e: - # Catch possible read timeouts thrown as SSL errors. If not the - # case, rethrow the original. We need to do this because of: - # http://bugs.python.org/issue10272 - if 'timed out' in str(e) or \ - 'did not complete (read)' in str(e): # Python 2.6 - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - - raise - - except SocketError as e: # Platform-specific: Python 2 - # See the above comment about EAGAIN in Python 3. In Python 2 we - # have to specifically catch it and throw the timeout error - if e.errno in _blocking_errnos: - raise ReadTimeoutError( - self, url, "Read timed out. (read timeout=%s)" % read_timeout) - + except (SocketTimeout, BaseSSLError, SocketError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) raise # AppEngine doesn't have a version attr. @@ -508,11 +529,18 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): try: # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) conn = self._get_conn(timeout=pool_timeout) + conn.timeout = timeout_obj.connect_timeout + + is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None) + if is_new_proxy_conn: + self._prepare_proxy(conn) + # Make the request on the httplib connection object. httplib_response = self._make_request(conn, method, url, - timeout=timeout, + timeout=timeout_obj, body=body, headers=headers) # If we're going to release the connection in ``finally:``, then @@ -537,26 +565,36 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): raise EmptyPoolError(self, "No pool connections are available.") except (BaseSSLError, CertificateError) as e: - # Release connection unconditionally because there is no way to - # close it externally in case of exception. - release_conn = True + # Close the connection. If a connection is reused on which there + # was a Certificate error, the next request will certainly raise + # another Certificate error. + if conn: + conn.close() + conn = None raise SSLError(e) - except (TimeoutError, HTTPException, SocketError) as e: + except SSLError: + # Treat SSLError separately from BaseSSLError to preserve + # traceback. + if conn: + conn.close() + conn = None + raise + + except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: if conn: # Discard the connection for these exceptions. It will be # be replaced during the next _get_conn() call. conn.close() conn = None - stacktrace = sys.exc_info()[2] if isinstance(e, SocketError) and self.proxy: e = ProxyError('Cannot connect to proxy.', e) elif isinstance(e, (SocketError, HTTPException)): e = ProtocolError('Connection aborted.', e) - retries = retries.increment(method, url, error=e, - _pool=self, _stacktrace=stacktrace) + retries = retries.increment(method, url, error=e, _pool=self, + _stacktrace=sys.exc_info()[2]) retries.sleep() # Keep track of the error for the retry warning. @@ -668,23 +706,25 @@ class HTTPSConnectionPool(HTTPConnectionPool): assert_fingerprint=self.assert_fingerprint) conn.ssl_version = self.ssl_version - if self.proxy is not None: - # Python 2.7+ - try: - set_tunnel = conn.set_tunnel - except AttributeError: # Platform-specific: Python 2.6 - set_tunnel = conn._set_tunnel + return conn - if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older - set_tunnel(self.host, self.port) - else: - set_tunnel(self.host, self.port, self.proxy_headers) + def _prepare_proxy(self, conn): + """ + Establish tunnel connection early, because otherwise httplib + would improperly set Host: header to proxy's IP:port. + """ + # Python 2.7+ + try: + set_tunnel = conn.set_tunnel + except AttributeError: # Platform-specific: Python 2.6 + set_tunnel = conn._set_tunnel - # Establish tunnel connection early, because otherwise httplib - # would improperly set Host: header to proxy's IP:port. - conn.connect() + if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older + set_tunnel(self.host, self.port) + else: + set_tunnel(self.host, self.port, self.proxy_headers) - return conn + conn.connect() def _new_conn(self): """ @@ -695,7 +735,6 @@ class HTTPSConnectionPool(HTTPConnectionPool): % (self.num_connections, self.host)) if not self.ConnectionCls or self.ConnectionCls is DummyConnection: - # Platform-specific: Python without ssl raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") @@ -725,8 +764,7 @@ class HTTPSConnectionPool(HTTPConnectionPool): warnings.warn(( 'Unverified HTTPS request is being made. ' 'Adding certificate verification is strongly advised. See: ' - 'https://urllib3.readthedocs.org/en/latest/security.html ' - '(This warning will only appear once by default.)'), + 'https://urllib3.readthedocs.org/en/latest/security.html'), InsecureRequestWarning) diff --git a/requests/packages/urllib3/contrib/pyopenssl.py b/requests/packages/urllib3/contrib/pyopenssl.py index 24de9e4..b2c34a8 100644 --- a/requests/packages/urllib3/contrib/pyopenssl.py +++ b/requests/packages/urllib3/contrib/pyopenssl.py @@ -29,7 +29,7 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI when the required modules are installed. Activating this module also has the positive side effect of disabling SSL/TLS -encryption in Python 2 (see `CRIME attack`_). +compression in Python 2 (see `CRIME attack`_). If you want to configure the default list of supported cipher suites, you can set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. @@ -38,8 +38,6 @@ Module Variables ---------------- :var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES: - ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS`` .. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication .. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) @@ -70,9 +68,14 @@ HAS_SNI = SUBJ_ALT_NAME_SUPPORT # Map from urllib3 to PyOpenSSL compatible parameter-values. _openssl_versions = { ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, } + +try: + _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) +except AttributeError: + pass + _openssl_verify = { ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, @@ -80,22 +83,7 @@ _openssl_verify = { + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, } -# A secure default. -# Sources for more information on TLS ciphers: -# -# - https://wiki.mozilla.org/Security/Server_Side_TLS -# - https://www.ssllabs.com/projects/best-practices/index.html -# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ -# -# The general intent is: -# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), -# - prefer ECDHE over DHE for better performance, -# - prefer any AES-GCM over any AES-CBC for better performance and security, -# - use 3DES as fallback which is secure but slow, -# - disable NULL authentication, MD5 MACs and DSS for security reasons. -DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \ - "ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \ - "!aNULL:!MD5:!DSS" +DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS orig_util_HAS_SNI = util.HAS_SNI @@ -186,6 +174,11 @@ class WrappedSocket(object): return b'' else: raise + except OpenSSL.SSL.ZeroReturnError as e: + if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: + return b'' + else: + raise except OpenSSL.SSL.WantReadError: rd, wd, ed = select.select( [self.socket], [], [], self.socket.gettimeout()) @@ -199,8 +192,21 @@ class WrappedSocket(object): def settimeout(self, timeout): return self.socket.settimeout(timeout) + def _send_until_done(self, data): + while True: + try: + return self.connection.send(data) + except OpenSSL.SSL.WantWriteError: + _, wlist, _ = select.select([], [self.socket], [], + self.socket.gettimeout()) + if not wlist: + raise timeout() + continue + def sendall(self, data): - return self.connection.sendall(data) + while len(data): + sent = self._send_until_done(data) + data = data[sent:] def close(self): if self._makefile_refs < 1: @@ -248,6 +254,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, ssl_version=None): ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) if certfile: + keyfile = keyfile or certfile # Match behaviour of the normal python ssl library ctx.use_certificate_file(certfile) if keyfile: ctx.use_privatekey_file(keyfile) @@ -275,7 +282,9 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, try: cnx.do_handshake() except OpenSSL.SSL.WantReadError: - select.select([sock], [], []) + rd, _, _ = select.select([sock], [], [], sock.gettimeout()) + if not rd: + raise timeout('select timed out') continue except OpenSSL.SSL.Error as e: raise ssl.SSLError('bad handshake', e) diff --git a/requests/packages/urllib3/exceptions.py b/requests/packages/urllib3/exceptions.py index 7519ba9..31bda1c 100644 --- a/requests/packages/urllib3/exceptions.py +++ b/requests/packages/urllib3/exceptions.py @@ -72,11 +72,8 @@ class MaxRetryError(RequestError): def __init__(self, pool, url, reason=None): self.reason = reason - message = "Max retries exceeded with url: %s" % url - if reason: - message += " (Caused by %r)" % reason - else: - message += " (Caused by redirect)" + message = "Max retries exceeded with url: %s (Caused by %r)" % ( + url, reason) RequestError.__init__(self, pool, url, message) @@ -141,6 +138,12 @@ class LocationParseError(LocationValueError): self.location = location +class ResponseError(HTTPError): + "Used as a container for an error reason supplied in a MaxRetryError." + GENERIC_ERROR = 'too many error responses' + SPECIFIC_ERROR = 'too many {status_code} error responses' + + class SecurityWarning(HTTPWarning): "Warned when perfoming security reducing actions" pass @@ -154,3 +157,13 @@ class InsecureRequestWarning(SecurityWarning): class SystemTimeWarning(SecurityWarning): "Warned when system time is suspected to be wrong" pass + + +class InsecurePlatformWarning(SecurityWarning): + "Warned when certain SSL configuration is not available on a platform." + pass + + +class ResponseNotChunked(ProtocolError, ValueError): + "Response needs to be chunked in order to read it as chunks." + pass diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py index 515dc96..b8d1e74 100644 --- a/requests/packages/urllib3/poolmanager.py +++ b/requests/packages/urllib3/poolmanager.py @@ -8,7 +8,7 @@ except ImportError: from ._collections import RecentlyUsedContainer from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool from .connectionpool import port_by_scheme -from .exceptions import LocationValueError +from .exceptions import LocationValueError, MaxRetryError from .request import RequestMethods from .util.url import parse_url from .util.retry import Retry @@ -64,6 +64,14 @@ class PoolManager(RequestMethods): self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.clear() + # Return False to re-raise any potential exceptions + return False + def _new_pool(self, scheme, host, port): """ Create a new :class:`ConnectionPool` based on host, port and scheme. @@ -167,7 +175,14 @@ class PoolManager(RequestMethods): if not isinstance(retries, Retry): retries = Retry.from_int(retries, redirect=redirect) - kw['retries'] = retries.increment(method, redirect_location) + try: + retries = retries.increment(method, url, response=response, _pool=conn) + except MaxRetryError: + if retries.raise_on_redirect: + raise + return response + + kw['retries'] = retries kw['redirect'] = redirect log.info("Redirecting %s -> %s" % (url, redirect_location)) diff --git a/requests/packages/urllib3/request.py b/requests/packages/urllib3/request.py index 51fe238..b08d6c9 100644 --- a/requests/packages/urllib3/request.py +++ b/requests/packages/urllib3/request.py @@ -118,18 +118,24 @@ class RequestMethods(object): which is used to compose the body of the request. The random boundary string can be explicitly set with the ``multipart_boundary`` parameter. """ - if encode_multipart: - body, content_type = encode_multipart_formdata( - fields or {}, boundary=multipart_boundary) - else: - body, content_type = (urlencode(fields or {}), - 'application/x-www-form-urlencoded') - if headers is None: headers = self.headers - headers_ = {'Content-Type': content_type} - headers_.update(headers) + extra_kw = {'headers': {}} + + if fields: + if 'body' in urlopen_kw: + raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') + + if encode_multipart: + body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) + else: + body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' + + extra_kw['body'] = body + extra_kw['headers'] = {'Content-Type': content_type} + + extra_kw['headers'].update(headers) + extra_kw.update(urlopen_kw) - return self.urlopen(method, url, body=body, headers=headers_, - **urlopen_kw) + return self.urlopen(method, url, **extra_kw) diff --git a/requests/packages/urllib3/response.py b/requests/packages/urllib3/response.py index e69de95..24140c4 100644 --- a/requests/packages/urllib3/response.py +++ b/requests/packages/urllib3/response.py @@ -1,15 +1,20 @@ +try: + import http.client as httplib +except ImportError: + import httplib import zlib import io from socket import timeout as SocketTimeout from ._collections import HTTPHeaderDict -from .exceptions import ProtocolError, DecodeError, ReadTimeoutError -from .packages.six import string_types as basestring, binary_type +from .exceptions import ( + ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked +) +from .packages.six import string_types as basestring, binary_type, PY3 from .connection import HTTPException, BaseSSLError from .util.response import is_fp_closed - class DeflateDecoder(object): def __init__(self): @@ -21,6 +26,9 @@ class DeflateDecoder(object): return getattr(self._obj, name) def decompress(self, data): + if not data: + return data + if not self._first_try: return self._obj.decompress(data) @@ -36,9 +44,23 @@ class DeflateDecoder(object): self._data = None +class GzipDecoder(object): + + def __init__(self): + self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) + + def __getattr__(self, name): + return getattr(self._obj, name) + + def decompress(self, data): + if not data: + return data + return self._obj.decompress(data) + + def _get_decoder(mode): if mode == 'gzip': - return zlib.decompressobj(16 + zlib.MAX_WBITS) + return GzipDecoder() return DeflateDecoder() @@ -76,9 +98,10 @@ class HTTPResponse(io.IOBase): strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): - self.headers = HTTPHeaderDict() - if headers: - self.headers.update(headers) + if isinstance(headers, HTTPHeaderDict): + self.headers = headers + else: + self.headers = HTTPHeaderDict(headers) self.status = status self.version = version self.reason = reason @@ -100,7 +123,17 @@ class HTTPResponse(io.IOBase): if hasattr(body, 'read'): self._fp = body - if preload_content and not self._body: + # Are we using the chunked-style of transfer encoding? + self.chunked = False + self.chunk_left = None + tr_enc = self.headers.get('transfer-encoding', '').lower() + # Don't incur the penalty of creating a list and then discarding it + encodings = (enc.strip() for enc in tr_enc.split(",")) + if "chunked" in encodings: + self.chunked = True + + # We certainly don't want to preload content when the response is chunked. + if not self.chunked and preload_content and not self._body: self._body = self.read(decode_content=decode_content) def get_redirect_location(self): @@ -140,6 +173,35 @@ class HTTPResponse(io.IOBase): """ return self._fp_bytes_read + def _init_decoder(self): + """ + Set-up the _decoder attribute if necessar. + """ + # Note: content-encoding value should be case-insensitive, per RFC 7230 + # Section 3.2 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None and content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) + + def _decode(self, data, decode_content, flush_decoder): + """ + Decode the data passed in and potentially flush the decoder. + """ + try: + if decode_content and self._decoder: + data = self._decoder.decompress(data) + except (IOError, zlib.error) as e: + content_encoding = self.headers.get('content-encoding', '').lower() + raise DecodeError( + "Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding, e) + + if flush_decoder and decode_content and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + + return data + def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional @@ -161,12 +223,7 @@ class HTTPResponse(io.IOBase): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None: - if content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) + self._init_decoder() if decode_content is None: decode_content = self.decode_content @@ -202,7 +259,7 @@ class HTTPResponse(io.IOBase): except BaseSSLError as e: # FIXME: Is there a better way to differentiate between SSLErrors? - if not 'read operation timed out' in str(e): # Defensive: + if 'read operation timed out' not in str(e): # Defensive: # This shouldn't happen but just in case we're missing an edge # case, let's avoid swallowing SSL errors. raise @@ -215,17 +272,7 @@ class HTTPResponse(io.IOBase): self._fp_bytes_read += len(data) - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() + data = self._decode(data, decode_content, flush_decoder) if cache_content: self._body = data @@ -252,11 +299,15 @@ class HTTPResponse(io.IOBase): If True, will attempt to decode the body based on the 'content-encoding' header. """ - while not is_fp_closed(self._fp): - data = self.read(amt=amt, decode_content=decode_content) + if self.chunked: + for line in self.read_chunked(amt, decode_content=decode_content): + yield line + else: + while not is_fp_closed(self._fp): + data = self.read(amt=amt, decode_content=decode_content) - if data: - yield data + if data: + yield data @classmethod def from_httplib(ResponseCls, r, **response_kw): @@ -267,14 +318,16 @@ class HTTPResponse(io.IOBase): Remaining parameters are passed to the HTTPResponse constructor, along with ``original_response=r``. """ - - headers = HTTPHeaderDict() - for k, v in r.getheaders(): - headers.add(k, v) + headers = r.msg + if not isinstance(headers, HTTPHeaderDict): + if PY3: # Python 3 + headers = HTTPHeaderDict(headers.items()) + else: # Python 2 + headers = HTTPHeaderDict.from_httplib(headers) # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) - return ResponseCls(body=r, + resp = ResponseCls(body=r, headers=headers, status=r.status, version=r.version, @@ -282,6 +335,7 @@ class HTTPResponse(io.IOBase): strict=strict, original_response=r, **response_kw) + return resp # Backwards-compatibility methods for httplib.HTTPResponse def getheaders(self): @@ -331,3 +385,82 @@ class HTTPResponse(io.IOBase): else: b[:len(temp)] = temp return len(temp) + + def _update_chunk_length(self): + # First, we'll figure out length of a chunk and then + # we'll try to read it from socket. + if self.chunk_left is not None: + return + line = self._fp.fp.readline() + line = line.split(b';', 1)[0] + try: + self.chunk_left = int(line, 16) + except ValueError: + # Invalid chunked protocol response, abort. + self.close() + raise httplib.IncompleteRead(line) + + def _handle_chunk(self, amt): + returned_chunk = None + if amt is None: + chunk = self._fp._safe_read(self.chunk_left) + returned_chunk = chunk + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + elif amt < self.chunk_left: + value = self._fp._safe_read(amt) + self.chunk_left = self.chunk_left - amt + returned_chunk = value + elif amt == self.chunk_left: + value = self._fp._safe_read(amt) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + returned_chunk = value + else: # amt > self.chunk_left + returned_chunk = self._fp._safe_read(self.chunk_left) + self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. + self.chunk_left = None + return returned_chunk + + def read_chunked(self, amt=None, decode_content=None): + """ + Similar to :meth:`HTTPResponse.read`, but with an additional + parameter: ``decode_content``. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + """ + self._init_decoder() + # FIXME: Rewrite this method and make it a class with a better structured logic. + if not self.chunked: + raise ResponseNotChunked("Response is not chunked. " + "Header 'transfer-encoding: chunked' is missing.") + + if self._original_response and self._original_response._method.upper() == 'HEAD': + # Don't bother reading the body of a HEAD request. + # FIXME: Can we do this somehow without accessing private httplib _method? + self._original_response.close() + return + + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + yield self._decode(chunk, decode_content=decode_content, + flush_decoder=True) + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() + self.release_conn() diff --git a/requests/packages/urllib3/util/connection.py b/requests/packages/urllib3/util/connection.py index 2156993..859aec6 100644 --- a/requests/packages/urllib3/util/connection.py +++ b/requests/packages/urllib3/util/connection.py @@ -82,6 +82,7 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, err = _ if sock is not None: sock.close() + sock = None if err is not None: raise err diff --git a/requests/packages/urllib3/util/retry.py b/requests/packages/urllib3/util/retry.py index eb560df..7e0959d 100644 --- a/requests/packages/urllib3/util/retry.py +++ b/requests/packages/urllib3/util/retry.py @@ -2,10 +2,11 @@ import time import logging from ..exceptions import ( - ProtocolError, ConnectTimeoutError, - ReadTimeoutError, MaxRetryError, + ProtocolError, + ReadTimeoutError, + ResponseError, ) from ..packages import six @@ -36,7 +37,6 @@ class Retry(object): Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless retries are disabled, in which case the causing exception will be raised. - :param int total: Total number of retries to allow. Takes precedence over other counts. @@ -184,13 +184,13 @@ class Retry(object): return isinstance(err, ConnectTimeoutError) def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we can't - assume that the server did not process any of it. + """ Errors that occur after the request has been started, so we should + assume that the server began processing it. """ return isinstance(err, (ReadTimeoutError, ProtocolError)) def is_forced_retry(self, method, status_code): - """ Is this method/response retryable? (Based on method/codes whitelists) + """ Is this method/status code retryable? (Based on method/codes whitelists) """ if self.method_whitelist and method.upper() not in self.method_whitelist: return False @@ -198,8 +198,7 @@ class Retry(object): return self.status_forcelist and status_code in self.status_forcelist def is_exhausted(self): - """ Are we out of retries? - """ + """ Are we out of retries? """ retry_counts = (self.total, self.connect, self.read, self.redirect) retry_counts = list(filter(None, retry_counts)) if not retry_counts: @@ -230,6 +229,7 @@ class Retry(object): connect = self.connect read = self.read redirect = self.redirect + cause = 'unknown' if error and self._is_connection_error(error): # Connect retry? @@ -251,10 +251,16 @@ class Retry(object): # Redirect retry? if redirect is not None: redirect -= 1 + cause = 'too many redirects' else: - # FIXME: Nothing changed, scenario doesn't make sense. + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist _observed_errors += 1 + cause = ResponseError.GENERIC_ERROR + if response and response.status: + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) new_retry = self.new( total=total, @@ -262,7 +268,7 @@ class Retry(object): _observed_errors=_observed_errors) if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error) + raise MaxRetryError(_pool, url, error or ResponseError(cause)) log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) diff --git a/requests/packages/urllib3/util/ssl_.py b/requests/packages/urllib3/util/ssl_.py index 9cfe2d2..b846d42 100644 --- a/requests/packages/urllib3/util/ssl_.py +++ b/requests/packages/urllib3/util/ssl_.py @@ -1,21 +1,107 @@ from binascii import hexlify, unhexlify -from hashlib import md5, sha1 +from hashlib import md5, sha1, sha256 -from ..exceptions import SSLError +from ..exceptions import SSLError, InsecurePlatformWarning -try: # Test for SSL features - SSLContext = None - HAS_SNI = False +SSLContext = None +HAS_SNI = False +create_default_context = None + +import errno +import warnings +try: # Test for SSL features import ssl from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import SSLContext # Modern SSL? from ssl import HAS_SNI # Has SNI? except ImportError: pass +try: + from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION +except ImportError: + OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 + OP_NO_COMPRESSION = 0x20000 + +# A secure default. +# Sources for more information on TLS ciphers: +# +# - https://wiki.mozilla.org/Security/Server_Side_TLS +# - https://www.ssllabs.com/projects/best-practices/index.html +# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ +# +# The general intent is: +# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), +# - prefer ECDHE over DHE for better performance, +# - prefer any AES-GCM over any AES-CBC for better performance and security, +# - use 3DES as fallback which is secure but slow, +# - disable NULL authentication, MD5 MACs and DSS for security reasons. +DEFAULT_CIPHERS = ( + 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' + 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' + '!eNULL:!MD5' +) + +try: + from ssl import SSLContext # Modern SSL? +except ImportError: + import sys + + class SSLContext(object): # Platform-specific: Python 2 & 3.1 + supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or + (3, 2) <= sys.version_info) + + def __init__(self, protocol_version): + self.protocol = protocol_version + # Use default values from a real SSLContext + self.check_hostname = False + self.verify_mode = ssl.CERT_NONE + self.ca_certs = None + self.options = 0 + self.certfile = None + self.keyfile = None + self.ciphers = None + + def load_cert_chain(self, certfile, keyfile): + self.certfile = certfile + self.keyfile = keyfile + + def load_verify_locations(self, location): + self.ca_certs = location + + def set_ciphers(self, cipher_suite): + if not self.supports_set_ciphers: + raise TypeError( + 'Your version of Python does not support setting ' + 'a custom cipher suite. Please upgrade to Python ' + '2.7, 3.2, or later if you need this functionality.' + ) + self.ciphers = cipher_suite + + def wrap_socket(self, socket, server_hostname=None): + warnings.warn( + 'A true SSLContext object is not available. This prevents ' + 'urllib3 from configuring SSL appropriately and may cause ' + 'certain SSL connections to fail. For more information, see ' + 'https://urllib3.readthedocs.org/en/latest/security.html' + '#insecureplatformwarning.', + InsecurePlatformWarning + ) + kwargs = { + 'keyfile': self.keyfile, + 'certfile': self.certfile, + 'ca_certs': self.ca_certs, + 'cert_reqs': self.verify_mode, + 'ssl_version': self.protocol, + } + if self.supports_set_ciphers: # Platform-specific: Python 2.7+ + return wrap_socket(socket, ciphers=self.ciphers, **kwargs) + else: # Platform-specific: Python 2.6 + return wrap_socket(socket, **kwargs) + + def assert_fingerprint(cert, fingerprint): """ Checks if given fingerprint matches the supplied certificate. @@ -30,7 +116,8 @@ def assert_fingerprint(cert, fingerprint): # this digest. hashfunc_map = { 16: md5, - 20: sha1 + 20: sha1, + 32: sha256, } fingerprint = fingerprint.replace(':', '').lower() @@ -91,42 +178,103 @@ def resolve_ssl_version(candidate): return candidate -if SSLContext is not None: # Python 3.2+ - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - """ - All arguments except `server_hostname` have the same meaning as for - :func:`ssl.wrap_socket` - - :param server_hostname: - Hostname of the expected certificate - """ - context = SSLContext(ssl_version) - context.verify_mode = cert_reqs - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - context.options |= OP_NO_COMPRESSION - - if ca_certs: - try: - context.load_verify_locations(ca_certs) - # Py32 raises IOError - # Py33 raises FileNotFoundError - except Exception as e: # Reraise as SSLError +def create_urllib3_context(ssl_version=None, cert_reqs=None, + options=None, ciphers=None): + """All arguments have the same meaning as ``ssl_wrap_socket``. + + By default, this function does a lot of the same work that + ``ssl.create_default_context`` does on Python 3.4+. It: + + - Disables SSLv2, SSLv3, and compression + - Sets a restricted set of server ciphers + + If you wish to enable SSLv3, you can do:: + + from urllib3.util import ssl_ + context = ssl_.create_urllib3_context() + context.options &= ~ssl_.OP_NO_SSLv3 + + You can do the same to enable compression (substituting ``COMPRESSION`` + for ``SSLv3`` in the last line above). + + :param ssl_version: + The desired protocol version to use. This will default to + PROTOCOL_SSLv23 which will negotiate the highest protocol that both + the server and your installation of OpenSSL support. + :param cert_reqs: + Whether to require the certificate verification. This defaults to + ``ssl.CERT_REQUIRED``. + :param options: + Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, + ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. + :param ciphers: + Which cipher suites to allow the server to select. + :returns: + Constructed SSLContext object with specified options + :rtype: SSLContext + """ + context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) + + # Setting the default here, as we may have no ssl module on import + cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs + + if options is None: + options = 0 + # SSLv2 is easily broken and is considered harmful and dangerous + options |= OP_NO_SSLv2 + # SSLv3 has several problems and is now dangerous + options |= OP_NO_SSLv3 + # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ + # (issue #309) + options |= OP_NO_COMPRESSION + + context.options |= options + + if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 + context.set_ciphers(ciphers or DEFAULT_CIPHERS) + + context.verify_mode = cert_reqs + if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 + # We do our own verification, including fingerprints and alternative + # hostnames. So disable it here + context.check_hostname = False + return context + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None, ciphers=None, ssl_context=None): + """ + All arguments except for server_hostname and ssl_context have the same + meaning as they do when using :func:`ssl.wrap_socket`. + + :param server_hostname: + When SNI is supported, the expected hostname of the certificate + :param ssl_context: + A pre-made :class:`SSLContext` object. If none is provided, one will + be created using :func:`create_urllib3_context`. + :param ciphers: + A string of ciphers we wish the client to support. This is not + supported on Python 2.6 as the ssl module does not support it. + """ + context = ssl_context + if context is None: + context = create_urllib3_context(ssl_version, cert_reqs, + ciphers=ciphers) + + if ca_certs: + try: + context.load_verify_locations(ca_certs) + except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 + raise SSLError(e) + # Py33 raises FileNotFoundError which subclasses OSError + # These are not equivalent unless we check the errno attribute + except OSError as e: # Platform-specific: Python 3.3 and beyond + if e.errno == errno.ENOENT: raise SSLError(e) - if certfile: - # FIXME: This block needs a test. - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) - -else: # Python 3.1 and earlier - def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None): - return wrap_socket(sock, keyfile=keyfile, certfile=certfile, - ca_certs=ca_certs, cert_reqs=cert_reqs, - ssl_version=ssl_version) + raise + if certfile: + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) diff --git a/requests/packages/urllib3/util/url.py b/requests/packages/urllib3/util/url.py index 487d456..e58050c 100644 --- a/requests/packages/urllib3/util/url.py +++ b/requests/packages/urllib3/util/url.py @@ -15,6 +15,8 @@ class Url(namedtuple('Url', url_attrs)): def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None): + if path and not path.startswith('/'): + path = '/' + path return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment) @@ -40,6 +42,48 @@ class Url(namedtuple('Url', url_attrs)): return '%s:%d' % (self.host, self.port) return self.host + @property + def url(self): + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: :: + + >>> U = parse_url('http://google.com/mail/') + >>> U.url + 'http://google.com/mail/' + >>> Url('http', 'username:password', 'host.com', 80, + ... '/path', 'query', 'fragment').url + 'http://username:password@host.com:80/path?query#fragment' + """ + scheme, auth, host, port, path, query, fragment = self + url = '' + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + '://' + if auth is not None: + url += auth + '@' + if host is not None: + url += host + if port is not None: + url += ':' + str(port) + if path is not None: + url += path + if query is not None: + url += '?' + query + if fragment is not None: + url += '#' + fragment + + return url + + def __str__(self): + return self.url def split_first(s, delims): """ @@ -84,7 +128,7 @@ def parse_url(url): Example:: >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/', ...) + Url(scheme='http', host='google.com', port=None, path='/mail/', ...) >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) >>> parse_url('/foo?bar') @@ -162,7 +206,6 @@ def parse_url(url): return Url(scheme, auth, host, port, path, query, fragment) - def get_host(url): """ Deprecated. Use :func:`.parse_url` instead. |