From 92b84b67f7b187b81dacbf1ae46d59a1d0b5b125 Mon Sep 17 00:00:00 2001 From: SVN-Git Migration Date: Thu, 8 Oct 2015 13:19:33 -0700 Subject: Imported Upstream version 1.6 --- urllib3/__init__.py | 4 +- urllib3/_collections.py | 2 +- urllib3/connectionpool.py | 99 ++++++++++++++++--------- urllib3/contrib/__init__.py | 0 urllib3/contrib/ntlmpool.py | 120 +++++++++++++++++++++++++++++++ urllib3/contrib/pyopenssl.py | 167 +++++++++++++++++++++++++++++++++++++++++++ urllib3/exceptions.py | 18 +++-- urllib3/filepost.py | 27 ++++--- urllib3/packages/six.py | 53 ++++++++------ urllib3/poolmanager.py | 74 +++++++++++++------ urllib3/request.py | 28 ++++++-- urllib3/response.py | 95 ++++++++++++++++-------- urllib3/util.py | 142 +++++++++++++++++++++++++++++++++--- 13 files changed, 696 insertions(+), 133 deletions(-) create mode 100644 urllib3/contrib/__init__.py create mode 100644 urllib3/contrib/ntlmpool.py create mode 100644 urllib3/contrib/pyopenssl.py (limited to 'urllib3') diff --git a/urllib3/__init__.py b/urllib3/__init__.py index b552543..ebd43b3 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -1,5 +1,5 @@ # urllib3/__init__.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,7 +10,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.5' +__version__ = '1.6' from .connectionpool import ( diff --git a/urllib3/_collections.py b/urllib3/_collections.py index a052b1d..b35a736 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -1,5 +1,5 @@ # urllib3/_collections.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 97da544..73fa9ca 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -1,13 +1,15 @@ # urllib3/connectionpool.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import logging import socket +import errno -from socket import timeout as SocketTimeout +from socket import error as SocketError, timeout as SocketTimeout +from .util import resolve_cert_reqs, resolve_ssl_version, assert_fingerprint try: # Python 3 from http.client import HTTPConnection, HTTPException @@ -41,7 +43,7 @@ except (ImportError, AttributeError): # Platform-specific: No SSL. from .request import RequestMethods from .response import HTTPResponse -from .util import get_host, is_connection_dropped +from .util import get_host, is_connection_dropped, ssl_wrap_socket from .exceptions import ( ClosedPoolError, EmptyPoolError, @@ -76,32 +78,41 @@ class VerifiedHTTPSConnection(HTTPSConnection): """ cert_reqs = None ca_certs = None + ssl_version = None def set_cert(self, key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): - ssl_req_scheme = { - 'CERT_NONE': ssl.CERT_NONE, - 'CERT_OPTIONAL': ssl.CERT_OPTIONAL, - 'CERT_REQUIRED': ssl.CERT_REQUIRED - } + cert_reqs=None, ca_certs=None, + assert_hostname=None, assert_fingerprint=None): self.key_file = key_file self.cert_file = cert_file - self.cert_reqs = ssl_req_scheme.get(cert_reqs) or ssl.CERT_NONE + self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def connect(self): # Add certificate verification sock = socket.create_connection((self.host, self.port), self.timeout) + resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) + resolved_ssl_version = resolve_ssl_version(self.ssl_version) + # Wrap socket using verification with the root certs in # trusted_root_certs - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs) - if self.ca_certs: - match_hostname(self.sock.getpeercert(), self.host) - + self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file, + cert_reqs=resolved_cert_reqs, + ca_certs=self.ca_certs, + server_hostname=self.host, + ssl_version=resolved_ssl_version) + + if resolved_cert_reqs != ssl.CERT_NONE: + if self.assert_fingerprint: + assert_fingerprint(self.sock.getpeercert(binary_form=True), + self.assert_fingerprint) + else: + match_hostname(self.sock.getpeercert(), + self.assert_hostname or self.host) ## Pool objects @@ -166,13 +177,13 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None): - super(HTTPConnectionPool, self).__init__(host, port) + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) self.strict = strict self.timeout = timeout self.pool = self.QueueCls(maxsize) self.block = block - self.headers = headers or {} # Fill the queue up so that doing get() on it will block properly for _ in xrange(maxsize): @@ -189,7 +200,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): self.num_connections += 1 log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host)) - return HTTPConnection(host=self.host, port=self.port) + return HTTPConnection(host=self.host, + port=self.port, + strict=self.strict) def _get_conn(self, timeout=None): """ @@ -449,12 +462,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Name mismatch raise SSLError(e) - except HTTPException as e: + except (HTTPException, SocketError) as e: # Connection broken, discard. It will be replaced next _get_conn(). conn = None # This is necessary so we can access e below err = e + if retries == 0: + raise MaxRetryError(self, url, e) + finally: if release_conn: # Put the connection back to be reused. If the connection is @@ -491,11 +507,15 @@ class HTTPSConnectionPool(HTTPConnectionPool): When Python is compiled with the :mod:`ssl` module, then :class:`.VerifiedHTTPSConnection` is used, which *can* verify certificates, - instead of :class:httplib.HTTPSConnection`. + instead of :class:`httplib.HTTPSConnection`. - The ``key_file``, ``cert_file``, ``cert_reqs``, and ``ca_certs`` parameters - are only used if :mod:`ssl` is available and are fed into - :meth:`ssl.wrap_socket` to upgrade the connection socket into an SSL socket. + :class:`.VerifiedHTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and + ``ssl_version`` are only used if :mod:`ssl` is available and are fed into + :meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket + into an SSL socket. """ scheme = 'https' @@ -503,16 +523,20 @@ class HTTPSConnectionPool(HTTPConnectionPool): def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1, block=False, headers=None, - key_file=None, cert_file=None, - cert_reqs='CERT_NONE', ca_certs=None): + key_file=None, cert_file=None, cert_reqs=None, + ca_certs=None, ssl_version=None, + assert_hostname=None, assert_fingerprint=None): - super(HTTPSConnectionPool, self).__init__(host, port, - strict, timeout, maxsize, - block, headers) + HTTPConnectionPool.__init__(self, host, port, + strict, timeout, maxsize, + block, headers) self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs self.ca_certs = ca_certs + self.ssl_version = ssl_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint def _new_conn(self): """ @@ -522,16 +546,25 @@ class HTTPSConnectionPool(HTTPConnectionPool): log.info("Starting new HTTPS connection (%d): %s" % (self.num_connections, self.host)) - if not ssl: # Platform-specific: Python compiled without +ssl + if not ssl: # Platform-specific: Python compiled without +ssl if not HTTPSConnection or HTTPSConnection is object: raise SSLError("Can't connect to HTTPS URL because the SSL " "module is not available.") - return HTTPSConnection(host=self.host, port=self.port) + return HTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) - connection = VerifiedHTTPSConnection(host=self.host, port=self.port) + connection = VerifiedHTTPSConnection(host=self.host, + port=self.port, + strict=self.strict) connection.set_cert(key_file=self.key_file, cert_file=self.cert_file, - cert_reqs=self.cert_reqs, ca_certs=self.ca_certs) + cert_reqs=self.cert_reqs, ca_certs=self.ca_certs, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint) + + connection.ssl_version = self.ssl_version + return connection diff --git a/urllib3/contrib/__init__.py b/urllib3/contrib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/urllib3/contrib/ntlmpool.py b/urllib3/contrib/ntlmpool.py new file mode 100644 index 0000000..277ee0b --- /dev/null +++ b/urllib3/contrib/ntlmpool.py @@ -0,0 +1,120 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +try: + from http.client import HTTPSConnection +except ImportError: + from httplib import HTTPSConnection +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. + self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') + raise Exception('Wrong server response: %s %s' % + (res.status, res.reason)) + + res.fp = None + log.debug('Connection established') + return conn + + def urlopen(self, method, url, body=None, headers=None, retries=3, + redirect=True, assert_same_host=True): + if headers is None: + headers = {} + headers['Connection'] = 'Keep-Alive' + return super(NTLMConnectionPool, self).urlopen(method, url, body, + headers, retries, + redirect, + assert_same_host) diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py new file mode 100644 index 0000000..5c4c6d8 --- /dev/null +++ b/urllib3/contrib/pyopenssl.py @@ -0,0 +1,167 @@ +'''SSL with SNI-support for Python 2. + +This needs the following packages installed: + +* pyOpenSSL (tested with 0.13) +* ndg-httpsclient (tested with 0.3.2) +* pyasn1 (tested with 0.1.6) + +To activate it call :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`. +This can be done in a ``sitecustomize`` module, or at any other time before +your application begins using ``urllib3``, like this:: + + try: + import urllib3.contrib.pyopenssl + urllib3.contrib.pyopenssl.inject_into_urllib3() + except ImportError: + pass + +Now you can use :mod:`urllib3` as you normally would, and it will support SNI +when the required modules are installed. +''' + +from ndg.httpsclient.ssl_peer_verification import (ServerSSLCertVerification, + SUBJ_ALT_NAME_SUPPORT) +from ndg.httpsclient.subj_alt_name import SubjectAltName +import OpenSSL.SSL +from pyasn1.codec.der import decoder as der_decoder +from socket import _fileobject +import ssl + +from .. import connectionpool +from .. import util + +__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] + +# SNI only *really* works if we can read the subjectAltName of certificates. +HAS_SNI = SUBJ_ALT_NAME_SUPPORT + +# Map from urllib3 to PyOpenSSL compatible parameter-values. +_openssl_versions = { + ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, + ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD, + ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, +} +_openssl_verify = { + ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, + ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, + ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER + + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, +} + + +orig_util_HAS_SNI = util.HAS_SNI +orig_connectionpool_ssl_wrap_socket = connectionpool.ssl_wrap_socket + + +def inject_into_urllib3(): + 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' + + connectionpool.ssl_wrap_socket = ssl_wrap_socket + util.HAS_SNI = HAS_SNI + + +def extract_from_urllib3(): + 'Undo monkey-patching by :func:`inject_into_urllib3`.' + + connectionpool.ssl_wrap_socket = orig_connectionpool_ssl_wrap_socket + util.HAS_SNI = orig_util_HAS_SNI + + +### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. +def get_subj_alt_name(peer_cert): + # Search through extensions + dns_name = [] + if not SUBJ_ALT_NAME_SUPPORT: + return dns_name + + general_names = SubjectAltName() + for i in range(peer_cert.get_extension_count()): + ext = peer_cert.get_extension(i) + ext_name = ext.get_short_name() + if ext_name != 'subjectAltName': + continue + + # PyOpenSSL returns extension data in ASN.1 encoded form + ext_dat = ext.get_data() + decoded_dat = der_decoder.decode(ext_dat, + asn1Spec=general_names) + + for name in decoded_dat: + if not isinstance(name, SubjectAltName): + continue + for entry in range(len(name)): + component = name.getComponentByPosition(entry) + if component.getName() != 'dNSName': + continue + dns_name.append(str(component.getComponent())) + + return dns_name + + +class WrappedSocket(object): + '''API-compatibility wrapper for Python OpenSSL's Connection-class.''' + + def __init__(self, connection, socket): + self.connection = connection + self.socket = socket + + def makefile(self, mode, bufsize=-1): + return _fileobject(self.connection, mode, bufsize) + + def settimeout(self, timeout): + return self.socket.settimeout(timeout) + + def sendall(self, data): + return self.connection.sendall(data) + + def getpeercert(self, binary_form=False): + x509 = self.connection.get_peer_certificate() + if not x509: + raise ssl.SSLError('') + + if binary_form: + return OpenSSL.crypto.dump_certificate( + OpenSSL.crypto.FILETYPE_ASN1, + x509) + + return { + 'subject': ( + (('commonName', x509.get_subject().CN),), + ), + 'subjectAltName': [ + ('DNS', value) + for value in get_subj_alt_name(x509) + ] + } + + +def _verify_callback(cnx, x509, err_no, err_depth, return_code): + return err_no == 0 + + +def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) + if certfile: + ctx.use_certificate_file(certfile) + if keyfile: + ctx.use_privatekey_file(keyfile) + if cert_reqs != ssl.CERT_NONE: + ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) + if ca_certs: + try: + ctx.load_verify_locations(ca_certs, None) + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) + + cnx = OpenSSL.SSL.Connection(ctx, sock) + cnx.set_tlsext_host_name(server_hostname) + cnx.set_connect_state() + try: + cnx.do_handshake() + except OpenSSL.SSL.Error as e: + raise ssl.SSLError('bad handshake', e) + + return WrappedSocket(cnx, sock) diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py index 99ebb67..8dd76af 100644 --- a/urllib3/exceptions.py +++ b/urllib3/exceptions.py @@ -1,5 +1,5 @@ # urllib3/exceptions.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -18,6 +18,10 @@ class PoolError(HTTPError): self.pool = pool HTTPError.__init__(self, "%s: %s" % (pool, message)) + def __reduce__(self): + # For pickling purposes. + return self.__class__, (None, self.url) + class SSLError(HTTPError): "Raised when SSL certificate fails in an HTTPS connection." @@ -34,10 +38,16 @@ class DecodeError(HTTPError): class MaxRetryError(PoolError): "Raised when the maximum number of retries is exceeded." - def __init__(self, pool, url): + def __init__(self, pool, url, reason=None): + self.reason = reason + message = "Max retries exceeded with url: %s" % url - PoolError.__init__(self, pool, message) + if reason: + message += " (Caused by %s: %s)" % (type(reason), reason) + else: + message += " (Caused by redirect)" + PoolError.__init__(self, pool, message) self.url = url @@ -72,6 +82,6 @@ class LocationParseError(ValueError, HTTPError): def __init__(self, location): message = "Failed to parse: %s" % location - super(LocationParseError, self).__init__(self, message) + HTTPError.__init__(self, message) self.location = location diff --git a/urllib3/filepost.py b/urllib3/filepost.py index e679b93..526a740 100644 --- a/urllib3/filepost.py +++ b/urllib3/filepost.py @@ -1,5 +1,5 @@ # urllib3/filepost.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -41,13 +41,16 @@ def iter_fields(fields): def encode_multipart_formdata(fields, boundary=None): """ - Encode a dictionary of ``fields`` using the multipart/form-data mime format. + Encode a dictionary of ``fields`` using the multipart/form-data MIME format. :param fields: - Dictionary of fields or list of (key, value) field tuples. The key is - treated as the field name, and the value as the body of the form-data - bytes. If the value is a tuple of two elements, then the first element - is treated as the filename of the form-data section. + Dictionary of fields or list of (key, value) or (key, value, MIME type) + field tuples. The key is treated as the field name, and the value as + the body of the form-data bytes. If the value is a tuple of two + elements, then the first element is treated as the filename of the + form-data section and a suitable MIME type is guessed based on the + filename. If the value is a tuple of three elements, then the third + element is treated as an explicit MIME type of the form-data section. Field names and filenames must be unicode. @@ -63,16 +66,20 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s\r\n' % (boundary))) if isinstance(value, tuple): - filename, data = value + if len(value) == 3: + filename, data, content_type = value + else: + filename, data = value + content_type = get_content_type(filename) writer(body).write('Content-Disposition: form-data; name="%s"; ' 'filename="%s"\r\n' % (fieldname, filename)) body.write(b('Content-Type: %s\r\n\r\n' % - (get_content_type(filename)))) + (content_type,))) else: data = value writer(body).write('Content-Disposition: form-data; name="%s"\r\n' % (fieldname)) - body.write(b'Content-Type: text/plain\r\n\r\n') + body.write(b'\r\n') if isinstance(data, int): data = str(data) # Backwards compatibility @@ -86,6 +93,6 @@ def encode_multipart_formdata(fields, boundary=None): body.write(b('--%s--\r\n' % (boundary))) - content_type = b('multipart/form-data; boundary=%s' % boundary) + content_type = str('multipart/form-data; boundary=%s' % boundary) return body.getvalue(), content_type diff --git a/urllib3/packages/six.py b/urllib3/packages/six.py index a64f6fb..27d8011 100644 --- a/urllib3/packages/six.py +++ b/urllib3/packages/six.py @@ -24,7 +24,7 @@ import sys import types __author__ = "Benjamin Peterson " -__version__ = "1.1.0" +__version__ = "1.2.0" # Revision 41c74fef2ded # True if we are running on Python 3. @@ -45,19 +45,23 @@ else: text_type = unicode binary_type = str - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit + if sys.platform.startswith("java"): + # Jython always uses 32 bits. MAXSIZE = int((1 << 31) - 1) else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X + # It's possible to have sizeof(long) != sizeof(Py_ssize_t). + class X(object): + def __len__(self): + return 1 << 31 + try: + len(X()) + except OverflowError: + # 32-bit + MAXSIZE = int((1 << 31) - 1) + else: + # 64-bit + MAXSIZE = int((1 << 63) - 1) + del X def _add_doc(func, doc): @@ -132,6 +136,7 @@ class _MovedItems(types.ModuleType): _moved_attributes = [ MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), + MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), MovedAttribute("map", "itertools", "builtins", "imap", "map"), MovedAttribute("reload_module", "__builtin__", "imp", "reload"), MovedAttribute("reduce", "__builtin__", "functools"), @@ -178,7 +183,7 @@ for attr in _moved_attributes: setattr(_MovedItems, attr.name, attr) del attr -moves = sys.modules["six.moves"] = _MovedItems("moves") +moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves") def add_move(move): @@ -219,12 +224,19 @@ else: _iteritems = "iteritems" +try: + advance_iterator = next +except NameError: + def advance_iterator(it): + return it.next() +next = advance_iterator + + if PY3: def get_unbound_function(unbound): return unbound - - advance_iterator = next + Iterator = object def callable(obj): return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) @@ -232,9 +244,10 @@ else: def get_unbound_function(unbound): return unbound.im_func + class Iterator(object): - def advance_iterator(it): - return it.next() + def next(self): + return type(self).__next__(self) callable = callable _add_doc(get_unbound_function, @@ -249,15 +262,15 @@ get_function_defaults = operator.attrgetter(_func_defaults) def iterkeys(d): """Return an iterator over the keys of a dictionary.""" - return getattr(d, _iterkeys)() + return iter(getattr(d, _iterkeys)()) def itervalues(d): """Return an iterator over the values of a dictionary.""" - return getattr(d, _itervalues)() + return iter(getattr(d, _itervalues)()) def iteritems(d): """Return an iterator over the (key, value) pairs of a dictionary.""" - return getattr(d, _iteritems)() + return iter(getattr(d, _iteritems)()) if PY3: diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py index 8f5b54c..ce0c248 100644 --- a/urllib3/poolmanager.py +++ b/urllib3/poolmanager.py @@ -1,5 +1,5 @@ # urllib3/poolmanager.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -23,6 +23,9 @@ pool_classes_by_scheme = { log = logging.getLogger(__name__) +SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', + 'ssl_version') + class PoolManager(RequestMethods): """ @@ -30,8 +33,12 @@ class PoolManager(RequestMethods): necessary connection pools for you. :param num_pools: - Number of connection pools to cache before discarding the least recently - used pool. + Number of connection pools to cache before discarding the least + recently used pool. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. :param \**connection_pool_kw: Additional parameters are used to create fresh @@ -40,19 +47,37 @@ class PoolManager(RequestMethods): Example: :: >>> manager = PoolManager(num_pools=2) - >>> r = manager.urlopen("http://google.com/") - >>> r = manager.urlopen("http://google.com/mail") - >>> r = manager.urlopen("http://yahoo.com/") + >>> r = manager.request('GET', 'http://google.com/') + >>> r = manager.request('GET', 'http://google.com/mail') + >>> r = manager.request('GET', 'http://yahoo.com/') >>> len(manager.pools) 2 """ - def __init__(self, num_pools=10, **connection_pool_kw): + def __init__(self, num_pools=10, headers=None, **connection_pool_kw): + RequestMethods.__init__(self, headers) self.connection_pool_kw = connection_pool_kw self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) + def _new_pool(self, scheme, host, port): + """ + Create a new :class:`ConnectionPool` based on host, port and scheme. + + This method is used to actually create the connection pools handed out + by :meth:`connection_from_url` and companion methods. It is intended + to be overridden for customization. + """ + pool_cls = pool_classes_by_scheme[scheme] + kwargs = self.connection_pool_kw + if scheme == 'http': + kwargs = self.connection_pool_kw.copy() + for kw in SSL_KEYWORDS: + kwargs.pop(kw, None) + + return pool_cls(host, port, **kwargs) + def clear(self): """ Empty our store of pools and direct them all to close. @@ -69,6 +94,7 @@ class PoolManager(RequestMethods): If ``port`` isn't given, it will be derived from the ``scheme`` using ``urllib3.connectionpool.port_by_scheme``. """ + scheme = scheme or 'http' port = port or port_by_scheme.get(scheme, 80) pool_key = (scheme, host, port) @@ -80,11 +106,8 @@ class PoolManager(RequestMethods): return pool # Make a fresh ConnectionPool of the desired type - pool_cls = pool_classes_by_scheme[scheme] - pool = pool_cls(host, port, **self.connection_pool_kw) - + pool = self._new_pool(scheme, host, port) self.pools[pool_key] = pool - return pool def connection_from_url(self, url): @@ -113,6 +136,8 @@ class PoolManager(RequestMethods): kw['assert_same_host'] = False kw['redirect'] = False + if 'headers' not in kw: + kw['headers'] = self.headers response = conn.urlopen(method, u.request_uri, **kw) @@ -124,32 +149,41 @@ class PoolManager(RequestMethods): method = 'GET' log.info("Redirecting %s -> %s" % (url, redirect_location)) - kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown + kw['redirect'] = redirect return self.urlopen(method, redirect_location, **kw) class ProxyManager(RequestMethods): """ Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method - will make requests to any url through the defined proxy. + will make requests to any url through the defined proxy. The ProxyManager + class will automatically set the 'Host' header if it is not provided. """ def __init__(self, proxy_pool): self.proxy_pool = proxy_pool - def _set_proxy_headers(self, headers=None): - headers = headers or {} + def _set_proxy_headers(self, url, headers=None): + """ + Sets headers needed by proxies: specifically, the Accept and Host + headers. Only sets headers not provided by the user. + """ + headers_ = {'Accept': '*/*'} + + host = parse_url(url).host + if host: + headers_['Host'] = host - # Same headers are curl passes for --proxy1.0 - headers['Accept'] = '*/*' - headers['Proxy-Connection'] = 'Keep-Alive' + if headers: + headers_.update(headers) - return headers + return headers_ def urlopen(self, method, url, **kw): "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." kw['assert_same_host'] = False - kw['headers'] = self._set_proxy_headers(kw.get('headers')) + kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers')) return self.proxy_pool.urlopen(method, url, **kw) diff --git a/urllib3/request.py b/urllib3/request.py index 569ac96..bf0256e 100644 --- a/urllib3/request.py +++ b/urllib3/request.py @@ -1,5 +1,5 @@ # urllib3/request.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -36,12 +36,20 @@ class RequestMethods(object): :meth:`.request` is for making any kind of request, it will look up the appropriate encoding format and use one of the above two methods to make the request. + + Initializer parameters: + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. """ _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) - _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE']) + def __init__(self, headers=None): + self.headers = headers or {} + def urlopen(self, method, url, body=None, headers=None, encode_multipart=True, multipart_boundary=None, **kw): # Abstract @@ -97,13 +105,16 @@ class RequestMethods(object): such as with OAuth. Supports an optional ``fields`` parameter of key/value strings AND - key/filetuple. A filetuple is a (filename, data) tuple. For example: :: + key/filetuple. A filetuple is a (filename, data, MIME type) tuple where + the MIME type is optional. For example: :: fields = { 'foo': 'bar', 'fakefile': ('foofile.txt', 'contents of foofile'), 'realfile': ('barfile.txt', open('realfile').read()), - 'nonamefile': ('contents of nonamefile field'), + 'typedfile': ('bazfile.bin', open('bazfile').read(), + 'image/jpeg'), + 'nonamefile': 'contents of nonamefile field', } When uploading a file, providing a filename (the first parameter of the @@ -121,8 +132,11 @@ class RequestMethods(object): body, content_type = (urlencode(fields or {}), 'application/x-www-form-urlencoded') - headers = headers or {} - headers.update({'Content-Type': content_type}) + if headers is None: + headers = self.headers + + headers_ = {'Content-Type': content_type} + headers_.update(headers) - return self.urlopen(method, url, body=body, headers=headers, + return self.urlopen(method, url, body=body, headers=headers_, **urlopen_kw) diff --git a/urllib3/response.py b/urllib3/response.py index 28537d3..1685760 100644 --- a/urllib3/response.py +++ b/urllib3/response.py @@ -1,32 +1,51 @@ # urllib3/response.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import gzip + import logging import zlib -from io import BytesIO - from .exceptions import DecodeError -from .packages.six import string_types as basestring +from .packages.six import string_types as basestring, binary_type log = logging.getLogger(__name__) -def decode_gzip(data): - gzipper = gzip.GzipFile(fileobj=BytesIO(data)) - return gzipper.read() +class DeflateDecoder(object): + + def __init__(self): + self._first_try = True + self._data = binary_type() + self._obj = zlib.decompressobj() + def __getattr__(self, name): + return getattr(self._obj, name) -def decode_deflate(data): - try: - return zlib.decompress(data) - except zlib.error: - return zlib.decompress(data, -zlib.MAX_WBITS) + def decompress(self, data): + if not self._first_try: + return self._obj.decompress(data) + + self._data += data + try: + return self._obj.decompress(data) + except zlib.error: + self._first_try = False + self._obj = zlib.decompressobj(-zlib.MAX_WBITS) + try: + return self.decompress(self._data) + finally: + self._data = None + + +def _get_decoder(mode): + if mode == 'gzip': + return zlib.decompressobj(16 + zlib.MAX_WBITS) + + return DeflateDecoder() class HTTPResponse(object): @@ -52,10 +71,7 @@ class HTTPResponse(object): otherwise unused. """ - CONTENT_DECODERS = { - 'gzip': decode_gzip, - 'deflate': decode_deflate, - } + CONTENT_DECODERS = ['gzip', 'deflate'] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, @@ -65,8 +81,9 @@ class HTTPResponse(object): self.version = version self.reason = reason self.strict = strict + self.decode_content = decode_content - self._decode_content = decode_content + self._decoder = None self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response @@ -115,13 +132,13 @@ class HTTPResponse(object): parameters: ``decode_content`` and ``cache_content``. :param amt: - How much of the content to read. If specified, decoding and caching - is skipped because we can't decode partial content nor does it make - sense to cache partial content as the full response. + How much of the content to read. If specified, caching is skipped + because it doesn't make sense to cache partial content as the full + response. :param decode_content: If True, will attempt to decode the body based on the - 'content-encoding' header. (Overridden if ``amt`` is set.) + 'content-encoding' header. :param cache_content: If True, will save the returned data such that the same result is @@ -130,28 +147,50 @@ class HTTPResponse(object): after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ - content_encoding = self.headers.get('content-encoding') - decoder = self.CONTENT_DECODERS.get(content_encoding) + # Note: content-encoding value should be case-insensitive, per RFC 2616 + # Section 3.5 + content_encoding = self.headers.get('content-encoding', '').lower() + if self._decoder is None: + if content_encoding in self.CONTENT_DECODERS: + self._decoder = _get_decoder(content_encoding) if decode_content is None: - decode_content = self._decode_content + decode_content = self.decode_content if self._fp is None: return + flush_decoder = False + try: if amt is None: # cStringIO doesn't like amt=None data = self._fp.read() + flush_decoder = True else: - return self._fp.read(amt) + cache_content = False + data = self._fp.read(amt) + if amt != 0 and not data: # Platform-specific: Buggy versions of Python. + # Close the connection when no data is returned + # + # This is redundant to what httplib/http.client _should_ + # already do. However, versions of python released before + # December 15, 2012 (http://bugs.python.org/issue16298) do not + # properly close the connection in all cases. There is no harm + # in redundantly calling close. + self._fp.close() + flush_decoder = True try: - if decode_content and decoder: - data = decoder(data) + if decode_content and self._decoder: + data = self._decoder.decompress(data) except (IOError, zlib.error): raise DecodeError("Received response with content-encoding: %s, but " "failed to decode it." % content_encoding) + if flush_decoder and self._decoder: + buf = self._decoder.decompress(binary_type()) + data += buf + self._decoder.flush() + if cache_content: self._body = data diff --git a/urllib3/util.py b/urllib3/util.py index 8ec990b..544f9ed 100644 --- a/urllib3/util.py +++ b/urllib3/util.py @@ -1,5 +1,5 @@ # urllib3/util.py -# Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -8,18 +8,32 @@ from base64 import b64encode from collections import namedtuple from socket import error as SocketError +from hashlib import md5, sha1 +from binascii import hexlify, unhexlify try: from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms +except ImportError: # `poll` doesn't exist on OSX and other platforms poll = False try: from select import select - except ImportError: # `select` doesn't exist on AppEngine. + except ImportError: # `select` doesn't exist on AppEngine. select = False +try: # Test for SSL features + SSLContext = None + HAS_SNI = False + + import ssl + from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 + from ssl import SSLContext # Modern SSL? + from ssl import HAS_SNI # Has SNI? +except ImportError: + pass + + from .packages import six -from .exceptions import LocationParseError +from .exceptions import LocationParseError, SSLError class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])): @@ -92,9 +106,9 @@ def parse_url(url): >>> parse_url('http://google.com/mail/') Url(scheme='http', host='google.com', port=None, path='/', ...) - >>> prase_url('google.com:80') + >>> parse_url('google.com:80') Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> prase_url('/foo?bar') + >>> parse_url('/foo?bar') Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) """ @@ -220,7 +234,7 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, return headers -def is_connection_dropped(conn): +def is_connection_dropped(conn): # Platform-specific """ Returns True if the connection is dropped and should be closed. @@ -234,7 +248,7 @@ def is_connection_dropped(conn): if not sock: # Platform-specific: AppEngine return False - if not poll: # Platform-specific + if not poll: if not select: # Platform-specific: AppEngine return False @@ -250,3 +264,115 @@ def is_connection_dropped(conn): if fno == sock.fileno(): # Either data is buffered (bad), or the connection is dropped. return True + + +def resolve_cert_reqs(candidate): + """ + Resolves the argument to a numeric constant, which can be passed to + the wrap_socket function/method from the ssl module. + Defaults to :data:`ssl.CERT_NONE`. + If given a string it is assumed to be the name of the constant in the + :mod:`ssl` module or its abbrevation. + (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. + If it's neither `None` nor a string we assume it is already the numeric + constant which can directly be passed to wrap_socket. + """ + if candidate is None: + return CERT_NONE + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'CERT_' + candidate) + return res + + return candidate + + +def resolve_ssl_version(candidate): + """ + like resolve_cert_reqs + """ + if candidate is None: + return PROTOCOL_SSLv23 + + if isinstance(candidate, str): + res = getattr(ssl, candidate, None) + if res is None: + res = getattr(ssl, 'PROTOCOL_' + candidate) + return res + + return candidate + + +def assert_fingerprint(cert, fingerprint): + """ + Checks if given fingerprint matches the supplied certificate. + + :param cert: + Certificate as bytes object. + :param fingerprint: + Fingerprint as string of hexdigits, can be interspersed by colons. + """ + + # Maps the length of a digest to a possible hash function producing + # this digest. + hashfunc_map = { + 16: md5, + 20: sha1 + } + + fingerprint = fingerprint.replace(':', '').lower() + + digest_length, rest = divmod(len(fingerprint), 2) + + if rest or digest_length not in hashfunc_map: + raise SSLError('Fingerprint is of invalid length.') + + # We need encode() here for py32; works on py2 and p33. + fingerprint_bytes = unhexlify(fingerprint.encode()) + + hashfunc = hashfunc_map[digest_length] + + cert_digest = hashfunc(cert).digest() + + if not cert_digest == fingerprint_bytes: + raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' + .format(hexlify(fingerprint_bytes), + hexlify(cert_digest))) + + +if SSLContext is not None: # Python 3.2+ + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + """ + All arguments except `server_hostname` have the same meaning as for + :func:`ssl.wrap_socket` + + :param server_hostname: + Hostname of the expected certificate + """ + context = SSLContext(ssl_version) + context.verify_mode = cert_reqs + if ca_certs: + try: + context.load_verify_locations(ca_certs) + # Py32 raises IOError + # Py33 raises FileNotFoundError + except Exception as e: # Reraise as SSLError + raise SSLError(e) + if certfile: + # FIXME: This block needs a test. + context.load_cert_chain(certfile, keyfile) + if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI + return context.wrap_socket(sock, server_hostname=server_hostname) + return context.wrap_socket(sock) + +else: # Python 3.1 and earlier + def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, + ca_certs=None, server_hostname=None, + ssl_version=None): + return wrap_socket(sock, keyfile=keyfile, certfile=certfile, + ca_certs=ca_certs, cert_reqs=cert_reqs, + ssl_version=ssl_version) -- cgit v1.2.3