Diffstat (limited to 'urllib3')
-rw-r--r--  urllib3/__init__.py           |   4
-rw-r--r--  urllib3/_collections.py       |  58
-rw-r--r--  urllib3/connection.py         |  14
-rw-r--r--  urllib3/connectionpool.py     |  45
-rw-r--r--  urllib3/contrib/appengine.py  | 222
-rw-r--r--  urllib3/contrib/pyopenssl.py  |  26
-rw-r--r--  urllib3/exceptions.py         |  21
-rw-r--r--  urllib3/poolmanager.py        |   6
-rw-r--r--  urllib3/request.py            |  12
-rw-r--r--  urllib3/response.py           | 154
-rw-r--r--  urllib3/util/connection.py    |   2
-rw-r--r--  urllib3/util/response.py      |  54
-rw-r--r--  urllib3/util/retry.py         |   2
-rw-r--r--  urllib3/util/ssl_.py          |  31
14 files changed, 508 insertions(+), 143 deletions(-)
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index f48ac4a..747d09a 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using.
 
 __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
 __license__ = 'MIT'
-__version__ = '1.10.4'
+__version__ = '1.11'
 
 
 from .connectionpool import (
@@ -58,6 +58,8 @@ del NullHandler
 import warnings
 # SecurityWarning's always go off by default.
 warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
+# SubjectAltNameWarning's should go off once per host
+warnings.simplefilter('default', exceptions.SubjectAltNameWarning)
 # InsecurePlatformWarning's don't vary between requests, so we keep it default.
 warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
                       append=True)
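Note: the new SubjectAltNameWarning is an ordinary warning category, so applications can tune it with the stdlib warnings machinery. A minimal sketch of opting out, assuming you knowingly talk to hosts whose certificates carry only a commonName:

```python
import warnings
import urllib3

# Opt out of the new warning, e.g. for a legacy internal CA that still
# issues certificates without a subjectAltName entry.
warnings.simplefilter('ignore', urllib3.exceptions.SubjectAltNameWarning)

http = urllib3.PoolManager()
```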
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 279416c..b68b9a5 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -97,14 +97,7 @@ class RecentlyUsedContainer(MutableMapping):
         return list(iterkeys(self._container))
 
 
-_dict_setitem = dict.__setitem__
-_dict_getitem = dict.__getitem__
-_dict_delitem = dict.__delitem__
-_dict_contains = dict.__contains__
-_dict_setdefault = dict.setdefault
-
-
-class HTTPHeaderDict(dict):
+class HTTPHeaderDict(MutableMapping):
     """
     :param headers:
         An iterable of field-value pairs. Must not contain multiple field names
@@ -139,7 +132,8 @@ class HTTPHeaderDict(dict):
     """
 
     def __init__(self, headers=None, **kwargs):
-        dict.__init__(self)
+        super(HTTPHeaderDict, self).__init__()
+        self._container = {}
         if headers is not None:
             if isinstance(headers, HTTPHeaderDict):
                 self._copy_from(headers)
@@ -149,38 +143,44 @@ class HTTPHeaderDict(dict):
         self.extend(kwargs)
 
     def __setitem__(self, key, val):
-        return _dict_setitem(self, key.lower(), (key, val))
+        self._container[key.lower()] = (key, val)
+        return self._container[key.lower()]
 
     def __getitem__(self, key):
-        val = _dict_getitem(self, key.lower())
+        val = self._container[key.lower()]
         return ', '.join(val[1:])
 
     def __delitem__(self, key):
-        return _dict_delitem(self, key.lower())
+        del self._container[key.lower()]
 
     def __contains__(self, key):
-        return _dict_contains(self, key.lower())
+        return key.lower() in self._container
 
     def __eq__(self, other):
         if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
             return False
         if not isinstance(other, type(self)):
             other = type(self)(other)
-        return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other)
+        return (dict((k.lower(), v) for k, v in self.itermerged()) ==
+                dict((k.lower(), v) for k, v in other.itermerged()))
 
     def __ne__(self, other):
         return not self.__eq__(other)
 
-    values = MutableMapping.values
-    get = MutableMapping.get
-    update = MutableMapping.update
-
     if not PY3:  # Python 2
         iterkeys = MutableMapping.iterkeys
         itervalues = MutableMapping.itervalues
 
     __marker = object()
 
+    def __len__(self):
+        return len(self._container)
+
+    def __iter__(self):
+        # Only provide the originally cased names
+        for vals in self._container.values():
+            yield vals[0]
+
     def pop(self, key, default=__marker):
         '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
           If key is not found, d is returned if given, otherwise KeyError is raised.
@@ -216,7 +216,7 @@ class HTTPHeaderDict(dict):
         key_lower = key.lower()
         new_vals = key, val
         # Keep the common case aka no item present as fast as possible
-        vals = _dict_setdefault(self, key_lower, new_vals)
+        vals = self._container.setdefault(key_lower, new_vals)
         if new_vals is not vals:
             # new_vals was not inserted, as there was a previous one
             if isinstance(vals, list):
@@ -225,7 +225,7 @@ class HTTPHeaderDict(dict):
             else:
                 # vals should be a tuple then, i.e. only one item so far
                 # Need to convert the tuple to list for further extension
-                _dict_setitem(self, key_lower, [vals[0], vals[1], val])
+                self._container[key_lower] = [vals[0], vals[1], val]
 
     def extend(self, *args, **kwargs):
         """Generic import function for any type of header-like object.
@@ -236,7 +236,7 @@ class HTTPHeaderDict(dict):
             raise TypeError("extend() takes at most 1 positional "
                             "arguments ({} given)".format(len(args)))
         other = args[0] if len(args) >= 1 else ()
-        
+
         if isinstance(other, HTTPHeaderDict):
             for key, val in other.iteritems():
                 self.add(key, val)
@@ -257,7 +257,7 @@ class HTTPHeaderDict(dict):
         """Returns a list of all the values for the named field. Returns an
         empty list if the key doesn't exist."""
         try:
-            vals = _dict_getitem(self, key.lower())
+            vals = self._container[key.lower()]
         except KeyError:
             return []
         else:
@@ -276,11 +276,11 @@ class HTTPHeaderDict(dict):
 
     def _copy_from(self, other):
         for key in other:
-            val = _dict_getitem(other, key)
+            val = other.getlist(key)
             if isinstance(val, list):
                 # Don't need to convert tuples
                 val = list(val)
-            _dict_setitem(self, key, val)
+            self._container[key.lower()] = [key] + val
 
     def copy(self):
         clone = type(self)()
@@ -290,14 +290,14 @@ class HTTPHeaderDict(dict):
     def iteritems(self):
         """Iterate over all header lines, including duplicate ones."""
         for key in self:
-            vals = _dict_getitem(self, key)
+            vals = self._container[key.lower()]
             for val in vals[1:]:
                 yield vals[0], val
 
     def itermerged(self):
         """Iterate over all headers, merging duplicate ones together."""
         for key in self:
-            val = _dict_getitem(self, key)
+            val = self._container[key.lower()]
             yield val[0], ', '.join(val[1:])
 
     def items(self):
@@ -307,16 +307,16 @@ class HTTPHeaderDict(dict):
     def from_httplib(cls, message):  # Python 2
         """Read headers from a Python 2 httplib message object."""
         # python2.7 does not expose a proper API for exporting multiheaders
-        # efficiently. This function re-reads raw lines from the message 
+        # efficiently. This function re-reads raw lines from the message
         # object and extracts the multiheaders properly.
         headers = []
-        
+
         for line in message.headers:
             if line.startswith((' ', '\t')):
                 key, value = headers[-1]
                 headers[-1] = (key, value + '\r\n' + line.rstrip())
                 continue
-        
+
             key, value = line.split(':', 1)
             headers.append((key, value.strip()))
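Note: the behavior the reworked container preserves — case-insensitive lookup, duplicate accumulation via add(), merged views via __getitem__, and iteration over originally cased names — can be seen in a short sketch:

```python
from urllib3._collections import HTTPHeaderDict

headers = HTTPHeaderDict()
headers.add('Set-Cookie', 'a=1')
headers.add('set-cookie', 'b=2')      # case-insensitive; values accumulate

print(headers['Set-Cookie'])          # 'a=1, b=2' (merged view)
print(headers.getlist('set-cookie'))  # ['a=1', 'b=2']
print(list(headers))                  # originally cased names: ['Set-Cookie']
```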
diff --git a/urllib3/connection.py b/urllib3/connection.py
index 2a8c359..f64dd1a 100644
--- a/urllib3/connection.py
+++ b/urllib3/connection.py
@@ -38,7 +38,7 @@ except NameError:  # Python 2:
 from .exceptions import (
     ConnectTimeoutError,
     SystemTimeWarning,
-    SecurityWarning,
+    SubjectAltNameWarning,
 )
 from .packages.ssl_match_hostname import match_hostname
@@ -192,6 +192,9 @@ class VerifiedHTTPSConnection(HTTPSConnection):
                  cert_reqs=None, ca_certs=None,
                  assert_hostname=None, assert_fingerprint=None):
 
+        if ca_certs and cert_reqs is None:
+            cert_reqs = 'CERT_REQUIRED'
+
         self.key_file = key_file
         self.cert_file = cert_file
         self.cert_reqs = cert_reqs
@@ -245,10 +248,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):
             cert = self.sock.getpeercert()
             if not cert.get('subjectAltName', ()):
                 warnings.warn((
-                    'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
-                    'This feature is being removed by major browsers and deprecated by RFC 2818. '
-                    '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
-                    SecurityWarning
+                    'Certificate for {0} has no `subjectAltName`, falling back to check for a '
+                    '`commonName` for now. This feature is being removed by major browsers and '
+                    'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 '
+                    'for details.)'.format(hostname)),
+                    SubjectAltNameWarning
                 )
             match_hostname(cert, self.assert_hostname or hostname)
 
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 117269a..c958725 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -17,6 +17,7 @@ from .exceptions import (
     ClosedPoolError,
     ProtocolError,
     EmptyPoolError,
+    HeaderParsingError,
     HostChangedError,
     LocationValueError,
     MaxRetryError,
@@ -38,9 +39,10 @@ from .request import RequestMethods
 from .response import HTTPResponse
 
 from .util.connection import is_connection_dropped
+from .util.response import assert_header_parsing
 from .util.retry import Retry
 from .util.timeout import Timeout
-from .util.url import get_host
+from .util.url import get_host, Url
 
 
 xrange = six.moves.xrange
@@ -120,7 +122,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
 
     :param maxsize:
         Number of connections to save that can be reused. More than 1 is useful
-        in multithreaded situations. If ``block`` is set to false, more
+        in multithreaded situations. If ``block`` is set to False, more
         connections will be created but they will not be saved once they've
         been used.
 
@@ -381,8 +383,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
                                           httplib_response.status,
                                           httplib_response.length))
+
+        try:
+            assert_header_parsing(httplib_response.msg)
+        except HeaderParsingError as hpe:  # Platform-specific: Python 3
+            log.warning(
+                'Failed to parse headers (url=%s): %s',
+                self._absolute_url(url), hpe, exc_info=True)
+
         return httplib_response
 
+    def _absolute_url(self, path):
+        return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
+
     def close(self):
        """
        Close all pooled connections and disable the pool.
@@ -409,7 +422,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         # TODO: Add optional support for socket.gethostbyname checking.
         scheme, host, port = get_host(url)
-        
+
         # Use explicit default port for comparison when none is given
         if self.port and not port:
             port = port_by_scheme.get(scheme)
@@ -568,25 +581,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             # Close the connection. If a connection is reused on which there
             # was a Certificate error, the next request will certainly raise
             # another Certificate error.
-            if conn:
-                conn.close()
-                conn = None
+            conn = conn and conn.close()
+            release_conn = True
             raise SSLError(e)
 
         except SSLError:
             # Treat SSLError separately from BaseSSLError to preserve
             # traceback.
-            if conn:
-                conn.close()
-                conn = None
+            conn = conn and conn.close()
+            release_conn = True
             raise
 
         except (TimeoutError, HTTPException, SocketError, ConnectionError) as e:
-            if conn:
-                # Discard the connection for these exceptions. It will be
-                # be replaced during the next _get_conn() call.
-                conn.close()
-                conn = None
+            # Discard the connection for these exceptions. It will be
+            # replaced during the next _get_conn() call.
+            conn = conn and conn.close()
+            release_conn = True
 
             if isinstance(e, SocketError) and self.proxy:
                 e = ProxyError('Cannot connect to proxy.', e)
@@ -626,6 +636,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                 retries = retries.increment(method, url, response=response, _pool=self)
             except MaxRetryError:
                 if retries.raise_on_redirect:
+                    # Release the connection for this response, since we're not
+                    # returning it to be released manually.
+                    response.release_conn()
                     raise
                 return response
 
@@ -683,6 +696,10 @@ class HTTPSConnectionPool(HTTPConnectionPool):
         HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
                                     block, headers, retries, _proxy, _proxy_headers,
                                     **conn_kw)
+
+        if ca_certs and cert_reqs is None:
+            cert_reqs = 'CERT_REQUIRED'
+
         self.key_file = key_file
         self.cert_file = cert_file
         self.cert_reqs = cert_reqs
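Note: with the change above, supplying ca_certs without an explicit cert_reqs now implies certificate verification. A small sketch (the CA bundle path is illustrative):

```python
import urllib3

# cert_reqs is not given, so the pool now defaults it to 'CERT_REQUIRED'
# instead of silently skipping verification.
pool = urllib3.HTTPSConnectionPool(
    'example.com', 443,
    ca_certs='/etc/ssl/certs/ca-certificates.crt')  # illustrative path

r = pool.request('GET', '/')
print(r.status)
```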
diff --git a/urllib3/contrib/appengine.py b/urllib3/contrib/appengine.py
new file mode 100644
index 0000000..ed9d8b8
--- /dev/null
+++ b/urllib3/contrib/appengine.py
@@ -0,0 +1,222 @@
+import logging
+import os
+import warnings
+
+from ..exceptions import (
+    HTTPError,
+    HTTPWarning,
+    MaxRetryError,
+    ProtocolError,
+    TimeoutError,
+    SSLError
+)
+
+from ..packages.six import BytesIO
+from ..request import RequestMethods
+from ..response import HTTPResponse
+from ..util.timeout import Timeout
+from ..util.retry import Retry
+
+try:
+    from google.appengine.api import urlfetch
+except ImportError:
+    urlfetch = None
+
+
+log = logging.getLogger(__name__)
+
+
+class AppEnginePlatformWarning(HTTPWarning):
+    pass
+
+
+class AppEnginePlatformError(HTTPError):
+    pass
+
+
+class AppEngineManager(RequestMethods):
+    """
+    Connection manager for Google App Engine sandbox applications.
+
+    This manager uses the URLFetch service directly instead of using the
+    emulated httplib, and is subject to URLFetch limitations as described in
+    the App Engine documentation here:
+
+        https://cloud.google.com/appengine/docs/python/urlfetch
+
+    Notably it will raise an AppEnginePlatformError if:
+        * URLFetch is not available.
+        * If you attempt to use this on GAEv2 (Managed VMs), as full socket
+          support is available.
+        * If a request size is more than 10 megabytes.
+        * If a response size is more than 32 megabytes.
+        * If you use an unsupported request method such as OPTIONS.
+
+    Beyond those cases, it will raise normal urllib3 errors.
+    """
+
+    def __init__(self, headers=None, retries=None, validate_certificate=True):
+        if not urlfetch:
+            raise AppEnginePlatformError(
+                "URLFetch is not available in this environment.")
+
+        if is_prod_appengine_v2():
+            raise AppEnginePlatformError(
+                "Use normal urllib3.PoolManager instead of AppEngineManager "
+                "on Managed VMs, as using URLFetch is not necessary in "
+                "this environment.")
+
+        warnings.warn(
+            "urllib3 is using URLFetch on Google App Engine sandbox instead "
+            "of sockets. To use sockets directly instead of URLFetch see "
+            "https://urllib3.readthedocs.org/en/latest/contrib.html.",
+            AppEnginePlatformWarning)
+
+        RequestMethods.__init__(self, headers)
+        self.validate_certificate = validate_certificate
+
+        self.retries = retries or Retry.DEFAULT
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Return False to re-raise any potential exceptions
+        return False
+
+    def urlopen(self, method, url, body=None, headers=None,
+                retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
+                **response_kw):
+
+        retries = self._get_retries(retries, redirect)
+
+        try:
+            response = urlfetch.fetch(
+                url,
+                payload=body,
+                method=method,
+                headers=headers or {},
+                allow_truncated=False,
+                follow_redirects=(
+                    redirect and
+                    retries.redirect != 0 and
+                    retries.total),
+                deadline=self._get_absolute_timeout(timeout),
+                validate_certificate=self.validate_certificate,
+            )
+        except urlfetch.DeadlineExceededError as e:
+            raise TimeoutError(self, e)
+
+        except urlfetch.InvalidURLError as e:
+            if 'too large' in e.message:
+                raise AppEnginePlatformError(
+                    "URLFetch request too large, URLFetch only "
+                    "supports requests up to 10mb in size.", e)
+            raise ProtocolError(e)
+
+        except urlfetch.DownloadError as e:
+            if 'Too many redirects' in e.message:
+                raise MaxRetryError(self, url, reason=e)
+            raise ProtocolError(e)
+
+        except urlfetch.ResponseTooLargeError as e:
+            raise AppEnginePlatformError(
+                "URLFetch response too large, URLFetch only supports "
+                "responses up to 32mb in size.", e)
+
+        except urlfetch.SSLCertificateError as e:
+            raise SSLError(e)
+
+        except urlfetch.InvalidMethodError as e:
+            raise AppEnginePlatformError(
+                "URLFetch does not support method: %s" % method, e)
+
+        http_response = self._urlfetch_response_to_http_response(
+            response, **response_kw)
+
+        # Check for redirect response
+        if (http_response.get_redirect_location() and
+                retries.raise_on_redirect and redirect):
+            raise MaxRetryError(self, url, "too many redirects")
+
+        # Check if we should retry the HTTP response.
+        if retries.is_forced_retry(method, status_code=http_response.status):
+            retries = retries.increment(
+                method, url, response=http_response, _pool=self)
+            log.info("Forced retry: %s" % url)
+            retries.sleep()
+            return self.urlopen(
+                method, url,
+                body=body, headers=headers,
+                retries=retries, redirect=redirect,
+                timeout=timeout, **response_kw)
+
+        return http_response
+
+    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
+
+        if is_prod_appengine_v1():
+            # Production GAE handles deflate encoding automatically, but does
+            # not remove the encoding header.
+            content_encoding = urlfetch_resp.headers.get('content-encoding')
+
+            if content_encoding == 'deflate':
+                del urlfetch_resp.headers['content-encoding']
+
+        return HTTPResponse(
+            # In order for decoding to work, we must present the content as
+            # a file-like object.
+            body=BytesIO(urlfetch_resp.content),
+            headers=urlfetch_resp.headers,
+            status=urlfetch_resp.status_code,
+            **response_kw
+        )
+
+    def _get_absolute_timeout(self, timeout):
+        if timeout is Timeout.DEFAULT_TIMEOUT:
+            return 5  # 5s is the default timeout for URLFetch.
+        if isinstance(timeout, Timeout):
+            if not timeout.read is timeout.connect:
+                warnings.warn(
+                    "URLFetch does not support granular timeout settings, "
+                    "reverting to total timeout.", AppEnginePlatformWarning)
+            return timeout.total
+        return timeout
+
+    def _get_retries(self, retries, redirect):
+        if not isinstance(retries, Retry):
+            retries = Retry.from_int(
+                retries, redirect=redirect, default=self.retries)
+
+        if retries.connect or retries.read or retries.redirect:
+            warnings.warn(
+                "URLFetch only supports total retries and does not "
+                "recognize connect, read, or redirect retry parameters.",
+                AppEnginePlatformWarning)
+
+        return retries
+
+
+def is_appengine():
+    return (is_local_appengine() or
+            is_prod_appengine_v1() or
+            is_prod_appengine_v2())
+
+
+def is_appengine_sandbox():
+    return is_appengine() and not is_prod_appengine_v2()
+
+
+def is_local_appengine():
+    return ('APPENGINE_RUNTIME' in os.environ and
+            'Development/' in os.environ['SERVER_SOFTWARE'])
+
+
+def is_prod_appengine_v1():
+    return ('APPENGINE_RUNTIME' in os.environ and
+            'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
+            not is_prod_appengine_v2())
+
+
+def is_prod_appengine_v2():
+    return os.environ.get('GAE_VM', False) == 'true'
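Note: hypothetical usage inside a GAE sandbox app, sketching how the new manager drops in for PoolManager (the URL is illustrative):

```python
from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox

# AppEngineManager exposes the same request interface as PoolManager,
# but routes everything through the URLFetch service.
if is_appengine_sandbox():
    http = AppEngineManager()
    r = http.request('GET', 'https://example.com/')
    print(r.status, len(r.data))
```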
+ """ + + def __init__(self, headers=None, retries=None, validate_certificate=True): + if not urlfetch: + raise AppEnginePlatformError( + "URLFetch is not available in this environment.") + + if is_prod_appengine_v2(): + raise AppEnginePlatformError( + "Use normal urllib3.PoolManager instead of AppEngineManager" + "on Managed VMs, as using URLFetch is not necessary in " + "this environment.") + + warnings.warn( + "urllib3 is using URLFetch on Google App Engine sandbox instead " + "of sockets. To use sockets directly instead of URLFetch see " + "https://urllib3.readthedocs.org/en/latest/contrib.html.", + AppEnginePlatformWarning) + + RequestMethods.__init__(self, headers) + self.validate_certificate = validate_certificate + + self.retries = retries or Retry.DEFAULT + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Return False to re-raise any potential exceptions + return False + + def urlopen(self, method, url, body=None, headers=None, + retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT, + **response_kw): + + retries = self._get_retries(retries, redirect) + + try: + response = urlfetch.fetch( + url, + payload=body, + method=method, + headers=headers or {}, + allow_truncated=False, + follow_redirects=( + redirect and + retries.redirect != 0 and + retries.total), + deadline=self._get_absolute_timeout(timeout), + validate_certificate=self.validate_certificate, + ) + except urlfetch.DeadlineExceededError as e: + raise TimeoutError(self, e) + + except urlfetch.InvalidURLError as e: + if 'too large' in e.message: + raise AppEnginePlatformError( + "URLFetch request too large, URLFetch only " + "supports requests up to 10mb in size.", e) + raise ProtocolError(e) + + except urlfetch.DownloadError as e: + if 'Too many redirects' in e.message: + raise MaxRetryError(self, url, reason=e) + raise ProtocolError(e) + + except urlfetch.ResponseTooLargeError as e: + raise AppEnginePlatformError( + "URLFetch response too large, URLFetch only supports" + "responses up to 32mb in size.", e) + + except urlfetch.SSLCertificateError as e: + raise SSLError(e) + + except urlfetch.InvalidMethodError as e: + raise AppEnginePlatformError( + "URLFetch does not support method: %s" % method, e) + + http_response = self._urlfetch_response_to_http_response( + response, **response_kw) + + # Check for redirect response + if (http_response.get_redirect_location() and + retries.raise_on_redirect and redirect): + raise MaxRetryError(self, url, "too many redirects") + + # Check if we should retry the HTTP response. + if retries.is_forced_retry(method, status_code=http_response.status): + retries = retries.increment( + method, url, response=http_response, _pool=self) + log.info("Forced retry: %s" % url) + retries.sleep() + return self.urlopen( + method, url, + body=body, headers=headers, + retries=retries, redirect=redirect, + timeout=timeout, **response_kw) + + return http_response + + def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): + + if is_prod_appengine_v1(): + # Production GAE handles deflate encoding automatically, but does + # not remove the encoding header. + content_encoding = urlfetch_resp.headers.get('content-encoding') + + if content_encoding == 'deflate': + del urlfetch_resp.headers['content-encoding'] + + return HTTPResponse( + # In order for decoding to work, we must present the content as + # a file-like object. 
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 31bda1c..36ce0d1 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -149,6 +149,11 @@ class SecurityWarning(HTTPWarning):
     pass
 
 
+class SubjectAltNameWarning(SecurityWarning):
+    "Warned when connecting to a host with a certificate missing a SAN."
+    pass
+
+
 class InsecureRequestWarning(SecurityWarning):
     "Warned when making an unverified HTTPS request."
     pass
@@ -167,3 +172,19 @@ class InsecurePlatformWarning(SecurityWarning):
 
 class ResponseNotChunked(ProtocolError, ValueError):
     "Response needs to be chunked in order to read it as chunks."
     pass
+
+
+class ProxySchemeUnknown(AssertionError, ValueError):
+    "ProxyManager does not support the supplied scheme"
+    # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
+
+    def __init__(self, scheme):
+        message = "Not supported proxy scheme %s" % scheme
+        super(ProxySchemeUnknown, self).__init__(message)
+
+
+class HeaderParsingError(HTTPError):
+    "Raised by assert_header_parsing, but we convert it to a log.warning statement."
+    def __init__(self, defects, unparsed_data):
+        message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
+        super(HeaderParsingError, self).__init__(message)
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index b8d1e74..76b6a12 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -8,7 +8,7 @@ except ImportError:
 from ._collections import RecentlyUsedContainer
 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
 from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError, MaxRetryError
+from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
 from .request import RequestMethods
 from .util.url import parse_url
 from .util.retry import Retry
@@ -227,8 +227,8 @@ class ProxyManager(PoolManager):
             port = port_by_scheme.get(proxy.scheme, 80)
             proxy = proxy._replace(port=port)
 
-        assert proxy.scheme in ("http", "https"), \
-            'Not supported proxy scheme %s' % proxy.scheme
+        if proxy.scheme not in ("http", "https"):
+            raise ProxySchemeUnknown(proxy.scheme)
 
         self.proxy = proxy
         self.proxy_headers = proxy_headers or {}
diff --git a/urllib3/request.py b/urllib3/request.py
index b08d6c9..a1a12bc 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -71,14 +71,22 @@ class RequestMethods(object):
                             headers=headers,
                             **urlopen_kw)
 
-    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
+    def request_encode_url(self, method, url, fields=None, headers=None,
+                           **urlopen_kw):
         """
         Make a request using :meth:`urlopen` with the ``fields`` encoded in
         the url. This is useful for request methods like GET, HEAD, DELETE, etc.
         """
+        if headers is None:
+            headers = self.headers
+
+        extra_kw = {'headers': headers}
+        extra_kw.update(urlopen_kw)
+
         if fields:
             url += '?' + urlencode(fields)
-        return self.urlopen(method, url, **urlopen_kw)
+
+        return self.urlopen(method, url, **extra_kw)
 
     def request_encode_body(self, method, url, fields=None, headers=None,
                             encode_multipart=True, multipart_boundary=None,
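Note: with the assert replaced, an unsupported proxy scheme now raises a real exception even under `python -O`; a quick sketch:

```python
import urllib3
from urllib3.exceptions import ProxySchemeUnknown

try:
    urllib3.ProxyManager('socks5://localhost:1080')
except ProxySchemeUnknown as e:
    # Also catchable as ValueError (and, for now, AssertionError for
    # backwards compatibility with the old assert).
    print(e)
```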
diff --git a/urllib3/response.py b/urllib3/response.py
index 24140c4..15d4aac 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -2,6 +2,7 @@ try:
     import http.client as httplib
 except ImportError:
     import httplib
+from contextlib import contextmanager
 import zlib
 import io
 from socket import timeout as SocketTimeout
@@ -12,7 +13,7 @@ from .exceptions import (
 )
 from .packages.six import string_types as basestring, binary_type, PY3
 from .connection import HTTPException, BaseSSLError
-from .util.response import is_fp_closed
+from .util.response import is_fp_closed, is_response_to_head
 
 
 class DeflateDecoder(object):
@@ -202,6 +203,47 @@ class HTTPResponse(io.IOBase):
 
         return data
 
+    @contextmanager
+    def _error_catcher(self):
+        """
+        Catch low-level python exceptions, instead re-raising urllib3
+        variants, so that low-level exceptions are not leaked in the
+        high-level api.
+
+        On exit, release the connection back to the pool.
+        """
+        try:
+            try:
+                yield
+
+            except SocketTimeout:
+                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
+                # there is yet no clean way to get at it from this context.
+                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+            except BaseSSLError as e:
+                # FIXME: Is there a better way to differentiate between SSLErrors?
+                if 'read operation timed out' not in str(e):  # Defensive:
+                    # This shouldn't happen but just in case we're missing an edge
+                    # case, let's avoid swallowing SSL errors.
+                    raise
+
+                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+            except HTTPException as e:
+                # This includes IncompleteRead.
+                raise ProtocolError('Connection broken: %r' % e, e)
+        except Exception:
+            # The response may not be closed but we're not going to use it anymore
+            # so close it now to ensure that the connection is released back to the pool.
+            if self._original_response and not self._original_response.isclosed():
+                self._original_response.close()
+
+            raise
+        finally:
+            if self._original_response and self._original_response.isclosed():
+                self.release_conn()
+
     def read(self, amt=None, decode_content=None, cache_content=False):
         """
         Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@@ -231,45 +273,28 @@ class HTTPResponse(io.IOBase):
             return
 
         flush_decoder = False
-
-        try:
-            try:
-                if amt is None:
-                    # cStringIO doesn't like amt=None
-                    data = self._fp.read()
+        data = None
+
+        with self._error_catcher():
+            if amt is None:
+                # cStringIO doesn't like amt=None
+                data = self._fp.read()
+                flush_decoder = True
+            else:
+                cache_content = False
+                data = self._fp.read(amt)
+                if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
+                    # Close the connection when no data is returned
+                    #
+                    # This is redundant to what httplib/http.client _should_
+                    # already do. However, versions of python released before
+                    # December 15, 2012 (http://bugs.python.org/issue16298) do
+                    # not properly close the connection in all cases. There is
+                    # no harm in redundantly calling close.
+                    self._fp.close()
+                    flush_decoder = True
-                else:
-                    cache_content = False
-                    data = self._fp.read(amt)
-                    if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
-                        # Close the connection when no data is returned
-                        #
-                        # This is redundant to what httplib/http.client _should_
-                        # already do. However, versions of python released before
-                        # December 15, 2012 (http://bugs.python.org/issue16298) do
-                        # not properly close the connection in all cases. There is
-                        # no harm in redundantly calling close.
-                        self._fp.close()
-                        flush_decoder = True
-
-            except SocketTimeout:
-                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
-                # there is yet no clean way to get at it from this context.
-                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
-
-            except BaseSSLError as e:
-                # FIXME: Is there a better way to differentiate between SSLErrors?
-                if 'read operation timed out' not in str(e):  # Defensive:
-                    # This shouldn't happen but just in case we're missing an edge
-                    # case, let's avoid swallowing SSL errors.
-                    raise
-
-                raise ReadTimeoutError(self._pool, None, 'Read timed out.')
-
-            except HTTPException as e:
-                # This includes IncompleteRead.
-                raise ProtocolError('Connection broken: %r' % e, e)
+
+        if data:
             self._fp_bytes_read += len(data)
 
             data = self._decode(data, decode_content, flush_decoder)
@@ -277,11 +302,8 @@ class HTTPResponse(io.IOBase):
             if cache_content:
                 self._body = data
 
-            return data
+        return data
 
-        finally:
-            if self._original_response and self._original_response.isclosed():
-                self.release_conn()
 
     def stream(self, amt=2**16, decode_content=None):
         """
@@ -319,6 +341,7 @@ class HTTPResponse(io.IOBase):
             with ``original_response=r``.
         """
         headers = r.msg
+
         if not isinstance(headers, HTTPHeaderDict):
             if PY3:  # Python 3
                 headers = HTTPHeaderDict(headers.items())
@@ -437,30 +460,29 @@ class HTTPResponse(io.IOBase):
             raise ResponseNotChunked("Response is not chunked. "
                                      "Header 'transfer-encoding: chunked' is missing.")
 
-        if self._original_response and self._original_response._method.upper() == 'HEAD':
-            # Don't bother reading the body of a HEAD request.
-            # FIXME: Can we do this somehow without accessing private httplib _method?
+        # Don't bother reading the body of a HEAD request.
+        if self._original_response and is_response_to_head(self._original_response):
             self._original_response.close()
             return
 
-        while True:
-            self._update_chunk_length()
-            if self.chunk_left == 0:
-                break
-            chunk = self._handle_chunk(amt)
-            yield self._decode(chunk, decode_content=decode_content,
-                               flush_decoder=True)
-
-        # Chunk content ends with \r\n: discard it.
-        while True:
-            line = self._fp.fp.readline()
-            if not line:
-                # Some sites may not end with '\r\n'.
-                break
-            if line == b'\r\n':
-                break
-
-        # We read everything; close the "file".
-        if self._original_response:
-            self._original_response.close()
-            self.release_conn()
+        with self._error_catcher():
+            while True:
+                self._update_chunk_length()
+                if self.chunk_left == 0:
+                    break
+                chunk = self._handle_chunk(amt)
+                yield self._decode(chunk, decode_content=decode_content,
+                                   flush_decoder=True)
+
+            # Chunk content ends with \r\n: discard it.
+            while True:
+                line = self._fp.fp.readline()
+                if not line:
+                    # Some sites may not end with '\r\n'.
+                    break
+                if line == b'\r\n':
+                    break
+
+            # We read everything; close the "file".
+            if self._original_response:
+                self._original_response.close()
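Note: a sketch of the consumer-facing effect — with preload_content=False, both read() and stream() now funnel low-level errors through _error_catcher(), which releases the connection back to the pool even on failure:

```python
import urllib3

http = urllib3.PoolManager()
r = http.request('GET', 'http://example.com/', preload_content=False)

body = b''
try:
    for chunk in r.stream(1024):  # errors now pass through _error_catcher()
        body += chunk
finally:
    # Safe even when _error_catcher() has already released the connection.
    r.release_conn()
```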
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
index 859aec6..9ed5a64 100644
--- a/urllib3/util/connection.py
+++ b/urllib3/util/connection.py
@@ -60,6 +60,8 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     """
 
     host, port = address
+    if host.startswith('['):
+        host = host.strip('[]')
     err = None
     for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
         af, socktype, proto, canonname, sa = res
diff --git a/urllib3/util/response.py b/urllib3/util/response.py
index 45fff55..767ee15 100644
--- a/urllib3/util/response.py
+++ b/urllib3/util/response.py
@@ -1,3 +1,11 @@
+try:
+    import http.client as httplib
+except ImportError:
+    import httplib
+
+from ..exceptions import HeaderParsingError
+
+
 def is_fp_closed(obj):
     """
     Checks whether a given file-like object is closed.
@@ -20,3 +28,49 @@ def is_fp_closed(obj):
         pass
 
     raise ValueError("Unable to determine whether fp is closed.")
+
+
+def assert_header_parsing(headers):
+    """
+    Asserts whether all headers have been successfully parsed.
+    Extracts encountered errors from the result of parsing headers.
+
+    Only works on Python 3.
+
+    :param headers: Headers to verify.
+    :type headers: `httplib.HTTPMessage`.
+
+    :raises urllib3.exceptions.HeaderParsingError:
+        If parsing errors are found.
+    """
+
+    # This will fail silently if we pass in the wrong kind of parameter.
+    # To make debugging easier add an explicit check.
+    if not isinstance(headers, httplib.HTTPMessage):
+        raise TypeError('expected httplib.HTTPMessage, got {}.'.format(
+            type(headers)))
+
+    defects = getattr(headers, 'defects', None)
+    get_payload = getattr(headers, 'get_payload', None)
+
+    unparsed_data = None
+    if get_payload:  # Platform-specific: Python 3.
+        unparsed_data = get_payload()
+
+    if defects or unparsed_data:
+        raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
+
+
+def is_response_to_head(response):
+    """
+    Checks whether the request of a response has been a HEAD request.
+    Handles the quirks of AppEngine.
+
+    :param response:
+    :type response: :class:`httplib.HTTPResponse`
+    """
+    # FIXME: Can we do this somehow without accessing private httplib _method?
+    method = response._method
+    if isinstance(method, int):  # Platform-specific: Appengine
+        return method == 3
+    return method.upper() == 'HEAD'
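Note: a quick illustration of the new helper on Python 3, feeding it a stdlib-parsed message (the malformed header bytes are made up for the example):

```python
from http.client import HTTPMessage
from email.parser import BytesParser

from urllib3.util.response import assert_header_parsing
from urllib3.exceptions import HeaderParsingError

# 'Broken-Header' has no colon, so the email parser records a defect.
raw = b'Content-Type: text/plain\r\nBroken-Header\r\n\r\n'
msg = BytesParser(_class=HTTPMessage).parsebytes(raw)

try:
    assert_header_parsing(msg)
except HeaderParsingError as e:
    print('bad headers:', e)
```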
+ """ + + # This will fail silently if we pass in the wrong kind of parameter. + # To make debugging easier add an explicit check. + if not isinstance(headers, httplib.HTTPMessage): + raise TypeError('expected httplib.Message, got {}.'.format( + type(headers))) + + defects = getattr(headers, 'defects', None) + get_payload = getattr(headers, 'get_payload', None) + + unparsed_data = None + if get_payload: # Platform-specific: Python 3. + unparsed_data = get_payload() + + if defects or unparsed_data: + raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) + + +def is_response_to_head(response): + """ + Checks, wether a the request of a response has been a HEAD-request. + Handles the quirks of AppEngine. + + :param conn: + :type conn: :class:`httplib.HTTPResponse` + """ + # FIXME: Can we do this somehow without accessing private httplib _method? + method = response._method + if isinstance(method, int): # Platform-specific: Appengine + return method == 3 + return method.upper() == 'HEAD' diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py index 7e0959d..1fb1f23 100644 --- a/urllib3/util/retry.py +++ b/urllib3/util/retry.py @@ -94,7 +94,7 @@ class Retry(object): seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer - than :attr:`Retry.MAX_BACKOFF`. + than :attr:`Retry.BACKOFF_MAX`. By default, backoff is disabled (set to 0). diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py index b846d42..311378b 100644 --- a/urllib3/util/ssl_.py +++ b/urllib3/util/ssl_.py @@ -8,6 +8,13 @@ SSLContext = None HAS_SNI = False create_default_context = None +# Maps the length of a digest to a possible hash function producing this digest +HASHFUNC_MAP = { + 32: md5, + 40: sha1, + 64: sha256, +} + import errno import warnings @@ -112,31 +119,21 @@ def assert_fingerprint(cert, fingerprint): Fingerprint as string of hexdigits, can be interspersed by colons. """ - # Maps the length of a digest to a possible hash function producing - # this digest. - hashfunc_map = { - 16: md5, - 20: sha1, - 32: sha256, - } - fingerprint = fingerprint.replace(':', '').lower() - digest_length, odd = divmod(len(fingerprint), 2) - - if odd or digest_length not in hashfunc_map: - raise SSLError('Fingerprint is of invalid length.') + digest_length = len(fingerprint) + hashfunc = HASHFUNC_MAP.get(digest_length) + if not hashfunc: + raise SSLError( + 'Fingerprint of invalid length: {0}'.format(fingerprint)) # We need encode() here for py32; works on py2 and p33. fingerprint_bytes = unhexlify(fingerprint.encode()) - hashfunc = hashfunc_map[digest_length] - cert_digest = hashfunc(cert).digest() - if not cert_digest == fingerprint_bytes: + if cert_digest != fingerprint_bytes: raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(hexlify(fingerprint_bytes), - hexlify(cert_digest))) + .format(fingerprint, hexlify(cert_digest))) def resolve_cert_reqs(candidate): |