Diffstat (limited to 'urllib3')
-rw-r--r--  urllib3/__init__.py            4
-rw-r--r--  urllib3/_collections.py       58
-rw-r--r--  urllib3/connection.py         14
-rw-r--r--  urllib3/connectionpool.py     45
-rw-r--r--  urllib3/contrib/appengine.py 222
-rw-r--r--  urllib3/contrib/pyopenssl.py  26
-rw-r--r--  urllib3/exceptions.py         21
-rw-r--r--  urllib3/poolmanager.py         6
-rw-r--r--  urllib3/request.py            12
-rw-r--r--  urllib3/response.py          154
-rw-r--r--  urllib3/util/connection.py     2
-rw-r--r--  urllib3/util/response.py      54
-rw-r--r--  urllib3/util/retry.py          2
-rw-r--r--  urllib3/util/ssl_.py          31
14 files changed, 508 insertions, 143 deletions
diff --git a/urllib3/__init__.py b/urllib3/__init__.py
index f48ac4a..747d09a 100644
--- a/urllib3/__init__.py
+++ b/urllib3/__init__.py
@@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
-__version__ = '1.10.4'
+__version__ = '1.11'
from .connectionpool import (
@@ -58,6 +58,8 @@ del NullHandler
import warnings
# SecurityWarning's always go off by default.
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
+# SubjectAltNameWarning's should go off once per host
+warnings.simplefilter('default', exceptions.SubjectAltNameWarning)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
append=True)
diff --git a/urllib3/_collections.py b/urllib3/_collections.py
index 279416c..b68b9a5 100644
--- a/urllib3/_collections.py
+++ b/urllib3/_collections.py
@@ -97,14 +97,7 @@ class RecentlyUsedContainer(MutableMapping):
return list(iterkeys(self._container))
-_dict_setitem = dict.__setitem__
-_dict_getitem = dict.__getitem__
-_dict_delitem = dict.__delitem__
-_dict_contains = dict.__contains__
-_dict_setdefault = dict.setdefault
-
-
-class HTTPHeaderDict(dict):
+class HTTPHeaderDict(MutableMapping):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
@@ -139,7 +132,8 @@ class HTTPHeaderDict(dict):
"""
def __init__(self, headers=None, **kwargs):
- dict.__init__(self)
+ super(HTTPHeaderDict, self).__init__()
+ self._container = {}
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
@@ -149,38 +143,44 @@ class HTTPHeaderDict(dict):
self.extend(kwargs)
def __setitem__(self, key, val):
- return _dict_setitem(self, key.lower(), (key, val))
+ self._container[key.lower()] = (key, val)
+ return self._container[key.lower()]
def __getitem__(self, key):
- val = _dict_getitem(self, key.lower())
+ val = self._container[key.lower()]
return ', '.join(val[1:])
def __delitem__(self, key):
- return _dict_delitem(self, key.lower())
+ del self._container[key.lower()]
def __contains__(self, key):
- return _dict_contains(self, key.lower())
+ return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
- return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other)
+ return (dict((k.lower(), v) for k, v in self.itermerged()) ==
+ dict((k.lower(), v) for k, v in other.itermerged()))
def __ne__(self, other):
return not self.__eq__(other)
- values = MutableMapping.values
- get = MutableMapping.get
- update = MutableMapping.update
-
if not PY3: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
+ def __len__(self):
+ return len(self._container)
+
+ def __iter__(self):
+ # Only provide the originally cased names
+ for vals in self._container.values():
+ yield vals[0]
+
def pop(self, key, default=__marker):
'''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
@@ -216,7 +216,7 @@ class HTTPHeaderDict(dict):
key_lower = key.lower()
new_vals = key, val
# Keep the common case aka no item present as fast as possible
- vals = _dict_setdefault(self, key_lower, new_vals)
+ vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# new_vals was not inserted, as there was a previous one
if isinstance(vals, list):
@@ -225,7 +225,7 @@ class HTTPHeaderDict(dict):
else:
# vals should be a tuple then, i.e. only one item so far
# Need to convert the tuple to list for further extension
- _dict_setitem(self, key_lower, [vals[0], vals[1], val])
+ self._container[key_lower] = [vals[0], vals[1], val]
def extend(self, *args, **kwargs):
"""Generic import function for any type of header-like object.
@@ -236,7 +236,7 @@ class HTTPHeaderDict(dict):
raise TypeError("extend() takes at most 1 positional "
"arguments ({} given)".format(len(args)))
other = args[0] if len(args) >= 1 else ()
-
+
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
@@ -257,7 +257,7 @@ class HTTPHeaderDict(dict):
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
- vals = _dict_getitem(self, key.lower())
+ vals = self._container[key.lower()]
except KeyError:
return []
else:
@@ -276,11 +276,11 @@ class HTTPHeaderDict(dict):
def _copy_from(self, other):
for key in other:
- val = _dict_getitem(other, key)
+ val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
- _dict_setitem(self, key, val)
+ self._container[key.lower()] = [key] + val
def copy(self):
clone = type(self)()
@@ -290,14 +290,14 @@ class HTTPHeaderDict(dict):
def iteritems(self):
"""Iterate over all header lines, including duplicate ones."""
for key in self:
- vals = _dict_getitem(self, key)
+ vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
- val = _dict_getitem(self, key)
+ val = self._container[key.lower()]
yield val[0], ', '.join(val[1:])
def items(self):
@@ -307,16 +307,16 @@ class HTTPHeaderDict(dict):
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
- # efficiently. This function re-reads raw lines from the message
+ # efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
headers = []
-
+
for line in message.headers:
if line.startswith((' ', '\t')):
key, value = headers[-1]
headers[-1] = (key, value + '\r\n' + line.rstrip())
continue
-
+
key, value = line.split(':', 1)
headers.append((key, value.strip()))
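The refactor above swaps the `dict` base class for `MutableMapping` backed by a private `_container`, but the public header semantics are unchanged. A minimal sketch of those semantics, assuming this release of urllib3 is importable:

    from urllib3._collections import HTTPHeaderDict

    h = HTTPHeaderDict()
    h['Set-Cookie'] = 'a=1'
    h.add('set-cookie', 'b=2')       # keys are case-insensitive; add() keeps both values

    print(h['SET-COOKIE'])           # 'a=1, b=2' -- duplicate values merged with ', '
    print(h.getlist('Set-Cookie'))   # ['a=1', 'b=2']
    print(list(h))                   # ['Set-Cookie'] -- only the originally cased name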
diff --git a/urllib3/connection.py b/urllib3/connection.py
index 2a8c359..f64dd1a 100644
--- a/urllib3/connection.py
+++ b/urllib3/connection.py
@@ -38,7 +38,7 @@ except NameError: # Python 2:
from .exceptions import (
ConnectTimeoutError,
SystemTimeWarning,
- SecurityWarning,
+ SubjectAltNameWarning,
)
from .packages.ssl_match_hostname import match_hostname
@@ -192,6 +192,9 @@ class VerifiedHTTPSConnection(HTTPSConnection):
cert_reqs=None, ca_certs=None,
assert_hostname=None, assert_fingerprint=None):
+ if ca_certs and cert_reqs is None:
+ cert_reqs = 'CERT_REQUIRED'
+
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
@@ -245,10 +248,11 @@ class VerifiedHTTPSConnection(HTTPSConnection):
cert = self.sock.getpeercert()
if not cert.get('subjectAltName', ()):
warnings.warn((
- 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
- 'This feature is being removed by major browsers and deprecated by RFC 2818. '
- '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
- SecurityWarning
+ 'Certificate for {0} has no `subjectAltName`, falling back to check for a '
+ '`commonName` for now. This feature is being removed by major browsers and '
+ 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 '
+ 'for details.)'.format(hostname)),
+ SubjectAltNameWarning
)
match_hostname(cert, self.assert_hostname or hostname)
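With the new guard, supplying a CA bundle without an explicit `cert_reqs` now implies verification rather than silently skipping it. A hedged sketch (the bundle path below is hypothetical):

    import urllib3

    pool = urllib3.HTTPSConnectionPool(
        'example.com', 443,
        ca_certs='/etc/ssl/certs/ca-bundle.crt',  # hypothetical path to a CA bundle
        # cert_reqs omitted: the connection now defaults it to 'CERT_REQUIRED'
    )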
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 117269a..c958725 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -17,6 +17,7 @@ from .exceptions import (
ClosedPoolError,
ProtocolError,
EmptyPoolError,
+ HeaderParsingError,
HostChangedError,
LocationValueError,
MaxRetryError,
@@ -38,9 +39,10 @@ from .request import RequestMethods
from .response import HTTPResponse
from .util.connection import is_connection_dropped
+from .util.response import assert_header_parsing
from .util.retry import Retry
from .util.timeout import Timeout
-from .util.url import get_host
+from .util.url import get_host, Url
xrange = six.moves.xrange
@@ -120,7 +122,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
- in multithreaded situations. If ``block`` is set to false, more
+ in multithreaded situations. If ``block`` is set to False, more
connections will be created but they will not be saved once they've
been used.
@@ -381,8 +383,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
httplib_response.status,
httplib_response.length))
+
+ try:
+ assert_header_parsing(httplib_response.msg)
+ except HeaderParsingError as hpe: # Platform-specific: Python 3
+ log.warning(
+ 'Failed to parse headers (url=%s): %s',
+ self._absolute_url(url), hpe, exc_info=True)
+
return httplib_response
+ def _absolute_url(self, path):
+ return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
+
def close(self):
"""
Close all pooled connections and disable the pool.
@@ -409,7 +422,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# TODO: Add optional support for socket.gethostbyname checking.
scheme, host, port = get_host(url)
-
+
# Use explicit default port for comparison when none is given
if self.port and not port:
port = port_by_scheme.get(scheme)
@@ -568,25 +581,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Close the connection. If a connection is reused on which there
# was a Certificate error, the next request will certainly raise
# another Certificate error.
- if conn:
- conn.close()
- conn = None
+ conn = conn and conn.close()
+ release_conn = True
raise SSLError(e)
except SSLError:
# Treat SSLError separately from BaseSSLError to preserve
# traceback.
- if conn:
- conn.close()
- conn = None
+ conn = conn and conn.close()
+ release_conn = True
raise
except (TimeoutError, HTTPException, SocketError, ConnectionError) as e:
- if conn:
- # Discard the connection for these exceptions. It will be
- # be replaced during the next _get_conn() call.
- conn.close()
- conn = None
+            # Discard the connection for these exceptions. It will be
+            # replaced during the next _get_conn() call.
+ conn = conn and conn.close()
+ release_conn = True
if isinstance(e, SocketError) and self.proxy:
e = ProxyError('Cannot connect to proxy.', e)
@@ -626,6 +636,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
retries = retries.increment(method, url, response=response, _pool=self)
except MaxRetryError:
if retries.raise_on_redirect:
+ # Release the connection for this response, since we're not
+ # returning it to be released manually.
+ response.release_conn()
raise
return response
@@ -683,6 +696,10 @@ class HTTPSConnectionPool(HTTPConnectionPool):
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
block, headers, retries, _proxy, _proxy_headers,
**conn_kw)
+
+ if ca_certs and cert_reqs is None:
+ cert_reqs = 'CERT_REQUIRED'
+
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
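The new `_absolute_url` helper leans on the `Url` named tuple to rebuild a full URL for the header-parsing log message. A small sketch of that composition, assuming `Url` composes as in this release:

    from urllib3.util.url import Url

    print(Url(scheme='http', host='example.com', port=80, path='/redirect').url)
    # http://example.com:80/redirect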
diff --git a/urllib3/contrib/appengine.py b/urllib3/contrib/appengine.py
new file mode 100644
index 0000000..ed9d8b8
--- /dev/null
+++ b/urllib3/contrib/appengine.py
@@ -0,0 +1,222 @@
+import logging
+import os
+import warnings
+
+from ..exceptions import (
+ HTTPError,
+ HTTPWarning,
+ MaxRetryError,
+ ProtocolError,
+ TimeoutError,
+ SSLError
+)
+
+from ..packages.six import BytesIO
+from ..request import RequestMethods
+from ..response import HTTPResponse
+from ..util.timeout import Timeout
+from ..util.retry import Retry
+
+try:
+ from google.appengine.api import urlfetch
+except ImportError:
+ urlfetch = None
+
+
+log = logging.getLogger(__name__)
+
+
+class AppEnginePlatformWarning(HTTPWarning):
+ pass
+
+
+class AppEnginePlatformError(HTTPError):
+ pass
+
+
+class AppEngineManager(RequestMethods):
+ """
+ Connection manager for Google App Engine sandbox applications.
+
+ This manager uses the URLFetch service directly instead of using the
+ emulated httplib, and is subject to URLFetch limitations as described in
+ the App Engine documentation here:
+
+ https://cloud.google.com/appengine/docs/python/urlfetch
+
+ Notably it will raise an AppEnginePlatformError if:
+ * URLFetch is not available.
+ * If you attempt to use this on GAEv2 (Managed VMs), as full socket
+ support is available.
+ * If a request size is more than 10 megabytes.
+    * If a response size is more than 32 megabytes.
+ * If you use an unsupported request method such as OPTIONS.
+
+ Beyond those cases, it will raise normal urllib3 errors.
+ """
+
+ def __init__(self, headers=None, retries=None, validate_certificate=True):
+ if not urlfetch:
+ raise AppEnginePlatformError(
+ "URLFetch is not available in this environment.")
+
+ if is_prod_appengine_v2():
+ raise AppEnginePlatformError(
+ "Use normal urllib3.PoolManager instead of AppEngineManager"
+ "on Managed VMs, as using URLFetch is not necessary in "
+ "this environment.")
+
+ warnings.warn(
+ "urllib3 is using URLFetch on Google App Engine sandbox instead "
+ "of sockets. To use sockets directly instead of URLFetch see "
+ "https://urllib3.readthedocs.org/en/latest/contrib.html.",
+ AppEnginePlatformWarning)
+
+ RequestMethods.__init__(self, headers)
+ self.validate_certificate = validate_certificate
+
+ self.retries = retries or Retry.DEFAULT
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ # Return False to re-raise any potential exceptions
+ return False
+
+ def urlopen(self, method, url, body=None, headers=None,
+ retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
+ **response_kw):
+
+ retries = self._get_retries(retries, redirect)
+
+ try:
+ response = urlfetch.fetch(
+ url,
+ payload=body,
+ method=method,
+ headers=headers or {},
+ allow_truncated=False,
+ follow_redirects=(
+ redirect and
+ retries.redirect != 0 and
+ retries.total),
+ deadline=self._get_absolute_timeout(timeout),
+ validate_certificate=self.validate_certificate,
+ )
+ except urlfetch.DeadlineExceededError as e:
+ raise TimeoutError(self, e)
+
+ except urlfetch.InvalidURLError as e:
+ if 'too large' in e.message:
+ raise AppEnginePlatformError(
+ "URLFetch request too large, URLFetch only "
+ "supports requests up to 10mb in size.", e)
+ raise ProtocolError(e)
+
+ except urlfetch.DownloadError as e:
+ if 'Too many redirects' in e.message:
+ raise MaxRetryError(self, url, reason=e)
+ raise ProtocolError(e)
+
+ except urlfetch.ResponseTooLargeError as e:
+ raise AppEnginePlatformError(
+ "URLFetch response too large, URLFetch only supports"
+ "responses up to 32mb in size.", e)
+
+ except urlfetch.SSLCertificateError as e:
+ raise SSLError(e)
+
+ except urlfetch.InvalidMethodError as e:
+ raise AppEnginePlatformError(
+ "URLFetch does not support method: %s" % method, e)
+
+ http_response = self._urlfetch_response_to_http_response(
+ response, **response_kw)
+
+ # Check for redirect response
+ if (http_response.get_redirect_location() and
+ retries.raise_on_redirect and redirect):
+ raise MaxRetryError(self, url, "too many redirects")
+
+ # Check if we should retry the HTTP response.
+ if retries.is_forced_retry(method, status_code=http_response.status):
+ retries = retries.increment(
+ method, url, response=http_response, _pool=self)
+ log.info("Forced retry: %s" % url)
+ retries.sleep()
+ return self.urlopen(
+ method, url,
+ body=body, headers=headers,
+ retries=retries, redirect=redirect,
+ timeout=timeout, **response_kw)
+
+ return http_response
+
+ def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
+
+ if is_prod_appengine_v1():
+ # Production GAE handles deflate encoding automatically, but does
+ # not remove the encoding header.
+ content_encoding = urlfetch_resp.headers.get('content-encoding')
+
+ if content_encoding == 'deflate':
+ del urlfetch_resp.headers['content-encoding']
+
+ return HTTPResponse(
+ # In order for decoding to work, we must present the content as
+ # a file-like object.
+ body=BytesIO(urlfetch_resp.content),
+ headers=urlfetch_resp.headers,
+ status=urlfetch_resp.status_code,
+ **response_kw
+ )
+
+ def _get_absolute_timeout(self, timeout):
+ if timeout is Timeout.DEFAULT_TIMEOUT:
+ return 5 # 5s is the default timeout for URLFetch.
+ if isinstance(timeout, Timeout):
+            if timeout.read is not timeout.connect:
+ warnings.warn(
+ "URLFetch does not support granular timeout settings, "
+ "reverting to total timeout.", AppEnginePlatformWarning)
+ return timeout.total
+ return timeout
+
+ def _get_retries(self, retries, redirect):
+ if not isinstance(retries, Retry):
+ retries = Retry.from_int(
+ retries, redirect=redirect, default=self.retries)
+
+ if retries.connect or retries.read or retries.redirect:
+ warnings.warn(
+ "URLFetch only supports total retries and does not "
+ "recognize connect, read, or redirect retry parameters.",
+ AppEnginePlatformWarning)
+
+ return retries
+
+
+def is_appengine():
+ return (is_local_appengine() or
+ is_prod_appengine_v1() or
+ is_prod_appengine_v2())
+
+
+def is_appengine_sandbox():
+ return is_appengine() and not is_prod_appengine_v2()
+
+
+def is_local_appengine():
+ return ('APPENGINE_RUNTIME' in os.environ and
+ 'Development/' in os.environ['SERVER_SOFTWARE'])
+
+
+def is_prod_appengine_v1():
+ return ('APPENGINE_RUNTIME' in os.environ and
+ 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
+ not is_prod_appengine_v2())
+
+
+def is_prod_appengine_v2():
+ return os.environ.get('GAE_VM', False) == 'true'
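As a usage sketch, inside a sandboxed App Engine handler the new manager is a drop-in replacement for `PoolManager` for simple requests (this only runs where URLFetch is available):

    from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox

    if is_appengine_sandbox():
        http = AppEngineManager()   # raises AppEnginePlatformError elsewhere
        r = http.request('GET', 'https://example.com/')
        print(r.status)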
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index b2c34a8..19c5b4e 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -85,6 +85,14 @@ _openssl_verify = {
DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS
+# OpenSSL will only write 16K at a time
+SSL_WRITE_BLOCKSIZE = 16384
+
+try:
+ _ = memoryview
+ has_memoryview = True
+except NameError:
+ has_memoryview = False
orig_util_HAS_SNI = util.HAS_SNI
orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
@@ -204,13 +212,21 @@ class WrappedSocket(object):
continue
def sendall(self, data):
- while len(data):
- sent = self._send_until_done(data)
- data = data[sent:]
+ if has_memoryview and not isinstance(data, memoryview):
+ data = memoryview(data)
+
+ total_sent = 0
+ while total_sent < len(data):
+ sent = self._send_until_done(data[total_sent:total_sent+SSL_WRITE_BLOCKSIZE])
+ total_sent += sent
+
+ def shutdown(self):
+        # FIXME: rethrow compatible exceptions, should we ever use this
+ self.connection.shutdown()
def close(self):
if self._makefile_refs < 1:
- return self.connection.shutdown()
+ return self.connection.close()
else:
self._makefile_refs -= 1
@@ -287,7 +303,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
raise timeout('select timed out')
continue
except OpenSSL.SSL.Error as e:
- raise ssl.SSLError('bad handshake', e)
+ raise ssl.SSLError('bad handshake: %r' % e)
break
return WrappedSocket(cnx, sock)
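The `sendall()` change wraps the payload in a memoryview and feeds OpenSSL at most 16K per call, since that is all OpenSSL will write at a time. A standalone sketch of that loop (`send` here is an illustrative stand-in for the wrapped socket's `_send_until_done`):

    SSL_WRITE_BLOCKSIZE = 16384

    def sendall(send, data):
        data = memoryview(data)   # slicing a memoryview avoids copying the payload
        total_sent = 0
        while total_sent < len(data):
            # send() may accept fewer bytes than offered; advance by the actual count
            total_sent += send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])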
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 31bda1c..36ce0d1 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -149,6 +149,11 @@ class SecurityWarning(HTTPWarning):
pass
+class SubjectAltNameWarning(SecurityWarning):
+ "Warned when connecting to a host with a certificate missing a SAN."
+ pass
+
+
class InsecureRequestWarning(SecurityWarning):
"Warned when making an unverified HTTPS request."
pass
@@ -167,3 +172,19 @@ class InsecurePlatformWarning(SecurityWarning):
class ResponseNotChunked(ProtocolError, ValueError):
"Response needs to be chunked in order to read it as chunks."
pass
+
+
+class ProxySchemeUnknown(AssertionError, ValueError):
+ "ProxyManager does not support the supplied scheme"
+ # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
+
+ def __init__(self, scheme):
+ message = "Not supported proxy scheme %s" % scheme
+ super(ProxySchemeUnknown, self).__init__(message)
+
+
+class HeaderParsingError(HTTPError):
+ "Raised by assert_header_parsing, but we convert it to a log.warning statement."
+ def __init__(self, defects, unparsed_data):
+ message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
+ super(HeaderParsingError, self).__init__(message)
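A tiny sketch of the message `HeaderParsingError` produces, following its `__init__` above:

    from urllib3.exceptions import HeaderParsingError

    err = HeaderParsingError(defects=[], unparsed_data='garbage\r\n')
    print(err)   # -> Unknown, unparsed data: 'garbage\r\n' (repr of the leftover bytes)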
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index b8d1e74..76b6a12 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -8,7 +8,7 @@ except ImportError:
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError, MaxRetryError
+from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
from .request import RequestMethods
from .util.url import parse_url
from .util.retry import Retry
@@ -227,8 +227,8 @@ class ProxyManager(PoolManager):
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
- assert proxy.scheme in ("http", "https"), \
- 'Not supported proxy scheme %s' % proxy.scheme
+ if proxy.scheme not in ("http", "https"):
+ raise ProxySchemeUnknown(proxy.scheme)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
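Replacing the `assert` matters because assertions vanish under `python -O`; the check now fails loudly either way. A hedged sketch:

    import urllib3
    from urllib3.exceptions import ProxySchemeUnknown

    try:
        urllib3.ProxyManager('socks5://localhost:1080')
    except ProxySchemeUnknown as e:
        print(e)   # Not supported proxy scheme socks5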
diff --git a/urllib3/request.py b/urllib3/request.py
index b08d6c9..a1a12bc 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -71,14 +71,22 @@ class RequestMethods(object):
headers=headers,
**urlopen_kw)
- def request_encode_url(self, method, url, fields=None, **urlopen_kw):
+ def request_encode_url(self, method, url, fields=None, headers=None,
+ **urlopen_kw):
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
"""
+ if headers is None:
+ headers = self.headers
+
+ extra_kw = {'headers': headers}
+ extra_kw.update(urlopen_kw)
+
if fields:
url += '?' + urlencode(fields)
- return self.urlopen(method, url, **urlopen_kw)
+
+ return self.urlopen(method, url, **extra_kw)
def request_encode_body(self, method, url, fields=None, headers=None,
encode_multipart=True, multipart_boundary=None,
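With `headers` now an explicit parameter, GET-style requests pick up the manager's default headers the same way body-encoding requests already did. A hedged usage sketch:

    import urllib3

    http = urllib3.PoolManager(headers={'User-Agent': 'example-agent/1.0'})
    # The default User-Agent above now also flows through request_encode_url
    # methods (GET, HEAD, DELETE), matching request_encode_body:
    r = http.request('GET', 'http://example.com/', fields={'q': 'urllib3'})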
diff --git a/urllib3/response.py b/urllib3/response.py
index 24140c4..15d4aac 100644
--- a/urllib3/response.py
+++ b/urllib3/response.py
@@ -2,6 +2,7 @@ try:
import http.client as httplib
except ImportError:
import httplib
+from contextlib import contextmanager
import zlib
import io
from socket import timeout as SocketTimeout
@@ -12,7 +13,7 @@ from .exceptions import (
)
from .packages.six import string_types as basestring, binary_type, PY3
from .connection import HTTPException, BaseSSLError
-from .util.response import is_fp_closed
+from .util.response import is_fp_closed, is_response_to_head
class DeflateDecoder(object):
@@ -202,6 +203,47 @@ class HTTPResponse(io.IOBase):
return data
+ @contextmanager
+ def _error_catcher(self):
+ """
+ Catch low-level python exceptions, instead re-raising urllib3
+ variants, so that low-level exceptions are not leaked in the
+ high-level api.
+
+ On exit, release the connection back to the pool.
+ """
+ try:
+ try:
+ yield
+
+ except SocketTimeout:
+ # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
+ # there is yet no clean way to get at it from this context.
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except BaseSSLError as e:
+ # FIXME: Is there a better way to differentiate between SSLErrors?
+ if 'read operation timed out' not in str(e): # Defensive:
+ # This shouldn't happen but just in case we're missing an edge
+ # case, let's avoid swallowing SSL errors.
+ raise
+
+ raise ReadTimeoutError(self._pool, None, 'Read timed out.')
+
+ except HTTPException as e:
+ # This includes IncompleteRead.
+ raise ProtocolError('Connection broken: %r' % e, e)
+ except Exception:
+ # The response may not be closed but we're not going to use it anymore
+ # so close it now to ensure that the connection is released back to the pool.
+ if self._original_response and not self._original_response.isclosed():
+ self._original_response.close()
+
+ raise
+ finally:
+ if self._original_response and self._original_response.isclosed():
+ self.release_conn()
+
def read(self, amt=None, decode_content=None, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@@ -231,45 +273,28 @@ class HTTPResponse(io.IOBase):
return
flush_decoder = False
-
- try:
- try:
- if amt is None:
- # cStringIO doesn't like amt=None
- data = self._fp.read()
+ data = None
+
+ with self._error_catcher():
+ if amt is None:
+ # cStringIO doesn't like amt=None
+ data = self._fp.read()
+ flush_decoder = True
+ else:
+ cache_content = False
+ data = self._fp.read(amt)
+ if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
+ # Close the connection when no data is returned
+ #
+ # This is redundant to what httplib/http.client _should_
+ # already do. However, versions of python released before
+ # December 15, 2012 (http://bugs.python.org/issue16298) do
+ # not properly close the connection in all cases. There is
+ # no harm in redundantly calling close.
+ self._fp.close()
flush_decoder = True
- else:
- cache_content = False
- data = self._fp.read(amt)
- if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
- # Close the connection when no data is returned
- #
- # This is redundant to what httplib/http.client _should_
- # already do. However, versions of python released before
- # December 15, 2012 (http://bugs.python.org/issue16298) do
- # not properly close the connection in all cases. There is
- # no harm in redundantly calling close.
- self._fp.close()
- flush_decoder = True
-
- except SocketTimeout:
- # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
- # there is yet no clean way to get at it from this context.
- raise ReadTimeoutError(self._pool, None, 'Read timed out.')
-
- except BaseSSLError as e:
- # FIXME: Is there a better way to differentiate between SSLErrors?
- if 'read operation timed out' not in str(e): # Defensive:
- # This shouldn't happen but just in case we're missing an edge
- # case, let's avoid swallowing SSL errors.
- raise
-
- raise ReadTimeoutError(self._pool, None, 'Read timed out.')
-
- except HTTPException as e:
- # This includes IncompleteRead.
- raise ProtocolError('Connection broken: %r' % e, e)
+ if data:
self._fp_bytes_read += len(data)
data = self._decode(data, decode_content, flush_decoder)
@@ -277,11 +302,8 @@ class HTTPResponse(io.IOBase):
if cache_content:
self._body = data
- return data
+ return data
- finally:
- if self._original_response and self._original_response.isclosed():
- self.release_conn()
def stream(self, amt=2**16, decode_content=None):
"""
@@ -319,6 +341,7 @@ class HTTPResponse(io.IOBase):
with ``original_response=r``.
"""
headers = r.msg
+
if not isinstance(headers, HTTPHeaderDict):
if PY3: # Python 3
headers = HTTPHeaderDict(headers.items())
@@ -437,30 +460,29 @@ class HTTPResponse(io.IOBase):
raise ResponseNotChunked("Response is not chunked. "
"Header 'transfer-encoding: chunked' is missing.")
- if self._original_response and self._original_response._method.upper() == 'HEAD':
- # Don't bother reading the body of a HEAD request.
- # FIXME: Can we do this somehow without accessing private httplib _method?
+ # Don't bother reading the body of a HEAD request.
+ if self._original_response and is_response_to_head(self._original_response):
self._original_response.close()
return
- while True:
- self._update_chunk_length()
- if self.chunk_left == 0:
- break
- chunk = self._handle_chunk(amt)
- yield self._decode(chunk, decode_content=decode_content,
- flush_decoder=True)
-
- # Chunk content ends with \r\n: discard it.
- while True:
- line = self._fp.fp.readline()
- if not line:
- # Some sites may not end with '\r\n'.
- break
- if line == b'\r\n':
- break
-
- # We read everything; close the "file".
- if self._original_response:
- self._original_response.close()
- self.release_conn()
+ with self._error_catcher():
+ while True:
+ self._update_chunk_length()
+ if self.chunk_left == 0:
+ break
+ chunk = self._handle_chunk(amt)
+ yield self._decode(chunk, decode_content=decode_content,
+ flush_decoder=True)
+
+ # Chunk content ends with \r\n: discard it.
+ while True:
+ line = self._fp.fp.readline()
+ if not line:
+ # Some sites may not end with '\r\n'.
+ break
+ if line == b'\r\n':
+ break
+
+ # We read everything; close the "file".
+ if self._original_response:
+ self._original_response.close()
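The `_error_catcher` refactor centralizes the SocketTimeout/SSL/HTTPException translation that `read()` previously inlined, so `read_chunked()` gets the same handling for free. A generic sketch of the pattern, with illustrative names rather than urllib3's:

    from contextlib import contextmanager

    class TransportError(Exception):
        pass

    @contextmanager
    def error_catcher(cleanup):
        try:
            try:
                yield                     # the body of the with-block runs here
            except OSError as e:
                raise TransportError(e)   # low-level error, re-raised as the high-level variant
        except Exception:
            cleanup()                     # release resources before propagating
            raise

    # with error_catcher(sock.close):
    #     data = sock.recv(8192)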
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
index 859aec6..9ed5a64 100644
--- a/urllib3/util/connection.py
+++ b/urllib3/util/connection.py
@@ -60,6 +60,8 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
"""
host, port = address
+ if host.startswith('['):
+ host = host.strip('[]')
err = None
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
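`socket.getaddrinfo()` rejects the bracketed IPv6 literals that URL syntax requires, so the brackets are stripped before resolution. A quick sketch:

    host = '[::1]'              # IPv6 literal as it appears in a URL authority
    if host.startswith('['):
        host = host.strip('[]')
    print(host)                 # ::1 -- the form socket.getaddrinfo() accepts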
diff --git a/urllib3/util/response.py b/urllib3/util/response.py
index 45fff55..767ee15 100644
--- a/urllib3/util/response.py
+++ b/urllib3/util/response.py
@@ -1,3 +1,11 @@
+try:
+ import http.client as httplib
+except ImportError:
+ import httplib
+
+from ..exceptions import HeaderParsingError
+
+
def is_fp_closed(obj):
"""
Checks whether a given file-like object is closed.
@@ -20,3 +28,49 @@ def is_fp_closed(obj):
pass
raise ValueError("Unable to determine whether fp is closed.")
+
+
+def assert_header_parsing(headers):
+ """
+ Asserts whether all headers have been successfully parsed.
+ Extracts encountered errors from the result of parsing headers.
+
+ Only works on Python 3.
+
+ :param headers: Headers to verify.
+ :type headers: `httplib.HTTPMessage`.
+
+ :raises urllib3.exceptions.HeaderParsingError:
+ If parsing errors are found.
+ """
+
+ # This will fail silently if we pass in the wrong kind of parameter.
+ # To make debugging easier add an explicit check.
+ if not isinstance(headers, httplib.HTTPMessage):
+        raise TypeError('expected httplib.HTTPMessage, got {}.'.format(
+ type(headers)))
+
+ defects = getattr(headers, 'defects', None)
+ get_payload = getattr(headers, 'get_payload', None)
+
+ unparsed_data = None
+ if get_payload: # Platform-specific: Python 3.
+ unparsed_data = get_payload()
+
+ if defects or unparsed_data:
+ raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
+
+
+def is_response_to_head(response):
+ """
+    Checks whether the request that produced this response was a HEAD request.
+    Handles the quirks of AppEngine.
+
+    :param response: Response to check.
+    :type response: :class:`httplib.HTTPResponse`
+ """
+ # FIXME: Can we do this somehow without accessing private httplib _method?
+ method = response._method
+ if isinstance(method, int): # Platform-specific: Appengine
+ return method == 3
+ return method.upper() == 'HEAD'
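A minimal Python 3 sketch of `assert_header_parsing` tripping on a malformed header block (the raw headers here are fabricated for illustration):

    import email.parser
    from http.client import HTTPMessage

    from urllib3.exceptions import HeaderParsingError
    from urllib3.util.response import assert_header_parsing

    raw = 'Content-Type: text/plain\r\nbroken line without a colon\r\n\r\n'
    msg = email.parser.Parser(_class=HTTPMessage).parsestr(raw)

    try:
        assert_header_parsing(msg)   # the bad line is recorded as a defect
    except HeaderParsingError as e:
        print('Failed to parse headers:', e)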
diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py
index 7e0959d..1fb1f23 100644
--- a/urllib3/util/retry.py
+++ b/urllib3/util/retry.py
@@ -94,7 +94,7 @@ class Retry(object):
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
- than :attr:`Retry.MAX_BACKOFF`.
+ than :attr:`Retry.BACKOFF_MAX`.
By default, backoff is disabled (set to 0).
diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py
index b846d42..311378b 100644
--- a/urllib3/util/ssl_.py
+++ b/urllib3/util/ssl_.py
@@ -8,6 +8,13 @@ SSLContext = None
HAS_SNI = False
create_default_context = None
+# Maps the length of a digest to a possible hash function producing this digest
+HASHFUNC_MAP = {
+ 32: md5,
+ 40: sha1,
+ 64: sha256,
+}
+
import errno
import warnings
@@ -112,31 +119,21 @@ def assert_fingerprint(cert, fingerprint):
Fingerprint as string of hexdigits, can be interspersed by colons.
"""
- # Maps the length of a digest to a possible hash function producing
- # this digest.
- hashfunc_map = {
- 16: md5,
- 20: sha1,
- 32: sha256,
- }
-
fingerprint = fingerprint.replace(':', '').lower()
- digest_length, odd = divmod(len(fingerprint), 2)
-
- if odd or digest_length not in hashfunc_map:
- raise SSLError('Fingerprint is of invalid length.')
+ digest_length = len(fingerprint)
+ hashfunc = HASHFUNC_MAP.get(digest_length)
+ if not hashfunc:
+ raise SSLError(
+ 'Fingerprint of invalid length: {0}'.format(fingerprint))
# We need encode() here for py32; works on py2 and p33.
fingerprint_bytes = unhexlify(fingerprint.encode())
- hashfunc = hashfunc_map[digest_length]
-
cert_digest = hashfunc(cert).digest()
- if not cert_digest == fingerprint_bytes:
+ if cert_digest != fingerprint_bytes:
raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
- .format(hexlify(fingerprint_bytes),
- hexlify(cert_digest)))
+ .format(fingerprint, hexlify(cert_digest)))
def resolve_cert_reqs(candidate):
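The rewritten `assert_fingerprint` keys `HASHFUNC_MAP` on the hex-digest length (32 for md5, 40 for sha1, 64 for sha256). A self-checking sketch with stand-in certificate bytes:

    import hashlib
    from urllib3.util.ssl_ import assert_fingerprint

    cert = b'stand-in for DER-encoded certificate bytes'
    fp = hashlib.sha256(cert).hexdigest()   # 64 hex chars -> sha256 via HASHFUNC_MAP
    assert_fingerprint(cert, fp)            # passes silently; raises SSLError on mismatch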