50 files changed, 1485 insertions, 177 deletions
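The headline change in this release, per the changelog below: when ``ca_certs`` is supplied, ``cert_reqs`` now defaults to ``'CERT_REQUIRED'`` (Issue #650). A minimal sketch of the new behavior, mirroring the ``test_ca_certs_default_cert_required`` test added in this diff — the certificate bundle path and host are placeholders::

    from urllib3 import connection_from_url

    # Supplying ca_certs alone now turns certificate verification on;
    # previously cert_reqs had to be set explicitly.
    pool = connection_from_url('https://example.com',
                               ca_certs='/etc/ssl/certs/ca-certificates.crt')
    conn = pool._get_conn()
    assert conn.cert_reqs == 'CERT_REQUIRED'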
diff --git a/CHANGES.rst b/CHANGES.rst index 8d922a4..7f5620f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,54 @@ Changes ======= +1.11 (2015-07-21) ++++++++++++++++++ + +* When ``ca_certs`` is given, ``cert_reqs`` defaults to + ``'CERT_REQUIRED'``. (Issue #650) + +* ``pip install urllib3[secure]`` will install Certifi and + PyOpenSSL as dependencies. (Issue #678) + +* Made ``HTTPHeaderDict`` usable as a ``headers`` input value + (Issues #632, #679) + +* Added `urllib3.contrib.appengine <https://urllib3.readthedocs.org/en/latest/contrib.html#google-app-engine>`_ + which has an ``AppEngineManager`` for using ``URLFetch`` in a + Google AppEngine environment. (Issue #664) + +* Dev: Added test suite for AppEngine. (Issue #631) + +* Fix performance regression when using PyOpenSSL. (Issue #626) + +* Passing incorrect scheme (e.g. ``foo://``) will raise + ``ValueError`` instead of ``AssertionError`` (backwards + compatible for now, but please migrate). (Issue #640) + +* Fix pools not getting replenished when an error occurs during a + request using ``release_conn=False``. (Issue #644) + +* Fix pool-default headers not applying for url-encoded requests + like GET. (Issue #657) + +* log.warning in Python 3 when headers are skipped due to parsing + errors. (Issue #642) + +* Close and discard connections if an error occurs during read. + (Issue #660) + +* Fix host parsing for IPv6 proxies. (Issue #668) + +* Separate warning type SubjectAltNameWarning, now issued once + per host. (Issue #671) + +* Fix ``httplib.IncompleteRead`` not getting converted to + ``ProtocolError`` when using ``HTTPResponse.stream()`` + (Issue #674) + +* ... [Short description of non-trivial change.] (Issue #) + + 1.10.4 (2015-05-03) +++++++++++++++++++ diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 5807307..b8a0f01 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -1,4 +1,4 @@ -# Contributions to the urllib3 project +# Contributions to the urllib3 project ## Creator & Maintainer @@ -148,6 +148,27 @@ In chronological order: * tlynn <https://github.com/tlynn> * Respect the warning preferences at import. +* David D. Riddle <ddriddle@illinois.edu> + * IPv6 bugfixes in testsuite + +* Jon Wayne Parrott <jonwayne@google.com> + * App Engine environment tests. + +* John Krauss <https://github.com/talos> + * Clues to debugging problems with `cryptography` dependency in docs + +* Disassem <https://github.com/Disassem> + * Fix pool-default headers not applying for url-encoded requests like GET. + +* James Atherfold <jlatherfold@hotmail.com> + * Bugfixes relating to cleanup of connections during errors. + +* Christian Pedersen <https://github.com/chripede> + * IPv6 HTTPS proxy bugfix + +* Jordan Moldow <https://github.com/jmoldow> + * Fix low-level exceptions leaking from ``HTTPResponse.stream()``. + * [Your name or handle] <[email or website]> * [Brief summary of your changes] @@ -39,6 +39,9 @@ test: requirements test-all: requirements tox +test-gae: requirements + tox -e gae + docs: cd docs && pip install -r doc-requirements.txt && make html @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.10.4 +Version: 1.11 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,54 @@ Description: ======= Changes ======= + 1.11 (2015-07-21) + +++++++++++++++++ + + * When ``ca_certs`` is given, ``cert_reqs`` defaults to + ``'CERT_REQUIRED'``. 
(Issue #650) + + * ``pip install urllib3[secure]`` will install Certifi and + PyOpenSSL as dependencies. (Issue #678) + + * Made ``HTTPHeaderDict`` usable as a ``headers`` input value + (Issues #632, #679) + + * Added `urllib3.contrib.appengine <https://urllib3.readthedocs.org/en/latest/contrib.html#google-app-engine>`_ + which has an ``AppEngineManager`` for using ``URLFetch`` in a + Google AppEngine environment. (Issue #664) + + * Dev: Added test suite for AppEngine. (Issue #631) + + * Fix performance regression when using PyOpenSSL. (Issue #626) + + * Passing incorrect scheme (e.g. ``foo://``) will raise + ``ValueError`` instead of ``AssertionError`` (backwards + compatible for now, but please migrate). (Issue #640) + + * Fix pools not getting replenished when an error occurs during a + request using ``release_conn=False``. (Issue #644) + + * Fix pool-default headers not applying for url-encoded requests + like GET. (Issue #657) + + * log.warning in Python 3 when headers are skipped due to parsing + errors. (Issue #642) + + * Close and discard connections if an error occurs during read. + (Issue #660) + + * Fix host parsing for IPv6 proxies. (Issue #668) + + * Separate warning type SubjectAltNameWarning, now issued once + per host. (Issue #671) + + * Fix ``httplib.IncompleteRead`` not getting converted to + ``ProtocolError`` when using ``HTTPResponse.stream()`` + (Issue #674) + + * ... [Short description of non-trivial change.] (Issue #) + + 1.10.4 (2015-05-03) +++++++++++++++++++ diff --git a/dev-requirements.txt b/dev-requirements.txt index 9ea3691..a5e405d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,5 @@ nose==1.3.3 +nose-exclude==0.2.0 mock==1.0.1 coverage==3.7.1 tox==1.7.1 diff --git a/docs/contrib.rst b/docs/contrib.rst index 99c5492..5a88f8e 100644 --- a/docs/contrib.rst +++ b/docs/contrib.rst @@ -6,9 +6,47 @@ Contrib Modules These modules implement various extra features, that may not be ready for prime time. -.. _pyopenssl: +.. _contrib-pyopenssl: SNI-support for Python 2 ------------------------ .. automodule:: urllib3.contrib.pyopenssl + + +.. _gae: + +Google App Engine +----------------- + +The :mod:`urllib3.contrib.appengine` module provides a pool manager that +uses Google App Engine's `URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_. + +Example usage:: + + from urllib3 import PoolManager + from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox + + # This substitution will be done automagically once appengine code + # graduates from the contrib module. + if is_appengine_sandbox(): + # AppEngineManager uses AppEngine's URLFetch API behind the scenes + http = AppEngineManager() + else: + # PoolManager uses a socket-level API behind the scenes + http = PoolManager() + + # The client API should be consistent across managers, though some features are not available + # in URLFetch and you'll get warnings when you try to use them (like granular timeouts). + r = http.request('GET', 'https://google.com/') + + +There are `limitations <https://cloud.google.com/appengine/docs/python/urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be the best choice for your application. App Engine provides three options for urllib3 users: + +1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is cost-effective in many circumstances as long as your usage is within the limitations. +2. You can use a normal :class:`PoolManager` by enabling sockets. 
Sockets also have `limitations and restrictions <https://cloud.google.com/appengine/docs/python/sockets/#limitations-and-restrictions>`_ and have a lower free quota than URLFetch. To use sockets, be sure to specify the following in your ``app.yaml``:: + + env_variables: + GAE_USE_SOCKETS_HTTPLIB : 'true' + +3. If you are using `Managed VMs <https://cloud.google.com/appengine/docs/managed-vms/>`_, you can use the standard :class:`PoolManager` without any configuration or special environment variables. diff --git a/docs/exceptions.rst b/docs/exceptions.rst index f9e0553..cd451be 100644 --- a/docs/exceptions.rst +++ b/docs/exceptions.rst @@ -1,3 +1,5 @@ +.. _exceptions: + Exceptions ========== diff --git a/docs/helpers.rst b/docs/helpers.rst index 79f268b..6835e9a 100644 --- a/docs/helpers.rst +++ b/docs/helpers.rst @@ -1,3 +1,5 @@ +.. _helpers: + Helpers ======= diff --git a/docs/index.rst b/docs/index.rst index 81ac2d8..78d3601 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,9 +9,9 @@ urllib3 Documentation managers security helpers + exceptions collections contrib - security Highlights @@ -98,6 +98,27 @@ like buffering: >>> secondpart = b.read() +Upgrading & Versioning +---------------------- + +urllib3 uses a compatibility-based versioning scheme (let's call it +*compatver*). For the user, they indicate the required decision for upgrading. + +Given a version ``A.B.C``: + +``C.`` Strictly backwards-compatible, usually a bug-fix. **Always upgrade.** + +``B.`` Possibly partially incompatible, usually a new feature or a minor API +improvement. **Read the changelog and upgrade when ready.** + +``A.`` Major rewrite and possibly breaks everything. Not really an upgrade, +basically a new library under the same namespace, decide if you want to switch. + +For example, when going from urllib3 v1.2.3 to v1.2.4, you should always +upgrade without hesitation. When going from v1.2 to v1.3, you should read the +changes to make sure they're not going to affect you. + + Components ========== @@ -298,10 +319,8 @@ To aid the limited functionality of the :mod:`httplib` module, :mod:`urllib3` provides various helper methods which are used with the higher level components but can also be used independently. -.. toctree:: - - helpers - exceptions +* :ref:`helpers` +* :ref:`exceptions` Contrib Modules @@ -310,9 +329,7 @@ Contrib Modules These modules implement various extra features, that may not be ready for prime time. -.. toctree:: - - contrib +* :ref:`contrib-modules` Contributing @@ -352,7 +369,8 @@ benefits from this library. 
<a href="https://donorbox.org/personal-sponsor-urllib3" style="background-color:#1275ff;color:#fff;text-decoration:none;font-family:Verdana,sans-serif;display:inline-block;font-size:14px;padding:7px 16px;border-radius:5px;margin-right:2em;vertical-align:top;border:1px solid rgba(160,160,160,0.5);background-image:linear-gradient(#7dc5ee,#008cdd 85%,#30a2e4);box-shadow:inset 0 1px 0 rgba(255,255,255,0.25);">Sponsor with Credit Card</a> - <a class="coinbase-button" data-code="137087702cf2e77ce400d53867b164e6" href="https://coinbase.com/checkouts/137087702cf2e77ce400d53867b164e6">Sponsor with Bitcoin</a><script src="https://coinbase.com/assets/button.js" type="text/javascript"></script> + <a class="coinbase-button" data-code="137087702cf2e77ce400d53867b164e6" href="https://coinbase.com/checkouts/137087702cf2e77ce400d53867b164e6">Sponsor with Bitcoin</a> + <script src="https://www.coinbase.com/assets/button.js" type="text/javascript"></script> * **Recurring**: You're welcome to `support the maintainer on Gittip <https://www.gittip.com/shazow/>`_. diff --git a/docs/security.rst b/docs/security.rst index 881730e..0f5aa1c 100644 --- a/docs/security.rst +++ b/docs/security.rst @@ -128,6 +128,10 @@ packages:: $ pip install pyopenssl ndg-httpsclient pyasn1 +If ``cryptography`` fails to install as a dependency, make sure you have `libffi +<http://sourceware.org/libffi/>`_ available on your system and run +``pip install cryptography``. + Once the packages are installed, you can tell urllib3 to switch the ssl backend to PyOpenSSL with :func:`~urllib3.contrib.pyopenssl.inject_into_urllib3`:: @@ -138,6 +142,32 @@ Now you can continue using urllib3 as you normally would. For more details, check the :mod:`~urllib3.contrib.pyopenssl` module. +Installing urllib3 with SNI support and certificates +---------------------------------------------------- + +By default, if you need to use SNI on Python 2.6 or Python 2.7.0-2.7.8, you +have to install PyOpenSSL, ndghttpsclient, and pyasn1 separately. Further, to +use certifi you have to install it separately. If you know that you want these +dependencies when you install urllib3, you can now do:: + + pip install urllib3[secure] + +This will install the SNI dependencies on Python 2.6 and 2.7 (we cannot yet +restrict the microversion for 2.7) and certifi on all versions of Python. + +.. note:: + + If you do this on linux, e.g., Ubuntu 14.04, you will need extra system + dependencies for PyOpenSSL. Specifically, PyOpenSSL requires cryptography + which will require you to install: + + - build-essential + - python-dev + - libffi-dev + - libssl-dev + + The package names may vary depending on the distribution of linux you are + using. .. 
_insecurerequestwarning: diff --git a/dummyserver/handlers.py b/dummyserver/handlers.py index 53fbe4a..ffa1dd3 100644 --- a/dummyserver/handlers.py +++ b/dummyserver/handlers.py @@ -73,6 +73,10 @@ class TestingApp(RequestHandler): """ Handle OPTIONS requests """ self._call_method() + def head(self): + """ Handle HEAD requests """ + self._call_method() + def _call_method(self): """ Call the correct method in this class based on the incoming URI """ req = self.request @@ -232,6 +236,13 @@ class TestingApp(RequestHandler): return Response(chunks, headers=[('Content-Encoding', 'gzip')]) + def nbytes(self, request): + length = int(request.params.get('length')) + data = b'1' * length + return Response( + data, + headers=[('Content-Type', 'application/octet-stream')]) + def shutdown(self, request): sys.exit() diff --git a/dummyserver/server.py b/dummyserver/server.py index 63124d3..1999474 100755 --- a/dummyserver/server.py +++ b/dummyserver/server.py @@ -38,6 +38,35 @@ DEFAULT_CA = os.path.join(CERTS_PATH, 'cacert.pem') DEFAULT_CA_BAD = os.path.join(CERTS_PATH, 'client_bad.pem') NO_SAN_CA = os.path.join(CERTS_PATH, 'cacert.no_san.pem') +def _has_ipv6(host): + """ Returns True if the system can bind an IPv6 address. """ + sock = None + has_ipv6 = False + + if socket.has_ipv6: + # has_ipv6 returns true if cPython was compiled with IPv6 support. + # It does not tell us if the system has IPv6 support enabled. To + # determine that we must bind to an IPv6 address. + # https://github.com/shazow/urllib3/pull/611 + # https://bugs.python.org/issue658327 + try: + sock = socket.socket(socket.AF_INET6) + sock.bind((host, 0)) + has_ipv6 = True + except: + pass + + if sock: + sock.close() + return has_ipv6 + +# Some systems may have IPv6 support but DNS may not be configured +# properly. We can not count that localhost will resolve to ::1 on all +# systems. See https://github.com/shazow/urllib3/pull/611 and +# https://bugs.python.org/issue18792 +HAS_IPV6_AND_DNS = _has_ipv6('localhost') +HAS_IPV6 = _has_ipv6('::1') + # Different types of servers we have: @@ -64,7 +93,7 @@ class SocketServerThread(threading.Thread): self.ready_event = ready_event def _start_server(self): - if socket.has_ipv6: + if HAS_IPV6_AND_DNS: sock = socket.socket(socket.AF_INET6) else: warnings.warn("No IPv6 support. 
Falling back to IPv4.", @@ -117,7 +146,7 @@ def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, sockets = [] if address == "": address = None - if not socket.has_ipv6 and family == socket.AF_UNSPEC: + if not HAS_IPV6 and family == socket.AF_UNSPEC: # Python can be compiled with --disable-ipv6, which causes # operations on AF_INET6 sockets to fail, but does not # automatically exclude those results from getaddrinfo diff --git a/dummyserver/testcase.py b/dummyserver/testcase.py index 67e62cf..de6aedd 100644 --- a/dummyserver/testcase.py +++ b/dummyserver/testcase.py @@ -14,7 +14,6 @@ from dummyserver.handlers import TestingApp from dummyserver.proxy import ProxyHandler - class SocketDummyServerTestCase(unittest.TestCase): """ A simple socket-based server is created for this class that is good for @@ -131,3 +130,16 @@ class IPv6HTTPDummyServerTestCase(HTTPDummyServerTestCase): raise SkipTest('IPv6 not available') else: super(IPv6HTTPDummyServerTestCase, cls).setUpClass() + + +class IPv6HTTPDummyProxyTestCase(HTTPDummyProxyTestCase): + + http_host = 'localhost' + http_host_alt = '127.0.0.1' + + https_host = 'localhost' + https_host_alt = '127.0.0.1' + https_certs = DEFAULT_CERTS + + proxy_host = '::1' + proxy_host_alt = '127.0.0.1' @@ -8,6 +8,9 @@ cover-erase = true [flake8] max-line-length = 99 +[wheel] +universal = 1 + [egg_info] tag_build = tag_date = 0 @@ -54,4 +54,15 @@ setup(name='urllib3', 'tornado', ], test_suite='test', + extras_require={ + 'secure;python_version<="2.7"': [ + 'pyOpenSSL', + 'ndg-httpsclient', + 'pyasn1', + 'certifi', + ], + 'secure;python_version>"2.7"': [ + 'certifi', + ], + }, ) diff --git a/test/__init__.py b/test/__init__.py index 2fce71c..172493c 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -2,6 +2,7 @@ import warnings import sys import errno import functools +import logging import socket from nose.plugins.skip import SkipTest @@ -91,3 +92,37 @@ def requires_network(test): raise SkipTest(msg) raise return wrapper + + +class _ListHandler(logging.Handler): + def __init__(self): + super(_ListHandler, self).__init__() + self.records = [] + + def emit(self, record): + self.records.append(record) + + +class LogRecorder(object): + def __init__(self, target=logging.root): + super(LogRecorder, self).__init__() + self._target = target + self._handler = _ListHandler() + + @property + def records(self): + return self._handler.records + + def install(self): + self._target.addHandler(self._handler) + + def uninstall(self): + self._target.removeHandler(self._handler) + + def __enter__(self): + self.install() + return self.records + + def __exit__(self, exc_type, exc_value, traceback): + self.uninstall() + return False diff --git a/test/appengine/__init__.py b/test/appengine/__init__.py new file mode 100644 index 0000000..917544d --- /dev/null +++ b/test/appengine/__init__.py @@ -0,0 +1,71 @@ +import os +import sys +import unittest +from nose.plugins.skip import SkipTest + + +def activate_sandbox(): + """ + Enables parts of the GAE sandbox that are relevant. + + Inserts the stub module import hook which causes the usage of appengine-specific + httplib, httplib2, socket, etc. 
+ """ + from google.appengine.tools.devappserver2.python import sandbox + + for name in list(sys.modules): + if name in sandbox.dist27.MODULE_OVERRIDES: + del sys.modules[name] + sys.meta_path.insert(0, sandbox.StubModuleImportHook()) + sys.path_importer_cache = {} + + +def deactivate_sandbox(): + from google.appengine.tools.devappserver2.python import sandbox + + sys.meta_path = [ + x for x in sys.meta_path if not isinstance(x, sandbox.StubModuleImportHook)] + sys.path_importer_cache = {} + + # Delete any instances of sandboxed modules. + for name in list(sys.modules): + if name in sandbox.dist27.MODULE_OVERRIDES: + del sys.modules[name] + + +class AppEngineSandboxTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + if sys.version_info[:2] != (2, 7): + raise SkipTest("App Engine only tests on py2.7") + + if 'APPLICATION_ID' not in os.environ: + raise SkipTest("NoseGAE plugin not used.") + + try: + activate_sandbox() + except ImportError: + raise SkipTest("App Engine SDK not available.") + + @classmethod + def tearDownClass(self): + try: + deactivate_sandbox() + except ImportError: + pass + + +class MockResponse(object): + def __init__(self, content, status_code, content_was_truncated, final_url, headers): + import httplib + from StringIO import StringIO + + self.content = content + self.status_code = status_code + self.content_was_truncated = content_was_truncated + self.final_url = final_url + self.header_msg = httplib.HTTPMessage(StringIO(''.join( + ["%s: %s\n" % (k, v) for k, v in headers.iteritems()] + ["\n"]))) + self.headers = self.header_msg.items() diff --git a/test/appengine/app.yaml b/test/appengine/app.yaml new file mode 100644 index 0000000..907c57f --- /dev/null +++ b/test/appengine/app.yaml @@ -0,0 +1,11 @@ +# dummy app.yaml for nosegae + +api_version: 1 +runtime: python27 +threadsafe: true + +handlers: +- url: / + static_files: README.md + upload: README.md + mime_type: text/plain diff --git a/test/appengine/nose.cfg b/test/appengine/nose.cfg new file mode 100644 index 0000000..8d8b3f1 --- /dev/null +++ b/test/appengine/nose.cfg @@ -0,0 +1,4 @@ +[nosetests] +cover-min-percentage=0 +with-gae=1 +gae-application=test/appengine/app.yaml diff --git a/test/appengine/requirements.txt b/test/appengine/requirements.txt new file mode 100644 index 0000000..b6d79e0 --- /dev/null +++ b/test/appengine/requirements.txt @@ -0,0 +1 @@ +NoseGAE==0.5.7 diff --git a/test/appengine/test_urlfetch.py b/test/appengine/test_urlfetch.py new file mode 100644 index 0000000..3f72023 --- /dev/null +++ b/test/appengine/test_urlfetch.py @@ -0,0 +1,49 @@ +from . 
import AppEngineSandboxTest, MockResponse + +from mock import patch +from nose.plugins.skip import SkipTest +from ..test_no_ssl import TestWithoutSSL + + +class TestHTTP(AppEngineSandboxTest, TestWithoutSSL): + nosegae_urlfetch = True + + def test_urlfetch_called_with_http(self): + """ + Check that URLFetch is used to fetch non-https resources + """ + resp = MockResponse( + 'OK', + 200, + False, + 'http://www.google.com', + {'content-type': 'text/plain'}) + with patch('google.appengine.api.urlfetch.fetch', return_value=resp) as fetchmock: + import urllib3 + pool = urllib3.HTTPConnectionPool('www.google.com', '80') + r = pool.request('GET', '/') + self.assertEqual(r.status, 200, r.data) + self.assertEqual(fetchmock.call_count, 1) + + +class TestHTTPS(AppEngineSandboxTest): + nosegae_urlfetch = True + + def test_urlfetch_called_with_https(self): + """ + Check that URLFetch is used when fetching https resources + """ + raise SkipTest() # Skipped for now because it fails. + resp = MockResponse( + 'OK', + 200, + False, + 'https://www.google.com', + {'content-type': 'text/plain'}) + with patch('google.appengine.api.urlfetch.fetch', return_value=resp) as fetchmock: + import urllib3 + pool = urllib3.HTTPSConnectionPool('www.google.com', '443') + pool.ConnectionCls = urllib3.connection.UnverifiedHTTPSConnection + r = pool.request('GET', '/') + self.assertEqual(r.status, 200, r.data) + self.assertEqual(fetchmock.call_count, 1) diff --git a/test/contrib/test_gae_manager.py b/test/contrib/test_gae_manager.py new file mode 100644 index 0000000..aa909e9 --- /dev/null +++ b/test/contrib/test_gae_manager.py @@ -0,0 +1,185 @@ +import unittest + +from dummyserver.testcase import HTTPSDummyServerTestCase +from nose.plugins.skip import SkipTest + +try: + from google.appengine.api import urlfetch + (urlfetch) +except ImportError: + raise SkipTest("App Engine SDK not available.") + +from urllib3.contrib.appengine import AppEngineManager, AppEnginePlatformError +from urllib3.exceptions import ( + TimeoutError, + ProtocolError, + SSLError) +from urllib3.util.url import Url +from urllib3.util.retry import Retry + +from test.with_dummyserver.test_connectionpool import ( + TestConnectionPool, TestRetry) + + +# Prevent nose from running these test. +TestConnectionPool.__test__ = False +TestRetry.__test__ = False + + +# This class is used so we can re-use the tests from the connection pool. +# It proxies all requests to the manager. +class MockPool(object): + def __init__(self, host, port, manager, scheme='http'): + self.host = host + self.port = port + self.manager = manager + self.scheme = scheme + + def request(self, method, url, *args, **kwargs): + url = self._absolute_url(url) + return self.manager.request(method, url, *args, **kwargs) + + def urlopen(self, method, url, *args, **kwargs): + url = self._absolute_url(url) + return self.manager.urlopen(method, url, *args, **kwargs) + + def _absolute_url(self, path): + return Url( + scheme=self.scheme, + host=self.host, + port=self.port, + path=path).url + + +# Note that this doesn't run in the sandbox, it only runs with the URLFetch +# API stub enabled. There's no need to enable the sandbox as we know for a fact +# that URLFetch is used by the connection manager. +class TestGAEConnectionManager(TestConnectionPool): + __test__ = True + + # Magic class variable that tells NoseGAE to enable the URLFetch stub. 
+ nosegae_urlfetch = True + + def setUp(self): + self.manager = AppEngineManager() + self.pool = MockPool(self.host, self.port, self.manager) + + # Tests specific to AppEngineManager + + def test_exceptions(self): + # DeadlineExceededError -> TimeoutError + self.assertRaises( + TimeoutError, + self.pool.request, + 'GET', + '/sleep?seconds=0.005', + timeout=0.001) + + # InvalidURLError -> ProtocolError + self.assertRaises( + ProtocolError, + self.manager.request, + 'GET', + 'ftp://invalid/url') + + # DownloadError -> ProtocolError + self.assertRaises( + ProtocolError, + self.manager.request, + 'GET', + 'http://0.0.0.0') + + # ResponseTooLargeError -> AppEnginePlatformError + self.assertRaises( + AppEnginePlatformError, + self.pool.request, + 'GET', + '/nbytes?length=33554433') # One byte over 32 megabtyes. + + # URLFetch reports the request too large error as a InvalidURLError, + # which maps to a AppEnginePlatformError. + body = b'1' * 10485761 # One byte over 10 megabytes. + self.assertRaises( + AppEnginePlatformError, + self.manager.request, + 'POST', + '/', + body=body) + + # Re-used tests below this line. + # Subsumed tests + test_timeout_float = None # Covered by test_exceptions. + + # Non-applicable tests + test_conn_closed = None + test_nagle = None + test_socket_options = None + test_disable_default_socket_options = None + test_defaults_are_applied = None + test_tunnel = None + test_keepalive = None + test_keepalive_close = None + test_connection_count = None + test_connection_count_bigpool = None + test_for_double_release = None + test_release_conn_parameter = None + test_stream_keepalive = None + test_cleanup_on_connection_error = None + + # Tests that should likely be modified for appengine specific stuff + test_timeout = None + test_connect_timeout = None + test_connection_error_retries = None + test_total_timeout = None + test_none_total_applies_connect = None + test_timeout_success = None + test_source_address_error = None + test_bad_connect = None + test_partial_response = None + test_dns_error = None + + +class TestGAEConnectionManagerWithSSL(HTTPSDummyServerTestCase): + nosegae_urlfetch = True + + def setUp(self): + self.manager = AppEngineManager() + self.pool = MockPool(self.host, self.port, self.manager, 'https') + + def test_exceptions(self): + # SSLCertificateError -> SSLError + # SSLError is raised with dummyserver because URLFetch doesn't allow + # self-signed certs. + self.assertRaises( + SSLError, + self.pool.request, + 'GET', + '/') + + +class TestGAERetry(TestRetry): + __test__ = True + + # Magic class variable that tells NoseGAE to enable the URLFetch stub. 
+ nosegae_urlfetch = True + + def setUp(self): + self.manager = AppEngineManager() + self.pool = MockPool(self.host, self.port, self.manager) + + def test_default_method_whitelist_retried(self): + """ urllib3 should retry methods in the default method whitelist """ + retry = Retry(total=1, status_forcelist=[418]) + # Use HEAD instead of OPTIONS, as URLFetch doesn't support OPTIONS + resp = self.pool.request( + 'HEAD', '/successful_retry', + headers={'test-name': 'test_default_whitelist'}, + retries=retry) + self.assertEqual(resp.status, 200) + + #test_max_retry = None + #test_disabled_retry = None + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_collections.py b/test/test_collections.py index 0b36512..9d72939 100644 --- a/test/test_collections.py +++ b/test/test_collections.py @@ -237,7 +237,7 @@ class TestHTTPHeaderDict(unittest.TestCase): def test_copy(self): h = self.d.copy() self.assertTrue(self.d is not h) - self.assertEqual(self.d, h) + self.assertEqual(self.d, h) def test_getlist(self): self.assertEqual(self.d.getlist('cookie'), ['foo', 'bar']) @@ -302,6 +302,7 @@ class TestHTTPHeaderDict(unittest.TestCase): hdict = {'Content-Length': '0', 'Content-type': 'text/plain', 'Server': 'TornadoServer/1.2.3'} h = dict(HTTPHeaderDict(hdict).items()) self.assertEqual(hdict, h) + self.assertEqual(hdict, dict(HTTPHeaderDict(hdict))) def test_string_enforcement(self): # This currently throws AttributeError on key.lower(), should probably be something nicer diff --git a/test/test_connectionpool.py b/test/test_connectionpool.py index 0718b0f..ee37913 100644 --- a/test/test_connectionpool.py +++ b/test/test_connectionpool.py @@ -217,15 +217,23 @@ class TestConnectionPool(unittest.TestCase): old_pool_queue = pool.pool self.assertEqual(pool.pool, None) - self.assertRaises(ClosedPoolError, pool._get_conn) pool._put_conn(conn3) - self.assertRaises(ClosedPoolError, pool._get_conn) - self.assertRaises(Empty, old_pool_queue.get, block=False) + def test_absolute_url(self): + c = connection_from_url('http://google.com:80') + self.assertEqual( + 'http://google.com:80/path?query=foo', + c._absolute_url('path?query=foo')) + + def test_ca_certs_default_cert_required(self): + with connection_from_url('https://google.com:80', ca_certs='/etc/ssl/certs/custom.pem') as pool: + conn = pool._get_conn() + self.assertEqual(conn.cert_reqs, 'CERT_REQUIRED') + if __name__ == '__main__': unittest.main() diff --git a/test/test_exceptions.py b/test/test_exceptions.py index 4190a61..b5bb93e 100644 --- a/test/test_exceptions.py +++ b/test/test_exceptions.py @@ -4,7 +4,7 @@ import pickle from urllib3.exceptions import (HTTPError, MaxRetryError, LocationParseError, ClosedPoolError, EmptyPoolError, HostChangedError, ReadTimeoutError, - ConnectTimeoutError) + ConnectTimeoutError, HeaderParsingError) from urllib3.connectionpool import HTTPConnectionPool @@ -44,3 +44,11 @@ class TestPickle(unittest.TestCase): assert self.verify_pickling( ReadTimeoutError(HTTPConnectionPool('localhost'), '/', None)) + + +class TestFormat(unittest.TestCase): + def test_header_parsing_errors(self): + hpe = HeaderParsingError('defects', 'unparsed_data') + + self.assertTrue('defects' in str(hpe)) + self.assertTrue('unparsed_data' in str(hpe)) diff --git a/test/test_no_ssl.py b/test/test_no_ssl.py index b5961b8..79058f7 100644 --- a/test/test_no_ssl.py +++ b/test/test_no_ssl.py @@ -71,7 +71,7 @@ class TestWithoutSSL(unittest.TestCase): sys.meta_path.insert(0, ssl_blocker) def tearDown(self): - assert sys.meta_path.pop(0) == 
ssl_blocker + sys.meta_path.remove(ssl_blocker) module_stash.pop() diff --git a/test/test_proxymanager.py b/test/test_proxymanager.py index e7b5c48..7b19334 100644 --- a/test/test_proxymanager.py +++ b/test/test_proxymanager.py @@ -38,6 +38,10 @@ class TestProxyManager(unittest.TestCase): p = ProxyManager('https://something') self.assertEqual(p.proxy.port, 443) + def test_invalid_scheme(self): + self.assertRaises(AssertionError, ProxyManager, 'invalid://host/p') + self.assertRaises(ValueError, ProxyManager, 'invalid://host/p') + if __name__ == '__main__': unittest.main() diff --git a/test/test_response.py b/test/test_response.py index 2e2be0e..47d0521 100644 --- a/test/test_response.py +++ b/test/test_response.py @@ -7,7 +7,7 @@ try: except ImportError: import httplib from urllib3.response import HTTPResponse -from urllib3.exceptions import DecodeError, ResponseNotChunked +from urllib3.exceptions import DecodeError, ResponseNotChunked, ProtocolError from base64 import b64decode @@ -487,7 +487,7 @@ class TestResponse(unittest.TestCase): r.chunked = True r.chunk_left = None resp = HTTPResponse(r, preload_content=False, headers={'transfer-encoding': 'chunked'}) - self.assertRaises(httplib.IncompleteRead, next, resp.read_chunked()) + self.assertRaises(ProtocolError, next, resp.read_chunked()) def test_chunked_response_without_crlf_on_end(self): stream = [b"foo", b"bar", b"baz"] diff --git a/test/with_dummyserver/test_connectionpool.py b/test/with_dummyserver/test_connectionpool.py index d6cb162..741ae7b 100644 --- a/test/with_dummyserver/test_connectionpool.py +++ b/test/with_dummyserver/test_connectionpool.py @@ -36,7 +36,7 @@ from urllib3.util.timeout import Timeout import tornado from dummyserver.testcase import HTTPDummyServerTestCase -from dummyserver.server import NoIPv6Warning +from dummyserver.server import NoIPv6Warning, HAS_IPV6_AND_DNS from nose.tools import timed @@ -600,7 +600,7 @@ class TestConnectionPool(HTTPDummyServerTestCase): def test_source_address(self): for addr, is_ipv6 in VALID_SOURCE_ADDRESSES: - if is_ipv6 and not socket.has_ipv6: + if is_ipv6 and not HAS_IPV6_AND_DNS: warnings.warn("No IPv6 support: skipping.", NoIPv6Warning) continue @@ -647,6 +647,27 @@ class TestConnectionPool(HTTPDummyServerTestCase): self.assertEqual(b'123' * 4, response.read()) + def test_cleanup_on_connection_error(self): + ''' + Test that connections are recycled to the pool on + connection errors where no http response is received. + ''' + poolsize = 3 + with HTTPConnectionPool(self.host, self.port, maxsize=poolsize, block=True) as http: + self.assertEqual(http.pool.qsize(), poolsize) + + # force a connection error by supplying a non-existent + # url. We won't get a response for this and so the + # conn won't be implicitly returned to the pool. 
+ self.assertRaises(MaxRetryError, + http.request, 'GET', '/redirect', fields={'target': '/'}, release_conn=False, retries=0) + + r = http.request('GET', '/redirect', fields={'target': '/'}, release_conn=False, retries=1) + r.release_conn() + + # the pool should still contain poolsize elements + self.assertEqual(http.pool.qsize(), http.pool.maxsize) + class TestRetry(HTTPDummyServerTestCase): def setUp(self): diff --git a/test/with_dummyserver/test_https.py b/test/with_dummyserver/test_https.py index 992b8ef..63aea66 100644 --- a/test/with_dummyserver/test_https.py +++ b/test/with_dummyserver/test_https.py @@ -419,6 +419,11 @@ class TestHTTPS_TLSv1(HTTPSDummyServerTestCase): self._pool.ca_certs = DEFAULT_CA self._pool.request('GET', '/') + def test_set_cert_default_cert_required(self): + conn = VerifiedHTTPSConnection(self.host, self.port) + conn.set_cert(ca_certs='/etc/ssl/certs/custom.pem') + self.assertEqual(conn.cert_reqs, 'CERT_REQUIRED') + class TestHTTPS_NoSAN(HTTPSDummyServerTestCase): certs = NO_SAN_CERTS diff --git a/test/with_dummyserver/test_poolmanager.py b/test/with_dummyserver/test_poolmanager.py index 7e51c73..099ac52 100644 --- a/test/with_dummyserver/test_poolmanager.py +++ b/test/with_dummyserver/test_poolmanager.py @@ -1,6 +1,8 @@ import unittest import json +from nose.plugins.skip import SkipTest +from dummyserver.server import HAS_IPV6 from dummyserver.testcase import (HTTPDummyServerTestCase, IPv6HTTPDummyServerTestCase) from urllib3.poolmanager import PoolManager @@ -128,6 +130,14 @@ class TestPoolManager(HTTPDummyServerTestCase): def test_headers(self): http = PoolManager(headers={'Foo': 'bar'}) + r = http.request('GET', '%s/headers' % self.base_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + + r = http.request('POST', '%s/headers' % self.base_url) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + r = http.request_encode_url('GET', '%s/headers' % self.base_url) returned_headers = json.loads(r.data.decode()) self.assertEqual(returned_headers.get('Foo'), 'bar') @@ -154,6 +164,9 @@ class TestPoolManager(HTTPDummyServerTestCase): class TestIPv6PoolManager(IPv6HTTPDummyServerTestCase): + if not HAS_IPV6: + raise SkipTest("IPv6 is not supported on this system.") + def setUp(self): self.base_url = 'http://[%s]:%d' % (self.host, self.port) diff --git a/test/with_dummyserver/test_proxy_poolmanager.py b/test/with_dummyserver/test_proxy_poolmanager.py index df300fe..c593f2d 100644 --- a/test/with_dummyserver/test_proxy_poolmanager.py +++ b/test/with_dummyserver/test_proxy_poolmanager.py @@ -4,11 +4,12 @@ import unittest from nose.tools import timed -from dummyserver.testcase import HTTPDummyProxyTestCase +from dummyserver.testcase import HTTPDummyProxyTestCase, IPv6HTTPDummyProxyTestCase from dummyserver.server import ( DEFAULT_CA, DEFAULT_CA_BAD, get_unreachable_address) from .. 
import TARPIT_HOST +from urllib3._collections import HTTPHeaderDict from urllib3.poolmanager import proxy_from_url, ProxyManager from urllib3.exceptions import ( MaxRetryError, SSLError, ProxyError, ConnectTimeoutError) @@ -48,7 +49,7 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): def test_proxy_conn_fail(self): host, port = get_unreachable_address() - http = proxy_from_url('http://%s:%s/' % (host, port), retries=1) + http = proxy_from_url('http://%s:%s/' % (host, port), retries=1, timeout=0.05) self.assertRaises(MaxRetryError, http.request, 'GET', '%s/' % self.https_url) self.assertRaises(MaxRetryError, http.request, 'GET', @@ -223,6 +224,22 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): self.assertEqual(returned_headers.get('Host'), '%s:%s'%(self.https_host,self.https_port)) + def test_headerdict(self): + default_headers = HTTPHeaderDict(a='b') + proxy_headers = HTTPHeaderDict() + proxy_headers.add('foo', 'bar') + + http = proxy_from_url( + self.proxy_url, + headers=default_headers, + proxy_headers=proxy_headers) + + request_headers = HTTPHeaderDict(baz='quux') + r = http.request('GET', '%s/headers' % self.http_url, headers=request_headers) + returned_headers = json.loads(r.data.decode()) + self.assertEqual(returned_headers.get('Foo'), 'bar') + self.assertEqual(returned_headers.get('Baz'), 'quux') + def test_proxy_pooling(self): http = proxy_from_url(self.proxy_url) @@ -283,5 +300,26 @@ class TestHTTPProxyManager(HTTPDummyProxyTestCase): except MaxRetryError as e: assert isinstance(e.reason, ConnectTimeoutError) + +class TestIPv6HTTPProxyManager(IPv6HTTPDummyProxyTestCase): + + def setUp(self): + self.http_url = 'http://%s:%d' % (self.http_host, self.http_port) + self.http_url_alt = 'http://%s:%d' % (self.http_host_alt, + self.http_port) + self.https_url = 'https://%s:%d' % (self.https_host, self.https_port) + self.https_url_alt = 'https://%s:%d' % (self.https_host_alt, + self.https_port) + self.proxy_url = 'http://[%s]:%d' % (self.proxy_host, self.proxy_port) + + def test_basic_ipv6_proxy(self): + http = proxy_from_url(self.proxy_url) + + r = http.request('GET', '%s/' % self.http_url) + self.assertEqual(r.status, 200) + + r = http.request('GET', '%s/' % self.https_url) + self.assertEqual(r.status, 200) + if __name__ == '__main__': unittest.main() diff --git a/test/with_dummyserver/test_socketlevel.py b/test/with_dummyserver/test_socketlevel.py index 6c99653..5af00e0 100644 --- a/test/with_dummyserver/test_socketlevel.py +++ b/test/with_dummyserver/test_socketlevel.py @@ -10,17 +10,24 @@ from urllib3.exceptions import ( SSLError, ProtocolError, ) +from urllib3.response import httplib from urllib3.util.ssl_ import HAS_SNI from urllib3.util.timeout import Timeout from urllib3.util.retry import Retry +from urllib3._collections import HTTPHeaderDict from dummyserver.testcase import SocketDummyServerTestCase from dummyserver.server import ( DEFAULT_CERTS, DEFAULT_CA, get_unreachable_address) -from .. import onlyPy3 +from .. import onlyPy3, LogRecorder from nose.plugins.skip import SkipTest +try: + from mimetools import Message as MimeToolMessage +except ImportError: + class MimeToolMessage(object): + pass from threading import Event import socket import ssl @@ -119,8 +126,9 @@ class TestSocketClosing(SocketDummyServerTestCase): def test_connection_refused(self): # Does the pool retry if there is no listener on the port? 
host, port = get_unreachable_address() - pool = HTTPConnectionPool(host, port) - self.assertRaises(MaxRetryError, pool.request, 'GET', '/', retries=0) + http = HTTPConnectionPool(host, port, maxsize=3, block=True) + self.assertRaises(MaxRetryError, http.request, 'GET', '/', retries=0, release_conn=False) + self.assertEqual(http.pool.qsize(), http.pool.maxsize) def test_connection_read_timeout(self): timed_out = Event() @@ -133,13 +141,15 @@ class TestSocketClosing(SocketDummyServerTestCase): sock.close() self._start_server(socket_handler) - pool = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=False) + http = HTTPConnectionPool(self.host, self.port, timeout=0.001, retries=False, maxsize=3, block=True) try: - self.assertRaises(ReadTimeoutError, pool.request, 'GET', '/') + self.assertRaises(ReadTimeoutError, http.request, 'GET', '/', release_conn=False) finally: timed_out.set() + self.assertEqual(http.pool.qsize(), http.pool.maxsize) + def test_https_connection_read_timeout(self): """ Handshake timeouts should fail with a Timeout""" timed_out = Event() @@ -297,6 +307,63 @@ class TestSocketClosing(SocketDummyServerTestCase): self.assertEqual(response.status, 200) self.assertEqual(response.data, b'foo') + def test_connection_cleanup_on_read_timeout(self): + timed_out = Event() + + def socket_handler(listener): + sock = listener.accept()[0] + buf = b'' + body = 'Hi' + while not buf.endswith(b'\r\n\r\n'): + buf = sock.recv(65536) + sock.send(('HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' % len(body)).encode('utf-8')) + + timed_out.wait() + sock.close() + + self._start_server(socket_handler) + with HTTPConnectionPool(self.host, self.port) as pool: + poolsize = pool.pool.qsize() + response = pool.urlopen('GET', '/', retries=0, preload_content=False, + timeout=Timeout(connect=1, read=0.001)) + try: + self.assertRaises(ReadTimeoutError, response.read) + self.assertEqual(poolsize, pool.pool.qsize()) + finally: + timed_out.set() + + def test_connection_cleanup_on_protocol_error_during_read(self): + body = 'Response' + partial_body = body[:2] + + def socket_handler(listener): + sock = listener.accept()[0] + + # Consume request + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf = sock.recv(65536) + + # Send partial response and close socket. + sock.send(( + 'HTTP/1.1 200 OK\r\n' + 'Content-Type: text/plain\r\n' + 'Content-Length: %d\r\n' + '\r\n' + '%s' % (len(body), partial_body)).encode('utf-8') + ) + sock.close() + + self._start_server(socket_handler) + with HTTPConnectionPool(self.host, self.port) as pool: + poolsize = pool.pool.qsize() + response = pool.request('GET', '/', retries=0, preload_content=False) + + self.assertRaises(ProtocolError, response.read) + self.assertEqual(poolsize, pool.pool.qsize()) class TestProxyManager(SocketDummyServerTestCase): @@ -355,7 +422,7 @@ class TestProxyManager(SocketDummyServerTestCase): base_url = 'http://%s:%d' % (self.host, self.port) # Define some proxy headers. 
- proxy_headers = {'For The Proxy': 'YEAH!'} + proxy_headers = HTTPHeaderDict({'For The Proxy': 'YEAH!'}) proxy = proxy_from_url(base_url, proxy_headers=proxy_headers) conn = proxy.connection_from_url('http://www.google.com/') @@ -617,6 +684,86 @@ class TestHeaders(SocketDummyServerTestCase): r = pool.request('GET', '/') self.assertEqual(HEADERS, dict(r.headers.items())) # to preserve case sensitivity + def test_headers_are_sent_with_the_original_case(self): + headers = {'foo': 'bar', 'bAz': 'quux'} + parsed_headers = {} + + def socket_handler(listener): + sock = listener.accept()[0] + + buf = b'' + while not buf.endswith(b'\r\n\r\n'): + buf += sock.recv(65536) + + headers_list = [header for header in buf.split(b'\r\n')[1:] if header] + + for header in headers_list: + (key, value) = header.split(b': ') + parsed_headers[key.decode()] = value.decode() + + # Send incomplete message (note Content-Length) + sock.send(( + 'HTTP/1.1 204 No Content\r\n' + 'Content-Length: 0\r\n' + '\r\n').encode('utf-8')) + + sock.close() + + self._start_server(socket_handler) + expected_headers = {'Accept-Encoding': 'identity', + 'Host': '{0}:{1}'.format(self.host, self.port)} + expected_headers.update(headers) + + pool = HTTPConnectionPool(self.host, self.port, retries=False) + pool.request('GET', '/', headers=HTTPHeaderDict(headers)) + self.assertEqual(expected_headers, parsed_headers) + + +class TestBrokenHeaders(SocketDummyServerTestCase): + def setUp(self): + if issubclass(httplib.HTTPMessage, MimeToolMessage): + raise SkipTest('Header parsing errors not available') + + super(TestBrokenHeaders, self).setUp() + + def _test_broken_header_parsing(self, headers): + handler = create_response_handler(( + b'HTTP/1.1 200 OK\r\n' + b'Content-Length: 0\r\n' + b'Content-type: text/plain\r\n' + ) + b'\r\n'.join(headers) + b'\r\n' + ) + + self._start_server(handler) + pool = HTTPConnectionPool(self.host, self.port, retries=False) + + with LogRecorder() as logs: + pool.request('GET', '/') + + for record in logs: + if 'Failed to parse headers' in record.msg and \ + pool._absolute_url('/') == record.args[0]: + return + self.fail('Missing log about unparsed headers') + + def test_header_without_name(self): + self._test_broken_header_parsing([ + b': Value\r\n', + b'Another: Header\r\n', + ]) + + def test_header_without_name_or_value(self): + self._test_broken_header_parsing([ + b':\r\n', + b'Another: Header\r\n', + ]) + + def test_header_without_colon_or_value(self): + self._test_broken_header_parsing([ + b'Broken Header', + b'Another: Header', + ]) + class TestHEAD(SocketDummyServerTestCase): def test_chunked_head_response_does_not_hang(self): diff --git a/urllib3.egg-info/PKG-INFO b/urllib3.egg-info/PKG-INFO index 123e7be..a19a535 100644 --- a/urllib3.egg-info/PKG-INFO +++ b/urllib3.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: urllib3 -Version: 1.10.4 +Version: 1.11 Summary: HTTP library with thread-safe connection pooling, file post, and more. Home-page: http://urllib3.readthedocs.org/ Author: Andrey Petrov @@ -156,6 +156,54 @@ Description: ======= Changes ======= + 1.11 (2015-07-21) + +++++++++++++++++ + + * When ``ca_certs`` is given, ``cert_reqs`` defaults to + ``'CERT_REQUIRED'``. (Issue #650) + + * ``pip install urllib3[secure]`` will install Certifi and + PyOpenSSL as dependencies. 
(Issue #678) + + * Made ``HTTPHeaderDict`` usable as a ``headers`` input value + (Issues #632, #679) + + * Added `urllib3.contrib.appengine <https://urllib3.readthedocs.org/en/latest/contrib.html#google-app-engine>`_ + which has an ``AppEngineManager`` for using ``URLFetch`` in a + Google AppEngine environment. (Issue #664) + + * Dev: Added test suite for AppEngine. (Issue #631) + + * Fix performance regression when using PyOpenSSL. (Issue #626) + + * Passing incorrect scheme (e.g. ``foo://``) will raise + ``ValueError`` instead of ``AssertionError`` (backwards + compatible for now, but please migrate). (Issue #640) + + * Fix pools not getting replenished when an error occurs during a + request using ``release_conn=False``. (Issue #644) + + * Fix pool-default headers not applying for url-encoded requests + like GET. (Issue #657) + + * log.warning in Python 3 when headers are skipped due to parsing + errors. (Issue #642) + + * Close and discard connections if an error occurs during read. + (Issue #660) + + * Fix host parsing for IPv6 proxies. (Issue #668) + + * Separate warning type SubjectAltNameWarning, now issued once + per host. (Issue #671) + + * Fix ``httplib.IncompleteRead`` not getting converted to + ``ProtocolError`` when using ``HTTPResponse.stream()`` + (Issue #674) + + * ... [Short description of non-trivial change.] (Issue #) + + 1.10.4 (2015-05-03) +++++++++++++++++++ diff --git a/urllib3.egg-info/SOURCES.txt b/urllib3.egg-info/SOURCES.txt index 16d8476..2f96e50 100644 --- a/urllib3.egg-info/SOURCES.txt +++ b/urllib3.egg-info/SOURCES.txt @@ -54,7 +54,13 @@ test/test_proxymanager.py test/test_response.py test/test_retry.py test/test_util.py +test/appengine/__init__.py +test/appengine/app.yaml +test/appengine/nose.cfg +test/appengine/requirements.txt +test/appengine/test_urlfetch.py test/contrib/__init__.py +test/contrib/test_gae_manager.py test/contrib/test_pyopenssl.py test/with_dummyserver/__init__.py test/with_dummyserver/test_connectionpool.py @@ -76,8 +82,10 @@ urllib3/response.py urllib3.egg-info/PKG-INFO urllib3.egg-info/SOURCES.txt urllib3.egg-info/dependency_links.txt +urllib3.egg-info/requires.txt urllib3.egg-info/top_level.txt urllib3/contrib/__init__.py +urllib3/contrib/appengine.py urllib3/contrib/ntlmpool.py urllib3/contrib/pyopenssl.py urllib3/packages/__init__.py diff --git a/urllib3.egg-info/requires.txt b/urllib3.egg-info/requires.txt new file mode 100644 index 0000000..2490b32 --- /dev/null +++ b/urllib3.egg-info/requires.txt @@ -0,0 +1,9 @@ + +[secure;python_version<="2.7"] +pyOpenSSL +ndg-httpsclient +pyasn1 +certifi + +[secure;python_version>"2.7"] +certifi diff --git a/urllib3/__init__.py b/urllib3/__init__.py index f48ac4a..747d09a 100644 --- a/urllib3/__init__.py +++ b/urllib3/__init__.py @@ -4,7 +4,7 @@ urllib3 - Thread-safe connection pooling and re-using. __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' __license__ = 'MIT' -__version__ = '1.10.4' +__version__ = '1.11' from .connectionpool import ( @@ -58,6 +58,8 @@ del NullHandler import warnings # SecurityWarning's always go off by default. warnings.simplefilter('always', exceptions.SecurityWarning, append=True) +# SubjectAltNameWarning's should go off once per host +warnings.simplefilter('default', exceptions.SubjectAltNameWarning) # InsecurePlatformWarning's don't vary between requests, so we keep it default. 
warnings.simplefilter('default', exceptions.InsecurePlatformWarning, append=True) diff --git a/urllib3/_collections.py b/urllib3/_collections.py index 279416c..b68b9a5 100644 --- a/urllib3/_collections.py +++ b/urllib3/_collections.py @@ -97,14 +97,7 @@ class RecentlyUsedContainer(MutableMapping): return list(iterkeys(self._container)) -_dict_setitem = dict.__setitem__ -_dict_getitem = dict.__getitem__ -_dict_delitem = dict.__delitem__ -_dict_contains = dict.__contains__ -_dict_setdefault = dict.setdefault - - -class HTTPHeaderDict(dict): +class HTTPHeaderDict(MutableMapping): """ :param headers: An iterable of field-value pairs. Must not contain multiple field names @@ -139,7 +132,8 @@ class HTTPHeaderDict(dict): """ def __init__(self, headers=None, **kwargs): - dict.__init__(self) + super(HTTPHeaderDict, self).__init__() + self._container = {} if headers is not None: if isinstance(headers, HTTPHeaderDict): self._copy_from(headers) @@ -149,38 +143,44 @@ class HTTPHeaderDict(dict): self.extend(kwargs) def __setitem__(self, key, val): - return _dict_setitem(self, key.lower(), (key, val)) + self._container[key.lower()] = (key, val) + return self._container[key.lower()] def __getitem__(self, key): - val = _dict_getitem(self, key.lower()) + val = self._container[key.lower()] return ', '.join(val[1:]) def __delitem__(self, key): - return _dict_delitem(self, key.lower()) + del self._container[key.lower()] def __contains__(self, key): - return _dict_contains(self, key.lower()) + return key.lower() in self._container def __eq__(self, other): if not isinstance(other, Mapping) and not hasattr(other, 'keys'): return False if not isinstance(other, type(self)): other = type(self)(other) - return dict((k1, self[k1]) for k1 in self) == dict((k2, other[k2]) for k2 in other) + return (dict((k.lower(), v) for k, v in self.itermerged()) == + dict((k.lower(), v) for k, v in other.itermerged())) def __ne__(self, other): return not self.__eq__(other) - values = MutableMapping.values - get = MutableMapping.get - update = MutableMapping.update - if not PY3: # Python 2 iterkeys = MutableMapping.iterkeys itervalues = MutableMapping.itervalues __marker = object() + def __len__(self): + return len(self._container) + + def __iter__(self): + # Only provide the originally cased names + for vals in self._container.values(): + yield vals[0] + def pop(self, key, default=__marker): '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. If key is not found, d is returned if given, otherwise KeyError is raised. @@ -216,7 +216,7 @@ class HTTPHeaderDict(dict): key_lower = key.lower() new_vals = key, val # Keep the common case aka no item present as fast as possible - vals = _dict_setdefault(self, key_lower, new_vals) + vals = self._container.setdefault(key_lower, new_vals) if new_vals is not vals: # new_vals was not inserted, as there was a previous one if isinstance(vals, list): @@ -225,7 +225,7 @@ class HTTPHeaderDict(dict): else: # vals should be a tuple then, i.e. only one item so far # Need to convert the tuple to list for further extension - _dict_setitem(self, key_lower, [vals[0], vals[1], val]) + self._container[key_lower] = [vals[0], vals[1], val] def extend(self, *args, **kwargs): """Generic import function for any type of header-like object. 
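A brief sketch of the case-insensitive, multi-value semantics the rewritten ``HTTPHeaderDict`` keeps, taken from the ``add``/``__getitem__``/``iteritems`` implementations in the hunks above (this is what makes the class usable as a ``headers`` input value, per Issues #632 and #679)::

    from urllib3._collections import HTTPHeaderDict

    h = HTTPHeaderDict()
    h.add('Set-Cookie', 'foo')   # add() preserves duplicate fields
    h.add('set-cookie', 'bar')   # keys are compared case-insensitively

    assert h['SET-COOKIE'] == 'foo, bar'            # __getitem__ merges values
    assert list(h.iteritems()) == [('Set-Cookie', 'foo'),
                                   ('Set-Cookie', 'bar')]  # first-seen case kept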
@@ -236,7 +236,7 @@ class HTTPHeaderDict(dict): raise TypeError("extend() takes at most 1 positional " "arguments ({} given)".format(len(args))) other = args[0] if len(args) >= 1 else () - + if isinstance(other, HTTPHeaderDict): for key, val in other.iteritems(): self.add(key, val) @@ -257,7 +257,7 @@ class HTTPHeaderDict(dict): """Returns a list of all the values for the named field. Returns an empty list if the key doesn't exist.""" try: - vals = _dict_getitem(self, key.lower()) + vals = self._container[key.lower()] except KeyError: return [] else: @@ -276,11 +276,11 @@ class HTTPHeaderDict(dict): def _copy_from(self, other): for key in other: - val = _dict_getitem(other, key) + val = other.getlist(key) if isinstance(val, list): # Don't need to convert tuples val = list(val) - _dict_setitem(self, key, val) + self._container[key.lower()] = [key] + val def copy(self): clone = type(self)() @@ -290,14 +290,14 @@ class HTTPHeaderDict(dict): def iteritems(self): """Iterate over all header lines, including duplicate ones.""" for key in self: - vals = _dict_getitem(self, key) + vals = self._container[key.lower()] for val in vals[1:]: yield vals[0], val def itermerged(self): """Iterate over all headers, merging duplicate ones together.""" for key in self: - val = _dict_getitem(self, key) + val = self._container[key.lower()] yield val[0], ', '.join(val[1:]) def items(self): @@ -307,16 +307,16 @@ class HTTPHeaderDict(dict): def from_httplib(cls, message): # Python 2 """Read headers from a Python 2 httplib message object.""" # python2.7 does not expose a proper API for exporting multiheaders - # efficiently. This function re-reads raw lines from the message + # efficiently. This function re-reads raw lines from the message # object and extracts the multiheaders properly. headers = [] - + for line in message.headers: if line.startswith((' ', '\t')): key, value = headers[-1] headers[-1] = (key, value + '\r\n' + line.rstrip()) continue - + key, value = line.split(':', 1) headers.append((key, value.strip())) diff --git a/urllib3/connection.py b/urllib3/connection.py index 2a8c359..f64dd1a 100644 --- a/urllib3/connection.py +++ b/urllib3/connection.py @@ -38,7 +38,7 @@ except NameError: # Python 2: from .exceptions import ( ConnectTimeoutError, SystemTimeWarning, - SecurityWarning, + SubjectAltNameWarning, ) from .packages.ssl_match_hostname import match_hostname @@ -192,6 +192,9 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert_reqs=None, ca_certs=None, assert_hostname=None, assert_fingerprint=None): + if ca_certs and cert_reqs is None: + cert_reqs = 'CERT_REQUIRED' + self.key_file = key_file self.cert_file = cert_file self.cert_reqs = cert_reqs @@ -245,10 +248,11 @@ class VerifiedHTTPSConnection(HTTPSConnection): cert = self.sock.getpeercert() if not cert.get('subjectAltName', ()): warnings.warn(( - 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' - 'This feature is being removed by major browsers and deprecated by RFC 2818. ' - '(See https://github.com/shazow/urllib3/issues/497 for details.)'), - SecurityWarning + 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' + '`commonName` for now. This feature is being removed by major browsers and ' + 'deprecated by RFC 2818. 
(See https://github.com/shazow/urllib3/issues/497 ' + 'for details.)'.format(hostname)), + SubjectAltNameWarning ) match_hostname(cert, self.assert_hostname or hostname) diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py index 117269a..c958725 100644 --- a/urllib3/connectionpool.py +++ b/urllib3/connectionpool.py @@ -17,6 +17,7 @@ from .exceptions import ( ClosedPoolError, ProtocolError, EmptyPoolError, + HeaderParsingError, HostChangedError, LocationValueError, MaxRetryError, @@ -38,9 +39,10 @@ from .request import RequestMethods from .response import HTTPResponse from .util.connection import is_connection_dropped +from .util.response import assert_header_parsing from .util.retry import Retry from .util.timeout import Timeout -from .util.url import get_host +from .util.url import get_host, Url xrange = six.moves.xrange @@ -120,7 +122,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): :param maxsize: Number of connections to save that can be reused. More than 1 is useful - in multithreaded situations. If ``block`` is set to false, more + in multithreaded situations. If ``block`` is set to False, more connections will be created but they will not be saved once they've been used. @@ -381,8 +383,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): log.debug("\"%s %s %s\" %s %s" % (method, url, http_version, httplib_response.status, httplib_response.length)) + + try: + assert_header_parsing(httplib_response.msg) + except HeaderParsingError as hpe: # Platform-specific: Python 3 + log.warning( + 'Failed to parse headers (url=%s): %s', + self._absolute_url(url), hpe, exc_info=True) + return httplib_response + def _absolute_url(self, path): + return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url + def close(self): """ Close all pooled connections and disable the pool. @@ -409,7 +422,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # TODO: Add optional support for socket.gethostbyname checking. scheme, host, port = get_host(url) - + # Use explicit default port for comparison when none is given if self.port and not port: port = port_by_scheme.get(scheme) @@ -568,25 +581,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): # Close the connection. If a connection is reused on which there # was a Certificate error, the next request will certainly raise # another Certificate error. - if conn: - conn.close() - conn = None + conn = conn and conn.close() + release_conn = True raise SSLError(e) except SSLError: # Treat SSLError separately from BaseSSLError to preserve # traceback. - if conn: - conn.close() - conn = None + conn = conn and conn.close() + release_conn = True raise except (TimeoutError, HTTPException, SocketError, ConnectionError) as e: - if conn: - # Discard the connection for these exceptions. It will be - # be replaced during the next _get_conn() call. - conn.close() - conn = None + # Discard the connection for these exceptions. It will be + # be replaced during the next _get_conn() call. + conn = conn and conn.close() + release_conn = True if isinstance(e, SocketError) and self.proxy: e = ProxyError('Cannot connect to proxy.', e) @@ -626,6 +636,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods): retries = retries.increment(method, url, response=response, _pool=self) except MaxRetryError: if retries.raise_on_redirect: + # Release the connection for this response, since we're not + # returning it to be released manually. 
diff --git a/urllib3/connectionpool.py b/urllib3/connectionpool.py
index 117269a..c958725 100644
--- a/urllib3/connectionpool.py
+++ b/urllib3/connectionpool.py
@@ -17,6 +17,7 @@ from .exceptions import (
     ClosedPoolError,
     ProtocolError,
     EmptyPoolError,
+    HeaderParsingError,
     HostChangedError,
     LocationValueError,
     MaxRetryError,
@@ -38,9 +39,10 @@ from .request import RequestMethods
 from .response import HTTPResponse
 
 from .util.connection import is_connection_dropped
+from .util.response import assert_header_parsing
 from .util.retry import Retry
 from .util.timeout import Timeout
-from .util.url import get_host
+from .util.url import get_host, Url
 
 
 xrange = six.moves.xrange
@@ -120,7 +122,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
 
     :param maxsize:
         Number of connections to save that can be reused. More than 1 is useful
-        in multithreaded situations. If ``block`` is set to false, more
+        in multithreaded situations. If ``block`` is set to False, more
         connections will be created but they will not be saved once they've
         been used.
 
@@ -381,8 +383,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
         log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
                                           httplib_response.status,
                                           httplib_response.length))
+
+        try:
+            assert_header_parsing(httplib_response.msg)
+        except HeaderParsingError as hpe:  # Platform-specific: Python 3
+            log.warning(
+                'Failed to parse headers (url=%s): %s',
+                self._absolute_url(url), hpe, exc_info=True)
+
         return httplib_response
 
+    def _absolute_url(self, path):
+        return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
+
     def close(self):
         """
         Close all pooled connections and disable the pool.
@@ -409,7 +422,7 @@
         # TODO: Add optional support for socket.gethostbyname checking.
         scheme, host, port = get_host(url)
-        
+
         # Use explicit default port for comparison when none is given
         if self.port and not port:
             port = port_by_scheme.get(scheme)
@@ -568,25 +581,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
             # Close the connection. If a connection is reused on which there
             # was a Certificate error, the next request will certainly raise
             # another Certificate error.
-            if conn:
-                conn.close()
-                conn = None
+            conn = conn and conn.close()
+            release_conn = True
             raise SSLError(e)
 
         except SSLError:
             # Treat SSLError separately from BaseSSLError to preserve
             # traceback.
-            if conn:
-                conn.close()
-                conn = None
+            conn = conn and conn.close()
+            release_conn = True
             raise
 
         except (TimeoutError, HTTPException, SocketError, ConnectionError) as e:
-            if conn:
-                # Discard the connection for these exceptions. It will be
-                # be replaced during the next _get_conn() call.
-                conn.close()
-                conn = None
+            # Discard the connection for these exceptions. It will be
+            # replaced during the next _get_conn() call.
+            conn = conn and conn.close()
+            release_conn = True
 
             if isinstance(e, SocketError) and self.proxy:
                 e = ProxyError('Cannot connect to proxy.', e)
@@ -626,6 +636,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
                 retries = retries.increment(method, url, response=response, _pool=self)
             except MaxRetryError:
                 if retries.raise_on_redirect:
+                    # Release the connection for this response, since we're not
+                    # returning it to be released manually.
+                    response.release_conn()
                     raise
                 return response
 
@@ -683,6 +696,10 @@ class HTTPSConnectionPool(HTTPConnectionPool):
         HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
                                     block, headers, retries, _proxy, _proxy_headers,
                                     **conn_kw)
+
+        if ca_certs and cert_reqs is None:
+            cert_reqs = 'CERT_REQUIRED'
+
         self.key_file = key_file
         self.cert_file = cert_file
         self.cert_reqs = cert_reqs
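``_absolute_url`` above exists purely so the header-parsing warning can log a full URL; it leans on ``urllib3.util.url.Url``, now imported at the top of the module. Roughly::

    from urllib3.util.url import Url

    # Recompose scheme/host/port/path into a printable URL, as the
    # 'Failed to parse headers' log message does.
    Url(scheme='http', host='example.com', port=80, path='/ping').url
    # 'http://example.com:80/ping'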
diff --git a/urllib3/contrib/appengine.py b/urllib3/contrib/appengine.py
new file mode 100644
index 0000000..ed9d8b8
--- /dev/null
+++ b/urllib3/contrib/appengine.py
@@ -0,0 +1,222 @@
+import logging
+import os
+import warnings
+
+from ..exceptions import (
+    HTTPError,
+    HTTPWarning,
+    MaxRetryError,
+    ProtocolError,
+    TimeoutError,
+    SSLError
+)
+
+from ..packages.six import BytesIO
+from ..request import RequestMethods
+from ..response import HTTPResponse
+from ..util.timeout import Timeout
+from ..util.retry import Retry
+
+try:
+    from google.appengine.api import urlfetch
+except ImportError:
+    urlfetch = None
+
+
+log = logging.getLogger(__name__)
+
+
+class AppEnginePlatformWarning(HTTPWarning):
+    pass
+
+
+class AppEnginePlatformError(HTTPError):
+    pass
+
+
+class AppEngineManager(RequestMethods):
+    """
+    Connection manager for Google App Engine sandbox applications.
+
+    This manager uses the URLFetch service directly instead of using the
+    emulated httplib, and is subject to URLFetch limitations as described in
+    the App Engine documentation here:
+
+        https://cloud.google.com/appengine/docs/python/urlfetch
+
+    Notably it will raise an AppEnginePlatformError if:
+        * URLFetch is not available.
+        * You attempt to use this on GAEv2 (Managed VMs), where full socket
+          support is available.
+        * A request size is more than 10 megabytes.
+        * A response size is more than 32 megabytes.
+        * You use an unsupported request method such as OPTIONS.
+
+    Beyond those cases, it will raise normal urllib3 errors.
+    """
+
+    def __init__(self, headers=None, retries=None, validate_certificate=True):
+        if not urlfetch:
+            raise AppEnginePlatformError(
+                "URLFetch is not available in this environment.")
+
+        if is_prod_appengine_v2():
+            raise AppEnginePlatformError(
+                "Use normal urllib3.PoolManager instead of AppEngineManager "
+                "on Managed VMs, as using URLFetch is not necessary in "
+                "this environment.")
+
+        warnings.warn(
+            "urllib3 is using URLFetch on Google App Engine sandbox instead "
+            "of sockets. To use sockets directly instead of URLFetch see "
+            "https://urllib3.readthedocs.org/en/latest/contrib.html.",
+            AppEnginePlatformWarning)
+
+        RequestMethods.__init__(self, headers)
+        self.validate_certificate = validate_certificate
+
+        self.retries = retries or Retry.DEFAULT
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Return False to re-raise any potential exceptions
+        return False
+
+    def urlopen(self, method, url, body=None, headers=None,
+                retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
+                **response_kw):
+
+        retries = self._get_retries(retries, redirect)
+
+        try:
+            response = urlfetch.fetch(
+                url,
+                payload=body,
+                method=method,
+                headers=headers or {},
+                allow_truncated=False,
+                follow_redirects=(
+                    redirect and
+                    retries.redirect != 0 and
+                    retries.total),
+                deadline=self._get_absolute_timeout(timeout),
+                validate_certificate=self.validate_certificate,
+            )
+        except urlfetch.DeadlineExceededError as e:
+            raise TimeoutError(self, e)
+
+        except urlfetch.InvalidURLError as e:
+            if 'too large' in e.message:
+                raise AppEnginePlatformError(
+                    "URLFetch request too large, URLFetch only "
+                    "supports requests up to 10mb in size.", e)
+            raise ProtocolError(e)
+
+        except urlfetch.DownloadError as e:
+            if 'Too many redirects' in e.message:
+                raise MaxRetryError(self, url, reason=e)
+            raise ProtocolError(e)
+
+        except urlfetch.ResponseTooLargeError as e:
+            raise AppEnginePlatformError(
+                "URLFetch response too large, URLFetch only supports "
+                "responses up to 32mb in size.", e)
+
+        except urlfetch.SSLCertificateError as e:
+            raise SSLError(e)
+
+        except urlfetch.InvalidMethodError as e:
+            raise AppEnginePlatformError(
+                "URLFetch does not support method: %s" % method, e)
+
+        http_response = self._urlfetch_response_to_http_response(
+            response, **response_kw)
+
+        # Check for redirect response
+        if (http_response.get_redirect_location() and
+                retries.raise_on_redirect and redirect):
+            raise MaxRetryError(self, url, "too many redirects")
+
+        # Check if we should retry the HTTP response.
+        if retries.is_forced_retry(method, status_code=http_response.status):
+            retries = retries.increment(
+                method, url, response=http_response, _pool=self)
+            log.info("Forced retry: %s" % url)
+            retries.sleep()
+            return self.urlopen(
+                method, url,
+                body=body, headers=headers,
+                retries=retries, redirect=redirect,
+                timeout=timeout, **response_kw)
+
+        return http_response
+
+    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
+
+        if is_prod_appengine_v1():
+            # Production GAE handles deflate encoding automatically, but does
+            # not remove the encoding header.
+            content_encoding = urlfetch_resp.headers.get('content-encoding')
+
+            if content_encoding == 'deflate':
+                del urlfetch_resp.headers['content-encoding']
+
+        return HTTPResponse(
+            # In order for decoding to work, we must present the content as
+            # a file-like object.
+            body=BytesIO(urlfetch_resp.content),
+            headers=urlfetch_resp.headers,
+            status=urlfetch_resp.status_code,
+            **response_kw
+        )
+
+    def _get_absolute_timeout(self, timeout):
+        if timeout is Timeout.DEFAULT_TIMEOUT:
+            return 5  # 5s is the default timeout for URLFetch.
+        if isinstance(timeout, Timeout):
+            if timeout.read is not timeout.connect:
+                warnings.warn(
+                    "URLFetch does not support granular timeout settings, "
+                    "reverting to total timeout.", AppEnginePlatformWarning)
+            return timeout.total
+        return timeout
+
+    def _get_retries(self, retries, redirect):
+        if not isinstance(retries, Retry):
+            retries = Retry.from_int(
+                retries, redirect=redirect, default=self.retries)
+
+        if retries.connect or retries.read or retries.redirect:
+            warnings.warn(
+                "URLFetch only supports total retries and does not "
+                "recognize connect, read, or redirect retry parameters.",
+                AppEnginePlatformWarning)
+
+        return retries
+
+
+def is_appengine():
+    return (is_local_appengine() or
+            is_prod_appengine_v1() or
+            is_prod_appengine_v2())
+
+
+def is_appengine_sandbox():
+    return is_appengine() and not is_prod_appengine_v2()
+
+
+def is_local_appengine():
+    return ('APPENGINE_RUNTIME' in os.environ and
+            'Development/' in os.environ['SERVER_SOFTWARE'])
+
+
+def is_prod_appengine_v1():
+    return ('APPENGINE_RUNTIME' in os.environ and
+            'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
+            not is_prod_appengine_v2())
+
+
+def is_prod_appengine_v2():
+    return os.environ.get('GAE_VM', False) == 'true'
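Note how ``_get_absolute_timeout`` above collapses a granular ``Timeout`` to its total, since URLFetch only accepts a single deadline. A sketch of the caller-visible effect (values are illustrative, and this only matters inside an App Engine sandbox)::

    from urllib3.util.timeout import Timeout

    # connect and read differ, so AppEngineManager would emit an
    # AppEnginePlatformWarning and pass total=10.0 as the deadline.
    timeout = Timeout(connect=2.0, read=7.0, total=10.0)
    # manager.urlopen('GET', 'http://example.com/', timeout=timeout)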
diff --git a/urllib3/contrib/pyopenssl.py b/urllib3/contrib/pyopenssl.py
index b2c34a8..19c5b4e 100644
--- a/urllib3/contrib/pyopenssl.py
+++ b/urllib3/contrib/pyopenssl.py
@@ -85,6 +85,14 @@ _openssl_verify = {
 
 DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS
 
+# OpenSSL will only write 16K at a time
+SSL_WRITE_BLOCKSIZE = 16384
+
+try:
+    _ = memoryview
+    has_memoryview = True
+except NameError:
+    has_memoryview = False
 
 orig_util_HAS_SNI = util.HAS_SNI
 orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
@@ -204,13 +212,21 @@ class WrappedSocket(object):
                 continue
 
     def sendall(self, data):
-        while len(data):
-            sent = self._send_until_done(data)
-            data = data[sent:]
+        if has_memoryview and not isinstance(data, memoryview):
+            data = memoryview(data)
+
+        total_sent = 0
+        while total_sent < len(data):
+            sent = self._send_until_done(data[total_sent:total_sent+SSL_WRITE_BLOCKSIZE])
+            total_sent += sent
+
+    def shutdown(self):
+        # FIXME rethrow compatible exceptions should we ever use this
+        self.connection.shutdown()
 
     def close(self):
         if self._makefile_refs < 1:
-            return self.connection.shutdown()
+            return self.connection.close()
         else:
             self._makefile_refs -= 1
@@ -287,7 +303,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                 raise timeout('select timed out')
             continue
         except OpenSSL.SSL.Error as e:
-            raise ssl.SSLError('bad handshake', e)
+            raise ssl.SSLError('bad handshake: %r' % e)
         break
 
     return WrappedSocket(cnx, sock)
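The ``sendall`` rewrite above is the fix for the PyOpenSSL performance regression (Issue #626): slicing a ``memoryview`` is a zero-copy window into the buffer, and writes are capped at OpenSSL's 16K block size, so large uploads no longer re-copy the tail of the payload on every iteration. The same pattern in isolation, with a stand-in ``send`` callable::

    SSL_WRITE_BLOCKSIZE = 16384

    def sendall(send, data):
        # data[i:j] on a memoryview is a view, not a new bytes object.
        data = memoryview(data)
        total_sent = 0
        while total_sent < len(data):
            total_sent += send(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])

    chunks = []

    def fake_send(view):
        n = min(len(view), 1000)  # pretend the socket accepted 1000 bytes
        chunks.append(bytes(view[:n]))
        return n

    sendall(fake_send, b'x' * 50000)
    assert b''.join(chunks) == b'x' * 50000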
diff --git a/urllib3/exceptions.py b/urllib3/exceptions.py
index 31bda1c..36ce0d1 100644
--- a/urllib3/exceptions.py
+++ b/urllib3/exceptions.py
@@ -149,6 +149,11 @@ class SecurityWarning(HTTPWarning):
     pass
 
 
+class SubjectAltNameWarning(SecurityWarning):
+    "Warned when connecting to a host with a certificate missing a SAN."
+    pass
+
+
 class InsecureRequestWarning(SecurityWarning):
     "Warned when making an unverified HTTPS request."
     pass
@@ -167,3 +172,19 @@ class InsecurePlatformWarning(SecurityWarning):
 class ResponseNotChunked(ProtocolError, ValueError):
     "Response needs to be chunked in order to read it as chunks."
     pass
+
+
+class ProxySchemeUnknown(AssertionError, ValueError):
+    "ProxyManager does not support the supplied scheme"
+    # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
+
+    def __init__(self, scheme):
+        message = "Not supported proxy scheme %s" % scheme
+        super(ProxySchemeUnknown, self).__init__(message)
+
+
+class HeaderParsingError(HTTPError):
+    "Raised by assert_header_parsing, but we convert it to a log.warning statement."
+    def __init__(self, defects, unparsed_data):
+        message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
+        super(HeaderParsingError, self).__init__(message)
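Because ``ProxySchemeUnknown`` above subclasses both ``AssertionError`` and ``ValueError``, callers that caught the old ``assert`` keep working while new code can catch the ``ValueError`` promised in the changelog (Issue #640). A sketch::

    from urllib3 import ProxyManager
    from urllib3.exceptions import ProxySchemeUnknown

    try:
        ProxyManager('foo://localhost:3128')
    except ValueError as e:
        print(e)  # Not supported proxy scheme foo

    # Backwards compatible, for now:
    assert issubclass(ProxySchemeUnknown, AssertionError)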
diff --git a/urllib3/poolmanager.py b/urllib3/poolmanager.py
index b8d1e74..76b6a12 100644
--- a/urllib3/poolmanager.py
+++ b/urllib3/poolmanager.py
@@ -8,7 +8,7 @@ except ImportError:
 from ._collections import RecentlyUsedContainer
 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
 from .connectionpool import port_by_scheme
-from .exceptions import LocationValueError, MaxRetryError
+from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
 from .request import RequestMethods
 from .util.url import parse_url
 from .util.retry import Retry
@@ -227,8 +227,8 @@ class ProxyManager(PoolManager):
             port = port_by_scheme.get(proxy.scheme, 80)
             proxy = proxy._replace(port=port)
 
-        assert proxy.scheme in ("http", "https"), \
-            'Not supported proxy scheme %s' % proxy.scheme
+        if proxy.scheme not in ("http", "https"):
+            raise ProxySchemeUnknown(proxy.scheme)
 
         self.proxy = proxy
         self.proxy_headers = proxy_headers or {}
diff --git a/urllib3/request.py b/urllib3/request.py
index b08d6c9..a1a12bc 100644
--- a/urllib3/request.py
+++ b/urllib3/request.py
@@ -71,14 +71,22 @@ class RequestMethods(object):
                                         headers=headers,
                                         **urlopen_kw)
 
-    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
+    def request_encode_url(self, method, url, fields=None, headers=None,
+                           **urlopen_kw):
         """
         Make a request using :meth:`urlopen` with the ``fields`` encoded in
         the url. This is useful for request methods like GET, HEAD, DELETE, etc.
         """
+        if headers is None:
+            headers = self.headers
+
+        extra_kw = {'headers': headers}
+        extra_kw.update(urlopen_kw)
+
         if fields:
             url += '?' + urlencode(fields)
-        return self.urlopen(method, url, **urlopen_kw)
+
+        return self.urlopen(method, url, **extra_kw)
 
     def request_encode_body(self, method, url, fields=None, headers=None,
                             encode_multipart=True, multipart_boundary=None,
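The ``request_encode_url`` change above is what fixes pool-default headers being dropped from url-encoded requests such as GET (Issue #657). A sketch — the header value is illustrative::

    import urllib3

    http = urllib3.PoolManager(headers={'User-Agent': 'example-agent/1.0'})

    # GET goes through request_encode_url(), which previously never
    # consulted self.headers; the pool default is now applied.
    r = http.request('GET', 'http://httpbin.org/headers')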
""" headers = r.msg + if not isinstance(headers, HTTPHeaderDict): if PY3: # Python 3 headers = HTTPHeaderDict(headers.items()) @@ -437,30 +460,29 @@ class HTTPResponse(io.IOBase): raise ResponseNotChunked("Response is not chunked. " "Header 'transfer-encoding: chunked' is missing.") - if self._original_response and self._original_response._method.upper() == 'HEAD': - # Don't bother reading the body of a HEAD request. - # FIXME: Can we do this somehow without accessing private httplib _method? + # Don't bother reading the body of a HEAD request. + if self._original_response and is_response_to_head(self._original_response): self._original_response.close() return - while True: - self._update_chunk_length() - if self.chunk_left == 0: - break - chunk = self._handle_chunk(amt) - yield self._decode(chunk, decode_content=decode_content, - flush_decoder=True) - - # Chunk content ends with \r\n: discard it. - while True: - line = self._fp.fp.readline() - if not line: - # Some sites may not end with '\r\n'. - break - if line == b'\r\n': - break - - # We read everything; close the "file". - if self._original_response: - self._original_response.close() - self.release_conn() + with self._error_catcher(): + while True: + self._update_chunk_length() + if self.chunk_left == 0: + break + chunk = self._handle_chunk(amt) + yield self._decode(chunk, decode_content=decode_content, + flush_decoder=True) + + # Chunk content ends with \r\n: discard it. + while True: + line = self._fp.fp.readline() + if not line: + # Some sites may not end with '\r\n'. + break + if line == b'\r\n': + break + + # We read everything; close the "file". + if self._original_response: + self._original_response.close() diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py index 859aec6..9ed5a64 100644 --- a/urllib3/util/connection.py +++ b/urllib3/util/connection.py @@ -60,6 +60,8 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, """ host, port = address + if host.startswith('['): + host = host.strip('[]') err = None for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res diff --git a/urllib3/util/response.py b/urllib3/util/response.py index 45fff55..767ee15 100644 --- a/urllib3/util/response.py +++ b/urllib3/util/response.py @@ -1,3 +1,11 @@ +try: + import http.client as httplib +except ImportError: + import httplib + +from ..exceptions import HeaderParsingError + + def is_fp_closed(obj): """ Checks whether a given file-like object is closed. @@ -20,3 +28,49 @@ def is_fp_closed(obj): pass raise ValueError("Unable to determine whether fp is closed.") + + +def assert_header_parsing(headers): + """ + Asserts whether all headers have been successfully parsed. + Extracts encountered errors from the result of parsing headers. + + Only works on Python 3. + + :param headers: Headers to verify. + :type headers: `httplib.HTTPMessage`. + + :raises urllib3.exceptions.HeaderParsingError: + If parsing errors are found. + """ + + # This will fail silently if we pass in the wrong kind of parameter. + # To make debugging easier add an explicit check. + if not isinstance(headers, httplib.HTTPMessage): + raise TypeError('expected httplib.Message, got {}.'.format( + type(headers))) + + defects = getattr(headers, 'defects', None) + get_payload = getattr(headers, 'get_payload', None) + + unparsed_data = None + if get_payload: # Platform-specific: Python 3. 
diff --git a/urllib3/util/connection.py b/urllib3/util/connection.py
index 859aec6..9ed5a64 100644
--- a/urllib3/util/connection.py
+++ b/urllib3/util/connection.py
@@ -60,6 +60,8 @@ def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     """
 
     host, port = address
+    if host.startswith('['):
+        host = host.strip('[]')
     err = None
     for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
         af, socktype, proto, canonname, sa = res
diff --git a/urllib3/util/response.py b/urllib3/util/response.py
index 45fff55..767ee15 100644
--- a/urllib3/util/response.py
+++ b/urllib3/util/response.py
@@ -1,3 +1,11 @@
+try:
+    import http.client as httplib
+except ImportError:
+    import httplib
+
+from ..exceptions import HeaderParsingError
+
+
 def is_fp_closed(obj):
     """
     Checks whether a given file-like object is closed.
@@ -20,3 +28,49 @@ def is_fp_closed(obj):
         pass
 
     raise ValueError("Unable to determine whether fp is closed.")
+
+
+def assert_header_parsing(headers):
+    """
+    Asserts whether all headers have been successfully parsed.
+    Extracts encountered errors from the result of parsing headers.
+
+    Only works on Python 3.
+
+    :param headers: Headers to verify.
+    :type headers: `httplib.HTTPMessage`.
+
+    :raises urllib3.exceptions.HeaderParsingError:
+        If parsing errors are found.
+    """
+
+    # This will fail silently if we pass in the wrong kind of parameter.
+    # To make debugging easier add an explicit check.
+    if not isinstance(headers, httplib.HTTPMessage):
+        raise TypeError('expected httplib.HTTPMessage, got {}.'.format(
+            type(headers)))
+
+    defects = getattr(headers, 'defects', None)
+    get_payload = getattr(headers, 'get_payload', None)
+
+    unparsed_data = None
+    if get_payload:  # Platform-specific: Python 3.
+        unparsed_data = get_payload()
+
+    if defects or unparsed_data:
+        raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
+
+
+def is_response_to_head(response):
+    """
+    Checks whether the request that produced a response was a HEAD request.
+    Handles the quirks of AppEngine.
+
+    :param response: Response to check.
+    :type response: :class:`httplib.HTTPResponse`
+    """
+    # FIXME: Can we do this somehow without accessing private httplib _method?
+    method = response._method
+    if isinstance(method, int):  # Platform-specific: Appengine
+        return method == 3
+    return method.upper() == 'HEAD'
diff --git a/urllib3/util/retry.py b/urllib3/util/retry.py
index 7e0959d..1fb1f23 100644
--- a/urllib3/util/retry.py
+++ b/urllib3/util/retry.py
@@ -94,7 +94,7 @@ class Retry(object):
         seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
         for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
-        than :attr:`Retry.MAX_BACKOFF`.
+        than :attr:`Retry.BACKOFF_MAX`.
 
         By default, backoff is disabled (set to 0).
 
diff --git a/urllib3/util/ssl_.py b/urllib3/util/ssl_.py
index b846d42..311378b 100644
--- a/urllib3/util/ssl_.py
+++ b/urllib3/util/ssl_.py
@@ -8,6 +8,13 @@
 SSLContext = None
 HAS_SNI = False
 create_default_context = None
 
+# Maps the length of a hex digest to a possible hash function producing it
+HASHFUNC_MAP = {
+    32: md5,
+    40: sha1,
+    64: sha256,
+}
+
 import errno
 import warnings
 
@@ -112,31 +119,21 @@ def assert_fingerprint(cert, fingerprint):
         Fingerprint as string of hexdigits, can be interspersed by colons.
     """
 
-    # Maps the length of a digest to a possible hash function producing
-    # this digest.
-    hashfunc_map = {
-        16: md5,
-        20: sha1,
-        32: sha256,
-    }
-
     fingerprint = fingerprint.replace(':', '').lower()
-    digest_length, odd = divmod(len(fingerprint), 2)
-
-    if odd or digest_length not in hashfunc_map:
-        raise SSLError('Fingerprint is of invalid length.')
+    digest_length = len(fingerprint)
+    hashfunc = HASHFUNC_MAP.get(digest_length)
+    if not hashfunc:
+        raise SSLError(
+            'Fingerprint of invalid length: {0}'.format(fingerprint))
 
     # We need encode() here for py32; works on py2 and py33.
     fingerprint_bytes = unhexlify(fingerprint.encode())
 
-    hashfunc = hashfunc_map[digest_length]
-
     cert_digest = hashfunc(cert).digest()
 
-    if not cert_digest == fingerprint_bytes:
+    if cert_digest != fingerprint_bytes:
         raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
-                       .format(hexlify(fingerprint_bytes),
-                               hexlify(cert_digest)))
+                       .format(fingerprint, hexlify(cert_digest)))
 
 
 def resolve_cert_reqs(candidate):
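For reference, the keys of the new module-level ``HASHFUNC_MAP`` are hex-digest lengths (md5 is 32 characters, sha1 is 40, sha256 is 64), so a pinned fingerprint selects its hash function purely by length. A sketch with a hypothetical certificate blob::

    import hashlib
    from urllib3.util.ssl_ import assert_fingerprint

    cert_der = b'...'  # hypothetical DER-encoded certificate bytes

    # A sha256 pin is 64 hex characters; colons are stripped before matching.
    pin = hashlib.sha256(cert_der).hexdigest()
    assert_fingerprint(cert_der, pin)  # passes
    # assert_fingerprint(cert_der, pin[:-2])  # SSLError: invalid length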